From fe710790104015e01a1f9e2ed948b088197d4d44 Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:10:05 -0500 Subject: [PATCH 001/150] fix: add missing db files --- R/auxiliary_giotto.R | 9 ++- tests/testthat/test-dbMatrix_filterGiotto.R | 43 ++++++++++++++ tests/testthat/test-dbMatrix_libNorm.R | 62 +++++++++++++++++++++ tests/testthat/test-dbMatrix_logNorm.R | 62 +++++++++++++++++++++ tests/testthat/test-dbMatrix_scale.R | 62 +++++++++++++++++++++ 5 files changed, 235 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/test-dbMatrix_filterGiotto.R create mode 100644 tests/testthat/test-dbMatrix_libNorm.R create mode 100644 tests/testthat/test-dbMatrix_logNorm.R create mode 100644 tests/testthat/test-dbMatrix_scale.R diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 6d2c3f623..2d83d5df2 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -39,15 +39,18 @@ #' @title Log normalize expression matrix #' @keywords internal .log_norm_giotto = function(mymatrix, base, offset) { - + if(methods::is(mymatrix, 'DelayedArray')) { mymatrix = log(mymatrix + offset)/log(base) # } else if(methods::is(mymatrix, 'DelayedMatrix')) { # mymatrix = log(mymatrix + offset)/log(base) - } else if(methods::is(mymatrix, 'dgCMatrix')) { - mymatrix@x = log(mymatrix@x + offset)/log(base) # replace with sparseMatrixStats + } else if(methods::is(mymatrix, 'dgCMatrix')) { + mymatrix@x = log(mymatrix@x + offset)/log(base)# replace with sparseMatrixStats } else if(methods::is(mymatrix, 'Matrix')) { mymatrix@x = log(mymatrix@x + offset)/log(base) + } else if(methods::is(mymatrix, 'dbMatrix')) { + mymatrix[] = dplyr::mutate(mymatrix[], x = x + offset) # workaround for lack of @x slot + mymatrix = log(mymatrix)/log(base) } else { mymatrix = log(as.matrix(mymatrix) + offset)/log(base) } diff --git a/tests/testthat/test-dbMatrix_filterGiotto.R b/tests/testthat/test-dbMatrix_filterGiotto.R new file mode 100644 index 000000000..ad18666e9 --- /dev/null +++ b/tests/testthat/test-dbMatrix_filterGiotto.R @@ -0,0 +1,43 @@ +# silence deprecated internal functions +rlang::local_options(lifecycle_verbosity = "quiet") + +# ---------------------------------------------------------------------------- # +# Setup data +visium = GiottoData::loadGiottoMini(dataset = "visium") +dgc = getExpression(visium, output = "matrix") + +dbsm = dbMatrix::createDBMatrix(value = dgc, + db_path = ":temp:", + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create exprObj with dbsm +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create giotto object +gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) + +# ---------------------------------------------------------------------------- # +# Perform filtering +visium_filtered = filterGiotto(visium, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# Get filtered matrix +dgc_visium = getExpression(visium_filtered, output = "matrix") +mat_db = getExpression(gobject_db_filtered, output = "matrix") +dgc_db = dbMatrix:::as_matrix(mat_db) + +# ---------------------------------------------------------------------------- # +# Test filterGiotto() equivalence between dbMatrix and dgCMatrix + +test_that("dbMatrix equivalent to dgCMatrix after filterGiotto()", { + expect_equal(dgc_visium, dgc_db) +}) \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_libNorm.R b/tests/testthat/test-dbMatrix_libNorm.R new file mode 100644 index 000000000..f37d27037 --- /dev/null +++ b/tests/testthat/test-dbMatrix_libNorm.R @@ -0,0 +1,62 @@ +# silence deprecated internal functions +rlang::local_options(lifecycle_verbosity = "quiet") + +# ---------------------------------------------------------------------------- # +# Setup data +visium = GiottoData::loadGiottoMini(dataset = "visium") +dgc = getExpression(visium, output = "matrix") + +dbsm = dbMatrix::createDBMatrix(value = dgc, + db_path = ":temp:", + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create exprObj with dbsm +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create giotto object +gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) + +# ---------------------------------------------------------------------------- # +# Perform filtering +visium_filtered = filterGiotto(visium, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# ---------------------------------------------------------------------------- # +# Perform library normalization and scaling +visium_filtered = normalizeGiotto(gobject = visium_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = TRUE, + log_norm = FALSE, + scale_feats = FALSE, + scale_cells = FALSE) + + +gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = TRUE, + log_norm = FALSE, + scale_feats = FALSE, + scale_cells = FALSE) +# Get normalized matrix +dgc_visium = getExpression(visium_filtered, output = "matrix", values = "normalized") +mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "normalized") +dgc_db = dbMatrix:::as_matrix(mat_db) + +# ---------------------------------------------------------------------------- # +# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix +test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(library_size_norm = TRUE)", { + expect_equal(dgc_visium, dgc_db) +}) \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_logNorm.R b/tests/testthat/test-dbMatrix_logNorm.R new file mode 100644 index 000000000..cd813289a --- /dev/null +++ b/tests/testthat/test-dbMatrix_logNorm.R @@ -0,0 +1,62 @@ +# silence deprecated internal functions +rlang::local_options(lifecycle_verbosity = "quiet") + +# ---------------------------------------------------------------------------- # +# Setup data +visium = GiottoData::loadGiottoMini(dataset = "visium") +dgc = getExpression(visium, output = "matrix") + +dbsm = dbMatrix::createDBMatrix(value = dgc, + db_path = ":temp:", + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create exprObj with dbsm +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create giotto object +gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) + +# ---------------------------------------------------------------------------- # +# Perform filtering +visium_filtered = filterGiotto(visium, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# ---------------------------------------------------------------------------- # +# Perform library normalization and scaling +visium_filtered = normalizeGiotto(gobject = visium_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = TRUE, + scale_feats = FALSE, + scale_cells = FALSE) + + +gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = TRUE, + scale_feats = FALSE, + scale_cells = FALSE) +# Get normalized matrix +dgc_visium = getExpression(visium_filtered, output = "matrix", values = "normalized") +mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "normalized") +dgc_db = dbMatrix:::as_matrix(mat_db) + +# ---------------------------------------------------------------------------- # +# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix +test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(log_norm=TRUE)", { + expect_equal(dgc_visium, dgc_db) +}) \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_scale.R b/tests/testthat/test-dbMatrix_scale.R new file mode 100644 index 000000000..63227dd80 --- /dev/null +++ b/tests/testthat/test-dbMatrix_scale.R @@ -0,0 +1,62 @@ +# silence deprecated internal functions +rlang::local_options(lifecycle_verbosity = "quiet") + +# ---------------------------------------------------------------------------- # +# Setup data +visium = GiottoData::loadGiottoMini(dataset = "visium") +dgc = getExpression(visium, output = "matrix") + +dbsm = dbMatrix::createDBMatrix(value = dgc, + db_path = ":temp:", + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create exprObj with dbsm +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create giotto object +gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) + +# ---------------------------------------------------------------------------- # +# Perform filtering +visium_filtered = filterGiotto(visium, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# ---------------------------------------------------------------------------- # +# Perform library normalization and scaling +visium_filtered = normalizeGiotto(gobject = visium_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = FALSE, + scale_feats = TRUE, + scale_cells = TRUE) + + +gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = FALSE, + scale_feats = TRUE, + scale_cells = TRUE) +# Get normalized matrix +dgc_visium = getExpression(visium_filtered, output = "matrix", values = "scaled") |> as.matrix() +mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "scaled") +dgc_db = dbMatrix:::as_matrix(mat_db) + +# ---------------------------------------------------------------------------- # +# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix +test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(scale_feats=T,scale=cells=T)", { + expect_equal(dgc_visium, dgc_db) +}) \ No newline at end of file From d4532b442130cdf795a3fe1e2f855c0ec89b12da Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:55:17 -0500 Subject: [PATCH 002/150] chore: remove :temp: in place of :memory: in tests --- tests/testthat/test-dbMatrix_filterGiotto.R | 4 ++-- tests/testthat/test-dbMatrix_libNorm.R | 2 +- tests/testthat/test-dbMatrix_logNorm.R | 2 +- tests/testthat/test-dbMatrix_scale.R | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-dbMatrix_filterGiotto.R b/tests/testthat/test-dbMatrix_filterGiotto.R index ad18666e9..bd73266e3 100644 --- a/tests/testthat/test-dbMatrix_filterGiotto.R +++ b/tests/testthat/test-dbMatrix_filterGiotto.R @@ -7,7 +7,7 @@ visium = GiottoData::loadGiottoMini(dataset = "visium") dgc = getExpression(visium, output = "matrix") dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":temp:", + db_path = ":memory:", name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) @@ -40,4 +40,4 @@ dgc_db = dbMatrix:::as_matrix(mat_db) test_that("dbMatrix equivalent to dgCMatrix after filterGiotto()", { expect_equal(dgc_visium, dgc_db) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-dbMatrix_libNorm.R b/tests/testthat/test-dbMatrix_libNorm.R index f37d27037..755757b02 100644 --- a/tests/testthat/test-dbMatrix_libNorm.R +++ b/tests/testthat/test-dbMatrix_libNorm.R @@ -7,7 +7,7 @@ visium = GiottoData::loadGiottoMini(dataset = "visium") dgc = getExpression(visium, output = "matrix") dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":temp:", + db_path = ":memory:", name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) diff --git a/tests/testthat/test-dbMatrix_logNorm.R b/tests/testthat/test-dbMatrix_logNorm.R index cd813289a..4c76cf586 100644 --- a/tests/testthat/test-dbMatrix_logNorm.R +++ b/tests/testthat/test-dbMatrix_logNorm.R @@ -7,7 +7,7 @@ visium = GiottoData::loadGiottoMini(dataset = "visium") dgc = getExpression(visium, output = "matrix") dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":temp:", + db_path = ":memory:", name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) diff --git a/tests/testthat/test-dbMatrix_scale.R b/tests/testthat/test-dbMatrix_scale.R index 63227dd80..00c4b44ef 100644 --- a/tests/testthat/test-dbMatrix_scale.R +++ b/tests/testthat/test-dbMatrix_scale.R @@ -7,7 +7,7 @@ visium = GiottoData::loadGiottoMini(dataset = "visium") dgc = getExpression(visium, output = "matrix") dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":temp:", + db_path = ":memory:", name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) From 74f177aca8e56e9a5f9f821f4e5f0dc3932fd3de Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:55:44 -0500 Subject: [PATCH 003/150] chore: update DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8018669aa..7e9cf7405 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -126,7 +126,8 @@ Suggests: tiff, trendsceek, testthat (>= 3.0.0), - qs + qs, + rmarkdown Remotes: drieslab/GiottoUtils, drieslab/GiottoClass, From 243bc02d69bba927d21fc5c1977d513adcfcef56 Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:55:55 -0500 Subject: [PATCH 004/150] chore: update gitignore --- vignettes/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 vignettes/.gitignore diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 000000000..097b24163 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R From 3dbac5c24129cdc51dc50edc839e5362fb2562c5 Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:56:24 -0500 Subject: [PATCH 005/150] feat: add dbMatrix vignette (WIP) --- TODO: - instructions on dbMatrix installation - subsequent steps in Giotto workflow --- vignettes/dbMatrix.Rmd | 86 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 vignettes/dbMatrix.Rmd diff --git a/vignettes/dbMatrix.Rmd b/vignettes/dbMatrix.Rmd new file mode 100644 index 000000000..55f4ba0c2 --- /dev/null +++ b/vignettes/dbMatrix.Rmd @@ -0,0 +1,86 @@ +--- +title: "Using dbMatrix with Giotto" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using dbMatrix with Giotto} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Introduction +This vignette demonstrates how to use a [`dbMatrix`](https://github.com/drieslab/dbMatrix) within a Giotto Object. The `dbMatrix` is a database-backed matrix that can be used to store large matrices in a database. This allows for efficient storage and retrieval of large matrices and enables efficiently working with larger-than-memory cell count matrices. + +# 1. Set up Giotto + +```{r, eval=FALSE} +# Ensure Giotto Suite is installed. +if(!"Giotto" %in% installed.packages()) { + devtools::install_github("drieslab/Giotto@suite") +} + +# Ensure GiottoData, a small, helper module for tutorials, is installed. +if(!"GiottoData" %in% installed.packages()) { + devtools::install_github("drieslab/GiottoData") +} + +library(Giotto) +library(GiottoData) + +# Ensure the Python environment for Giotto has been installed. +genv_exists = checkGiottoEnvironment() +if(!genv_exists){ + # The following command need only be run once to install the Giotto environment. + installGiottoEnvironment() +} +``` + +# 2. Create Giotto object with `dbMatrix` + +```{r} +# Get test dataset from Giotto Data package +visium = GiottoData::loadGiottoMini(dataset = "visium") + +# Extract the cell expression matrix as a test dataset +dgc = getExpression(visium, output = "matrix") + +# Create a dbSparseMatrix using the dbMatrix constructor function +dbsm = dbMatrix::createDBMatrix(value = dgc, + db_path = ":memory:", + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) + +# Create Giotto exprObj with the dbMatrix +expObj_db = createExprObj(expression_data = dbsm, + expression_matrix_class = 'dbSparseMatrix', + name = 'raw') + +# Create the Giotto object consisting of only the cell count matrix +gobject_db = createGiottoObject(expression = expObj_db) +``` + +# 3. Preprocess Giotto object with `dbMatrix` +```{r} +# Perform filtering +gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", + feat_type = "rna", + expression_values = "raw") + +# Perform library normalization and scaling +gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, + spat_unit = 'cell', + feat_type = 'rna', + expression_values = 'raw', + library_size_norm = FALSE, + log_norm = FALSE, + scale_feats = TRUE, + scale_cells = TRUE) +``` + From accac3e1a0b2a365e6879e5b53cd0ce4a01343db Mon Sep 17 00:00:00 2001 From: Ed Ruiz Date: Wed, 6 Mar 2024 15:47:01 -0500 Subject: [PATCH 006/150] feat: .compute_dbMatrix() internal function added to standard rna normalization workflow --- TODO: - update custom normalization workflows. - add proper setters from dbMatrix --- R/auxiliary_giotto.R | 73 +++++++++++++++++++++++++++++++++++++----- vignettes/dbMatrix.Rmd | 9 +++++- 2 files changed, 73 insertions(+), 9 deletions(-) diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 2d83d5df2..a376ff450 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -39,12 +39,12 @@ #' @title Log normalize expression matrix #' @keywords internal .log_norm_giotto = function(mymatrix, base, offset) { - + if(methods::is(mymatrix, 'DelayedArray')) { mymatrix = log(mymatrix + offset)/log(base) # } else if(methods::is(mymatrix, 'DelayedMatrix')) { # mymatrix = log(mymatrix + offset)/log(base) - } else if(methods::is(mymatrix, 'dgCMatrix')) { + } else if(methods::is(mymatrix, 'dgCMatrix')) { mymatrix@x = log(mymatrix@x + offset)/log(base)# replace with sparseMatrixStats } else if(methods::is(mymatrix, 'Matrix')) { mymatrix@x = log(mymatrix@x + offset)/log(base) @@ -611,6 +611,46 @@ filterGiotto = function(gobject, ### normalization #### +#' @title compute_dbMatrix +#' @description saves dbMatrix to db if global option is set +#' @details +#' Set \code{options(giotto.dbmatrix_compute = FALSE)} if saving dbMatrix +#' after each step of normalization workflow is not desired. +#' @keywords internal +.compute_dbMatrix <- function(dbMatrix, name, verbose = TRUE) { + # input validation + if(!inherits(dbMatrix, 'dbMatrix')) { + stop('dbMatrix must be of class dbMatrix') + } + + if(!is.character(name)) { + stop('name must be a character') + } + + # TODO: update with dbData generic + con = dbMatrix:::get_con(dbMatrix) + + # overwrite table by default + if(name %in% DBI::dbListTables(con)) { + DBI::dbRemoveTable(con, name) + } + + if(verbose){ + msg <- glue::glue('Computing {name} expression matrix on disk...') + cat(msg) + } + + dbMatrix[] |> + dplyr::compute(temporary=F, name = name) + + # TODO: update below with proper setters from dbMatrix + dbMatrix[] <- dplyr::tbl(con, name) # reassign to computed mat + dbMatrix@name <- name + + if(verbose) cat('done \n') + + return(dbMatrix) +} #' @title RNA standard normalization #' @name .rna_standard_normalization @@ -629,7 +669,6 @@ filterGiotto = function(gobject, scale_cells = TRUE, scale_order = c('first_feats', 'first_cells'), verbose = TRUE) { - # check feature type compatibility if(!feat_type %in% c('rna', 'RNA')) { warning('Caution: Standard normalization was developed for RNA data \n') @@ -644,13 +683,18 @@ filterGiotto = function(gobject, feat_names = rownames(raw_expr[]) col_names = colnames(raw_expr[]) - - + # set global option options(giotto.dbmatrix_compute = FALSE) if not desired + # see ?dplyr::compute() for more details + if(inherits(raw_expr[], "dbMatrix")){ + compute_mat <- getOption("giotto.dbmatrix_compute", TRUE) + } else { + compute_mat <- FALSE + } ## 1. library size normalize if(library_size_norm == TRUE) { norm_expr = .lib_norm_giotto(mymatrix = raw_expr[], - scalefactor = scalefactor) + scalefactor = scalefactor) } else { norm_expr = raw_expr[] } @@ -658,8 +702,8 @@ filterGiotto = function(gobject, ## 2. lognormalize if(log_norm == TRUE) { norm_expr = .log_norm_giotto(mymatrix = norm_expr, - base = logbase, - offset = log_offset) + base = logbase, + offset = log_offset) } ## 3. scale @@ -721,12 +765,25 @@ filterGiotto = function(gobject, } ## 5. create and set exprObj + # Save dbMatrix to db if global option is set + if(compute_mat){ + norm_expr <- .compute_dbMatrix(dbMatrix = norm_expr, + name = 'normalized', + verbose = verbose) + } + norm_expr = create_expr_obj(name = 'normalized', exprMat = norm_expr, spat_unit = spat_unit, feat_type = feat_type, provenance = provenance, misc = NULL) + + if(compute_mat){ + norm_scaled_expr = .compute_dbMatrix(dbMatrix = norm_scaled_expr, + name = 'scaled', + verbose = verbose) + } norm_scaled_expr = create_expr_obj(name = 'scaled', exprMat = norm_scaled_expr, diff --git a/vignettes/dbMatrix.Rmd b/vignettes/dbMatrix.Rmd index 55f4ba0c2..3b42a2d46 100644 --- a/vignettes/dbMatrix.Rmd +++ b/vignettes/dbMatrix.Rmd @@ -50,9 +50,12 @@ visium = GiottoData::loadGiottoMini(dataset = "visium") # Extract the cell expression matrix as a test dataset dgc = getExpression(visium, output = "matrix") +# Create a DBI connection object +con = DBI::dbConnect(duckb::duckdb(), ":memory:") + # Create a dbSparseMatrix using the dbMatrix constructor function dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":memory:", + con = con, name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) @@ -84,3 +87,7 @@ gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, scale_cells = TRUE) ``` + +```{r} +sessionInfo() +``` \ No newline at end of file From 4a043e4f73dd8d6489ed45e053e9a954c06060a6 Mon Sep 17 00:00:00 2001 From: Ed Ruiz Date: Tue, 19 Mar 2024 09:00:35 -0400 Subject: [PATCH 007/150] chore: update unit tests with new dbMatrix constructor --- tests/testthat/test-dbMatrix_filterGiotto.R | 4 +++- tests/testthat/test-dbMatrix_libNorm.R | 4 +++- tests/testthat/test-dbMatrix_logNorm.R | 4 +++- tests/testthat/test-dbMatrix_scale.R | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-dbMatrix_filterGiotto.R b/tests/testthat/test-dbMatrix_filterGiotto.R index bd73266e3..2d484d9c7 100644 --- a/tests/testthat/test-dbMatrix_filterGiotto.R +++ b/tests/testthat/test-dbMatrix_filterGiotto.R @@ -6,8 +6,10 @@ rlang::local_options(lifecycle_verbosity = "quiet") visium = GiottoData::loadGiottoMini(dataset = "visium") dgc = getExpression(visium, output = "matrix") +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":memory:", + con = con, name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) diff --git a/tests/testthat/test-dbMatrix_libNorm.R b/tests/testthat/test-dbMatrix_libNorm.R index 755757b02..9d9af3201 100644 --- a/tests/testthat/test-dbMatrix_libNorm.R +++ b/tests/testthat/test-dbMatrix_libNorm.R @@ -6,8 +6,10 @@ rlang::local_options(lifecycle_verbosity = "quiet") visium = GiottoData::loadGiottoMini(dataset = "visium") dgc = getExpression(visium, output = "matrix") +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":memory:", + con = con, name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) diff --git a/tests/testthat/test-dbMatrix_logNorm.R b/tests/testthat/test-dbMatrix_logNorm.R index 4c76cf586..c02ba8cc7 100644 --- a/tests/testthat/test-dbMatrix_logNorm.R +++ b/tests/testthat/test-dbMatrix_logNorm.R @@ -6,8 +6,10 @@ rlang::local_options(lifecycle_verbosity = "quiet") visium = GiottoData::loadGiottoMini(dataset = "visium") dgc = getExpression(visium, output = "matrix") +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":memory:", + con = con, name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) diff --git a/tests/testthat/test-dbMatrix_scale.R b/tests/testthat/test-dbMatrix_scale.R index 00c4b44ef..554816a79 100644 --- a/tests/testthat/test-dbMatrix_scale.R +++ b/tests/testthat/test-dbMatrix_scale.R @@ -6,8 +6,10 @@ rlang::local_options(lifecycle_verbosity = "quiet") visium = GiottoData::loadGiottoMini(dataset = "visium") dgc = getExpression(visium, output = "matrix") +con = DBI::dbConnect(duckdb::duckdb(), ":memory:") + dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":memory:", + con = con, name = 'dgc', class = "dbSparseMatrix", overwrite = TRUE) From aefb2fe710fcfde993074e823c03078a6ce58c8d Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Sat, 23 Mar 2024 12:43:53 -0700 Subject: [PATCH 008/150] fix: catch null matrices in normalization --- R/auxiliary_giotto.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 70f946a5b..0bbfabbe2 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -766,7 +766,7 @@ filterGiotto = function(gobject, ## 5. create and set exprObj # Save dbMatrix to db if global option is set - if(compute_mat){ + if(compute_mat & !is.null(norm_expr)){ norm_expr <- .compute_dbMatrix(dbMatrix = norm_expr, name = 'normalized', verbose = verbose) @@ -779,7 +779,7 @@ filterGiotto = function(gobject, provenance = provenance, misc = NULL) - if(compute_mat){ + if(compute_mat & !is.null(norm_scaled_expr)){ norm_scaled_expr = .compute_dbMatrix(dbMatrix = norm_scaled_expr, name = 'scaled', verbose = verbose) From 5fe55ef17a324638162ee5d0269b94bf04d5e503 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:14:48 -0400 Subject: [PATCH 009/150] feat: add cosmx reader proto Modular reader class implementation for CosMx outputs --- R/classes.R | 301 ++++++++++++++++++++++++++++++++++++++++++++++++ R/convenience.R | 256 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 557 insertions(+) create mode 100644 R/classes.R diff --git a/R/classes.R b/R/classes.R new file mode 100644 index 000000000..5870124fb --- /dev/null +++ b/R/classes.R @@ -0,0 +1,301 @@ + + +setClass( + "cosmx_reader", + slots = list( + cosmx_dir = "character", + fovs = "numeric", + offsets = "data.frame", + calls = "list" + ), + prototype = list( + calls = list() + ) +) + +cosmxReader <- function(cosmx_dir = NULL, fovs = NULL) { + # get params + a <- list(Class = "cosmx_reader") + if (!is.null(cosmx_dir)) { + a$cosmx_dir <- cosmx_dir + } + if (!is.null(fovs)) { + a$fovs <- fovs + } + + do.call(new, args = a) +} + +setMethod("initialize", signature("cosmx_reader"), function(.Object, cosmx_dir, fovs) { + + if (!missing(cosmx_dir)) { + checkmate::assert_directory_exists(cosmx_dir) + .Object@cosmx_dir <- cosmx_dir + } + if (!missing(fovs)) { + checkmate::assert_numeric(fovs) + .Object@fovs <- fovs + } + + if (length(.Object@cosmx_dir) == 0) { + return(.Object) # return early if no path given + } + + p <- .Object@cosmx_dir + .detect_in_dir <- function(pattern) { + list.files(p, pattern = pattern, full.names = TRUE) + }[[1L]] + + # detect paths and dirs + pos_path <- .detect_in_dir("fov_positions_file") + meta_path <- .detect_in_dir("metadata_file") + tx_path <- .detect_in_dir("tx_file") + mask_dir <- .detect_in_dir("CellLabels") + expr_path <- .detect_in_dir("exprMat_file") + composite_img_path <- .detect_in_dir("CellComposite") + overlay_img_path <- .detect_in_dir("CellOverlay") + compart_img_path <- .detect_in_dir("CompartmentLabels") + + + # load fov offsets through one of several methods if not already existing + if (nrow(.Object@offsets) == 0L) { + if (!is.null(pos_path)) { + pos <- data.table::fread(pos_path) + data.table::setnames(pos, new = c("fov", "x", "y")) + } + else if (!is.null(meta_path)) { + pos <- .cosmx_infer_fov_shifts( + meta_dt = data.table::fread(meta_path), + flip_loc_y = FALSE + ) + } else if (!is.null(tx_path)) { + pos <- .cosmx_infer_fov_shifts( + tx_dt = data.table::fread(tx_path), + flip_loc_y = TRUE + ) + } + else { + pos <- data.table::data.table() + warning(wrap_txt( + "fov_positions_file, tx_file, and metadata_file not auto detected. + One of these must be provided to infer FOV shifts" + )) + } + .Object@offsets <- pos + } + + + + # transcripts load call + tx_fun <- function( + path = tx_path, + gpoints_params = list( + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb") + ), + verbose = NULL + ) { + .cosmx_transcript( + path = path, + fovs = .Object@fovs %none% NULL, + gpoints_params = gpoints_params, + cores = determine_cores(), + verbose = verbose + ) + } + .Object@calls$load_transcripts <- tx_fun + + + + # mask load call + mask_fun <- function( + path = mask_dir, + mask_params = list( + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + ID_fmt = NULL + ), + verbose = NULL + ) { + .cosmx_poly( + path = path, + fovs = .Object@fovs %none% NULL, + mask_params = mask_params, + offsets = .Object@offsets, + verbose = verbose + ) + } + .Object@calls$load_polys <- mask_fun + + + # expression load call + expr_fun <- function( + path = expr_path, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb") + ) { + .cosmx_expression( + path = path, + fovs = .Object@fovs %none% NULL, + feat_type = feat_type, + split_keyword = split_keyword + ) + } + .Object@calls$load_expression <- expr_fun + + + # images load call + img_fun <- function( + path = composite_img_path, + img_name_fmt = "composite_fov%03d", + negative_y = FALSE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL + ) { + .cosmx_image( + path = path, + fovs = .Object@fovs %none% NULL, + img_name_fmt = img_name_fmt, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + offsets = .Object@offsets, + verbose = verbose + ) + } + .Object@calls$load_images <- img_fun + + + # meta load call + meta_fun <- function( + path = meta_path, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px" + ), + verbose = NULL + ) { + .cosmx_cellmeta( + path = path, + fovs = .Object@fovs %none% NULL, + dropcols = dropcols, + cores = determine_cores(), + verbose = verbose + ) + } + .Object@calls$load_cellmeta <- meta_fun + + # build gobject call + gobject_fun <- function( + load_images = list( + composite = "composite", + overlay = "overlay" + ), + load_expression = FALSE, + load_cellmeta = FALSE + ) { + if (!is.null(load_images)) { + checkmate::assert_list(load_images) + if (is.null(names(load_images))) { + stop("Images directories provided to 'load_images' must be named") + } + } + g <- giotto() + + tx_list <- .Object@calls$load_transcripts() + polys <- .Object@calls$load_polys() + + if (!is.null(load_images)) { + # convenient shortnames + load_images[load_images == "composite"] <- composite_img_path + load_images[load_images == "overlay"] <- overlay_img_path + + imglist <- list() + dirnames <- names(load_images) + for (imdir_i in seq_along(load_images)) { + dir_imgs <- .Object@calls$load_images( + path = load_images[[imdir_i]], + img_name_fmt = paste0(dirnames, "_fov%03d") + ) + imglist <- c(imglist, dir_imgs) + } + } + + g <- setGiotto(g, gpoly) + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + g@largeImages <- imglist + + # TODO expression & meta + # Will need to check that names agree for poly/expr/meta + + return(g) + } + .Object@calls$create_gobject <- gobject_fun + + return(.Object) +}) + +#' @export +setMethod("$", signature("cosmx_reader"), function(x, name) { + basic_info <- c("offsets", "fovs", "cosmx_dir") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("cosmx_reader"), function(x, name, value) { + basic_info <- c("offsets", "fovs", "cosmx_dir") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.cosmx_reader` <- function(x, pattern) { + basic_info <- c("offsets", "fovs", "cosmx_dir") + c(basic_info, paste0(names(methods::slot(x, "calls")), "()")) +} + +setMethod("show", signature("cosmx_reader"), function(object) { + cat(sprintf("Giotto <%s>\n", "cosmx_reader")) + pre <- sprintf("%s :", format(c("dir", "fovs", "offsets", "funs"))) + d <- object@cosmx_dir + nch <- nchar(d) + if (nch > 60) { + d1 <- substring(d, first = 0L, last = 10L) + d2 <- substring(d, first = nch - 40, last = nch) + d <- paste0(d1, "[...]", d2) + } + cat(pre[1], d, "\n") + fovs <- object@fovs %none% "all" + cat(pre[2], paste(fovs, collapse = ", "), "\n") + offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") + cat(pre[3], offs_status, "\n") + + nfun <- length(object@calls) + funs <- names(object@calls) + if (nfun > 0L) { + pre_funs <- format(c(pre[4], rep("", nfun - 1L))) + for (i in seq_len(nfun)) { + cat(pre_funs[i], " ", funs[i], "()\n", sep = "") + } + } +}) + +setMethod("print", signature("cosmx_reader"), function(x, ...) show(x)) + + + diff --git a/R/convenience.R b/R/convenience.R index ed0925a9d..e3fda86a2 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2312,6 +2312,262 @@ NULL ## CosMx #### + +#' @param gpoints_params list of params passed to `createGiottoPoints()`. +#' Mainly to allow access to `feat_type` and `split_keyword` params. Default +#' is to split into rna and negprobes points objects +.cosmx_transcript <- function( + path, + fovs = NULL, + gpoints_params = list( + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb") + ), + cores = determine_cores(), + verbose = NULL + ) { + checkmate::assert_list(gpoints_params) + checkmate::assert_file_exists(path) + + GiottoUtils::vmsg(.v = verbose, "loading feature detections...") + + tx <- data.table::fread(input = path, nThread = cores) + if (!is.null(fovs)) { + # subset to only needed FOVs + tx <- tx[fov %in% as.numeric(fovs),] + } + + # giottoPoints ----------------------------------------------------- # + + # static gpoints params + gpoints_params$x_colname <- "x_global_px" + gpoints_params$y_colname <- "y_global_px" + gpoints_params$feat_ID_colname <- "target" + + gpoints <- do.call(createGiottoPoints, c(list(x = tx), gpoints_params)) + # ensure output is always a list + if (!is.list(gpoints)) { + gpoints <- list(gpoints) + names(gpoints) <- objName(gpoints[[1L]]) + } + + return(gpoints) +} + +#' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), +#' yshift (numeric) +.cosmx_infer_fov_shifts <- function(tx_dt, meta_dt, flip_loc_y = NULL) { + fov <- NULL # NSE vars + + if (!missing(tx_dt)) { + flip_loc_y %null% TRUE # default = TRUE + tx_head <- tx_dt[, head(.SD, 10L), by = fov] + x <- tx_head[, mean(x_global_px - x_local_px), by = fov] + if (flip_loc_y) { + # use +y if local y values are flipped + y <- tx_head[, mean(y_global_px + y_local_px), by = fov] + } else { + y <- tx_head[, mean(y_global_px - y_local_px), by = fov] + } + } + + if (!missing(meta_dt)) { + flip_loc_y %null% FALSE # default = FALSE + meta_head <- meta_dt[, head(.SD, 10L), by = fov] + x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] + if (flip_loc_y) { + # use +y if local y values are flipped + y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), + by = fov] + } else { + y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), + by = fov] + } + } + + res <- merge(x, y, by = "fov") + data.table::setnames(res, new = c("fov", "x", "y")) + + return(res) +} + +.cosmx_poly <- function( + path, + fovs = NULL, + mask_params = list( + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + ID_fmt = NULL + ), + offsets, + verbose = NULL +) { + fovs <- fovs %null% seq_along(list.files(path)) + gpolys <- lapply(fovs, function(fov) { + segfile <- Sys.glob(paths = sprintf("%s/*%03d*", path, fov)) + if (is.null(mask_params$ID_fmt)) { + mask_params$ID_fmt = paste0(sprintf("fov%03d", fov), "_cell%03d") + } + mask_params$verbose <- verbose %null% TRUE + gpoly <- do.call( + createGiottoPolygonsFromMask, + args = c(list(maskfile = segfile), mask_params) + ) + + gpoly_shift <- spatShift( + x = gpoly, + dx = offsets[fov, x], + dy = offsets[fov, y] + ) + }) + + if (length(gpolys) > 1L) { + gpolys <- do.call(rbind, args = gpolys) + } + + # never return lists. Only the single merged gpoly + return(gpolys) +} + +.cosmx_cellmeta <- function( + path, + fovs = NULL, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px" + ), + cores = determine_cores(), + verbose = NULL + ) { + verbose <- verbose %null% TRUE + + meta_dt <- data.table::fread(input = path, nThread = cores) + + # subset to needed fovs + if (!is.null(fovs)) { + fovs <- as.integer(fovs) + meta_dt <- meta_dt[fov %in% fovs,] + } + + dropcols <- dropcols[dropcols %in% meta_dt] + meta_dt[, (dropcols) := NULL] # remove dropcols + + # create cell ID as fov###_cell### + meta_dt[, cell_ID := sprintf("fov%03d_cell%03d", fov, cell_ID)] + # remove fov + meta_dt[, fov := NULL] + + # TODO figure out what to do about protein expression here. + cx <- createCellMetaObj( + metadata = meta_dt, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + return(cx) +} + +.cosmx_expression <- function( + path, + fovs = NULL, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + cores = determine_cores() + ) { + expr_dt <- data.table::fread(input = path, nThread = cores) + + # subset to needed fovs + if (!is.null(fovs)) { + fovs <- as.integer(fovs) + expr_dt <- expr_dt[fov %in% fovs,] + } + + # remove background values (cell 0) + expr_dt <- expr_dt[cell_ID != 0L,] + + # create cell ID as fov###_cell### + expr_dt[, cell_ID := sprintf("fov%03d_cell%03d", fov, cell_ID)] + # remove fov + expr_dt[, fov := NULL] + + # convert to Matrix + expr_mat <- dt_to_matrix(expr_dt) + expr_mat <- t_flex(expr_mat) + feat_ids <- rownames(expr_mat) + + # split expression for rna / negprb if any split keywords provided. + # Output of this chunk should always be a named list of 1 or more matrices + if (length(split_keyword) > 0) { + expr_list <- list() + for (key_i in seq_along(split_keyword)) { + bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) + # subset and store split matrix + sub_mat <- expr_mat[bool,] + expr_list[[feat_type[[key_i + 1L]]]] <- sub_mat + # remaining matrix + expr_mat <- expr_mat[!bool,] + } + expr_list[[feat_type[[1L]]]] <- expr_mat + } else { + expr_list <- list(expr_mat) + names(expr_list) <- feat_type[[1L]] + } + + expr_list <- lapply(seq_along(expr_list), function(expr_i) { + createExprObj(expression_data = expr_list[[expr_i]], + spat_unit = "cell", + feat_type = names(expr_list)[[expr_i]], + name = "raw", + provenance = "cell") + }) + + return(expr_list) +} + +.cosmx_image <- function( + path, + fovs = NULL, + img_name_fmt = "fov%03d", + negative_y = FALSE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + offsets, + verbose = NULL + ) { + fovs <- fovs %null% seq_along(list.files(path)) + verbose <- verbose %null% TRUE + + gimg_list <- lapply(fovs, function(fov) { + imgfile <- Sys.glob(paths = sprintf("%s/*%03d*", path, fov)) + img_name <- sprintf(img_name_fmt, fov) + + gimg <- createGiottoLargeImage( + raster_object = imgfile, + name = img_name, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + verbose = verbose + ) + + spatShift( + x = gimg, + dx = offsets[fov, x], + dy = offsets[fov, y] + ) + }) + + return(gimg_list) +} + + + #' @title Load CosMx folder subcellular info #' @name .load_cosmx_folder_subcellular #' @description loads in the feature detections information. Note that the mask From 79e858d9f6d58f8e8261da953018f878a1617314 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:34:44 -0400 Subject: [PATCH 010/150] fix: namespace issue - Fix stats::density and GiottoClass::density naming overlap - document --- DESCRIPTION | 2 + NAMESPACE | 10 ++++- R/package_imports.R | 8 ++-- R/suite_reexports.R | 4 ++ man/interpolateFeature.Rd | 95 +++++++++++++++++++++++++++++++++++++++ man/reexports.Rd | 4 +- 6 files changed, 117 insertions(+), 6 deletions(-) create mode 100644 man/interpolateFeature.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ad62d7b3b..33ca51146 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -134,6 +134,7 @@ Remotes: Collate: 'auxiliary_giotto.R' 'cell_segmentation.R' + 'classes.R' 'clustering.R' 'convenience.R' 'cross_section.R' @@ -148,6 +149,7 @@ Collate: 'suite_reexports.R' 'image_registration.R' 'interactivity.R' + 'kriging.R' 'package_imports.R' 'poly_influence.R' 'python_hmrf.R' diff --git a/NAMESPACE b/NAMESPACE index 8ed41889e..ad0a2697b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(.DollarNames,cosmx_reader) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") @@ -149,6 +150,7 @@ export(crossSectionGenePlot) export(crossSectionGenePlot3D) export(crossSectionPlot) export(crossSectionPlot3D) +export(density) export(detectSpatialCorFeats) export(detectSpatialCorFeatsMatrix) export(detectSpatialCorGenes) @@ -255,6 +257,7 @@ export(giottoToSpatialExperiment) export(heatmSpatialCorFeats) export(heatmSpatialCorGenes) export(hexVertices) +export(hist) export(hyperGeometricEnrich) export(initHMRF_V2) export(insertCrossSectionGenePlot3D) @@ -484,12 +487,15 @@ export(violinPlot) export(wrap) export(writeGiottoLargeImage) export(writeHMRFresults) +exportMethods("$") +exportMethods("$<-") +exportMethods(interpolateFeature) import(GiottoClass) import(GiottoUtils) import(GiottoVisuals) import(ggplot2) import(methods) -import(stats) +import(stats, except = density) import(utils) importClassesFrom(data.table,data.table) importFrom(GiottoClass,"activeFeatType<-") @@ -581,6 +587,7 @@ importFrom(GiottoClass,createSpatialNetwork) importFrom(GiottoClass,createSpatialWeightMatrix) importFrom(GiottoClass,crop) importFrom(GiottoClass,cropGiottoLargeImage) +importFrom(GiottoClass,density) importFrom(GiottoClass,distGiottoImage) importFrom(GiottoClass,estimateImageBg) importFrom(GiottoClass,ext) @@ -615,6 +622,7 @@ importFrom(GiottoClass,giottoToSeuratV4) importFrom(GiottoClass,giottoToSeuratV5) importFrom(GiottoClass,giottoToSpatialExperiment) importFrom(GiottoClass,hexVertices) +importFrom(GiottoClass,hist) importFrom(GiottoClass,installGiottoEnvironment) importFrom(GiottoClass,instructions) importFrom(GiottoClass,joinGiottoObjects) diff --git a/R/package_imports.R b/R/package_imports.R index 3aaaecefd..c9f96dd1e 100644 --- a/R/package_imports.R +++ b/R/package_imports.R @@ -1,10 +1,10 @@ -#' @import GiottoUtils -#' @import GiottoClass -#' @import GiottoVisuals #' @import methods #' @import utils -#' @import stats +#' @rawNamespace import(stats, except = density) #' @import ggplot2 +#' @import GiottoUtils +#' @import GiottoClass +#' @import GiottoVisuals #' @importClassesFrom data.table data.table #' @importFrom data.table setnames setorder setDT #' @importFrom data.table data.table diff --git a/R/suite_reexports.R b/R/suite_reexports.R index d84a72632..a7d286608 100644 --- a/R/suite_reexports.R +++ b/R/suite_reexports.R @@ -69,6 +69,8 @@ GiottoClass::copy #' @export GiottoClass::crop #' @export +GiottoClass::density +#' @export GiottoClass::flip #' @export GiottoClass::spin @@ -295,6 +297,8 @@ GiottoClass::giottoToSpatialExperiment #' @export GiottoClass::hexVertices #' @export +GiottoClass::hist +#' @export GiottoClass::installGiottoEnvironment #' @export GiottoClass::joinGiottoObjects diff --git a/man/interpolateFeature.Rd b/man/interpolateFeature.Rd new file mode 100644 index 000000000..325ceb5fd --- /dev/null +++ b/man/interpolateFeature.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kriging.R +\name{interpolateFeature} +\alias{interpolateFeature} +\alias{interpolateFeature,giotto,missing-method} +\alias{interpolateFeature,spatLocsObj,data.frame-method} +\title{Spatial feature interpolation} +\usage{ +\S4method{interpolateFeature}{giotto,missing}( + x, + spat_unit = NULL, + feat_type = NULL, + feats, + spatvalues_params = list(), + spat_loc_name = "raw", + ext = NULL, + buffer = 50, + name_fmt = "\%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + verbose = NULL, + ... +) + +\S4method{interpolateFeature}{spatLocsObj,data.frame}( + x, + y, + ext = NULL, + buffer = 50, + rastersize = 500, + name_fmt = "\%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + ... +) +} +\arguments{ +\item{x}{object containing coordinates to use interpolation with} + +\item{spat_unit}{(optional) spatial unit to use} + +\item{feat_type}{(optional) feature type to use} + +\item{feats}{character vector. Features to interpolate from the `giotto` +object} + +\item{spatvalues_params}{list. Additional list of parameters to pass to +[spatValues()] to help with data retrieval from `giotto` object} + +\item{spat_loc_name}{character. Name of spatial locations to use. Values to +be interpolated are spatially mapped to these locations by cell_ID.} + +\item{ext}{`SpatExtent`. (optional) extent across which to apply the +interpolation. If not provided, will default to the extent of the spatLocsObj +expanded by the value of `buffer`. It can be helpful to set this as the +extent of any polygons that will be used in aggregation.} + +\item{buffer}{numeric. (optional) default buffer to expand derived extent by +if `ext` is not provided.} + +\item{name_fmt}{character. sprintf fmt to apply to `feats` when naming the +resulting interpolation `giottoLargeImage` objects. Default is no change.} + +\item{savedir}{character. Output directory. Default is a new `interp_rasters` +folder in working directory.} + +\item{overwrite}{logical. Whether raster outputs should be overwritten if +the same `filename` is provided.} + +\item{verbose}{be verbose} + +\item{...}{additional params to pass downstream methods} + +\item{y}{data.frame-like. Values for interpolation. Must also have a +`cell_ID` column and that matches with `x`.} + +\item{rastersize}{numeric. Length of major axis in px of interpolation +raster to create.} + +\item{name}{name of interpolation `giottoLargeImage` to generate} + +\item{filename}{character. Output filename. Default is \[`name`\].tif within +the working directory.} +} +\value{ +`giotto` method returns a `giotto` object with newly made appended +feature interpolation rasters as `giottoLargeImages`\cr +} +\description{ +Spatial feature interpolation +} +\details{ +The data.frame method returns a `giottoLargeImage` linked to an interpolated +raster that is written to disk as GeoTIFF. +} diff --git a/man/reexports.Rd b/man/reexports.Rd index 2cc74eacd..2ce5f54fe 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -34,6 +34,7 @@ \alias{centroids} \alias{copy} \alias{crop} +\alias{density} \alias{flip} \alias{spin} \alias{spatShift} @@ -142,6 +143,7 @@ \alias{giottoToSeuratV5} \alias{giottoToSpatialExperiment} \alias{hexVertices} +\alias{hist} \alias{installGiottoEnvironment} \alias{joinGiottoObjects} \alias{loadGiotto} @@ -286,7 +288,7 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} + \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{density}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{hist}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} \item{GiottoUtils}{\code{\link[GiottoUtils:pipe]{\%>\%}}, \code{\link[GiottoUtils]{getDistinctColors}}, \code{\link[GiottoUtils]{getRainbowColors}}} From be50b56a36d8334a97bba5e66a00d61f54109b03 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:02:27 -0400 Subject: [PATCH 011/150] chore: improve docs --- NAMESPACE | 3 ++- R/classes.R | 57 ++++++++++++++++++++++++++++++++++++++-------- man/importCosMx.Rd | 49 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 11 deletions(-) create mode 100644 man/importCosMx.Rd diff --git a/NAMESPACE b/NAMESPACE index ad0a2697b..c3fc035b8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,6 @@ # Generated by roxygen2: do not edit by hand -S3method(.DollarNames,cosmx_reader) +S3method(.DollarNames,CosmxReader) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") @@ -259,6 +259,7 @@ export(heatmSpatialCorGenes) export(hexVertices) export(hist) export(hyperGeometricEnrich) +export(importCosMx) export(initHMRF_V2) export(insertCrossSectionGenePlot3D) export(insertCrossSectionSpatPlot3D) diff --git a/R/classes.R b/R/classes.R index 5870124fb..20bba8e17 100644 --- a/R/classes.R +++ b/R/classes.R @@ -1,7 +1,7 @@ setClass( - "cosmx_reader", + "CosmxReader", slots = list( cosmx_dir = "character", fovs = "numeric", @@ -13,9 +13,46 @@ setClass( ) ) -cosmxReader <- function(cosmx_dir = NULL, fovs = NULL) { +#' @title Import a CosMx Assay +#' @name importCosMx +#' @description +#' Giotto import functionalities for CosMx datasets. This function generates +#' a `CosmxReader` instance that has convenient reader functions for converting +#' individual pieces of CosMx data into Giotto-compatible representations when +#' the params `cosmx_dir` and `fovs` (if only a subset is desired) are provided. +#' A function that creates the full `giotto` object is also available. +#' These functions should have all param values provided as defaults, but +#' can be flexibly modified to do things such as look in alternative +#' directories or paths. +#' @param cosmx_dir CosMx output directory +#' @param fovs numeric. (optional) If provided, will load specific fovs. +#' Otherwise, all FOVs will be loaded +#' @returns CosmxReader object +#' @examples +#' # Create a `CosmxReader` object +#' reader <- importCosMx() +#' +#' \dontrun{ +#' # Set the cosmx_dir and fov parameters +#' reader$cosmx_dir <- "path to cosmx dir" +#' reader$fov <- c(1, 4) +#' +#' # Load polygons, transcripts, and images +#' polys <- reader$load_polys() +#' tx <- reader$load_transcripts() +#' imgs <- reader$load_images() +#' +#' # Create a `giotto` object and add the loaded data +#' g <- giotto() +#' g <- setGiotto(g, tx[["rna"]]) +#' g <- setGiotto(g, polys) +#' g <- addGiottoLargeImage(g, largeImages = imgs) +#' force(g) +#' } +#' @export +importCosMx <- function(cosmx_dir = NULL, fovs = NULL) { # get params - a <- list(Class = "cosmx_reader") + a <- list(Class = "CosmxReader") if (!is.null(cosmx_dir)) { a$cosmx_dir <- cosmx_dir } @@ -26,7 +63,7 @@ cosmxReader <- function(cosmx_dir = NULL, fovs = NULL) { do.call(new, args = a) } -setMethod("initialize", signature("cosmx_reader"), function(.Object, cosmx_dir, fovs) { +setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, fovs) { if (!missing(cosmx_dir)) { checkmate::assert_directory_exists(cosmx_dir) @@ -244,7 +281,7 @@ setMethod("initialize", signature("cosmx_reader"), function(.Object, cosmx_dir, }) #' @export -setMethod("$", signature("cosmx_reader"), function(x, name) { +setMethod("$", signature("CosmxReader"), function(x, name) { basic_info <- c("offsets", "fovs", "cosmx_dir") if (name %in% basic_info) return(methods::slot(x, name)) @@ -252,7 +289,7 @@ setMethod("$", signature("cosmx_reader"), function(x, name) { }) #' @export -setMethod("$<-", signature("cosmx_reader"), function(x, name, value) { +setMethod("$<-", signature("CosmxReader"), function(x, name, value) { basic_info <- c("offsets", "fovs", "cosmx_dir") if (name %in% basic_info) { methods::slot(x, name) <- value @@ -264,13 +301,13 @@ setMethod("$<-", signature("cosmx_reader"), function(x, name, value) { }) #' @export -`.DollarNames.cosmx_reader` <- function(x, pattern) { +`.DollarNames.CosmxReader` <- function(x, pattern) { basic_info <- c("offsets", "fovs", "cosmx_dir") c(basic_info, paste0(names(methods::slot(x, "calls")), "()")) } -setMethod("show", signature("cosmx_reader"), function(object) { - cat(sprintf("Giotto <%s>\n", "cosmx_reader")) +setMethod("show", signature("CosmxReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "CosmxReader")) pre <- sprintf("%s :", format(c("dir", "fovs", "offsets", "funs"))) d <- object@cosmx_dir nch <- nchar(d) @@ -295,7 +332,7 @@ setMethod("show", signature("cosmx_reader"), function(object) { } }) -setMethod("print", signature("cosmx_reader"), function(x, ...) show(x)) +setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) diff --git a/man/importCosMx.Rd b/man/importCosMx.Rd new file mode 100644 index 000000000..239cb04a5 --- /dev/null +++ b/man/importCosMx.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/classes.R +\name{importCosMx} +\alias{importCosMx} +\title{Import a CosMx Assay} +\usage{ +importCosMx(cosmx_dir = NULL, fovs = NULL) +} +\arguments{ +\item{cosmx_dir}{CosMx output directory} + +\item{fovs}{numeric. (optional) If provided, will load specific fovs. +Otherwise, all FOVs will be loaded} +} +\value{ +CosmxReader object +} +\description{ +Giotto import functionalities for CosMx datasets. This function generates +a `CosmxReader` instance that has convenient reader functions for converting +individual pieces of CosMx data into Giotto-compatible representations when +the params `cosmx_dir` and `fovs` (if only a subset is desired) are provided. +A function that creates the full `giotto` object is also available. +These functions should have all param values provided as defaults, but +can be flexibly modified to do things such as look in alternative +directories or paths. +} +\examples{ +# Create a `CosmxReader` object +reader <- importCosMx() + +\dontrun{ +# Set the cosmx_dir and fov parameters +reader$cosmx_dir <- "path to cosmx dir" +reader$fov <- c(1, 4) + +# Load polygons, transcripts, and images +polys <- reader$load_polys() +tx <- reader$load_transcripts() +imgs <- reader$load_images() + +# Create a `giotto` object and add the loaded data +g <- giotto() +g <- setGiotto(g, tx[["rna"]]) +g <- setGiotto(g, polys) +g <- addGiottoLargeImage(g, largeImages = imgs) +force(g) +} +} From 966eebbcaa9c39f01ee589ced6f88e94b178fab9 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:18:15 -0400 Subject: [PATCH 012/150] fix: catch empty condition of cosmx_dir --- R/classes.R | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/R/classes.R b/R/classes.R index 20bba8e17..ed6d9dfc3 100644 --- a/R/classes.R +++ b/R/classes.R @@ -309,18 +309,24 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { setMethod("show", signature("CosmxReader"), function(object) { cat(sprintf("Giotto <%s>\n", "CosmxReader")) pre <- sprintf("%s :", format(c("dir", "fovs", "offsets", "funs"))) + d <- object@cosmx_dir - nch <- nchar(d) - if (nch > 60) { - d1 <- substring(d, first = 0L, last = 10L) - d2 <- substring(d, first = nch - 40, last = nch) - d <- paste0(d1, "[...]", d2) + if (length(d) > 0L) { + nch <- nchar(d) + if (nch > 60L) { + d1 <- substring(d, first = 0L, last = 10L) + d2 <- substring(d, first = nch - 40L, last = nch) + d <- paste0(d1, "[...]", d2) + } + cat(pre[1L], d, "\n") + } else { + cat(pre[1L], "\n") } - cat(pre[1], d, "\n") + fovs <- object@fovs %none% "all" - cat(pre[2], paste(fovs, collapse = ", "), "\n") + cat(pre[2L], paste(fovs, collapse = ", "), "\n") offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") - cat(pre[3], offs_status, "\n") + cat(pre[3L], offs_status, "\n") nfun <- length(object@calls) funs <- names(object@calls) From 80f56e993dc606e90319a9bd236d68db065a907f Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:39:06 -0400 Subject: [PATCH 013/150] fix: typo --- R/classes.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/R/classes.R b/R/classes.R index ed6d9dfc3..4b61984c8 100644 --- a/R/classes.R +++ b/R/classes.R @@ -245,9 +245,17 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f } g <- giotto() + # transcripts tx_list <- .Object@calls$load_transcripts() + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + + # polys polys <- .Object@calls$load_polys() + g <- setGiotto(g, polys) + # images if (!is.null(load_images)) { # convenient shortnames load_images[load_images == "composite"] <- composite_img_path @@ -262,14 +270,9 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f ) imglist <- c(imglist, dir_imgs) } + g <- addGiottoLargeImage(g, largeImages = imglist) } - g <- setGiotto(g, gpoly) - for (tx in tx_list) { - g <- setGiotto(g, tx) - } - g@largeImages <- imglist - # TODO expression & meta # Will need to check that names agree for poly/expr/meta From e97a712e7de6efb8423aa5d6ce2f7f06bf5665f8 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 23:22:28 -0400 Subject: [PATCH 014/150] fix: reader image appending --- R/classes.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/classes.R b/R/classes.R index 4b61984c8..df165c861 100644 --- a/R/classes.R +++ b/R/classes.R @@ -252,7 +252,7 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f } # polys - polys <- .Object@calls$load_polys() + polys <- .Object@calls$load_polys(verbose = FALSE) g <- setGiotto(g, polys) # images @@ -270,7 +270,9 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f ) imglist <- c(imglist, dir_imgs) } - g <- addGiottoLargeImage(g, largeImages = imglist) + for (img_i in seq_along(imglist)) { + g <- addGiottoLargeImage(g, largeImages = imglist) + } } # TODO expression & meta From 864d96b4bf42806d402d263eec2566a54d1c90cb Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Wed, 27 Mar 2024 00:17:03 -0400 Subject: [PATCH 015/150] fix: typo --- R/classes.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/classes.R b/R/classes.R index df165c861..8d33a27a8 100644 --- a/R/classes.R +++ b/R/classes.R @@ -266,13 +266,11 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f for (imdir_i in seq_along(load_images)) { dir_imgs <- .Object@calls$load_images( path = load_images[[imdir_i]], - img_name_fmt = paste0(dirnames, "_fov%03d") + img_name_fmt = paste0(dirnames[[imdir_i]], "_fov%03d") ) imglist <- c(imglist, dir_imgs) } - for (img_i in seq_along(imglist)) { - g <- addGiottoLargeImage(g, largeImages = imglist) - } + g <- addGiottoLargeImage(g, largeImages = imglist) } # TODO expression & meta From d1249e0fb6bd0f5eea904ab7a311cc083061a376 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 09:27:11 -0400 Subject: [PATCH 016/150] enh: `importCosMx()` updates - add micron scaling - make naming align with nanostring standards - add image name parsing for FOV number - use individual params instead of param lists for clarity - change fov shifts detection to mainly being derived from metadata or transcripts instead of loading the values in since they are usually in micron scaled values. --- R/classes.R | 184 +++++++++++++++++++++++++++++++---------- R/convenience.R | 201 +++++++++++++++++++++++++++++++++++---------- man/importCosMx.Rd | 25 +++++- 3 files changed, 323 insertions(+), 87 deletions(-) diff --git a/R/classes.R b/R/classes.R index 8d33a27a8..d1b94f3dd 100644 --- a/R/classes.R +++ b/R/classes.R @@ -4,16 +4,23 @@ setClass( "CosmxReader", slots = list( cosmx_dir = "character", + slide = "numeric", fovs = "numeric", - offsets = "data.frame", + mm = "logical", + px2mm = "numeric", + offsets = "ANY", calls = "list" ), prototype = list( + slide = 1, + mm = FALSE, + px2mm = 0.12028, # from cosmx output help files + offsets = NULL, calls = list() ) ) -#' @title Import a CosMx Assay +#' @title Import a Nanostring CosMx Assay #' @name importCosMx #' @description #' Giotto import functionalities for CosMx datasets. This function generates @@ -25,8 +32,19 @@ setClass( #' can be flexibly modified to do things such as look in alternative #' directories or paths. #' @param cosmx_dir CosMx output directory +#' @param slide numeric. Slide number. Defaults to 1 #' @param fovs numeric. (optional) If provided, will load specific fovs. #' Otherwise, all FOVs will be loaded +#' @param mm logical. Whether to scale spatial information as millimeters +#' instead of the default pixels +#' @param px2mm numeric. Scalefactor from pixels to mm. Defaults to 0.12028 +#' based on `CosMx-ReadMe.html` info +#' @details +#' Loading functions are generated after the `cosmx_dir` is added. +#' Transcripts, expression, and metadata loading are all expected to be done +#' from the top level of the directory. Loading of polys, and any image sets +#' are expected to be from specific subdirectories containing only those +#' images for the set of FOVs. #' @returns CosmxReader object #' @examples #' # Create a `CosmxReader` object @@ -50,7 +68,9 @@ setClass( #' force(g) #' } #' @export -importCosMx <- function(cosmx_dir = NULL, fovs = NULL) { +importCosMx <- function( + cosmx_dir = NULL, slide = 1, fovs = NULL, mm = FALSE, px2mm = 0.12028 +) { # get params a <- list(Class = "CosmxReader") if (!is.null(cosmx_dir)) { @@ -59,32 +79,54 @@ importCosMx <- function(cosmx_dir = NULL, fovs = NULL) { if (!is.null(fovs)) { a$fovs <- fovs } + a$slide <- slide + a$mm <- mm + a$px2mm <- px2mm do.call(new, args = a) } -setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, fovs) { - +setMethod("initialize", signature("CosmxReader"), function( + .Object, cosmx_dir, slide, fovs, mm, px2mm +) { + # provided params (if any) if (!missing(cosmx_dir)) { checkmate::assert_directory_exists(cosmx_dir) .Object@cosmx_dir <- cosmx_dir } + if (!missing(slide)) { + .Object@slide <- slide + } if (!missing(fovs)) { - checkmate::assert_numeric(fovs) .Object@fovs <- fovs } + if (!missing(mm)) { + .Object@mm <- mm + } + if (!missing(px2mm)) { + .Object@px2mm <- px2mm + } + # NULL case if (length(.Object@cosmx_dir) == 0) { return(.Object) # return early if no path given } + + # detect paths and subdirs p <- .Object@cosmx_dir .detect_in_dir <- function(pattern) { - list.files(p, pattern = pattern, full.names = TRUE) - }[[1L]] + f <- list.files(p, pattern = pattern, full.names = TRUE) + lenf <- length(f) + if (lenf == 1L) return(f) + else if (lenf == 0L) { + warning(pattern, " not detected in CosMx directory", call. = FALSE) + return(NULL) + } + return(f[[1L]]) # more than one match + } - # detect paths and dirs - pos_path <- .detect_in_dir("fov_positions_file") + shifts_path <- .detect_in_dir("fov_positions_file") meta_path <- .detect_in_dir("metadata_file") tx_path <- .detect_in_dir("tx_file") mask_dir <- .detect_in_dir("CellLabels") @@ -94,18 +136,30 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f compart_img_path <- .detect_in_dir("CompartmentLabels") - # load fov offsets through one of several methods if not already existing - if (nrow(.Object@offsets) == 0L) { - if (!is.null(pos_path)) { - pos <- data.table::fread(pos_path) - data.table::setnames(pos, new = c("fov", "x", "y")) + # load fov offsets through one of several methods + if (is.null(.Object@offsets)) { # only run if not already existing + pos <- NULL + + if (!is.null(shifts_path)) { + fov_shifts <- data.table::fread(shifts_path) + if (!"X_mm" %in% colnames(fov_shifts)) { + # older version has fov, x, y (all numeric) in px shifts + data.table::setnames(fov_shifts, new = c("fov", "x", "y")) + pos <- fov_shifts + } } - else if (!is.null(meta_path)) { + + # proceed with other possible methods of inferring shifts if present + if (!is.null(meta_path) && is.null(pos)) { pos <- .cosmx_infer_fov_shifts( meta_dt = data.table::fread(meta_path), flip_loc_y = FALSE ) - } else if (!is.null(tx_path)) { + } else if (!is.null(tx_path) && is.null(pos)) { + warning(wrap_txt( + "metadata_file not found: + Detecting fov shifts from tx_file. (This is slower)" + ), call. = FALSE) pos <- .cosmx_infer_fov_shifts( tx_dt = data.table::fread(tx_path), flip_loc_y = TRUE @@ -118,6 +172,7 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f One of these must be provided to infer FOV shifts" )) } + .Object@offsets <- pos } @@ -126,16 +181,24 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f # transcripts load call tx_fun <- function( path = tx_path, - gpoints_params = list( - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb") + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" ), verbose = NULL ) { .cosmx_transcript( path = path, fovs = .Object@fovs %none% NULL, - gpoints_params = gpoints_params, + feat_type = feat_type, + split_keyword = split_keyword, + dropcols = dropcols, + mm = .Object@mm, + px2mm = .Object@px2mm, cores = determine_cores(), verbose = verbose ) @@ -147,20 +210,22 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f # mask load call mask_fun <- function( path = mask_dir, - mask_params = list( - # VERTICAL FLIP + NO VERTICAL SHIFT - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - ID_fmt = NULL - ), + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, verbose = NULL ) { .cosmx_poly( path = path, fovs = .Object@fovs %none% NULL, - mask_params = mask_params, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + shift_vertical_step = shift_vertical_step, + shift_horizontal_step = shift_horizontal_step, + mm = .Object@mm, + px2mm = .Object@px2mm, offsets = .Object@offsets, verbose = verbose ) @@ -200,6 +265,8 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f negative_y = negative_y, flip_vertical = flip_vertical, flip_horizontal = flip_horizontal, + mm = .Object@mm, + px2mm = .Object@px2mm, offsets = .Object@offsets, verbose = verbose ) @@ -283,9 +350,15 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f return(.Object) }) + + + + +# access #### + #' @export setMethod("$", signature("CosmxReader"), function(x, name) { - basic_info <- c("offsets", "fovs", "cosmx_dir") + basic_info <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm", "offsets") if (name %in% basic_info) return(methods::slot(x, name)) return(x@calls[[name]]) @@ -293,48 +366,75 @@ setMethod("$", signature("CosmxReader"), function(x, name) { #' @export setMethod("$<-", signature("CosmxReader"), function(x, name, value) { - basic_info <- c("offsets", "fovs", "cosmx_dir") + basic_info <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm") if (name %in% basic_info) { methods::slot(x, name) <- value return(initialize(x)) } + if (name == "offsets") { + methods::slot(x, name) <- data.table::setDT(value) + return(initialize(x)) + } + stop(sprintf("Only items in '%s' can be set", paste0(basic_info, collapse = "', '"))) }) #' @export `.DollarNames.CosmxReader` <- function(x, pattern) { - basic_info <- c("offsets", "fovs", "cosmx_dir") - c(basic_info, paste0(names(methods::slot(x, "calls")), "()")) + dn <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm", "offsets") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) } + +# show #### setMethod("show", signature("CosmxReader"), function(object) { cat(sprintf("Giotto <%s>\n", "CosmxReader")) - pre <- sprintf("%s :", format(c("dir", "fovs", "offsets", "funs"))) + print_slots <- c("dir", "slide", "fovs", "mm", "offsets", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + # dir d <- object@cosmx_dir if (length(d) > 0L) { nch <- nchar(d) if (nch > 60L) { - d1 <- substring(d, first = 0L, last = 10L) - d2 <- substring(d, first = nch - 40L, last = nch) + d1 <- substring(d, first = 0L, last = 15L) + d2 <- substring(d, first = nch - 35L, last = nch) d <- paste0(d1, "[...]", d2) } - cat(pre[1L], d, "\n") + cat(pre["dir"], d, "\n") } else { - cat(pre[1L], "\n") + cat(pre["dir"], "\n") } + # slide + slide <- object@slide + cat(pre["slide"], slide, "\n") + + # fovs fovs <- object@fovs %none% "all" - cat(pre[2L], paste(fovs, collapse = ", "), "\n") + cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") + + # mm scaling + mm <- ifelse(object@mm, object@px2mm, FALSE) + cat(pre["mm"], mm, "\n") + + # offsets offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") - cat(pre[3L], offs_status, "\n") + cat(pre["offsets"], offs_status, "\n") + # funs nfun <- length(object@calls) funs <- names(object@calls) if (nfun > 0L) { - pre_funs <- format(c(pre[4], rep("", nfun - 1L))) + pre_funs <- format(c(pre["funs"], rep("", nfun - 1L))) for (i in seq_len(nfun)) { cat(pre_funs[i], " ", funs[i], "()\n", sep = "") } diff --git a/R/convenience.R b/R/convenience.R index e3fda86a2..e0f88b0d2 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2313,33 +2313,52 @@ NULL ## CosMx #### -#' @param gpoints_params list of params passed to `createGiottoPoints()`. -#' Mainly to allow access to `feat_type` and `split_keyword` params. Default -#' is to split into rna and negprobes points objects + .cosmx_transcript <- function( path, fovs = NULL, - gpoints_params = list( - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb") + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" ), + mm = FALSE, + px2mm = 0.12028, cores = determine_cores(), verbose = NULL ) { - checkmate::assert_list(gpoints_params) + + if (missing(path)) { + stop(wrap_txt( + "No path to tx file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) GiottoUtils::vmsg(.v = verbose, "loading feature detections...") - tx <- data.table::fread(input = path, nThread = cores) + tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) if (!is.null(fovs)) { # subset to only needed FOVs tx <- tx[fov %in% as.numeric(fovs),] } + # mm scaling if desired + if (mm) { + tx[, x_global_px := x_global_px * px2mm] + tx[, y_global_px := y_global_px * px2mm] + } + # giottoPoints ----------------------------------------------------- # # static gpoints params + gpoints_params <- list() + gpoints_params$feat_type <- feat_type + gpoints_params$split_keyword <- split_keyword gpoints_params$x_colname <- "x_global_px" gpoints_params$y_colname <- "y_global_px" gpoints_params$feat_ID_colname <- "target" @@ -2355,7 +2374,7 @@ NULL } #' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), -#' yshift (numeric) +#' yshift (numeric). Values should always be in pixels .cosmx_infer_fov_shifts <- function(tx_dt, meta_dt, flip_loc_y = NULL) { fov <- NULL # NSE vars @@ -2391,37 +2410,88 @@ NULL return(res) } +.cosmx_imgname_fovparser <- function( + path +) { + im_names <- list.files(path) + fovs <- as.numeric(sub(".*F(\\d+)\\..*", "\\1", im_names)) + if (any(is.na(fovs))) { + warning(wrap_txt( + "Images to load should be sets of images/fov in subdirectories. + No other files should be present." + )) + } + return(fovs) +} + .cosmx_poly <- function( path, + slide = 1, fovs = NULL, - mask_params = list( - # VERTICAL FLIP + NO VERTICAL SHIFT - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - ID_fmt = NULL - ), + name = "cell", + # VERTICAL FLIP + NO SHIFTS + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + mm = FALSE, + px2mm = 0.12028, offsets, verbose = NULL ) { - fovs <- fovs %null% seq_along(list.files(path)) - gpolys <- lapply(fovs, function(fov) { - segfile <- Sys.glob(paths = sprintf("%s/*%03d*", path, fov)) - if (is.null(mask_params$ID_fmt)) { - mask_params$ID_fmt = paste0(sprintf("fov%03d", fov), "_cell%03d") - } - mask_params$verbose <- verbose %null% TRUE + # NSE params + f <- x <- y <- NULL + + if (missing(path)) { + stop(wrap_txt( + "No path to polys subdirectory provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading segmentation masks...") + + mask_params <- list( + # static params + mask_method = "multiple", + # if removal is TRUE, a real cell segmentation gets removed. + # There is no background poly for nanostring masks + remove_background_polygon = FALSE, + fill_holes = TRUE, + calc_centroids = TRUE, + remove_unvalid_polygons = TRUE, + # input params + name = name, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + shift_vertical_step = shift_vertical_step, + shift_horizontal_step = shift_horizontal_step, + verbose = FALSE + ) + + fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL + gpolys <- lapply(fovs, function(f) { + segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID + mask_params$ID_fmt = paste0( + sprintf("c_%d_%d_", slide, f), "%d" + ) + gpoly <- do.call( createGiottoPolygonsFromMask, args = c(list(maskfile = segfile), mask_params) ) - gpoly_shift <- spatShift( - x = gpoly, - dx = offsets[fov, x], - dy = offsets[fov, y] - ) + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + # if micron scale + if (mm) { + gpoly <- rescale(gpoly, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) + xshift <- xshift * px2mm + yshift <- yshift * px2mm + } + + gpoly_shift <- spatShift(x = gpoly, dx = xshift, dy = yshift) }) if (length(gpolys) > 1L) { @@ -2434,6 +2504,7 @@ NULL .cosmx_cellmeta <- function( path, + slide = 1, fovs = NULL, dropcols = c( "CenterX_local_px", @@ -2444,6 +2515,15 @@ NULL cores = determine_cores(), verbose = NULL ) { + + if (missing(path)) { + stop(wrap_txt( + "No path to metadata file provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading cell metadata...") + verbose <- verbose %null% TRUE meta_dt <- data.table::fread(input = path, nThread = cores) @@ -2457,10 +2537,18 @@ NULL dropcols <- dropcols[dropcols %in% meta_dt] meta_dt[, (dropcols) := NULL] # remove dropcols - # create cell ID as fov###_cell### - meta_dt[, cell_ID := sprintf("fov%03d_cell%03d", fov, cell_ID)] - # remove fov - meta_dt[, fov := NULL] + # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` + if ("cell" %in% colnames(meta_dt)) { + # assume already formatted (current datasets Mar-27-2024) + meta_dt[, c("fov", "cell_ID") := NULL] + data.table::setnames(meta_dt, old = "cell", "cell_ID") + } else { + # older datasets + meta_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] + # remove fov + meta_dt[, fov := NULL] + } + # TODO figure out what to do about protein expression here. cx <- createCellMetaObj( @@ -2475,11 +2563,22 @@ NULL .cosmx_expression <- function( path, + slide = 1, fovs = NULL, feat_type = c("rna", "negprobes"), split_keyword = list("NegPrb"), - cores = determine_cores() + cores = determine_cores(), + verbose = NULL ) { + + if (missing(path)) { + stop(wrap_txt( + "No path to exprMat file provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading expression matrix...") + expr_dt <- data.table::fread(input = path, nThread = cores) # subset to needed fovs @@ -2491,8 +2590,8 @@ NULL # remove background values (cell 0) expr_dt <- expr_dt[cell_ID != 0L,] - # create cell ID as fov###_cell### - expr_dt[, cell_ID := sprintf("fov%03d_cell%03d", fov, cell_ID)] + # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` + expr_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] # remove fov expr_dt[, fov := NULL] @@ -2537,14 +2636,25 @@ NULL negative_y = FALSE, flip_vertical = FALSE, flip_horizontal = FALSE, + mm = FALSE, + px2mm = 0.12028, offsets, verbose = NULL ) { - fovs <- fovs %null% seq_along(list.files(path)) + + if (missing(path)) { + stop(wrap_txt( + "No path to image subdirectory to load provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, sprintf("loading images...")) + + fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL verbose <- verbose %null% TRUE gimg_list <- lapply(fovs, function(fov) { - imgfile <- Sys.glob(paths = sprintf("%s/*%03d*", path, fov)) + imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, fov)) img_name <- sprintf(img_name_fmt, fov) gimg <- createGiottoLargeImage( @@ -2556,11 +2666,16 @@ NULL verbose = verbose ) - spatShift( - x = gimg, - dx = offsets[fov, x], - dy = offsets[fov, y] - ) + xshift <- offsets[fov, x] + yshift <- offsets[fov, y] + + if (mm) { + gimg <- rescale(gimg, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) + xshift <- xshift * px2mm + yshift <- yshift * px2mm + } + + spatShift(x = gimg, dx = xshift, dy = yshift) }) return(gimg_list) diff --git a/man/importCosMx.Rd b/man/importCosMx.Rd index 239cb04a5..6d49996d5 100644 --- a/man/importCosMx.Rd +++ b/man/importCosMx.Rd @@ -2,15 +2,29 @@ % Please edit documentation in R/classes.R \name{importCosMx} \alias{importCosMx} -\title{Import a CosMx Assay} +\title{Import a Nanostring CosMx Assay} \usage{ -importCosMx(cosmx_dir = NULL, fovs = NULL) +importCosMx( + cosmx_dir = NULL, + slide = 1, + fovs = NULL, + mm = FALSE, + px2mm = 0.12028 +) } \arguments{ \item{cosmx_dir}{CosMx output directory} +\item{slide}{numeric. Slide number. Defaults to 1} + \item{fovs}{numeric. (optional) If provided, will load specific fovs. Otherwise, all FOVs will be loaded} + +\item{mm}{logical. Whether to scale spatial information as millimeters +instead of the default pixels} + +\item{px2mm}{numeric. Scalefactor from pixels to mm. Defaults to 0.12028 +based on `CosMx-ReadMe.html` info} } \value{ CosmxReader object @@ -25,6 +39,13 @@ These functions should have all param values provided as defaults, but can be flexibly modified to do things such as look in alternative directories or paths. } +\details{ +Loading functions are generated after the `cosmx_dir` is added. +Transcripts, expression, and metadata loading are all expected to be done +from the top level of the directory. Loading of polys, and any image sets +are expected to be from specific subdirectories containing only those +images for the set of FOVs. +} \examples{ # Create a `CosmxReader` object reader <- importCosMx() From 3dc308dd50ba50cafb181c2d2e9ce5af86321175 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 09:52:15 -0400 Subject: [PATCH 017/150] enh: `importCosMx()` - add progressr for FOV-specific operations --- R/classes.R | 5 ++- R/convenience.R | 108 +++++++++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 50 deletions(-) diff --git a/R/classes.R b/R/classes.R index d1b94f3dd..ab4bbd04f 100644 --- a/R/classes.R +++ b/R/classes.R @@ -324,7 +324,7 @@ setMethod("initialize", signature("CosmxReader"), function( # images if (!is.null(load_images)) { - # convenient shortnames + # replace convenient shortnames load_images[load_images == "composite"] <- composite_img_path load_images[load_images == "overlay"] <- overlay_img_path @@ -333,7 +333,8 @@ setMethod("initialize", signature("CosmxReader"), function( for (imdir_i in seq_along(load_images)) { dir_imgs <- .Object@calls$load_images( path = load_images[[imdir_i]], - img_name_fmt = paste0(dirnames[[imdir_i]], "_fov%03d") + img_type = dirnames[[imdir_i]], + img_name_fmt = paste(img_type, "_fov%03d") ) imglist <- c(imglist, dir_imgs) } diff --git a/R/convenience.R b/R/convenience.R index e0f88b0d2..5c687de47 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2469,29 +2469,34 @@ NULL ) fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL - gpolys <- lapply(fovs, function(f) { - segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) - # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID - mask_params$ID_fmt = paste0( - sprintf("c_%d_%d_", slide, f), "%d" - ) - - gpoly <- do.call( - createGiottoPolygonsFromMask, - args = c(list(maskfile = segfile), mask_params) - ) - - xshift <- offsets[fov == f, x] - yshift <- offsets[fov == f, y] - - # if micron scale - if (mm) { - gpoly <- rescale(gpoly, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) - xshift <- xshift * px2mm - yshift <- yshift * px2mm - } - - gpoly_shift <- spatShift(x = gpoly, dx = xshift, dy = yshift) + progressr::with_progress({ + p <- progressr::progressor(along = fovs) + + gpolys <- lapply(fovs, function(f) { + segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID + mask_params$ID_fmt = paste0( + sprintf("c_%d_%d_", slide, f), "%d" + ) + + gpoly <- do.call( + createGiottoPolygonsFromMask, + args = c(list(maskfile = segfile), mask_params) + ) + + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + # if micron scale + if (mm) { + gpoly <- rescale(gpoly, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) + xshift <- xshift * px2mm + yshift <- yshift * px2mm + } + + gpoly_shift <- spatShift(x = gpoly, dx = xshift, dy = yshift) + p(message = sprintf("F%03d", f)) + }) }) if (length(gpolys) > 1L) { @@ -2632,7 +2637,8 @@ NULL .cosmx_image <- function( path, fovs = NULL, - img_name_fmt = "fov%03d", + img_type = "composite", + img_name_fmt = paste(img_type, "_fov%03d"), negative_y = FALSE, flip_vertical = FALSE, flip_horizontal = FALSE, @@ -2648,36 +2654,42 @@ NULL ), call. = FALSE) } - GiottoUtils::vmsg(.v = verbose, sprintf("loading images...")) + GiottoUtils::vmsg(.v = verbose, sprintf("loading %s images...", img_type)) fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL verbose <- verbose %null% TRUE - gimg_list <- lapply(fovs, function(fov) { - imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, fov)) - img_name <- sprintf(img_name_fmt, fov) - - gimg <- createGiottoLargeImage( - raster_object = imgfile, - name = img_name, - negative_y = negative_y, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - verbose = verbose - ) - - xshift <- offsets[fov, x] - yshift <- offsets[fov, y] - - if (mm) { - gimg <- rescale(gimg, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) - xshift <- xshift * px2mm - yshift <- yshift * px2mm - } - - spatShift(x = gimg, dx = xshift, dy = yshift) + progressr::with_progress({ + p <- progressr::progressor(along = fovs) + + gimg_list <- lapply(fovs, function(f) { + imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + img_name <- sprintf(img_name_fmt, f) + + gimg <- createGiottoLargeImage( + raster_object = imgfile, + name = img_name, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + verbose = verbose + ) + + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + if (mm) { + gimg <- rescale(gimg, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) + xshift <- xshift * px2mm + yshift <- yshift * px2mm + } + + spatShift(x = gimg, dx = xshift, dy = yshift) + p(message = sprintf("F%03d", f)) + }) }) + return(gimg_list) } From eef4e28ef0198d6c695ccd1ec9fb7b364c2d7e8f Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 10:23:45 -0400 Subject: [PATCH 018/150] enh: `importCosMx()` - add debug message with path --- R/convenience.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/convenience.R b/R/convenience.R index 5c687de47..2dbd09d80 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2339,7 +2339,8 @@ NULL checkmate::assert_file_exists(path) - GiottoUtils::vmsg(.v = verbose, "loading feature detections...") + vmsg(.v = verbose, "loading feature detections...") + vmsg(.v = verbose, .is_debug = TRUE, path) tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) if (!is.null(fovs)) { @@ -2449,6 +2450,7 @@ NULL } GiottoUtils::vmsg(.v = verbose, "loading segmentation masks...") + vmsg(.v = verbose, .is_debug = TRUE, path) mask_params <- list( # static params @@ -2528,6 +2530,7 @@ NULL } GiottoUtils::vmsg(.v = verbose, "loading cell metadata...") + vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE @@ -2583,6 +2586,7 @@ NULL } GiottoUtils::vmsg(.v = verbose, "loading expression matrix...") + vmsg(.v = verbose, .is_debug = TRUE, path) expr_dt <- data.table::fread(input = path, nThread = cores) @@ -2655,6 +2659,7 @@ NULL } GiottoUtils::vmsg(.v = verbose, sprintf("loading %s images...", img_type)) + vmsg(.v = verbose, .is_debug = TRUE, path) fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL verbose <- verbose %null% TRUE From 9d84a629455fbbb73a1a4bf40fadcff5ba55db25 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:18:01 -0400 Subject: [PATCH 019/150] fix: wrong values returned --- R/convenience.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 2dbd09d80..8d5a35188 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2496,8 +2496,9 @@ NULL yshift <- yshift * px2mm } - gpoly_shift <- spatShift(x = gpoly, dx = xshift, dy = yshift) + gpoly <- spatShift(x = gpoly, dx = xshift, dy = yshift) p(message = sprintf("F%03d", f)) + return(gpoly) }) }) @@ -2689,8 +2690,9 @@ NULL yshift <- yshift * px2mm } - spatShift(x = gimg, dx = xshift, dy = yshift) + gimg <- spatShift(x = gimg, dx = xshift, dy = yshift) p(message = sprintf("F%03d", f)) + return(gimg) }) }) From 5666b52507403cbe3b06c40002f69ac110555cb1 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:38:26 -0400 Subject: [PATCH 020/150] fix: change default for image load --- R/classes.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index ab4bbd04f..8e0cceed5 100644 --- a/R/classes.R +++ b/R/classes.R @@ -253,7 +253,7 @@ setMethod("initialize", signature("CosmxReader"), function( img_fun <- function( path = composite_img_path, img_name_fmt = "composite_fov%03d", - negative_y = FALSE, + negative_y = TRUE, flip_vertical = FALSE, flip_horizontal = FALSE, verbose = NULL From 699fbb51045fb6b14dcbdad1697ae1b1a1079df1 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 12:00:45 -0400 Subject: [PATCH 021/150] fix: try to fix metadata dropcols --- R/convenience.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience.R b/R/convenience.R index 8d5a35188..8ac2e4827 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2544,7 +2544,7 @@ NULL } dropcols <- dropcols[dropcols %in% meta_dt] - meta_dt[, (dropcols) := NULL] # remove dropcols + meta_dt[, `:=`(dropcols, NULL)] # remove dropcols # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` if ("cell" %in% colnames(meta_dt)) { From ce2d585eb355076a297e69a6e30f4311a87fe794 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 12:10:52 -0400 Subject: [PATCH 022/150] chore: update dropcols implementation --- R/convenience.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 8ac2e4827..4b3350553 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2537,15 +2537,16 @@ NULL meta_dt <- data.table::fread(input = path, nThread = cores) + # remove unneeded cols + dropcols <- dropcols[dropcols %in% colnames(meta_dt)] + meta_dt[, (dropcols) := NULL] # remove dropcols + # subset to needed fovs if (!is.null(fovs)) { fovs <- as.integer(fovs) meta_dt <- meta_dt[fov %in% fovs,] } - dropcols <- dropcols[dropcols %in% meta_dt] - meta_dt[, `:=`(dropcols, NULL)] # remove dropcols - # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` if ("cell" %in% colnames(meta_dt)) { # assume already formatted (current datasets Mar-27-2024) From 250b361f32d86827aa0142046e2486d7a6d139a1 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 21:48:50 -0400 Subject: [PATCH 023/150] enh: update cosmx expr matrix splitting --- R/convenience.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 4b3350553..2ace8ebd5 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2614,16 +2614,19 @@ NULL # split expression for rna / negprb if any split keywords provided. # Output of this chunk should always be a named list of 1 or more matrices if (length(split_keyword) > 0) { - expr_list <- list() + expr_list <- vector(mode = "list", length = length(feat_type)) + names(expr_list) <- feat_type + # iterate through other expr types for (key_i in seq_along(split_keyword)) { bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) # subset and store split matrix sub_mat <- expr_mat[bool,] - expr_list[[feat_type[[key_i + 1L]]]] <- sub_mat + expr_list[[key_i + 1L]] <- sub_mat # remaining matrix expr_mat <- expr_mat[!bool,] } - expr_list[[feat_type[[1L]]]] <- expr_mat + # assign the main expr + expr_list[[1L]] <- expr_mat } else { expr_list <- list(expr_mat) names(expr_list) <- feat_type[[1L]] From cf2d72e4ecfb1c97f8e927960b7f28393e67fd87 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 22:12:25 -0400 Subject: [PATCH 024/150] fix: indexing error --- R/convenience.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience.R b/R/convenience.R index 2ace8ebd5..3afc5cc72 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2609,7 +2609,6 @@ NULL # convert to Matrix expr_mat <- dt_to_matrix(expr_dt) expr_mat <- t_flex(expr_mat) - feat_ids <- rownames(expr_mat) # split expression for rna / negprb if any split keywords provided. # Output of this chunk should always be a named list of 1 or more matrices @@ -2618,6 +2617,7 @@ NULL names(expr_list) <- feat_type # iterate through other expr types for (key_i in seq_along(split_keyword)) { + feat_ids <- rownames(expr_mat) bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) # subset and store split matrix sub_mat <- expr_mat[bool,] From fec555feb9cddf21ecc2cf47352bbfd790e33866 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 22:43:21 -0400 Subject: [PATCH 025/150] fix: add missing params --- R/classes.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index 8e0cceed5..ff38a6f85 100644 --- a/R/classes.R +++ b/R/classes.R @@ -252,7 +252,8 @@ setMethod("initialize", signature("CosmxReader"), function( # images load call img_fun <- function( path = composite_img_path, - img_name_fmt = "composite_fov%03d", + img_type = "composite", + img_name_fmt = paste0(img_type, "_fov%03d"), negative_y = TRUE, flip_vertical = FALSE, flip_horizontal = FALSE, @@ -261,6 +262,7 @@ setMethod("initialize", signature("CosmxReader"), function( .cosmx_image( path = path, fovs = .Object@fovs %none% NULL, + img_type = img_type, img_name_fmt = img_name_fmt, negative_y = negative_y, flip_vertical = flip_vertical, From 780d303aba5018fa96ffea669fd4772f97b55326 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 23:13:20 -0400 Subject: [PATCH 026/150] enh: add expr and meta loading to cosmx importer --- R/classes.R | 70 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/R/classes.R b/R/classes.R index ff38a6f85..737cd8ce9 100644 --- a/R/classes.R +++ b/R/classes.R @@ -131,9 +131,9 @@ setMethod("initialize", signature("CosmxReader"), function( tx_path <- .detect_in_dir("tx_file") mask_dir <- .detect_in_dir("CellLabels") expr_path <- .detect_in_dir("exprMat_file") - composite_img_path <- .detect_in_dir("CellComposite") - overlay_img_path <- .detect_in_dir("CellOverlay") - compart_img_path <- .detect_in_dir("CompartmentLabels") + composite_img_dir <- .detect_in_dir("CellComposite") + overlay_img_dir <- .detect_in_dir("CellOverlay") + compart_img_dir <- .detect_in_dir("CompartmentLabels") # load fov offsets through one of several methods @@ -251,7 +251,7 @@ setMethod("initialize", signature("CosmxReader"), function( # images load call img_fun <- function( - path = composite_img_path, + path = composite_img_dir, img_type = "composite", img_name_fmt = paste0(img_type, "_fov%03d"), negative_y = TRUE, @@ -297,8 +297,17 @@ setMethod("initialize", signature("CosmxReader"), function( } .Object@calls$load_cellmeta <- meta_fun + # build gobject call gobject_fun <- function( + transcript_path = tx_path, + mask_dir = mask_dir, + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c("rna", "negprobes"), + split_keyword = list( + "NegPrb" + ), load_images = list( composite = "composite", overlay = "overlay" @@ -306,34 +315,48 @@ setMethod("initialize", signature("CosmxReader"), function( load_expression = FALSE, load_cellmeta = FALSE ) { + load_expression <- as.logical(load_expression) + load_cellmeta <- as.logical(load_cellmeta) + if (!is.null(load_images)) { checkmate::assert_list(load_images) if (is.null(names(load_images))) { stop("Images directories provided to 'load_images' must be named") } } + + funs <- .Object@calls + + # init gobject g <- giotto() # transcripts - tx_list <- .Object@calls$load_transcripts() + tx_list <- funs$load_transcripts( + path = transcript_path, + feat_type = feat_type, + split_keyword = split_keyword + ) for (tx in tx_list) { g <- setGiotto(g, tx) } # polys - polys <- .Object@calls$load_polys(verbose = FALSE) + polys <- funs$load_polys( + path = mask_dir, + verbose = FALSE + ) g <- setGiotto(g, polys) # images if (!is.null(load_images)) { # replace convenient shortnames - load_images[load_images == "composite"] <- composite_img_path - load_images[load_images == "overlay"] <- overlay_img_path + load_images[load_images == "composite"] <- composite_img_dir + load_images[load_images == "overlay"] <- overlay_img_dir imglist <- list() dirnames <- names(load_images) for (imdir_i in seq_along(load_images)) { - dir_imgs <- .Object@calls$load_images( + dir_imgs <- funs$load_images( path = load_images[[imdir_i]], img_type = dirnames[[imdir_i]], img_name_fmt = paste(img_type, "_fov%03d") @@ -343,8 +366,33 @@ setMethod("initialize", signature("CosmxReader"), function( g <- addGiottoLargeImage(g, largeImages = imglist) } - # TODO expression & meta - # Will need to check that names agree for poly/expr/meta + # expression & meta + # Need to check that names agree for poly/expr/meta + allowed_ids <- spatIDs(polys) + + if (load_expression) { + exlist <- funs$load_expression( + path = expression_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + + # only keep allowed cells and set into gobject + for (ex in exlist) { + bool <- colnames(ex[]) %in% allowed_ids + ex[] <- ex[][, bool] + g <- setGiotto(g, ex) + } + } + + if (load_cellmeta) { + cx <- funs$load_cellmeta( + path = metadata_path + ) + + cx[] <- c[][cell_ID %in% allowed_ids,] + g <- setGiotto(g, cx) + } return(g) } From 0a5af955ac0f6e460fe1ed269fc667ad8ef9484e Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 00:11:24 -0400 Subject: [PATCH 027/150] fix: try to fix param passing --- R/classes.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/classes.R b/R/classes.R index 737cd8ce9..783aa71dd 100644 --- a/R/classes.R +++ b/R/classes.R @@ -301,7 +301,7 @@ setMethod("initialize", signature("CosmxReader"), function( # build gobject call gobject_fun <- function( transcript_path = tx_path, - mask_dir = mask_dir, + cell_labels_dir = mask_dir, expression_path = expr_path, metadata_path = meta_path, feat_type = c("rna", "negprobes"), @@ -342,7 +342,7 @@ setMethod("initialize", signature("CosmxReader"), function( # polys polys <- funs$load_polys( - path = mask_dir, + path = cell_labels_dir, verbose = FALSE ) g <- setGiotto(g, polys) From d2d81f518d0bc8cd60ae85ec15ba664a59b1064b Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 00:34:15 -0400 Subject: [PATCH 028/150] fix: param passing --- R/classes.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index 783aa71dd..fc5d8d162 100644 --- a/R/classes.R +++ b/R/classes.R @@ -359,7 +359,6 @@ setMethod("initialize", signature("CosmxReader"), function( dir_imgs <- funs$load_images( path = load_images[[imdir_i]], img_type = dirnames[[imdir_i]], - img_name_fmt = paste(img_type, "_fov%03d") ) imglist <- c(imglist, dir_imgs) } From e5019fd0ded7a8d2e72a5f09dcc32d9f1cf16700 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 00:40:33 -0400 Subject: [PATCH 029/150] fox: typo --- R/classes.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index fc5d8d162..f39fcc57c 100644 --- a/R/classes.R +++ b/R/classes.R @@ -389,7 +389,7 @@ setMethod("initialize", signature("CosmxReader"), function( path = metadata_path ) - cx[] <- c[][cell_ID %in% allowed_ids,] + cx[] <- cx[][cell_ID %in% allowed_ids,] g <- setGiotto(g, cx) } From 02358f772d244ce20175cd69d01c70473dba4559 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 09:34:09 -0400 Subject: [PATCH 030/150] enh: `importCosMx()` - add `navg` param to `.cosmx_infer_fov_shifts()` - convert mm scaling to micron - add `plot()` `CosmxReader` method for previewing upper left corner of FOVs --- R/classes.R | 47 +++++++++++++++++---------- R/convenience.R | 53 ++++++++++++++++++++++--------- man/dot-cosmx_infer_fov_shifts.Rd | 30 +++++++++++++++++ man/importCosMx.Rd | 6 ++-- 4 files changed, 103 insertions(+), 33 deletions(-) create mode 100644 man/dot-cosmx_infer_fov_shifts.Rd diff --git a/R/classes.R b/R/classes.R index f39fcc57c..b0c44bf3b 100644 --- a/R/classes.R +++ b/R/classes.R @@ -6,14 +6,14 @@ setClass( cosmx_dir = "character", slide = "numeric", fovs = "numeric", - mm = "logical", + micron = "logical", px2mm = "numeric", offsets = "ANY", calls = "list" ), prototype = list( slide = 1, - mm = FALSE, + micron = FALSE, px2mm = 0.12028, # from cosmx output help files offsets = NULL, calls = list() @@ -35,7 +35,7 @@ setClass( #' @param slide numeric. Slide number. Defaults to 1 #' @param fovs numeric. (optional) If provided, will load specific fovs. #' Otherwise, all FOVs will be loaded -#' @param mm logical. Whether to scale spatial information as millimeters +#' @param micron logical. Whether to scale spatial information as micron #' instead of the default pixels #' @param px2mm numeric. Scalefactor from pixels to mm. Defaults to 0.12028 #' based on `CosMx-ReadMe.html` info @@ -55,6 +55,8 @@ setClass( #' reader$cosmx_dir <- "path to cosmx dir" #' reader$fov <- c(1, 4) #' +#' plot(reader) # displays FOVs (top left corner) in px scale. +#' #' # Load polygons, transcripts, and images #' polys <- reader$load_polys() #' tx <- reader$load_transcripts() @@ -69,7 +71,7 @@ setClass( #' } #' @export importCosMx <- function( - cosmx_dir = NULL, slide = 1, fovs = NULL, mm = FALSE, px2mm = 0.12028 + cosmx_dir = NULL, slide = 1, fovs = NULL, micron = FALSE, px2mm = 0.12028 ) { # get params a <- list(Class = "CosmxReader") @@ -80,14 +82,14 @@ importCosMx <- function( a$fovs <- fovs } a$slide <- slide - a$mm <- mm + a$micron <- micron a$px2mm <- px2mm do.call(new, args = a) } setMethod("initialize", signature("CosmxReader"), function( - .Object, cosmx_dir, slide, fovs, mm, px2mm + .Object, cosmx_dir, slide, fovs, micron, px2mm ) { # provided params (if any) if (!missing(cosmx_dir)) { @@ -100,8 +102,8 @@ setMethod("initialize", signature("CosmxReader"), function( if (!missing(fovs)) { .Object@fovs <- fovs } - if (!missing(mm)) { - .Object@mm <- mm + if (!missing(micron)) { + .Object@micron <- micron } if (!missing(px2mm)) { .Object@px2mm <- px2mm @@ -153,7 +155,7 @@ setMethod("initialize", signature("CosmxReader"), function( if (!is.null(meta_path) && is.null(pos)) { pos <- .cosmx_infer_fov_shifts( meta_dt = data.table::fread(meta_path), - flip_loc_y = FALSE + flip_loc_y = TRUE ) } else if (!is.null(tx_path) && is.null(pos)) { warning(wrap_txt( @@ -197,7 +199,7 @@ setMethod("initialize", signature("CosmxReader"), function( feat_type = feat_type, split_keyword = split_keyword, dropcols = dropcols, - mm = .Object@mm, + micron = .Object@micron, px2mm = .Object@px2mm, cores = determine_cores(), verbose = verbose @@ -224,7 +226,7 @@ setMethod("initialize", signature("CosmxReader"), function( flip_horizontal = flip_horizontal, shift_vertical_step = shift_vertical_step, shift_horizontal_step = shift_horizontal_step, - mm = .Object@mm, + micron = .Object@micron, px2mm = .Object@px2mm, offsets = .Object@offsets, verbose = verbose @@ -267,7 +269,7 @@ setMethod("initialize", signature("CosmxReader"), function( negative_y = negative_y, flip_vertical = flip_vertical, flip_horizontal = flip_horizontal, - mm = .Object@mm, + micron = .Object@micron, px2mm = .Object@px2mm, offsets = .Object@offsets, verbose = verbose @@ -444,7 +446,7 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # show #### setMethod("show", signature("CosmxReader"), function(object) { cat(sprintf("Giotto <%s>\n", "CosmxReader")) - print_slots <- c("dir", "slide", "fovs", "mm", "offsets", "funs") + print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") pre <- sprintf( "%s :", format(print_slots) ) @@ -472,9 +474,9 @@ setMethod("show", signature("CosmxReader"), function(object) { fovs <- object@fovs %none% "all" cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") - # mm scaling - mm <- ifelse(object@mm, object@px2mm, FALSE) - cat(pre["mm"], mm, "\n") + # micron scaling + micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) + cat(pre["micron"], micron, "\n") # offsets offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") @@ -493,5 +495,18 @@ setMethod("show", signature("CosmxReader"), function(object) { setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) +setMethod( + "plot", signature(x = "CosmxReader", y = "missing"), + function(x, cex = 0.8, ...) { + a <- list(...) + dat <- x@offsets + + if (is.null(dat)) { # don't run if no offsets + cat("no offsets to plot\n") + return(invisible(NULL)) + } + plot(y ~ x, data = dat, asp = 1L, type = "n", ...) + text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) + }) diff --git a/R/convenience.R b/R/convenience.R index 3afc5cc72..84278afd6 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2325,7 +2325,7 @@ NULL "cell_ID", "cell" ), - mm = FALSE, + micron = FALSE, px2mm = 0.12028, cores = determine_cores(), verbose = NULL @@ -2350,8 +2350,9 @@ NULL # mm scaling if desired if (mm) { - tx[, x_global_px := x_global_px * px2mm] - tx[, y_global_px := y_global_px * px2mm] + px2micron <- px2mm / 1000 + tx[, x_global_px := x_global_px * px2micron] + tx[, y_global_px := y_global_px * px2micron] } # giottoPoints ----------------------------------------------------- # @@ -2374,14 +2375,30 @@ NULL return(gpoints) } +#' @name .cosmx_infer_fov_shifts +#' @title Infer CosMx local to global shifts +#' @description +#' From NanoString CosMx spatial info, infer the FOV shifts needed. These +#' values are needed for anything that requires the use of images, since those +#' do not come with spatial extent information embedded. +#' @param tx_dt transcript data.table input to use +#' (Only one of tx_dt or meta_dt should be used) +#' @param meta_dt cell metadata data.table input to use +#' (Only one of tx_dt or meta_dt should be used) +#' @param navg max n values to check per FOV to find average shift +#' @param flip_loc_y whether a y flip needs to be performed on the local y +#' values before camparing with global y values #' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), #' yshift (numeric). Values should always be in pixels -.cosmx_infer_fov_shifts <- function(tx_dt, meta_dt, flip_loc_y = NULL) { +#' @keywords internal +.cosmx_infer_fov_shifts <- function( + tx_dt, meta_dt, flip_loc_y = NULL, navg = 100L +) { fov <- NULL # NSE vars if (!missing(tx_dt)) { flip_loc_y %null% TRUE # default = TRUE - tx_head <- tx_dt[, head(.SD, 10L), by = fov] + tx_head <- tx_dt[, head(.SD, navg), by = fov] x <- tx_head[, mean(x_global_px - x_local_px), by = fov] if (flip_loc_y) { # use +y if local y values are flipped @@ -2393,7 +2410,7 @@ NULL if (!missing(meta_dt)) { flip_loc_y %null% FALSE # default = FALSE - meta_head <- meta_dt[, head(.SD, 10L), by = fov] + meta_head <- meta_dt[, head(.SD, navg), by = fov] x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] if (flip_loc_y) { # use +y if local y values are flipped @@ -2435,7 +2452,7 @@ NULL flip_horizontal = FALSE, shift_vertical_step = FALSE, shift_horizontal_step = FALSE, - mm = FALSE, + micron = FALSE, px2mm = 0.12028, offsets, verbose = NULL @@ -2491,9 +2508,12 @@ NULL # if micron scale if (mm) { - gpoly <- rescale(gpoly, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) - xshift <- xshift * px2mm - yshift <- yshift * px2mm + px2micron <- px2mm / 1000 + gpoly <- rescale( + gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + ) + xshift <- xshift * px2micron + yshift <- yshift * px2micron } gpoly <- spatShift(x = gpoly, dx = xshift, dy = yshift) @@ -2648,10 +2668,10 @@ NULL fovs = NULL, img_type = "composite", img_name_fmt = paste(img_type, "_fov%03d"), - negative_y = FALSE, + negative_y = TRUE, flip_vertical = FALSE, flip_horizontal = FALSE, - mm = FALSE, + micron = FALSE, px2mm = 0.12028, offsets, verbose = NULL @@ -2689,9 +2709,12 @@ NULL yshift <- offsets[fov == f, y] if (mm) { - gimg <- rescale(gimg, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) - xshift <- xshift * px2mm - yshift <- yshift * px2mm + px2micron <- px2mm / 1000 + gimg <- rescale( + gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + ) + xshift <- xshift * px2micron + yshift <- yshift * px2micron } gimg <- spatShift(x = gimg, dx = xshift, dy = yshift) diff --git a/man/dot-cosmx_infer_fov_shifts.Rd b/man/dot-cosmx_infer_fov_shifts.Rd new file mode 100644 index 000000000..d11530350 --- /dev/null +++ b/man/dot-cosmx_infer_fov_shifts.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience.R +\name{.cosmx_infer_fov_shifts} +\alias{.cosmx_infer_fov_shifts} +\title{Infer CosMx local to global shifts} +\usage{ +.cosmx_infer_fov_shifts(tx_dt, meta_dt, flip_loc_y = NULL, navg = 100L) +} +\arguments{ +\item{tx_dt}{transcript data.table input to use +(Only one of tx_dt or meta_dt should be used)} + +\item{meta_dt}{cell metadata data.table input to use +(Only one of tx_dt or meta_dt should be used)} + +\item{flip_loc_y}{whether a y flip needs to be performed on the local y +values before camparing with global y values} + +\item{navg}{max n values to check per FOV to find average shift} +} +\value{ +data.table with three columns. 1. FOV (integer), xshift (numeric), +yshift (numeric). Values should always be in pixels +} +\description{ +From NanoString CosMx spatial info, infer the FOV shifts needed. These +values are needed for anything that requires the use of images, since those +do not come with spatial extent information embedded. +} +\keyword{internal} diff --git a/man/importCosMx.Rd b/man/importCosMx.Rd index 6d49996d5..adf975b7c 100644 --- a/man/importCosMx.Rd +++ b/man/importCosMx.Rd @@ -8,7 +8,7 @@ importCosMx( cosmx_dir = NULL, slide = 1, fovs = NULL, - mm = FALSE, + micron = FALSE, px2mm = 0.12028 ) } @@ -20,7 +20,7 @@ importCosMx( \item{fovs}{numeric. (optional) If provided, will load specific fovs. Otherwise, all FOVs will be loaded} -\item{mm}{logical. Whether to scale spatial information as millimeters +\item{micron}{logical. Whether to scale spatial information as micron instead of the default pixels} \item{px2mm}{numeric. Scalefactor from pixels to mm. Defaults to 0.12028 @@ -55,6 +55,8 @@ reader <- importCosMx() reader$cosmx_dir <- "path to cosmx dir" reader$fov <- c(1, 4) +plot(reader) # displays FOVs (top left corner) in px scale. + # Load polygons, transcripts, and images polys <- reader$load_polys() tx <- reader$load_transcripts() From e3d9cabb252d3a8e2536fd65e9818cc2b524b05c Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 11:30:27 -0400 Subject: [PATCH 031/150] enh: `importCosMx()` - add auto detection of whether local y values should be inverted during FOV shift calculation --- R/convenience.R | 47 +++++++++++++++++++++++++------ man/dot-cosmx_infer_fov_shifts.Rd | 17 +++++++++-- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 84278afd6..914ada037 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2387,32 +2387,61 @@ NULL #' (Only one of tx_dt or meta_dt should be used) #' @param navg max n values to check per FOV to find average shift #' @param flip_loc_y whether a y flip needs to be performed on the local y -#' values before camparing with global y values +#' values before comparing with global y values. See details #' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), #' yshift (numeric). Values should always be in pixels +#' @details +#' Shifts are found by looking at the average of differences between xy global +#' and local coordinates in either the metadata or transcripts file. The number +#' of shift value to average across is determined with `navg`. The average is +#' in place to get rid of small differences in shifts, likely due to rounding +#' errors. Across the different versions of the CosMx exports, whether the +#' local y values are flipped compared to the global values has differed, so +#' there is also a step that checks the variance of y values per sampled set +#' per fov. In cases where the shift is calculated with the correct (inverted +#' or non-inverted) y local values, the variance is expected to be very low. +#' When the variance is higher than 0.001, the function is re-run with the +#' opposite `flip_loc_y` value. #' @keywords internal .cosmx_infer_fov_shifts <- function( - tx_dt, meta_dt, flip_loc_y = NULL, navg = 100L + tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L ) { fov <- NULL # NSE vars - if (!missing(tx_dt)) { - flip_loc_y %null% TRUE # default = TRUE tx_head <- tx_dt[, head(.SD, navg), by = fov] x <- tx_head[, mean(x_global_px - x_local_px), by = fov] if (flip_loc_y) { + + # test if flip is needed + # Usual yshift variance / fov expected when correct is 0 to 1e-22 + # if var is too high for any fov, swap `flip_loc_y` value + y <- tx_head[, var(y_global_px + y_local_px), by = fov] + if (y[, any(V1 > 0.001)]) { + return(.cosmx_infer_fov_shifts( + tx_dt = tx_dt, flip_loc_y = FALSE, navg = navg + )) + } + # use +y if local y values are flipped y <- tx_head[, mean(y_global_px + y_local_px), by = fov] } else { y <- tx_head[, mean(y_global_px - y_local_px), by = fov] } - } - - if (!missing(meta_dt)) { - flip_loc_y %null% FALSE # default = FALSE + } else if (!missing(meta_dt)) { meta_head <- meta_dt[, head(.SD, navg), by = fov] x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] if (flip_loc_y) { + + # test if flip is needed + # Usual yshift variance / fov expected when correct is 0 to 1e-22 + # if var is too high for any fov, swap `flip_loc_y` value + y <- meta_head[, var(CenterY_global_px + CenterY_local_px), by = fov] + if (y[, any(V1 > 0.001)]) { + return(.cosmx_infer_fov_shifts( + meta_dt = meta_dt, flip_loc_y = FALSE, navg = navg + )) + } + # use +y if local y values are flipped y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), by = fov] @@ -2420,6 +2449,8 @@ NULL y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), by = fov] } + } else { + stop("One of tx_dt or meta_dt must be provided\n") } res <- merge(x, y, by = "fov") diff --git a/man/dot-cosmx_infer_fov_shifts.Rd b/man/dot-cosmx_infer_fov_shifts.Rd index d11530350..1a1be8809 100644 --- a/man/dot-cosmx_infer_fov_shifts.Rd +++ b/man/dot-cosmx_infer_fov_shifts.Rd @@ -4,7 +4,7 @@ \alias{.cosmx_infer_fov_shifts} \title{Infer CosMx local to global shifts} \usage{ -.cosmx_infer_fov_shifts(tx_dt, meta_dt, flip_loc_y = NULL, navg = 100L) +.cosmx_infer_fov_shifts(tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L) } \arguments{ \item{tx_dt}{transcript data.table input to use @@ -14,7 +14,7 @@ (Only one of tx_dt or meta_dt should be used)} \item{flip_loc_y}{whether a y flip needs to be performed on the local y -values before camparing with global y values} +values before comparing with global y values. See details} \item{navg}{max n values to check per FOV to find average shift} } @@ -27,4 +27,17 @@ From NanoString CosMx spatial info, infer the FOV shifts needed. These values are needed for anything that requires the use of images, since those do not come with spatial extent information embedded. } +\details{ +Shifts are found by looking at the average of differences between xy global +and local coordinates in either the metadata or transcripts file. The number +of shift value to average across is determined with `navg`. The average is +in place to get rid of small differences in shifts, likely due to rounding +errors. Across the different versions of the CosMx exports, whether the +local y values are flipped compared to the global values has differed, so +there is also a step that checks the variance of y values per sampled set +per fov. In cases where the shift is calculated with the correct (inverted +or non-inverted) y local values, the variance is expected to be very low. +When the variance is higher than 0.001, the function is re-run with the +opposite `flip_loc_y` value. +} \keyword{internal} From b577e43db250cb80ca4154cc59fb5e6f254f83df Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 11:37:09 -0400 Subject: [PATCH 032/150] fix: cleanup mm to micron arg change --- R/classes.R | 6 +++--- R/convenience.R | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/classes.R b/R/classes.R index b0c44bf3b..04d7566a9 100644 --- a/R/classes.R +++ b/R/classes.R @@ -410,7 +410,7 @@ setMethod("initialize", signature("CosmxReader"), function( #' @export setMethod("$", signature("CosmxReader"), function(x, name) { - basic_info <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm", "offsets") + basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") if (name %in% basic_info) return(methods::slot(x, name)) return(x@calls[[name]]) @@ -418,7 +418,7 @@ setMethod("$", signature("CosmxReader"), function(x, name) { #' @export setMethod("$<-", signature("CosmxReader"), function(x, name, value) { - basic_info <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm") + basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm") if (name %in% basic_info) { methods::slot(x, name) <- value return(initialize(x)) @@ -435,7 +435,7 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' @export `.DollarNames.CosmxReader` <- function(x, pattern) { - dn <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm", "offsets") + dn <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") if (length(methods::slot(x, "calls")) > 0) { dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) } diff --git a/R/convenience.R b/R/convenience.R index 914ada037..4f8500d90 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2348,8 +2348,8 @@ NULL tx <- tx[fov %in% as.numeric(fovs),] } - # mm scaling if desired - if (mm) { + # micron scaling if desired + if (micron) { px2micron <- px2mm / 1000 tx[, x_global_px := x_global_px * px2micron] tx[, y_global_px := y_global_px * px2micron] @@ -2538,7 +2538,7 @@ NULL yshift <- offsets[fov == f, y] # if micron scale - if (mm) { + if (micron) { px2micron <- px2mm / 1000 gpoly <- rescale( gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 @@ -2739,7 +2739,7 @@ NULL xshift <- offsets[fov == f, x] yshift <- offsets[fov == f, y] - if (mm) { + if (micron) { px2micron <- px2mm / 1000 gimg <- rescale( gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 From 694152340178b447e3081cd1045601d956f250ac Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 17:08:48 -0400 Subject: [PATCH 033/150] feat: Xen Reader WIP --- R/classes.R | 238 ++++++++++++++++++++++++++++++++++-------------- R/convenience.R | 3 +- 2 files changed, 174 insertions(+), 67 deletions(-) diff --git a/R/classes.R b/R/classes.R index 04d7566a9..806d732b1 100644 --- a/R/classes.R +++ b/R/classes.R @@ -1,5 +1,117 @@ +# common internals #### +abbrev_path <- function(path, head = 15, tail = 35L) { + nch <- nchar(path) + if (nch > 60L) { + p1 <- substring(path, first = 0L, last = head) + p2 <- substring(path, first = nch - tail, last = nch) + path <- paste0(p1, "[...]", p2) + } + return(path) +} + +.reader_fun_prints <- function(x, pre) { + nfun <- length(x@calls) + funs <- names(x@calls) + if (nfun > 0L) { + pre_funs <- format(c(pre, rep("", nfun - 1L))) + for (i in seq_len(nfun)) { + cat(pre_funs[i], " ", funs[i], "()\n", sep = "") + } + } +} + + +# Xenium #### + +setClass( + "XeniumReader", + slots = list( + xenium_dir = "character", + format = "character", + fovs = "numeric", + qv = "ANY", + calls = "list" + ), + prototype = list( + format = "parquet", + qv = 20, + calls = list() + ) +) + +setMethod("show", signature("XeniumReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "XeniumReader")) + print_slots <- c("dir", "format", "fovs", "qv_cutoff", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@xenium_dir + if (length(d) > 0L) { + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # format + form <- object@format + cat(pre["format"], paste(form, collapse = ", "), "\n") + + # fovs + fovs <- object@fovs %none% "all" + cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") + + # qv + qv <- object@qv + cat(pre["qv_cutoff"], paste(qv, collapse = ", "), "\n") + + # funs + .fun_prints(x = object, pre = pre["fun"]) +}) + + + +# access #### + +#' @export +setMethod("$", signature("XeniumReader"), function(x, name) { + basic_info <- c("xenium_dir", "format", "fovs", "qv") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("XeniumReader"), function(x, name, value) { + basic_info <- c("xenium_dir", "format", "fovs", "qv") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.XeniumReader` <- function(x, pattern) { + dn <- c("xenium_dir", "format", "fovs", "qv") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) +} + + + + +# CosMx #### + setClass( "CosmxReader", slots = list( @@ -20,6 +132,64 @@ setClass( ) ) +setMethod("show", signature("CosmxReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "CosmxReader")) + print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@cosmx_dir + if (length(d) > 0L) { + nch <- nchar(d) + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # slide + slide <- object@slide + cat(pre["slide"], slide, "\n") + + # fovs + fovs <- object@fovs %none% "all" + cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") + + # micron scaling + micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) + cat(pre["micron"], micron, "\n") + + # offsets + offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") + cat(pre["offsets"], offs_status, "\n") + + # funs + .fun_prints(x = object, pre = pre["fun"]) +}) + +setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) + +setMethod( + "plot", signature(x = "CosmxReader", y = "missing"), + function(x, cex = 0.8, ...) { + a <- list(...) + dat <- x@offsets + + if (is.null(dat)) { # don't run if no offsets + cat("no offsets to plot\n") + return(invisible(NULL)) + } + + plot(y ~ x, data = dat, asp = 1L, type = "n", ...) + text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) + }) + + + + #' @title Import a Nanostring CosMx Assay #' @name importCosMx #' @description @@ -285,7 +455,8 @@ setMethod("initialize", signature("CosmxReader"), function( "CenterX_local_px", "CenterY_local_px", "CenterX_global_px", - "CenterY_global_px" + "CenterY_global_px", + "cell_id" ), verbose = NULL ) { @@ -443,70 +614,5 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { } -# show #### -setMethod("show", signature("CosmxReader"), function(object) { - cat(sprintf("Giotto <%s>\n", "CosmxReader")) - print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") - pre <- sprintf( - "%s :", format(print_slots) - ) - names(pre) <- print_slots - # dir - d <- object@cosmx_dir - if (length(d) > 0L) { - nch <- nchar(d) - if (nch > 60L) { - d1 <- substring(d, first = 0L, last = 15L) - d2 <- substring(d, first = nch - 35L, last = nch) - d <- paste0(d1, "[...]", d2) - } - cat(pre["dir"], d, "\n") - } else { - cat(pre["dir"], "\n") - } - - # slide - slide <- object@slide - cat(pre["slide"], slide, "\n") - - # fovs - fovs <- object@fovs %none% "all" - cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") - - # micron scaling - micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) - cat(pre["micron"], micron, "\n") - - # offsets - offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") - cat(pre["offsets"], offs_status, "\n") - - # funs - nfun <- length(object@calls) - funs <- names(object@calls) - if (nfun > 0L) { - pre_funs <- format(c(pre["funs"], rep("", nfun - 1L))) - for (i in seq_len(nfun)) { - cat(pre_funs[i], " ", funs[i], "()\n", sep = "") - } - } -}) - -setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) - -setMethod( - "plot", signature(x = "CosmxReader", y = "missing"), - function(x, cex = 0.8, ...) { - a <- list(...) - dat <- x@offsets - - if (is.null(dat)) { # don't run if no offsets - cat("no offsets to plot\n") - return(invisible(NULL)) - } - - plot(y ~ x, data = dat, asp = 1L, type = "n", ...) - text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) - }) diff --git a/R/convenience.R b/R/convenience.R index 4f8500d90..e04aa6a16 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2569,7 +2569,8 @@ NULL "CenterX_local_px", "CenterY_local_px", "CenterX_global_px", - "CenterY_global_px" + "CenterY_global_px", + "cell_id" ), cores = determine_cores(), verbose = NULL From 99a5a15bb77614d0a94c54405b1a5899945f2c0f Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 4 Apr 2024 11:02:41 -0400 Subject: [PATCH 034/150] fixes for cosmx importer - also WIP with Xenium importer --- NAMESPACE | 1 + R/classes.R | 442 ++++++++++++++++-- R/convenience.R | 398 +++++++++++++++- man/createGiottoCosMxObject.Rd | 6 +- man/createGiottoMerscopeObject.Rd | 6 +- man/createGiottoXeniumObject.Rd | 6 +- man/dot-createGiottoCosMxObject_aggregate.Rd | 6 +- man/dot-createGiottoCosMxObject_all.Rd | 6 +- ...dot-createGiottoCosMxObject_subcellular.Rd | 6 +- man/dot-createGiottoXeniumObject_aggregate.Rd | 6 +- ...ot-createGiottoXeniumObject_subcellular.Rd | 6 +- man/dot-load_cosmx_folder_aggregate.Rd | 3 +- man/dot-load_cosmx_folder_subcellular.Rd | 3 +- man/load_merscope_folder.Rd | 3 +- man/load_xenium_folder.Rd | 3 +- 15 files changed, 843 insertions(+), 58 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c3fc035b8..299b4a40c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand S3method(.DollarNames,CosmxReader) +S3method(.DollarNames,XeniumReader) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") diff --git a/R/classes.R b/R/classes.R index 806d732b1..ccc3e2843 100644 --- a/R/classes.R +++ b/R/classes.R @@ -22,6 +22,54 @@ abbrev_path <- function(path, head = 15, tail = 35L) { } } +.filetype_prints <- function(x, pre) { + nftype <- length(x@filetype) + datatype <- format(names(x@filetype)) + pre_ftypes <- format(c(pre, rep("", nftype - 1L))) + cat(sprintf("%s %s -- %s\n", + pre_ftypes, + datatype, + x@filetype), + sep = "") +} + +# pattern - list.files pattern to use to search for specific files/dirs +# warn - whether to warn when a pattern does not find any files +# first - whether to only return the first match +.detect_in_dir <- function( + path, pattern, platform, warn = TRUE, first = TRUE +) { + f <- list.files(path, pattern = pattern, full.names = TRUE) + lenf <- length(f) + if (lenf == 1L) return(f) # one match + else if (lenf == 0L) { # no matches + if (warn) { + warning(sprintf( + "%s not detected in %s directory", + pattern, + platform + ), + call. = FALSE) + } + return(NULL) + } + + # more than one match + if (first) { + return(f[[1L]]) + } else { + return(f) + } +} + + + + + + + + + # Xenium #### @@ -29,21 +77,26 @@ setClass( "XeniumReader", slots = list( xenium_dir = "character", - format = "character", - fovs = "numeric", + filetype = "list", qv = "ANY", calls = "list" ), prototype = list( - format = "parquet", + filetype = list( + transcripts = "parquet", + boundaries = "parquet", + expression = "h5", + cell_meta = "parquet" + ), qv = 20, calls = list() ) ) +# * show #### setMethod("show", signature("XeniumReader"), function(object) { cat(sprintf("Giotto <%s>\n", "XeniumReader")) - print_slots <- c("dir", "format", "fovs", "qv_cutoff", "funs") + print_slots <- c("dir", "filetype", "qv_cutoff", "funs") pre <- sprintf( "%s :", format(print_slots) ) @@ -58,29 +111,347 @@ setMethod("show", signature("XeniumReader"), function(object) { cat(pre["dir"], "\n") } - # format - form <- object@format - cat(pre["format"], paste(form, collapse = ", "), "\n") - - # fovs - fovs <- object@fovs %none% "all" - cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") - # qv qv <- object@qv cat(pre["qv_cutoff"], paste(qv, collapse = ", "), "\n") + # filetype + .filetype_prints(x = object, pre = pre["filetype"]) + # funs - .fun_prints(x = object, pre = pre["fun"]) + .reader_fun_prints(x = object, pre = pre["funs"]) }) +# * print #### +setMethod("print", signature("XeniumReader"), function(x, ...) show(x)) + +# * init #### +setMethod( + "initialize", signature("XeniumReader"), + function( + .Object, + xenium_dir, + filetype, + qv_cutoff + ) { + .Object <- callNextMethod(.Object) + + # provided params (if any) + if (!missing(xenium_dir)) { + checkmate::assert_directory_exists(xenium_dir) + .Object@xenium_dir <- xenium_dir + } + if (!missing(filetype)) { + .Object@filetype <- filetype + } + if (!missing(qv_cutoff)) { + .Object@qv <- qv_cutoff + } + + + # check filetype + ftype_data <- c("transcripts", "boundaries", "expression", "cell_meta") + if (!all(ftype_data %in% names(.Object@filetype))) { + stop(wrap_txt("`$filetype` must have entries for each of:\n", + paste(ftype_data, collapse = ", "))) + } + + ftype <- .Object@filetype + ft_tab <- c("csv", "parquet") + ft_exp <- c("h5", "mtx", "zarr") + if (!ftype$transcripts %in% ft_tab) { + stop(wrap_txt("`$filetype$transcripts` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$boundaries %in% ft_tab) { + stop(wrap_txt("`$filetype$boundaries` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$cell_meta %in% ft_tab) { + stop(wrap_txt("`$filetype$cell_meta` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$expression %in% ft_exp) { + stop(wrap_txt("`$filetype$expression` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + + + # detect paths and subdirs + p <- .Object@xenium_dir + .xenium_detect <- function(pattern, ...) { + .detect_in_dir( + pattern = pattern, ..., + path = p, platform = "Xenium", + ) + } + + cell_meta_path <- .xenium_detect("cells", first = FALSE) + panel_meta_path <- .xenium_detect("panel") # json + experiment_info_path <- .xenium_detect(".xenium") # json + + # 3D stack - DAPI + img_path <- .xenium_detect("morphology.", warn = FALSE) + # 2D fusion images + # - DAPI + # - stainings for multimodal segmentation + img_focus_path <- .xenium_detect("morphology_focus", warn = FALSE) + # Maximum intensity projection (MIP) of the morphology image. + # (Xenium Outputs v1.0 - 1.9. only) + img_mip_path <- .xenium_detect("morphology_mip", warn = FALSE) + + tx_path <- .xenium_detect("transcripts", first = FALSE) + cell_bound_path <- .xenium_detect("cell_bound", first = FALSE) + nuc_bound_path <- .xenium_detect("nucleus_bound", first = FALSE) + + expr_path <- .xenium_detect("cell_feature_matrix", first = FALSE) + + .xenium_ftype <- function(paths, ftype) { + paths[grepl(pattern = paste0(".", ftype), x = paths)] + } + + + # select file formats based on reader settings + tx_path <- .xenium_ftype(tx_path, ftype$transcripts) + cell_bound_path <- .xenium_ftype(cell_bound_path, ftype$boundaries) + nuc_bound_path <- .xenium_ftype(nuc_bound_path, ftype$boundaries) + expr_path <- .xenium_ftype(expr_path, ftype$expression) + cell_meta_path <- .xenium_ftype(cell_meta_path, ftype$cell_meta) + + + # transcripts load call + tx_fun <- function( + path = tx_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + dropcols = c(), + qv_threshold = .Object@qv, + cores = determine_cores(), + verbose = NULL + ) { + .xenium_transcript( + path = path, + feat_type = feat_type, + split_keyword = split_keyword, + dropcols = dropcols, + qv_threshold = qv_threshold, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_transcripts <- tx_fun + + # load polys call + poly_fun <- function( + path = cell_bound_path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL + ) { + .xenium_poly( + path = path, + name = name, + calc_centroids = calc_centroids, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_polys <- poly_fun + + # load cellmeta + cmeta_fun <- function( + path = cell_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + .xenium_cellmeta( + path = path, + dropcols = dropcols, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_cellmeta <- cmeta_fun + + # load featmeta + fmeta_fun <- function( + path = panel_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + .xenium_featmeta( + path = path, + gene_ids, + dropcols = dropcols, + verbose = verbose + ) + } + .Object@calls$load_featmeta <- fmeta_fun + + # load expression call + expr_fun <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + .xenium_expression( + path = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_expression <- expr_fun + + # load image call + + + + + # create giotto object call + gobject_fun <- function( + transcript_path = tx_path, + load_bounds = list( + cell = "cell", + nucleus = "nucleus" + ), + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + load_images = list( + morphology = "focus", + ), + load_expression = FALSE, + load_cellmeta = FALSE + ) { + load_expression <- as.logical(load_expression) + load_cellmeta <- as.logical(load_cellmeta) + + if (!is.null(load_images)) { + checkmate::assert_list(load_images) + if (is.null(names(load_images))) { + stop("Images paths provided to 'load_images' must be named") + } + } + if (!is.null(load_bounds)) { + checkmate::assert_list(load_bounds) + if (is.null(names(load_bounds))) { + stop("bounds paths provided to 'load_bounds' must be named") + } + } + + + + funs <- .Object@calls + + # init gobject + g <- giotto() + + + # transcripts + tx_list <- funs$load_transcripts( + path = transcript_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + + + # polys + if (!is.null(load_bounds)) { + # replace convenient shortnames + load_bounds[load_bounds == "cell"] <- cell_bound_path + load_bounds[load_bounds == "nucleus"] <- nuc_bound_path + + blist <- list() + bnames <- names(load_bounds) + for (b_i in seq_along(load_bounds)) { + b <- funs$load_polys( + path = load_bounds[[b_i]], + name = bnames[[b_i]] + ) + blist <- c(blist, b) + } + for (gpoly_i in seq_along(blist)) { + g <- setGiotto(g, blist[[gpoly_i]]) + } + } + + + # feat metadata + fx <- funs$load_featmeta( + path = + ) + + + # expression + if (load_expression) { + + } + + + # cell metadata + if (load_cellmeta) { + + } + + + # images + if (!is.null(load_images)) { + # replace convenient shortnames + load_images[load_images == "focus"] <- img_focus_path + } + + + + + } + .Object@calls$create_gobject <- gobject_fun + + + return(.Object) + } +) + + # access #### #' @export setMethod("$", signature("XeniumReader"), function(x, name) { - basic_info <- c("xenium_dir", "format", "fovs", "qv") + basic_info <- c("xenium_dir", "filetype", "qv") if (name %in% basic_info) return(methods::slot(x, name)) return(x@calls[[name]]) @@ -88,7 +459,7 @@ setMethod("$", signature("XeniumReader"), function(x, name) { #' @export setMethod("$<-", signature("XeniumReader"), function(x, name, value) { - basic_info <- c("xenium_dir", "format", "fovs", "qv") + basic_info <- c("xenium_dir", "filetype", "qv") if (name %in% basic_info) { methods::slot(x, name) <- value return(initialize(x)) @@ -100,7 +471,7 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { #' @export `.DollarNames.XeniumReader` <- function(x, pattern) { - dn <- c("xenium_dir", "format", "fovs", "qv") + dn <- c("xenium_dir", "filetype", "qv") if (length(methods::slot(x, "calls")) > 0) { dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) } @@ -110,6 +481,8 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { + + # CosMx #### setClass( @@ -132,6 +505,7 @@ setClass( ) ) +# * show #### setMethod("show", signature("CosmxReader"), function(object) { cat(sprintf("Giotto <%s>\n", "CosmxReader")) print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") @@ -167,11 +541,13 @@ setMethod("show", signature("CosmxReader"), function(object) { cat(pre["offsets"], offs_status, "\n") # funs - .fun_prints(x = object, pre = pre["fun"]) + .fun_prints(x = object, pre = pre["funs"]) }) +# * print #### setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) +# * plot #### setMethod( "plot", signature(x = "CosmxReader", y = "missing"), function(x, cex = 0.8, ...) { @@ -258,6 +634,7 @@ importCosMx <- function( do.call(new, args = a) } +# * init #### setMethod("initialize", signature("CosmxReader"), function( .Object, cosmx_dir, slide, fovs, micron, px2mm ) { @@ -287,25 +664,18 @@ setMethod("initialize", signature("CosmxReader"), function( # detect paths and subdirs p <- .Object@cosmx_dir - .detect_in_dir <- function(pattern) { - f <- list.files(p, pattern = pattern, full.names = TRUE) - lenf <- length(f) - if (lenf == 1L) return(f) - else if (lenf == 0L) { - warning(pattern, " not detected in CosMx directory", call. = FALSE) - return(NULL) - } - return(f[[1L]]) # more than one match + .cosmx_detect <- function(pattern) { + .detect_in_dir(pattern = pattern, path = p, platform = "CosMx") } - shifts_path <- .detect_in_dir("fov_positions_file") - meta_path <- .detect_in_dir("metadata_file") - tx_path <- .detect_in_dir("tx_file") - mask_dir <- .detect_in_dir("CellLabels") - expr_path <- .detect_in_dir("exprMat_file") - composite_img_dir <- .detect_in_dir("CellComposite") - overlay_img_dir <- .detect_in_dir("CellOverlay") - compart_img_dir <- .detect_in_dir("CompartmentLabels") + shifts_path <- .cosmx_detect("fov_positions_file") + meta_path <- .cosmx_detect("metadata_file") + tx_path <- .cosmx_detect("tx_file") + mask_dir <- .cosmx_detect("CellLabels") + expr_path <- .cosmx_detect("exprMat_file") + composite_img_dir <- .cosmx_detect("CellComposite") + overlay_img_dir <- .cosmx_detect("CellOverlay") + compart_img_dir <- .cosmx_detect("CompartmentLabels") # load fov offsets through one of several methods @@ -387,6 +757,7 @@ setMethod("initialize", signature("CosmxReader"), function( flip_horizontal = FALSE, shift_vertical_step = FALSE, shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, verbose = NULL ) { .cosmx_poly( @@ -396,6 +767,7 @@ setMethod("initialize", signature("CosmxReader"), function( flip_horizontal = flip_horizontal, shift_vertical_step = shift_vertical_step, shift_horizontal_step = shift_horizontal_step, + remove_background_polygon = remove_background_polygon, micron = .Object@micron, px2mm = .Object@px2mm, offsets = .Object@offsets, @@ -577,7 +949,7 @@ setMethod("initialize", signature("CosmxReader"), function( -# access #### +# * access #### #' @export setMethod("$", signature("CosmxReader"), function(x, name) { diff --git a/R/convenience.R b/R/convenience.R index e04aa6a16..42ed7781f 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2483,6 +2483,7 @@ NULL flip_horizontal = FALSE, shift_vertical_step = FALSE, shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, micron = FALSE, px2mm = 0.12028, offsets, @@ -2503,9 +2504,10 @@ NULL mask_params <- list( # static params mask_method = "multiple", - # if removal is TRUE, a real cell segmentation gets removed. - # There is no background poly for nanostring masks - remove_background_polygon = FALSE, + # A background poly for nanostring masks sometimes shows up. + # removal works by looking for any polys with size more than 90% of the + # total FOV along either x or y axis + remove_background_polygon = remove_background_polygon, fill_holes = TRUE, calc_centroids = TRUE, remove_unvalid_polygons = TRUE, @@ -2911,6 +2913,396 @@ NULL ## Xenium #### + +.xenium_transcript <- function( + path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to tx file provided or auto-detected" + ), call. = FALSE) + } + + checkmate::assert_file_exists(path) + e <- file_extension(path) %>% head(1L) %>% tolower() + vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) + + # read in + a <- list( + path = path, + dropcols = dropcols, + qv_threshold = qv_threshold, + verbose = verbose + ) + vmsg("Loading transcript level info...", .v = verbose) + tx <- switch(e, + "csv" = do.call(.xenium_transcript_csv, + args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_transcript_parquet, args = a), + "zarr" = stop('zarr not yet supported') + ) + + # create gpoints + gpointslist <- createGiottoPoints( + x = tx, + feat_type = feat_type, + split_keyword = split_keyword + ) + + if (inherits(gpointslist, "list")) { + gpointslist <- list(gpointslist) + } + + return(gpointslist) +} + + +.xenium_transcript_csv <- function( + path, + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL + ) { + tx_dt <- data.table::fread( + path, nThread = cores, + colClasses = c(transcript_id = "character"), + drop = dropcols + ) + data.table::setnames( + x = tx_dt, + old = c('feature_name', 'x_location', 'y_location'), + new = c('feat_ID', 'x', 'y') + ) + + # qv filtering + if (!is.null(qv_threshold)) { + n_before <- tx_dt[,.N] + tx_dt <- tx_dt[qv >= qv_threshold] + n_after <- tx_dt[,.N] + + vmsg( + .v = verbose, + sprintf( + "QV cutoff: %d\n Feature points removed: %d, out of %d", + qv_threshold, + n_before - n_after, + n_before + ) + ) + } + + return(tx_dt) +} + +.xenium_transcript_parquet <- function( + path, + dropcols = c(), + qv_threshold = 20, + verbose = NULL + ) { + package_check( + pkg_name = c("arrow", "dplyr"), + repository = c("CRAN:arrow", "CRAN:dplyr") + ) + + tx_arrow <- arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(transcript_id = cast(transcript_id, arrow::string())) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::mutate(feature_name = cast(feature_name, arrow::string())) %>% + dplyr::select(-dplyr::any_of(dropcols)) + + # qv filtering + if (!is.null(qv_threshold)) { + .nr <- function(x) { + dplyr::tally(x) %>% dplyr::collect() %>% as.numeric() + } + n_before <- .nr(tx_arrow) + tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) + n_after <- .nr(tx_arrow) + + vmsg( + .v = verbose, + sprintf( + "QV cutoff: %d\n Feature points removed: %d, out of %d", + qv_threshold, + n_before - n_after, + n_before + ) + ) + } + + # convert to data.table + tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() + data.table::setnames( + x = tx_dt, + old = c('feature_name', 'x_location', 'y_location'), + new = c('feat_ID', 'x', 'y') + ) + return(tx_dt) +} + +.xenium_poly <- function( + path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL + ) { + checkmate::assert_file_exists(path) + checkmate::assert_character(name, len = 1L) + + e <- file_extension(path) %>% head(1L) %>% tolower() + + a <- list(path = path) + vmsg("Loading boundary info...", .v = verbose) + polys <- switch(e, + "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_poly_parquet, args = a), + "zarr" = stop("zarr not yet supported") + ) + + # create gpolys + verbose <- verbose %null% FALSE + gpolys <- createGiottoPolygon( + x = polys, + name = name, + calc_centroids = calc_centroids, + verbose = verbose + ) + return(gpolys) +} + +.xenium_poly_csv <- function(path, cores = determine_cores()) { + data.table::fread( + path, nThread = cores, + colClasses = c(cell_id = "character") + ) +} + +.xenium_poly_parquet <- function(path) { + package_check( + pkg_name = c("arrow", "dplyr"), + repository = c("CRAN:arrow", "CRAN:dplyr") + ) + # read & convert to DT + arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() +} + +.xenium_cellmeta <- function( + path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + if (missing(path)) { + stop(wrap_txt( + "No path to metadata file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + + e <- file_extension(path) %>% head(1L) %>% tolower() + a <- list(path = path, dropcols = dropcols) + vmsg('Loading cell metadata...', .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) + verbose <- verbose %null% TRUE + cx <- switch(e, + "csv" = do.call(.xenium_cellmeta_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_cellmeta_parquet, args = a) + ) + + cx <- createCellMetaObj( + metadata = cx, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + return(cx) +} + +.xenium_cellmeta_csv <- function( + path, dropcols = c(), cores = determine_cores() +) { + data.table::fread(path, nThread = cores, drop = dropcols) +} + +.xenium_cellmeta_parquet <- function(path, dropcols = c()) { + arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::select(-dplyr::any_of(dropcols)) %>% + as.data.frame() %>% + data.table::setDT() +} + +.xenium_featmeta <- function( + path, + gene_ids = "symbols", + dropcols = c(), + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to panel metadata file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_ext <- GiottoUtils::file_extension(path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json( + path = path, gene_ids = gene_ids + ) + } else { + feat_meta <- data.table::fread(path, nThread = cores) + colnames(feat_meta)[[1]] <- 'feat_ID' + } + + dropcols <- dropcols[dropcols %in% colnames(feat_meta)] + feat_meta[, (dropcols) := NULL] # remove dropcols + + fx <- createFeatMetaObj( + metadata = feat_meta, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + + return(fx) +} + +.xenium_expression <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to expression dir (mtx) or file (h5) provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + a <- list( + path = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) + + if (checkmate::test_directory_exists(path)) { + e <- "mtx" # assume mtx dir + # zarr can also be unzipped into a dir, but zarr implementation with + # 32bit UINT support is not available in R yet (needed for cell_IDs). + } else { + e <- file_extension(path) %>% head(1L) %>% tolower() + } + + vmsg("Loading 10x pre-aggregated expression...", .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) + verbose <- verbose %null% TRUE + ex <- switch(e, + "mtx" = do.call(.xenium_cellmeta_csv, args = a), + "h5" = do.call(.xenium_cellmeta_parquet, args = a) + ) + + eo <- createExprObj( + expression_data = ex, + name = "raw", + spat_unit = "cell", + feat_type = "rna", + provenance = "cell" + ) + return(eo) +} + +.xenium_expression_h5 <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE +) { + get10Xmatrix_h5( + path_to_data = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) +} + +.xenium_expression_mtx <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE +) { + gene_ids <- switch(gene_ids, + "ensembl" = 1, + "symbols" = 2 + ) + get10Xmatrix( + path_to_data = path, + gene_column_index = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) +} + +.xenium_image <- function( + path, + name = "image", + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + affine = NULL, + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to image file to load provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + + vmsg(.v = verbose, sprintf("loading image as '%s'", name)) + vmsg(.v = verbose, .is_debug = TRUE, path) + vmsg( + .v = verbose, .is_debug = TRUE, + sprintf("negative_y: %s\nflip_vertical: %s\nflip_horizontal: %s", + negative_y, flip_vertical, flip_horizontal), + .prefix = "" + ) + + verbose <- verbose %null% TRUE + + # TODO +} + + + #' @title Load xenium data from folder #' @name load_xenium_folder #' @param path_list list of full filepaths from .read_xenium_folder diff --git a/man/createGiottoCosMxObject.Rd b/man/createGiottoCosMxObject.Rd index 0362f0472..cbda75fe2 100644 --- a/man/createGiottoCosMxObject.Rd +++ b/man/createGiottoCosMxObject.Rd @@ -31,9 +31,11 @@ coordinates only. \code{'aggregate'} loads the provided aggregated expression ma \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/createGiottoMerscopeObject.Rd b/man/createGiottoMerscopeObject.Rd index 960cbeeba..902b18197 100644 --- a/man/createGiottoMerscopeObject.Rd +++ b/man/createGiottoMerscopeObject.Rd @@ -51,9 +51,11 @@ NULL loads all FOVs (very slow)} \item{aggregate_stack_param}{params to pass to \code{\link{aggregateStacks}}} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 1c04cf0b8..7200c3346 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -46,9 +46,11 @@ a subcellular transcript detection (default = 20)} \item{key_list}{(advanced) list of grep-based keywords to split the subcellular feature detections by feature type. See details} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/dot-createGiottoCosMxObject_aggregate.Rd b/man/dot-createGiottoCosMxObject_aggregate.Rd index f47f300db..1994734d6 100644 --- a/man/dot-createGiottoCosMxObject_aggregate.Rd +++ b/man/dot-createGiottoCosMxObject_aggregate.Rd @@ -12,11 +12,13 @@ ) } \arguments{ -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} } \description{ Load and create a CosMx Giotto object from aggregate info diff --git a/man/dot-createGiottoCosMxObject_all.Rd b/man/dot-createGiottoCosMxObject_all.Rd index 618a58814..df88e36c9 100644 --- a/man/dot-createGiottoCosMxObject_all.Rd +++ b/man/dot-createGiottoCosMxObject_all.Rd @@ -27,11 +27,13 @@ \item{remove_unvalid_polygons}{remove unvalid polygons (default: TRUE)} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} } \description{ Load and create a CosMx Giotto object from subcellular and aggregate info diff --git a/man/dot-createGiottoCosMxObject_subcellular.Rd b/man/dot-createGiottoCosMxObject_subcellular.Rd index 3e1c19c6b..c57640024 100644 --- a/man/dot-createGiottoCosMxObject_subcellular.Rd +++ b/man/dot-createGiottoCosMxObject_subcellular.Rd @@ -24,11 +24,13 @@ \item{remove_unvalid_polygons}{remove unvalid polygons (default: TRUE)} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} } \description{ Load and create a CosMx Giotto object from subcellular info diff --git a/man/dot-createGiottoXeniumObject_aggregate.Rd b/man/dot-createGiottoXeniumObject_aggregate.Rd index 4ed8cc25a..b77796716 100644 --- a/man/dot-createGiottoXeniumObject_aggregate.Rd +++ b/man/dot-createGiottoXeniumObject_aggregate.Rd @@ -14,9 +14,11 @@ \arguments{ \item{data_list}{list of data loaded by \code{.load_xenium_folder}} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/dot-createGiottoXeniumObject_subcellular.Rd b/man/dot-createGiottoXeniumObject_subcellular.Rd index 03886fd2a..5072b5eb2 100644 --- a/man/dot-createGiottoXeniumObject_subcellular.Rd +++ b/man/dot-createGiottoXeniumObject_subcellular.Rd @@ -22,9 +22,11 @@ into separate giottoPoints objects by feat_type} \item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/dot-load_cosmx_folder_aggregate.Rd b/man/dot-load_cosmx_folder_aggregate.Rd index 116cdeb8f..7ecd59161 100644 --- a/man/dot-load_cosmx_folder_aggregate.Rd +++ b/man/dot-load_cosmx_folder_aggregate.Rd @@ -7,7 +7,8 @@ .load_cosmx_folder_aggregate(dir_items, cores, verbose = TRUE) } \arguments{ -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/dot-load_cosmx_folder_subcellular.Rd b/man/dot-load_cosmx_folder_subcellular.Rd index aff95f1a3..d79067d4b 100644 --- a/man/dot-load_cosmx_folder_subcellular.Rd +++ b/man/dot-load_cosmx_folder_subcellular.Rd @@ -9,7 +9,8 @@ \arguments{ \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/load_merscope_folder.Rd b/man/load_merscope_folder.Rd index 42571cf25..6e9b9a54c 100644 --- a/man/load_merscope_folder.Rd +++ b/man/load_merscope_folder.Rd @@ -38,7 +38,8 @@ \item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/load_xenium_folder.Rd b/man/load_xenium_folder.Rd index af16eff91..3ed9d4603 100644 --- a/man/load_xenium_folder.Rd +++ b/man/load_xenium_folder.Rd @@ -55,7 +55,8 @@ expression matrix} \item{gene_column_index}{which column from the features or genes .tsv file to use for row ids} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } From 61ec728a89cd5a27b43f94ae3de7ae1f7f9ad0ea Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 4 Apr 2024 11:35:18 -0400 Subject: [PATCH 035/150] Update classes.R --- R/classes.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/classes.R b/R/classes.R index ccc3e2843..16c81e5c8 100644 --- a/R/classes.R +++ b/R/classes.R @@ -541,7 +541,7 @@ setMethod("show", signature("CosmxReader"), function(object) { cat(pre["offsets"], offs_status, "\n") # funs - .fun_prints(x = object, pre = pre["funs"]) + .reader_fun_prints(x = object, pre = pre["funs"]) }) # * print #### @@ -710,9 +710,12 @@ setMethod("initialize", signature("CosmxReader"), function( else { pos <- data.table::data.table() warning(wrap_txt( - "fov_positions_file, tx_file, and metadata_file not auto detected. - One of these must be provided to infer FOV shifts" - )) + "NO FOV SHIFTS. + fov_positions_file, tx_file, and metadata_file not auto detected. + One of these must be provided to infer FOV shifts.\n + Alternatively, directly supply a data.table with: + fov(int), x(numeric), y(numeric) in px scaling to `$offsets`" + ), call. = FALSE) } .Object@offsets <- pos From e4e4c7134d9a605ff4ee01e8e5e672dc6d7a6e8e Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:10:18 -0400 Subject: [PATCH 036/150] enh: add instructions param --- R/classes.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index 16c81e5c8..cde2954e7 100644 --- a/R/classes.R +++ b/R/classes.R @@ -861,7 +861,8 @@ setMethod("initialize", signature("CosmxReader"), function( overlay = "overlay" ), load_expression = FALSE, - load_cellmeta = FALSE + load_cellmeta = FALSE, + instructions = NULL ) { load_expression <- as.logical(load_expression) load_cellmeta <- as.logical(load_cellmeta) @@ -877,6 +878,9 @@ setMethod("initialize", signature("CosmxReader"), function( # init gobject g <- giotto() + if (!is.null(instructions)) { + instructions(g) <- instructions + } # transcripts tx_list <- funs$load_transcripts( From 8c79cb53d5b7fd750b93fdf765b9c3591c60553c Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Sun, 14 Apr 2024 11:01:11 -0400 Subject: [PATCH 037/150] fix: set default global option `dbmatrix_compute` to FALSE --- R/auxiliary_giotto.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 0bbfabbe2..b69775659 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -686,9 +686,7 @@ filterGiotto = function(gobject, # set global option options(giotto.dbmatrix_compute = FALSE) if not desired # see ?dplyr::compute() for more details if(inherits(raw_expr[], "dbMatrix")){ - compute_mat <- getOption("giotto.dbmatrix_compute", TRUE) - } else { - compute_mat <- FALSE + compute_mat <- getOption("giotto.dbmatrix_compute", FALSE) } ## 1. library size normalize From 4d3f7d82b947ba6746e4c4d83477e54fbaf1d351 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Wed, 1 May 2024 15:12:35 -0400 Subject: [PATCH 038/150] chore: compatibility for module changes - now require at least - _GiottoClass 0.3.0_ - _GiottoVisuals 0.2.0_ --- DESCRIPTION | 4 +- NAMESPACE | 281 +----------------------------------- R/suite_reexports.R | 12 +- man/crossSectionGenePlot.Rd | 2 +- man/reexports.Rd | 5 +- 5 files changed, 19 insertions(+), 285 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 33ca51146..0e534eb4a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,7 @@ Depends: utils (>= 3.5.0), R (>= 3.5.0), methods, - GiottoClass (>= 0.2.4) + GiottoClass (>= 0.3.0) Imports: BiocParallel, BiocSingular, @@ -43,7 +43,7 @@ Imports: ggplot2 (>= 3.1.1), ggrepel, GiottoUtils (>= 0.1.6), - GiottoVisuals (>= 0.1.1), + GiottoVisuals (>= 0.2.0), igraph (>= 1.2.4.1), jsonlite, limma, diff --git a/NAMESPACE b/NAMESPACE index 299b4a40c..cf064ec92 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -159,6 +159,7 @@ export(detectSpatialPatterns) export(dimCellPlot) export(dimCellPlot2D) export(dimFeatPlot2D) +export(dimFeatPlot3D) export(dimGenePlot3D) export(dimPlot) export(dimPlot2D) @@ -434,12 +435,14 @@ export(spatDeconvPlot) export(spatDimCellPlot) export(spatDimCellPlot2D) export(spatDimFeatPlot2D) +export(spatDimFeatPlot3D) export(spatDimGenePlot3D) export(spatDimPlot) export(spatDimPlot2D) export(spatDimPlot3D) export(spatFeatPlot2D) export(spatFeatPlot2D_single) +export(spatFeatPlot3D) export(spatGenePlot3D) export(spatIDs) export(spatInSituPlotDensity) @@ -500,284 +503,6 @@ import(methods) import(stats, except = density) import(utils) importClassesFrom(data.table,data.table) -importFrom(GiottoClass,"activeFeatType<-") -importFrom(GiottoClass,"activeSpatUnit<-") -importFrom(GiottoClass,"ext<-") -importFrom(GiottoClass,"featType<-") -importFrom(GiottoClass,"instructions<-") -importFrom(GiottoClass,"objName<-") -importFrom(GiottoClass,"prov<-") -importFrom(GiottoClass,"spatUnit<-") -importFrom(GiottoClass,activeFeatType) -importFrom(GiottoClass,activeSpatUnit) -importFrom(GiottoClass,addCellMetadata) -importFrom(GiottoClass,addFeatMetadata) -importFrom(GiottoClass,addGiottoImage) -importFrom(GiottoClass,addGiottoImageMG) -importFrom(GiottoClass,addGiottoLargeImage) -importFrom(GiottoClass,addGiottoPoints) -importFrom(GiottoClass,addGiottoPoints3D) -importFrom(GiottoClass,addGiottoPolygons) -importFrom(GiottoClass,addNetworkLayout) -importFrom(GiottoClass,addSpatialCentroidLocations) -importFrom(GiottoClass,addSpatialCentroidLocationsLayer) -importFrom(GiottoClass,aggregateStacks) -importFrom(GiottoClass,aggregateStacksExpression) -importFrom(GiottoClass,aggregateStacksLocations) -importFrom(GiottoClass,aggregateStacksPolygonOverlaps) -importFrom(GiottoClass,aggregateStacksPolygons) -importFrom(GiottoClass,anndataToGiotto) -importFrom(GiottoClass,annotateGiotto) -importFrom(GiottoClass,annotateSpatialGrid) -importFrom(GiottoClass,annotateSpatialNetwork) -importFrom(GiottoClass,as.points) -importFrom(GiottoClass,as.polygons) -importFrom(GiottoClass,as.sf) -importFrom(GiottoClass,as.sp) -importFrom(GiottoClass,as.stars) -importFrom(GiottoClass,as.terra) -importFrom(GiottoClass,calculateMetaTable) -importFrom(GiottoClass,calculateMetaTableCells) -importFrom(GiottoClass,calculateOverlap) -importFrom(GiottoClass,calculateOverlapParallel) -importFrom(GiottoClass,calculateOverlapPolygonImages) -importFrom(GiottoClass,calculateOverlapRaster) -importFrom(GiottoClass,calculateOverlapSerial) -importFrom(GiottoClass,calculateSpatCellMetadataProportions) -importFrom(GiottoClass,centroids) -importFrom(GiottoClass,changeGiottoInstructions) -importFrom(GiottoClass,changeImageBg) -importFrom(GiottoClass,checkGiottoEnvironment) -importFrom(GiottoClass,circleVertices) -importFrom(GiottoClass,combineCellData) -importFrom(GiottoClass,combineFeatureData) -importFrom(GiottoClass,combineFeatureOverlapData) -importFrom(GiottoClass,combineMetadata) -importFrom(GiottoClass,combineSpatialCellFeatureInfo) -importFrom(GiottoClass,combineSpatialCellMetadataInfo) -importFrom(GiottoClass,combineToMultiPolygon) -importFrom(GiottoClass,convertGiottoLargeImageToMG) -importFrom(GiottoClass,copy) -importFrom(GiottoClass,createBentoAdata) -importFrom(GiottoClass,createCellMetaObj) -importFrom(GiottoClass,createDimObj) -importFrom(GiottoClass,createExprObj) -importFrom(GiottoClass,createFeatMetaObj) -importFrom(GiottoClass,createGiottoImage) -importFrom(GiottoClass,createGiottoInstructions) -importFrom(GiottoClass,createGiottoLargeImage) -importFrom(GiottoClass,createGiottoLargeImageList) -importFrom(GiottoClass,createGiottoObject) -importFrom(GiottoClass,createGiottoObjectSubcellular) -importFrom(GiottoClass,createGiottoPoints) -importFrom(GiottoClass,createGiottoPolygon) -importFrom(GiottoClass,createGiottoPolygonsFromDfr) -importFrom(GiottoClass,createGiottoPolygonsFromGeoJSON) -importFrom(GiottoClass,createGiottoPolygonsFromMask) -importFrom(GiottoClass,createMetafeats) -importFrom(GiottoClass,createNearestNetObj) -importFrom(GiottoClass,createNearestNetwork) -importFrom(GiottoClass,createSpatEnrObj) -importFrom(GiottoClass,createSpatLocsObj) -importFrom(GiottoClass,createSpatNetObj) -importFrom(GiottoClass,createSpatialDefaultGrid) -importFrom(GiottoClass,createSpatialDelaunayNetwork) -importFrom(GiottoClass,createSpatialFeaturesKNNnetwork) -importFrom(GiottoClass,createSpatialGrid) -importFrom(GiottoClass,createSpatialKNNnetwork) -importFrom(GiottoClass,createSpatialNetwork) -importFrom(GiottoClass,createSpatialWeightMatrix) -importFrom(GiottoClass,crop) -importFrom(GiottoClass,cropGiottoLargeImage) -importFrom(GiottoClass,density) -importFrom(GiottoClass,distGiottoImage) -importFrom(GiottoClass,estimateImageBg) -importFrom(GiottoClass,ext) -importFrom(GiottoClass,fDataDT) -importFrom(GiottoClass,featIDs) -importFrom(GiottoClass,featType) -importFrom(GiottoClass,featureNetwork) -importFrom(GiottoClass,flip) -importFrom(GiottoClass,gefToGiotto) -importFrom(GiottoClass,getCellMetadata) -importFrom(GiottoClass,getDimReduction) -importFrom(GiottoClass,getExpression) -importFrom(GiottoClass,getFeatureInfo) -importFrom(GiottoClass,getFeatureMetadata) -importFrom(GiottoClass,getGiottoImage) -importFrom(GiottoClass,getMultiomics) -importFrom(GiottoClass,getNearestNetwork) -importFrom(GiottoClass,getPolygonInfo) -importFrom(GiottoClass,getSpatialEnrichment) -importFrom(GiottoClass,getSpatialGrid) -importFrom(GiottoClass,getSpatialLocations) -importFrom(GiottoClass,getSpatialNetwork) -importFrom(GiottoClass,giotto) -importFrom(GiottoClass,giottoImage) -importFrom(GiottoClass,giottoLargeImage) -importFrom(GiottoClass,giottoMasterToSuite) -importFrom(GiottoClass,giottoPoints) -importFrom(GiottoClass,giottoPolygon) -importFrom(GiottoClass,giottoToAnnData) -importFrom(GiottoClass,giottoToSeurat) -importFrom(GiottoClass,giottoToSeuratV4) -importFrom(GiottoClass,giottoToSeuratV5) -importFrom(GiottoClass,giottoToSpatialExperiment) -importFrom(GiottoClass,hexVertices) -importFrom(GiottoClass,hist) -importFrom(GiottoClass,installGiottoEnvironment) -importFrom(GiottoClass,instructions) -importFrom(GiottoClass,joinGiottoObjects) -importFrom(GiottoClass,loadGiotto) -importFrom(GiottoClass,makePseudoVisium) -importFrom(GiottoClass,objHistory) -importFrom(GiottoClass,objName) -importFrom(GiottoClass,orthoGrid) -importFrom(GiottoClass,overlapImagesToMatrix) -importFrom(GiottoClass,overlapToMatrix) -importFrom(GiottoClass,overlapToMatrixMultiPoly) -importFrom(GiottoClass,overlaps) -importFrom(GiottoClass,pDataDT) -importFrom(GiottoClass,plotGiottoImage) -importFrom(GiottoClass,polyStamp) -importFrom(GiottoClass,prov) -importFrom(GiottoClass,readCellMetadata) -importFrom(GiottoClass,readDimReducData) -importFrom(GiottoClass,readExprData) -importFrom(GiottoClass,readExprMatrix) -importFrom(GiottoClass,readFeatData) -importFrom(GiottoClass,readFeatMetadata) -importFrom(GiottoClass,readGiottoInstructions) -importFrom(GiottoClass,readNearestNetData) -importFrom(GiottoClass,readPolygonData) -importFrom(GiottoClass,readSpatEnrichData) -importFrom(GiottoClass,readSpatLocsData) -importFrom(GiottoClass,readSpatNetData) -importFrom(GiottoClass,reconnectGiottoImage) -importFrom(GiottoClass,rectVertices) -importFrom(GiottoClass,removeCellAnnotation) -importFrom(GiottoClass,removeFeatAnnotation) -importFrom(GiottoClass,removeGiottoEnvironment) -importFrom(GiottoClass,replaceGiottoInstructions) -importFrom(GiottoClass,rescale) -importFrom(GiottoClass,rescalePolygons) -importFrom(GiottoClass,saveGiotto) -importFrom(GiottoClass,setCellMetadata) -importFrom(GiottoClass,setDimReduction) -importFrom(GiottoClass,setExpression) -importFrom(GiottoClass,setFeatureInfo) -importFrom(GiottoClass,setFeatureMetadata) -importFrom(GiottoClass,setGiotto) -importFrom(GiottoClass,setGiottoImage) -importFrom(GiottoClass,setMultiomics) -importFrom(GiottoClass,setNearestNetwork) -importFrom(GiottoClass,setPolygonInfo) -importFrom(GiottoClass,setSpatialEnrichment) -importFrom(GiottoClass,setSpatialGrid) -importFrom(GiottoClass,setSpatialLocations) -importFrom(GiottoClass,setSpatialNetwork) -importFrom(GiottoClass,seuratToGiotto) -importFrom(GiottoClass,seuratToGiottoV4) -importFrom(GiottoClass,seuratToGiottoV5) -importFrom(GiottoClass,showGiottoCellMetadata) -importFrom(GiottoClass,showGiottoDimRed) -importFrom(GiottoClass,showGiottoExpression) -importFrom(GiottoClass,showGiottoFeatInfo) -importFrom(GiottoClass,showGiottoFeatMetadata) -importFrom(GiottoClass,showGiottoImageNames) -importFrom(GiottoClass,showGiottoInstructions) -importFrom(GiottoClass,showGiottoNearestNetworks) -importFrom(GiottoClass,showGiottoSpatEnrichments) -importFrom(GiottoClass,showGiottoSpatGrids) -importFrom(GiottoClass,showGiottoSpatLocs) -importFrom(GiottoClass,showGiottoSpatNetworks) -importFrom(GiottoClass,showGiottoSpatialInfo) -importFrom(GiottoClass,showProcessingSteps) -importFrom(GiottoClass,smoothGiottoPolygons) -importFrom(GiottoClass,spatIDs) -importFrom(GiottoClass,spatQueryGiottoPolygons) -importFrom(GiottoClass,spatShift) -importFrom(GiottoClass,spatUnit) -importFrom(GiottoClass,spatialExperimentToGiotto) -importFrom(GiottoClass,spin) -importFrom(GiottoClass,stitchFieldCoordinates) -importFrom(GiottoClass,stitchGiottoLargeImage) -importFrom(GiottoClass,subsetGiotto) -importFrom(GiottoClass,subsetGiottoLocs) -importFrom(GiottoClass,subsetGiottoLocsMulti) -importFrom(GiottoClass,subsetGiottoLocsSubcellular) -importFrom(GiottoClass,tessellate) -importFrom(GiottoClass,triGrid) -importFrom(GiottoClass,updateGiottoImage) -importFrom(GiottoClass,updateGiottoImageMG) -importFrom(GiottoClass,updateGiottoLargeImage) -importFrom(GiottoClass,updateGiottoObject) -importFrom(GiottoClass,updateGiottoPointsObject) -importFrom(GiottoClass,updateGiottoPolygonObject) -importFrom(GiottoClass,vect) -importFrom(GiottoClass,wrap) -importFrom(GiottoClass,writeGiottoLargeImage) -importFrom(GiottoUtils,"%>%") -importFrom(GiottoUtils,getDistinctColors) -importFrom(GiottoUtils,getRainbowColors) -importFrom(GiottoVisuals,"sankeyLabel<-") -importFrom(GiottoVisuals,"sankeyRelate<-") -importFrom(GiottoVisuals,addGiottoImageToSpatPlot) -importFrom(GiottoVisuals,dimCellPlot) -importFrom(GiottoVisuals,dimCellPlot2D) -importFrom(GiottoVisuals,dimFeatPlot2D) -importFrom(GiottoVisuals,dimGenePlot3D) -importFrom(GiottoVisuals,dimPlot) -importFrom(GiottoVisuals,dimPlot2D) -importFrom(GiottoVisuals,dimPlot3D) -importFrom(GiottoVisuals,getColors) -importFrom(GiottoVisuals,giottoSankeyPlan) -importFrom(GiottoVisuals,plotHeatmap) -importFrom(GiottoVisuals,plotMetaDataCellsHeatmap) -importFrom(GiottoVisuals,plotMetaDataHeatmap) -importFrom(GiottoVisuals,plotPCA) -importFrom(GiottoVisuals,plotPCA_2D) -importFrom(GiottoVisuals,plotPCA_3D) -importFrom(GiottoVisuals,plotStatDelaunayNetwork) -importFrom(GiottoVisuals,plotTSNE) -importFrom(GiottoVisuals,plotTSNE_2D) -importFrom(GiottoVisuals,plotTSNE_3D) -importFrom(GiottoVisuals,plotUMAP) -importFrom(GiottoVisuals,plotUMAP_2D) -importFrom(GiottoVisuals,plotUMAP_3D) -importFrom(GiottoVisuals,sankeyLabel) -importFrom(GiottoVisuals,sankeyPlot) -importFrom(GiottoVisuals,sankeyRelate) -importFrom(GiottoVisuals,sankeySet) -importFrom(GiottoVisuals,sankeySetAddresses) -importFrom(GiottoVisuals,showClusterDendrogram) -importFrom(GiottoVisuals,showClusterHeatmap) -importFrom(GiottoVisuals,showColorInstructions) -importFrom(GiottoVisuals,showSaveParameters) -importFrom(GiottoVisuals,spatCellPlot) -importFrom(GiottoVisuals,spatCellPlot2D) -importFrom(GiottoVisuals,spatDeconvPlot) -importFrom(GiottoVisuals,spatDimCellPlot) -importFrom(GiottoVisuals,spatDimCellPlot2D) -importFrom(GiottoVisuals,spatDimFeatPlot2D) -importFrom(GiottoVisuals,spatDimGenePlot3D) -importFrom(GiottoVisuals,spatDimPlot) -importFrom(GiottoVisuals,spatDimPlot2D) -importFrom(GiottoVisuals,spatDimPlot3D) -importFrom(GiottoVisuals,spatFeatPlot2D) -importFrom(GiottoVisuals,spatFeatPlot2D_single) -importFrom(GiottoVisuals,spatGenePlot3D) -importFrom(GiottoVisuals,spatInSituPlotDensity) -importFrom(GiottoVisuals,spatInSituPlotHex) -importFrom(GiottoVisuals,spatInSituPlotPoints) -importFrom(GiottoVisuals,spatNetwDistributions) -importFrom(GiottoVisuals,spatNetwDistributionsDistance) -importFrom(GiottoVisuals,spatNetwDistributionsKneighbors) -importFrom(GiottoVisuals,spatPlot) -importFrom(GiottoVisuals,spatPlot2D) -importFrom(GiottoVisuals,spatPlot3D) -importFrom(GiottoVisuals,subsetSankeySet) -importFrom(GiottoVisuals,violinPlot) importFrom(data.table,data.table) importFrom(data.table,frank) importFrom(data.table,fread) diff --git a/R/suite_reexports.R b/R/suite_reexports.R index a7d286608..fc2cf415e 100644 --- a/R/suite_reexports.R +++ b/R/suite_reexports.R @@ -477,7 +477,9 @@ GiottoVisuals::dimCellPlot2D #' @export GiottoVisuals::dimFeatPlot2D #' @export -GiottoVisuals::dimGenePlot3D +GiottoVisuals::dimGenePlot3D # TODO remove in next version +#' @export +GiottoVisuals::dimFeatPlot3D #' @export GiottoVisuals::dimPlot #' @export @@ -549,7 +551,9 @@ GiottoVisuals::spatDimCellPlot2D #' @export GiottoVisuals::spatDimFeatPlot2D #' @export -GiottoVisuals::spatDimGenePlot3D +GiottoVisuals::spatDimGenePlot3D # TODO remove in next version +#' @export +GiottoVisuals::spatDimFeatPlot3D #' @export GiottoVisuals::spatDimPlot #' @export @@ -561,7 +565,9 @@ GiottoVisuals::spatFeatPlot2D #' @export GiottoVisuals::spatFeatPlot2D_single #' @export -GiottoVisuals::spatGenePlot3D +GiottoVisuals::spatGenePlot3D # TODO remove in next version +#' @export +GiottoVisuals::spatFeatPlot3D #' @export GiottoVisuals::spatPlot #' @export diff --git a/man/crossSectionGenePlot.Rd b/man/crossSectionGenePlot.Rd index 084a99be8..6dc072505 100644 --- a/man/crossSectionGenePlot.Rd +++ b/man/crossSectionGenePlot.Rd @@ -39,5 +39,5 @@ Visualize cells and gene expression in a virtual cross section according to spat Description of parameters. } \seealso{ -\link[GiottoVisuals:spatGenePlot3D]{GiottoVisuals::spatGenePlot3D} and \link[GiottoVisuals:spatFeatPlot2D]{GiottoVisuals::spatFeatPlot2D} +\link[GiottoVisuals:spatFeatPlot3D]{GiottoVisuals::spatGenePlot3D} and \link[GiottoVisuals:spatFeatPlot2D]{GiottoVisuals::spatFeatPlot2D} } diff --git a/man/reexports.Rd b/man/reexports.Rd index 2ce5f54fe..a8f999847 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -228,6 +228,7 @@ \alias{dimCellPlot2D} \alias{dimFeatPlot2D} \alias{dimGenePlot3D} +\alias{dimFeatPlot3D} \alias{dimPlot} \alias{dimPlot2D} \alias{dimPlot3D} @@ -264,12 +265,14 @@ \alias{spatDimCellPlot2D} \alias{spatDimFeatPlot2D} \alias{spatDimGenePlot3D} +\alias{spatDimFeatPlot3D} \alias{spatDimPlot} \alias{spatDimPlot2D} \alias{spatDimPlot3D} \alias{spatFeatPlot2D} \alias{spatFeatPlot2D_single} \alias{spatGenePlot3D} +\alias{spatFeatPlot3D} \alias{spatPlot} \alias{spatPlot2D} \alias{spatPlot3D} @@ -292,6 +295,6 @@ below to see their documentation. \item{GiottoUtils}{\code{\link[GiottoUtils:pipe]{\%>\%}}, \code{\link[GiottoUtils]{getDistinctColors}}, \code{\link[GiottoUtils]{getRainbowColors}}} - \item{GiottoVisuals}{\code{\link[GiottoVisuals]{addGiottoImageToSpatPlot}}, \code{\link[GiottoVisuals]{dimCellPlot}}, \code{\link[GiottoVisuals:dimCellPlot]{dimCellPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot2D}}, \code{\link[GiottoVisuals]{dimGenePlot3D}}, \code{\link[GiottoVisuals]{dimPlot}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot2D}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot3D}}, \code{\link[GiottoVisuals]{getColors}}, \code{\link[GiottoVisuals]{giottoSankeyPlan}}, \code{\link[GiottoVisuals]{plotHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataCellsHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataHeatmap}}, \code{\link[GiottoVisuals]{plotPCA}}, \code{\link[GiottoVisuals]{plotPCA_2D}}, \code{\link[GiottoVisuals]{plotPCA_3D}}, \code{\link[GiottoVisuals]{plotStatDelaunayNetwork}}, \code{\link[GiottoVisuals]{plotTSNE}}, \code{\link[GiottoVisuals]{plotTSNE_2D}}, \code{\link[GiottoVisuals]{plotTSNE_3D}}, \code{\link[GiottoVisuals]{plotUMAP}}, \code{\link[GiottoVisuals]{plotUMAP_2D}}, \code{\link[GiottoVisuals]{plotUMAP_3D}}, \code{\link[GiottoVisuals]{sankeyLabel}}, \code{\link[GiottoVisuals:sankeyLabel]{sankeyLabel<-}}, \code{\link[GiottoVisuals]{sankeyPlot}}, \code{\link[GiottoVisuals]{sankeyRelate}}, \code{\link[GiottoVisuals:sankeyRelate]{sankeyRelate<-}}, \code{\link[GiottoVisuals]{sankeySet}}, \code{\link[GiottoVisuals]{sankeySetAddresses}}, \code{\link[GiottoVisuals]{showClusterDendrogram}}, \code{\link[GiottoVisuals]{showClusterHeatmap}}, \code{\link[GiottoVisuals]{showColorInstructions}}, \code{\link[GiottoVisuals]{showSaveParameters}}, \code{\link[GiottoVisuals]{spatCellPlot}}, \code{\link[GiottoVisuals:spatCellPlot]{spatCellPlot2D}}, \code{\link[GiottoVisuals]{spatDeconvPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot2D}}, \code{\link[GiottoVisuals]{spatDimGenePlot3D}}, \code{\link[GiottoVisuals]{spatDimPlot}}, \code{\link[GiottoVisuals:spatDimPlot]{spatDimPlot2D}}, \code{\link[GiottoVisuals]{spatDimPlot3D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D_single}}, \code{\link[GiottoVisuals]{spatGenePlot3D}}, \code{\link[GiottoVisuals]{spatInSituPlotDensity}}, \code{\link[GiottoVisuals]{spatInSituPlotHex}}, \code{\link[GiottoVisuals]{spatInSituPlotPoints}}, \code{\link[GiottoVisuals]{spatNetwDistributions}}, \code{\link[GiottoVisuals]{spatNetwDistributionsDistance}}, \code{\link[GiottoVisuals]{spatNetwDistributionsKneighbors}}, \code{\link[GiottoVisuals]{spatPlot}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot2D}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot3D}}, \code{\link[GiottoVisuals]{subsetSankeySet}}, \code{\link[GiottoVisuals]{violinPlot}}} + \item{GiottoVisuals}{\code{\link[GiottoVisuals]{addGiottoImageToSpatPlot}}, \code{\link[GiottoVisuals]{dimCellPlot}}, \code{\link[GiottoVisuals:dimCellPlot]{dimCellPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot3D}}, \code{\link[GiottoVisuals:dimFeatPlot3D]{dimGenePlot3D}}, \code{\link[GiottoVisuals]{dimPlot}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot2D}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot3D}}, \code{\link[GiottoVisuals]{getColors}}, \code{\link[GiottoVisuals]{giottoSankeyPlan}}, \code{\link[GiottoVisuals]{plotHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataCellsHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataHeatmap}}, \code{\link[GiottoVisuals]{plotPCA}}, \code{\link[GiottoVisuals]{plotPCA_2D}}, \code{\link[GiottoVisuals]{plotPCA_3D}}, \code{\link[GiottoVisuals]{plotStatDelaunayNetwork}}, \code{\link[GiottoVisuals]{plotTSNE}}, \code{\link[GiottoVisuals]{plotTSNE_2D}}, \code{\link[GiottoVisuals]{plotTSNE_3D}}, \code{\link[GiottoVisuals]{plotUMAP}}, \code{\link[GiottoVisuals]{plotUMAP_2D}}, \code{\link[GiottoVisuals]{plotUMAP_3D}}, \code{\link[GiottoVisuals]{sankeyLabel}}, \code{\link[GiottoVisuals:sankeyLabel]{sankeyLabel<-}}, \code{\link[GiottoVisuals]{sankeyPlot}}, \code{\link[GiottoVisuals]{sankeyRelate}}, \code{\link[GiottoVisuals:sankeyRelate]{sankeyRelate<-}}, \code{\link[GiottoVisuals]{sankeySet}}, \code{\link[GiottoVisuals]{sankeySetAddresses}}, \code{\link[GiottoVisuals]{showClusterDendrogram}}, \code{\link[GiottoVisuals]{showClusterHeatmap}}, \code{\link[GiottoVisuals]{showColorInstructions}}, \code{\link[GiottoVisuals]{showSaveParameters}}, \code{\link[GiottoVisuals]{spatCellPlot}}, \code{\link[GiottoVisuals:spatCellPlot]{spatCellPlot2D}}, \code{\link[GiottoVisuals]{spatDeconvPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot3D}}, \code{\link[GiottoVisuals:spatDimFeatPlot3D]{spatDimGenePlot3D}}, \code{\link[GiottoVisuals]{spatDimPlot}}, \code{\link[GiottoVisuals:spatDimPlot]{spatDimPlot2D}}, \code{\link[GiottoVisuals]{spatDimPlot3D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D_single}}, \code{\link[GiottoVisuals]{spatFeatPlot3D}}, \code{\link[GiottoVisuals:spatFeatPlot3D]{spatGenePlot3D}}, \code{\link[GiottoVisuals]{spatInSituPlotDensity}}, \code{\link[GiottoVisuals]{spatInSituPlotHex}}, \code{\link[GiottoVisuals]{spatInSituPlotPoints}}, \code{\link[GiottoVisuals]{spatNetwDistributions}}, \code{\link[GiottoVisuals]{spatNetwDistributionsDistance}}, \code{\link[GiottoVisuals]{spatNetwDistributionsKneighbors}}, \code{\link[GiottoVisuals]{spatPlot}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot2D}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot3D}}, \code{\link[GiottoVisuals]{subsetSankeySet}}, \code{\link[GiottoVisuals]{violinPlot}}} }} From e44244bae878d2b071e221c34d0a254270aea502 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 14 May 2024 13:41:22 -0400 Subject: [PATCH 039/150] Merge branch 'modular_readers' of https://github.com/drieslab/Giotto into modular_readers From feccfd8aa7382aad5338c6a57f0a46abe12199df Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 22 May 2024 01:15:16 -0400 Subject: [PATCH 040/150] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 717edc132..3db531a02 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Giotto Title: Spatial Single-Cell Transcriptomics Toolbox -Version: 4.0.8 +Version: 4.1.0 Authors@R: c( person("Ruben", "Dries", email = "rubendries@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7650-7754")), From 7b5460da835b54ab1733a4d44c9279733a6ff539 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 22 May 2024 14:10:16 -0400 Subject: [PATCH 041/150] chore: code reorganization --- DESCRIPTION | 5 +- R/classes.R | 997 ----- R/convenience.R | 3988 ----------------- R/convenience_cosmx.R | 1768 ++++++++ R/convenience_general.R | 1601 +++++++ R/convenience_xenium.R | 1626 +++++++ man/addVisiumPolygons.Rd | 2 +- man/createArchRProj.Rd | 12 +- man/createGiottoCosMxObject.Rd | 30 +- man/createGiottoMerscopeObject.Rd | 18 +- man/createGiottoObjectfromArchR.Rd | 6 +- man/createGiottoVisiumObject.Rd | 12 +- man/createGiottoXeniumObject.Rd | 28 +- man/createMerscopeLargeImage.Rd | 2 +- man/createSpatialGenomicsObject.Rd | 4 +- man/dot-cosmx_infer_fov_shifts.Rd | 2 +- man/dot-createGiottoCosMxObject_aggregate.Rd | 2 +- man/dot-createGiottoCosMxObject_all.Rd | 12 +- ...dot-createGiottoCosMxObject_subcellular.Rd | 4 +- man/dot-createGiottoXeniumObject_aggregate.Rd | 2 +- ...ot-createGiottoXeniumObject_subcellular.Rd | 4 +- man/dot-load_cosmx_folder_aggregate.Rd | 2 +- man/dot-load_cosmx_folder_subcellular.Rd | 4 +- man/dot-read_cosmx_folder.Rd | 2 +- man/dot-read_xenium_folder.Rd | 6 +- man/dot-visium_read_scalefactors.Rd | 2 +- man/dot-visium_spot_poly.Rd | 2 +- man/importCosMx.Rd | 2 +- man/load_merscope_folder.Rd | 6 +- man/load_xenium_folder.Rd | 4 +- man/read_data_folder.Rd | 2 +- man/visium_micron_scalefactor.Rd | 4 +- 32 files changed, 5086 insertions(+), 5075 deletions(-) delete mode 100644 R/classes.R delete mode 100644 R/convenience.R create mode 100644 R/convenience_cosmx.R create mode 100644 R/convenience_general.R create mode 100644 R/convenience_xenium.R diff --git a/DESCRIPTION b/DESCRIPTION index 3db531a02..f47279f03 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -133,9 +133,10 @@ Remotes: Collate: 'auxiliary_giotto.R' 'cell_segmentation.R' - 'classes.R' 'clustering.R' - 'convenience.R' + 'convenience_cosmx.R' + 'convenience_general.R' + 'convenience_xenium.R' 'cross_section.R' 'dd.R' 'differential_expression.R' diff --git a/R/classes.R b/R/classes.R deleted file mode 100644 index cde2954e7..000000000 --- a/R/classes.R +++ /dev/null @@ -1,997 +0,0 @@ - - -# common internals #### -abbrev_path <- function(path, head = 15, tail = 35L) { - nch <- nchar(path) - if (nch > 60L) { - p1 <- substring(path, first = 0L, last = head) - p2 <- substring(path, first = nch - tail, last = nch) - path <- paste0(p1, "[...]", p2) - } - return(path) -} - -.reader_fun_prints <- function(x, pre) { - nfun <- length(x@calls) - funs <- names(x@calls) - if (nfun > 0L) { - pre_funs <- format(c(pre, rep("", nfun - 1L))) - for (i in seq_len(nfun)) { - cat(pre_funs[i], " ", funs[i], "()\n", sep = "") - } - } -} - -.filetype_prints <- function(x, pre) { - nftype <- length(x@filetype) - datatype <- format(names(x@filetype)) - pre_ftypes <- format(c(pre, rep("", nftype - 1L))) - cat(sprintf("%s %s -- %s\n", - pre_ftypes, - datatype, - x@filetype), - sep = "") -} - -# pattern - list.files pattern to use to search for specific files/dirs -# warn - whether to warn when a pattern does not find any files -# first - whether to only return the first match -.detect_in_dir <- function( - path, pattern, platform, warn = TRUE, first = TRUE -) { - f <- list.files(path, pattern = pattern, full.names = TRUE) - lenf <- length(f) - if (lenf == 1L) return(f) # one match - else if (lenf == 0L) { # no matches - if (warn) { - warning(sprintf( - "%s not detected in %s directory", - pattern, - platform - ), - call. = FALSE) - } - return(NULL) - } - - # more than one match - if (first) { - return(f[[1L]]) - } else { - return(f) - } -} - - - - - - - - - - -# Xenium #### - -setClass( - "XeniumReader", - slots = list( - xenium_dir = "character", - filetype = "list", - qv = "ANY", - calls = "list" - ), - prototype = list( - filetype = list( - transcripts = "parquet", - boundaries = "parquet", - expression = "h5", - cell_meta = "parquet" - ), - qv = 20, - calls = list() - ) -) - -# * show #### -setMethod("show", signature("XeniumReader"), function(object) { - cat(sprintf("Giotto <%s>\n", "XeniumReader")) - print_slots <- c("dir", "filetype", "qv_cutoff", "funs") - pre <- sprintf( - "%s :", format(print_slots) - ) - names(pre) <- print_slots - - # dir - d <- object@xenium_dir - if (length(d) > 0L) { - d <- abbrev_path(d) - cat(pre["dir"], d, "\n") - } else { - cat(pre["dir"], "\n") - } - - # qv - qv <- object@qv - cat(pre["qv_cutoff"], paste(qv, collapse = ", "), "\n") - - # filetype - .filetype_prints(x = object, pre = pre["filetype"]) - - # funs - .reader_fun_prints(x = object, pre = pre["funs"]) -}) - -# * print #### -setMethod("print", signature("XeniumReader"), function(x, ...) show(x)) - -# * init #### -setMethod( - "initialize", signature("XeniumReader"), - function( - .Object, - xenium_dir, - filetype, - qv_cutoff - ) { - .Object <- callNextMethod(.Object) - - # provided params (if any) - if (!missing(xenium_dir)) { - checkmate::assert_directory_exists(xenium_dir) - .Object@xenium_dir <- xenium_dir - } - if (!missing(filetype)) { - .Object@filetype <- filetype - } - if (!missing(qv_cutoff)) { - .Object@qv <- qv_cutoff - } - - - # check filetype - ftype_data <- c("transcripts", "boundaries", "expression", "cell_meta") - if (!all(ftype_data %in% names(.Object@filetype))) { - stop(wrap_txt("`$filetype` must have entries for each of:\n", - paste(ftype_data, collapse = ", "))) - } - - ftype <- .Object@filetype - ft_tab <- c("csv", "parquet") - ft_exp <- c("h5", "mtx", "zarr") - if (!ftype$transcripts %in% ft_tab) { - stop(wrap_txt("`$filetype$transcripts` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) - } - if (!ftype$boundaries %in% ft_tab) { - stop(wrap_txt("`$filetype$boundaries` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) - } - if (!ftype$cell_meta %in% ft_tab) { - stop(wrap_txt("`$filetype$cell_meta` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) - } - if (!ftype$expression %in% ft_exp) { - stop(wrap_txt("`$filetype$expression` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) - } - - - # detect paths and subdirs - p <- .Object@xenium_dir - .xenium_detect <- function(pattern, ...) { - .detect_in_dir( - pattern = pattern, ..., - path = p, platform = "Xenium", - ) - } - - cell_meta_path <- .xenium_detect("cells", first = FALSE) - panel_meta_path <- .xenium_detect("panel") # json - experiment_info_path <- .xenium_detect(".xenium") # json - - # 3D stack - DAPI - img_path <- .xenium_detect("morphology.", warn = FALSE) - # 2D fusion images - # - DAPI - # - stainings for multimodal segmentation - img_focus_path <- .xenium_detect("morphology_focus", warn = FALSE) - # Maximum intensity projection (MIP) of the morphology image. - # (Xenium Outputs v1.0 - 1.9. only) - img_mip_path <- .xenium_detect("morphology_mip", warn = FALSE) - - tx_path <- .xenium_detect("transcripts", first = FALSE) - cell_bound_path <- .xenium_detect("cell_bound", first = FALSE) - nuc_bound_path <- .xenium_detect("nucleus_bound", first = FALSE) - - expr_path <- .xenium_detect("cell_feature_matrix", first = FALSE) - - .xenium_ftype <- function(paths, ftype) { - paths[grepl(pattern = paste0(".", ftype), x = paths)] - } - - - # select file formats based on reader settings - tx_path <- .xenium_ftype(tx_path, ftype$transcripts) - cell_bound_path <- .xenium_ftype(cell_bound_path, ftype$boundaries) - nuc_bound_path <- .xenium_ftype(nuc_bound_path, ftype$boundaries) - expr_path <- .xenium_ftype(expr_path, ftype$expression) - cell_meta_path <- .xenium_ftype(cell_meta_path, ftype$cell_meta) - - - # transcripts load call - tx_fun <- function( - path = tx_path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - dropcols = c(), - qv_threshold = .Object@qv, - cores = determine_cores(), - verbose = NULL - ) { - .xenium_transcript( - path = path, - feat_type = feat_type, - split_keyword = split_keyword, - dropcols = dropcols, - qv_threshold = qv_threshold, - cores = cores, - verbose = verbose - ) - } - .Object@calls$load_transcripts <- tx_fun - - # load polys call - poly_fun <- function( - path = cell_bound_path, - name = "cell", - calc_centroids = TRUE, - cores = determine_cores(), - verbose = NULL - ) { - .xenium_poly( - path = path, - name = name, - calc_centroids = calc_centroids, - cores = cores, - verbose = verbose - ) - } - .Object@calls$load_polys <- poly_fun - - # load cellmeta - cmeta_fun <- function( - path = cell_meta_path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL - ) { - .xenium_cellmeta( - path = path, - dropcols = dropcols, - cores = cores, - verbose = verbose - ) - } - .Object@calls$load_cellmeta <- cmeta_fun - - # load featmeta - fmeta_fun <- function( - path = panel_meta_path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL - ) { - .xenium_featmeta( - path = path, - gene_ids, - dropcols = dropcols, - verbose = verbose - ) - } - .Object@calls$load_featmeta <- fmeta_fun - - # load expression call - expr_fun <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL - ) { - .xenium_expression( - path = path, - gene_ids = gene_ids, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type, - verbose = verbose - ) - } - .Object@calls$load_expression <- expr_fun - - # load image call - - - - - # create giotto object call - gobject_fun <- function( - transcript_path = tx_path, - load_bounds = list( - cell = "cell", - nucleus = "nucleus" - ), - expression_path = expr_path, - metadata_path = meta_path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - load_images = list( - morphology = "focus", - ), - load_expression = FALSE, - load_cellmeta = FALSE - ) { - load_expression <- as.logical(load_expression) - load_cellmeta <- as.logical(load_cellmeta) - - if (!is.null(load_images)) { - checkmate::assert_list(load_images) - if (is.null(names(load_images))) { - stop("Images paths provided to 'load_images' must be named") - } - } - if (!is.null(load_bounds)) { - checkmate::assert_list(load_bounds) - if (is.null(names(load_bounds))) { - stop("bounds paths provided to 'load_bounds' must be named") - } - } - - - - funs <- .Object@calls - - # init gobject - g <- giotto() - - - # transcripts - tx_list <- funs$load_transcripts( - path = transcript_path, - feat_type = feat_type, - split_keyword = split_keyword - ) - for (tx in tx_list) { - g <- setGiotto(g, tx) - } - - - # polys - if (!is.null(load_bounds)) { - # replace convenient shortnames - load_bounds[load_bounds == "cell"] <- cell_bound_path - load_bounds[load_bounds == "nucleus"] <- nuc_bound_path - - blist <- list() - bnames <- names(load_bounds) - for (b_i in seq_along(load_bounds)) { - b <- funs$load_polys( - path = load_bounds[[b_i]], - name = bnames[[b_i]] - ) - blist <- c(blist, b) - } - for (gpoly_i in seq_along(blist)) { - g <- setGiotto(g, blist[[gpoly_i]]) - } - } - - - # feat metadata - fx <- funs$load_featmeta( - path = - ) - - - # expression - if (load_expression) { - - } - - - # cell metadata - if (load_cellmeta) { - - } - - - # images - if (!is.null(load_images)) { - # replace convenient shortnames - load_images[load_images == "focus"] <- img_focus_path - } - - - - - } - .Object@calls$create_gobject <- gobject_fun - - - return(.Object) - } -) - - - - -# access #### - -#' @export -setMethod("$", signature("XeniumReader"), function(x, name) { - basic_info <- c("xenium_dir", "filetype", "qv") - if (name %in% basic_info) return(methods::slot(x, name)) - - return(x@calls[[name]]) -}) - -#' @export -setMethod("$<-", signature("XeniumReader"), function(x, name, value) { - basic_info <- c("xenium_dir", "filetype", "qv") - if (name %in% basic_info) { - methods::slot(x, name) <- value - return(initialize(x)) - } - - stop(sprintf("Only items in '%s' can be set", - paste0(basic_info, collapse = "', '"))) -}) - -#' @export -`.DollarNames.XeniumReader` <- function(x, pattern) { - dn <- c("xenium_dir", "filetype", "qv") - if (length(methods::slot(x, "calls")) > 0) { - dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) - } - return(dn) -} - - - - - - -# CosMx #### - -setClass( - "CosmxReader", - slots = list( - cosmx_dir = "character", - slide = "numeric", - fovs = "numeric", - micron = "logical", - px2mm = "numeric", - offsets = "ANY", - calls = "list" - ), - prototype = list( - slide = 1, - micron = FALSE, - px2mm = 0.12028, # from cosmx output help files - offsets = NULL, - calls = list() - ) -) - -# * show #### -setMethod("show", signature("CosmxReader"), function(object) { - cat(sprintf("Giotto <%s>\n", "CosmxReader")) - print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") - pre <- sprintf( - "%s :", format(print_slots) - ) - names(pre) <- print_slots - - # dir - d <- object@cosmx_dir - if (length(d) > 0L) { - nch <- nchar(d) - d <- abbrev_path(d) - cat(pre["dir"], d, "\n") - } else { - cat(pre["dir"], "\n") - } - - # slide - slide <- object@slide - cat(pre["slide"], slide, "\n") - - # fovs - fovs <- object@fovs %none% "all" - cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") - - # micron scaling - micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) - cat(pre["micron"], micron, "\n") - - # offsets - offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") - cat(pre["offsets"], offs_status, "\n") - - # funs - .reader_fun_prints(x = object, pre = pre["funs"]) -}) - -# * print #### -setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) - -# * plot #### -setMethod( - "plot", signature(x = "CosmxReader", y = "missing"), - function(x, cex = 0.8, ...) { - a <- list(...) - dat <- x@offsets - - if (is.null(dat)) { # don't run if no offsets - cat("no offsets to plot\n") - return(invisible(NULL)) - } - - plot(y ~ x, data = dat, asp = 1L, type = "n", ...) - text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) - }) - - - - -#' @title Import a Nanostring CosMx Assay -#' @name importCosMx -#' @description -#' Giotto import functionalities for CosMx datasets. This function generates -#' a `CosmxReader` instance that has convenient reader functions for converting -#' individual pieces of CosMx data into Giotto-compatible representations when -#' the params `cosmx_dir` and `fovs` (if only a subset is desired) are provided. -#' A function that creates the full `giotto` object is also available. -#' These functions should have all param values provided as defaults, but -#' can be flexibly modified to do things such as look in alternative -#' directories or paths. -#' @param cosmx_dir CosMx output directory -#' @param slide numeric. Slide number. Defaults to 1 -#' @param fovs numeric. (optional) If provided, will load specific fovs. -#' Otherwise, all FOVs will be loaded -#' @param micron logical. Whether to scale spatial information as micron -#' instead of the default pixels -#' @param px2mm numeric. Scalefactor from pixels to mm. Defaults to 0.12028 -#' based on `CosMx-ReadMe.html` info -#' @details -#' Loading functions are generated after the `cosmx_dir` is added. -#' Transcripts, expression, and metadata loading are all expected to be done -#' from the top level of the directory. Loading of polys, and any image sets -#' are expected to be from specific subdirectories containing only those -#' images for the set of FOVs. -#' @returns CosmxReader object -#' @examples -#' # Create a `CosmxReader` object -#' reader <- importCosMx() -#' -#' \dontrun{ -#' # Set the cosmx_dir and fov parameters -#' reader$cosmx_dir <- "path to cosmx dir" -#' reader$fov <- c(1, 4) -#' -#' plot(reader) # displays FOVs (top left corner) in px scale. -#' -#' # Load polygons, transcripts, and images -#' polys <- reader$load_polys() -#' tx <- reader$load_transcripts() -#' imgs <- reader$load_images() -#' -#' # Create a `giotto` object and add the loaded data -#' g <- giotto() -#' g <- setGiotto(g, tx[["rna"]]) -#' g <- setGiotto(g, polys) -#' g <- addGiottoLargeImage(g, largeImages = imgs) -#' force(g) -#' } -#' @export -importCosMx <- function( - cosmx_dir = NULL, slide = 1, fovs = NULL, micron = FALSE, px2mm = 0.12028 -) { - # get params - a <- list(Class = "CosmxReader") - if (!is.null(cosmx_dir)) { - a$cosmx_dir <- cosmx_dir - } - if (!is.null(fovs)) { - a$fovs <- fovs - } - a$slide <- slide - a$micron <- micron - a$px2mm <- px2mm - - do.call(new, args = a) -} - -# * init #### -setMethod("initialize", signature("CosmxReader"), function( - .Object, cosmx_dir, slide, fovs, micron, px2mm -) { - # provided params (if any) - if (!missing(cosmx_dir)) { - checkmate::assert_directory_exists(cosmx_dir) - .Object@cosmx_dir <- cosmx_dir - } - if (!missing(slide)) { - .Object@slide <- slide - } - if (!missing(fovs)) { - .Object@fovs <- fovs - } - if (!missing(micron)) { - .Object@micron <- micron - } - if (!missing(px2mm)) { - .Object@px2mm <- px2mm - } - - # NULL case - if (length(.Object@cosmx_dir) == 0) { - return(.Object) # return early if no path given - } - - - # detect paths and subdirs - p <- .Object@cosmx_dir - .cosmx_detect <- function(pattern) { - .detect_in_dir(pattern = pattern, path = p, platform = "CosMx") - } - - shifts_path <- .cosmx_detect("fov_positions_file") - meta_path <- .cosmx_detect("metadata_file") - tx_path <- .cosmx_detect("tx_file") - mask_dir <- .cosmx_detect("CellLabels") - expr_path <- .cosmx_detect("exprMat_file") - composite_img_dir <- .cosmx_detect("CellComposite") - overlay_img_dir <- .cosmx_detect("CellOverlay") - compart_img_dir <- .cosmx_detect("CompartmentLabels") - - - # load fov offsets through one of several methods - if (is.null(.Object@offsets)) { # only run if not already existing - pos <- NULL - - if (!is.null(shifts_path)) { - fov_shifts <- data.table::fread(shifts_path) - if (!"X_mm" %in% colnames(fov_shifts)) { - # older version has fov, x, y (all numeric) in px shifts - data.table::setnames(fov_shifts, new = c("fov", "x", "y")) - pos <- fov_shifts - } - } - - # proceed with other possible methods of inferring shifts if present - if (!is.null(meta_path) && is.null(pos)) { - pos <- .cosmx_infer_fov_shifts( - meta_dt = data.table::fread(meta_path), - flip_loc_y = TRUE - ) - } else if (!is.null(tx_path) && is.null(pos)) { - warning(wrap_txt( - "metadata_file not found: - Detecting fov shifts from tx_file. (This is slower)" - ), call. = FALSE) - pos <- .cosmx_infer_fov_shifts( - tx_dt = data.table::fread(tx_path), - flip_loc_y = TRUE - ) - } - else { - pos <- data.table::data.table() - warning(wrap_txt( - "NO FOV SHIFTS. - fov_positions_file, tx_file, and metadata_file not auto detected. - One of these must be provided to infer FOV shifts.\n - Alternatively, directly supply a data.table with: - fov(int), x(numeric), y(numeric) in px scaling to `$offsets`" - ), call. = FALSE) - } - - .Object@offsets <- pos - } - - - - # transcripts load call - tx_fun <- function( - path = tx_path, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - dropcols = c( - "x_local_px", - "y_local_px", - "cell_ID", - "cell" - ), - verbose = NULL - ) { - .cosmx_transcript( - path = path, - fovs = .Object@fovs %none% NULL, - feat_type = feat_type, - split_keyword = split_keyword, - dropcols = dropcols, - micron = .Object@micron, - px2mm = .Object@px2mm, - cores = determine_cores(), - verbose = verbose - ) - } - .Object@calls$load_transcripts <- tx_fun - - - - # mask load call - mask_fun <- function( - path = mask_dir, - # VERTICAL FLIP + NO VERTICAL SHIFT - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - remove_background_polygon = TRUE, - verbose = NULL - ) { - .cosmx_poly( - path = path, - fovs = .Object@fovs %none% NULL, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - shift_vertical_step = shift_vertical_step, - shift_horizontal_step = shift_horizontal_step, - remove_background_polygon = remove_background_polygon, - micron = .Object@micron, - px2mm = .Object@px2mm, - offsets = .Object@offsets, - verbose = verbose - ) - } - .Object@calls$load_polys <- mask_fun - - - # expression load call - expr_fun <- function( - path = expr_path, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb") - ) { - .cosmx_expression( - path = path, - fovs = .Object@fovs %none% NULL, - feat_type = feat_type, - split_keyword = split_keyword - ) - } - .Object@calls$load_expression <- expr_fun - - - # images load call - img_fun <- function( - path = composite_img_dir, - img_type = "composite", - img_name_fmt = paste0(img_type, "_fov%03d"), - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - verbose = NULL - ) { - .cosmx_image( - path = path, - fovs = .Object@fovs %none% NULL, - img_type = img_type, - img_name_fmt = img_name_fmt, - negative_y = negative_y, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - micron = .Object@micron, - px2mm = .Object@px2mm, - offsets = .Object@offsets, - verbose = verbose - ) - } - .Object@calls$load_images <- img_fun - - - # meta load call - meta_fun <- function( - path = meta_path, - dropcols = c( - "CenterX_local_px", - "CenterY_local_px", - "CenterX_global_px", - "CenterY_global_px", - "cell_id" - ), - verbose = NULL - ) { - .cosmx_cellmeta( - path = path, - fovs = .Object@fovs %none% NULL, - dropcols = dropcols, - cores = determine_cores(), - verbose = verbose - ) - } - .Object@calls$load_cellmeta <- meta_fun - - - # build gobject call - gobject_fun <- function( - transcript_path = tx_path, - cell_labels_dir = mask_dir, - expression_path = expr_path, - metadata_path = meta_path, - feat_type = c("rna", "negprobes"), - split_keyword = list( - "NegPrb" - ), - load_images = list( - composite = "composite", - overlay = "overlay" - ), - load_expression = FALSE, - load_cellmeta = FALSE, - instructions = NULL - ) { - load_expression <- as.logical(load_expression) - load_cellmeta <- as.logical(load_cellmeta) - - if (!is.null(load_images)) { - checkmate::assert_list(load_images) - if (is.null(names(load_images))) { - stop("Images directories provided to 'load_images' must be named") - } - } - - funs <- .Object@calls - - # init gobject - g <- giotto() - if (!is.null(instructions)) { - instructions(g) <- instructions - } - - # transcripts - tx_list <- funs$load_transcripts( - path = transcript_path, - feat_type = feat_type, - split_keyword = split_keyword - ) - for (tx in tx_list) { - g <- setGiotto(g, tx) - } - - # polys - polys <- funs$load_polys( - path = cell_labels_dir, - verbose = FALSE - ) - g <- setGiotto(g, polys) - - # images - if (!is.null(load_images)) { - # replace convenient shortnames - load_images[load_images == "composite"] <- composite_img_dir - load_images[load_images == "overlay"] <- overlay_img_dir - - imglist <- list() - dirnames <- names(load_images) - for (imdir_i in seq_along(load_images)) { - dir_imgs <- funs$load_images( - path = load_images[[imdir_i]], - img_type = dirnames[[imdir_i]], - ) - imglist <- c(imglist, dir_imgs) - } - g <- addGiottoLargeImage(g, largeImages = imglist) - } - - # expression & meta - # Need to check that names agree for poly/expr/meta - allowed_ids <- spatIDs(polys) - - if (load_expression) { - exlist <- funs$load_expression( - path = expression_path, - feat_type = feat_type, - split_keyword = split_keyword - ) - - # only keep allowed cells and set into gobject - for (ex in exlist) { - bool <- colnames(ex[]) %in% allowed_ids - ex[] <- ex[][, bool] - g <- setGiotto(g, ex) - } - } - - if (load_cellmeta) { - cx <- funs$load_cellmeta( - path = metadata_path - ) - - cx[] <- cx[][cell_ID %in% allowed_ids,] - g <- setGiotto(g, cx) - } - - return(g) - } - .Object@calls$create_gobject <- gobject_fun - - return(.Object) -}) - - - - - -# * access #### - -#' @export -setMethod("$", signature("CosmxReader"), function(x, name) { - basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") - if (name %in% basic_info) return(methods::slot(x, name)) - - return(x@calls[[name]]) -}) - -#' @export -setMethod("$<-", signature("CosmxReader"), function(x, name, value) { - basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm") - if (name %in% basic_info) { - methods::slot(x, name) <- value - return(initialize(x)) - } - - if (name == "offsets") { - methods::slot(x, name) <- data.table::setDT(value) - return(initialize(x)) - } - - stop(sprintf("Only items in '%s' can be set", - paste0(basic_info, collapse = "', '"))) -}) - -#' @export -`.DollarNames.CosmxReader` <- function(x, pattern) { - dn <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") - if (length(methods::slot(x, "calls")) > 0) { - dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) - } - return(dn) -} - - - - diff --git a/R/convenience.R b/R/convenience.R deleted file mode 100644 index 32c0788de..000000000 --- a/R/convenience.R +++ /dev/null @@ -1,3988 +0,0 @@ -# Spatial Method-Specific Convenience Functions for Giotto Object Creation # - - - -# Common Utility Functions #### - -#' @title Read a structured folder of exported data -#' @name read_data_folder -#' @description Framework function for reading the exported folder of a spatial -#' method and detecting the presence of needed files. NULL values denote missing -#' items.\cr -#' `.read_data_folder()` should not be called directly. Instead, specific -#' reader functions should be built using it as a base. -#' @param spat_method spatial method for which the data is being read -#' @param data_dir exported data directory to read from -#' @param dir_items named list of directory items to expect and keywords to -#' match -#' @param data_to_use character. Which type(s) of expression data to build the -#' gobject with. Values should match with a *workflow* item in require_data_DT -#' (see details) -#' @param require_data_DT data.table detailing if expected data items are -#' required or optional for each \code{data_to_use} *workflow* -#' @param cores cores to use -#' @param verbose be verbose -#' @param toplevel stackframes back where the user-facing function was called. -#' default is one stackframe above `.read_data_folder`. -#' @returns data.table -#' @details -#' **Steps performed:** -#' \itemize{ -#' \item{1. detection of items within \code{data_dir} by looking for keywords -#' assigned through \code{dir_items}} -#' \item{2. check of detected items to see if everything needed has been found. -#' Dictionary of necessary vs optional items for each \code{data_to_use} -#' *workflow* is provided through \code{require_data_DT}} -#' \item{3. if multiple filepaths are found to be matching then select the -#' first one. This function is only intended to find the first level -#' subdirectories and files.} -#' } -#' -#' **Example reader implementation:** -#' \preformatted{ -#' foo <- function(x_dir, -#' data_to_use, -#' cores = NA, -#' verbose = NULL) { -#' dir_items <- list( -#' data1 = "regex_pattern1", -#' data2 = "regex_pattern2", -#' data3 = "regex_pattern3" -#' ) -#' -#' # DT of info to check directory for. Has 3 cols -#' require_data_DT <- data.table::data.table( -#' workflow = "a", # data_to_use is matched against this -#' item = c( -#' "data1", -#' "data2", -#' "data3" -#' ), -#' needed = c( -#' FALSE, # data1 optional for this workflow (if missing: warn) -#' TRUE, # data2 vital for this workflow (if missing: error) -#' TRUE # data3 vital for this workflow (if missing: error) -#' ) -#' ) -#' -#' .read_data_folder( -#' spat_method = "x_method", -#' data_dir = x_dir, -#' dir_items = dir_items, -#' data_to_use = data_to_use, -#' require_data_DT = require_data_DT, -#' cores = cores, -#' verbose = verbose -#' ) -#' } -#' } -#' -#' @md -NULL - -#' @describeIn read_data_folder Should not be used directly -#' @keywords internal -.read_data_folder <- function(spat_method = NULL, - data_dir = NULL, - dir_items, - data_to_use, - load_format = NULL, - require_data_DT, - cores = NA, - verbose = NULL, - toplevel = 2L) { - ch <- box_chars() - - # 0. check params - if (is.null(data_dir) || - !dir.exists(data_dir)) { - .gstop(.n = toplevel, "The full path to a", spat_method, - "directory must be given.") - } - vmsg(.v = verbose, "A structured", spat_method, "directory will be used") - if (!data_to_use %in% require_data_DT$workflow) { - .gstop(.n = toplevel, - "Data requirements for data_to_use not found in require_data_DT") - } - - # 1. detect items - dir_items <- lapply_flex(dir_items, function(x) { - Sys.glob(paths = file.path(data_dir, x)) - }, cores = cores) - # (length = 1 if present, length = 0 if missing) - dir_items_lengths <- lengths(dir_items) - - # 2. check directory contents - vmsg(.v = verbose, "Checking directory contents...") - - for (item in names(dir_items)) { - # IF ITEM FOUND - - if (dir_items_lengths[[item]] > 0) { - # print found items if verbose = "debug" - if (isTRUE(verbose)) { - vmsg( - .v = verbose, .is_debug = TRUE, - .initial = paste0(ch$s, "> "), - item, " found" - ) - for (item_i in seq_along(dir_items[[item]])) { - # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) - vmsg( - .v = verbose, .is_debug = TRUE, - .initial = paste0(ch$s, ch$s, ch$l, ch$h, ch$h), - subItem - ) - } - } - } else { - # IF ITEM MISSING - # necessary (error) - # optional (warning) - - # data.table variables - workflow <- needed <- filetype <- NULL - - - require_data_DT <- require_data_DT[workflow == data_to_use, ] - if (!is.null(load_format)) - require_data_DT <- require_data_DT[filetype == load_format, ] - - if (item %in% require_data_DT[needed == TRUE, item]) - stop(item, " is missing") - if (item %in% require_data_DT[needed == FALSE, item]) - warning(item, "is missing (optional)") - } - } - - # 3. select first path in list if multiple are detected - if (any(dir_items_lengths > 1)) { - warning(wrap_txt("Multiple matches for expected directory item(s). - First matching item selected")) - - multiples <- which(dir_items_lengths > 1) - for (mult_i in multiples) { - message(names(dir_items)[[mult_i]], "multiple matches found:") - print(dir_items[[mult_i]]) - dir_items[[mult_i]] <- dir_items[[mult_i]][[1]] - } - } - vmsg(.v = verbose, "Directory check done") - - return(dir_items) -} - - - - - - - - - -# *---- object creation ----* #### - - - - - - -## Visium #### - -#' @title Create a giotto object from 10x visium data -#' @name createGiottoVisiumObject -#' @description Create Giotto object directly from a 10X visium folder. Also -#' accepts visium H5 outputs. -#' -#' @param visium_dir path to the 10X visium directory [required] -#' @param expr_data raw or filtered data (see details) -#' @param gene_column_index which column index to select (see details) -#' @param h5_visium_path path to visium 10X .h5 file -#' @param h5_gene_ids gene names as symbols (default) or ensemble gene ids -#' @param h5_tissue_positions_path path to tissue locations (.csv file) -#' @param h5_image_png_path path to tissue .png file (optional). Image -#' autoscaling looks for matches in the filename for either 'hires' or 'lowres' -#' @param h5_json_scalefactors_path path to .json scalefactors (optional) -#' @param png_name select name of png to use (see details) -#' @param do_manual_adj deprecated -#' @param xmax_adj deprecated -#' @param xmin_adj deprecated -#' @param ymax_adj deprecated -#' @param ymin_adj deprecated -#' @param instructions list of instructions or output result from -#' \code{\link[GiottoClass]{createGiottoInstructions}} -#' @param cores how many cores or threads to use to read data if paths are -#' provided -#' @param expression_matrix_class class of expression matrix to use -#' (e.g. 'dgCMatrix', 'DelayedArray') -#' @param h5_file optional path to create an on-disk h5 file -#' @param verbose be verbose -#' -#' @return giotto object -#' @details -#' If starting from a Visium 10X directory: -#' \itemize{ -#' \item{expr_data: raw will take expression data from raw_feature_bc_matrix and filter from filtered_feature_bc_matrix} -#' \item{gene_column_index: which gene identifiers (names) to use if there are multiple columns (e.g. ensemble and gene symbol)} -#' \item{png_name: by default the first png will be selected, provide the png name to override this (e.g. myimage.png)} -#' \item{the file scalefactors_json.json will be detected automatically and used to attempt to align the data} -#' } -#' -#' If starting from a Visium 10X .h5 file -#' \itemize{ -#' \item{h5_visium_path: full path to .h5 file: /your/path/to/visium_file.h5} -#' \item{h5_tissue_positions_path: full path to spatial locations file: /you/path/to/tissue_positions_list.csv} -#' \item{h5_image_png_path: full path to png: /your/path/to/images/tissue_lowres_image.png} -#' \item{h5_json_scalefactors_path: full path to .json file: /your/path/to/scalefactors_json.json} -#' } -#' -#' @export -createGiottoVisiumObject <- function(visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - h5_visium_path = NULL, - h5_gene_ids = c("symbols", "ensembl"), - h5_tissue_positions_path = NULL, - h5_image_png_path = NULL, - h5_json_scalefactors_path = NULL, - png_name = NULL, - do_manual_adj = FALSE, # deprecated - xmax_adj = 0, # deprecated - xmin_adj = 0, # deprecated - ymax_adj = 0, # deprecated - ymin_adj = 0, # deprecated - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - cores = NA, - verbose = NULL) { - # NSE vars - barcode <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL - - # handle deprecations - img_dep_msg <- "The params 'do_manual_adj', 'xmax_adj', 'xmin_adj', - 'ymax_adj', 'ymin_adj' are no longer used. - Please use the automated workflow." - if (!isFALSE(do_manual_adj) || - xmax_adj != 0 || - xmin_adj != 0 || - ymax_adj != 0 || - ymin_adj != 0) { - stop(wrap_txt(img_dep_msg)) - } - - # set number of cores automatically, but with limit of 10 - cores <- determine_cores(cores) - data.table::setDTthreads(threads = cores) - - - # get arguments list for object creation - if (!is.null(h5_visium_path)) { - argslist <- .visium_read_h5( - h5_visium_path = h5_visium_path, # expression matrix file - h5_gene_ids = h5_gene_ids, # symbol or ensembl - h5_tissue_positions_path = h5_tissue_positions_path, - h5_image_png_path = h5_image_png_path, - h5_json_scalefactors_path = h5_json_scalefactors_path, - verbose = verbose - ) - } else { - argslist <- .visium_read_folder( - visium_dir = visium_dir, - expr_data = expr_data, # type of expression matrix to load - gene_column_index = gene_column_index, # symbol or ensembl - png_name = png_name, - verbose = verbose - ) - } - - # additional args to pass to object creation - argslist$verbose <- verbose - argslist$expression_matrix_class <- expression_matrix_class - argslist$h5_file <- h5_file - argslist$instructions <- instructions - - giotto_object <- do.call(.visium_create, args = argslist) - - return(giotto_object) -} - - - - - - - - -.visium_create <- function( - expr_counts_path, - h5_gene_ids = NULL, # h5 - gene_column_index = NULL, # folder - tissue_positions_path, - image_path = NULL, - scale_json_path = NULL, - png_name = NULL, - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - verbose = NULL) { - # NSE vars - barcode <- cell_ID <- row_pxl <- col_pxl <- in_tissue <- array_row <- - array_col <- NULL - - # Assume path checking has been done - - # 1. expression - if (!is.null(h5_gene_ids)) { - expr_results <- get10Xmatrix_h5( - path_to_data = expr_counts_path, - gene_ids = h5_gene_ids - ) - } else { - expr_results <- get10Xmatrix( - path_to_data = expr_counts_path, - gene_column_index = gene_column_index - ) - } - - # if expr_results is not a list, make it a list compatible with downstream - if (!is.list(expr_results)) expr_results <- list( - "Gene Expression" = expr_results) - - # format expected data into list to be used with readExprData() - raw_matrix_list <- list("cell" = list("rna" = list( - "raw" = expr_results[["Gene Expression"]]))) - - # add protein expression data to list if it exists - if ("Antibody Capture" %in% names(expr_results)) { - raw_matrix_list$cell$protein$raw <- expr_results[["Antibody Capture"]] - } - - - # 2. spatial locations - spatial_results <- data.table::fread(tissue_positions_path) - colnames(spatial_results) <- c("barcode", "in_tissue", "array_row", - "array_col", "col_pxl", "row_pxl") - spatial_results <- spatial_results[match(colnames( - raw_matrix_list$cell[[1]]$raw), barcode)] - data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") - spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] - # flip x and y - colnames(spatial_locs) <- c("cell_ID", "sdimx", "sdimy") - - - # 3. scalefactors (optional) - json_info <- .visium_read_scalefactors(scale_json_path) - - - # 4. image (optional) - if (!is.null(image_path)) { - visium_png_list <- .visium_image( - image_path = image_path, - json_info = json_info, - verbose = verbose - ) - } - - # 5. metadata - meta_results <- spatial_results[ - , .(cell_ID, in_tissue, array_row, array_col)] - expr_types <- names(raw_matrix_list$cell) - meta_list <- list() - for (etype in expr_types) { - meta_list[[etype]] <- meta_results - } - - - # 6. giotto object - giotto_object <- createGiottoObject( - expression = raw_matrix_list, - spatial_locs = spatial_locs, - instructions = instructions, - cell_metadata = meta_list, - images = visium_png_list - ) - - - # 7. polygon information - if (!is.null(json_info)) { - visium_polygons <- .visium_spot_poly( - spatlocs = spatial_locs, - json_scalefactors = json_info - ) - giotto_object <- setPolygonInfo( - gobject = giotto_object, - x = visium_polygons, - centroids_to_spatlocs = FALSE, - verbose = FALSE, - initialize = TRUE - ) - } - - return(giotto_object) -} - - - -# Find and check the filepaths within a structured visium directory -.visium_read_folder <- function( - visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - png_name = NULL, - verbose = NULL) { - vmsg(.v = verbose, "A structured visium directory will be used") - - ## check arguments - if (is.null(visium_dir)) - .gstop("visium_dir needs to be a path to a visium directory") - visium_dir <- path.expand(visium_dir) - if (!dir.exists(visium_dir)) .gstop(visium_dir, " does not exist!") - expr_data <- match.arg(expr_data, choices = c("raw", "filter")) - - - ## 1. check expression - expr_counts_path <- switch(expr_data, - "raw" = paste0(visium_dir, "/", "raw_feature_bc_matrix/"), - "filter" = paste0(visium_dir, "/", "filtered_feature_bc_matrix/") - ) - if (!file.exists(expr_counts_path)) - .gstop(expr_counts_path, "does not exist!") - - - ## 2. check spatial locations - spatial_dir <- paste0(visium_dir, "/", "spatial/") - tissue_positions_path <- Sys.glob( - paths = file.path(spatial_dir, "tissue_positions*")) - - - ## 3. check spatial image - if (is.null(png_name)) { - png_list <- list.files(spatial_dir, pattern = "*.png") - png_name <- png_list[1] - } - png_path <- paste0(spatial_dir, "/", png_name) - if (!file.exists(png_path)) .gstop(png_path, " does not exist!") - - - ## 4. check scalefactors - scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") - if (!file.exists(scalefactors_path)) - .gstop(scalefactors_path, "does not exist!") - - - list( - expr_counts_path = expr_counts_path, - gene_column_index = gene_column_index, - tissue_positions_path = tissue_positions_path, - image_path = png_path, - scale_json_path = scalefactors_path - ) -} - - - -.visium_read_h5 <- function( - h5_visium_path = h5_visium_path, # expression matrix - h5_gene_ids = h5_gene_ids, - h5_tissue_positions_path = h5_tissue_positions_path, - h5_image_png_path = h5_image_png_path, - h5_json_scalefactors_path = h5_json_scalefactors_path, - verbose = NULL) { - # 1. filepaths - vmsg(.v = verbose, - "A path to an .h5 10X file was provided and will be used") - if (!file.exists(h5_visium_path)) - .gstop("The provided path ", h5_visium_path, " does not exist") - if (is.null(h5_tissue_positions_path)) - .gstop("A path to the tissue positions (.csv) needs to be provided to - h5_tissue_positions_path") - if (!file.exists(h5_tissue_positions_path)) - .gstop("The provided path ", h5_tissue_positions_path, - " does not exist") - if (!is.null(h5_image_png_path)) { - if (!file.exists(h5_image_png_path)) { - .gstop("The provided h5 image path ", h5_image_png_path, - "does not exist. - Set to NULL to exclude or provide the correct path.") - } - } - if (!is.null(h5_json_scalefactors_path)) { - if (!file.exists(h5_json_scalefactors_path)) { - warning(wrap_txt( - "No file found at h5_json_scalefactors_path. - Scalefactors are needed for proper image alignment and - polygon generation" - )) - } - } - - list( - expr_counts_path = h5_visium_path, - h5_gene_ids = h5_gene_ids, - tissue_positions_path = h5_tissue_positions_path, - image_path = h5_image_png_path, - scale_json_path = h5_json_scalefactors_path - ) -} - - - - - - - - - -# Visium Polygon Creation - -#' @title Add Visium Polygons to Giotto Object -#' @name addVisiumPolygons -#' @param gobject Giotto Object created with visium data, containing spatial -#' locations corresponding to spots -#' @param scalefactor_path path to scalefactors_json.json Visium output -#' @returns Giotto Object with to-scale circular polygons added at each spatial -#' location -#' @details -#' Adds circular giottoPolygons to the spatial_info slot of a Giotto Object -#' for the "cell" spatial unit. -#' @export -addVisiumPolygons <- function(gobject, - scalefactor_path = NULL) { - assert_giotto(gobject) - - visium_spat_locs <- getSpatialLocations( - gobject = gobject, - spat_unit = "cell" - ) - - scalefactors_list <- .visium_read_scalefactors( - json_path = scalefactor_path - ) - - visium_polygons <- .visium_spot_poly( - spatlocs = visium_spat_locs, - json_scalefactors = scalefactors_list - ) - - gobject <- addGiottoPolygons( - gobject = gobject, - gpolygons = list(visium_polygons) - ) - - return(gobject) -} - - - - - -#' @title Read Visium ScaleFactors -#' @name .visium_read_scalefactors -#' @param json_path path to scalefactors_json.json for Visium experimental data -#' @returns scalefactors within the provided json file as a named list, -#' or NULL if not discovered -#' @details asserts the existence of and reads in a .json file -#' containing scalefactors for Visium data in the expected format. -#' Returns NULL if no path is provided or if the file does not exist. -#' @keywords internal -.visium_read_scalefactors <- function(json_path = NULL) { - if (!checkmate::test_file_exists(json_path)) { - if (!is.null(json_path)) { - warning("scalefactors not discovered at: \n", - json_path, call. = FALSE) - } - return(NULL) - } - - json_scalefactors <- jsonlite::read_json(json_path) - - # Intial assertion that json dimensions are appropriate - checkmate::assert_list( - x = json_scalefactors, - types = "numeric", - min.len = 4L, - max.len = 5L - ) - - expected_json_names <- c( - "regist_target_img_scalef", # NEW as of 2023 - "spot_diameter_fullres", - "tissue_hires_scalef", - "fiducial_diameter_fullres", - "tissue_lowres_scalef" - ) - - # Visium assay with chemistry v2 contains an additional - # keyword in the json file - new_format_2023 <- checkmate::test_list( - x = json_scalefactors, - types = "numeric", - len = 5L - ) - - # If the scalefactors are of size 4 (older assay), clip the new keyword - if (!new_format_2023) expected_json_names <- expected_json_names[2:5] - - if (!setequal(names(json_scalefactors), expected_json_names)) { - warning(GiottoUtils::wrap_txt( - "h5 scalefactors json names differ from expected. - [Expected]:", expected_json_names, "\n", - "[Actual]:", names(json_scalefactors) - )) - } - - return(json_scalefactors) -} - - -#' @title Calculate Pixel to Micron Scalefactor -#' @name visium_micron_scalefactor -#' @param json_scalefactors list of scalefactors from -#' .visium_read_scalefactors() -#' @returns scale factor for converting pixel to micron -#' @details -#' Calculates pixel to micron scalefactor. -#' Visium xy coordinates are based on the fullres image -#' The values provided are directly usable for generating polygon information -#' or calculating the micron size relative to spatial coordinates for this set -#' of spatial information. -#' @keywords internal -.visium_micron_scale <- function(json_scalefactors) { - # visium spots diameter : 55 micron - # diameter of a spot at this spatial scaling : scalefactor_list$spot_diameter_fullres - px_to_micron <- 55 / json_scalefactors$spot_diameter_fullres - return(px_to_micron) -} - - -#' @title Create Polygons for Visium Data -#' @name .visium_spot_poly -#' @param spatlocs spatial locations data.table or `spatLocsObj` containing -#' centroid locations of visium spots -#' @param json_scalefactors list of scalefactors from -#' .visium_read_scalefactors() -#' @returns giottoPolygon object -#' @details -#' Creates circular polygons for spatial representation of -#' Visium spots. -#' @keywords internal -#' @md -.visium_spot_poly <- function(spatlocs = NULL, - json_scalefactors) { - if (inherits(spatlocs, "spatLocsObj")) { - spatlocs <- spatlocs[] - } - - vis_spot_poly <- GiottoClass::circleVertices( - radius = json_scalefactors$spot_diameter_fullres / 2 - ) - - GiottoClass::polyStamp( - stamp_dt = vis_spot_poly, - spatlocs = spatlocs, - verbose = FALSE - ) %>% - createGiottoPolygonsFromDfr( - calc_centroids = TRUE, - verbose = FALSE - ) -} - - - - - - -# json_info expects the list read output from .visium_read_scalefactors -# image_path should be expected to be full filepath -# should only be used when do_manual_adj (deprecated) is FALSE -.visium_image <- function( - image_path, - json_info = NULL, - micron_scale = FALSE, - verbose = NULL) { - # assume image already checked - vmsg(.v = verbose, .initial = " - ", "found image") - - # 1. determine image scalefactor to use ---------------------------------- # - if (!is.null(json_info)) checkmate::assert_list(json_info) - png_name <- basename(image_path) # used for name pattern matching only - - if (is.null(json_info)) { # if none provided - warning(wrap_txt( - "No scalefactors json info provided. - Visium image scale_factor defaulting to 1" - )) - scale_factor <- 1 - } else { # if provided - - scale_factor <- NULL # initial value - - # determine type of visium image - visium_img_type <- NULL - possible_types <- c("lowres", "hires") - for (img_type in possible_types) { - if (grepl(img_type, png_name)) visium_img_type <- img_type - } - - if (is.null(visium_img_type)) { # if not recognized visium image type - .gstop( - "\'image_path\' filename did not partial match either - \'lowres\' or \'hires\'. Ensure specified image is either the - Visium lowres or hires image and rename it accordingly" - ) - } - - vmsg( - .v = verbose, .initial = " - ", - "found scalefactors. attempting automatic alignment for the", - str_quote(visium_img_type), "image\n\n" - ) - - scale_factor <- switch(visium_img_type, - "lowres" = json_info[["tissue_lowres_scalef"]], - "hires" = json_info[["tissue_hires_scalef"]] - ) - } - - if (isTRUE(micron_scale)) { - scale_factor <- scale_factor * .visium_micron_scale(json_info) - } - - # 2. create image -------------------------------------------------------- # - visium_img <- createGiottoLargeImage( - raster_object = image_path, - name = "image", - negative_y = TRUE, - scale_factor = (1 / scale_factor) - ) - - visium_img_list <- list(visium_img) - names(visium_img_list) <- c("image") - - return(visium_img_list) -} - - - - - - - - - - - -## MERSCOPE #### - - -#' @title Create Vizgen MERSCOPE largeImage -#' @name createMerscopeLargeImage -#' @description -#' Read MERSCOPE stitched images as giottoLargeImage. Images will also be -#' transformed to match the spatial coordinate reference system of the paired -#' points and polygon data. -#' @param image_file character. Path to one or more MERSCOPE images to load -#' @param transforms_file character. Path to MERSCOPE transforms file. Usually -#' in the same folder as the images and named -#' 'micron_to_mosaic_pixel_transform.csv' -#' @param name character. name to assign the image. Multiple should be provided -#' if image_file is a list. -#' @returns giottoLargeImage -#' @export -createMerscopeLargeImage <- function(image_file, - transforms_file, - name = "image") { - checkmate::assert_character(transforms_file) - tfsDT <- data.table::fread(transforms_file) - if (inherits(image_file, "character")) { - image_file <- as.list(image_file) - } - checkmate::assert_list(image_file) - - scalef <- c(1 / tfsDT[[1, 1]], 1 / tfsDT[[2, 2]]) - x_shift <- -tfsDT[[1, 3]] / tfsDT[[1, 1]] - y_shift <- -tfsDT[[2, 3]] / tfsDT[[2, 2]] - - out <- lapply(seq_along(image_file), function(i) { - gimg <- createGiottoLargeImage( - raster_object = image_file[[i]], - name = name[[i]], - scale_factor = scalef, - negative_y = FALSE - ) - - gimg <- spatShift(gimg, dx = x_shift, dy = y_shift) - - gimg@extent <- terra::ext(gimg@raster_object) - return(gimg) - }) - - if (length(out) == 1L) { - out <- unlist(out) - } - - return(out) -} - - - - - - - -#' @title Create Vizgen MERSCOPE Giotto Object -#' @name createGiottoMerscopeObject -#' @description Given the path to a MERSCOPE experiment directory, creates a -#' Giotto object. -#' @param merscope_dir full path to the exported merscope directory -#' @param data_to_use which of either the 'subcellular' or 'aggregate' -#' information to use for object creation -#' @param FOVs which FOVs to use when building the subcellular object. -#' (default is NULL) -#' NULL loads all FOVs (very slow) -#' @param calculate_overlap whether to run \code{\link{calculateOverlapRaster}} -#' @param overlap_to_matrix whether to run \code{\link{overlapToMatrix}} -#' @param aggregate_stack whether to run \code{\link{aggregateStacks}} -#' @param aggregate_stack_param params to pass to \code{\link{aggregateStacks}} -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns a giotto object -#' @details -#' [\strong{Expected Directory}] This function generates a giotto object when -#' given a link to a MERSCOPE output directory. It expects the following items -#' within the directory where the \strong{bolded} portions are what this -#' function matches against: -#' \itemize{ -#' \item{\strong{cell_boundaries} (folder .hdf5 files)} -#' \item{\strong{images} (folder of .tif images and a scalefactor/transfrom table)} -#' \item{\strong{cell_by_gene}.csv (file)} -#' \item{cell_metadata\strong{fov_positions_file}.csv (file)} -#' \item{detected_transcripts\strong{metadata_file}.csv (file)} -#' } -#' @export -createGiottoMerscopeObject <- function(merscope_dir, - data_to_use = c("subcellular", "aggregate"), - FOVs = NULL, - poly_z_indices = 1:7, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - instructions = NULL, - cores = NA, - verbose = TRUE) { - fovs <- NULL - - # 0. setup - merscope_dir <- path.expand(merscope_dir) - - poly_z_indices <- as.integer(poly_z_indices) - if (any(poly_z_indices < 1)) { - stop(wrap_txt( - "poly_z_indices is a vector of one or more integers starting from 1.", - errWidth = TRUE - )) - } - - # determine data to use - data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) - - # 1. test if folder structure exists and is as expected - dir_items <- .read_merscope_folder( - merscope_dir = merscope_dir, - data_to_use = data_to_use, - cores = cores, - verbose = verbose - ) - - # 2. load in directory items - data_list <- .load_merscope_folder( - dir_items = dir_items, - data_to_use = data_to_use, - poly_z_indices = poly_z_indices, - fovs = fovs, - cores = cores, - verbose = verbose - ) - - # 3. Create giotto object - if (data_to_use == "subcellular") { - merscope_gobject <- .createGiottoMerscopeObject_subcellular( - data_list = data_list, - calculate_overlap = calculate_overlap, - overlap_to_matrix = overlap_to_matrix, - aggregate_stack = aggregate_stack, - aggregate_stack_param = aggregate_stack_param, - cores = cores, - verbose = verbose - ) - } else if (data_to_use == "aggregate") { - merscope_gobject <- .createGiottoMerscopeObject_aggregate( - data_list = data_list, - cores = cores, - verbose = verbose - ) - } else { - stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) - } - - return(merscope_gobject) -} - - - - -#' @describeIn createGiottoMerscopeObject Create giotto object with -#' 'subcellular' workflow -#' @param data_list list of loaded data from \code{\link{load_merscope_folder}} -#' @keywords internal -.createGiottoMerscopeObject_subcellular <- function(data_list, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - cores = NA, - verbose = TRUE) { - feat_coord <- neg_coord <- cellLabel_dir <- instructions <- NULL - - # unpack data_list - poly_info <- data_list$poly_info - tx_dt <- data_list$tx_dt - micronToPixelScale <- data_list$micronToPixelScale - image_list <- data_list$images - - # data.table vars - gene <- NULL - - # split tx_dt by expression and blank - vmsg("Splitting detections by feature vs blank", .v = verbose) - feat_id_all <- tx_dt[, unique(gene)] - blank_id <- feat_id_all[grepl(pattern = "Blank", feat_id_all)] - feat_id <- feat_id_all[!feat_id_all %in% blank_id] - - feat_dt <- tx_dt[gene %in% feat_id, ] - blank_dt <- tx_dt[gene %in% blank_id, ] - - # extract transcript_id col and store as feature meta - feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) - blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) - feat_dt[, c("transcript_id", "barcode_id") := NULL] - blank_dt[, c("transcript_id", "barcode_id") := NULL] - - if (isTRUE(verbose)) { - message(" > Features: ", feat_dt[, .N]) - message(" > Blanks: ", blank_dt[, .N]) - } - - # build giotto object - vmsg("Building subcellular giotto object...", .v = verbose) - z_sub <- createGiottoObjectSubcellular( - gpoints = list( - "rna" = feat_coord, - "neg_probe" = neg_coord - ), - gpolygons = list("cell" = cellLabel_dir), - polygon_mask_list_params = list( - mask_method = "guess", - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_horizontal_step = FALSE - ), - instructions = instructions, - cores = cores - ) -} - - - - -#' @describeIn createGiottoMerscopeObject Create giotto object with 'aggregate' -#' workflow -#' @param data_list list of loaded data from \code{\link{load_merscope_folder}} -#' @keywords internal -.createGiottoMerscopeObject_aggregate <- function(data_list, - cores = NA, - verbose = TRUE) { - # unpack data_list - micronToPixelScale <- data_list$micronToPixelScale - expr_dt <- data_list$expr_dt - cell_meta <- data_list$expr_mat - image_list <- data_list$images - - # split expr_dt by expression and blank - - # feat_id_all = -} - - - - -## Spatial Genomics #### - -#' @title Create Spatial Genomics Giotto Object -#' @name createSpatialGenomicsObject -#' @param sg_dir full path to the exported Spatial Genomics directory -#' @param instructions new instructions -#' (e.g. result from createGiottoInstructions) -#' @returns giotto object -#' @description Given the path to a Spatial Genomics data directory, creates a -#' Giotto object. -#' @export -createSpatialGenomicsObject <- function(sg_dir = NULL, - instructions = NULL) { - # Find files in Spatial Genomics directory - dapi <- list.files(sg_dir, full.names = TRUE, pattern = "DAPI") - mask <- list.files(sg_dir, full.names = TRUE, pattern = "mask") - tx <- list.files(sg_dir, full.names = TRUE, pattern = "transcript") - # Create Polygons - gpoly <- createGiottoPolygonsFromMask( - mask, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - flip_horizontal = FALSE, - flip_vertical = FALSE - ) - # Create Points - tx <- data.table::fread(tx) - gpoints <- createGiottoPoints(tx) - dim(tx) - # Create object and add image - gimg <- createGiottoLargeImage(dapi, use_rast_ext = TRUE) - sg <- createGiottoObjectSubcellular( - gpoints = list("rna" = gpoints), - gpolygons = list("cell" = gpoly), - instructions = instructions - ) - sg <- addGiottoImage(sg, images = list(image = gimg)) - # Return SG object - return(sg) -} - - - - - -## CosMx #### - -#' @title Create Nanostring CosMx Giotto Object -#' @name createGiottoCosMxObject -#' @description Given the path to a CosMx experiment directory, creates a Giotto -#' object. -#' @param cosmx_dir full path to the exported cosmx directory -#' @param data_to_use which type(s) of expression data to build the gobject with -#' Default is \code{'all'} information available. \code{'subcellular'} loads -#' the transcript coordinates only. \code{'aggregate'} loads the provided -#' aggregated expression matrix. -#' @param FOVs field of views to load (only affects subcellular data and images) -#' @param remove_background_polygon try to remove background polygon -#' (default: FALSE) -#' @param background_algo algorithm to remove background polygon -#' @param remove_unvalid_polygons remove unvalid polygons (default: TRUE) -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns a giotto object -#' @details -#' [\strong{Expected Directory}] This function generates a giotto object when -#' given a link to a cosmx output directory. It expects the following items -#' within the directory where the \strong{bolded} portions are what this -#' function matches against: -#' \itemize{ -#' \item{\strong{CellComposite} (folder of images)} -#' \item{\strong{CellLabels} (folder of images)} -#' \item{\strong{CellOverlay} (folder of images)} -#' \item{\strong{CompartmentLabels} (folder of images)} -#' \item{experimentname_\strong{exprMat_file}.csv (file)} -#' \item{experimentname_\strong{fov_positions_file}.csv (file)} -#' \item{experimentname_\strong{metadata_file}.csv (file)} -#' \item{experimentname_\strong{tx_file}.csv (file)} -#' } -#' -#' [\strong{Workflows}] Workflow to use is accessed through the data_to_use param -#' \itemize{ -#' \item{'all' - loads and requires subcellular information from tx_file and -#' fov_positions_file -#' and also the existing aggregated information -#' (expression, spatial locations, and metadata) -#' from exprMat_file and metadata_file.} -#' \item{'subcellular' - loads and requires subcellular information from -#' tx_file and -#' fov_positions_file only.} -#' \item{'aggregate' - loads and requires the existing aggregate information -#' (expression, spatial locations, and metadata) from exprMat_file and -#' metadata_file.} -#' } -#' -#' [\strong{Images}] Images in the default CellComposite, CellLabels, -#' CompartmentLabels, and CellOverlay -#' folders will be loaded as giotto largeImage objects in all workflows as -#' long as they are available. Additionally, CellComposite images will be -#' converted to giotto image objects, making plotting with -#' these image objects more responsive when accessing them from a server. -#' \code{\link{showGiottoImageNames}} can be used to see the available images. -#' @export -createGiottoCosMxObject <- function(cosmx_dir = NULL, - data_to_use = c("all", "subcellular", "aggregate"), - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - FOVs = NULL, - instructions = NULL, - cores = determine_cores(), - verbose = TRUE) { - # 0. setup - cosmx_dir <- path.expand(cosmx_dir) - - # determine data to use - data_to_use <- match.arg( - arg = data_to_use, choices = c("all", "subcellular", "aggregate")) - if (data_to_use %in% c("all", "aggregate")) { - stop(wrap_txt('Convenience workflows "all" and "aggregate" are not - available yet')) - } - - # Define for data.table - fov <- target <- x_local_px <- y_local_px <- z <- cell_ID <- - CenterX_global_px <- CenterY_global_px <- - CenterX_local_px <- CenterY_local_px <- NULL - - - # 1. test if folder structure exists and is as expected - dir_items <- .read_cosmx_folder( - cosmx_dir = cosmx_dir, - verbose = verbose - ) - - - # 2. load and create giotto object - cosmx_gobject <- switch(data_to_use, - "subcellular" = .createGiottoCosMxObject_subcellular( - dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ), - "aggregate" = .createGiottoCosMxObject_aggregate( - dir_items, - cores = cores, - verbose = verbose, - instructions = instructions - ), - "all" = .createGiottoCosMxObject_all( - dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ) - ) - - - # load in subcellular information, subcellular FOV objects, then join - - - # load in pre-generated aggregated expression matrix - if (data_to_use == "aggregate" | data_to_use == "all") { - - } - - - - message("done") - return(cosmx_gobject) -} - - - -#' @title Load and create a CosMx Giotto object from subcellular info -#' @name .createGiottoCosMxObject_subcellular -#' @inheritParams createGiottoCosMxObject -#' @returns giotto object -#' @keywords internal -.createGiottoCosMxObject_subcellular <- function( - dir_items, - FOVs = NULL, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL) { - target <- fov <- NULL - - # load tx detections and FOV offsets ------------------------------------- # - data_list <- .load_cosmx_folder_subcellular( - dir_items = dir_items, - FOVs = FOVs, - cores = cores, - verbose = verbose - ) - - # unpack data_list - FOV_ID <- data_list$FOV_ID - fov_offset_file <- data_list$fov_offset_file - tx_coord_all <- data_list$tx_coord_all - - # remove global xy values and cell_ID - tx_coord_all[, c("x_global_px", "y_global_px", "cell_ID") := NULL] - - data.table::setcolorder( - tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov")) - - # feature detection type splitting --------------------------------------- # - - if (isTRUE(verbose)) message("Splitting detections by feature vs neg probe") - all_IDs <- tx_coord_all[, unique(target)] - neg_IDs <- all_IDs[grepl(pattern = "NegPrb", all_IDs)] - feat_IDs <- all_IDs[!all_IDs %in% neg_IDs] - - # split detections DT - feat_coords_all <- tx_coord_all[target %in% feat_IDs] - neg_coords_all <- tx_coord_all[target %in% neg_IDs] - - if (isTRUE(verbose)) { - message(" > Features: ", feat_coords_all[, .N]) - message(" > NegProbes: ", neg_coords_all[, .N]) - } - - # FOV-based processing --------------------------------------------------- # - - fov_gobjects_list <- lapply(FOV_ID, function(x) { - # images --------------------------------------------------- # - # build image paths - if (isTRUE(verbose)) message("Loading image information...") - - composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("*", x, "*"))) - cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("*", x, "*"))) - compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("*", x, "*"))) - cellOverlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("*", x, "*"))) - - # Missing warnings - if (length(composite_dir) == 0) { - warning("[ FOV ", x, " ] No composite images found") - composite_dir <- NULL - } - if (length(cellLabel_dir) == 0) { - stop("[ FOV ", x, " ] No cell mask images found") - } # cell masks are necessary - if (length(compartmentLabel_dir) == 0) { - warning("[ FOV ", x, " ] No compartment label images found") - compartmentLabel_dir <- NULL - } - if (length(cellOverlay_dir) == 0) { - warning("[ FOV ", x, " ] No cell polygon overlay images found") - cellOverlay_dir <- NULL - } - - if (isTRUE(verbose)) message("Image load done") - - if (isTRUE(verbose)) wrap_msg("[ FOV ", x, "]") - - - # transcripts ---------------------------------------------- # - # get FOV specific tx locations - if (isTRUE(verbose)) message("Assigning FOV feature detections...") - - - # feature info - coord_oldnames <- c("target", "x_local_px", "y_local_px") - coord_newnames <- c("feat_ID", "x", "y") - - feat_coord <- feat_coords_all[fov == as.numeric(x)] - data.table::setnames( - feat_coord, old = coord_oldnames, new = coord_newnames) - # neg probe info - neg_coord <- neg_coords_all[fov == as.numeric(x)] - data.table::setnames( - neg_coord, old = coord_oldnames, new = coord_newnames) - - - # build giotto object -------------------------------------- # - if (isTRUE(verbose)) message("Building subcellular giotto object...") - fov_subset <- createGiottoObjectSubcellular( - gpoints = list( - "rna" = feat_coord, - "neg_probe" = neg_coord - ), - gpolygons = list("cell" = cellLabel_dir), - polygon_mask_list_params = list( - mask_method = "guess", - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_horizontal_step = FALSE, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons - ), - instructions = instructions, - cores = cores - ) - - - # find centroids as spatial locations ---------------------- # - if (isTRUE(verbose)) - message("Finding polygon centroids as cell spatial locations...") - fov_subset <- addSpatialCentroidLocations( - fov_subset, - poly_info = "cell", - spat_loc_name = "raw" - ) - - - # create and add giotto image objects ---------------------- # - if (isTRUE(verbose)) { - message("Attaching image files...") - print(composite_dir) - print(cellOverlay_dir) - print(compartmentLabel_dir) - } - - gImage_list <- list() - - # load image if files are found - if (!is.null(composite_dir)) { - gImage_list$composite <- createGiottoLargeImage( - raster_object = composite_dir, - negative_y = FALSE, - name = "composite" - ) - } - if (!is.null(cellOverlay_dir)) { - gImage_list$overlay <- createGiottoLargeImage( - raster_object = cellOverlay_dir, - negative_y = FALSE, - name = "overlay" - ) - } - if (!is.null(compartmentLabel_dir)) { - gImage_list$compartment <- createGiottoLargeImage( - raster_object = compartmentLabel_dir, - negative_y = FALSE, - name = "compartment" - ) - } # TODO - - - - if (length(gImage_list) > 0) { - fov_subset <- addGiottoImage( - gobject = fov_subset, - images = gImage_list - ) - - # convert to MG for faster loading (particularly relevant for - # pulling from server) - # TODO remove this - fov_subset <- convertGiottoLargeImageToMG( - giottoLargeImage = gImage_list$composite, - gobject = fov_subset, - return_gobject = TRUE, - verbose = FALSE - ) - } else { - message("No images found for fov") - } - }) # lapply end - - # returning -------------------------------------------------------------- # - - if (length(FOVs) == 1) { - return(fov_gobjects_list[[1]]) - } else { - # join giotto objects according to FOV positions file - if (isTRUE(verbose)) message("Joining FOV gobjects...") - new_gobj_names <- paste0("fov", FOV_ID) - id_match <- match(as.numeric(FOV_ID), fov_offset_file$fov) - x_shifts <- fov_offset_file[id_match]$x_global_px - y_shifts <- fov_offset_file[id_match]$y_global_px - - # Join giotto objects - cosmx_gobject <- joinGiottoObjects( - gobject_list = fov_gobjects_list, - gobject_names = new_gobj_names, - join_method = "shift", - x_shift = x_shifts, - y_shift = y_shifts - ) - return(cosmx_gobject) - } -} - - - -#' @title Load and create a CosMx Giotto object from aggregate info -#' @name .createGiottoCosMxObject_aggregate -#' @inheritParams createGiottoCosMxObject -#' @returns giotto object -#' @keywords internal -.createGiottoCosMxObject_aggregate <- function(dir_items, - cores, - verbose = TRUE, - instructions = NULL) { - data_to_use <- fov <- NULL - - data_list <- .load_cosmx_folder_aggregate( - dir_items = dir_items, - cores = cores, - verbose = verbose - ) - - # unpack data_list - spatlocs <- data_list$spatlocs - spatlocs_fov <- data_list$spatlocs_fov - metadata <- data_list$metadata - protM <- data_list$protM - spM <- data_list$spM - fov_shifts <- data_list$fov_shifts - - - # create standard gobject from aggregate matrix - if (data_to_use == "aggregate") { - # Create aggregate gobject - if (isTRUE(verbose)) message("Building giotto object...") - cosmx_gobject <- createGiottoObject( - expression = list("raw" = spM, "protein" = protM), - cell_metadata = list("cell" = list( - "rna" = metadata, - "protein" = metadata - )), - spatial_locs = spatlocs, - instructions = instructions, - cores = cores - ) - - - # load in images - img_ID <- data.table::data.table( - fov = fov_shifts[, fov], - img_name = paste0("fov", - sprintf("%03d", fov_shifts[, fov]), "-image") - ) - - if (isTRUE(verbose)) message("Attaching image files...") - composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("/*"))) - cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("/*"))) - compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("/*"))) - overlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("/*"))) - - if (length(cellLabel_imgList) > 0) { - cellLabel_imgList <- lapply(cellLabel_dir, function(x) { - createGiottoLargeImage(x, name = "cellLabel", negative_y = TRUE) - }) - } - if (length(composite_imgList) > 0) { - composite_imgList <- lapply(composite_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) - } - if (length(compartmentLabel_dir) > 0) { - compartmentLabel_imgList <- lapply( - compartmentLabel_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) - } - if (length(overlay_dir) > 0) { - overlay_imgList <- lapply(overlay_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) - } - } -} - - - - -#' @title Load and create a CosMx Giotto object from subcellular and aggregate -#' info -#' @name .createGiottoCosMxObject_all -#' @param dir_items list of full directory paths from \code{.read_cosmx_folder} -#' @inheritParams createGiottoCosMxObject -#' @returns giotto object -#' @details Both \emph{subcellular} -#' (subellular transcript detection information) and -#' \emph{aggregate} (aggregated detection count matrices by cell polygon from -#' NanoString) -#' data will be loaded in. The two will be separated into 'cell' and 'cell_agg' -#' spatial units in order to denote the difference in origin of the two. -#' @seealso createGiottoCosMxObject .createGiottoCosMxObject_aggregate -#' .createGiottoCosMxObject_subcellular -#' @keywords internal -.createGiottoCosMxObject_all <- function(dir_items, - FOVs, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL, - ...) { - # 1. create subcellular giotto as spat_unit 'cell' - cosmx_gobject <- .createGiottoCosMxObject_subcellular( - dir_items = dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ) - - # 2. load and append aggregated information in spat_unit 'cell_agg' - agg_data <- .load_cosmx_folder_aggregate( - dir_items = dir_items, - cores = cores, - verbose = verbose - ) - - # unpack data_list - spatlocs <- agg_data$spatlocs - spatlocs_fov <- agg_data$spatlocs_fov - metadata <- agg_data$metadata - protM <- agg_data$protM - spM <- agg_data$spM - - # add in pre-generated aggregated expression matrix information for 'all' - # workflow - - # Add aggregate expression information - if (isTRUE(verbose)) wrap_msg( - 'Appending provided aggregate expression data as... - spat_unit: "cell_agg" - feat_type: "rna" - name: "raw"') - # add expression data to expression slot - s4_expr <- createExprObj( - name = "raw", - expression_data = spM, - spat_unit = "cell_agg", - feat_type = "rna", - provenance = "cell_agg" - ) - - cosmx_gobject <- set_expression_values(cosmx_gobject, values = s4_expr) - - # Add spatial locations - if (isTRUE(verbose)) wrap_msg( - 'Appending metadata provided spatial locations data as... - --> spat_unit: "cell_agg" name: "raw" - --> spat_unit: "cell" name: "raw_fov"') - if (isTRUE(verbose)) wrap_msg( - 'Polygon centroid derived spatial locations assigned as... - --> spat_unit: "cell" name: "raw" (default)') - - locsObj <- create_spat_locs_obj( - name = "raw", - coordinates = spatlocs, - spat_unit = "cell_agg", - provenance = "cell_agg" - ) - locsObj_fov <- create_spat_locs_obj( - name = "raw_fov", - coordinates = spatlocs_fov, - spat_unit = "cell_agg", - provenance = "cell_agg" - ) - - cosmx_gobject <- set_spatial_locations(cosmx_gobject, spatlocs = locsObj) - cosmx_gobject <- set_spatial_locations(cosmx_gobject, - spatlocs = locsObj_fov) - - # initialize cell and feat IDs and metadata slots for 'cell_agg' spat_unit - agg_cell_ID <- colnames(s4_expr[]) - agg_feat_ID <- rownames(s4_expr[]) - - sub_feat_ID <- featIDs(cosmx_gobject, feat_type = "rna") - feat_ID_new <- unique(c(agg_feat_ID, sub_feat_ID)) - - # cell metadata - - # Add metadata to both the given and the poly spat_units - if (isTRUE(verbose)) message("Appending provided cell metadata...") - cosmx_gobject <- addCellMetadata(cosmx_gobject, - spat_unit = "cell", - feat_type = "rna", - new_metadata = metadata, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - cosmx_gobject <- addCellMetadata(cosmx_gobject, - spat_unit = "cell_agg", - feat_type = "rna", - new_metadata = metadata, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - - initialize(cosmx_gobject) -} - - - - - - - - - - -## Xenium #### - -#' @title Create 10x Xenium Giotto Object -#' @name createGiottoXeniumObject -#' @description Given the path to a Xenium experiment output folder, creates a -#' Giotto object -#' @param xenium_dir full path to the exported xenium directory -#' @param data_to_use which type(s) of expression data to build the gobject with -#' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') -#' @param load_format files formats from which to load the data. Either `csv` or -#' `parquet` currently supported. -#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 -#' file. Default is \code{TRUE} -#' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene -#' expression matrix -#' @param bounds_to_load vector of boundary information to load -#' (e.g. \code{'cell'} -#' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both -#' at the same time.) -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included -#' as a subcellular transcript detection (default = 20) -#' @param key_list (advanced) list of grep-based keywords to split the -#' subcellular feature detections by feature type. See details -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @details -#' -#' [\strong{QC feature types}] -#' Xenium provides info on feature detections that include more than only the -#' Gene Expression specific probes. Additional probes for QC are included: -#' \emph{blank codeword}, \emph{negative control codeword}, and -#' \emph{negative control probe}. These additional QC probes each occupy and -#' are treated as their own feature types so that they can largely remain -#' independent of the gene expression information. -#' -#' [\strong{key_list}] -#' Related to \code{data_to_use = 'subcellular'} workflow only: -#' Additional QC probe information is in the subcellular feature detections -#' information and must be separated from the gene expression information -#' during processing. -#' The QC probes have prefixes that allow them to be selected from the rest of -#' the feature IDs. -#' Giotto uses a named list of keywords (\code{key_list}) to select these QC -#' probes, with the list names being the names that will be assigned as the -#' feature type of these feature detections. The default list is used when -#' \code{key_list} = NULL. -#' -#' Default list: -#' \preformatted{ -#' list(blank_code = 'BLANK_', -#' neg_code = 'NegControlCodeword_', -#' neg_probe = c('NegControlProbe_|antisense_')) -#' } -#' -#' The Gene expression subset is accepted as the subset of feat_IDs that do not -#' map to any of the keys. -#' -#' @export -createGiottoXeniumObject <- function(xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), - qv_threshold = 20, - key_list = NULL, - instructions = NULL, - cores = NA, - verbose = TRUE) { - # 0. setup - xenium_dir <- path.expand(xenium_dir) - - # Determine data to load - data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) - - # Determine load formats - load_format <- "csv" # TODO Remove this and add as param once other options - # are available - load_format <- match.arg( - arg = load_format, choices = c("csv", "parquet", "zarr")) - - # set number of cores automatically, but with limit of 10 - cores <- determine_cores(cores) - data.table::setDTthreads(threads = cores) - - # 1. detect xenium folder and find filepaths to load - - # path_list contents: - # tx_path - # bound_paths - # cell_meta_path - # agg_expr_path - # panel_meta_path - path_list <- .read_xenium_folder( - xenium_dir = xenium_dir, - data_to_use = data_to_use, - bounds_to_load = bounds_to_load, - load_format = load_format, - h5_expression = h5_expression, - verbose = verbose - ) - - - # 2. load in data - - # data_list contents: - # feat_meta - # tx_dt - # bound_dt_list - # cell_meta - # agg_expr - data_list <- .load_xenium_folder( - path_list = path_list, - load_format = load_format, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ) - - - # TODO load images - - - # 3. Create giotto objects - - if (data_to_use == "subcellular") { - # ** feat type search keys ** - if (is.null(key_list)) { - key_list <- list( - blank_code = "BLANK_", - neg_code = "NegControlCodeword_", - neg_probe = c("NegControlProbe_|antisense_") - ) - } - - # needed: - # feat_meta - # tx_dt - # bound_dt_list - xenium_gobject <- .createGiottoXeniumObject_subcellular( - data_list = data_list, - qv_threshold = qv_threshold, - key_list = key_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - } - - if (data_to_use == "aggregate") { - # needed: - # feat_meta - # cell_meta - # agg_expr - # optional? - # tx_dt - # bound_dt_list - xenium_gobject <- .createGiottoXeniumObject_aggregate( - data_list = data_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - } - - return(xenium_gobject) -} - - - - -#' @title Create a Xenium Giotto object from subcellular info -#' @name .createGiottoXeniumObject_subcellular -#' @description Subcellular workflow for createGiottoXeniumObject -#' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} -#' @param key_list regex-based search keys for feature IDs to allow separation -#' into separate giottoPoints objects by feat_type -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included -#' as a subcellular transcript detection (default = 20) -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate -#' @keywords internal -.createGiottoXeniumObject_subcellular <- function(data_list, - key_list = NULL, - qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE) { - # data.table vars - qv <- NULL - - # Unpack data_list info - feat_meta <- data_list$feat_meta - tx_dt <- data_list$tx_dt - bound_dt_list <- data_list$bound_dt_list - - # define for data.table - cell_id <- feat_ID <- feature_name <- NULL - - vmsg("Building subcellular giotto object...", .v = verbose) - # Giotto points object - vmsg("> points data prep...", .v = verbose) - - # filter by qv_threshold - vmsg("> filtering feature detections for Phred score >= ", - qv_threshold, .v = verbose) - n_before <- tx_dt[, .N] - tx_dt_filtered <- tx_dt[qv >= qv_threshold] - n_after <- tx_dt_filtered[, .N] - - if (verbose) { - cat( - "Number of feature points removed: ", - n_before - n_after, - " out of ", n_before, "\n" - ) - } - - vmsg("> splitting detections by feat_type", .v = verbose) - # discover feat_IDs for each feat_type - all_IDs <- tx_dt_filtered[, unique(feat_ID)] - feat_types_IDs <- lapply( - key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) - rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) - feat_types_IDs <- append(rna, feat_types_IDs) - - # separate detections by feature type - points_list <- lapply( - feat_types_IDs, - function(types) { - tx_dt_filtered[feat_ID %in% types] - } - ) - - # Giotto polygons object - vmsg("> polygons data prep...", .v = verbose) - polys_list <- lapply( - bound_dt_list, - function(bound_type) { - bound_type[, cell_id := as.character(cell_id)] - } - ) - - xenium_gobject <- createGiottoObjectSubcellular( - gpoints = points_list, - gpolygons = polys_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - - # generate centroids - vmsg("Calculating polygon centroids...", .v = verbose) - xenium_gobject <- addSpatialCentroidLocations( - xenium_gobject, - poly_info = c(names(bound_dt_list)), - provenance = as.list(names(bound_dt_list)) - ) - - return(xenium_gobject) -} - - - - - -#' @title Create a Xenium Giotto object from aggregate info -#' @name .createGiottoXeniumObject_aggregate -#' @description Aggregate workflow for createGiottoXeniumObject -#' @param data_list list of data loaded by \code{.load_xenium_folder} -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular -#' @keywords internal -.createGiottoXeniumObject_aggregate <- function(data_list, - # include_analysis = FALSE, - instructions = NULL, - cores = NA, - verbose = TRUE) { - # Unpack data_list info - feat_meta <- data_list$feat_meta - cell_meta <- data_list$cell_meta - agg_expr <- data_list$agg_expr - - # define for data.table - cell_ID <- x_centroid <- y_centroid <- NULL - - # clean up names for aggregate matrices - names(agg_expr) <- gsub(pattern = " ", replacement = "_", names(agg_expr)) - geneExpMat <- which(names(agg_expr) == "Gene_Expression") - names(agg_expr)[[geneExpMat]] <- "raw" - - # set cell_id as character - cell_meta <- cell_meta[, data.table::setnames(.SD, "cell_id", "cell_ID")] - cell_meta <- cell_meta[, cell_ID := as.character(cell_ID)] - - # set up spatial locations - agg_spatlocs <- cell_meta[, .(x_centroid, y_centroid, cell_ID)] - - # set up metadata - agg_meta <- cell_meta[, !c("x_centroid", "y_centroid")] - - vmsg("Building aggregate giotto object...", .v = verbose) - xenium_gobject <- createGiottoObject( - expression = agg_expr, - spatial_locs = agg_spatlocs, - instructions = instructions, - cores = cores, - verbose = verbose - ) - - # append aggregate metadata - xenium_gobject <- addCellMetadata( - gobject = xenium_gobject, - new_metadata = agg_meta, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - xenium_gobject <- addFeatMetadata( - gobject = xenium_gobject, - new_metadata = feat_meta, - by_column = TRUE, - column_feat_ID = "feat_ID" - ) - - return(xenium_gobject) -} - - - - - - - -# *---- folder reading and detection ----* #### - - -#' @describeIn read_data_folder Read a structured MERSCOPE folder -#' @keywords internal -.read_merscope_folder <- function(merscope_dir, - data_to_use, - cores = NA, - verbose = NULL) { - # prepare dir_items list - dir_items <- list( - `boundary info` = "*cell_boundaries*", - `image info` = "*images*", - `cell feature matrix` = "*cell_by_gene*", - `cell metadata` = "*cell_metadata*", - `raw transcript info` = "*transcripts*" - ) - - # prepare require_data_DT - sub_reqs <- data.table::data.table( - workflow = c("subcellular"), - item = c( - "boundary info", - "raw transcript info", - "image info", - "cell by gene matrix", - "cell metadata" - ), - needed = c(TRUE, TRUE, FALSE, FALSE, FALSE) - ) - - agg_reqs <- data.table::data.table( - workflow = c("aggregate"), - item = c( - "boundary info", - "raw transcript info", - "image info", - "cell by gene matrix", - "cell metadata" - ), - needed = c(FALSE, FALSE, FALSE, TRUE, TRUE) - ) - - require_data_DT <- rbind(sub_reqs, agg_reqs) - - dir_items <- .read_data_folder( - spat_method = "MERSCOPE", - data_dir = merscope_dir, - dir_items = dir_items, - data_to_use = data_to_use, - require_data_DT = require_data_DT, - cores = cores, - verbose = verbose - ) - - return(dir_items) -} - - - -#' @title Read a structured CosMx folder -#' @name .read_cosmx_folder -#' @inheritParams createGiottoCosMxObject -#' @seealso createGiottoCosMxObject load_cosmx_folder -#' @returns path_list a list of cosmx files discovered and their filepaths. NULL -#' values denote missing items -#' @keywords internal -.read_cosmx_folder <- function(cosmx_dir, - verbose = TRUE) { - ch <- box_chars() - - if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) - stop("The full path to a cosmx directory must be given.") - vmsg("A structured CosMx directory will be used\n", .v = verbose) - - # find directories (length = 1 if present, length = 0 if missing) - dir_items <- list( - `CellLabels folder` = "*CellLabels", - `CompartmentLabels folder` = "*CompartmentLabels", - `CellComposite folder` = "*CellComposite", - `CellOverlay folder` = "*CellOverlay", - `transcript locations file` = "*tx_file*", - `fov positions file` = "*fov_positions_file*", - `expression matrix file` = "*exprMat_file*", - `metadata file` = "*metadata_file*" - ) - dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x))) - dir_items_lengths <- lengths(dir_items) - - if (isTRUE(verbose)) { - message("Checking directory contents...") - for (item in names(dir_items)) { - if (dir_items_lengths[[item]] > 0) { - message(ch$s, "> ", item, " found") - } else { - warning(item, " is missing\n") - } - } - } - - # select first directory in list if multiple are detected - if (any(dir_items_lengths > 1)) { - warning("Multiple matches for expected subdirectory item(s).\n - First matching item selected") - - multiples <- which(dir_items_lengths > 1) - for (mult_i in multiples) { - message(names(dir_items)[[mult_i]], "multiple matches found:") - print(dir_items[[mult_i]]) - dir_items[[mult_i]] <- dir_items[[mult_i]][[1]] - } - } - vmsg("Directory check done", .v = verbose) - - return(dir_items) -} - - - - -#' @title Read a structured xenium folder -#' @name .read_xenium_folder -#' @inheritParams createGiottoXeniumObject -#' @keywords internal -#' @returns path_list a list of xenium files discovered and their filepaths. NULL -#' values denote missing items -.read_xenium_folder <- function(xenium_dir, - data_to_use = "subcellular", - bounds_to_load = c("cell"), - load_format = "csv", - h5_expression = FALSE, - verbose = TRUE) { - # Check needed packages - if (load_format == "parquet") { - package_check(pkg_name = "arrow", repository = "CRAN") - package_check(pkg_name = "dplyr", repository = "CRAN") - } - if (isTRUE(h5_expression)) { - package_check(pkg_name = "hdf5r", repository = "CRAN") - } - - ch <- box_chars() - - - # 0. test if folder structure exists and is as expected - - - if (is.null(xenium_dir) | !dir.exists(xenium_dir)) - stop("The full path to a xenium directory must be given.") - vmsg("A structured Xenium directory will be used\n", .v = verbose) - - # find items (length = 1 if present, length = 0 if missing) - dir_items <- list( - `analysis info` = "*analysis*", - `boundary info` = "*bound*", - `cell feature matrix` = "*cell_feature_matrix*", - `cell metadata` = "*cells*", - `image info` = "*tif", - `panel metadata` = "*panel*", - `raw transcript info` = "*transcripts*", - `experiment info (.xenium)` = "*.xenium" - ) - - dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x))) - dir_items_lengths <- lengths(dir_items) - - if (isTRUE(verbose)) { - message("Checking directory contents...") - for (item in names(dir_items)) { - # IF ITEM FOUND - - if (dir_items_lengths[[item]] > 0) { - message(ch$s, "> ", item, " found") - for (item_i in seq_along(dir_items[[item]])) { - # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) - message(ch$s, ch$s, ch$l, ch$h, ch$h, subItem) - } - } else { - # IF ITEM MISSING - # Based on workflow, determine if: - # necessary (error) - # optional (warning) - - if (data_to_use == "subcellular") { - # necessary items - if (item %in% c("boundary info", "raw transcript info")) - stop(item, " is missing") - # optional items - if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata")) - warning(item, " is missing (optional)") - # items to ignore: analysis info, cell feature matrix, - # cell metadata - } else if (data_to_use == "aggregate") { - # necessary items - if (item %in% c("cell feature matrix", "cell metadata")) - stop(item, " is missing") - # optional items - if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata", "analysis info")) - warning(item, " is missing (optional)") - # items to ignore: boundary info, raw transcript info - } - } - } - } - - - # 1. Select data to load - - - # **** transcript info **** - tx_path <- NULL - tx_path <- dir_items$`raw transcript info`[grepl( - pattern = load_format, dir_items$`raw transcript info`)] - # **** cell metadata **** - cell_meta_path <- NULL - cell_meta_path <- dir_items$`cell metadata`[grepl( - pattern = load_format, dir_items$`cell metadata`)] - - # **** boundary info **** - # Select bound load format - if (load_format != "zarr") { # No zarr available for boundary info - dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = load_format, dir_items$`boundary info`)] - } else { - dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = "csv", dir_items$`boundary info`)] - } - - # Organize bound paths by type of bound (bounds_to_load param) - bound_paths <- NULL - bound_names <- bounds_to_load - bounds_to_load <- as.list(bounds_to_load) - bound_paths <- lapply(bounds_to_load, function(x) dir_items$`boundary info`[ - grepl(pattern = x, dir_items$`boundary info`)]) - names(bound_paths) <- bound_names - - # **** aggregated expression info **** - agg_expr_path <- NULL - if (isTRUE(h5_expression)) { # h5 expression matrix loading is default - agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "h5", dir_items$`cell feature matrix`)] - } else if (load_format == "zarr") { - agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "zarr", dir_items$`cell feature matrix`)] - } else { # No parquet for aggregated expression - default to normal 10x loading - agg_expr_path <- dir_items$`cell feature matrix`[sapply( - dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x))] - if (length(agg_expr_path) == 0) { - stop(wrap_txt( - "Expression matrix cannot be loaded.\n - Has cell_feature_matrix(.tar.gz) been unpacked into a - directory?" - )) - } - } - if (data_to_use == "aggregate") { - if (length(path_list$agg_expr_path) == 0) { - stop(wrap_txt( - "Aggregated expression not found.\n - Please confirm h5_expression and load_format params are correct" - )) - } - } - - # **** panel info **** - panel_meta_path <- NULL - panel_meta_path <- dir_items$`panel metadata` - - - vmsg("Directory check done", .v = verbose) - - path_list <- list( - "tx_path" = tx_path, - "bound_paths" = bound_paths, - "cell_meta_path" = cell_meta_path, - "agg_expr_path" = agg_expr_path, - "panel_meta_path" = panel_meta_path - ) - - return(path_list) -} - - - - - - -# * ---- folder loading ---- * #### - - - -## MERSCOPE #### - -#' @title Load MERSCOPE data from folder -#' @name load_merscope_folder -#' @param dir_items list of full filepaths from -#' \code{\link{.read_merscope_folder}} -#' @inheritParams createGiottoMerscopeObject -#' @returns list of loaded-in MERSCOPE data -NULL - -#' @rdname load_merscope_folder -#' @keywords internal -.load_merscope_folder <- function(dir_items, - data_to_use, - fovs = NULL, - poly_z_indices = 1L:7L, - cores = NA, - verbose = TRUE) { - # 1. load data_to_use-specific - if (data_to_use == "subcellular") { - data_list <- .load_merscope_folder_subcellular( - dir_items = dir_items, - data_to_use = data_to_use, - fovs = fovs, - poly_z_indices = poly_z_indices, - cores = cores, - verbose = verbose - ) - } else if (data_to_use == "aggregate") { - data_list <- .load_merscope_folder_aggregate( - dir_items = dir_items, - data_to_use = data_to_use, - cores = cores, - verbose = verbose - ) - } else { - stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) - } - - # 2. Load images if available - if (!is.null(dir_items$`image info`)) { - ## micron to px scaling factor - micronToPixelScale <- Sys.glob(paths = file.path( - dir_items$`image info`, "*micron_to_mosaic_pixel_transform*"))[[1]] - micronToPixelScale <- data.table::fread( - micronToPixelScale, nThread = cores) - # add to data_list - data_list$micronToPixelScale <- micronToPixelScale - - ## staining images - ## determine types of stains - images_filenames <- list.files(dir_items$`image info`) - bound_stains_filenames <- images_filenames[ - grep(pattern = ".tif", images_filenames)] - bound_stains_types <- sapply(strsplit( - bound_stains_filenames, "_"), `[`, 2) - bound_stains_types <- unique(bound_stains_types) - - img_list <- lapply_flex(bound_stains_types, function(stype) { - img_paths <- Sys.glob(paths = file.path( - dir_items$`image info`, paste0("*", stype, "*"))) - - lapply_flex(img_paths, function(img) { - createGiottoLargeImage(raster_object = img) - }, cores = cores) - }, cores = cores) - # add to data_list - data_list$images <- img_list - } - - - - return(data_list) -} - - - -#' @describeIn load_merscope_folder Load items for 'subcellular' workflow -#' @keywords internal -.load_merscope_folder_subcellular <- function(dir_items, - data_to_use, - cores = NA, - poly_z_indices = 1L:7L, - verbose = TRUE, - fovs = NULL) { - if (isTRUE(verbose)) message("Loading transcript level info...") - if (is.null(fovs)) { - tx_dt <- data.table::fread( - dir_items$`raw transcript info`, nThread = cores) - } else { - message("Selecting FOV subset transcripts") - tx_dt <- fread_colmatch( - file = dir_items$`raw transcript info`, - col = "fov", - values_to_match = fovs, - verbose = FALSE, - nThread = cores - ) - } - tx_dt[, c("x", "y") := NULL] # remove unneeded cols - data.table::setcolorder( - tx_dt, c("gene", "global_x", "global_y", "global_z")) - - if (isTRUE(verbose)) message("Loading polygon info...") - poly_info <- readPolygonFilesVizgenHDF5( - boundaries_path = dir_items$`boundary info`, - z_indices = poly_z_indices, - flip_y_axis = TRUE, - fovs = fovs - ) - - data_list <- list( - "poly_info" = poly_info, - "tx_dt" = tx_dt, - "micronToPixelScale" = NULL, - "expr_dt" = NULL, - "cell_meta" = NULL, - "images" = NULL - ) -} - - - -#' @describeIn load_merscope_folder Load items for 'aggregate' workflow -#' @keywords internal -.load_merscope_folder_aggregate <- function(dir_items, - data_to_use, - cores = NA, - verbose = TRUE) { - # metadata is polygon-related measurements - vmsg("Loading cell metadata...", .v = verbose) - cell_metadata_file <- data.table::fread( - dir_items$`cell metadata`, nThread = cores) - - vmsg("Loading expression matrix", .v = verbose) - expr_dt <- data.table::fread( - dir_items$`cell feature matrix`, nThread = cores) - - - data_list <- list( - "poly_info" = NULL, - "tx_dt" = NULL, - "micronToPixelScale" = NULL, - "expr_dt" = expr_dt, - "cell_meta" = cell_metadata_file, - "images" = NULL - ) -} - - - - - - - -## CosMx #### - - - -.cosmx_transcript <- function( - path, - fovs = NULL, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - dropcols = c( - "x_local_px", - "y_local_px", - "cell_ID", - "cell" - ), - micron = FALSE, - px2mm = 0.12028, - cores = determine_cores(), - verbose = NULL - ) { - - if (missing(path)) { - stop(wrap_txt( - "No path to tx file provided or auto-detected" - ), call. = FALSE) - } - - checkmate::assert_file_exists(path) - - vmsg(.v = verbose, "loading feature detections...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) - if (!is.null(fovs)) { - # subset to only needed FOVs - tx <- tx[fov %in% as.numeric(fovs),] - } - - # micron scaling if desired - if (micron) { - px2micron <- px2mm / 1000 - tx[, x_global_px := x_global_px * px2micron] - tx[, y_global_px := y_global_px * px2micron] - } - - # giottoPoints ----------------------------------------------------- # - - # static gpoints params - gpoints_params <- list() - gpoints_params$feat_type <- feat_type - gpoints_params$split_keyword <- split_keyword - gpoints_params$x_colname <- "x_global_px" - gpoints_params$y_colname <- "y_global_px" - gpoints_params$feat_ID_colname <- "target" - - gpoints <- do.call(createGiottoPoints, c(list(x = tx), gpoints_params)) - # ensure output is always a list - if (!is.list(gpoints)) { - gpoints <- list(gpoints) - names(gpoints) <- objName(gpoints[[1L]]) - } - - return(gpoints) -} - -#' @name .cosmx_infer_fov_shifts -#' @title Infer CosMx local to global shifts -#' @description -#' From NanoString CosMx spatial info, infer the FOV shifts needed. These -#' values are needed for anything that requires the use of images, since those -#' do not come with spatial extent information embedded. -#' @param tx_dt transcript data.table input to use -#' (Only one of tx_dt or meta_dt should be used) -#' @param meta_dt cell metadata data.table input to use -#' (Only one of tx_dt or meta_dt should be used) -#' @param navg max n values to check per FOV to find average shift -#' @param flip_loc_y whether a y flip needs to be performed on the local y -#' values before comparing with global y values. See details -#' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), -#' yshift (numeric). Values should always be in pixels -#' @details -#' Shifts are found by looking at the average of differences between xy global -#' and local coordinates in either the metadata or transcripts file. The number -#' of shift value to average across is determined with `navg`. The average is -#' in place to get rid of small differences in shifts, likely due to rounding -#' errors. Across the different versions of the CosMx exports, whether the -#' local y values are flipped compared to the global values has differed, so -#' there is also a step that checks the variance of y values per sampled set -#' per fov. In cases where the shift is calculated with the correct (inverted -#' or non-inverted) y local values, the variance is expected to be very low. -#' When the variance is higher than 0.001, the function is re-run with the -#' opposite `flip_loc_y` value. -#' @keywords internal -.cosmx_infer_fov_shifts <- function( - tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L -) { - fov <- NULL # NSE vars - if (!missing(tx_dt)) { - tx_head <- tx_dt[, head(.SD, navg), by = fov] - x <- tx_head[, mean(x_global_px - x_local_px), by = fov] - if (flip_loc_y) { - - # test if flip is needed - # Usual yshift variance / fov expected when correct is 0 to 1e-22 - # if var is too high for any fov, swap `flip_loc_y` value - y <- tx_head[, var(y_global_px + y_local_px), by = fov] - if (y[, any(V1 > 0.001)]) { - return(.cosmx_infer_fov_shifts( - tx_dt = tx_dt, flip_loc_y = FALSE, navg = navg - )) - } - - # use +y if local y values are flipped - y <- tx_head[, mean(y_global_px + y_local_px), by = fov] - } else { - y <- tx_head[, mean(y_global_px - y_local_px), by = fov] - } - } else if (!missing(meta_dt)) { - meta_head <- meta_dt[, head(.SD, navg), by = fov] - x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] - if (flip_loc_y) { - - # test if flip is needed - # Usual yshift variance / fov expected when correct is 0 to 1e-22 - # if var is too high for any fov, swap `flip_loc_y` value - y <- meta_head[, var(CenterY_global_px + CenterY_local_px), by = fov] - if (y[, any(V1 > 0.001)]) { - return(.cosmx_infer_fov_shifts( - meta_dt = meta_dt, flip_loc_y = FALSE, navg = navg - )) - } - - # use +y if local y values are flipped - y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), - by = fov] - } else { - y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), - by = fov] - } - } else { - stop("One of tx_dt or meta_dt must be provided\n") - } - - res <- merge(x, y, by = "fov") - data.table::setnames(res, new = c("fov", "x", "y")) - - return(res) -} - -.cosmx_imgname_fovparser <- function( - path -) { - im_names <- list.files(path) - fovs <- as.numeric(sub(".*F(\\d+)\\..*", "\\1", im_names)) - if (any(is.na(fovs))) { - warning(wrap_txt( - "Images to load should be sets of images/fov in subdirectories. - No other files should be present." - )) - } - return(fovs) -} - -.cosmx_poly <- function( - path, - slide = 1, - fovs = NULL, - name = "cell", - # VERTICAL FLIP + NO SHIFTS - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - remove_background_polygon = TRUE, - micron = FALSE, - px2mm = 0.12028, - offsets, - verbose = NULL -) { - # NSE params - f <- x <- y <- NULL - - if (missing(path)) { - stop(wrap_txt( - "No path to polys subdirectory provided or auto-detected" - ), call. = FALSE) - } - - GiottoUtils::vmsg(.v = verbose, "loading segmentation masks...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - mask_params <- list( - # static params - mask_method = "multiple", - # A background poly for nanostring masks sometimes shows up. - # removal works by looking for any polys with size more than 90% of the - # total FOV along either x or y axis - remove_background_polygon = remove_background_polygon, - fill_holes = TRUE, - calc_centroids = TRUE, - remove_unvalid_polygons = TRUE, - # input params - name = name, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - shift_vertical_step = shift_vertical_step, - shift_horizontal_step = shift_horizontal_step, - verbose = FALSE - ) - - fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL - progressr::with_progress({ - p <- progressr::progressor(along = fovs) - - gpolys <- lapply(fovs, function(f) { - segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) - # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID - mask_params$ID_fmt = paste0( - sprintf("c_%d_%d_", slide, f), "%d" - ) - - gpoly <- do.call( - createGiottoPolygonsFromMask, - args = c(list(maskfile = segfile), mask_params) - ) - - xshift <- offsets[fov == f, x] - yshift <- offsets[fov == f, y] - - # if micron scale - if (micron) { - px2micron <- px2mm / 1000 - gpoly <- rescale( - gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 - ) - xshift <- xshift * px2micron - yshift <- yshift * px2micron - } - - gpoly <- spatShift(x = gpoly, dx = xshift, dy = yshift) - p(message = sprintf("F%03d", f)) - return(gpoly) - }) - }) - - if (length(gpolys) > 1L) { - gpolys <- do.call(rbind, args = gpolys) - } - - # never return lists. Only the single merged gpoly - return(gpolys) -} - -.cosmx_cellmeta <- function( - path, - slide = 1, - fovs = NULL, - dropcols = c( - "CenterX_local_px", - "CenterY_local_px", - "CenterX_global_px", - "CenterY_global_px", - "cell_id" - ), - cores = determine_cores(), - verbose = NULL - ) { - - if (missing(path)) { - stop(wrap_txt( - "No path to metadata file provided or auto-detected" - ), call. = FALSE) - } - - GiottoUtils::vmsg(.v = verbose, "loading cell metadata...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - verbose <- verbose %null% TRUE - - meta_dt <- data.table::fread(input = path, nThread = cores) - - # remove unneeded cols - dropcols <- dropcols[dropcols %in% colnames(meta_dt)] - meta_dt[, (dropcols) := NULL] # remove dropcols - - # subset to needed fovs - if (!is.null(fovs)) { - fovs <- as.integer(fovs) - meta_dt <- meta_dt[fov %in% fovs,] - } - - # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` - if ("cell" %in% colnames(meta_dt)) { - # assume already formatted (current datasets Mar-27-2024) - meta_dt[, c("fov", "cell_ID") := NULL] - data.table::setnames(meta_dt, old = "cell", "cell_ID") - } else { - # older datasets - meta_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] - # remove fov - meta_dt[, fov := NULL] - } - - - # TODO figure out what to do about protein expression here. - cx <- createCellMetaObj( - metadata = meta_dt, - spat_unit = "cell", - feat_type = "rna", - provenance = "cell", - verbose = verbose - ) - return(cx) -} - -.cosmx_expression <- function( - path, - slide = 1, - fovs = NULL, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - cores = determine_cores(), - verbose = NULL - ) { - - if (missing(path)) { - stop(wrap_txt( - "No path to exprMat file provided or auto-detected" - ), call. = FALSE) - } - - GiottoUtils::vmsg(.v = verbose, "loading expression matrix...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - expr_dt <- data.table::fread(input = path, nThread = cores) - - # subset to needed fovs - if (!is.null(fovs)) { - fovs <- as.integer(fovs) - expr_dt <- expr_dt[fov %in% fovs,] - } - - # remove background values (cell 0) - expr_dt <- expr_dt[cell_ID != 0L,] - - # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` - expr_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] - # remove fov - expr_dt[, fov := NULL] - - # convert to Matrix - expr_mat <- dt_to_matrix(expr_dt) - expr_mat <- t_flex(expr_mat) - - # split expression for rna / negprb if any split keywords provided. - # Output of this chunk should always be a named list of 1 or more matrices - if (length(split_keyword) > 0) { - expr_list <- vector(mode = "list", length = length(feat_type)) - names(expr_list) <- feat_type - # iterate through other expr types - for (key_i in seq_along(split_keyword)) { - feat_ids <- rownames(expr_mat) - bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) - # subset and store split matrix - sub_mat <- expr_mat[bool,] - expr_list[[key_i + 1L]] <- sub_mat - # remaining matrix - expr_mat <- expr_mat[!bool,] - } - # assign the main expr - expr_list[[1L]] <- expr_mat - } else { - expr_list <- list(expr_mat) - names(expr_list) <- feat_type[[1L]] - } - - expr_list <- lapply(seq_along(expr_list), function(expr_i) { - createExprObj(expression_data = expr_list[[expr_i]], - spat_unit = "cell", - feat_type = names(expr_list)[[expr_i]], - name = "raw", - provenance = "cell") - }) - - return(expr_list) -} - -.cosmx_image <- function( - path, - fovs = NULL, - img_type = "composite", - img_name_fmt = paste(img_type, "_fov%03d"), - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - micron = FALSE, - px2mm = 0.12028, - offsets, - verbose = NULL - ) { - - if (missing(path)) { - stop(wrap_txt( - "No path to image subdirectory to load provided or auto-detected" - ), call. = FALSE) - } - - GiottoUtils::vmsg(.v = verbose, sprintf("loading %s images...", img_type)) - vmsg(.v = verbose, .is_debug = TRUE, path) - - fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL - verbose <- verbose %null% TRUE - - progressr::with_progress({ - p <- progressr::progressor(along = fovs) - - gimg_list <- lapply(fovs, function(f) { - imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) - img_name <- sprintf(img_name_fmt, f) - - gimg <- createGiottoLargeImage( - raster_object = imgfile, - name = img_name, - negative_y = negative_y, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - verbose = verbose - ) - - xshift <- offsets[fov == f, x] - yshift <- offsets[fov == f, y] - - if (micron) { - px2micron <- px2mm / 1000 - gimg <- rescale( - gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 - ) - xshift <- xshift * px2micron - yshift <- yshift * px2micron - } - - gimg <- spatShift(x = gimg, dx = xshift, dy = yshift) - p(message = sprintf("F%03d", f)) - return(gimg) - }) - }) - - - return(gimg_list) -} - - - -#' @title Load CosMx folder subcellular info -#' @name .load_cosmx_folder_subcellular -#' @description loads in the feature detections information. Note that the mask -#' images are still required for a working subcellular object, and those are -#' loaded in \code{\link{.createGiottoCosMxObject_subcellular}} -#' @inheritParams createGiottoCosMxObject -#' @returns list -#' @keywords internal -.load_cosmx_folder_subcellular <- function(dir_items, - FOVs = NULL, - cores, - verbose = TRUE) { - vmsg(.v = verbose, "Loading subcellular information...") - - # subcellular checks - if (!file.exists(dir_items$`transcript locations file`)) { - stop(wrap_txt("No transcript locations file (.csv) detected")) - } - if (!file.exists(dir_items$`fov positions file`)) { - stop(wrap_txt("No fov positions file (.csv) detected")) - } - - # FOVs to load - vmsg(.v = verbose, "Loading FOV offsets...") - fov_offset_file <- fread( - input = dir_items$`fov positions file`, nThread = cores) - if (is.null(FOVs)) FOVs <- fov_offset_file$fov # default to ALL FOVs - FOV_ID <- as.list(sprintf("%03d", FOVs)) - - # TODO Load only relevant portions of file? - - vmsg(.v = verbose, "Loading transcript level info...") - tx_coord_all <- fread( - input = dir_items$`transcript locations file`, nThread = cores) - vmsg(.v = verbose, "Subcellular load done") - - data_list <- list( - "FOV_ID" = FOV_ID, - "fov_offset_file" = fov_offset_file, - "tx_coord_all" = tx_coord_all - ) - - return(data_list) -} - - - -#' @title Load CosMx folder aggregate info -#' @name .load_cosmx_folder_aggregate -#' @inheritParams createGiottoCosMxObject -#' @returns list -#' @keywords internal -.load_cosmx_folder_aggregate <- function(dir_items, - cores, - verbose = TRUE) { - # data.table vars - fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- - CenterY_global_px <- CenterX_local_px <- - CenterY_local_px <- x_shift <- y_shift <- NULL - - # load aggregate information - vmsg(.v = verbose, "Loading provided aggregated information...") - - # aggregate checks - if (!file.exists(dir_items$`expression matrix file`)) - stop(wrap_txt("No expression matrix file (.csv) detected")) - if (!file.exists(dir_items$`metadata file`)) - stop(wrap_txt("No metadata file (.csv) detected. Needed for cell - spatial locations.")) - - # read in aggregate data - expr_mat <- fread( - input = dir_items$`expression matrix file`, nThread = cores) - metadata <- fread(input = dir_items$`metadata file`, nThread = cores) - - # setorder expression and spatlocs - data.table::setorder(metadata, fov, cell_ID) - data.table::setorder(expr_mat, fov, cell_ID) - - - # generate unique cell IDs - expr_mat[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] - expr_mat <- expr_mat[, fov := NULL] - - metadata[, fov_cell_ID := cell_ID] - metadata[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] - # reorder - data.table::setcolorder(x = metadata, c("cell_ID", "fov", "fov_cell_ID")) - - - # extract spatial locations - spatlocs <- metadata[, .(CenterX_global_px, CenterY_global_px, cell_ID)] - spatlocs_fov <- metadata[, .(CenterX_local_px, CenterY_local_px, cell_ID)] - # regenerate FOV shifts - metadata[, x_shift := CenterX_global_px - CenterX_local_px] - metadata[, y_shift := CenterY_global_px - CenterY_local_px] - fov_shifts <- metadata[, .(mean(x_shift), mean(y_shift)), fov] - colnames(fov_shifts) <- c("fov", "x_shift", "y_shift") - - - # rename spatloc column names - spatloc_oldnames <- c("CenterX_global_px", "CenterY_global_px", "cell_ID") - spatloc_oldnames_fov <- c("CenterX_local_px", "CenterY_local_px", "cell_ID") - spatloc_newnames <- c("sdimx", "sdimy", "cell_ID") - data.table::setnames(spatlocs, old = spatloc_oldnames, new = spatloc_newnames) - data.table::setnames( - spatlocs_fov, old = spatloc_oldnames_fov, new = spatloc_newnames) - - # cleanup metadata and spatlocs - metadata <- metadata[, c("CenterX_global_px", "CenterY_global_px", - "CenterX_local_px", "CenterY_local_px") := NULL] - # find unique cell_IDs present in both expression and metadata - giotto_cell_ID <- unique(intersect(expr_mat$cell_ID, metadata$cell_ID)) - - # subset to only unique cell_IDs - expr_mat <- expr_mat[cell_ID %in% giotto_cell_ID, ] - metadata <- metadata[cell_ID %in% giotto_cell_ID, ] - - - # convert protein metadata to expr mat - # take all mean intensity protein information except for MembraneStain and DAPI - protein_meta_cols <- colnames(metadata) - protein_meta_cols <- protein_meta_cols[ - grepl(pattern = "Mean.*", x = protein_meta_cols)] - protein_meta_cols <- protein_meta_cols[ - !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI")] - protein_meta_cols <- c("cell_ID", protein_meta_cols) - - prot_expr <- metadata[, protein_meta_cols, with = FALSE] - prot_cell_ID <- metadata[, cell_ID] - protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), - dimnames = list(prot_expr[[1]], - colnames(prot_expr[, -1])), - sparse = FALSE) - protM <- t_flex(protM) - - # convert expression to sparse matrix - spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), - dimnames = list(expr_mat[[1]], - colnames(expr_mat[, -1])), - sparse = TRUE) - spM <- t_flex(spM) - - ## Ready for downstream aggregate gobject creation or appending into - # existing subcellular Giotto object ## - - data_list <- list( - "spatlocs" = spatlocs, - "spatlocs_fov" = spatlocs_fov, - "metadata" = metadata, - "protM" = protM, - "spM" = spM, - "fov_shifts" = fov_shifts - ) - - return(data_list) -} - - - - - - - -## Xenium #### - - -.xenium_transcript <- function( - path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - dropcols = c(), - qv_threshold = 20, - cores = determine_cores(), - verbose = NULL -) { - if (missing(path)) { - stop(wrap_txt( - "No path to tx file provided or auto-detected" - ), call. = FALSE) - } - - checkmate::assert_file_exists(path) - e <- file_extension(path) %>% head(1L) %>% tolower() - vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) - - # read in - a <- list( - path = path, - dropcols = dropcols, - qv_threshold = qv_threshold, - verbose = verbose - ) - vmsg("Loading transcript level info...", .v = verbose) - tx <- switch(e, - "csv" = do.call(.xenium_transcript_csv, - args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_transcript_parquet, args = a), - "zarr" = stop('zarr not yet supported') - ) - - # create gpoints - gpointslist <- createGiottoPoints( - x = tx, - feat_type = feat_type, - split_keyword = split_keyword - ) - - if (inherits(gpointslist, "list")) { - gpointslist <- list(gpointslist) - } - - return(gpointslist) -} - - -.xenium_transcript_csv <- function( - path, - dropcols = c(), - qv_threshold = 20, - cores = determine_cores(), - verbose = NULL - ) { - tx_dt <- data.table::fread( - path, nThread = cores, - colClasses = c(transcript_id = "character"), - drop = dropcols - ) - data.table::setnames( - x = tx_dt, - old = c('feature_name', 'x_location', 'y_location'), - new = c('feat_ID', 'x', 'y') - ) - - # qv filtering - if (!is.null(qv_threshold)) { - n_before <- tx_dt[,.N] - tx_dt <- tx_dt[qv >= qv_threshold] - n_after <- tx_dt[,.N] - - vmsg( - .v = verbose, - sprintf( - "QV cutoff: %d\n Feature points removed: %d, out of %d", - qv_threshold, - n_before - n_after, - n_before - ) - ) - } - - return(tx_dt) -} - -.xenium_transcript_parquet <- function( - path, - dropcols = c(), - qv_threshold = 20, - verbose = NULL - ) { - package_check( - pkg_name = c("arrow", "dplyr"), - repository = c("CRAN:arrow", "CRAN:dplyr") - ) - - tx_arrow <- arrow::read_parquet(file = path, as_data_frame = FALSE) %>% - dplyr::mutate(transcript_id = cast(transcript_id, arrow::string())) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - dplyr::mutate(feature_name = cast(feature_name, arrow::string())) %>% - dplyr::select(-dplyr::any_of(dropcols)) - - # qv filtering - if (!is.null(qv_threshold)) { - .nr <- function(x) { - dplyr::tally(x) %>% dplyr::collect() %>% as.numeric() - } - n_before <- .nr(tx_arrow) - tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) - n_after <- .nr(tx_arrow) - - vmsg( - .v = verbose, - sprintf( - "QV cutoff: %d\n Feature points removed: %d, out of %d", - qv_threshold, - n_before - n_after, - n_before - ) - ) - } - - # convert to data.table - tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() - data.table::setnames( - x = tx_dt, - old = c('feature_name', 'x_location', 'y_location'), - new = c('feat_ID', 'x', 'y') - ) - return(tx_dt) -} - -.xenium_poly <- function( - path, - name = "cell", - calc_centroids = TRUE, - cores = determine_cores(), - verbose = NULL - ) { - checkmate::assert_file_exists(path) - checkmate::assert_character(name, len = 1L) - - e <- file_extension(path) %>% head(1L) %>% tolower() - - a <- list(path = path) - vmsg("Loading boundary info...", .v = verbose) - polys <- switch(e, - "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_poly_parquet, args = a), - "zarr" = stop("zarr not yet supported") - ) - - # create gpolys - verbose <- verbose %null% FALSE - gpolys <- createGiottoPolygon( - x = polys, - name = name, - calc_centroids = calc_centroids, - verbose = verbose - ) - return(gpolys) -} - -.xenium_poly_csv <- function(path, cores = determine_cores()) { - data.table::fread( - path, nThread = cores, - colClasses = c(cell_id = "character") - ) -} - -.xenium_poly_parquet <- function(path) { - package_check( - pkg_name = c("arrow", "dplyr"), - repository = c("CRAN:arrow", "CRAN:dplyr") - ) - # read & convert to DT - arrow::read_parquet(file = path, as_data_frame = FALSE) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() -} - -.xenium_cellmeta <- function( - path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL - ) { - if (missing(path)) { - stop(wrap_txt( - "No path to metadata file provided or auto-detected" - ), call. = FALSE) - } - checkmate::assert_file_exists(path) - - e <- file_extension(path) %>% head(1L) %>% tolower() - a <- list(path = path, dropcols = dropcols) - vmsg('Loading cell metadata...', .v = verbose) - vmsg(.v = verbose, .is_debug = TRUE, path) - verbose <- verbose %null% TRUE - cx <- switch(e, - "csv" = do.call(.xenium_cellmeta_csv, args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_cellmeta_parquet, args = a) - ) - - cx <- createCellMetaObj( - metadata = cx, - spat_unit = "cell", - feat_type = "rna", - provenance = "cell", - verbose = verbose - ) - return(cx) -} - -.xenium_cellmeta_csv <- function( - path, dropcols = c(), cores = determine_cores() -) { - data.table::fread(path, nThread = cores, drop = dropcols) -} - -.xenium_cellmeta_parquet <- function(path, dropcols = c()) { - arrow::read_parquet(file = path, as_data_frame = FALSE) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - dplyr::select(-dplyr::any_of(dropcols)) %>% - as.data.frame() %>% - data.table::setDT() -} - -.xenium_featmeta <- function( - path, - gene_ids = "symbols", - dropcols = c(), - cores = determine_cores(), - verbose = NULL -) { - if (missing(path)) { - stop(wrap_txt( - "No path to panel metadata file provided or auto-detected" - ), call. = FALSE) - } - checkmate::assert_file_exists(path) - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_ext <- GiottoUtils::file_extension(path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json( - path = path, gene_ids = gene_ids - ) - } else { - feat_meta <- data.table::fread(path, nThread = cores) - colnames(feat_meta)[[1]] <- 'feat_ID' - } - - dropcols <- dropcols[dropcols %in% colnames(feat_meta)] - feat_meta[, (dropcols) := NULL] # remove dropcols - - fx <- createFeatMetaObj( - metadata = feat_meta, - spat_unit = "cell", - feat_type = "rna", - provenance = "cell", - verbose = verbose - ) - - return(fx) -} - -.xenium_expression <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL -) { - if (missing(path)) { - stop(wrap_txt( - "No path to expression dir (mtx) or file (h5) provided or auto-detected" - ), call. = FALSE) - } - checkmate::assert_file_exists(path) - a <- list( - path = path, - gene_ids = gene_ids, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type - ) - - if (checkmate::test_directory_exists(path)) { - e <- "mtx" # assume mtx dir - # zarr can also be unzipped into a dir, but zarr implementation with - # 32bit UINT support is not available in R yet (needed for cell_IDs). - } else { - e <- file_extension(path) %>% head(1L) %>% tolower() - } - - vmsg("Loading 10x pre-aggregated expression...", .v = verbose) - vmsg(.v = verbose, .is_debug = TRUE, path) - verbose <- verbose %null% TRUE - ex <- switch(e, - "mtx" = do.call(.xenium_cellmeta_csv, args = a), - "h5" = do.call(.xenium_cellmeta_parquet, args = a) - ) - - eo <- createExprObj( - expression_data = ex, - name = "raw", - spat_unit = "cell", - feat_type = "rna", - provenance = "cell" - ) - return(eo) -} - -.xenium_expression_h5 <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE -) { - get10Xmatrix_h5( - path_to_data = path, - gene_ids = gene_ids, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type - ) -} - -.xenium_expression_mtx <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE -) { - gene_ids <- switch(gene_ids, - "ensembl" = 1, - "symbols" = 2 - ) - get10Xmatrix( - path_to_data = path, - gene_column_index = gene_ids, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type - ) -} - -.xenium_image <- function( - path, - name = "image", - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - affine = NULL, - verbose = NULL -) { - if (missing(path)) { - stop(wrap_txt( - "No path to image file to load provided or auto-detected" - ), call. = FALSE) - } - checkmate::assert_file_exists(path) - - vmsg(.v = verbose, sprintf("loading image as '%s'", name)) - vmsg(.v = verbose, .is_debug = TRUE, path) - vmsg( - .v = verbose, .is_debug = TRUE, - sprintf("negative_y: %s\nflip_vertical: %s\nflip_horizontal: %s", - negative_y, flip_vertical, flip_horizontal), - .prefix = "" - ) - - verbose <- verbose %null% TRUE - - # TODO -} - - - -#' @title Load xenium data from folder -#' @name load_xenium_folder -#' @param path_list list of full filepaths from .read_xenium_folder -#' @inheritParams createGiottoXeniumObject -#' @returns list of loaded in xenium data -NULL - -#' @rdname load_xenium_folder -#' @keywords internal -.load_xenium_folder <- function(path_list, - load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", - gene_column_index = 1, - cores, - verbose = TRUE) { - data_list <- switch(load_format, - "csv" = .load_xenium_folder_csv( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "parquet" = .load_xenium_folder_parquet( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) - ) - - return(data_list) -} - - -#' @describeIn load_xenium_folder Load from csv files -#' @keywords internal -.load_xenium_folder_csv <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { - # initialize return vars - feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL - - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_path <- path_list$panel_meta_path[[1]] - fdata_ext <- GiottoUtils::file_extension(fdata_path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json(path = fdata_path, - gene_ids = h5_gene_ids) - } else { - feat_meta <- data.table::fread(fdata_path, nThread = cores) - colnames(feat_meta)[[1]] <- "feat_ID" - } - - # **** subcellular info **** - if (data_to_use == "subcellular") { - # append missing QC probe info to feat_meta - if (isTRUE(h5_expression)) { - h5 <- hdf5r::H5File$new(path_list$agg_expr_path) - tryCatch({ - root <- names(h5) - feature_id <- h5[[paste0(root, "/features/id")]][] - feature_info <- h5[[paste0(root, "/features/feature_type")]][] - feature_names <- h5[[paste0(root, "/features/name")]][] - features_dt <- data.table::data.table( - "id" = feature_id, - "name" = feature_names, - "feature_type" = feature_info - ) - }, finally = { - h5$close_all() - }) - } else { - features_dt <- data.table::fread( - paste0(path_list$agg_expr_path, "/features.tsv.gz"), - header = FALSE - ) - } - colnames(features_dt) <- c("id", "feat_ID", "feat_class") - feat_meta <- merge( - features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") - - GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) - tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) - data.table::setnames( - x = tx_dt, - old = c("feature_name", "x_location", "y_location"), - new = c("feat_ID", "x", "y") - ) - - GiottoUtils::vmsg("Loading boundary info...", .v = verbose) - bound_dt_list <- lapply( - path_list$bound_paths, - function(x) data.table::fread(x[[1]], nThread = cores) - ) - } - - # **** aggregate info **** - GiottoUtils::vmsg("loading cell metadata...", .v = verbose) - cell_meta <- data.table::fread( - path_list$cell_meta_path[[1]], nThread = cores) - - if (data_to_use == "aggregate") { - GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) - if (isTRUE(h5_expression)) { - agg_expr <- get10Xmatrix_h5( - path_to_data = path_list$agg_expr_path, - gene_ids = h5_gene_ids, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } else { - agg_expr <- get10Xmatrix( - path_to_data = path_list$agg_expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } - } - - data_list <- list( - "feat_meta" = feat_meta, - "tx_dt" = tx_dt, - "bound_dt_list" = bound_dt_list, - "cell_meta" = cell_meta, - "agg_expr" = agg_expr - ) - - return(data_list) -} - - - - -#' @describeIn load_xenium_folder Load from parquet files -#' @keywords internal -.load_xenium_folder_parquet <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { - # initialize return vars - feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL - # dplyr variable - cell_id <- NULL - - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_path <- path_list$panel_meta_path[[1]] - fdata_ext <- GiottoUtils::file_extension(fdata_path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json( - path = fdata_path, gene_ids = h5_gene_ids) - } else { - feat_meta <- data.table::fread(fdata_path, nThread = cores) - colnames(feat_meta)[[1]] <- "feat_ID" - } - - # **** subcellular info **** - if (data_to_use == "subcellular") { - # define for data.table - transcript_id <- feature_name <- NULL - - # append missing QC probe info to feat_meta - if (isTRUE(h5_expression)) { - h5 <- hdf5r::H5File$new(path_list$agg_expr_path) - tryCatch({ - root <- names(h5) - feature_id <- h5[[paste0(root, "/features/id")]][] - feature_info <- h5[[paste0(root, "/features/feature_type")]][] - feature_names <- h5[[paste0(root, "/features/name")]][] - features_dt <- data.table::data.table( - "id" = feature_id, - "name" = feature_names, - "feature_type" = feature_info - ) - }, finally = { - h5$close_all() - }) - } else { - features_dt <- arrow::read_tsv_arrow(paste0( - path_list$agg_expr_path, "/features.tsv.gz"), - col_names = FALSE - ) %>% - data.table::setDT() - } - colnames(features_dt) <- c("id", "feat_ID", "feat_class") - feat_meta <- merge(features_dt[ - , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") - - vmsg("Loading transcript level info...", .v = verbose) - tx_dt <- arrow::read_parquet( - file = path_list$tx_path[[1]], - as_data_frame = FALSE - ) %>% - dplyr::mutate( - transcript_id = cast(transcript_id, arrow::string())) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - dplyr::mutate( - feature_name = cast(feature_name, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - data.table::setnames( - x = tx_dt, - old = c("feature_name", "x_location", "y_location"), - new = c("feat_ID", "x", "y") - ) - vmsg("Loading boundary info...", .v = verbose) - bound_dt_list <- lapply(path_list$bound_paths, function(x) { - arrow::read_parquet(file = x[[1]], as_data_frame = FALSE) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - }) - } - # **** aggregate info **** - if (data_to_use == "aggregate") { - vmsg("Loading cell metadata...", .v = verbose) - cell_meta <- arrow::read_parquet( - file = path_list$cell_meta_path[[1]], - as_data_frame = FALSE - ) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - - # NOTE: no parquet for agg_expr. - vmsg("Loading aggregated expression...", .v = verbose) - if (isTRUE(h5_expression)) { - agg_expr <- get10Xmatrix_h5( - path_to_data = path_list$agg_expr_path, - gene_ids = h5_gene_ids, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } else { - agg_expr <- get10Xmatrix( - path_to_data = path_list$agg_expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } - } - - data_list <- list( - "feat_meta" = feat_meta, - "tx_dt" = tx_dt, - "bound_dt_list" = bound_dt_list, - "cell_meta" = cell_meta, - "agg_expr" = agg_expr - ) - - return(data_list) -} - - - -.load_xenium_panel_json <- function(path, gene_ids = "symbols") { - gene_ids <- match.arg(gene_ids, c("symbols", "ensembl")) - - # tested on v1.6 - j <- jsonlite::fromJSON(path) - # j$metadata # dataset meta - # j$payload # main content - # j$payload$chemistry # panel chemistry used - # j$payload$customer # panel customer - # j$payload$designer # panel designer - # j$payload$spec_version # versioning - # j$payload$panel # dataset panel stats - - panel_info <- j$payload$targets$type %>% - data.table::as.data.table() - - switch(gene_ids, - "symbols" = data.table::setnames( - panel_info, - old = c("data.id", "data.name", "descriptor"), - new = c("ensembl", "feat_ID", "type") - ), - "ensembl" = data.table::setnames( - panel_info, - old = c("data.id", "data.name", "descriptor"), - new = c("feat_ID", "symbol", "type") - ) - ) - return(panel_info) -} - - - -## ArchR #### - -#' Create an ArchR project and run LSI dimension reduction -#' -#' @param fragmentsPath A character vector containing the paths to the input -#' files to use to generate the ArrowFiles. -#' These files can be in one of the following formats: (i) scATAC tabix files, -#' (ii) fragment files, or (iii) bam files. -#' @param genome A string indicating the default genome to be used for all ArchR -#' functions. Currently supported values include "hg19","hg38","mm9", and "mm10". -#' This value is stored as a global environment variable, not part of the -#' ArchRProject. -#' This can be overwritten on a per-function basis using the given function's -#' geneAnnotationand genomeAnnotation parameter. For something other than one of -#' the currently supported, see createGeneAnnnotation() and -#' createGenomeAnnnotation() -#' @param createArrowFiles_params list of parameters passed to -#' `ArchR::createArrowFiles` -#' @param ArchRProject_params list of parameters passed to `ArchR::ArchRProject` -#' @param addIterativeLSI_params list of parameters passed to -#' `ArchR::addIterativeLSI` -#' @param threads number of threads to use. Default = `ArchR::getArchRThreads()` -#' @param force Default = FALSE -#' @param verbose Default = TRUE -#' -#' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and -#' TileMatrix-based LSI -#' @export -createArchRProj <- function(fragmentsPath, - genome = c("hg19", "hg38", "mm9", "mm10"), - createArrowFiles_params = list( - sampleNames = "sample1", - minTSS = 0, - minFrags = 0, - maxFrags = 1e+07, - minFragSize = 10, - maxFragSize = 2000, - offsetPlus = 0, - offsetMinus = 0, - TileMatParams = list(tileSize = 5000) - ), - ArchRProject_params = list( - outputDirectory = getwd(), - copyArrows = FALSE - ), - addIterativeLSI_params = list(), - threads = ArchR::getArchRThreads(), - force = FALSE, - verbose = TRUE) { - if (!requireNamespace("ArchR")) { - message('ArchR is needed. Install the package using - remotes::install_github("GreenleafLab/ArchR")') - } - - ## Add reference genome - message("Loading reference genome") - ArchR::addArchRGenome(genome) - - # Creating Arrow Files - message("Creating Arrow files") - ArrowFiles <- do.call( - ArchR::createArrowFiles, - c( - inputFiles = fragmentsPath, - verbose = verbose, - force = force, - createArrowFiles_params - ) - ) - - # Creating an ArchRProject - message("Creating ArchRProject") - proj <- do.call( - ArchR::ArchRProject, - c(list(ArrowFiles = ArrowFiles), - threads = threads, - ArchRProject_params - ) - ) - - # Data normalization and dimensionality reduction - message("Running dimension reduction") - proj <- do.call( - ArchR::addIterativeLSI, - c( - ArchRProj = proj, - verbose = verbose, - name = "IterativeLSI", - threads = threads, - force = force, - addIterativeLSI_params - ) - ) -} - -#' Create a Giotto object from an ArchR project -#' -#' @param archRproj ArchR project -#' @param expression expression information -#' @param expression_feat Giotto object available features (e.g. atac, rna, ...) -#' @param spatial_locs data.table or data.frame with coordinates for cell -#' centroids -#' @param sampleNames A character vector containing the ArchR project sample -#' name -#' @param ... additional arguments passed to `createGiottoObject` -#' -#' @returns A Giotto object with at least an atac or epigenetic modality -#' -#' @export -createGiottoObjectfromArchR <- function(archRproj, - expression = NULL, - expression_feat = "atac", - spatial_locs = NULL, - sampleNames = "sample1", - ...) { - # extract GeneScoreMatrix - GeneScoreMatrix_summarizedExperiment <- ArchR::getMatrixFromProject( - archRproj) - GeneScoreMatrix <- slot(slot( - GeneScoreMatrix_summarizedExperiment, "assays"), - "data")[["GeneScoreMatrix"]] - - ## get cell names - cell_names <- colnames(GeneScoreMatrix) - cell_names <- gsub(paste0(sampleNames, "#"), "", cell_names) - cell_names <- gsub("-1", "", cell_names) - - ## get gene names - gene_names <- slot(GeneScoreMatrix_summarizedExperiment, - "elementMetadata")[["name"]] - - ## replace colnames with cell names - colnames(GeneScoreMatrix) <- cell_names - - ## replace rownames with gene names - rownames(GeneScoreMatrix) <- gene_names - - - if (!is.null(expression)) { - expression_matrix <- data.table::fread(expression) - - expression_cell_names <- colnames(expression_matrix) - cell_names <- intersect(cell_names, expression_cell_names) - - expression_matrix <- Matrix::Matrix(as.matrix(expression_matrix[, -1]), - dimnames = list( - expression_matrix[[1]], - colnames(expression_matrix[, -1]) - ), - sparse = TRUE - ) - - expression <- expression_matrix[, cell_names] - - GeneScoreMatrix <- GeneScoreMatrix[, cell_names] - } - - - ## filter spatial locations - if (!is.null(spatial_locs)) { - x <- read.csv(spatial_locs) - x <- x[x$cell_ID %in% cell_names, ] - spatial_locs <- x - } - - # Creating GiottoObject - message("Creating GiottoObject") - - if (!is.null(expression)) { - gobject <- createGiottoObject( - expression = list( - GeneScoreMatrix = GeneScoreMatrix, - raw = expression - ), - expression_feat = expression_feat, - spatial_locs = spatial_locs, - ... - ) - } else { - gobject <- createGiottoObject( - expression = list(GeneScoreMatrix = GeneScoreMatrix), - expression_feat = expression_feat, - spatial_locs = spatial_locs, - ... - ) - } - - # add LSI dimension reduction - coordinates <- slot(archRproj, "reducedDims")[["IterativeLSI"]][["matSVD"]] - - ## clean cell names - lsi_cell_names <- rownames(coordinates) - lsi_cell_names <- gsub(paste0(sampleNames, "#"), "", lsi_cell_names) - lsi_cell_names <- gsub("-1", "", lsi_cell_names) - - rownames(coordinates) <- lsi_cell_names - - coordinates <- coordinates[cell_names, ] - - dimension_reduction <- Giotto::createDimObj( - coordinates = coordinates, - name = "lsi", - spat_unit = "cell", - feat_type = expression_feat[1], - method = "lsi" - ) - gobject <- setDimReduction(gobject, - dimension_reduction, - spat_unit = "cell", - feat_type = expression_feat[1], - name = "lsi", - reduction_method = "lsi" - ) - - return(gobject) -} diff --git a/R/convenience_cosmx.R b/R/convenience_cosmx.R new file mode 100644 index 000000000..a1037d188 --- /dev/null +++ b/R/convenience_cosmx.R @@ -0,0 +1,1768 @@ + + +# CLASS #### + + +setClass( + "CosmxReader", + slots = list( + cosmx_dir = "character", + slide = "numeric", + fovs = "numeric", + micron = "logical", + px2mm = "numeric", + offsets = "ANY", + calls = "list" + ), + prototype = list( + slide = 1, + micron = FALSE, + px2mm = 0.12028, # from cosmx output help files + offsets = NULL, + calls = list() + ) +) + +# * show #### +setMethod("show", signature("CosmxReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "CosmxReader")) + print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@cosmx_dir + if (length(d) > 0L) { + nch <- nchar(d) + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # slide + slide <- object@slide + cat(pre["slide"], slide, "\n") + + # fovs + fovs <- object@fovs %none% "all" + cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") + + # micron scaling + micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) + cat(pre["micron"], micron, "\n") + + # offsets + offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") + cat(pre["offsets"], offs_status, "\n") + + # funs + .reader_fun_prints(x = object, pre = pre["funs"]) +}) + +# * print #### +setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) + +# * plot #### +setMethod( + "plot", signature(x = "CosmxReader", y = "missing"), + function(x, cex = 0.8, ...) { + a <- list(...) + dat <- x@offsets + + if (is.null(dat)) { # don't run if no offsets + cat("no offsets to plot\n") + return(invisible(NULL)) + } + + plot(y ~ x, data = dat, asp = 1L, type = "n", ...) + text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) + }) + + + + +#' @title Import a Nanostring CosMx Assay +#' @name importCosMx +#' @description +#' Giotto import functionalities for CosMx datasets. This function generates +#' a `CosmxReader` instance that has convenient reader functions for converting +#' individual pieces of CosMx data into Giotto-compatible representations when +#' the params `cosmx_dir` and `fovs` (if only a subset is desired) are provided. +#' A function that creates the full `giotto` object is also available. +#' These functions should have all param values provided as defaults, but +#' can be flexibly modified to do things such as look in alternative +#' directories or paths. +#' @param cosmx_dir CosMx output directory +#' @param slide numeric. Slide number. Defaults to 1 +#' @param fovs numeric. (optional) If provided, will load specific fovs. +#' Otherwise, all FOVs will be loaded +#' @param micron logical. Whether to scale spatial information as micron +#' instead of the default pixels +#' @param px2mm numeric. Scalefactor from pixels to mm. Defaults to 0.12028 +#' based on `CosMx-ReadMe.html` info +#' @details +#' Loading functions are generated after the `cosmx_dir` is added. +#' Transcripts, expression, and metadata loading are all expected to be done +#' from the top level of the directory. Loading of polys, and any image sets +#' are expected to be from specific subdirectories containing only those +#' images for the set of FOVs. +#' @returns CosmxReader object +#' @examples +#' # Create a `CosmxReader` object +#' reader <- importCosMx() +#' +#' \dontrun{ +#' # Set the cosmx_dir and fov parameters +#' reader$cosmx_dir <- "path to cosmx dir" +#' reader$fov <- c(1, 4) +#' +#' plot(reader) # displays FOVs (top left corner) in px scale. +#' +#' # Load polygons, transcripts, and images +#' polys <- reader$load_polys() +#' tx <- reader$load_transcripts() +#' imgs <- reader$load_images() +#' +#' # Create a `giotto` object and add the loaded data +#' g <- giotto() +#' g <- setGiotto(g, tx[["rna"]]) +#' g <- setGiotto(g, polys) +#' g <- addGiottoLargeImage(g, largeImages = imgs) +#' force(g) +#' } +#' @export +importCosMx <- function( + cosmx_dir = NULL, slide = 1, fovs = NULL, micron = FALSE, px2mm = 0.12028 +) { + # get params + a <- list(Class = "CosmxReader") + if (!is.null(cosmx_dir)) { + a$cosmx_dir <- cosmx_dir + } + if (!is.null(fovs)) { + a$fovs <- fovs + } + a$slide <- slide + a$micron <- micron + a$px2mm <- px2mm + + do.call(new, args = a) +} + +# * init #### +setMethod("initialize", signature("CosmxReader"), function( + .Object, cosmx_dir, slide, fovs, micron, px2mm +) { + # provided params (if any) + if (!missing(cosmx_dir)) { + checkmate::assert_directory_exists(cosmx_dir) + .Object@cosmx_dir <- cosmx_dir + } + if (!missing(slide)) { + .Object@slide <- slide + } + if (!missing(fovs)) { + .Object@fovs <- fovs + } + if (!missing(micron)) { + .Object@micron <- micron + } + if (!missing(px2mm)) { + .Object@px2mm <- px2mm + } + + # NULL case + if (length(.Object@cosmx_dir) == 0) { + return(.Object) # return early if no path given + } + + + # detect paths and subdirs + p <- .Object@cosmx_dir + .cosmx_detect <- function(pattern) { + .detect_in_dir(pattern = pattern, path = p, platform = "CosMx") + } + + shifts_path <- .cosmx_detect("fov_positions_file") + meta_path <- .cosmx_detect("metadata_file") + tx_path <- .cosmx_detect("tx_file") + mask_dir <- .cosmx_detect("CellLabels") + expr_path <- .cosmx_detect("exprMat_file") + composite_img_dir <- .cosmx_detect("CellComposite") + overlay_img_dir <- .cosmx_detect("CellOverlay") + compart_img_dir <- .cosmx_detect("CompartmentLabels") + + + # load fov offsets through one of several methods + if (is.null(.Object@offsets)) { # only run if not already existing + pos <- NULL + + if (!is.null(shifts_path)) { + fov_shifts <- data.table::fread(shifts_path) + if (!"X_mm" %in% colnames(fov_shifts)) { + # older version has fov, x, y (all numeric) in px shifts + data.table::setnames(fov_shifts, new = c("fov", "x", "y")) + pos <- fov_shifts + } + } + + # proceed with other possible methods of inferring shifts if present + if (!is.null(meta_path) && is.null(pos)) { + pos <- .cosmx_infer_fov_shifts( + meta_dt = data.table::fread(meta_path), + flip_loc_y = TRUE + ) + } else if (!is.null(tx_path) && is.null(pos)) { + warning(wrap_txt( + "metadata_file not found: + Detecting fov shifts from tx_file. (This is slower)" + ), call. = FALSE) + pos <- .cosmx_infer_fov_shifts( + tx_dt = data.table::fread(tx_path), + flip_loc_y = TRUE + ) + } + else { + pos <- data.table::data.table() + warning(wrap_txt( + "NO FOV SHIFTS. + fov_positions_file, tx_file, and metadata_file not auto detected. + One of these must be provided to infer FOV shifts.\n + Alternatively, directly supply a data.table with: + fov(int), x(numeric), y(numeric) in px scaling to `$offsets`" + ), call. = FALSE) + } + + .Object@offsets <- pos + } + + + + # transcripts load call + tx_fun <- function( + path = tx_path, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" + ), + verbose = NULL + ) { + .cosmx_transcript( + path = path, + fovs = .Object@fovs %none% NULL, + feat_type = feat_type, + split_keyword = split_keyword, + dropcols = dropcols, + micron = .Object@micron, + px2mm = .Object@px2mm, + cores = determine_cores(), + verbose = verbose + ) + } + .Object@calls$load_transcripts <- tx_fun + + + + # mask load call + mask_fun <- function( + path = mask_dir, + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, + verbose = NULL + ) { + .cosmx_poly( + path = path, + fovs = .Object@fovs %none% NULL, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + shift_vertical_step = shift_vertical_step, + shift_horizontal_step = shift_horizontal_step, + remove_background_polygon = remove_background_polygon, + micron = .Object@micron, + px2mm = .Object@px2mm, + offsets = .Object@offsets, + verbose = verbose + ) + } + .Object@calls$load_polys <- mask_fun + + + # expression load call + expr_fun <- function( + path = expr_path, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb") + ) { + .cosmx_expression( + path = path, + fovs = .Object@fovs %none% NULL, + feat_type = feat_type, + split_keyword = split_keyword + ) + } + .Object@calls$load_expression <- expr_fun + + + # images load call + img_fun <- function( + path = composite_img_dir, + img_type = "composite", + img_name_fmt = paste0(img_type, "_fov%03d"), + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL + ) { + .cosmx_image( + path = path, + fovs = .Object@fovs %none% NULL, + img_type = img_type, + img_name_fmt = img_name_fmt, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + micron = .Object@micron, + px2mm = .Object@px2mm, + offsets = .Object@offsets, + verbose = verbose + ) + } + .Object@calls$load_images <- img_fun + + + # meta load call + meta_fun <- function( + path = meta_path, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px", + "cell_id" + ), + verbose = NULL + ) { + .cosmx_cellmeta( + path = path, + fovs = .Object@fovs %none% NULL, + dropcols = dropcols, + cores = determine_cores(), + verbose = verbose + ) + } + .Object@calls$load_cellmeta <- meta_fun + + + # build gobject call + gobject_fun <- function( + transcript_path = tx_path, + cell_labels_dir = mask_dir, + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c("rna", "negprobes"), + split_keyword = list( + "NegPrb" + ), + load_images = list( + composite = "composite", + overlay = "overlay" + ), + load_expression = FALSE, + load_cellmeta = FALSE, + instructions = NULL + ) { + load_expression <- as.logical(load_expression) + load_cellmeta <- as.logical(load_cellmeta) + + if (!is.null(load_images)) { + checkmate::assert_list(load_images) + if (is.null(names(load_images))) { + stop("Images directories provided to 'load_images' must be named") + } + } + + funs <- .Object@calls + + # init gobject + g <- giotto() + if (!is.null(instructions)) { + instructions(g) <- instructions + } + + # transcripts + tx_list <- funs$load_transcripts( + path = transcript_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + + # polys + polys <- funs$load_polys( + path = cell_labels_dir, + verbose = FALSE + ) + g <- setGiotto(g, polys) + + # images + if (!is.null(load_images)) { + # replace convenient shortnames + load_images[load_images == "composite"] <- composite_img_dir + load_images[load_images == "overlay"] <- overlay_img_dir + + imglist <- list() + dirnames <- names(load_images) + for (imdir_i in seq_along(load_images)) { + dir_imgs <- funs$load_images( + path = load_images[[imdir_i]], + img_type = dirnames[[imdir_i]], + ) + imglist <- c(imglist, dir_imgs) + } + g <- addGiottoLargeImage(g, largeImages = imglist) + } + + # expression & meta + # Need to check that names agree for poly/expr/meta + allowed_ids <- spatIDs(polys) + + if (load_expression) { + exlist <- funs$load_expression( + path = expression_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + + # only keep allowed cells and set into gobject + for (ex in exlist) { + bool <- colnames(ex[]) %in% allowed_ids + ex[] <- ex[][, bool] + g <- setGiotto(g, ex) + } + } + + if (load_cellmeta) { + cx <- funs$load_cellmeta( + path = metadata_path + ) + + cx[] <- cx[][cell_ID %in% allowed_ids,] + g <- setGiotto(g, cx) + } + + return(g) + } + .Object@calls$create_gobject <- gobject_fun + + return(.Object) +}) + + + + + +# * access #### + +#' @export +setMethod("$", signature("CosmxReader"), function(x, name) { + basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("CosmxReader"), function(x, name, value) { + basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + if (name == "offsets") { + methods::slot(x, name) <- data.table::setDT(value) + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.CosmxReader` <- function(x, pattern) { + dn <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) +} + + + + + +# MODULAR #### + +.cosmx_transcript <- function( + path, + fovs = NULL, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" + ), + micron = FALSE, + px2mm = 0.12028, + cores = determine_cores(), + verbose = NULL +) { + + if (missing(path)) { + stop(wrap_txt( + "No path to tx file provided or auto-detected" + ), call. = FALSE) + } + + checkmate::assert_file_exists(path) + + vmsg(.v = verbose, "loading feature detections...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) + if (!is.null(fovs)) { + # subset to only needed FOVs + tx <- tx[fov %in% as.numeric(fovs),] + } + + # micron scaling if desired + if (micron) { + px2micron <- px2mm / 1000 + tx[, x_global_px := x_global_px * px2micron] + tx[, y_global_px := y_global_px * px2micron] + } + + # giottoPoints ----------------------------------------------------- # + + # static gpoints params + gpoints_params <- list() + gpoints_params$feat_type <- feat_type + gpoints_params$split_keyword <- split_keyword + gpoints_params$x_colname <- "x_global_px" + gpoints_params$y_colname <- "y_global_px" + gpoints_params$feat_ID_colname <- "target" + + gpoints <- do.call(createGiottoPoints, c(list(x = tx), gpoints_params)) + # ensure output is always a list + if (!is.list(gpoints)) { + gpoints <- list(gpoints) + names(gpoints) <- objName(gpoints[[1L]]) + } + + return(gpoints) +} + +#' @name .cosmx_infer_fov_shifts +#' @title Infer CosMx local to global shifts +#' @description +#' From NanoString CosMx spatial info, infer the FOV shifts needed. These +#' values are needed for anything that requires the use of images, since those +#' do not come with spatial extent information embedded. +#' @param tx_dt transcript data.table input to use +#' (Only one of tx_dt or meta_dt should be used) +#' @param meta_dt cell metadata data.table input to use +#' (Only one of tx_dt or meta_dt should be used) +#' @param navg max n values to check per FOV to find average shift +#' @param flip_loc_y whether a y flip needs to be performed on the local y +#' values before comparing with global y values. See details +#' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), +#' yshift (numeric). Values should always be in pixels +#' @details +#' Shifts are found by looking at the average of differences between xy global +#' and local coordinates in either the metadata or transcripts file. The number +#' of shift value to average across is determined with `navg`. The average is +#' in place to get rid of small differences in shifts, likely due to rounding +#' errors. Across the different versions of the CosMx exports, whether the +#' local y values are flipped compared to the global values has differed, so +#' there is also a step that checks the variance of y values per sampled set +#' per fov. In cases where the shift is calculated with the correct (inverted +#' or non-inverted) y local values, the variance is expected to be very low. +#' When the variance is higher than 0.001, the function is re-run with the +#' opposite `flip_loc_y` value. +#' @keywords internal +.cosmx_infer_fov_shifts <- function( + tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L +) { + fov <- NULL # NSE vars + if (!missing(tx_dt)) { + tx_head <- tx_dt[, head(.SD, navg), by = fov] + x <- tx_head[, mean(x_global_px - x_local_px), by = fov] + if (flip_loc_y) { + + # test if flip is needed + # Usual yshift variance / fov expected when correct is 0 to 1e-22 + # if var is too high for any fov, swap `flip_loc_y` value + y <- tx_head[, var(y_global_px + y_local_px), by = fov] + if (y[, any(V1 > 0.001)]) { + return(.cosmx_infer_fov_shifts( + tx_dt = tx_dt, flip_loc_y = FALSE, navg = navg + )) + } + + # use +y if local y values are flipped + y <- tx_head[, mean(y_global_px + y_local_px), by = fov] + } else { + y <- tx_head[, mean(y_global_px - y_local_px), by = fov] + } + } else if (!missing(meta_dt)) { + meta_head <- meta_dt[, head(.SD, navg), by = fov] + x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] + if (flip_loc_y) { + + # test if flip is needed + # Usual yshift variance / fov expected when correct is 0 to 1e-22 + # if var is too high for any fov, swap `flip_loc_y` value + y <- meta_head[, var(CenterY_global_px + CenterY_local_px), by = fov] + if (y[, any(V1 > 0.001)]) { + return(.cosmx_infer_fov_shifts( + meta_dt = meta_dt, flip_loc_y = FALSE, navg = navg + )) + } + + # use +y if local y values are flipped + y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), + by = fov] + } else { + y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), + by = fov] + } + } else { + stop("One of tx_dt or meta_dt must be provided\n") + } + + res <- merge(x, y, by = "fov") + data.table::setnames(res, new = c("fov", "x", "y")) + + return(res) +} + +.cosmx_imgname_fovparser <- function( + path +) { + im_names <- list.files(path) + fovs <- as.numeric(sub(".*F(\\d+)\\..*", "\\1", im_names)) + if (any(is.na(fovs))) { + warning(wrap_txt( + "Images to load should be sets of images/fov in subdirectories. + No other files should be present." + )) + } + return(fovs) +} + +.cosmx_poly <- function( + path, + slide = 1, + fovs = NULL, + name = "cell", + # VERTICAL FLIP + NO SHIFTS + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, + micron = FALSE, + px2mm = 0.12028, + offsets, + verbose = NULL +) { + # NSE params + f <- x <- y <- NULL + + if (missing(path)) { + stop(wrap_txt( + "No path to polys subdirectory provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading segmentation masks...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + mask_params <- list( + # static params + mask_method = "multiple", + # A background poly for nanostring masks sometimes shows up. + # removal works by looking for any polys with size more than 90% of the + # total FOV along either x or y axis + remove_background_polygon = remove_background_polygon, + fill_holes = TRUE, + calc_centroids = TRUE, + remove_unvalid_polygons = TRUE, + # input params + name = name, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + shift_vertical_step = shift_vertical_step, + shift_horizontal_step = shift_horizontal_step, + verbose = FALSE + ) + + fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL + progressr::with_progress({ + p <- progressr::progressor(along = fovs) + + gpolys <- lapply(fovs, function(f) { + segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID + mask_params$ID_fmt = paste0( + sprintf("c_%d_%d_", slide, f), "%d" + ) + + gpoly <- do.call( + createGiottoPolygonsFromMask, + args = c(list(maskfile = segfile), mask_params) + ) + + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + # if micron scale + if (micron) { + px2micron <- px2mm / 1000 + gpoly <- rescale( + gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + ) + xshift <- xshift * px2micron + yshift <- yshift * px2micron + } + + gpoly <- spatShift(x = gpoly, dx = xshift, dy = yshift) + p(message = sprintf("F%03d", f)) + return(gpoly) + }) + }) + + if (length(gpolys) > 1L) { + gpolys <- do.call(rbind, args = gpolys) + } + + # never return lists. Only the single merged gpoly + return(gpolys) +} + +.cosmx_cellmeta <- function( + path, + slide = 1, + fovs = NULL, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px", + "cell_id" + ), + cores = determine_cores(), + verbose = NULL +) { + + if (missing(path)) { + stop(wrap_txt( + "No path to metadata file provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading cell metadata...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + verbose <- verbose %null% TRUE + + meta_dt <- data.table::fread(input = path, nThread = cores) + + # remove unneeded cols + dropcols <- dropcols[dropcols %in% colnames(meta_dt)] + meta_dt[, (dropcols) := NULL] # remove dropcols + + # subset to needed fovs + if (!is.null(fovs)) { + fovs <- as.integer(fovs) + meta_dt <- meta_dt[fov %in% fovs,] + } + + # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` + if ("cell" %in% colnames(meta_dt)) { + # assume already formatted (current datasets Mar-27-2024) + meta_dt[, c("fov", "cell_ID") := NULL] + data.table::setnames(meta_dt, old = "cell", "cell_ID") + } else { + # older datasets + meta_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] + # remove fov + meta_dt[, fov := NULL] + } + + + # TODO figure out what to do about protein expression here. + cx <- createCellMetaObj( + metadata = meta_dt, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + return(cx) +} + +.cosmx_expression <- function( + path, + slide = 1, + fovs = NULL, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + cores = determine_cores(), + verbose = NULL +) { + + if (missing(path)) { + stop(wrap_txt( + "No path to exprMat file provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading expression matrix...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + expr_dt <- data.table::fread(input = path, nThread = cores) + + # subset to needed fovs + if (!is.null(fovs)) { + fovs <- as.integer(fovs) + expr_dt <- expr_dt[fov %in% fovs,] + } + + # remove background values (cell 0) + expr_dt <- expr_dt[cell_ID != 0L,] + + # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` + expr_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] + # remove fov + expr_dt[, fov := NULL] + + # convert to Matrix + expr_mat <- dt_to_matrix(expr_dt) + expr_mat <- t_flex(expr_mat) + + # split expression for rna / negprb if any split keywords provided. + # Output of this chunk should always be a named list of 1 or more matrices + if (length(split_keyword) > 0) { + expr_list <- vector(mode = "list", length = length(feat_type)) + names(expr_list) <- feat_type + # iterate through other expr types + for (key_i in seq_along(split_keyword)) { + feat_ids <- rownames(expr_mat) + bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) + # subset and store split matrix + sub_mat <- expr_mat[bool,] + expr_list[[key_i + 1L]] <- sub_mat + # remaining matrix + expr_mat <- expr_mat[!bool,] + } + # assign the main expr + expr_list[[1L]] <- expr_mat + } else { + expr_list <- list(expr_mat) + names(expr_list) <- feat_type[[1L]] + } + + expr_list <- lapply(seq_along(expr_list), function(expr_i) { + createExprObj(expression_data = expr_list[[expr_i]], + spat_unit = "cell", + feat_type = names(expr_list)[[expr_i]], + name = "raw", + provenance = "cell") + }) + + return(expr_list) +} + +.cosmx_image <- function( + path, + fovs = NULL, + img_type = "composite", + img_name_fmt = paste(img_type, "_fov%03d"), + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + micron = FALSE, + px2mm = 0.12028, + offsets, + verbose = NULL +) { + + if (missing(path)) { + stop(wrap_txt( + "No path to image subdirectory to load provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, sprintf("loading %s images...", img_type)) + vmsg(.v = verbose, .is_debug = TRUE, path) + + fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL + verbose <- verbose %null% TRUE + + progressr::with_progress({ + p <- progressr::progressor(along = fovs) + + gimg_list <- lapply(fovs, function(f) { + imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + img_name <- sprintf(img_name_fmt, f) + + gimg <- createGiottoLargeImage( + raster_object = imgfile, + name = img_name, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + verbose = verbose + ) + + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + if (micron) { + px2micron <- px2mm / 1000 + gimg <- rescale( + gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + ) + xshift <- xshift * px2micron + yshift <- yshift * px2micron + } + + gimg <- spatShift(x = gimg, dx = xshift, dy = yshift) + p(message = sprintf("F%03d", f)) + return(gimg) + }) + }) + + + return(gimg_list) +} + + + +#' @title Load CosMx folder subcellular info +#' @name .load_cosmx_folder_subcellular +#' @description loads in the feature detections information. Note that the mask +#' images are still required for a working subcellular object, and those are +#' loaded in \code{\link{.createGiottoCosMxObject_subcellular}} +#' @inheritParams createGiottoCosMxObject +#' @returns list +#' @keywords internal +.load_cosmx_folder_subcellular <- function(dir_items, + FOVs = NULL, + cores, + verbose = TRUE) { + vmsg(.v = verbose, "Loading subcellular information...") + + # subcellular checks + if (!file.exists(dir_items$`transcript locations file`)) { + stop(wrap_txt("No transcript locations file (.csv) detected")) + } + if (!file.exists(dir_items$`fov positions file`)) { + stop(wrap_txt("No fov positions file (.csv) detected")) + } + + # FOVs to load + vmsg(.v = verbose, "Loading FOV offsets...") + fov_offset_file <- fread( + input = dir_items$`fov positions file`, nThread = cores) + if (is.null(FOVs)) FOVs <- fov_offset_file$fov # default to ALL FOVs + FOV_ID <- as.list(sprintf("%03d", FOVs)) + + # TODO Load only relevant portions of file? + + vmsg(.v = verbose, "Loading transcript level info...") + tx_coord_all <- fread( + input = dir_items$`transcript locations file`, nThread = cores) + vmsg(.v = verbose, "Subcellular load done") + + data_list <- list( + "FOV_ID" = FOV_ID, + "fov_offset_file" = fov_offset_file, + "tx_coord_all" = tx_coord_all + ) + + return(data_list) +} + + + +#' @title Load CosMx folder aggregate info +#' @name .load_cosmx_folder_aggregate +#' @inheritParams createGiottoCosMxObject +#' @returns list +#' @keywords internal +.load_cosmx_folder_aggregate <- function(dir_items, + cores, + verbose = TRUE) { + # data.table vars + fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- + CenterY_global_px <- CenterX_local_px <- + CenterY_local_px <- x_shift <- y_shift <- NULL + + # load aggregate information + vmsg(.v = verbose, "Loading provided aggregated information...") + + # aggregate checks + if (!file.exists(dir_items$`expression matrix file`)) + stop(wrap_txt("No expression matrix file (.csv) detected")) + if (!file.exists(dir_items$`metadata file`)) + stop(wrap_txt("No metadata file (.csv) detected. Needed for cell + spatial locations.")) + + # read in aggregate data + expr_mat <- fread( + input = dir_items$`expression matrix file`, nThread = cores) + metadata <- fread(input = dir_items$`metadata file`, nThread = cores) + + # setorder expression and spatlocs + data.table::setorder(metadata, fov, cell_ID) + data.table::setorder(expr_mat, fov, cell_ID) + + + # generate unique cell IDs + expr_mat[, cell_ID := paste0( + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + expr_mat <- expr_mat[, fov := NULL] + + metadata[, fov_cell_ID := cell_ID] + metadata[, cell_ID := paste0( + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + # reorder + data.table::setcolorder(x = metadata, c("cell_ID", "fov", "fov_cell_ID")) + + + # extract spatial locations + spatlocs <- metadata[, .(CenterX_global_px, CenterY_global_px, cell_ID)] + spatlocs_fov <- metadata[, .(CenterX_local_px, CenterY_local_px, cell_ID)] + # regenerate FOV shifts + metadata[, x_shift := CenterX_global_px - CenterX_local_px] + metadata[, y_shift := CenterY_global_px - CenterY_local_px] + fov_shifts <- metadata[, .(mean(x_shift), mean(y_shift)), fov] + colnames(fov_shifts) <- c("fov", "x_shift", "y_shift") + + + # rename spatloc column names + spatloc_oldnames <- c("CenterX_global_px", "CenterY_global_px", "cell_ID") + spatloc_oldnames_fov <- c("CenterX_local_px", "CenterY_local_px", "cell_ID") + spatloc_newnames <- c("sdimx", "sdimy", "cell_ID") + data.table::setnames(spatlocs, old = spatloc_oldnames, new = spatloc_newnames) + data.table::setnames( + spatlocs_fov, old = spatloc_oldnames_fov, new = spatloc_newnames) + + # cleanup metadata and spatlocs + metadata <- metadata[, c("CenterX_global_px", "CenterY_global_px", + "CenterX_local_px", "CenterY_local_px") := NULL] + # find unique cell_IDs present in both expression and metadata + giotto_cell_ID <- unique(intersect(expr_mat$cell_ID, metadata$cell_ID)) + + # subset to only unique cell_IDs + expr_mat <- expr_mat[cell_ID %in% giotto_cell_ID, ] + metadata <- metadata[cell_ID %in% giotto_cell_ID, ] + + + # convert protein metadata to expr mat + # take all mean intensity protein information except for MembraneStain and DAPI + protein_meta_cols <- colnames(metadata) + protein_meta_cols <- protein_meta_cols[ + grepl(pattern = "Mean.*", x = protein_meta_cols)] + protein_meta_cols <- protein_meta_cols[ + !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI")] + protein_meta_cols <- c("cell_ID", protein_meta_cols) + + prot_expr <- metadata[, protein_meta_cols, with = FALSE] + prot_cell_ID <- metadata[, cell_ID] + protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), + dimnames = list(prot_expr[[1]], + colnames(prot_expr[, -1])), + sparse = FALSE) + protM <- t_flex(protM) + + # convert expression to sparse matrix + spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), + dimnames = list(expr_mat[[1]], + colnames(expr_mat[, -1])), + sparse = TRUE) + spM <- t_flex(spM) + + ## Ready for downstream aggregate gobject creation or appending into + # existing subcellular Giotto object ## + + data_list <- list( + "spatlocs" = spatlocs, + "spatlocs_fov" = spatlocs_fov, + "metadata" = metadata, + "protM" = protM, + "spM" = spM, + "fov_shifts" = fov_shifts + ) + + return(data_list) +} + + + + + + + + + +# OLD #### + + +#' @title Create Nanostring CosMx Giotto Object +#' @name createGiottoCosMxObject +#' @description Given the path to a CosMx experiment directory, creates a Giotto +#' object. +#' @param cosmx_dir full path to the exported cosmx directory +#' @param data_to_use which type(s) of expression data to build the gobject with +#' Default is \code{'all'} information available. \code{'subcellular'} loads +#' the transcript coordinates only. \code{'aggregate'} loads the provided +#' aggregated expression matrix. +#' @param FOVs field of views to load (only affects subcellular data and images) +#' @param remove_background_polygon try to remove background polygon +#' (default: FALSE) +#' @param background_algo algorithm to remove background polygon +#' @param remove_unvalid_polygons remove unvalid polygons (default: TRUE) +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns a giotto object +#' @details +#' [\strong{Expected Directory}] This function generates a giotto object when +#' given a link to a cosmx output directory. It expects the following items +#' within the directory where the \strong{bolded} portions are what this +#' function matches against: +#' \itemize{ +#' \item{\strong{CellComposite} (folder of images)} +#' \item{\strong{CellLabels} (folder of images)} +#' \item{\strong{CellOverlay} (folder of images)} +#' \item{\strong{CompartmentLabels} (folder of images)} +#' \item{experimentname_\strong{exprMat_file}.csv (file)} +#' \item{experimentname_\strong{fov_positions_file}.csv (file)} +#' \item{experimentname_\strong{metadata_file}.csv (file)} +#' \item{experimentname_\strong{tx_file}.csv (file)} +#' } +#' +#' [\strong{Workflows}] Workflow to use is accessed through the data_to_use param +#' \itemize{ +#' \item{'all' - loads and requires subcellular information from tx_file and +#' fov_positions_file +#' and also the existing aggregated information +#' (expression, spatial locations, and metadata) +#' from exprMat_file and metadata_file.} +#' \item{'subcellular' - loads and requires subcellular information from +#' tx_file and +#' fov_positions_file only.} +#' \item{'aggregate' - loads and requires the existing aggregate information +#' (expression, spatial locations, and metadata) from exprMat_file and +#' metadata_file.} +#' } +#' +#' [\strong{Images}] Images in the default CellComposite, CellLabels, +#' CompartmentLabels, and CellOverlay +#' folders will be loaded as giotto largeImage objects in all workflows as +#' long as they are available. Additionally, CellComposite images will be +#' converted to giotto image objects, making plotting with +#' these image objects more responsive when accessing them from a server. +#' \code{\link{showGiottoImageNames}} can be used to see the available images. +#' @export +createGiottoCosMxObject <- function(cosmx_dir = NULL, + data_to_use = c("all", "subcellular", "aggregate"), + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + FOVs = NULL, + instructions = NULL, + cores = determine_cores(), + verbose = TRUE) { + # 0. setup + cosmx_dir <- path.expand(cosmx_dir) + + # determine data to use + data_to_use <- match.arg( + arg = data_to_use, choices = c("all", "subcellular", "aggregate")) + if (data_to_use %in% c("all", "aggregate")) { + stop(wrap_txt('Convenience workflows "all" and "aggregate" are not + available yet')) + } + + # Define for data.table + fov <- target <- x_local_px <- y_local_px <- z <- cell_ID <- + CenterX_global_px <- CenterY_global_px <- + CenterX_local_px <- CenterY_local_px <- NULL + + + # 1. test if folder structure exists and is as expected + dir_items <- .read_cosmx_folder( + cosmx_dir = cosmx_dir, + verbose = verbose + ) + + + # 2. load and create giotto object + cosmx_gobject <- switch(data_to_use, + "subcellular" = .createGiottoCosMxObject_subcellular( + dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ), + "aggregate" = .createGiottoCosMxObject_aggregate( + dir_items, + cores = cores, + verbose = verbose, + instructions = instructions + ), + "all" = .createGiottoCosMxObject_all( + dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ) + ) + + + # load in subcellular information, subcellular FOV objects, then join + + + # load in pre-generated aggregated expression matrix + if (data_to_use == "aggregate" | data_to_use == "all") { + + } + + + + message("done") + return(cosmx_gobject) +} + + + +#' @title Load and create a CosMx Giotto object from subcellular info +#' @name .createGiottoCosMxObject_subcellular +#' @inheritParams createGiottoCosMxObject +#' @returns giotto object +#' @keywords internal +.createGiottoCosMxObject_subcellular <- function( + dir_items, + FOVs = NULL, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL) { + target <- fov <- NULL + + # load tx detections and FOV offsets ------------------------------------- # + data_list <- .load_cosmx_folder_subcellular( + dir_items = dir_items, + FOVs = FOVs, + cores = cores, + verbose = verbose + ) + + # unpack data_list + FOV_ID <- data_list$FOV_ID + fov_offset_file <- data_list$fov_offset_file + tx_coord_all <- data_list$tx_coord_all + + # remove global xy values and cell_ID + tx_coord_all[, c("x_global_px", "y_global_px", "cell_ID") := NULL] + + data.table::setcolorder( + tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov")) + + # feature detection type splitting --------------------------------------- # + + if (isTRUE(verbose)) message("Splitting detections by feature vs neg probe") + all_IDs <- tx_coord_all[, unique(target)] + neg_IDs <- all_IDs[grepl(pattern = "NegPrb", all_IDs)] + feat_IDs <- all_IDs[!all_IDs %in% neg_IDs] + + # split detections DT + feat_coords_all <- tx_coord_all[target %in% feat_IDs] + neg_coords_all <- tx_coord_all[target %in% neg_IDs] + + if (isTRUE(verbose)) { + message(" > Features: ", feat_coords_all[, .N]) + message(" > NegProbes: ", neg_coords_all[, .N]) + } + + # FOV-based processing --------------------------------------------------- # + + fov_gobjects_list <- lapply(FOV_ID, function(x) { + # images --------------------------------------------------- # + # build image paths + if (isTRUE(verbose)) message("Loading image information...") + + composite_dir <- Sys.glob(paths = file.path( + dir_items$`CellComposite folder`, paste0("*", x, "*"))) + cellLabel_dir <- Sys.glob(paths = file.path( + dir_items$`CellLabels folder`, paste0("*", x, "*"))) + compartmentLabel_dir <- Sys.glob(paths = file.path( + dir_items$`CompartmentLabels folder`, paste0("*", x, "*"))) + cellOverlay_dir <- Sys.glob(paths = file.path( + dir_items$`CellOverlay folder`, paste0("*", x, "*"))) + + # Missing warnings + if (length(composite_dir) == 0) { + warning("[ FOV ", x, " ] No composite images found") + composite_dir <- NULL + } + if (length(cellLabel_dir) == 0) { + stop("[ FOV ", x, " ] No cell mask images found") + } # cell masks are necessary + if (length(compartmentLabel_dir) == 0) { + warning("[ FOV ", x, " ] No compartment label images found") + compartmentLabel_dir <- NULL + } + if (length(cellOverlay_dir) == 0) { + warning("[ FOV ", x, " ] No cell polygon overlay images found") + cellOverlay_dir <- NULL + } + + if (isTRUE(verbose)) message("Image load done") + + if (isTRUE(verbose)) wrap_msg("[ FOV ", x, "]") + + + # transcripts ---------------------------------------------- # + # get FOV specific tx locations + if (isTRUE(verbose)) message("Assigning FOV feature detections...") + + + # feature info + coord_oldnames <- c("target", "x_local_px", "y_local_px") + coord_newnames <- c("feat_ID", "x", "y") + + feat_coord <- feat_coords_all[fov == as.numeric(x)] + data.table::setnames( + feat_coord, old = coord_oldnames, new = coord_newnames) + # neg probe info + neg_coord <- neg_coords_all[fov == as.numeric(x)] + data.table::setnames( + neg_coord, old = coord_oldnames, new = coord_newnames) + + + # build giotto object -------------------------------------- # + if (isTRUE(verbose)) message("Building subcellular giotto object...") + fov_subset <- createGiottoObjectSubcellular( + gpoints = list( + "rna" = feat_coord, + "neg_probe" = neg_coord + ), + gpolygons = list("cell" = cellLabel_dir), + polygon_mask_list_params = list( + mask_method = "guess", + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_horizontal_step = FALSE, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons + ), + instructions = instructions, + cores = cores + ) + + + # find centroids as spatial locations ---------------------- # + if (isTRUE(verbose)) + message("Finding polygon centroids as cell spatial locations...") + fov_subset <- addSpatialCentroidLocations( + fov_subset, + poly_info = "cell", + spat_loc_name = "raw" + ) + + + # create and add giotto image objects ---------------------- # + if (isTRUE(verbose)) { + message("Attaching image files...") + print(composite_dir) + print(cellOverlay_dir) + print(compartmentLabel_dir) + } + + gImage_list <- list() + + # load image if files are found + if (!is.null(composite_dir)) { + gImage_list$composite <- createGiottoLargeImage( + raster_object = composite_dir, + negative_y = FALSE, + name = "composite" + ) + } + if (!is.null(cellOverlay_dir)) { + gImage_list$overlay <- createGiottoLargeImage( + raster_object = cellOverlay_dir, + negative_y = FALSE, + name = "overlay" + ) + } + if (!is.null(compartmentLabel_dir)) { + gImage_list$compartment <- createGiottoLargeImage( + raster_object = compartmentLabel_dir, + negative_y = FALSE, + name = "compartment" + ) + } # TODO + + + + if (length(gImage_list) > 0) { + fov_subset <- addGiottoImage( + gobject = fov_subset, + images = gImage_list + ) + + # convert to MG for faster loading (particularly relevant for + # pulling from server) + # TODO remove this + fov_subset <- convertGiottoLargeImageToMG( + giottoLargeImage = gImage_list$composite, + gobject = fov_subset, + return_gobject = TRUE, + verbose = FALSE + ) + } else { + message("No images found for fov") + } + }) # lapply end + + # returning -------------------------------------------------------------- # + + if (length(FOVs) == 1) { + return(fov_gobjects_list[[1]]) + } else { + # join giotto objects according to FOV positions file + if (isTRUE(verbose)) message("Joining FOV gobjects...") + new_gobj_names <- paste0("fov", FOV_ID) + id_match <- match(as.numeric(FOV_ID), fov_offset_file$fov) + x_shifts <- fov_offset_file[id_match]$x_global_px + y_shifts <- fov_offset_file[id_match]$y_global_px + + # Join giotto objects + cosmx_gobject <- joinGiottoObjects( + gobject_list = fov_gobjects_list, + gobject_names = new_gobj_names, + join_method = "shift", + x_shift = x_shifts, + y_shift = y_shifts + ) + return(cosmx_gobject) + } +} + + + +#' @title Load and create a CosMx Giotto object from aggregate info +#' @name .createGiottoCosMxObject_aggregate +#' @inheritParams createGiottoCosMxObject +#' @returns giotto object +#' @keywords internal +.createGiottoCosMxObject_aggregate <- function(dir_items, + cores, + verbose = TRUE, + instructions = NULL) { + data_to_use <- fov <- NULL + + data_list <- .load_cosmx_folder_aggregate( + dir_items = dir_items, + cores = cores, + verbose = verbose + ) + + # unpack data_list + spatlocs <- data_list$spatlocs + spatlocs_fov <- data_list$spatlocs_fov + metadata <- data_list$metadata + protM <- data_list$protM + spM <- data_list$spM + fov_shifts <- data_list$fov_shifts + + + # create standard gobject from aggregate matrix + if (data_to_use == "aggregate") { + # Create aggregate gobject + if (isTRUE(verbose)) message("Building giotto object...") + cosmx_gobject <- createGiottoObject( + expression = list("raw" = spM, "protein" = protM), + cell_metadata = list("cell" = list( + "rna" = metadata, + "protein" = metadata + )), + spatial_locs = spatlocs, + instructions = instructions, + cores = cores + ) + + + # load in images + img_ID <- data.table::data.table( + fov = fov_shifts[, fov], + img_name = paste0("fov", + sprintf("%03d", fov_shifts[, fov]), "-image") + ) + + if (isTRUE(verbose)) message("Attaching image files...") + composite_dir <- Sys.glob(paths = file.path( + dir_items$`CellComposite folder`, paste0("/*"))) + cellLabel_dir <- Sys.glob(paths = file.path( + dir_items$`CellLabels folder`, paste0("/*"))) + compartmentLabel_dir <- Sys.glob(paths = file.path( + dir_items$`CompartmentLabels folder`, paste0("/*"))) + overlay_dir <- Sys.glob(paths = file.path( + dir_items$`CellOverlay folder`, paste0("/*"))) + + if (length(cellLabel_imgList) > 0) { + cellLabel_imgList <- lapply(cellLabel_dir, function(x) { + createGiottoLargeImage(x, name = "cellLabel", negative_y = TRUE) + }) + } + if (length(composite_imgList) > 0) { + composite_imgList <- lapply(composite_dir, function(x) { + createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + }) + } + if (length(compartmentLabel_dir) > 0) { + compartmentLabel_imgList <- lapply( + compartmentLabel_dir, function(x) { + createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + }) + } + if (length(overlay_dir) > 0) { + overlay_imgList <- lapply(overlay_dir, function(x) { + createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + }) + } + } +} + + + + +#' @title Load and create a CosMx Giotto object from subcellular and aggregate +#' info +#' @name .createGiottoCosMxObject_all +#' @param dir_items list of full directory paths from \code{.read_cosmx_folder} +#' @inheritParams createGiottoCosMxObject +#' @returns giotto object +#' @details Both \emph{subcellular} +#' (subellular transcript detection information) and +#' \emph{aggregate} (aggregated detection count matrices by cell polygon from +#' NanoString) +#' data will be loaded in. The two will be separated into 'cell' and 'cell_agg' +#' spatial units in order to denote the difference in origin of the two. +#' @seealso createGiottoCosMxObject .createGiottoCosMxObject_aggregate +#' .createGiottoCosMxObject_subcellular +#' @keywords internal +.createGiottoCosMxObject_all <- function(dir_items, + FOVs, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL, + ...) { + # 1. create subcellular giotto as spat_unit 'cell' + cosmx_gobject <- .createGiottoCosMxObject_subcellular( + dir_items = dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ) + + # 2. load and append aggregated information in spat_unit 'cell_agg' + agg_data <- .load_cosmx_folder_aggregate( + dir_items = dir_items, + cores = cores, + verbose = verbose + ) + + # unpack data_list + spatlocs <- agg_data$spatlocs + spatlocs_fov <- agg_data$spatlocs_fov + metadata <- agg_data$metadata + protM <- agg_data$protM + spM <- agg_data$spM + + # add in pre-generated aggregated expression matrix information for 'all' + # workflow + + # Add aggregate expression information + if (isTRUE(verbose)) wrap_msg( + 'Appending provided aggregate expression data as... + spat_unit: "cell_agg" + feat_type: "rna" + name: "raw"') + # add expression data to expression slot + s4_expr <- createExprObj( + name = "raw", + expression_data = spM, + spat_unit = "cell_agg", + feat_type = "rna", + provenance = "cell_agg" + ) + + cosmx_gobject <- set_expression_values(cosmx_gobject, values = s4_expr) + + # Add spatial locations + if (isTRUE(verbose)) wrap_msg( + 'Appending metadata provided spatial locations data as... + --> spat_unit: "cell_agg" name: "raw" + --> spat_unit: "cell" name: "raw_fov"') + if (isTRUE(verbose)) wrap_msg( + 'Polygon centroid derived spatial locations assigned as... + --> spat_unit: "cell" name: "raw" (default)') + + locsObj <- create_spat_locs_obj( + name = "raw", + coordinates = spatlocs, + spat_unit = "cell_agg", + provenance = "cell_agg" + ) + locsObj_fov <- create_spat_locs_obj( + name = "raw_fov", + coordinates = spatlocs_fov, + spat_unit = "cell_agg", + provenance = "cell_agg" + ) + + cosmx_gobject <- set_spatial_locations(cosmx_gobject, spatlocs = locsObj) + cosmx_gobject <- set_spatial_locations(cosmx_gobject, + spatlocs = locsObj_fov) + + # initialize cell and feat IDs and metadata slots for 'cell_agg' spat_unit + agg_cell_ID <- colnames(s4_expr[]) + agg_feat_ID <- rownames(s4_expr[]) + + sub_feat_ID <- featIDs(cosmx_gobject, feat_type = "rna") + feat_ID_new <- unique(c(agg_feat_ID, sub_feat_ID)) + + # cell metadata + + # Add metadata to both the given and the poly spat_units + if (isTRUE(verbose)) message("Appending provided cell metadata...") + cosmx_gobject <- addCellMetadata(cosmx_gobject, + spat_unit = "cell", + feat_type = "rna", + new_metadata = metadata, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + cosmx_gobject <- addCellMetadata(cosmx_gobject, + spat_unit = "cell_agg", + feat_type = "rna", + new_metadata = metadata, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + + initialize(cosmx_gobject) +} + + + +#' @title Read a structured CosMx folder +#' @name .read_cosmx_folder +#' @inheritParams createGiottoCosMxObject +#' @seealso createGiottoCosMxObject load_cosmx_folder +#' @returns path_list a list of cosmx files discovered and their filepaths. NULL +#' values denote missing items +#' @keywords internal +.read_cosmx_folder <- function(cosmx_dir, + verbose = TRUE) { + ch <- box_chars() + + if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) + stop("The full path to a cosmx directory must be given.") + vmsg("A structured CosMx directory will be used\n", .v = verbose) + + # find directories (length = 1 if present, length = 0 if missing) + dir_items <- list( + `CellLabels folder` = "*CellLabels", + `CompartmentLabels folder` = "*CompartmentLabels", + `CellComposite folder` = "*CellComposite", + `CellOverlay folder` = "*CellOverlay", + `transcript locations file` = "*tx_file*", + `fov positions file` = "*fov_positions_file*", + `expression matrix file` = "*exprMat_file*", + `metadata file` = "*metadata_file*" + ) + dir_items <- lapply( + dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x))) + dir_items_lengths <- lengths(dir_items) + + if (isTRUE(verbose)) { + message("Checking directory contents...") + for (item in names(dir_items)) { + if (dir_items_lengths[[item]] > 0) { + message(ch$s, "> ", item, " found") + } else { + warning(item, " is missing\n") + } + } + } + + # select first directory in list if multiple are detected + if (any(dir_items_lengths > 1)) { + warning("Multiple matches for expected subdirectory item(s).\n + First matching item selected") + + multiples <- which(dir_items_lengths > 1) + for (mult_i in multiples) { + message(names(dir_items)[[mult_i]], "multiple matches found:") + print(dir_items[[mult_i]]) + dir_items[[mult_i]] <- dir_items[[mult_i]][[1]] + } + } + vmsg("Directory check done", .v = verbose) + + return(dir_items) +} + + + + + diff --git a/R/convenience_general.R b/R/convenience_general.R new file mode 100644 index 000000000..8ae4b661b --- /dev/null +++ b/R/convenience_general.R @@ -0,0 +1,1601 @@ +# Spatial Method-Specific Convenience Functions for Giotto Object Creation # + + + +# Common Utility Functions #### + +#' @title Read a structured folder of exported data +#' @name read_data_folder +#' @description Framework function for reading the exported folder of a spatial +#' method and detecting the presence of needed files. NULL values denote missing +#' items.\cr +#' `.read_data_folder()` should not be called directly. Instead, specific +#' reader functions should be built using it as a base. +#' @param spat_method spatial method for which the data is being read +#' @param data_dir exported data directory to read from +#' @param dir_items named list of directory items to expect and keywords to +#' match +#' @param data_to_use character. Which type(s) of expression data to build the +#' gobject with. Values should match with a *workflow* item in require_data_DT +#' (see details) +#' @param require_data_DT data.table detailing if expected data items are +#' required or optional for each \code{data_to_use} *workflow* +#' @param cores cores to use +#' @param verbose be verbose +#' @param toplevel stackframes back where the user-facing function was called. +#' default is one stackframe above `.read_data_folder`. +#' @returns data.table +#' @details +#' **Steps performed:** +#' \itemize{ +#' \item{1. detection of items within \code{data_dir} by looking for keywords +#' assigned through \code{dir_items}} +#' \item{2. check of detected items to see if everything needed has been found. +#' Dictionary of necessary vs optional items for each \code{data_to_use} +#' *workflow* is provided through \code{require_data_DT}} +#' \item{3. if multiple filepaths are found to be matching then select the +#' first one. This function is only intended to find the first level +#' subdirectories and files.} +#' } +#' +#' **Example reader implementation:** +#' \preformatted{ +#' foo <- function(x_dir, +#' data_to_use, +#' cores = NA, +#' verbose = NULL) { +#' dir_items <- list( +#' data1 = "regex_pattern1", +#' data2 = "regex_pattern2", +#' data3 = "regex_pattern3" +#' ) +#' +#' # DT of info to check directory for. Has 3 cols +#' require_data_DT <- data.table::data.table( +#' workflow = "a", # data_to_use is matched against this +#' item = c( +#' "data1", +#' "data2", +#' "data3" +#' ), +#' needed = c( +#' FALSE, # data1 optional for this workflow (if missing: warn) +#' TRUE, # data2 vital for this workflow (if missing: error) +#' TRUE # data3 vital for this workflow (if missing: error) +#' ) +#' ) +#' +#' .read_data_folder( +#' spat_method = "x_method", +#' data_dir = x_dir, +#' dir_items = dir_items, +#' data_to_use = data_to_use, +#' require_data_DT = require_data_DT, +#' cores = cores, +#' verbose = verbose +#' ) +#' } +#' } +#' +#' @md +NULL + +#' @describeIn read_data_folder Should not be used directly +#' @keywords internal +.read_data_folder <- function(spat_method = NULL, + data_dir = NULL, + dir_items, + data_to_use, + load_format = NULL, + require_data_DT, + cores = NA, + verbose = NULL, + toplevel = 2L) { + ch <- box_chars() + + # 0. check params + if (is.null(data_dir) || + !dir.exists(data_dir)) { + .gstop(.n = toplevel, "The full path to a", spat_method, + "directory must be given.") + } + vmsg(.v = verbose, "A structured", spat_method, "directory will be used") + if (!data_to_use %in% require_data_DT$workflow) { + .gstop(.n = toplevel, + "Data requirements for data_to_use not found in require_data_DT") + } + + # 1. detect items + dir_items <- lapply_flex(dir_items, function(x) { + Sys.glob(paths = file.path(data_dir, x)) + }, cores = cores) + # (length = 1 if present, length = 0 if missing) + dir_items_lengths <- lengths(dir_items) + + # 2. check directory contents + vmsg(.v = verbose, "Checking directory contents...") + + for (item in names(dir_items)) { + # IF ITEM FOUND + + if (dir_items_lengths[[item]] > 0) { + # print found items if verbose = "debug" + if (isTRUE(verbose)) { + vmsg( + .v = verbose, .is_debug = TRUE, + .initial = paste0(ch$s, "> "), + item, " found" + ) + for (item_i in seq_along(dir_items[[item]])) { + # print found item names + subItem <- gsub(pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]]) + vmsg( + .v = verbose, .is_debug = TRUE, + .initial = paste0(ch$s, ch$s, ch$l, ch$h, ch$h), + subItem + ) + } + } + } else { + # IF ITEM MISSING + # necessary (error) + # optional (warning) + + # data.table variables + workflow <- needed <- filetype <- NULL + + + require_data_DT <- require_data_DT[workflow == data_to_use, ] + if (!is.null(load_format)) + require_data_DT <- require_data_DT[filetype == load_format, ] + + if (item %in% require_data_DT[needed == TRUE, item]) + stop(item, " is missing") + if (item %in% require_data_DT[needed == FALSE, item]) + warning(item, "is missing (optional)") + } + } + + # 3. select first path in list if multiple are detected + if (any(dir_items_lengths > 1)) { + warning(wrap_txt("Multiple matches for expected directory item(s). + First matching item selected")) + + multiples <- which(dir_items_lengths > 1) + for (mult_i in multiples) { + message(names(dir_items)[[mult_i]], "multiple matches found:") + print(dir_items[[mult_i]]) + dir_items[[mult_i]] <- dir_items[[mult_i]][[1]] + } + } + vmsg(.v = verbose, "Directory check done") + + return(dir_items) +} + + + + + +abbrev_path <- function(path, head = 15, tail = 35L) { + nch <- nchar(path) + if (nch > 60L) { + p1 <- substring(path, first = 0L, last = head) + p2 <- substring(path, first = nch - tail, last = nch) + path <- paste0(p1, "[...]", p2) + } + return(path) +} + +.reader_fun_prints <- function(x, pre) { + nfun <- length(x@calls) + funs <- names(x@calls) + if (nfun > 0L) { + pre_funs <- format(c(pre, rep("", nfun - 1L))) + for (i in seq_len(nfun)) { + cat(pre_funs[i], " ", funs[i], "()\n", sep = "") + } + } +} + +.filetype_prints <- function(x, pre) { + nftype <- length(x@filetype) + datatype <- format(names(x@filetype)) + pre_ftypes <- format(c(pre, rep("", nftype - 1L))) + cat(sprintf("%s %s -- %s\n", + pre_ftypes, + datatype, + x@filetype), + sep = "") +} + +# pattern - list.files pattern to use to search for specific files/dirs +# warn - whether to warn when a pattern does not find any files +# first - whether to only return the first match +.detect_in_dir <- function( + path, pattern, platform, warn = TRUE, first = TRUE +) { + f <- list.files(path, pattern = pattern, full.names = TRUE) + lenf <- length(f) + if (lenf == 1L) return(f) # one match + else if (lenf == 0L) { # no matches + if (warn) { + warning(sprintf( + "%s not detected in %s directory", + pattern, + platform + ), + call. = FALSE) + } + return(NULL) + } + + # more than one match + if (first) { + return(f[[1L]]) + } else { + return(f) + } +} + + + +# *---- object creation ----* #### + + + + + + +## Visium #### + +#' @title Create a giotto object from 10x visium data +#' @name createGiottoVisiumObject +#' @description Create Giotto object directly from a 10X visium folder. Also +#' accepts visium H5 outputs. +#' +#' @param visium_dir path to the 10X visium directory [required] +#' @param expr_data raw or filtered data (see details) +#' @param gene_column_index which column index to select (see details) +#' @param h5_visium_path path to visium 10X .h5 file +#' @param h5_gene_ids gene names as symbols (default) or ensemble gene ids +#' @param h5_tissue_positions_path path to tissue locations (.csv file) +#' @param h5_image_png_path path to tissue .png file (optional). Image +#' autoscaling looks for matches in the filename for either 'hires' or 'lowres' +#' @param h5_json_scalefactors_path path to .json scalefactors (optional) +#' @param png_name select name of png to use (see details) +#' @param do_manual_adj deprecated +#' @param xmax_adj deprecated +#' @param xmin_adj deprecated +#' @param ymax_adj deprecated +#' @param ymin_adj deprecated +#' @param instructions list of instructions or output result from +#' \code{\link[GiottoClass]{createGiottoInstructions}} +#' @param cores how many cores or threads to use to read data if paths are +#' provided +#' @param expression_matrix_class class of expression matrix to use +#' (e.g. 'dgCMatrix', 'DelayedArray') +#' @param h5_file optional path to create an on-disk h5 file +#' @param verbose be verbose +#' +#' @return giotto object +#' @details +#' If starting from a Visium 10X directory: +#' \itemize{ +#' \item{expr_data: raw will take expression data from raw_feature_bc_matrix and filter from filtered_feature_bc_matrix} +#' \item{gene_column_index: which gene identifiers (names) to use if there are multiple columns (e.g. ensemble and gene symbol)} +#' \item{png_name: by default the first png will be selected, provide the png name to override this (e.g. myimage.png)} +#' \item{the file scalefactors_json.json will be detected automatically and used to attempt to align the data} +#' } +#' +#' If starting from a Visium 10X .h5 file +#' \itemize{ +#' \item{h5_visium_path: full path to .h5 file: /your/path/to/visium_file.h5} +#' \item{h5_tissue_positions_path: full path to spatial locations file: /you/path/to/tissue_positions_list.csv} +#' \item{h5_image_png_path: full path to png: /your/path/to/images/tissue_lowres_image.png} +#' \item{h5_json_scalefactors_path: full path to .json file: /your/path/to/scalefactors_json.json} +#' } +#' +#' @export +createGiottoVisiumObject <- function(visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + h5_visium_path = NULL, + h5_gene_ids = c("symbols", "ensembl"), + h5_tissue_positions_path = NULL, + h5_image_png_path = NULL, + h5_json_scalefactors_path = NULL, + png_name = NULL, + do_manual_adj = FALSE, # deprecated + xmax_adj = 0, # deprecated + xmin_adj = 0, # deprecated + ymax_adj = 0, # deprecated + ymin_adj = 0, # deprecated + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + cores = NA, + verbose = NULL) { + # NSE vars + barcode <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL + + # handle deprecations + img_dep_msg <- "The params 'do_manual_adj', 'xmax_adj', 'xmin_adj', + 'ymax_adj', 'ymin_adj' are no longer used. + Please use the automated workflow." + if (!isFALSE(do_manual_adj) || + xmax_adj != 0 || + xmin_adj != 0 || + ymax_adj != 0 || + ymin_adj != 0) { + stop(wrap_txt(img_dep_msg)) + } + + # set number of cores automatically, but with limit of 10 + cores <- determine_cores(cores) + data.table::setDTthreads(threads = cores) + + + # get arguments list for object creation + if (!is.null(h5_visium_path)) { + argslist <- .visium_read_h5( + h5_visium_path = h5_visium_path, # expression matrix file + h5_gene_ids = h5_gene_ids, # symbol or ensembl + h5_tissue_positions_path = h5_tissue_positions_path, + h5_image_png_path = h5_image_png_path, + h5_json_scalefactors_path = h5_json_scalefactors_path, + verbose = verbose + ) + } else { + argslist <- .visium_read_folder( + visium_dir = visium_dir, + expr_data = expr_data, # type of expression matrix to load + gene_column_index = gene_column_index, # symbol or ensembl + png_name = png_name, + verbose = verbose + ) + } + + # additional args to pass to object creation + argslist$verbose <- verbose + argslist$expression_matrix_class <- expression_matrix_class + argslist$h5_file <- h5_file + argslist$instructions <- instructions + + giotto_object <- do.call(.visium_create, args = argslist) + + return(giotto_object) +} + + + + + + + + +.visium_create <- function( + expr_counts_path, + h5_gene_ids = NULL, # h5 + gene_column_index = NULL, # folder + tissue_positions_path, + image_path = NULL, + scale_json_path = NULL, + png_name = NULL, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + verbose = NULL) { + # NSE vars + barcode <- cell_ID <- row_pxl <- col_pxl <- in_tissue <- array_row <- + array_col <- NULL + + # Assume path checking has been done + + # 1. expression + if (!is.null(h5_gene_ids)) { + expr_results <- get10Xmatrix_h5( + path_to_data = expr_counts_path, + gene_ids = h5_gene_ids + ) + } else { + expr_results <- get10Xmatrix( + path_to_data = expr_counts_path, + gene_column_index = gene_column_index + ) + } + + # if expr_results is not a list, make it a list compatible with downstream + if (!is.list(expr_results)) expr_results <- list( + "Gene Expression" = expr_results) + + # format expected data into list to be used with readExprData() + raw_matrix_list <- list("cell" = list("rna" = list( + "raw" = expr_results[["Gene Expression"]]))) + + # add protein expression data to list if it exists + if ("Antibody Capture" %in% names(expr_results)) { + raw_matrix_list$cell$protein$raw <- expr_results[["Antibody Capture"]] + } + + + # 2. spatial locations + spatial_results <- data.table::fread(tissue_positions_path) + colnames(spatial_results) <- c("barcode", "in_tissue", "array_row", + "array_col", "col_pxl", "row_pxl") + spatial_results <- spatial_results[match(colnames( + raw_matrix_list$cell[[1]]$raw), barcode)] + data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") + spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] + # flip x and y + colnames(spatial_locs) <- c("cell_ID", "sdimx", "sdimy") + + + # 3. scalefactors (optional) + json_info <- .visium_read_scalefactors(scale_json_path) + + + # 4. image (optional) + if (!is.null(image_path)) { + visium_png_list <- .visium_image( + image_path = image_path, + json_info = json_info, + verbose = verbose + ) + } + + # 5. metadata + meta_results <- spatial_results[ + , .(cell_ID, in_tissue, array_row, array_col)] + expr_types <- names(raw_matrix_list$cell) + meta_list <- list() + for (etype in expr_types) { + meta_list[[etype]] <- meta_results + } + + + # 6. giotto object + giotto_object <- createGiottoObject( + expression = raw_matrix_list, + spatial_locs = spatial_locs, + instructions = instructions, + cell_metadata = meta_list, + images = visium_png_list + ) + + + # 7. polygon information + if (!is.null(json_info)) { + visium_polygons <- .visium_spot_poly( + spatlocs = spatial_locs, + json_scalefactors = json_info + ) + giotto_object <- setPolygonInfo( + gobject = giotto_object, + x = visium_polygons, + centroids_to_spatlocs = FALSE, + verbose = FALSE, + initialize = TRUE + ) + } + + return(giotto_object) +} + + + +# Find and check the filepaths within a structured visium directory +.visium_read_folder <- function( + visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + png_name = NULL, + verbose = NULL) { + vmsg(.v = verbose, "A structured visium directory will be used") + + ## check arguments + if (is.null(visium_dir)) + .gstop("visium_dir needs to be a path to a visium directory") + visium_dir <- path.expand(visium_dir) + if (!dir.exists(visium_dir)) .gstop(visium_dir, " does not exist!") + expr_data <- match.arg(expr_data, choices = c("raw", "filter")) + + + ## 1. check expression + expr_counts_path <- switch(expr_data, + "raw" = paste0(visium_dir, "/", "raw_feature_bc_matrix/"), + "filter" = paste0(visium_dir, "/", "filtered_feature_bc_matrix/") + ) + if (!file.exists(expr_counts_path)) + .gstop(expr_counts_path, "does not exist!") + + + ## 2. check spatial locations + spatial_dir <- paste0(visium_dir, "/", "spatial/") + tissue_positions_path <- Sys.glob( + paths = file.path(spatial_dir, "tissue_positions*")) + + + ## 3. check spatial image + if (is.null(png_name)) { + png_list <- list.files(spatial_dir, pattern = "*.png") + png_name <- png_list[1] + } + png_path <- paste0(spatial_dir, "/", png_name) + if (!file.exists(png_path)) .gstop(png_path, " does not exist!") + + + ## 4. check scalefactors + scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") + if (!file.exists(scalefactors_path)) + .gstop(scalefactors_path, "does not exist!") + + + list( + expr_counts_path = expr_counts_path, + gene_column_index = gene_column_index, + tissue_positions_path = tissue_positions_path, + image_path = png_path, + scale_json_path = scalefactors_path + ) +} + + + +.visium_read_h5 <- function( + h5_visium_path = h5_visium_path, # expression matrix + h5_gene_ids = h5_gene_ids, + h5_tissue_positions_path = h5_tissue_positions_path, + h5_image_png_path = h5_image_png_path, + h5_json_scalefactors_path = h5_json_scalefactors_path, + verbose = NULL) { + # 1. filepaths + vmsg(.v = verbose, + "A path to an .h5 10X file was provided and will be used") + if (!file.exists(h5_visium_path)) + .gstop("The provided path ", h5_visium_path, " does not exist") + if (is.null(h5_tissue_positions_path)) + .gstop("A path to the tissue positions (.csv) needs to be provided to + h5_tissue_positions_path") + if (!file.exists(h5_tissue_positions_path)) + .gstop("The provided path ", h5_tissue_positions_path, + " does not exist") + if (!is.null(h5_image_png_path)) { + if (!file.exists(h5_image_png_path)) { + .gstop("The provided h5 image path ", h5_image_png_path, + "does not exist. + Set to NULL to exclude or provide the correct path.") + } + } + if (!is.null(h5_json_scalefactors_path)) { + if (!file.exists(h5_json_scalefactors_path)) { + warning(wrap_txt( + "No file found at h5_json_scalefactors_path. + Scalefactors are needed for proper image alignment and + polygon generation" + )) + } + } + + list( + expr_counts_path = h5_visium_path, + h5_gene_ids = h5_gene_ids, + tissue_positions_path = h5_tissue_positions_path, + image_path = h5_image_png_path, + scale_json_path = h5_json_scalefactors_path + ) +} + + + + + + + + + +# Visium Polygon Creation + +#' @title Add Visium Polygons to Giotto Object +#' @name addVisiumPolygons +#' @param gobject Giotto Object created with visium data, containing spatial +#' locations corresponding to spots +#' @param scalefactor_path path to scalefactors_json.json Visium output +#' @returns Giotto Object with to-scale circular polygons added at each spatial +#' location +#' @details +#' Adds circular giottoPolygons to the spatial_info slot of a Giotto Object +#' for the "cell" spatial unit. +#' @export +addVisiumPolygons <- function(gobject, + scalefactor_path = NULL) { + assert_giotto(gobject) + + visium_spat_locs <- getSpatialLocations( + gobject = gobject, + spat_unit = "cell" + ) + + scalefactors_list <- .visium_read_scalefactors( + json_path = scalefactor_path + ) + + visium_polygons <- .visium_spot_poly( + spatlocs = visium_spat_locs, + json_scalefactors = scalefactors_list + ) + + gobject <- addGiottoPolygons( + gobject = gobject, + gpolygons = list(visium_polygons) + ) + + return(gobject) +} + + + + + +#' @title Read Visium ScaleFactors +#' @name .visium_read_scalefactors +#' @param json_path path to scalefactors_json.json for Visium experimental data +#' @returns scalefactors within the provided json file as a named list, +#' or NULL if not discovered +#' @details asserts the existence of and reads in a .json file +#' containing scalefactors for Visium data in the expected format. +#' Returns NULL if no path is provided or if the file does not exist. +#' @keywords internal +.visium_read_scalefactors <- function(json_path = NULL) { + if (!checkmate::test_file_exists(json_path)) { + if (!is.null(json_path)) { + warning("scalefactors not discovered at: \n", + json_path, call. = FALSE) + } + return(NULL) + } + + json_scalefactors <- jsonlite::read_json(json_path) + + # Intial assertion that json dimensions are appropriate + checkmate::assert_list( + x = json_scalefactors, + types = "numeric", + min.len = 4L, + max.len = 5L + ) + + expected_json_names <- c( + "regist_target_img_scalef", # NEW as of 2023 + "spot_diameter_fullres", + "tissue_hires_scalef", + "fiducial_diameter_fullres", + "tissue_lowres_scalef" + ) + + # Visium assay with chemistry v2 contains an additional + # keyword in the json file + new_format_2023 <- checkmate::test_list( + x = json_scalefactors, + types = "numeric", + len = 5L + ) + + # If the scalefactors are of size 4 (older assay), clip the new keyword + if (!new_format_2023) expected_json_names <- expected_json_names[2:5] + + if (!setequal(names(json_scalefactors), expected_json_names)) { + warning(GiottoUtils::wrap_txt( + "h5 scalefactors json names differ from expected. + [Expected]:", expected_json_names, "\n", + "[Actual]:", names(json_scalefactors) + )) + } + + return(json_scalefactors) +} + + +#' @title Calculate Pixel to Micron Scalefactor +#' @name visium_micron_scalefactor +#' @param json_scalefactors list of scalefactors from +#' .visium_read_scalefactors() +#' @returns scale factor for converting pixel to micron +#' @details +#' Calculates pixel to micron scalefactor. +#' Visium xy coordinates are based on the fullres image +#' The values provided are directly usable for generating polygon information +#' or calculating the micron size relative to spatial coordinates for this set +#' of spatial information. +#' @keywords internal +.visium_micron_scale <- function(json_scalefactors) { + # visium spots diameter : 55 micron + # diameter of a spot at this spatial scaling : scalefactor_list$spot_diameter_fullres + px_to_micron <- 55 / json_scalefactors$spot_diameter_fullres + return(px_to_micron) +} + + +#' @title Create Polygons for Visium Data +#' @name .visium_spot_poly +#' @param spatlocs spatial locations data.table or `spatLocsObj` containing +#' centroid locations of visium spots +#' @param json_scalefactors list of scalefactors from +#' .visium_read_scalefactors() +#' @returns giottoPolygon object +#' @details +#' Creates circular polygons for spatial representation of +#' Visium spots. +#' @keywords internal +#' @md +.visium_spot_poly <- function(spatlocs = NULL, + json_scalefactors) { + if (inherits(spatlocs, "spatLocsObj")) { + spatlocs <- spatlocs[] + } + + vis_spot_poly <- GiottoClass::circleVertices( + radius = json_scalefactors$spot_diameter_fullres / 2 + ) + + GiottoClass::polyStamp( + stamp_dt = vis_spot_poly, + spatlocs = spatlocs, + verbose = FALSE + ) %>% + createGiottoPolygonsFromDfr( + calc_centroids = TRUE, + verbose = FALSE + ) +} + + + + + + +# json_info expects the list read output from .visium_read_scalefactors +# image_path should be expected to be full filepath +# should only be used when do_manual_adj (deprecated) is FALSE +.visium_image <- function( + image_path, + json_info = NULL, + micron_scale = FALSE, + verbose = NULL) { + # assume image already checked + vmsg(.v = verbose, .initial = " - ", "found image") + + # 1. determine image scalefactor to use ---------------------------------- # + if (!is.null(json_info)) checkmate::assert_list(json_info) + png_name <- basename(image_path) # used for name pattern matching only + + if (is.null(json_info)) { # if none provided + warning(wrap_txt( + "No scalefactors json info provided. + Visium image scale_factor defaulting to 1" + )) + scale_factor <- 1 + } else { # if provided + + scale_factor <- NULL # initial value + + # determine type of visium image + visium_img_type <- NULL + possible_types <- c("lowres", "hires") + for (img_type in possible_types) { + if (grepl(img_type, png_name)) visium_img_type <- img_type + } + + if (is.null(visium_img_type)) { # if not recognized visium image type + .gstop( + "\'image_path\' filename did not partial match either + \'lowres\' or \'hires\'. Ensure specified image is either the + Visium lowres or hires image and rename it accordingly" + ) + } + + vmsg( + .v = verbose, .initial = " - ", + "found scalefactors. attempting automatic alignment for the", + str_quote(visium_img_type), "image\n\n" + ) + + scale_factor <- switch(visium_img_type, + "lowres" = json_info[["tissue_lowres_scalef"]], + "hires" = json_info[["tissue_hires_scalef"]] + ) + } + + if (isTRUE(micron_scale)) { + scale_factor <- scale_factor * .visium_micron_scale(json_info) + } + + # 2. create image -------------------------------------------------------- # + visium_img <- createGiottoLargeImage( + raster_object = image_path, + name = "image", + negative_y = TRUE, + scale_factor = (1 / scale_factor) + ) + + visium_img_list <- list(visium_img) + names(visium_img_list) <- c("image") + + return(visium_img_list) +} + + + + + + + + + + + +## MERSCOPE #### + + +#' @title Create Vizgen MERSCOPE largeImage +#' @name createMerscopeLargeImage +#' @description +#' Read MERSCOPE stitched images as giottoLargeImage. Images will also be +#' transformed to match the spatial coordinate reference system of the paired +#' points and polygon data. +#' @param image_file character. Path to one or more MERSCOPE images to load +#' @param transforms_file character. Path to MERSCOPE transforms file. Usually +#' in the same folder as the images and named +#' 'micron_to_mosaic_pixel_transform.csv' +#' @param name character. name to assign the image. Multiple should be provided +#' if image_file is a list. +#' @returns giottoLargeImage +#' @export +createMerscopeLargeImage <- function(image_file, + transforms_file, + name = "image") { + checkmate::assert_character(transforms_file) + tfsDT <- data.table::fread(transforms_file) + if (inherits(image_file, "character")) { + image_file <- as.list(image_file) + } + checkmate::assert_list(image_file) + + scalef <- c(1 / tfsDT[[1, 1]], 1 / tfsDT[[2, 2]]) + x_shift <- -tfsDT[[1, 3]] / tfsDT[[1, 1]] + y_shift <- -tfsDT[[2, 3]] / tfsDT[[2, 2]] + + out <- lapply(seq_along(image_file), function(i) { + gimg <- createGiottoLargeImage( + raster_object = image_file[[i]], + name = name[[i]], + scale_factor = scalef, + negative_y = FALSE + ) + + gimg <- spatShift(gimg, dx = x_shift, dy = y_shift) + + gimg@extent <- terra::ext(gimg@raster_object) + return(gimg) + }) + + if (length(out) == 1L) { + out <- unlist(out) + } + + return(out) +} + + + + + + + +#' @title Create Vizgen MERSCOPE Giotto Object +#' @name createGiottoMerscopeObject +#' @description Given the path to a MERSCOPE experiment directory, creates a +#' Giotto object. +#' @param merscope_dir full path to the exported merscope directory +#' @param data_to_use which of either the 'subcellular' or 'aggregate' +#' information to use for object creation +#' @param FOVs which FOVs to use when building the subcellular object. +#' (default is NULL) +#' NULL loads all FOVs (very slow) +#' @param calculate_overlap whether to run \code{\link{calculateOverlapRaster}} +#' @param overlap_to_matrix whether to run \code{\link{overlapToMatrix}} +#' @param aggregate_stack whether to run \code{\link{aggregateStacks}} +#' @param aggregate_stack_param params to pass to \code{\link{aggregateStacks}} +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns a giotto object +#' @details +#' [\strong{Expected Directory}] This function generates a giotto object when +#' given a link to a MERSCOPE output directory. It expects the following items +#' within the directory where the \strong{bolded} portions are what this +#' function matches against: +#' \itemize{ +#' \item{\strong{cell_boundaries} (folder .hdf5 files)} +#' \item{\strong{images} (folder of .tif images and a scalefactor/transfrom table)} +#' \item{\strong{cell_by_gene}.csv (file)} +#' \item{cell_metadata\strong{fov_positions_file}.csv (file)} +#' \item{detected_transcripts\strong{metadata_file}.csv (file)} +#' } +#' @export +createGiottoMerscopeObject <- function(merscope_dir, + data_to_use = c("subcellular", "aggregate"), + FOVs = NULL, + poly_z_indices = 1:7, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + instructions = NULL, + cores = NA, + verbose = TRUE) { + fovs <- NULL + + # 0. setup + merscope_dir <- path.expand(merscope_dir) + + poly_z_indices <- as.integer(poly_z_indices) + if (any(poly_z_indices < 1)) { + stop(wrap_txt( + "poly_z_indices is a vector of one or more integers starting from 1.", + errWidth = TRUE + )) + } + + # determine data to use + data_to_use <- match.arg( + arg = data_to_use, choices = c("subcellular", "aggregate")) + + # 1. test if folder structure exists and is as expected + dir_items <- .read_merscope_folder( + merscope_dir = merscope_dir, + data_to_use = data_to_use, + cores = cores, + verbose = verbose + ) + + # 2. load in directory items + data_list <- .load_merscope_folder( + dir_items = dir_items, + data_to_use = data_to_use, + poly_z_indices = poly_z_indices, + fovs = fovs, + cores = cores, + verbose = verbose + ) + + # 3. Create giotto object + if (data_to_use == "subcellular") { + merscope_gobject <- .createGiottoMerscopeObject_subcellular( + data_list = data_list, + calculate_overlap = calculate_overlap, + overlap_to_matrix = overlap_to_matrix, + aggregate_stack = aggregate_stack, + aggregate_stack_param = aggregate_stack_param, + cores = cores, + verbose = verbose + ) + } else if (data_to_use == "aggregate") { + merscope_gobject <- .createGiottoMerscopeObject_aggregate( + data_list = data_list, + cores = cores, + verbose = verbose + ) + } else { + stop(wrap_txt('data_to_use "', data_to_use, + '" not implemented', sep = "")) + } + + return(merscope_gobject) +} + + + + +#' @describeIn createGiottoMerscopeObject Create giotto object with +#' 'subcellular' workflow +#' @param data_list list of loaded data from \code{\link{load_merscope_folder}} +#' @keywords internal +.createGiottoMerscopeObject_subcellular <- function(data_list, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + cores = NA, + verbose = TRUE) { + feat_coord <- neg_coord <- cellLabel_dir <- instructions <- NULL + + # unpack data_list + poly_info <- data_list$poly_info + tx_dt <- data_list$tx_dt + micronToPixelScale <- data_list$micronToPixelScale + image_list <- data_list$images + + # data.table vars + gene <- NULL + + # split tx_dt by expression and blank + vmsg("Splitting detections by feature vs blank", .v = verbose) + feat_id_all <- tx_dt[, unique(gene)] + blank_id <- feat_id_all[grepl(pattern = "Blank", feat_id_all)] + feat_id <- feat_id_all[!feat_id_all %in% blank_id] + + feat_dt <- tx_dt[gene %in% feat_id, ] + blank_dt <- tx_dt[gene %in% blank_id, ] + + # extract transcript_id col and store as feature meta + feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), + with = FALSE]) + blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), + with = FALSE]) + feat_dt[, c("transcript_id", "barcode_id") := NULL] + blank_dt[, c("transcript_id", "barcode_id") := NULL] + + if (isTRUE(verbose)) { + message(" > Features: ", feat_dt[, .N]) + message(" > Blanks: ", blank_dt[, .N]) + } + + # build giotto object + vmsg("Building subcellular giotto object...", .v = verbose) + z_sub <- createGiottoObjectSubcellular( + gpoints = list( + "rna" = feat_coord, + "neg_probe" = neg_coord + ), + gpolygons = list("cell" = cellLabel_dir), + polygon_mask_list_params = list( + mask_method = "guess", + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_horizontal_step = FALSE + ), + instructions = instructions, + cores = cores + ) +} + + + + +#' @describeIn createGiottoMerscopeObject Create giotto object with 'aggregate' +#' workflow +#' @param data_list list of loaded data from \code{\link{load_merscope_folder}} +#' @keywords internal +.createGiottoMerscopeObject_aggregate <- function(data_list, + cores = NA, + verbose = TRUE) { + # unpack data_list + micronToPixelScale <- data_list$micronToPixelScale + expr_dt <- data_list$expr_dt + cell_meta <- data_list$expr_mat + image_list <- data_list$images + + # split expr_dt by expression and blank + + # feat_id_all = +} + + + + +## Spatial Genomics #### + +#' @title Create Spatial Genomics Giotto Object +#' @name createSpatialGenomicsObject +#' @param sg_dir full path to the exported Spatial Genomics directory +#' @param instructions new instructions +#' (e.g. result from createGiottoInstructions) +#' @returns giotto object +#' @description Given the path to a Spatial Genomics data directory, creates a +#' Giotto object. +#' @export +createSpatialGenomicsObject <- function(sg_dir = NULL, + instructions = NULL) { + # Find files in Spatial Genomics directory + dapi <- list.files(sg_dir, full.names = TRUE, pattern = "DAPI") + mask <- list.files(sg_dir, full.names = TRUE, pattern = "mask") + tx <- list.files(sg_dir, full.names = TRUE, pattern = "transcript") + # Create Polygons + gpoly <- createGiottoPolygonsFromMask( + mask, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + flip_horizontal = FALSE, + flip_vertical = FALSE + ) + # Create Points + tx <- data.table::fread(tx) + gpoints <- createGiottoPoints(tx) + dim(tx) + # Create object and add image + gimg <- createGiottoLargeImage(dapi, use_rast_ext = TRUE) + sg <- createGiottoObjectSubcellular( + gpoints = list("rna" = gpoints), + gpolygons = list("cell" = gpoly), + instructions = instructions + ) + sg <- addGiottoImage(sg, images = list(image = gimg)) + # Return SG object + return(sg) +} + + + + + + + + + + + + + + + +# *---- folder reading and detection ----* #### + + +#' @describeIn read_data_folder Read a structured MERSCOPE folder +#' @keywords internal +.read_merscope_folder <- function(merscope_dir, + data_to_use, + cores = NA, + verbose = NULL) { + # prepare dir_items list + dir_items <- list( + `boundary info` = "*cell_boundaries*", + `image info` = "*images*", + `cell feature matrix` = "*cell_by_gene*", + `cell metadata` = "*cell_metadata*", + `raw transcript info` = "*transcripts*" + ) + + # prepare require_data_DT + sub_reqs <- data.table::data.table( + workflow = c("subcellular"), + item = c( + "boundary info", + "raw transcript info", + "image info", + "cell by gene matrix", + "cell metadata" + ), + needed = c(TRUE, TRUE, FALSE, FALSE, FALSE) + ) + + agg_reqs <- data.table::data.table( + workflow = c("aggregate"), + item = c( + "boundary info", + "raw transcript info", + "image info", + "cell by gene matrix", + "cell metadata" + ), + needed = c(FALSE, FALSE, FALSE, TRUE, TRUE) + ) + + require_data_DT <- rbind(sub_reqs, agg_reqs) + + dir_items <- .read_data_folder( + spat_method = "MERSCOPE", + data_dir = merscope_dir, + dir_items = dir_items, + data_to_use = data_to_use, + require_data_DT = require_data_DT, + cores = cores, + verbose = verbose + ) + + return(dir_items) +} + + + + + + + + + + +# * ---- folder loading ---- * #### + + + +## MERSCOPE #### + +#' @title Load MERSCOPE data from folder +#' @name load_merscope_folder +#' @param dir_items list of full filepaths from +#' \code{\link{.read_merscope_folder}} +#' @inheritParams createGiottoMerscopeObject +#' @returns list of loaded-in MERSCOPE data +NULL + +#' @rdname load_merscope_folder +#' @keywords internal +.load_merscope_folder <- function(dir_items, + data_to_use, + fovs = NULL, + poly_z_indices = 1L:7L, + cores = NA, + verbose = TRUE) { + # 1. load data_to_use-specific + if (data_to_use == "subcellular") { + data_list <- .load_merscope_folder_subcellular( + dir_items = dir_items, + data_to_use = data_to_use, + fovs = fovs, + poly_z_indices = poly_z_indices, + cores = cores, + verbose = verbose + ) + } else if (data_to_use == "aggregate") { + data_list <- .load_merscope_folder_aggregate( + dir_items = dir_items, + data_to_use = data_to_use, + cores = cores, + verbose = verbose + ) + } else { + stop(wrap_txt('data_to_use "', data_to_use, + '" not implemented', sep = "")) + } + + # 2. Load images if available + if (!is.null(dir_items$`image info`)) { + ## micron to px scaling factor + micronToPixelScale <- Sys.glob(paths = file.path( + dir_items$`image info`, "*micron_to_mosaic_pixel_transform*"))[[1]] + micronToPixelScale <- data.table::fread( + micronToPixelScale, nThread = cores) + # add to data_list + data_list$micronToPixelScale <- micronToPixelScale + + ## staining images + ## determine types of stains + images_filenames <- list.files(dir_items$`image info`) + bound_stains_filenames <- images_filenames[ + grep(pattern = ".tif", images_filenames)] + bound_stains_types <- sapply(strsplit( + bound_stains_filenames, "_"), `[`, 2) + bound_stains_types <- unique(bound_stains_types) + + img_list <- lapply_flex(bound_stains_types, function(stype) { + img_paths <- Sys.glob(paths = file.path( + dir_items$`image info`, paste0("*", stype, "*"))) + + lapply_flex(img_paths, function(img) { + createGiottoLargeImage(raster_object = img) + }, cores = cores) + }, cores = cores) + # add to data_list + data_list$images <- img_list + } + + + + return(data_list) +} + + + +#' @describeIn load_merscope_folder Load items for 'subcellular' workflow +#' @keywords internal +.load_merscope_folder_subcellular <- function(dir_items, + data_to_use, + cores = NA, + poly_z_indices = 1L:7L, + verbose = TRUE, + fovs = NULL) { + if (isTRUE(verbose)) message("Loading transcript level info...") + if (is.null(fovs)) { + tx_dt <- data.table::fread( + dir_items$`raw transcript info`, nThread = cores) + } else { + message("Selecting FOV subset transcripts") + tx_dt <- fread_colmatch( + file = dir_items$`raw transcript info`, + col = "fov", + values_to_match = fovs, + verbose = FALSE, + nThread = cores + ) + } + tx_dt[, c("x", "y") := NULL] # remove unneeded cols + data.table::setcolorder( + tx_dt, c("gene", "global_x", "global_y", "global_z")) + + if (isTRUE(verbose)) message("Loading polygon info...") + poly_info <- readPolygonFilesVizgenHDF5( + boundaries_path = dir_items$`boundary info`, + z_indices = poly_z_indices, + flip_y_axis = TRUE, + fovs = fovs + ) + + data_list <- list( + "poly_info" = poly_info, + "tx_dt" = tx_dt, + "micronToPixelScale" = NULL, + "expr_dt" = NULL, + "cell_meta" = NULL, + "images" = NULL + ) +} + + + +#' @describeIn load_merscope_folder Load items for 'aggregate' workflow +#' @keywords internal +.load_merscope_folder_aggregate <- function(dir_items, + data_to_use, + cores = NA, + verbose = TRUE) { + # metadata is polygon-related measurements + vmsg("Loading cell metadata...", .v = verbose) + cell_metadata_file <- data.table::fread( + dir_items$`cell metadata`, nThread = cores) + + vmsg("Loading expression matrix", .v = verbose) + expr_dt <- data.table::fread( + dir_items$`cell feature matrix`, nThread = cores) + + + data_list <- list( + "poly_info" = NULL, + "tx_dt" = NULL, + "micronToPixelScale" = NULL, + "expr_dt" = expr_dt, + "cell_meta" = cell_metadata_file, + "images" = NULL + ) +} + + + + + + + + + + + + + + + + + +## ArchR #### + +#' Create an ArchR project and run LSI dimension reduction +#' +#' @param fragmentsPath A character vector containing the paths to the input +#' files to use to generate the ArrowFiles. +#' These files can be in one of the following formats: (i) scATAC tabix files, +#' (ii) fragment files, or (iii) bam files. +#' @param genome A string indicating the default genome to be used for all ArchR +#' functions. Currently supported values include "hg19","hg38","mm9", and "mm10". +#' This value is stored as a global environment variable, not part of the +#' ArchRProject. +#' This can be overwritten on a per-function basis using the given function's +#' geneAnnotationand genomeAnnotation parameter. For something other than one of +#' the currently supported, see createGeneAnnnotation() and +#' createGenomeAnnnotation() +#' @param createArrowFiles_params list of parameters passed to +#' `ArchR::createArrowFiles` +#' @param ArchRProject_params list of parameters passed to `ArchR::ArchRProject` +#' @param addIterativeLSI_params list of parameters passed to +#' `ArchR::addIterativeLSI` +#' @param threads number of threads to use. Default = `ArchR::getArchRThreads()` +#' @param force Default = FALSE +#' @param verbose Default = TRUE +#' +#' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and +#' TileMatrix-based LSI +#' @export +createArchRProj <- function(fragmentsPath, + genome = c("hg19", "hg38", "mm9", "mm10"), + createArrowFiles_params = list( + sampleNames = "sample1", + minTSS = 0, + minFrags = 0, + maxFrags = 1e+07, + minFragSize = 10, + maxFragSize = 2000, + offsetPlus = 0, + offsetMinus = 0, + TileMatParams = list(tileSize = 5000) + ), + ArchRProject_params = list( + outputDirectory = getwd(), + copyArrows = FALSE + ), + addIterativeLSI_params = list(), + threads = ArchR::getArchRThreads(), + force = FALSE, + verbose = TRUE) { + if (!requireNamespace("ArchR")) { + message('ArchR is needed. Install the package using + remotes::install_github("GreenleafLab/ArchR")') + } + + ## Add reference genome + message("Loading reference genome") + ArchR::addArchRGenome(genome) + + # Creating Arrow Files + message("Creating Arrow files") + ArrowFiles <- do.call( + ArchR::createArrowFiles, + c( + inputFiles = fragmentsPath, + verbose = verbose, + force = force, + createArrowFiles_params + ) + ) + + # Creating an ArchRProject + message("Creating ArchRProject") + proj <- do.call( + ArchR::ArchRProject, + c(list(ArrowFiles = ArrowFiles), + threads = threads, + ArchRProject_params + ) + ) + + # Data normalization and dimensionality reduction + message("Running dimension reduction") + proj <- do.call( + ArchR::addIterativeLSI, + c( + ArchRProj = proj, + verbose = verbose, + name = "IterativeLSI", + threads = threads, + force = force, + addIterativeLSI_params + ) + ) +} + +#' Create a Giotto object from an ArchR project +#' +#' @param archRproj ArchR project +#' @param expression expression information +#' @param expression_feat Giotto object available features (e.g. atac, rna, ...) +#' @param spatial_locs data.table or data.frame with coordinates for cell +#' centroids +#' @param sampleNames A character vector containing the ArchR project sample +#' name +#' @param ... additional arguments passed to `createGiottoObject` +#' +#' @returns A Giotto object with at least an atac or epigenetic modality +#' +#' @export +createGiottoObjectfromArchR <- function(archRproj, + expression = NULL, + expression_feat = "atac", + spatial_locs = NULL, + sampleNames = "sample1", + ...) { + # extract GeneScoreMatrix + GeneScoreMatrix_summarizedExperiment <- ArchR::getMatrixFromProject( + archRproj) + GeneScoreMatrix <- slot(slot( + GeneScoreMatrix_summarizedExperiment, "assays"), + "data")[["GeneScoreMatrix"]] + + ## get cell names + cell_names <- colnames(GeneScoreMatrix) + cell_names <- gsub(paste0(sampleNames, "#"), "", cell_names) + cell_names <- gsub("-1", "", cell_names) + + ## get gene names + gene_names <- slot(GeneScoreMatrix_summarizedExperiment, + "elementMetadata")[["name"]] + + ## replace colnames with cell names + colnames(GeneScoreMatrix) <- cell_names + + ## replace rownames with gene names + rownames(GeneScoreMatrix) <- gene_names + + + if (!is.null(expression)) { + expression_matrix <- data.table::fread(expression) + + expression_cell_names <- colnames(expression_matrix) + cell_names <- intersect(cell_names, expression_cell_names) + + expression_matrix <- Matrix::Matrix(as.matrix(expression_matrix[, -1]), + dimnames = list( + expression_matrix[[1]], + colnames(expression_matrix[, -1]) + ), + sparse = TRUE + ) + + expression <- expression_matrix[, cell_names] + + GeneScoreMatrix <- GeneScoreMatrix[, cell_names] + } + + + ## filter spatial locations + if (!is.null(spatial_locs)) { + x <- read.csv(spatial_locs) + x <- x[x$cell_ID %in% cell_names, ] + spatial_locs <- x + } + + # Creating GiottoObject + message("Creating GiottoObject") + + if (!is.null(expression)) { + gobject <- createGiottoObject( + expression = list( + GeneScoreMatrix = GeneScoreMatrix, + raw = expression + ), + expression_feat = expression_feat, + spatial_locs = spatial_locs, + ... + ) + } else { + gobject <- createGiottoObject( + expression = list(GeneScoreMatrix = GeneScoreMatrix), + expression_feat = expression_feat, + spatial_locs = spatial_locs, + ... + ) + } + + # add LSI dimension reduction + coordinates <- slot(archRproj, "reducedDims")[["IterativeLSI"]][["matSVD"]] + + ## clean cell names + lsi_cell_names <- rownames(coordinates) + lsi_cell_names <- gsub(paste0(sampleNames, "#"), "", lsi_cell_names) + lsi_cell_names <- gsub("-1", "", lsi_cell_names) + + rownames(coordinates) <- lsi_cell_names + + coordinates <- coordinates[cell_names, ] + + dimension_reduction <- Giotto::createDimObj( + coordinates = coordinates, + name = "lsi", + spat_unit = "cell", + feat_type = expression_feat[1], + method = "lsi" + ) + gobject <- setDimReduction(gobject, + dimension_reduction, + spat_unit = "cell", + feat_type = expression_feat[1], + name = "lsi", + reduction_method = "lsi" + ) + + return(gobject) +} diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R new file mode 100644 index 000000000..6b736b839 --- /dev/null +++ b/R/convenience_xenium.R @@ -0,0 +1,1626 @@ + +# CLASS #### + + + +setClass( + "XeniumReader", + slots = list( + xenium_dir = "character", + filetype = "list", + qv = "ANY", + calls = "list" + ), + prototype = list( + filetype = list( + transcripts = "parquet", + boundaries = "parquet", + expression = "h5", + cell_meta = "parquet" + ), + qv = 20, + calls = list() + ) +) + +# * show #### +setMethod("show", signature("XeniumReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "XeniumReader")) + print_slots <- c("dir", "filetype", "qv_cutoff", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@xenium_dir + if (length(d) > 0L) { + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # qv + qv <- object@qv + cat(pre["qv_cutoff"], paste(qv, collapse = ", "), "\n") + + # filetype + .filetype_prints(x = object, pre = pre["filetype"]) + + # funs + .reader_fun_prints(x = object, pre = pre["funs"]) +}) + +# * print #### +setMethod("print", signature("XeniumReader"), function(x, ...) show(x)) + +# * init #### +setMethod( + "initialize", signature("XeniumReader"), + function( + .Object, + xenium_dir, + filetype, + qv_cutoff + ) { + .Object <- callNextMethod(.Object) + + # provided params (if any) + if (!missing(xenium_dir)) { + checkmate::assert_directory_exists(xenium_dir) + .Object@xenium_dir <- xenium_dir + } + if (!missing(filetype)) { + .Object@filetype <- filetype + } + if (!missing(qv_cutoff)) { + .Object@qv <- qv_cutoff + } + + + # check filetype + ftype_data <- c("transcripts", "boundaries", "expression", "cell_meta") + if (!all(ftype_data %in% names(.Object@filetype))) { + stop(wrap_txt("`$filetype` must have entries for each of:\n", + paste(ftype_data, collapse = ", "))) + } + + ftype <- .Object@filetype + ft_tab <- c("csv", "parquet") + ft_exp <- c("h5", "mtx", "zarr") + if (!ftype$transcripts %in% ft_tab) { + stop(wrap_txt("`$filetype$transcripts` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$boundaries %in% ft_tab) { + stop(wrap_txt("`$filetype$boundaries` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$cell_meta %in% ft_tab) { + stop(wrap_txt("`$filetype$cell_meta` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$expression %in% ft_exp) { + stop(wrap_txt("`$filetype$expression` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + + + # detect paths and subdirs + p <- .Object@xenium_dir + .xenium_detect <- function(pattern, ...) { + .detect_in_dir( + pattern = pattern, ..., + path = p, platform = "Xenium", + ) + } + + cell_meta_path <- .xenium_detect("cells", first = FALSE) + panel_meta_path <- .xenium_detect("panel") # json + experiment_info_path <- .xenium_detect(".xenium") # json + + # 3D stack - DAPI + img_path <- .xenium_detect("morphology.", warn = FALSE) + # 2D fusion images + # - DAPI + # - stainings for multimodal segmentation + img_focus_path <- .xenium_detect("morphology_focus", warn = FALSE) + # Maximum intensity projection (MIP) of the morphology image. + # (Xenium Outputs v1.0 - 1.9. only) + img_mip_path <- .xenium_detect("morphology_mip", warn = FALSE) + + tx_path <- .xenium_detect("transcripts", first = FALSE) + cell_bound_path <- .xenium_detect("cell_bound", first = FALSE) + nuc_bound_path <- .xenium_detect("nucleus_bound", first = FALSE) + + expr_path <- .xenium_detect("cell_feature_matrix", first = FALSE) + + .xenium_ftype <- function(paths, ftype) { + paths[grepl(pattern = paste0(".", ftype), x = paths)] + } + + + # select file formats based on reader settings + tx_path <- .xenium_ftype(tx_path, ftype$transcripts) + cell_bound_path <- .xenium_ftype(cell_bound_path, ftype$boundaries) + nuc_bound_path <- .xenium_ftype(nuc_bound_path, ftype$boundaries) + expr_path <- .xenium_ftype(expr_path, ftype$expression) + cell_meta_path <- .xenium_ftype(cell_meta_path, ftype$cell_meta) + + + # transcripts load call + tx_fun <- function( + path = tx_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + dropcols = c(), + qv_threshold = .Object@qv, + cores = determine_cores(), + verbose = NULL + ) { + .xenium_transcript( + path = path, + feat_type = feat_type, + split_keyword = split_keyword, + dropcols = dropcols, + qv_threshold = qv_threshold, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_transcripts <- tx_fun + + # load polys call + poly_fun <- function( + path = cell_bound_path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL + ) { + .xenium_poly( + path = path, + name = name, + calc_centroids = calc_centroids, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_polys <- poly_fun + + # load cellmeta + cmeta_fun <- function( + path = cell_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + .xenium_cellmeta( + path = path, + dropcols = dropcols, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_cellmeta <- cmeta_fun + + # load featmeta + fmeta_fun <- function( + path = panel_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + .xenium_featmeta( + path = path, + gene_ids, + dropcols = dropcols, + verbose = verbose + ) + } + .Object@calls$load_featmeta <- fmeta_fun + + # load expression call + expr_fun <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + .xenium_expression( + path = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_expression <- expr_fun + + # load image call + + + + + # create giotto object call + gobject_fun <- function( + transcript_path = tx_path, + load_bounds = list( + cell = "cell", + nucleus = "nucleus" + ), + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + load_images = list( + morphology = "focus", + ), + load_expression = FALSE, + load_cellmeta = FALSE + ) { + load_expression <- as.logical(load_expression) + load_cellmeta <- as.logical(load_cellmeta) + + if (!is.null(load_images)) { + checkmate::assert_list(load_images) + if (is.null(names(load_images))) { + stop("Images paths provided to 'load_images' must be named") + } + } + if (!is.null(load_bounds)) { + checkmate::assert_list(load_bounds) + if (is.null(names(load_bounds))) { + stop("bounds paths provided to 'load_bounds' must be named") + } + } + + + + funs <- .Object@calls + + # init gobject + g <- giotto() + + + # transcripts + tx_list <- funs$load_transcripts( + path = transcript_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + + + # polys + if (!is.null(load_bounds)) { + # replace convenient shortnames + load_bounds[load_bounds == "cell"] <- cell_bound_path + load_bounds[load_bounds == "nucleus"] <- nuc_bound_path + + blist <- list() + bnames <- names(load_bounds) + for (b_i in seq_along(load_bounds)) { + b <- funs$load_polys( + path = load_bounds[[b_i]], + name = bnames[[b_i]] + ) + blist <- c(blist, b) + } + for (gpoly_i in seq_along(blist)) { + g <- setGiotto(g, blist[[gpoly_i]]) + } + } + + + # feat metadata + fx <- funs$load_featmeta( + path = + ) + + + # expression + if (load_expression) { + + } + + + # cell metadata + if (load_cellmeta) { + + } + + + # images + if (!is.null(load_images)) { + # replace convenient shortnames + load_images[load_images == "focus"] <- img_focus_path + } + + + + + } + .Object@calls$create_gobject <- gobject_fun + + + return(.Object) + } +) + + + + +# access #### + +#' @export +setMethod("$", signature("XeniumReader"), function(x, name) { + basic_info <- c("xenium_dir", "filetype", "qv") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("XeniumReader"), function(x, name, value) { + basic_info <- c("xenium_dir", "filetype", "qv") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.XeniumReader` <- function(x, pattern) { + dn <- c("xenium_dir", "filetype", "qv") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) +} + + + + +# MODULAR #### + + + + +.xenium_transcript <- function( + path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to tx file provided or auto-detected" + ), call. = FALSE) + } + + checkmate::assert_file_exists(path) + e <- file_extension(path) %>% head(1L) %>% tolower() + vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) + + # read in + a <- list( + path = path, + dropcols = dropcols, + qv_threshold = qv_threshold, + verbose = verbose + ) + vmsg("Loading transcript level info...", .v = verbose) + tx <- switch(e, + "csv" = do.call(.xenium_transcript_csv, + args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_transcript_parquet, args = a), + "zarr" = stop('zarr not yet supported') + ) + + # create gpoints + gpointslist <- createGiottoPoints( + x = tx, + feat_type = feat_type, + split_keyword = split_keyword + ) + + if (inherits(gpointslist, "list")) { + gpointslist <- list(gpointslist) + } + + return(gpointslist) +} + + +.xenium_transcript_csv <- function( + path, + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL +) { + tx_dt <- data.table::fread( + path, nThread = cores, + colClasses = c(transcript_id = "character"), + drop = dropcols + ) + data.table::setnames( + x = tx_dt, + old = c('feature_name', 'x_location', 'y_location'), + new = c('feat_ID', 'x', 'y') + ) + + # qv filtering + if (!is.null(qv_threshold)) { + n_before <- tx_dt[,.N] + tx_dt <- tx_dt[qv >= qv_threshold] + n_after <- tx_dt[,.N] + + vmsg( + .v = verbose, + sprintf( + "QV cutoff: %d\n Feature points removed: %d, out of %d", + qv_threshold, + n_before - n_after, + n_before + ) + ) + } + + return(tx_dt) +} + +.xenium_transcript_parquet <- function( + path, + dropcols = c(), + qv_threshold = 20, + verbose = NULL +) { + package_check( + pkg_name = c("arrow", "dplyr"), + repository = c("CRAN:arrow", "CRAN:dplyr") + ) + + tx_arrow <- arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(transcript_id = cast(transcript_id, arrow::string())) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::mutate(feature_name = cast(feature_name, arrow::string())) %>% + dplyr::select(-dplyr::any_of(dropcols)) + + # qv filtering + if (!is.null(qv_threshold)) { + .nr <- function(x) { + dplyr::tally(x) %>% dplyr::collect() %>% as.numeric() + } + n_before <- .nr(tx_arrow) + tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) + n_after <- .nr(tx_arrow) + + vmsg( + .v = verbose, + sprintf( + "QV cutoff: %d\n Feature points removed: %d, out of %d", + qv_threshold, + n_before - n_after, + n_before + ) + ) + } + + # convert to data.table + tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() + data.table::setnames( + x = tx_dt, + old = c('feature_name', 'x_location', 'y_location'), + new = c('feat_ID', 'x', 'y') + ) + return(tx_dt) +} + +.xenium_poly <- function( + path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL +) { + checkmate::assert_file_exists(path) + checkmate::assert_character(name, len = 1L) + + e <- file_extension(path) %>% head(1L) %>% tolower() + + a <- list(path = path) + vmsg("Loading boundary info...", .v = verbose) + polys <- switch(e, + "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_poly_parquet, args = a), + "zarr" = stop("zarr not yet supported") + ) + + # create gpolys + verbose <- verbose %null% FALSE + gpolys <- createGiottoPolygon( + x = polys, + name = name, + calc_centroids = calc_centroids, + verbose = verbose + ) + return(gpolys) +} + +.xenium_poly_csv <- function(path, cores = determine_cores()) { + data.table::fread( + path, nThread = cores, + colClasses = c(cell_id = "character") + ) +} + +.xenium_poly_parquet <- function(path) { + package_check( + pkg_name = c("arrow", "dplyr"), + repository = c("CRAN:arrow", "CRAN:dplyr") + ) + # read & convert to DT + arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() +} + +.xenium_cellmeta <- function( + path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to metadata file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + + e <- file_extension(path) %>% head(1L) %>% tolower() + a <- list(path = path, dropcols = dropcols) + vmsg('Loading cell metadata...', .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) + verbose <- verbose %null% TRUE + cx <- switch(e, + "csv" = do.call(.xenium_cellmeta_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_cellmeta_parquet, args = a) + ) + + cx <- createCellMetaObj( + metadata = cx, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + return(cx) +} + +.xenium_cellmeta_csv <- function( + path, dropcols = c(), cores = determine_cores() +) { + data.table::fread(path, nThread = cores, drop = dropcols) +} + +.xenium_cellmeta_parquet <- function(path, dropcols = c()) { + arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::select(-dplyr::any_of(dropcols)) %>% + as.data.frame() %>% + data.table::setDT() +} + +.xenium_featmeta <- function( + path, + gene_ids = "symbols", + dropcols = c(), + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to panel metadata file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_ext <- GiottoUtils::file_extension(path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json( + path = path, gene_ids = gene_ids + ) + } else { + feat_meta <- data.table::fread(path, nThread = cores) + colnames(feat_meta)[[1]] <- 'feat_ID' + } + + dropcols <- dropcols[dropcols %in% colnames(feat_meta)] + feat_meta[, (dropcols) := NULL] # remove dropcols + + fx <- createFeatMetaObj( + metadata = feat_meta, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + + return(fx) +} + +.xenium_expression <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to expression dir (mtx) or file (h5) provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + a <- list( + path = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) + + if (checkmate::test_directory_exists(path)) { + e <- "mtx" # assume mtx dir + # zarr can also be unzipped into a dir, but zarr implementation with + # 32bit UINT support is not available in R yet (needed for cell_IDs). + } else { + e <- file_extension(path) %>% head(1L) %>% tolower() + } + + vmsg("Loading 10x pre-aggregated expression...", .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) + verbose <- verbose %null% TRUE + ex <- switch(e, + "mtx" = do.call(.xenium_cellmeta_csv, args = a), + "h5" = do.call(.xenium_cellmeta_parquet, args = a) + ) + + eo <- createExprObj( + expression_data = ex, + name = "raw", + spat_unit = "cell", + feat_type = "rna", + provenance = "cell" + ) + return(eo) +} + +.xenium_expression_h5 <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE +) { + get10Xmatrix_h5( + path_to_data = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) +} + +.xenium_expression_mtx <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE +) { + gene_ids <- switch(gene_ids, + "ensembl" = 1, + "symbols" = 2 + ) + get10Xmatrix( + path_to_data = path, + gene_column_index = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) +} + +.xenium_image <- function( + path, + name = "image", + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + affine = NULL, + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to image file to load provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + + vmsg(.v = verbose, sprintf("loading image as '%s'", name)) + vmsg(.v = verbose, .is_debug = TRUE, path) + vmsg( + .v = verbose, .is_debug = TRUE, + sprintf("negative_y: %s\nflip_vertical: %s\nflip_horizontal: %s", + negative_y, flip_vertical, flip_horizontal), + .prefix = "" + ) + + verbose <- verbose %null% TRUE + + # TODO +} + + + +#' @title Load xenium data from folder +#' @name load_xenium_folder +#' @param path_list list of full filepaths from .read_xenium_folder +#' @inheritParams createGiottoXeniumObject +#' @returns list of loaded in xenium data +NULL + +#' @rdname load_xenium_folder +#' @keywords internal +.load_xenium_folder <- function(path_list, + load_format = "csv", + data_to_use = "subcellular", + h5_expression = "FALSE", + h5_gene_ids = "symbols", + gene_column_index = 1, + cores, + verbose = TRUE) { + data_list <- switch(load_format, + "csv" = .load_xenium_folder_csv( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "parquet" = .load_xenium_folder_parquet( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) + ) + + return(data_list) +} + + +#' @describeIn load_xenium_folder Load from csv files +#' @keywords internal +.load_xenium_folder_csv <- function(path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE) { + # initialize return vars + feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL + + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_path <- path_list$panel_meta_path[[1]] + fdata_ext <- GiottoUtils::file_extension(fdata_path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json(path = fdata_path, + gene_ids = h5_gene_ids) + } else { + feat_meta <- data.table::fread(fdata_path, nThread = cores) + colnames(feat_meta)[[1]] <- "feat_ID" + } + + # **** subcellular info **** + if (data_to_use == "subcellular") { + # append missing QC probe info to feat_meta + if (isTRUE(h5_expression)) { + h5 <- hdf5r::H5File$new(path_list$agg_expr_path) + tryCatch({ + root <- names(h5) + feature_id <- h5[[paste0(root, "/features/id")]][] + feature_info <- h5[[paste0(root, "/features/feature_type")]][] + feature_names <- h5[[paste0(root, "/features/name")]][] + features_dt <- data.table::data.table( + "id" = feature_id, + "name" = feature_names, + "feature_type" = feature_info + ) + }, finally = { + h5$close_all() + }) + } else { + features_dt <- data.table::fread( + paste0(path_list$agg_expr_path, "/features.tsv.gz"), + header = FALSE + ) + } + colnames(features_dt) <- c("id", "feat_ID", "feat_class") + feat_meta <- merge( + features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + + GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) + tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) + data.table::setnames( + x = tx_dt, + old = c("feature_name", "x_location", "y_location"), + new = c("feat_ID", "x", "y") + ) + + GiottoUtils::vmsg("Loading boundary info...", .v = verbose) + bound_dt_list <- lapply( + path_list$bound_paths, + function(x) data.table::fread(x[[1]], nThread = cores) + ) + } + + # **** aggregate info **** + GiottoUtils::vmsg("loading cell metadata...", .v = verbose) + cell_meta <- data.table::fread( + path_list$cell_meta_path[[1]], nThread = cores) + + if (data_to_use == "aggregate") { + GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) + if (isTRUE(h5_expression)) { + agg_expr <- get10Xmatrix_h5( + path_to_data = path_list$agg_expr_path, + gene_ids = h5_gene_ids, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } else { + agg_expr <- get10Xmatrix( + path_to_data = path_list$agg_expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } + } + + data_list <- list( + "feat_meta" = feat_meta, + "tx_dt" = tx_dt, + "bound_dt_list" = bound_dt_list, + "cell_meta" = cell_meta, + "agg_expr" = agg_expr + ) + + return(data_list) +} + + + + +#' @describeIn load_xenium_folder Load from parquet files +#' @keywords internal +.load_xenium_folder_parquet <- function(path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE) { + # initialize return vars + feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL + # dplyr variable + cell_id <- NULL + + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_path <- path_list$panel_meta_path[[1]] + fdata_ext <- GiottoUtils::file_extension(fdata_path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json( + path = fdata_path, gene_ids = h5_gene_ids) + } else { + feat_meta <- data.table::fread(fdata_path, nThread = cores) + colnames(feat_meta)[[1]] <- "feat_ID" + } + + # **** subcellular info **** + if (data_to_use == "subcellular") { + # define for data.table + transcript_id <- feature_name <- NULL + + # append missing QC probe info to feat_meta + if (isTRUE(h5_expression)) { + h5 <- hdf5r::H5File$new(path_list$agg_expr_path) + tryCatch({ + root <- names(h5) + feature_id <- h5[[paste0(root, "/features/id")]][] + feature_info <- h5[[paste0(root, "/features/feature_type")]][] + feature_names <- h5[[paste0(root, "/features/name")]][] + features_dt <- data.table::data.table( + "id" = feature_id, + "name" = feature_names, + "feature_type" = feature_info + ) + }, finally = { + h5$close_all() + }) + } else { + features_dt <- arrow::read_tsv_arrow(paste0( + path_list$agg_expr_path, "/features.tsv.gz"), + col_names = FALSE + ) %>% + data.table::setDT() + } + colnames(features_dt) <- c("id", "feat_ID", "feat_class") + feat_meta <- merge(features_dt[ + , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + + vmsg("Loading transcript level info...", .v = verbose) + tx_dt <- arrow::read_parquet( + file = path_list$tx_path[[1]], + as_data_frame = FALSE + ) %>% + dplyr::mutate( + transcript_id = cast(transcript_id, arrow::string())) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::mutate( + feature_name = cast(feature_name, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + data.table::setnames( + x = tx_dt, + old = c("feature_name", "x_location", "y_location"), + new = c("feat_ID", "x", "y") + ) + vmsg("Loading boundary info...", .v = verbose) + bound_dt_list <- lapply(path_list$bound_paths, function(x) { + arrow::read_parquet(file = x[[1]], as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + }) + } + # **** aggregate info **** + if (data_to_use == "aggregate") { + vmsg("Loading cell metadata...", .v = verbose) + cell_meta <- arrow::read_parquet( + file = path_list$cell_meta_path[[1]], + as_data_frame = FALSE + ) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + + # NOTE: no parquet for agg_expr. + vmsg("Loading aggregated expression...", .v = verbose) + if (isTRUE(h5_expression)) { + agg_expr <- get10Xmatrix_h5( + path_to_data = path_list$agg_expr_path, + gene_ids = h5_gene_ids, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } else { + agg_expr <- get10Xmatrix( + path_to_data = path_list$agg_expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } + } + + data_list <- list( + "feat_meta" = feat_meta, + "tx_dt" = tx_dt, + "bound_dt_list" = bound_dt_list, + "cell_meta" = cell_meta, + "agg_expr" = agg_expr + ) + + return(data_list) +} + + + +.load_xenium_panel_json <- function(path, gene_ids = "symbols") { + gene_ids <- match.arg(gene_ids, c("symbols", "ensembl")) + + # tested on v1.6 + j <- jsonlite::fromJSON(path) + # j$metadata # dataset meta + # j$payload # main content + # j$payload$chemistry # panel chemistry used + # j$payload$customer # panel customer + # j$payload$designer # panel designer + # j$payload$spec_version # versioning + # j$payload$panel # dataset panel stats + + panel_info <- j$payload$targets$type %>% + data.table::as.data.table() + + switch(gene_ids, + "symbols" = data.table::setnames( + panel_info, + old = c("data.id", "data.name", "descriptor"), + new = c("ensembl", "feat_ID", "type") + ), + "ensembl" = data.table::setnames( + panel_info, + old = c("data.id", "data.name", "descriptor"), + new = c("feat_ID", "symbol", "type") + ) + ) + return(panel_info) +} + + +# OLD #### + + + + +#' @title Create 10x Xenium Giotto Object +#' @name createGiottoXeniumObject +#' @description Given the path to a Xenium experiment output folder, creates a +#' Giotto object +#' @param xenium_dir full path to the exported xenium directory +#' @param data_to_use which type(s) of expression data to build the gobject with +#' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') +#' @param load_format files formats from which to load the data. Either `csv` or +#' `parquet` currently supported. +#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 +#' file. Default is \code{TRUE} +#' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene +#' expression matrix +#' @param bounds_to_load vector of boundary information to load +#' (e.g. \code{'cell'} +#' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both +#' at the same time.) +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' as a subcellular transcript detection (default = 20) +#' @param key_list (advanced) list of grep-based keywords to split the +#' subcellular feature detections by feature type. See details +#' @inheritParams get10Xmatrix +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns giotto object +#' @details +#' +#' [\strong{QC feature types}] +#' Xenium provides info on feature detections that include more than only the +#' Gene Expression specific probes. Additional probes for QC are included: +#' \emph{blank codeword}, \emph{negative control codeword}, and +#' \emph{negative control probe}. These additional QC probes each occupy and +#' are treated as their own feature types so that they can largely remain +#' independent of the gene expression information. +#' +#' [\strong{key_list}] +#' Related to \code{data_to_use = 'subcellular'} workflow only: +#' Additional QC probe information is in the subcellular feature detections +#' information and must be separated from the gene expression information +#' during processing. +#' The QC probes have prefixes that allow them to be selected from the rest of +#' the feature IDs. +#' Giotto uses a named list of keywords (\code{key_list}) to select these QC +#' probes, with the list names being the names that will be assigned as the +#' feature type of these feature detections. The default list is used when +#' \code{key_list} = NULL. +#' +#' Default list: +#' \preformatted{ +#' list(blank_code = 'BLANK_', +#' neg_code = 'NegControlCodeword_', +#' neg_probe = c('NegControlProbe_|antisense_')) +#' } +#' +#' The Gene expression subset is accepted as the subset of feat_IDs that do not +#' map to any of the keys. +#' +#' @export +createGiottoXeniumObject <- function(xenium_dir, + data_to_use = c("subcellular", "aggregate"), + load_format = "csv", + h5_expression = TRUE, + h5_gene_ids = c("symbols", "ensembl"), + gene_column_index = 1, + bounds_to_load = c("cell"), + qv_threshold = 20, + key_list = NULL, + instructions = NULL, + cores = NA, + verbose = TRUE) { + # 0. setup + xenium_dir <- path.expand(xenium_dir) + + # Determine data to load + data_to_use <- match.arg( + arg = data_to_use, choices = c("subcellular", "aggregate")) + + # Determine load formats + load_format <- "csv" # TODO Remove this and add as param once other options + # are available + load_format <- match.arg( + arg = load_format, choices = c("csv", "parquet", "zarr")) + + # set number of cores automatically, but with limit of 10 + cores <- determine_cores(cores) + data.table::setDTthreads(threads = cores) + + # 1. detect xenium folder and find filepaths to load + + # path_list contents: + # tx_path + # bound_paths + # cell_meta_path + # agg_expr_path + # panel_meta_path + path_list <- .read_xenium_folder( + xenium_dir = xenium_dir, + data_to_use = data_to_use, + bounds_to_load = bounds_to_load, + load_format = load_format, + h5_expression = h5_expression, + verbose = verbose + ) + + + # 2. load in data + + # data_list contents: + # feat_meta + # tx_dt + # bound_dt_list + # cell_meta + # agg_expr + data_list <- .load_xenium_folder( + path_list = path_list, + load_format = load_format, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ) + + + # TODO load images + + + # 3. Create giotto objects + + if (data_to_use == "subcellular") { + # ** feat type search keys ** + if (is.null(key_list)) { + key_list <- list( + blank_code = "BLANK_", + neg_code = "NegControlCodeword_", + neg_probe = c("NegControlProbe_|antisense_") + ) + } + + # needed: + # feat_meta + # tx_dt + # bound_dt_list + xenium_gobject <- .createGiottoXeniumObject_subcellular( + data_list = data_list, + qv_threshold = qv_threshold, + key_list = key_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) + } + + if (data_to_use == "aggregate") { + # needed: + # feat_meta + # cell_meta + # agg_expr + # optional? + # tx_dt + # bound_dt_list + xenium_gobject <- .createGiottoXeniumObject_aggregate( + data_list = data_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) + } + + return(xenium_gobject) +} + + + + +#' @title Create a Xenium Giotto object from subcellular info +#' @name .createGiottoXeniumObject_subcellular +#' @description Subcellular workflow for createGiottoXeniumObject +#' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} +#' @param key_list regex-based search keys for feature IDs to allow separation +#' into separate giottoPoints objects by feat_type +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' as a subcellular transcript detection (default = 20) +#' @inheritParams get10Xmatrix +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns giotto object +#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate +#' @keywords internal +.createGiottoXeniumObject_subcellular <- function(data_list, + key_list = NULL, + qv_threshold = 20, + instructions = NULL, + cores = NA, + verbose = TRUE) { + # data.table vars + qv <- NULL + + # Unpack data_list info + feat_meta <- data_list$feat_meta + tx_dt <- data_list$tx_dt + bound_dt_list <- data_list$bound_dt_list + + # define for data.table + cell_id <- feat_ID <- feature_name <- NULL + + vmsg("Building subcellular giotto object...", .v = verbose) + # Giotto points object + vmsg("> points data prep...", .v = verbose) + + # filter by qv_threshold + vmsg("> filtering feature detections for Phred score >= ", + qv_threshold, .v = verbose) + n_before <- tx_dt[, .N] + tx_dt_filtered <- tx_dt[qv >= qv_threshold] + n_after <- tx_dt_filtered[, .N] + + if (verbose) { + cat( + "Number of feature points removed: ", + n_before - n_after, + " out of ", n_before, "\n" + ) + } + + vmsg("> splitting detections by feat_type", .v = verbose) + # discover feat_IDs for each feat_type + all_IDs <- tx_dt_filtered[, unique(feat_ID)] + feat_types_IDs <- lapply( + key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) + rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) + feat_types_IDs <- append(rna, feat_types_IDs) + + # separate detections by feature type + points_list <- lapply( + feat_types_IDs, + function(types) { + tx_dt_filtered[feat_ID %in% types] + } + ) + + # Giotto polygons object + vmsg("> polygons data prep...", .v = verbose) + polys_list <- lapply( + bound_dt_list, + function(bound_type) { + bound_type[, cell_id := as.character(cell_id)] + } + ) + + xenium_gobject <- createGiottoObjectSubcellular( + gpoints = points_list, + gpolygons = polys_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) + + # generate centroids + vmsg("Calculating polygon centroids...", .v = verbose) + xenium_gobject <- addSpatialCentroidLocations( + xenium_gobject, + poly_info = c(names(bound_dt_list)), + provenance = as.list(names(bound_dt_list)) + ) + + return(xenium_gobject) +} + + + + + +#' @title Create a Xenium Giotto object from aggregate info +#' @name .createGiottoXeniumObject_aggregate +#' @description Aggregate workflow for createGiottoXeniumObject +#' @param data_list list of data loaded by \code{.load_xenium_folder} +#' @inheritParams get10Xmatrix +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns giotto object +#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular +#' @keywords internal +.createGiottoXeniumObject_aggregate <- function(data_list, + # include_analysis = FALSE, + instructions = NULL, + cores = NA, + verbose = TRUE) { + # Unpack data_list info + feat_meta <- data_list$feat_meta + cell_meta <- data_list$cell_meta + agg_expr <- data_list$agg_expr + + # define for data.table + cell_ID <- x_centroid <- y_centroid <- NULL + + # clean up names for aggregate matrices + names(agg_expr) <- gsub(pattern = " ", replacement = "_", names(agg_expr)) + geneExpMat <- which(names(agg_expr) == "Gene_Expression") + names(agg_expr)[[geneExpMat]] <- "raw" + + # set cell_id as character + cell_meta <- cell_meta[, data.table::setnames(.SD, "cell_id", "cell_ID")] + cell_meta <- cell_meta[, cell_ID := as.character(cell_ID)] + + # set up spatial locations + agg_spatlocs <- cell_meta[, .(x_centroid, y_centroid, cell_ID)] + + # set up metadata + agg_meta <- cell_meta[, !c("x_centroid", "y_centroid")] + + vmsg("Building aggregate giotto object...", .v = verbose) + xenium_gobject <- createGiottoObject( + expression = agg_expr, + spatial_locs = agg_spatlocs, + instructions = instructions, + cores = cores, + verbose = verbose + ) + + # append aggregate metadata + xenium_gobject <- addCellMetadata( + gobject = xenium_gobject, + new_metadata = agg_meta, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + xenium_gobject <- addFeatMetadata( + gobject = xenium_gobject, + new_metadata = feat_meta, + by_column = TRUE, + column_feat_ID = "feat_ID" + ) + + return(xenium_gobject) +} + + + + +#' @title Read a structured xenium folder +#' @name .read_xenium_folder +#' @inheritParams createGiottoXeniumObject +#' @keywords internal +#' @returns path_list a list of xenium files discovered and their filepaths. NULL +#' values denote missing items +.read_xenium_folder <- function(xenium_dir, + data_to_use = "subcellular", + bounds_to_load = c("cell"), + load_format = "csv", + h5_expression = FALSE, + verbose = TRUE) { + # Check needed packages + if (load_format == "parquet") { + package_check(pkg_name = "arrow", repository = "CRAN") + package_check(pkg_name = "dplyr", repository = "CRAN") + } + if (isTRUE(h5_expression)) { + package_check(pkg_name = "hdf5r", repository = "CRAN") + } + + ch <- box_chars() + + + # 0. test if folder structure exists and is as expected + + + if (is.null(xenium_dir) | !dir.exists(xenium_dir)) + stop("The full path to a xenium directory must be given.") + vmsg("A structured Xenium directory will be used\n", .v = verbose) + + # find items (length = 1 if present, length = 0 if missing) + dir_items <- list( + `analysis info` = "*analysis*", + `boundary info` = "*bound*", + `cell feature matrix` = "*cell_feature_matrix*", + `cell metadata` = "*cells*", + `image info` = "*tif", + `panel metadata` = "*panel*", + `raw transcript info` = "*transcripts*", + `experiment info (.xenium)` = "*.xenium" + ) + + dir_items <- lapply( + dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x))) + dir_items_lengths <- lengths(dir_items) + + if (isTRUE(verbose)) { + message("Checking directory contents...") + for (item in names(dir_items)) { + # IF ITEM FOUND + + if (dir_items_lengths[[item]] > 0) { + message(ch$s, "> ", item, " found") + for (item_i in seq_along(dir_items[[item]])) { + # print found item names + subItem <- gsub(pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]]) + message(ch$s, ch$s, ch$l, ch$h, ch$h, subItem) + } + } else { + # IF ITEM MISSING + # Based on workflow, determine if: + # necessary (error) + # optional (warning) + + if (data_to_use == "subcellular") { + # necessary items + if (item %in% c("boundary info", "raw transcript info")) + stop(item, " is missing") + # optional items + if (item %in% c( + "image info", "experiment info (.xenium)", + "panel metadata")) + warning(item, " is missing (optional)") + # items to ignore: analysis info, cell feature matrix, + # cell metadata + } else if (data_to_use == "aggregate") { + # necessary items + if (item %in% c("cell feature matrix", "cell metadata")) + stop(item, " is missing") + # optional items + if (item %in% c( + "image info", "experiment info (.xenium)", + "panel metadata", "analysis info")) + warning(item, " is missing (optional)") + # items to ignore: boundary info, raw transcript info + } + } + } + } + + + # 1. Select data to load + + + # **** transcript info **** + tx_path <- NULL + tx_path <- dir_items$`raw transcript info`[grepl( + pattern = load_format, dir_items$`raw transcript info`)] + # **** cell metadata **** + cell_meta_path <- NULL + cell_meta_path <- dir_items$`cell metadata`[grepl( + pattern = load_format, dir_items$`cell metadata`)] + + # **** boundary info **** + # Select bound load format + if (load_format != "zarr") { # No zarr available for boundary info + dir_items$`boundary info` <- dir_items$`boundary info`[grepl( + pattern = load_format, dir_items$`boundary info`)] + } else { + dir_items$`boundary info` <- dir_items$`boundary info`[grepl( + pattern = "csv", dir_items$`boundary info`)] + } + + # Organize bound paths by type of bound (bounds_to_load param) + bound_paths <- NULL + bound_names <- bounds_to_load + bounds_to_load <- as.list(bounds_to_load) + bound_paths <- lapply(bounds_to_load, function(x) dir_items$`boundary info`[ + grepl(pattern = x, dir_items$`boundary info`)]) + names(bound_paths) <- bound_names + + # **** aggregated expression info **** + agg_expr_path <- NULL + if (isTRUE(h5_expression)) { # h5 expression matrix loading is default + agg_expr_path <- dir_items$`cell feature matrix`[grepl( + pattern = "h5", dir_items$`cell feature matrix`)] + } else if (load_format == "zarr") { + agg_expr_path <- dir_items$`cell feature matrix`[grepl( + pattern = "zarr", dir_items$`cell feature matrix`)] + } else { # No parquet for aggregated expression - default to normal 10x loading + agg_expr_path <- dir_items$`cell feature matrix`[sapply( + dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x))] + if (length(agg_expr_path) == 0) { + stop(wrap_txt( + "Expression matrix cannot be loaded.\n + Has cell_feature_matrix(.tar.gz) been unpacked into a + directory?" + )) + } + } + if (data_to_use == "aggregate") { + if (length(path_list$agg_expr_path) == 0) { + stop(wrap_txt( + "Aggregated expression not found.\n + Please confirm h5_expression and load_format params are correct" + )) + } + } + + # **** panel info **** + panel_meta_path <- NULL + panel_meta_path <- dir_items$`panel metadata` + + + vmsg("Directory check done", .v = verbose) + + path_list <- list( + "tx_path" = tx_path, + "bound_paths" = bound_paths, + "cell_meta_path" = cell_meta_path, + "agg_expr_path" = agg_expr_path, + "panel_meta_path" = panel_meta_path + ) + + return(path_list) +} + + diff --git a/man/addVisiumPolygons.Rd b/man/addVisiumPolygons.Rd index 96dff572c..7960acc7c 100644 --- a/man/addVisiumPolygons.Rd +++ b/man/addVisiumPolygons.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{addVisiumPolygons} \alias{addVisiumPolygons} \title{Add Visium Polygons to Giotto Object} diff --git a/man/createArchRProj.Rd b/man/createArchRProj.Rd index b43e48c6f..502a4aa49 100644 --- a/man/createArchRProj.Rd +++ b/man/createArchRProj.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createArchRProj} \alias{createArchRProj} \title{Create an ArchR project and run LSI dimension reduction} @@ -25,19 +25,19 @@ These files can be in one of the following formats: (i) scATAC tabix files, \item{genome}{A string indicating the default genome to be used for all ArchR functions. Currently supported values include "hg19","hg38","mm9", and "mm10". -This value is stored as a global environment variable, not part of the +This value is stored as a global environment variable, not part of the ArchRProject. This can be overwritten on a per-function basis using the given function's geneAnnotationand genomeAnnotation parameter. For something other than one of -the currently supported, see createGeneAnnnotation() and +the currently supported, see createGeneAnnnotation() and createGenomeAnnnotation()} -\item{createArrowFiles_params}{list of parameters passed to +\item{createArrowFiles_params}{list of parameters passed to `ArchR::createArrowFiles`} \item{ArchRProject_params}{list of parameters passed to `ArchR::ArchRProject`} -\item{addIterativeLSI_params}{list of parameters passed to +\item{addIterativeLSI_params}{list of parameters passed to `ArchR::addIterativeLSI`} \item{threads}{number of threads to use. Default = `ArchR::getArchRThreads()`} @@ -47,7 +47,7 @@ createGenomeAnnnotation()} \item{verbose}{Default = TRUE} } \value{ -An ArchR project with GeneScoreMatrix, TileMatrix, and +An ArchR project with GeneScoreMatrix, TileMatrix, and TileMatrix-based LSI } \description{ diff --git a/man/createGiottoCosMxObject.Rd b/man/createGiottoCosMxObject.Rd index 5343dde5a..24be2efaf 100644 --- a/man/createGiottoCosMxObject.Rd +++ b/man/createGiottoCosMxObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{createGiottoCosMxObject} \alias{createGiottoCosMxObject} \title{Create Nanostring CosMx Giotto Object} @@ -20,11 +20,11 @@ createGiottoCosMxObject( \item{cosmx_dir}{full path to the exported cosmx directory} \item{data_to_use}{which type(s) of expression data to build the gobject with -Default is \code{'all'} information available. \code{'subcellular'} loads -the transcript coordinates only. \code{'aggregate'} loads the provided +Default is \code{'all'} information available. \code{'subcellular'} loads +the transcript coordinates only. \code{'aggregate'} loads the provided aggregated expression matrix.} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} @@ -49,9 +49,9 @@ Given the path to a CosMx experiment directory, creates a Giotto object. } \details{ -[\strong{Expected Directory}] This function generates a giotto object when -given a link to a cosmx output directory. It expects the following items -within the directory where the \strong{bolded} portions are what this +[\strong{Expected Directory}] This function generates a giotto object when +given a link to a cosmx output directory. It expects the following items +within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{CellComposite} (folder of images)} @@ -66,23 +66,23 @@ function matches against: [\strong{Workflows}] Workflow to use is accessed through the data_to_use param \itemize{ - \item{'all' - loads and requires subcellular information from tx_file and + \item{'all' - loads and requires subcellular information from tx_file and fov_positions_file - and also the existing aggregated information + and also the existing aggregated information (expression, spatial locations, and metadata) from exprMat_file and metadata_file.} - \item{'subcellular' - loads and requires subcellular information from + \item{'subcellular' - loads and requires subcellular information from tx_file and fov_positions_file only.} - \item{'aggregate' - loads and requires the existing aggregate information - (expression, spatial locations, and metadata) from exprMat_file and + \item{'aggregate' - loads and requires the existing aggregate information + (expression, spatial locations, and metadata) from exprMat_file and metadata_file.} } -[\strong{Images}] Images in the default CellComposite, CellLabels, +[\strong{Images}] Images in the default CellComposite, CellLabels, CompartmentLabels, and CellOverlay -folders will be loaded as giotto largeImage objects in all workflows as -long as they are available. Additionally, CellComposite images will be +folders will be loaded as giotto largeImage objects in all workflows as +long as they are available. Additionally, CellComposite images will be converted to giotto image objects, making plotting with these image objects more responsive when accessing them from a server. \code{\link{showGiottoImageNames}} can be used to see the available images. diff --git a/man/createGiottoMerscopeObject.Rd b/man/createGiottoMerscopeObject.Rd index d93a7caa5..0daf8027f 100644 --- a/man/createGiottoMerscopeObject.Rd +++ b/man/createGiottoMerscopeObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createGiottoMerscopeObject} \alias{createGiottoMerscopeObject} \alias{.createGiottoMerscopeObject_subcellular} @@ -37,10 +37,10 @@ createGiottoMerscopeObject( \arguments{ \item{merscope_dir}{full path to the exported merscope directory} -\item{data_to_use}{which of either the 'subcellular' or 'aggregate' +\item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} -\item{FOVs}{which FOVs to use when building the subcellular object. +\item{FOVs}{which FOVs to use when building the subcellular object. (default is NULL) NULL loads all FOVs (very slow)} @@ -66,13 +66,13 @@ provided} a giotto object } \description{ -Given the path to a MERSCOPE experiment directory, creates a +Given the path to a MERSCOPE experiment directory, creates a Giotto object. } \details{ -[\strong{Expected Directory}] This function generates a giotto object when -given a link to a MERSCOPE output directory. It expects the following items -within the directory where the \strong{bolded} portions are what this +[\strong{Expected Directory}] This function generates a giotto object when +given a link to a MERSCOPE output directory. It expects the following items +within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{cell_boundaries} (folder .hdf5 files)} @@ -84,10 +84,10 @@ function matches against: } \section{Functions}{ \itemize{ -\item \code{.createGiottoMerscopeObject_subcellular()}: Create giotto object with +\item \code{.createGiottoMerscopeObject_subcellular()}: Create giotto object with 'subcellular' workflow -\item \code{.createGiottoMerscopeObject_aggregate()}: Create giotto object with 'aggregate' +\item \code{.createGiottoMerscopeObject_aggregate()}: Create giotto object with 'aggregate' workflow }} diff --git a/man/createGiottoObjectfromArchR.Rd b/man/createGiottoObjectfromArchR.Rd index 35c8db106..c7aa2e53d 100644 --- a/man/createGiottoObjectfromArchR.Rd +++ b/man/createGiottoObjectfromArchR.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createGiottoObjectfromArchR} \alias{createGiottoObjectfromArchR} \title{Create a Giotto object from an ArchR project} @@ -20,10 +20,10 @@ createGiottoObjectfromArchR( \item{expression_feat}{Giotto object available features (e.g. atac, rna, ...)} -\item{spatial_locs}{data.table or data.frame with coordinates for cell +\item{spatial_locs}{data.table or data.frame with coordinates for cell centroids} -\item{sampleNames}{A character vector containing the ArchR project sample +\item{sampleNames}{A character vector containing the ArchR project sample name} \item{...}{additional arguments passed to `createGiottoObject`} diff --git a/man/createGiottoVisiumObject.Rd b/man/createGiottoVisiumObject.Rd index 6c7c17fae..ec6db99e6 100644 --- a/man/createGiottoVisiumObject.Rd +++ b/man/createGiottoVisiumObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createGiottoVisiumObject} \alias{createGiottoVisiumObject} \title{Create a giotto object from 10x visium data} @@ -39,7 +39,7 @@ createGiottoVisiumObject( \item{h5_tissue_positions_path}{path to tissue locations (.csv file)} -\item{h5_image_png_path}{path to tissue .png file (optional). Image +\item{h5_image_png_path}{path to tissue .png file (optional). Image autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \item{h5_json_scalefactors_path}{path to .json scalefactors (optional)} @@ -56,15 +56,15 @@ autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \item{ymin_adj}{deprecated} -\item{instructions}{list of instructions or output result from +\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{expression_matrix_class}{class of expression matrix to use +\item{expression_matrix_class}{class of expression matrix to use (e.g. 'dgCMatrix', 'DelayedArray')} \item{h5_file}{optional path to create an on-disk h5 file} -\item{cores}{how many cores or threads to use to read data if paths are +\item{cores}{how many cores or threads to use to read data if paths are provided} \item{verbose}{be verbose} @@ -73,7 +73,7 @@ provided} giotto object } \description{ -Create Giotto object directly from a 10X visium folder. Also +Create Giotto object directly from a 10X visium folder. Also accepts visium H5 outputs. } \details{ diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 0fddd0694..b5d7a1e34 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{createGiottoXeniumObject} \alias{createGiottoXeniumObject} \title{Create 10x Xenium Giotto Object} @@ -28,7 +28,7 @@ createGiottoXeniumObject( \item{load_format}{files formats from which to load the data. Either `csv` or `parquet` currently supported.} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene @@ -37,15 +37,15 @@ expression matrix} \item{gene_column_index}{which column from the features or genes .tsv file to use for row ids} -\item{bounds_to_load}{vector of boundary information to load +\item{bounds_to_load}{vector of boundary information to load (e.g. \code{'cell'} or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both at the same time.)} -\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included +\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} -\item{key_list}{(advanced) list of grep-based keywords to split the +\item{key_list}{(advanced) list of grep-based keywords to split the subcellular feature detections by feature type. See details} \item{instructions}{list of instructions or output result @@ -60,7 +60,7 @@ provided} giotto object } \description{ -Given the path to a Xenium experiment output folder, creates a +Given the path to a Xenium experiment output folder, creates a Giotto object } \details{ @@ -68,20 +68,20 @@ Giotto object Xenium provides info on feature detections that include more than only the Gene Expression specific probes. Additional probes for QC are included: \emph{blank codeword}, \emph{negative control codeword}, and -\emph{negative control probe}. These additional QC probes each occupy and -are treated as their own feature types so that they can largely remain +\emph{negative control probe}. These additional QC probes each occupy and +are treated as their own feature types so that they can largely remain independent of the gene expression information. [\strong{key_list}] Related to \code{data_to_use = 'subcellular'} workflow only: -Additional QC probe information is in the subcellular feature detections -information and must be separated from the gene expression information +Additional QC probe information is in the subcellular feature detections +information and must be separated from the gene expression information during processing. -The QC probes have prefixes that allow them to be selected from the rest of +The QC probes have prefixes that allow them to be selected from the rest of the feature IDs. -Giotto uses a named list of keywords (\code{key_list}) to select these QC -probes, with the list names being the names that will be assigned as the -feature type of these feature detections. The default list is used when +Giotto uses a named list of keywords (\code{key_list}) to select these QC +probes, with the list names being the names that will be assigned as the +feature type of these feature detections. The default list is used when \code{key_list} = NULL. Default list: diff --git a/man/createMerscopeLargeImage.Rd b/man/createMerscopeLargeImage.Rd index 8f0aedfba..f1a5c51dc 100644 --- a/man/createMerscopeLargeImage.Rd +++ b/man/createMerscopeLargeImage.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createMerscopeLargeImage} \alias{createMerscopeLargeImage} \title{Create Vizgen MERSCOPE largeImage} diff --git a/man/createSpatialGenomicsObject.Rd b/man/createSpatialGenomicsObject.Rd index 1571bcf4b..8b3ec8e37 100644 --- a/man/createSpatialGenomicsObject.Rd +++ b/man/createSpatialGenomicsObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createSpatialGenomicsObject} \alias{createSpatialGenomicsObject} \title{Create Spatial Genomics Giotto Object} @@ -9,7 +9,7 @@ createSpatialGenomicsObject(sg_dir = NULL, instructions = NULL) \arguments{ \item{sg_dir}{full path to the exported Spatial Genomics directory} -\item{instructions}{new instructions +\item{instructions}{new instructions (e.g. result from createGiottoInstructions)} } \value{ diff --git a/man/dot-cosmx_infer_fov_shifts.Rd b/man/dot-cosmx_infer_fov_shifts.Rd index 1a1be8809..8d5b70930 100644 --- a/man/dot-cosmx_infer_fov_shifts.Rd +++ b/man/dot-cosmx_infer_fov_shifts.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.cosmx_infer_fov_shifts} \alias{.cosmx_infer_fov_shifts} \title{Infer CosMx local to global shifts} diff --git a/man/dot-createGiottoCosMxObject_aggregate.Rd b/man/dot-createGiottoCosMxObject_aggregate.Rd index f85481fc6..8dcda4a9f 100644 --- a/man/dot-createGiottoCosMxObject_aggregate.Rd +++ b/man/dot-createGiottoCosMxObject_aggregate.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.createGiottoCosMxObject_aggregate} \alias{.createGiottoCosMxObject_aggregate} \title{Load and create a CosMx Giotto object from aggregate info} diff --git a/man/dot-createGiottoCosMxObject_all.Rd b/man/dot-createGiottoCosMxObject_all.Rd index 44e70f5d7..48a9caf16 100644 --- a/man/dot-createGiottoCosMxObject_all.Rd +++ b/man/dot-createGiottoCosMxObject_all.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.createGiottoCosMxObject_all} \alias{.createGiottoCosMxObject_all} -\title{Load and create a CosMx Giotto object from subcellular and aggregate +\title{Load and create a CosMx Giotto object from subcellular and aggregate info} \usage{ .createGiottoCosMxObject_all( @@ -22,7 +22,7 @@ info} \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} @@ -41,13 +41,13 @@ from \code{\link[GiottoClass]{createGiottoInstructions}}} giotto object } \description{ -Load and create a CosMx Giotto object from subcellular and aggregate +Load and create a CosMx Giotto object from subcellular and aggregate info } \details{ -Both \emph{subcellular} +Both \emph{subcellular} (subellular transcript detection information) and -\emph{aggregate} (aggregated detection count matrices by cell polygon from +\emph{aggregate} (aggregated detection count matrices by cell polygon from NanoString) data will be loaded in. The two will be separated into 'cell' and 'cell_agg' spatial units in order to denote the difference in origin of the two. diff --git a/man/dot-createGiottoCosMxObject_subcellular.Rd b/man/dot-createGiottoCosMxObject_subcellular.Rd index 17d07ada9..d0c315606 100644 --- a/man/dot-createGiottoCosMxObject_subcellular.Rd +++ b/man/dot-createGiottoCosMxObject_subcellular.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.createGiottoCosMxObject_subcellular} \alias{.createGiottoCosMxObject_subcellular} \title{Load and create a CosMx Giotto object from subcellular info} @@ -18,7 +18,7 @@ \arguments{ \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} diff --git a/man/dot-createGiottoXeniumObject_aggregate.Rd b/man/dot-createGiottoXeniumObject_aggregate.Rd index 49a348646..5baa80496 100644 --- a/man/dot-createGiottoXeniumObject_aggregate.Rd +++ b/man/dot-createGiottoXeniumObject_aggregate.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{.createGiottoXeniumObject_aggregate} \alias{.createGiottoXeniumObject_aggregate} \title{Create a Xenium Giotto object from aggregate info} diff --git a/man/dot-createGiottoXeniumObject_subcellular.Rd b/man/dot-createGiottoXeniumObject_subcellular.Rd index 75013fe11..b7e564a92 100644 --- a/man/dot-createGiottoXeniumObject_subcellular.Rd +++ b/man/dot-createGiottoXeniumObject_subcellular.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{.createGiottoXeniumObject_subcellular} \alias{.createGiottoXeniumObject_subcellular} \title{Create a Xenium Giotto object from subcellular info} @@ -19,7 +19,7 @@ \item{key_list}{regex-based search keys for feature IDs to allow separation into separate giottoPoints objects by feat_type} -\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included +\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} \item{instructions}{list of instructions or output result diff --git a/man/dot-load_cosmx_folder_aggregate.Rd b/man/dot-load_cosmx_folder_aggregate.Rd index b5bf8b11d..0d837368a 100644 --- a/man/dot-load_cosmx_folder_aggregate.Rd +++ b/man/dot-load_cosmx_folder_aggregate.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.load_cosmx_folder_aggregate} \alias{.load_cosmx_folder_aggregate} \title{Load CosMx folder aggregate info} diff --git a/man/dot-load_cosmx_folder_subcellular.Rd b/man/dot-load_cosmx_folder_subcellular.Rd index 3f70253c6..d218f1045 100644 --- a/man/dot-load_cosmx_folder_subcellular.Rd +++ b/man/dot-load_cosmx_folder_subcellular.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.load_cosmx_folder_subcellular} \alias{.load_cosmx_folder_subcellular} \title{Load CosMx folder subcellular info} @@ -19,7 +19,7 @@ list } \description{ loads in the feature detections information. Note that the mask -images are still required for a working subcellular object, and those are +images are still required for a working subcellular object, and those are loaded in \code{\link{.createGiottoCosMxObject_subcellular}} } \keyword{internal} diff --git a/man/dot-read_cosmx_folder.Rd b/man/dot-read_cosmx_folder.Rd index dd6fabace..a5541c896 100644 --- a/man/dot-read_cosmx_folder.Rd +++ b/man/dot-read_cosmx_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.read_cosmx_folder} \alias{.read_cosmx_folder} \title{Read a structured CosMx folder} diff --git a/man/dot-read_xenium_folder.Rd b/man/dot-read_xenium_folder.Rd index 255328100..f0e5dfda3 100644 --- a/man/dot-read_xenium_folder.Rd +++ b/man/dot-read_xenium_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{.read_xenium_folder} \alias{.read_xenium_folder} \title{Read a structured xenium folder} @@ -19,7 +19,7 @@ \item{data_to_use}{which type(s) of expression data to build the gobject with (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} -\item{bounds_to_load}{vector of boundary information to load +\item{bounds_to_load}{vector of boundary information to load (e.g. \code{'cell'} or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both at the same time.)} @@ -27,7 +27,7 @@ at the same time.)} \item{load_format}{files formats from which to load the data. Either `csv` or `parquet` currently supported.} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{verbose}{be verbose when building Giotto object} diff --git a/man/dot-visium_read_scalefactors.Rd b/man/dot-visium_read_scalefactors.Rd index eceab7c22..49209ecbf 100644 --- a/man/dot-visium_read_scalefactors.Rd +++ b/man/dot-visium_read_scalefactors.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{.visium_read_scalefactors} \alias{.visium_read_scalefactors} \title{Read Visium ScaleFactors} diff --git a/man/dot-visium_spot_poly.Rd b/man/dot-visium_spot_poly.Rd index f66977691..cc59b8c5a 100644 --- a/man/dot-visium_spot_poly.Rd +++ b/man/dot-visium_spot_poly.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{.visium_spot_poly} \alias{.visium_spot_poly} \title{Create Polygons for Visium Data} diff --git a/man/importCosMx.Rd b/man/importCosMx.Rd index adf975b7c..11c0c2eb6 100644 --- a/man/importCosMx.Rd +++ b/man/importCosMx.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/classes.R +% Please edit documentation in R/convenience_cosmx.R \name{importCosMx} \alias{importCosMx} \title{Import a Nanostring CosMx Assay} diff --git a/man/load_merscope_folder.Rd b/man/load_merscope_folder.Rd index d796bfa5b..f187f244a 100644 --- a/man/load_merscope_folder.Rd +++ b/man/load_merscope_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{load_merscope_folder} \alias{load_merscope_folder} \alias{.load_merscope_folder} @@ -33,10 +33,10 @@ ) } \arguments{ -\item{dir_items}{list of full filepaths from +\item{dir_items}{list of full filepaths from \code{\link{.read_merscope_folder}}} -\item{data_to_use}{which of either the 'subcellular' or 'aggregate' +\item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} \item{cores}{how many cores or threads to use to read data if paths are diff --git a/man/load_xenium_folder.Rd b/man/load_xenium_folder.Rd index a6c07895d..ccff86d21 100644 --- a/man/load_xenium_folder.Rd +++ b/man/load_xenium_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{load_xenium_folder} \alias{load_xenium_folder} \alias{.load_xenium_folder} @@ -47,7 +47,7 @@ \item{data_to_use}{which type(s) of expression data to build the gobject with (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene diff --git a/man/read_data_folder.Rd b/man/read_data_folder.Rd index d073e2156..3dd678024 100644 --- a/man/read_data_folder.Rd +++ b/man/read_data_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{read_data_folder} \alias{read_data_folder} \alias{.read_data_folder} diff --git a/man/visium_micron_scalefactor.Rd b/man/visium_micron_scalefactor.Rd index 9c9f93949..41c2ac4b0 100644 --- a/man/visium_micron_scalefactor.Rd +++ b/man/visium_micron_scalefactor.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{visium_micron_scalefactor} \alias{visium_micron_scalefactor} \alias{.visium_micron_scale} @@ -8,7 +8,7 @@ .visium_micron_scale(json_scalefactors) } \arguments{ -\item{json_scalefactors}{list of scalefactors from +\item{json_scalefactors}{list of scalefactors from .visium_read_scalefactors()} } \value{ From c741fa6448fa852d270d9c117ef714b2811a7d8b Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 22 May 2024 17:08:13 -0400 Subject: [PATCH 042/150] chore: bump version for dev --- DESCRIPTION | 2 +- NEWS.md | 6 ++++ R/spatial_genes.R | 77 ++--------------------------------------------- 3 files changed, 9 insertions(+), 76 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6f9e00cbb..342a96157 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Giotto Title: Spatial Single-Cell Transcriptomics Toolbox -Version: 4.0.8 +Version: 4.0.9 Authors@R: c( person("Ruben", "Dries", email = "rubendries@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7650-7754")), diff --git a/NEWS.md b/NEWS.md index 44f4f4e10..385e07e7e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,10 @@ +# Giotto 4.0.9 + +## Breaking changes +* Deprecated `detectSpatialCorGenes()` removed. Use `detectSpatialCorFeats()` instead + + # Giotto 4.0.8 (2024/05/22) ## Breaking changes diff --git a/R/spatial_genes.R b/R/spatial_genes.R index 58bb3bfff..7b97c9aa7 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -542,7 +542,7 @@ NULL } groups <- ceiling(nrow(bin_matrix) / group_size) - cut_groups <- cut(seq_len(nrow(bin_matrix)), breaks = groups, + cut_groups <- cut(seq_len(nrow(bin_matrix)), breaks = groups, labels = seq_len(groups)) if (any(table(cut_groups) == 1)) { stop("With group size = ", group_size, @@ -3001,7 +3001,7 @@ selectPatternGenes <- function(spatPatObj, gene_cor_DT_m[, top_pos_rank := rank(value), by = "variable"] gene_cor_DT_m[, top_neg_rank := rank(-value), by = "variable"] selection <- gene_cor_DT_m[ - top_pos_rank %in% seq_len(top_pos_genes) | + top_pos_rank %in% seq_len(top_pos_genes) | top_neg_rank %in% seq_len(top_neg_genes)] # filter on min correlation @@ -3581,79 +3581,6 @@ detectSpatialCorFeats <- function(gobject, -#' @title detectSpatialCorGenes -#' @name detectSpatialCorGenes -#' @description Detect genes that are spatially correlated -#' @param gobject giotto object -#' @param feat_type feature type -#' @param spat_unit spatial unit -#' @param method method to use for spatial averaging -#' @param expression_values gene expression values to use -#' @param subset_feats subset of feats to use -#' @param subset_genes deprecated, use \code{subset_feats} -#' @param spatial_network_name name of spatial network to use -#' @param network_smoothing smoothing factor beteen 0 and 1 -#' (default: automatic) -#' @param spatial_grid_name name of spatial grid to use -#' @param min_cells_per_grid minimum number of cells to consider a grid -#' @param cor_method correlation method -#' @returns returns a spatial correlation object: "spatCorObject" -#' @details -#' For method = network, it expects a fully connected spatial network. You -#' can make sure to create a -#' fully connected network by setting minimal_k > 0 in the -#' \code{\link{createSpatialNetwork}} function. -#' \itemize{ -#' \item{1. grid-averaging: }{average gene expression values within a -#' predefined spatial grid} -#' \item{2. network-averaging: }{smoothens the gene expression matrix by -#' averaging the expression within one cell -#' by using the neighbours within the predefined spatial network. b is a -#' smoothening factor that defaults to 1 - 1/k, where k is the median -#' number of k-neighbors in the selected spatial network. Setting b = 0 -#' means no smoothing and b = 1 means no contribution -#' from its own expression.} -#' } -#' The spatCorObject can be further explored with showSpatialCorGenes() -#' @seealso \code{\link{showSpatialCorGenes}} -#' @export -detectSpatialCorGenes <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - method = c("grid", "network"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - subset_genes = NULL, - spatial_network_name = "Delaunay_network", - network_smoothing = NULL, - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman")) { - ## deprecated arguments - if (!is.null(subset_genes)) { - subset_feats <- subset_genes - warning("subset_genes is deprecated, use subset_feats in the future") - } - - warning("Deprecated and replaced by detectSpatialCorFeats") - - detectSpatialCorFeats( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - method = method, - expression_values = expression_values, - subset_feats = subset_feats, - spatial_network_name = spatial_network_name, - network_smoothing = network_smoothing, - spatial_grid_name = spatial_grid_name, - min_cells_per_grid = min_cells_per_grid, - cor_method = cor_method - ) -} - - - #' @title showSpatialCorFeats From 8078963eaca44d1ea4428273fc455788496deace Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 22 May 2024 17:38:26 -0400 Subject: [PATCH 043/150] chore: document `detectSpatialCorFeats()` and `detectSpatialCorFeatsMatrix()` together --- NAMESPACE | 1 - R/spatial_genes.R | 268 ++++++++++++++--------------- man/detectSpatialCorFeats.Rd | 71 ++++++-- man/detectSpatialCorFeatsMatrix.Rd | 73 -------- man/detectSpatialCorGenes.Rd | 74 -------- 5 files changed, 183 insertions(+), 304 deletions(-) delete mode 100644 man/detectSpatialCorFeatsMatrix.Rd delete mode 100644 man/detectSpatialCorGenes.Rd diff --git a/NAMESPACE b/NAMESPACE index 02aa123e7..bbe6fefac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -151,7 +151,6 @@ export(crossSectionPlot) export(crossSectionPlot3D) export(detectSpatialCorFeats) export(detectSpatialCorFeatsMatrix) -export(detectSpatialCorGenes) export(detectSpatialPatterns) export(dimCellPlot) export(dimCellPlot2D) diff --git a/R/spatial_genes.R b/R/spatial_genes.R index 7b97c9aa7..bcfadc3c7 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -3228,16 +3228,25 @@ do_spatial_grid_averaging <- function(expression_matrix, -#' @title detectSpatialCorFeatsMatrix -#' @name detectSpatialCorFeatsMatrix -#' @description Detect genes that are spatially correlated +#' @title Detect spatially correlated features +#' @name detectSpatialCorFeats +#' @description Detect features that are spatially correlated. Functions for +#' starting from either a gobject (`detectSpatialCorFeats()`) or individual +#' pieces of data (`detectSpatialCorFeatsMatrix()`) are provided. +#' @param gobject giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type +#' @param expression_values gene expression values to use #' @param expression_matrix provided expression matrix -#' @param method method to use for spatial averaging +#' @param spat_loc_name name for spatial locations +#' @param spatial_locs provided spatial locations +#' @param spatial_network_name name of spatial network to use #' @param spatial_network provided spatial network +#' @param spatial_grid_name name of spatial grid to use #' @param spatial_grid provided spatial grid -#' @param spatial_locs provided spatial locations +#' @param method method to use for spatial averaging #' @param subset_feats subset of features to use -#' @param network_smoothing smoothing factor beteen 0 and 1 +#' @param network_smoothing smoothing factor between 0 and 1 #' (has automatic default, see details) #' @param min_cells_per_grid minimum number of cells to consider a grid #' @param cor_method correlation method @@ -3257,25 +3266,54 @@ do_spatial_grid_averaging <- function(expression_matrix, #' Setting \eqn{b = 0} means no smoothing and \eqn{b = 1} means no #' contribution from its own expression. #' -#' The `spatCorObject` can be further explored with `showSpatialCorGenes()` +#' The `spatCorObject` can be further explored with `showSpatialCorFeats()` #' @seealso \code{\link{showSpatialCorFeats}} #' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' detectSpatialCorFeatsMatrix(expression_matrix = getExpression( -#' g, output = "matrix"), method = "network", -#' spatial_network = getSpatialNetwork(g, output = "networkDT")) +#' # Perform with data in a gobject +#' detectSpatialCorFeats(g, method = "network") +#' +#' # This analysis can also be performed with data outside of the gobject +#' detectSpatialCorFeatsMatrix( +#' expression_matrix = getExpression( +#' g, output = "matrix"), +#' method = "network", +#' spatial_network = getSpatialNetwork(g, output = "networkDT") +#' ) +#' +NULL + + + +#' @rdname detectSpatialCorFeats #' @export -detectSpatialCorFeatsMatrix <- function(expression_matrix, - method = c("grid", "network"), - spatial_network, - spatial_grid, - spatial_locs, - subset_feats = NULL, - network_smoothing = NULL, - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman")) { +detectSpatialCorFeats <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + method = c("grid", "network"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + network_smoothing = NULL, + spatial_grid_name = "spatial_grid", + min_cells_per_grid = 4, + cor_method = c("pearson", "kendall", "spearman") +) { + # set default spat_unit and feat_type + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + ## correlation method to be used cor_method <- match.arg( cor_method, choices = c("pearson", "kendall", "spearman")) @@ -3283,10 +3321,46 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, ## method to be used method <- match.arg(method, choices = c("grid", "network")) + # get expression matrix + values <- match.arg( + expression_values, + unique(c("normalized", "scaled", "custom", expression_values))) + expr_values <- getExpression( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + values = values, + output = "matrix" + ) + + if (!is.null(subset_feats)) { + expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] + } + + + + # get spatial locations + spatial_locs <- getSpatialLocations( + gobject, + spat_unit = spat_unit, + name = spat_loc_name, + output = "data.table", + copy_obj = TRUE + ) + ## spatial averaging or smoothing if (method == "grid") { + # get spatial grid + spatial_grid <- getSpatialGrid( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + name = spatial_grid_name, + return_grid_Obj = FALSE + ) + loc_av_expr_matrix <- do_spatial_grid_averaging( - expression_matrix = as.matrix(expression_matrix), + expression_matrix = as.matrix(expr_values), spatial_grid = spatial_grid, spatial_locs = spatial_locs, subset_feats = subset_feats, @@ -3296,8 +3370,8 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, # data.table variables feat_ID <- variable <- NULL - cor_spat_matrix <- cor_flex(t_flex( - as.matrix(loc_av_expr_matrix)), method = cor_method) + cor_spat_matrix <- cor_flex(t_flex(as.matrix( + loc_av_expr_matrix)), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] cor_spat_DT <- data.table::melt.data.table( @@ -3307,8 +3381,16 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, } if (method == "network") { + # get spatial network + spatial_network <- getSpatialNetwork( + gobject = gobject, + spat_unit = spat_unit, + name = spatial_network_name, + output = "networkDT" + ) + knn_av_expr_matrix <- do_spatial_knn_smoothing( - expression_matrix = as.matrix(expression_matrix), + expression_matrix = as.matrix(expr_values), spatial_network = spatial_network, subset_feats = subset_feats, b = network_smoothing @@ -3316,6 +3398,7 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, + cor_spat_matrix <- cor_flex(t_flex(as.matrix( knn_av_expr_matrix)), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) @@ -3333,13 +3416,6 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, ## 2. perform expression correlation at single-cell level without ## spatial information - - # matrix - expr_values <- expression_matrix - if (!is.null(subset_feats)) { - expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] - } - cor_matrix <- cor_flex(t_flex(expr_values), method = cor_method) cor_matrixDT <- data.table::as.data.table(cor_matrix) cor_matrixDT[, feat_ID := rownames(cor_matrix)] @@ -3357,9 +3433,9 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, doubleDT[, cordiff := spat_cor - expr_cor] # difference in rank scores - doubleDT[, spatrank := data.table::frank( + doubleDT[, spatrank := frank( -spat_cor, ties.method = "first"), by = feat_ID] - doubleDT[, exprrank := data.table::frank( + doubleDT[, exprrank := frank( -expr_cor, ties.method = "first"), by = feat_ID] doubleDT[, rankdiff := spatrank - exprrank] @@ -3373,73 +3449,23 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, cor_clusters = list() ) - class(spatCorObject) <- append(class(spatCorObject), "spatCorObject") + class(spatCorObject) <- append("spatCorObject", class(spatCorObject)) return(spatCorObject) } - -#' @title detectSpatialCorFeats -#' @name detectSpatialCorFeats -#' @description Detect features that are spatially correlated -#' @param gobject giotto object -#' @param spat_unit spatial unit -#' @param feat_type feature type -#' @param spat_loc_name name for spatial locations -#' @param method method to use for spatial averaging -#' @param expression_values gene expression values to use -#' @param subset_feats subset of feats to use -#' @param spatial_network_name name of spatial network to use -#' @param network_smoothing smoothing factor beteen 0 and 1 -#' (default: automatic) -#' @param spatial_grid_name name of spatial grid to use -#' @param min_cells_per_grid minimum number of cells to consider a grid -#' @param cor_method correlation method -#' @returns returns a spatial correlation object: "spatCorObject" -#' @details -#' For method = network, it expects a fully connected spatial network. You -#' can make sure to create a -#' fully connected network by setting minimal_k > 0 in the -#' \code{\link{createSpatialNetwork}} function. -#' \itemize{ -#' \item{1. grid-averaging: }{average gene expression values within a predefined spatial grid} -#' \item{2. network-averaging: }{smoothens the gene expression matrix by averaging the expression within one cell -#' by using the neighbours within the predefined spatial network. b is a smoothening factor -#' that defaults to 1 - 1/k, where k is the median number of k-neighbors in the -#' selected spatial network. Setting b = 0 means no smoothing and b = 1 means no contribution -#' from its own expression.} -#' } -#' The spatCorObject can be further explored with showSpatialCorFeats() -#' @seealso \code{\link{showSpatialCorFeats}} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' detectSpatialCorFeats(g, method = "network") +#' @rdname detectSpatialCorFeats #' @export -detectSpatialCorFeats <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", +detectSpatialCorFeatsMatrix <- function(expression_matrix, method = c("grid", "network"), - expression_values = c("normalized", "scaled", "custom"), + spatial_network, + spatial_grid, + spatial_locs, subset_feats = NULL, - spatial_network_name = "Delaunay_network", network_smoothing = NULL, - spatial_grid_name = "spatial_grid", min_cells_per_grid = 4, cor_method = c("pearson", "kendall", "spearman")) { - # set default spat_unit and feat_type - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - ## correlation method to be used cor_method <- match.arg( cor_method, choices = c("pearson", "kendall", "spearman")) @@ -3447,45 +3473,10 @@ detectSpatialCorFeats <- function(gobject, ## method to be used method <- match.arg(method, choices = c("grid", "network")) - # get expression matrix - values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) - expr_values <- getExpression( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - values = values, - output = "matrix" - ) - - if (!is.null(subset_feats)) { - expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] - } - - - - # get spatial locations - spatial_locs <- getSpatialLocations(gobject, - spat_unit = spat_unit, - name = spat_loc_name, - output = "data.table", - copy_obj = TRUE - ) - ## spatial averaging or smoothing if (method == "grid") { - # get spatial grid - spatial_grid <- getSpatialGrid( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - name = spatial_grid_name, - return_grid_Obj = FALSE - ) - loc_av_expr_matrix <- do_spatial_grid_averaging( - expression_matrix = as.matrix(expr_values), + expression_matrix = as.matrix(expression_matrix), spatial_grid = spatial_grid, spatial_locs = spatial_locs, subset_feats = subset_feats, @@ -3495,8 +3486,8 @@ detectSpatialCorFeats <- function(gobject, # data.table variables feat_ID <- variable <- NULL - cor_spat_matrix <- cor_flex(t_flex(as.matrix( - loc_av_expr_matrix)), method = cor_method) + cor_spat_matrix <- cor_flex(t_flex( + as.matrix(loc_av_expr_matrix)), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] cor_spat_DT <- data.table::melt.data.table( @@ -3506,16 +3497,8 @@ detectSpatialCorFeats <- function(gobject, } if (method == "network") { - # get spatial network - spatial_network <- getSpatialNetwork( - gobject = gobject, - spat_unit = spat_unit, - name = spatial_network_name, - output = "networkDT" - ) - knn_av_expr_matrix <- do_spatial_knn_smoothing( - expression_matrix = as.matrix(expr_values), + expression_matrix = as.matrix(expression_matrix), spatial_network = spatial_network, subset_feats = subset_feats, b = network_smoothing @@ -3523,7 +3506,6 @@ detectSpatialCorFeats <- function(gobject, - cor_spat_matrix <- cor_flex(t_flex(as.matrix( knn_av_expr_matrix)), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) @@ -3541,6 +3523,13 @@ detectSpatialCorFeats <- function(gobject, ## 2. perform expression correlation at single-cell level without ## spatial information + + # matrix + expr_values <- expression_matrix + if (!is.null(subset_feats)) { + expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] + } + cor_matrix <- cor_flex(t_flex(expr_values), method = cor_method) cor_matrixDT <- data.table::as.data.table(cor_matrix) cor_matrixDT[, feat_ID := rownames(cor_matrix)] @@ -3558,9 +3547,9 @@ detectSpatialCorFeats <- function(gobject, doubleDT[, cordiff := spat_cor - expr_cor] # difference in rank scores - doubleDT[, spatrank := frank( + doubleDT[, spatrank := data.table::frank( -spat_cor, ties.method = "first"), by = feat_ID] - doubleDT[, exprrank := frank( + doubleDT[, exprrank := data.table::frank( -expr_cor, ties.method = "first"), by = feat_ID] doubleDT[, rankdiff := spatrank - exprrank] @@ -3574,7 +3563,7 @@ detectSpatialCorFeats <- function(gobject, cor_clusters = list() ) - class(spatCorObject) <- append("spatCorObject", class(spatCorObject)) + class(spatCorObject) <- append(class(spatCorObject), "spatCorObject") return(spatCorObject) } @@ -3583,6 +3572,7 @@ detectSpatialCorFeats <- function(gobject, + #' @title showSpatialCorFeats #' @name showSpatialCorFeats #' @description Shows and filters spatially correlated features diff --git a/man/detectSpatialCorFeats.Rd b/man/detectSpatialCorFeats.Rd index 4cba6edaf..834197249 100644 --- a/man/detectSpatialCorFeats.Rd +++ b/man/detectSpatialCorFeats.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/spatial_genes.R \name{detectSpatialCorFeats} \alias{detectSpatialCorFeats} -\title{detectSpatialCorFeats} +\alias{detectSpatialCorFeatsMatrix} +\title{Detect spatially correlated features} \usage{ detectSpatialCorFeats( gobject, @@ -18,6 +19,18 @@ detectSpatialCorFeats( min_cells_per_grid = 4, cor_method = c("pearson", "kendall", "spearman") ) + +detectSpatialCorFeatsMatrix( + expression_matrix, + method = c("grid", "network"), + spatial_network, + spatial_grid, + spatial_locs, + subset_feats = NULL, + network_smoothing = NULL, + min_cells_per_grid = 4, + cor_method = c("pearson", "kendall", "spearman") +) } \arguments{ \item{gobject}{giotto object} @@ -32,44 +45,68 @@ detectSpatialCorFeats( \item{expression_values}{gene expression values to use} -\item{subset_feats}{subset of feats to use} +\item{subset_feats}{subset of features to use} \item{spatial_network_name}{name of spatial network to use} -\item{network_smoothing}{smoothing factor beteen 0 and 1 -(default: automatic)} +\item{network_smoothing}{smoothing factor between 0 and 1 +(has automatic default, see details)} \item{spatial_grid_name}{name of spatial grid to use} \item{min_cells_per_grid}{minimum number of cells to consider a grid} \item{cor_method}{correlation method} + +\item{expression_matrix}{provided expression matrix} + +\item{spatial_network}{provided spatial network} + +\item{spatial_grid}{provided spatial grid} + +\item{spatial_locs}{provided spatial locations} } \value{ -returns a spatial correlation object: "spatCorObject" +returns a spatial correlation object: \code{spatCorObject} } \description{ -Detect features that are spatially correlated +Detect features that are spatially correlated. Functions for +starting from either a gobject (\code{detectSpatialCorFeats()}) or individual +pieces of data (\code{detectSpatialCorFeatsMatrix()}) are provided. } \details{ -For method = network, it expects a fully connected spatial network. You -can make sure to create a +For \code{method = network}, it expects a fully connected spatial network. +You can make sure to create a fully connected network by setting minimal_k > 0 in the - \code{\link{createSpatialNetwork}} function. -\itemize{ - \item{1. grid-averaging: }{average gene expression values within a predefined spatial grid} - \item{2. network-averaging: }{smoothens the gene expression matrix by averaging the expression within one cell - by using the neighbours within the predefined spatial network. b is a smoothening factor - that defaults to 1 - 1/k, where k is the median number of k-neighbors in the - selected spatial network. Setting b = 0 means no smoothing and b = 1 means no contribution - from its own expression.} +\code{\link{createSpatialNetwork}} function. +\enumerate{ +\item \strong{grid-averaging:} average gene expression values within a predefined +spatial grid +\item \strong{network-averaging:} smoothens the gene expression matrix by +averaging the expression within one cell by using the neighbours within +the predefined spatial network. \eqn{b} is a smoothening factor passed by +\code{network_smoothing} param that defaults to \eqn{1 - 1/k}, where \eqn{k} +is the median number of k-neighbors in the selected spatial network. +Setting \eqn{b = 0} means no smoothing and \eqn{b = 1} means no +contribution from its own expression. } -The spatCorObject can be further explored with showSpatialCorFeats() + +The \code{spatCorObject} can be further explored with \code{showSpatialCorFeats()} } \examples{ g <- GiottoData::loadGiottoMini("visium") +# Perform with data in a gobject detectSpatialCorFeats(g, method = "network") + +# This analysis can also be performed with data outside of the gobject +detectSpatialCorFeatsMatrix( + expression_matrix = getExpression( + g, output = "matrix"), + method = "network", + spatial_network = getSpatialNetwork(g, output = "networkDT") +) + } \seealso{ \code{\link{showSpatialCorFeats}} diff --git a/man/detectSpatialCorFeatsMatrix.Rd b/man/detectSpatialCorFeatsMatrix.Rd deleted file mode 100644 index 515557f80..000000000 --- a/man/detectSpatialCorFeatsMatrix.Rd +++ /dev/null @@ -1,73 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_genes.R -\name{detectSpatialCorFeatsMatrix} -\alias{detectSpatialCorFeatsMatrix} -\title{detectSpatialCorFeatsMatrix} -\usage{ -detectSpatialCorFeatsMatrix( - expression_matrix, - method = c("grid", "network"), - spatial_network, - spatial_grid, - spatial_locs, - subset_feats = NULL, - network_smoothing = NULL, - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman") -) -} -\arguments{ -\item{expression_matrix}{provided expression matrix} - -\item{method}{method to use for spatial averaging} - -\item{spatial_network}{provided spatial network} - -\item{spatial_grid}{provided spatial grid} - -\item{spatial_locs}{provided spatial locations} - -\item{subset_feats}{subset of features to use} - -\item{network_smoothing}{smoothing factor beteen 0 and 1 -(has automatic default, see details)} - -\item{min_cells_per_grid}{minimum number of cells to consider a grid} - -\item{cor_method}{correlation method} -} -\value{ -returns a spatial correlation object: \code{spatCorObject} -} -\description{ -Detect genes that are spatially correlated -} -\details{ -For \code{method = network}, it expects a fully connected spatial network. -You can make sure to create a -fully connected network by setting minimal_k > 0 in the -\code{\link{createSpatialNetwork}} function. -\enumerate{ -\item \strong{grid-averaging:} average gene expression values within a predefined -spatial grid -\item \strong{network-averaging:} smoothens the gene expression matrix by -averaging the expression within one cell by using the neighbours within -the predefined spatial network. \eqn{b} is a smoothening factor passed by -\code{network_smoothing} param that defaults to \eqn{1 - 1/k}, where \eqn{k} -is the median number of k-neighbors in the selected spatial network. -Setting \eqn{b = 0} means no smoothing and \eqn{b = 1} means no -contribution from its own expression. -} - -The \code{spatCorObject} can be further explored with \code{showSpatialCorGenes()} -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -detectSpatialCorFeatsMatrix(expression_matrix = getExpression( -g, output = "matrix"), method = "network", -spatial_network = getSpatialNetwork(g, output = "networkDT")) -} -\seealso{ -\code{\link{showSpatialCorFeats}} -} diff --git a/man/detectSpatialCorGenes.Rd b/man/detectSpatialCorGenes.Rd deleted file mode 100644 index 6c5a2ce6b..000000000 --- a/man/detectSpatialCorGenes.Rd +++ /dev/null @@ -1,74 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_genes.R -\name{detectSpatialCorGenes} -\alias{detectSpatialCorGenes} -\title{detectSpatialCorGenes} -\usage{ -detectSpatialCorGenes( - gobject, - feat_type = NULL, - spat_unit = NULL, - method = c("grid", "network"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - subset_genes = NULL, - spatial_network_name = "Delaunay_network", - network_smoothing = NULL, - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman") -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{feat_type}{feature type} - -\item{spat_unit}{spatial unit} - -\item{method}{method to use for spatial averaging} - -\item{expression_values}{gene expression values to use} - -\item{subset_feats}{subset of feats to use} - -\item{subset_genes}{deprecated, use \code{subset_feats}} - -\item{spatial_network_name}{name of spatial network to use} - -\item{network_smoothing}{smoothing factor beteen 0 and 1 -(default: automatic)} - -\item{spatial_grid_name}{name of spatial grid to use} - -\item{min_cells_per_grid}{minimum number of cells to consider a grid} - -\item{cor_method}{correlation method} -} -\value{ -returns a spatial correlation object: "spatCorObject" -} -\description{ -Detect genes that are spatially correlated -} -\details{ -For method = network, it expects a fully connected spatial network. You -can make sure to create a -fully connected network by setting minimal_k > 0 in the -\code{\link{createSpatialNetwork}} function. -\itemize{ - \item{1. grid-averaging: }{average gene expression values within a - predefined spatial grid} - \item{2. network-averaging: }{smoothens the gene expression matrix by - averaging the expression within one cell - by using the neighbours within the predefined spatial network. b is a - smoothening factor that defaults to 1 - 1/k, where k is the median - number of k-neighbors in the selected spatial network. Setting b = 0 - means no smoothing and b = 1 means no contribution - from its own expression.} -} -The spatCorObject can be further explored with showSpatialCorGenes() -} -\seealso{ -\code{\link{showSpatialCorGenes}} -} From 4eae86e54df392ab2806a619d38dfea7cb68ce77 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 22 May 2024 18:31:44 -0400 Subject: [PATCH 044/150] fix: add catch for too high ncp --- NEWS.md | 5 ++ R/dimension_reduction.R | 190 +++++++++++++++++++++------------------- 2 files changed, 106 insertions(+), 89 deletions(-) diff --git a/NEWS.md b/NEWS.md index 385e07e7e..1b68999ae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,11 @@ ## Breaking changes * Deprecated `detectSpatialCorGenes()` removed. Use `detectSpatialCorFeats()` instead +* Deprecated `findInteractionChangedGenes()` removed. Use `findInteractionChangedFeats()` instead +* Deprecated `findCellProximityGenes()` removed. Use `findInteractionChangedFeats()` instead + +## Bug fixes +* Add catch in `runPCAprojectionBatch()` for when ncp requested exceeds number of feats used # Giotto 4.0.8 (2024/05/22) diff --git a/R/dimension_reduction.R b/R/dimension_reduction.R index 249c677d8..a52599a59 100644 --- a/R/dimension_reduction.R +++ b/R/dimension_reduction.R @@ -63,7 +63,7 @@ colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) # coordinates - coords <- sweep(pca_res$var$coord, + coords <- sweep(pca_res$var$coord, 2, sqrt(eigenvalues[seq_len(ncp)]), FUN = "/") rownames(coords) <- colnames(x) colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) @@ -107,7 +107,7 @@ eigenvalues = eigenvalues, loadings = loadings, coords = coords) } - vmsg(.is_debug = TRUE, + vmsg(.is_debug = TRUE, "finished .run_pca_factominer, method == factominer") return(result) @@ -143,7 +143,7 @@ min_ncp <- min(dim(x)) if (ncp >= min_ncp) { - warning("ncp >= minimum dimension of x, will be set to + warning("ncp >= minimum dimension of x, will be set to minimum dimension of x - 1") ncp <- min_ncp - 1 } @@ -291,7 +291,7 @@ if (feats_to_use %in% colnames(feat_metadata)) { vmsg( .v = verbose, str_double_quote(feats_to_use), - "column was found in the feats metadata information and will be + "column was found in the feats metadata information and will be used to select highly variable features" ) feats_to_use <- feat_metadata[ @@ -305,12 +305,12 @@ ) } } else { - vmsg(.v = verbose, + vmsg(.v = verbose, "a custom vector of genes will be used to subset the matrix") sel_matrix <- sel_matrix[rownames(sel_matrix) %in% feats_to_use, ] } - vmsg(.v = verbose, .is_debug = TRUE, + vmsg(.v = verbose, .is_debug = TRUE, "class of selected matrix: ", class(sel_matrix)) return(sel_matrix) @@ -349,7 +349,7 @@ #' \item feats_to_use = NULL: will use all features from the selected matrix #' \item feats_to_use = : can be used to select a column name of #' highly variable features, created by (see \code{\link{calculateHVF}}) -#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually +#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually #' provided features #' } #' By default the number of principle components that we calculate is 100, which @@ -357,7 +357,7 @@ #' will calculate all the principle components. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runPCA(g) #' @export runPCA <- function(gobject, @@ -400,7 +400,7 @@ runPCA <- function(gobject, # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( gobject = gobject, @@ -463,7 +463,7 @@ runPCA <- function(gobject, ... ) } else { - stop("only PCA methods from the BiocSingular and factominer + stop("only PCA methods from the BiocSingular and factominer package have been implemented") } } else { @@ -488,7 +488,7 @@ runPCA <- function(gobject, set_seed = set_seed, seed_number = seed_number, ... ) } else { - stop("only PCA methods from the irlba and factominer package have + stop("only PCA methods from the irlba and factominer package have been implemented") } } @@ -586,7 +586,7 @@ runPCA <- function(gobject, min_ncp <- min(dim(x)) if (ncp >= min_ncp) { - warning("ncp >= minimum dimension of x, will be set to minimum + warning("ncp >= minimum dimension of x, will be set to minimum dimension of x - 1") ncp <- min_ncp - 1 } @@ -721,7 +721,7 @@ runPCA <- function(gobject, #' @title runPCAprojection #' @name runPCAprojection -#' @description runs a Principal Component Analysis on a random +#' @description runs a Principal Component Analysis on a random #' subset + projection #' @param gobject giotto object #' @param spat_unit spatial unit @@ -743,7 +743,7 @@ runPCA <- function(gobject, #' @param verbose verbosity of the function #' @param ... additional parameters for PCA (see details) #' @returns giotto object with updated PCA dimension recuction -#' @details See \code{\link[BiocSingular]{runPCA}} and +#' @details See \code{\link[BiocSingular]{runPCA}} and #' \code{\link[FactoMineR]{PCA}} for more information about other parameters. #' This PCA implementation is similar to \code{\link{runPCA}}, except that it #' performs PCA on a subset of the cells or features, and predict on the others. @@ -752,12 +752,12 @@ runPCA <- function(gobject, #' \item feats_to_use = NULL: will use all features from the selected matrix #' \item feats_to_use = : can be used to select a column name of #' highly variable features, created by (see \code{\link{calculateHVF}}) -#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually +#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually #' provided features #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runPCAprojection(g) #' @export runPCAprojection <- function(gobject, @@ -801,7 +801,7 @@ runPCAprojection <- function(gobject, # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( gobject = gobject, @@ -946,7 +946,7 @@ runPCAprojection <- function(gobject, #' @title runPCAprojectionBatch #' @name runPCAprojectionBatch -#' @description runs a Principal Component Analysis on multiple random +#' @description runs a Principal Component Analysis on multiple random #' batches + projection #' @param gobject giotto object #' @param spat_unit spatial unit @@ -969,27 +969,30 @@ runPCAprojection <- function(gobject, #' @param verbose verbosity of the function #' @param ... additional parameters for PCA (see details) #' @returns giotto object with updated PCA dimension reduction -#' @details See \code{\link[BiocSingular]{runPCA}} and +#' @details See \code{\link[BiocSingular]{runPCA}} and #' \code{\link[FactoMineR]{PCA}} for more information about other parameters. -#' This PCA implementation is similar to \code{\link{runPCA}} and +#' This PCA implementation is similar to \code{\link{runPCA}} and #' \code{\link{runPCAprojection}}, -#' except that it performs PCA on multiple subsets (batches) of the cells or +#' except that it performs PCA on multiple subsets (batches) of the cells or #' features, -#' and predict on the others. This can significantly increase speed without +#' and predict on the others. This can significantly increase speed without #' sacrificing accuracy too much. #' \itemize{ #' \item feats_to_use = NULL: will use all features from the selected matrix #' \item feats_to_use = : can be used to select a column name of #' highly variable features, created by (see \code{\link{calculateHVF}}) -#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually +#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually #' provided features #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' runPCAprojectionBatch(g) +#' +#' # set feats_to_use to NULL since there are not many hvfs +#' # (only 48 in this mini dataset) +#' runPCAprojectionBatch(g, feats_to_use = NULL) #' @export -runPCAprojectionBatch <- function(gobject, +runPCAprojectionBatch <- function( + gobject, spat_unit = NULL, feat_type = NULL, expression_values = c("normalized", "scaled", "custom"), @@ -1008,7 +1011,8 @@ runPCAprojectionBatch <- function(gobject, set_seed = TRUE, seed_number = 1234, verbose = TRUE, - ...) { + ... +) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1031,7 +1035,7 @@ runPCAprojectionBatch <- function(gobject, # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( gobject = gobject, @@ -1071,7 +1075,6 @@ runPCAprojectionBatch <- function(gobject, } - ## subset matrix if (!is.null(feats_to_use)) { expr_values <- .create_feats_to_use_matrix( @@ -1085,6 +1088,15 @@ runPCAprojectionBatch <- function(gobject, } + if (ncp >= nrow(expr_values)) { + ncp <- nrow(expr_values) - 1L + warning(wrap_txt( + "ncp >= number of available features + ncp will be set to minimum of n features - 1" + )) + } + + # do PCA dimension reduction reduction <- match.arg(reduction, c("cells", "feats")) @@ -1095,7 +1107,7 @@ runPCAprojectionBatch <- function(gobject, pca_batch_results <- list() for (batch in seq_len(batch_number)) { - if (verbose) wrap_msg("start batch ", batch) + vmsg(.v = verbose, "start batch ", batch) if (isTRUE(set_seed)) { seed_batch <- seed_number + batch @@ -1119,7 +1131,7 @@ runPCAprojectionBatch <- function(gobject, ... ) - # adjust the sign of the coordinates and loadings vector relative + # adjust the sign of the coordinates and loadings vector relative # to first batch this is necessary for the next averaging step if (batch == 1) { pca_batch_results[[batch]] <- pca_object @@ -1137,7 +1149,7 @@ runPCAprojectionBatch <- function(gobject, } } - if (verbose) message("start averaging pca results of batches") + vmsg(.v = verbose, "start averaging pca results of batches") # calculate average eigenvalues, coordinates and loadings # TODO: test out DT approach, might be faster and more efficient for @@ -1153,7 +1165,7 @@ runPCAprojectionBatch <- function(gobject, coords_list <- lapply(pca_batch_results, FUN = function(x) x$coords) coords_vector <- do.call("c", coords_list) coords_array <- array( - data = coords_vector, + data = coords_vector, dim = c(ncol(expr_values), ncp, length(pca_batch_results))) coords_all <- apply(coords_array, MARGIN = seq_len(2), function(arr) { mean(arr, na.rm = TRUE) @@ -1165,7 +1177,7 @@ runPCAprojectionBatch <- function(gobject, loadings_list <- lapply(pca_batch_results, FUN = function(x) x$loadings) loadings_vector <- do.call("c", loadings_list) loadings_array <- array( - data = loadings_vector, + data = loadings_vector, dim = c(nrow(expr_values), ncp, length(pca_batch_results))) loadings_all <- apply( loadings_array, MARGIN = seq_len(2), function(arr) { @@ -1176,7 +1188,7 @@ runPCAprojectionBatch <- function(gobject, pca_object <- list( - eigenvalues = eigenvalues_mean, + eigenvalues = eigenvalues_mean, loadings = loadings_all, coords = coords_all) } else { pca_batch_results <- list() @@ -1207,7 +1219,7 @@ runPCAprojectionBatch <- function(gobject, ) - # adjust the sign of the coordinates and loadings vector relative + # adjust the sign of the coordinates and loadings vector relative # to first batch this is necessary for the next averaging step if (batch == 1) { pca_batch_results[[batch]] <- pca_object @@ -1225,7 +1237,7 @@ runPCAprojectionBatch <- function(gobject, } } - if (verbose) wrap_msg("start averaging pca results of batches") + vmsg(.v = verbose, "start averaging pca results of batches") # calculate average eigenvalues, coordinates and loadings # TODO: test out DT approach, might be faster and more efficient for @@ -1241,7 +1253,7 @@ runPCAprojectionBatch <- function(gobject, coords_list <- lapply(pca_batch_results, FUN = function(x) x$coords) coords_vector <- do.call("c", coords_list) coords_array <- array( - data = coords_vector, + data = coords_vector, dim = c(ncol(expr_values), ncp, length(pca_batch_results))) coords_all <- apply(coords_array, MARGIN = seq_len(2), function(arr) { mean(arr, na.rm = TRUE) @@ -1253,7 +1265,7 @@ runPCAprojectionBatch <- function(gobject, loadings_list <- lapply(pca_batch_results, FUN = function(x) x$loadings) loadings_vector <- do.call("c", loadings_list) loadings_array <- array( - data = loadings_vector, + data = loadings_vector, dim = c(nrow(expr_values), ncp, length(pca_batch_results))) loadings_all <- apply( loadings_array, MARGIN = seq_len(2), function(arr) { @@ -1264,7 +1276,7 @@ runPCAprojectionBatch <- function(gobject, pca_object <- list( - eigenvalues = eigenvalues_mean, + eigenvalues = eigenvalues_mean, loadings = loadings_all, coords = coords_all) } @@ -1326,7 +1338,7 @@ runPCAprojectionBatch <- function(gobject, #' @title screePlot #' @name screePlot -#' @description identify significant principal components (PCs) using an +#' @description identify significant principal components (PCs) using an #' screeplot (a.k.a. elbowplot) #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -1344,13 +1356,13 @@ runPCAprojectionBatch <- function(gobject, #' @returns ggplot object for scree method #' @details #' Screeplot works by plotting the explained variance of each -#' individual PC in a barplot allowing you to identify which PC provides a +#' individual PC in a barplot allowing you to identify which PC provides a #' significant contribution (a.k.a 'elbow method'). \cr -#' Screeplot will use an available pca object, based on the parameter 'name', +#' Screeplot will use an available pca object, based on the parameter 'name', #' or it will create it if it's not available (see \code{\link{runPCA}}) #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' screePlot(g) #' @export screePlot <- function(gobject, @@ -1407,21 +1419,21 @@ screePlot <- function(gobject, # if pca already exists plot if (!is.null(pca_obj)) { - if (isTRUE(verbose)) - wrap_msg("PCA with name: ", name, + if (isTRUE(verbose)) + wrap_msg("PCA with name: ", name, " already exists and will be used for the screeplot") screeplot <- create_screeplot( eigs = slot(pca_obj, "misc")$eigenvalues, ncp = ncp, ylim = ylim) } else { # if pca doesn't exists, then create pca and then plot - if (isTRUE(verbose)) - wrap_msg("PCA with name: ", name, + if (isTRUE(verbose)) + wrap_msg("PCA with name: ", name, " does NOT exist, PCA will be done first") # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( gobject = gobject, @@ -1466,10 +1478,10 @@ screePlot <- function(gobject, ) } else if (method == "factominer") { pca_object <- .run_pca_factominer( - x = t_flex(expr_values), + x = t_flex(expr_values), scale = scale_unit, ncp = ncp, rev = rev, ...) } else { - stop("only PCA methods from the irlba and factominer package + stop("only PCA methods from the irlba and factominer package have been implemented") } @@ -1489,7 +1501,7 @@ screePlot <- function(gobject, ) screeplot <- create_screeplot( - eigs = slot(dimObject, "misc")$eigenvalues, + eigs = slot(dimObject, "misc")$eigenvalues, ncp = ncp, ylim = ylim) } } @@ -1550,7 +1562,7 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_bw() pl <- pl + ggplot2::geom_bar( - data = screeDT[seq_len(ncp)], + data = screeDT[seq_len(ncp)], ggplot2::aes(x = PC, y = var_expl), stat = "identity") pl <- pl + ggplot2::coord_cartesian(ylim = ylim) pl <- pl + ggplot2::theme( @@ -1560,7 +1572,7 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { cpl <- ggplot2::ggplot() cpl <- cpl + ggplot2::theme_bw() cpl <- cpl + ggplot2::geom_bar( - data = screeDT[seq_len(ncp)], + data = screeDT[seq_len(ncp)], ggplot2::aes(x = PC, y = var_expl_cum), stat = "identity") cpl <- cpl + ggplot2::theme(axis.text.x = ggplot2::element_text( angle = 45, hjust = 1, vjust = 1)) @@ -1604,12 +1616,12 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { #' @param verbose show progress of jackstraw method #' @returns ggplot object for jackstraw method #' @details -#' The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} -#' function. By systematically permuting genes it identifies robust, and thus +#' The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} +#' function. By systematically permuting genes it identifies robust, and thus #' significant, PCs. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' jackstrawPlot(gobject = g) #' @export jackstrawPlot <- function(gobject, @@ -1644,11 +1656,11 @@ jackstrawPlot <- function(gobject, ) # print message with information # - if (verbose) - message("using 'jackstraw' to identify significant PCs If used in - published research, please cite: + if (verbose) + message("using 'jackstraw' to identify significant PCs If used in + published research, please cite: Neo Christopher Chung and John D. Storey (2014). - 'Statistical significance of variables driving systematic variation in + 'Statistical significance of variables driving systematic variation in high-dimensional data. Bioinformatics") # select direction of reduction @@ -1656,7 +1668,7 @@ jackstrawPlot <- function(gobject, # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( gobject = gobject, @@ -1687,17 +1699,17 @@ jackstrawPlot <- function(gobject, } jtest <- jackstraw::permutationPA( - dat = as.matrix(expr_values), + dat = as.matrix(expr_values), B = iter, threshold = threshold, verbose = verbose) ## results ## nr_sign_components <- jtest$r - if (verbose) - cat("number of estimated significant components: ", + if (verbose) + cat("number of estimated significant components: ", nr_sign_components) final_results <- jtest$p jackplot <- create_jackstrawplot( - jackstraw_data = final_results, + jackstraw_data = final_results, ncp = ncp, ylim = ylim, threshold = threshold) } @@ -1746,7 +1758,7 @@ create_jackstrawplot <- function(jackstraw_data, pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_bw() pl <- pl + ggplot2::geom_point( - data = testDT[seq_len(ncp)], + data = testDT[seq_len(ncp)], ggplot2::aes(x = PC, y = p.val, fill = sign), shape = 21) pl <- pl + ggplot2::scale_fill_manual( values = c("n.s." = "lightgrey", "sign" = "darkorange")) @@ -1787,19 +1799,19 @@ create_jackstrawplot <- function(jackstraw_data, #' @param jack_ylim y-axis limits on jackstraw plot #' @param verbose be verbose #' @returns ggplot object for scree method and maxtrix of p-values for jackstraw -#' @details Two different methods can be used to assess the number of relevant +#' @details Two different methods can be used to assess the number of relevant #' or significant prinicipal components (PC's). \cr #' 1. Screeplot works by plotting the explained variance of each -#' individual PC in a barplot allowing you to identify which PC provides a +#' individual PC in a barplot allowing you to identify which PC provides a #' significant #' contribution (a.k.a. 'elbow method'). \cr -#' 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} -#' function. By systematically permuting genes it identifies robust, and thus +#' 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} +#' function. By systematically permuting genes it identifies robust, and thus #' significant, PCs. #' \cr #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' signPCA(g) #' @export signPCA <- function(gobject, @@ -1944,7 +1956,7 @@ signPCA <- function(gobject, show_plot = show_plot, default_save_name = default_save_name, save_param = save_param, - else_return = jackplot + else_return = jackplot # TODO potentially return all results instead )) } @@ -1987,7 +1999,7 @@ signPCA <- function(gobject, #' @param seed_number seed number to use #' @param verbose verbosity of function #' @param toplevel_params parameters to extract -#' @inheritDotParams uwot::umap -X -n_neighbors -n_components -n_epochs +#' @inheritDotParams uwot::umap -X -n_neighbors -n_components -n_epochs #' -min_dist -n_threads -spread -seed -scale -pca -pca_center -pca_method #' @returns giotto object with updated UMAP dimension reduction #' @details See \code{\link[uwot]{umap}} for more information about these and @@ -2004,7 +2016,7 @@ signPCA <- function(gobject, #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runUMAP(g) #' @export runUMAP <- function(gobject, @@ -2107,7 +2119,7 @@ runUMAP <- function(gobject, ## using original matrix ## # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( @@ -2200,7 +2212,7 @@ runUMAP <- function(gobject, ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, + gobject <- set_dimReduction(gobject = gobject, dimObject = dimObject) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -2254,7 +2266,7 @@ runUMAP <- function(gobject, #' @param toplevel_params parameters to extract #' @param ... additional UMAP parameters #' @returns giotto object with updated UMAP dimension reduction -#' @details See \code{\link[uwot]{umap}} for more information about these and +#' @details See \code{\link[uwot]{umap}} for more information about these and #' other parameters. #' \itemize{ #' \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca') @@ -2265,7 +2277,7 @@ runUMAP <- function(gobject, #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runUMAPprojection(g) #' @export runUMAPprojection <- function(gobject, @@ -2517,7 +2529,7 @@ runUMAPprojection <- function(gobject, #' @param verbose verbosity of the function #' @param ... additional tSNE parameters #' @returns giotto object with updated tSNE dimension recuction -#' @details See \code{\link[Rtsne]{Rtsne}} for more information about these and +#' @details See \code{\link[Rtsne]{Rtsne}} for more information about these and #' other parameters. \cr #' \itemize{ #' \item Input for tSNE dimension reduction can be another dimension reduction (default = 'pca') @@ -2528,7 +2540,7 @@ runUMAPprojection <- function(gobject, #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runtSNE(g) #' @export runtSNE <- function(gobject, @@ -2612,7 +2624,7 @@ runtSNE <- function(gobject, ## using original matrix ## # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( gobject = gobject, @@ -2693,7 +2705,7 @@ runtSNE <- function(gobject, ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, + gobject <- set_dimReduction(gobject = gobject, dimObject = dimObject) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -2740,11 +2752,11 @@ runtSNE <- function(gobject, #' @param verbose be verbose #' @param ... additional \code{\link[harmony]{HarmonyMatrix}} parameters #' @returns giotto object with updated Harmony dimension reduction -#' @details This is a simple wrapper for the HarmonyMatrix function in the +#' @details This is a simple wrapper for the HarmonyMatrix function in the #' Harmony package \doi{10.1038/s41592-019-0619-0}. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' runGiottoHarmony(g, vars_use = "leiden_clus") #' @export runGiottoHarmony <- function(gobject, @@ -2770,7 +2782,7 @@ runGiottoHarmony <- function(gobject, # print message with information # - message("using 'Harmony' to integrate different datasets. If used in + message("using 'Harmony' to integrate different datasets. If used in published research, please cite:") wrap_msg("Korsunsky, I., Millard, N., Fan, J. et al. @@ -2844,7 +2856,7 @@ runGiottoHarmony <- function(gobject, ## using original matrix ## # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( gobject = gobject, @@ -2918,12 +2930,12 @@ runGiottoHarmony <- function(gobject, ) if (name %in% harmony_names) { - cat(name, + cat(name, " has already been used with harmony, will be overwritten") } ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, + gobject <- set_dimReduction(gobject = gobject, dimObject = harmdimObject) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### From 64586f9d0609f45e279a78f75d96b4f8ab0638e4 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Thu, 23 May 2024 09:20:46 -0400 Subject: [PATCH 045/150] fix: `spatCellCellcom()` verbosity - also cleanup some of the printouts - fixes #949 --- NEWS.md | 1 + R/spatial_interaction.R | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1b68999ae..ad52e9273 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ ## Bug fixes * Add catch in `runPCAprojectionBatch()` for when ncp requested exceeds number of feats used +* Make `spatCellCellcom()` respect `verbose` flag [#949](https://github.com/drieslab/Giotto/issues/949) by rbutleriii # Giotto 4.0.8 (2024/05/22) diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index 9073f3d5c..dd62a1926 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -190,7 +190,7 @@ cellProximityEnrichment <- function(gobject, minimum_simulations <- unique_ints[rep( seq_len(nrow(unique_ints)), number_of_simulations), ] minimum_simulations[, round := rep( - paste0("sim", seq_len(number_of_simulations)), + paste0("sim", seq_len(number_of_simulations)), each = nrow(unique_ints))] minimum_simulations[, N := 0] @@ -2297,7 +2297,7 @@ average_feat_feat_expression_in_groups <- function(gobject, lig_test <- data.table::as.data.table( reshape2::melt(ligand_match, measure.vars = all_ligand_cols)) lig_test[, ligand := rep(rownames(ligand_match), ncol(ligand_match))] - lig_test[, ligand := strsplit(ligand, "\\.")[[1]][1], + lig_test[, ligand := strsplit(ligand, "\\.")[[1]][1], by = seq_len(nrow(lig_test))] lig_test[, LR_comb := rep(LR_pairs, ncol(ligand_match))] setnames(lig_test, "value", "lig_expr") @@ -2620,7 +2620,7 @@ exprCellCellcom <- function(gobject, #' @param set_seed set a seed for reproducibility #' @param seed_number seed number #' @param verbose verbose -#' @returns Cell-Cell communication scores for feature pairs based on spatial +#' @returns Cell-Cell communication scores for feature pairs based on spatial #' interaction #' @details Statistical framework to identify if pairs of features #' (such as ligand-receptor combinations) @@ -2650,7 +2650,7 @@ exprCellCellcom <- function(gobject, #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' specificCellCellcommunicationScores(g, cluster_column = "leiden_clus") #' @export specificCellCellcommunicationScores <- function(gobject, @@ -2788,11 +2788,13 @@ specificCellCellcommunicationScores <- function(gobject, subset_metadata <- cell_metadata[cell_ID %in% subset_ids] needed_cell_types <- subset_metadata[[cluster_column]] - + if (verbose) cat("simulations:") ## simulations ## for (sim in seq_len(random_iter)) { - if (verbose == TRUE) cat("simulation ", sim) + if (verbose) { + cat(sprintf(" %s ", sim)) + } # get random ids and subset if (set_seed == TRUE) { @@ -2849,6 +2851,7 @@ specificCellCellcommunicationScores <- function(gobject, difference[difference < 0] <- -1 total_bool <- total_bool + difference } + if (verbose) cat("\n") comScore[, rand_expr := total_av / random_iter] @@ -3034,7 +3037,7 @@ spatCellCellcom <- function(gobject, combn_DT <- rbind(same_DT, combn_DT) ## parallel option ## - if (do_parallel == TRUE) { + if (isTRUE(do_parallel)) { savelist <- lapply_flex( X = seq_len(nrow(combn_DT)), future.seed = TRUE, cores = cores, fun = function(row) { @@ -3058,7 +3061,8 @@ spatCellCellcom <- function(gobject, adjust_method = adjust_method, adjust_target = adjust_target, set_seed = set_seed, - seed_number = seed_number + seed_number = seed_number, + verbose = verbose %in% c("a lot") ) }) } else { @@ -3070,9 +3074,9 @@ spatCellCellcom <- function(gobject, cell_type_1 <- combn_DT[row][["V1"]] cell_type_2 <- combn_DT[row][["V2"]] - if (verbose == "a little" | verbose == "a lot") - cat("PROCESS nr ", countdown, ": ", cell_type_1, " and ", - cell_type_2) + if (verbose == "a little" || verbose == "a lot") + cat(sprintf("[PROCESS nr %d : %d and %d] ", + countdown, cell_type_1, cell_type_2)) if (verbose %in% c("a little", "none")) { specific_verbose <- FALSE From c098f45c9e88c03ab71349b522e5f2b374f5f9bd Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Thu, 23 May 2024 09:30:41 -0400 Subject: [PATCH 046/150] Update spatial_interaction.R --- R/spatial_interaction.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index dd62a1926..b59e12cf3 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -2884,6 +2884,7 @@ specificCellCellcommunicationScores <- function(gobject, # get minimum adjusted p.value that is not zero all_p.adj <- comScore[["p.adj"]] + # TODO catch when len = 0 lowest_p.adj <- min(all_p.adj[all_p.adj != 0]) comScore[, PI := ifelse(p.adj == 0, log2fc * ( -log10(lowest_p.adj)), log2fc * (-log10(p.adj)))] From 7796cc5993543cddf30b928c895733324896c8d7 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Thu, 23 May 2024 09:42:03 -0400 Subject: [PATCH 047/150] change: print when no nonzero adjusted p.values are found - this should probably be caught differently --- R/spatial_interaction.R | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index b59e12cf3..b6075e35f 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -2851,7 +2851,6 @@ specificCellCellcommunicationScores <- function(gobject, difference[difference < 0] <- -1 total_bool <- total_bool + difference } - if (verbose) cat("\n") comScore[, rand_expr := total_av / random_iter] @@ -2884,11 +2883,23 @@ specificCellCellcommunicationScores <- function(gobject, # get minimum adjusted p.value that is not zero all_p.adj <- comScore[["p.adj"]] - # TODO catch when len = 0 - lowest_p.adj <- min(all_p.adj[all_p.adj != 0]) + nonzero_p.adj <- all_p.adj[all_p.adj != 0] + if (length(nonzero_p.adj) == 0L) { + warning( + call. = FALSE, + "no adjusted p.values that are not zero; returning Inf" + ) + if (verbose) cat("<- Inf returned") + lowest_p.adj <- Inf + } else { + lowest_p.adj <- min(nonzero_p.adj) + } + comScore[, PI := ifelse(p.adj == 0, log2fc * ( -log10(lowest_p.adj)), log2fc * (-log10(p.adj)))] + if (verbose) cat("\n") + return(comScore) } } From 8d2a9491577663f69f2db7b3b32a5b5f28cc6f40 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Thu, 23 May 2024 14:10:19 -0400 Subject: [PATCH 048/150] run devtools::document --- man/jackstrawPlot.Rd | 4 ++-- man/runGiottoHarmony.Rd | 2 +- man/runPCA.Rd | 2 +- man/runPCAprojection.Rd | 6 +++--- man/runPCAprojectionBatch.Rd | 16 +++++++++------- man/runUMAPprojection.Rd | 2 +- man/runtSNE.Rd | 2 +- man/screePlot.Rd | 6 +++--- man/signPCA.Rd | 8 ++++---- man/specificCellCellcommunicationScores.Rd | 2 +- 10 files changed, 26 insertions(+), 24 deletions(-) diff --git a/man/jackstrawPlot.Rd b/man/jackstrawPlot.Rd index 6dd082e9c..6a757e5c7 100644 --- a/man/jackstrawPlot.Rd +++ b/man/jackstrawPlot.Rd @@ -69,8 +69,8 @@ ggplot object for jackstraw method identify significant prinicipal components (PCs) } \details{ -The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} - function. By systematically permuting genes it identifies robust, and thus +The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} + function. By systematically permuting genes it identifies robust, and thus significant, PCs. } \examples{ diff --git a/man/runGiottoHarmony.Rd b/man/runGiottoHarmony.Rd index 274b3bb23..35348339b 100644 --- a/man/runGiottoHarmony.Rd +++ b/man/runGiottoHarmony.Rd @@ -70,7 +70,7 @@ giotto object with updated Harmony dimension reduction run UMAP } \details{ -This is a simple wrapper for the HarmonyMatrix function in the +This is a simple wrapper for the HarmonyMatrix function in the Harmony package \doi{10.1038/s41592-019-0619-0}. } \examples{ diff --git a/man/runPCA.Rd b/man/runPCA.Rd index 58073b9d0..2e3ca25e1 100644 --- a/man/runPCA.Rd +++ b/man/runPCA.Rd @@ -78,7 +78,7 @@ dimension reduction and clusterings are based on your features of interest. \item feats_to_use = NULL: will use all features from the selected matrix \item feats_to_use = : can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } By default the number of principle components that we calculate is 100, which diff --git a/man/runPCAprojection.Rd b/man/runPCAprojection.Rd index 31d0251f6..1492e60ad 100644 --- a/man/runPCAprojection.Rd +++ b/man/runPCAprojection.Rd @@ -69,11 +69,11 @@ runPCAprojection( giotto object with updated PCA dimension recuction } \description{ -runs a Principal Component Analysis on a random +runs a Principal Component Analysis on a random subset + projection } \details{ -See \code{\link[BiocSingular]{runPCA}} and +See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters. This PCA implementation is similar to \code{\link{runPCA}}, except that it performs PCA on a subset of the cells or features, and predict on the others. @@ -82,7 +82,7 @@ This can significantly increase speed without sacrificing accuracy too much. \item feats_to_use = NULL: will use all features from the selected matrix \item feats_to_use = : can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } } diff --git a/man/runPCAprojectionBatch.Rd b/man/runPCAprojectionBatch.Rd index 926001375..518ff46c0 100644 --- a/man/runPCAprojectionBatch.Rd +++ b/man/runPCAprojectionBatch.Rd @@ -72,28 +72,30 @@ runPCAprojectionBatch( giotto object with updated PCA dimension reduction } \description{ -runs a Principal Component Analysis on multiple random +runs a Principal Component Analysis on multiple random batches + projection } \details{ -See \code{\link[BiocSingular]{runPCA}} and +See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters. -This PCA implementation is similar to \code{\link{runPCA}} and +This PCA implementation is similar to \code{\link{runPCA}} and \code{\link{runPCAprojection}}, -except that it performs PCA on multiple subsets (batches) of the cells or +except that it performs PCA on multiple subsets (batches) of the cells or features, -and predict on the others. This can significantly increase speed without +and predict on the others. This can significantly increase speed without sacrificing accuracy too much. \itemize{ \item feats_to_use = NULL: will use all features from the selected matrix \item feats_to_use = : can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } } \examples{ g <- GiottoData::loadGiottoMini("visium") -runPCAprojectionBatch(g) +# set feats_to_use to NULL since there are not many hvfs +# (only 48 in this mini dataset) +runPCAprojectionBatch(g, feats_to_use = NULL) } diff --git a/man/runUMAPprojection.Rd b/man/runUMAPprojection.Rd index ad99c7cda..4bc1e2abd 100644 --- a/man/runUMAPprojection.Rd +++ b/man/runUMAPprojection.Rd @@ -84,7 +84,7 @@ giotto object with updated UMAP dimension reduction run UMAP on subset and project on the rest } \details{ -See \code{\link[uwot]{umap}} for more information about these and +See \code{\link[uwot]{umap}} for more information about these and other parameters. \itemize{ \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca') diff --git a/man/runtSNE.Rd b/man/runtSNE.Rd index ac280eba2..ff8cddfee 100644 --- a/man/runtSNE.Rd +++ b/man/runtSNE.Rd @@ -72,7 +72,7 @@ giotto object with updated tSNE dimension recuction run tSNE } \details{ -See \code{\link[Rtsne]{Rtsne}} for more information about these and +See \code{\link[Rtsne]{Rtsne}} for more information about these and other parameters. \cr \itemize{ \item Input for tSNE dimension reduction can be another dimension reduction (default = 'pca') diff --git a/man/screePlot.Rd b/man/screePlot.Rd index d4f0a542f..2f65fb4f9 100644 --- a/man/screePlot.Rd +++ b/man/screePlot.Rd @@ -72,14 +72,14 @@ screePlot( ggplot object for scree method } \description{ -identify significant principal components (PCs) using an +identify significant principal components (PCs) using an screeplot (a.k.a. elbowplot) } \details{ Screeplot works by plotting the explained variance of each - individual PC in a barplot allowing you to identify which PC provides a + individual PC in a barplot allowing you to identify which PC provides a significant contribution (a.k.a 'elbow method'). \cr - Screeplot will use an available pca object, based on the parameter 'name', + Screeplot will use an available pca object, based on the parameter 'name', or it will create it if it's not available (see \code{\link{runPCA}}) } \examples{ diff --git a/man/signPCA.Rd b/man/signPCA.Rd index 6245bde35..0df0b6379 100644 --- a/man/signPCA.Rd +++ b/man/signPCA.Rd @@ -84,14 +84,14 @@ ggplot object for scree method and maxtrix of p-values for jackstraw identify significant prinicipal components (PCs) } \details{ -Two different methods can be used to assess the number of relevant +Two different methods can be used to assess the number of relevant or significant prinicipal components (PC's). \cr 1. Screeplot works by plotting the explained variance of each - individual PC in a barplot allowing you to identify which PC provides a + individual PC in a barplot allowing you to identify which PC provides a significant contribution (a.k.a. 'elbow method'). \cr - 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} - function. By systematically permuting genes it identifies robust, and thus + 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} + function. By systematically permuting genes it identifies robust, and thus significant, PCs. \cr } diff --git a/man/specificCellCellcommunicationScores.Rd b/man/specificCellCellcommunicationScores.Rd index ac12f78b9..917da1a3f 100644 --- a/man/specificCellCellcommunicationScores.Rd +++ b/man/specificCellCellcommunicationScores.Rd @@ -73,7 +73,7 @@ considered} \item{verbose}{verbose} } \value{ -Cell-Cell communication scores for feature pairs based on spatial +Cell-Cell communication scores for feature pairs based on spatial interaction } \description{ From b46be6d3ffff7b6be3296cb99df33425ff94859e Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 11:16:59 -0400 Subject: [PATCH 049/150] change: GiottoUtils req for `deprecate_param()` --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 342a96157..a8426d509 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,7 @@ Imports: dbscan (>= 1.1-3), ggplot2 (>= 3.1.1), ggrepel, - GiottoUtils (>= 0.1.8), + GiottoUtils (>= 0.1.9), GiottoVisuals (>= 0.2.2), igraph (>= 1.2.4.1), jsonlite, From 8cc41e71c64a043f715b386e8fa7e179c07e3386 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 11:18:34 -0400 Subject: [PATCH 050/150] enh: `verbose` for `calculateHVF()` --- R/variable_genes.R | 73 ++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/R/variable_genes.R b/R/variable_genes.R index c7db7fb44..85a04dea9 100644 --- a/R/variable_genes.R +++ b/R/variable_genes.R @@ -64,7 +64,7 @@ loess_formula, data = feat_in_cells_detected) feat_in_cells_detected$pred_cov_feats <- stats::predict( loess_model_sample, newdata = feat_in_cells_detected) - feat_in_cells_detected[, cov_diff := get(var_col) - pred_cov_feats, + feat_in_cells_detected[, cov_diff := get(var_col) - pred_cov_feats, by = seq_len(nrow(feat_in_cells_detected))] data.table::setorder(feat_in_cells_detected, -cov_diff) feat_in_cells_detected[, selected := ifelse( @@ -96,7 +96,7 @@ test <- apply(X = scaled_matrix, MARGIN = 1, FUN = function(x) var(x)) } else { test <- future.apply::future_apply( - X = scaled_matrix, MARGIN = 1, FUN = function(x) var(x), + X = scaled_matrix, MARGIN = 1, FUN = function(x) var(x), future.seed = TRUE ) } @@ -219,53 +219,54 @@ #' @param feat_type feature type #' @param expression_values expression values to use #' @param method method to calculate highly variable features -#' @param reverse_log_scale reverse log-scale of expression values +#' @param reverse_log_scale reverse log-scale of expression values #' (default = FALSE) #' @param logbase if `reverse_log_scale` is TRUE, which log base was used? #' @param expression_threshold expression threshold to consider a gene detected -#' @param nr_expression_groups (cov_groups) number of expression groups for +#' @param nr_expression_groups (cov_groups) number of expression groups for #' cov_groups #' @param zscore_threshold (cov_groups) zscore to select hvg for cov_groups #' @param HVFname name for highly variable features in cell metadata -#' @param difference_in_cov (cov_loess) minimum difference in coefficient of +#' @param difference_in_cov (cov_loess) minimum difference in coefficient of #' variance required -#' @param var_threshold (var_p_resid) variance threshold for features for +#' @param var_threshold (var_p_resid) variance threshold for features for #' var_p_resid method -#' @param var_number (var_p_resid) number of top variance features for +#' @param var_number (var_p_resid) number of top variance features for #' var_p_resid method -#' @param random_subset random subset to perform HVF detection on. +#' @param random_subset random subset to perform HVF detection on. #' Passing `NULL` runs HVF on all cells. #' @param set_seed logical. whether to set a seed when random_subset is used #' @param seed_number seed number to use when random_subset is used #' @param show_plot show plot #' @param return_plot return ggplot object (overridden by `return_gobject`) #' @param save_plot logical. directly save the plot -#' @param save_param list of saving parameters from +#' @param save_param list of saving parameters from #' [GiottoVisuals::all_plots_save_function()] -#' @param default_save_name default save name for saving, don't change, change +#' @param default_save_name default save name for saving, don't change, change #' save_name in save_param #' @param return_gobject boolean: return giotto object (default = TRUE) -#' @returns giotto object highly variable features appended to feature metadata +#' @param verbose be verbose +#' @returns giotto object highly variable features appended to feature metadata #' (`fDataDT()`) #' @details #' Currently we provide 2 ways to calculate highly variable genes: #' #' \strong{1. high coeff of variance (COV) within groups: } \cr -#' First genes are binned (\emph{nr_expression_groups}) into average expression -#' groups and the COV for each feature is converted into a z-score within each -#' bin. Features with a z-score higher than the threshold +#' First genes are binned (\emph{nr_expression_groups}) into average expression +#' groups and the COV for each feature is converted into a z-score within each +#' bin. Features with a z-score higher than the threshold #' (\emph{zscore_threshold}) are considered highly variable. \cr #' #' \strong{2. high COV based on loess regression prediction: } \cr -#' A predicted COV is calculated for each feature using loess regression +#' A predicted COV is calculated for each feature using loess regression #' (COV~log(mean expression)) -#' Features that show a higher than predicted COV (\emph{difference_in_cov}) +#' Features that show a higher than predicted COV (\emph{difference_in_cov}) #' are considered highly variable. \cr #' #' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' calculateHVF(g) #' @export calculateHVF <- function(gobject, @@ -290,7 +291,8 @@ calculateHVF <- function(gobject, save_plot = NULL, save_param = list(), default_save_name = "HVFplot", - return_gobject = TRUE) { + return_gobject = TRUE, + verbose = TRUE) { # NSE vars selected <- feats <- var <- NULL @@ -317,7 +319,7 @@ calculateHVF <- function(gobject, # expression values to be used values <- match.arg( - expression_values, + expression_values, unique(c("normalized", "scaled", "custom", expression_values))) expr_values <- getExpression( gobject = gobject, @@ -346,14 +348,14 @@ calculateHVF <- function(gobject, # print, return and save parameters - show_plot <- ifelse(is.na(show_plot), - readGiottoInstructions(gobject, param = "show_plot"), + show_plot <- ifelse(is.na(show_plot), + readGiottoInstructions(gobject, param = "show_plot"), show_plot) - save_plot <- ifelse(is.na(save_plot), - readGiottoInstructions(gobject, param = "save_plot"), + save_plot <- ifelse(is.na(save_plot), + readGiottoInstructions(gobject, param = "save_plot"), save_plot) - return_plot <- ifelse(is.na(return_plot), - readGiottoInstructions(gobject, param = "return_plot"), + return_plot <- ifelse(is.na(return_plot), + readGiottoInstructions(gobject, param = "return_plot"), return_plot) @@ -415,8 +417,8 @@ calculateHVF <- function(gobject, ## save plot if (isTRUE(save_plot)) { do.call( - GiottoVisuals::all_plots_save_function, - c(list(gobject = gobject, plot_object = pl, + GiottoVisuals::all_plots_save_function, + c(list(gobject = gobject, plot_object = pl, default_save_name = default_save_name), save_param)) } @@ -424,7 +426,7 @@ calculateHVF <- function(gobject, if (isTRUE(return_plot)) { if (isTRUE(return_gobject)) { message("return_plot = TRUE and return_gobject = TRUE \n - plot will not be returned to object, but can still be + plot will not be returned to object, but can still be saved with save_plot = TRUE or manually") } else { return(pl) @@ -444,7 +446,8 @@ calculateHVF <- function(gobject, column_names_feat_metadata <- colnames(feat_metadata[]) if (HVFname %in% column_names_feat_metadata) { - cat(HVFname, " has already been used, will be overwritten") + vmsg(.v = verbose, HVFname, + " has already been used, will be overwritten") feat_metadata[][, eval(HVFname) := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -500,7 +503,7 @@ calculateHVF <- function(gobject, axis.text = ggplot2::element_text(size = 12) ) pl <- pl + ggplot2::geom_point( - data = feat_in_cells_detected, + data = feat_in_cells_detected, ggplot2::aes_string(x = "mean_expr", y = "cov", color = "selected")) pl <- pl + ggplot2::scale_color_manual( values = c(no = "lightgrey", yes = "orange"), @@ -529,16 +532,16 @@ calculateHVF <- function(gobject, axis.text = ggplot2::element_text(size = 12) ) pl <- pl + ggplot2::geom_point( - data = feat_in_cells_detected, - ggplot2::aes_string(x = "log(mean_expr)", y = var_col, + data = feat_in_cells_detected, + ggplot2::aes_string(x = "log(mean_expr)", y = var_col, color = "selected")) pl <- pl + ggplot2::geom_line( - data = feat_in_cells_detected, - ggplot2::aes_string(x = "log(mean_expr)", y = "pred_cov_feats"), + data = feat_in_cells_detected, + ggplot2::aes_string(x = "log(mean_expr)", y = "pred_cov_feats"), color = "blue") hvg_line <- paste0("pred_cov_feats+", difference_in_cov) pl <- pl + ggplot2::geom_line( - data = feat_in_cells_detected, + data = feat_in_cells_detected, ggplot2::aes_string(x = "log(mean_expr)", y = hvg_line), linetype = 2) pl <- pl + ggplot2::labs(x = "log(mean expression)", y = var_col) pl <- pl + ggplot2::scale_color_manual( From d147f5dafadeb4d166098ef2d6dbeae717d7df33 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 11:19:58 -0400 Subject: [PATCH 051/150] change: use setter for checking dimreduction naming overlaps --- R/dimension_reduction.R | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/R/dimension_reduction.R b/R/dimension_reduction.R index a52599a59..1d5107a2c 100644 --- a/R/dimension_reduction.R +++ b/R/dimension_reduction.R @@ -497,17 +497,6 @@ runPCA <- function(gobject, if (isTRUE(return_gobject)) { - pca_names <- list_dim_reductions_names( - gobject = gobject, - data_type = reduction, - spat_unit = spat_unit, - feat_type = feat_type, - dim_type = "pca" - ) - - if (name %in% pca_names) { - cat(name, " has already been used, will be overwritten") - } if (reduction == "cells") { my_row_names <- colnames(expr_values) @@ -531,7 +520,9 @@ runPCA <- function(gobject, ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, dimObject = dimObject) + gobject <- set_dimReduction( + gobject = gobject, dimObject = dimObject, verbose = verbose + ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### From 86576c33788b6b7dae364754534e887c035908a3 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 11:20:49 -0400 Subject: [PATCH 052/150] fix: `.doLouvainCluster_multinet()` multinet generation --- R/clustering.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index ab0e0a138..ae7d1d553 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -731,8 +731,8 @@ doGiottoClustree <- function(gobject, # create mlnetworkobject mln_object <- multinet::ml_empty() - multinet::add_vertices_ml( - n = mln_object, vertices = igraph::V(igraph_object)) + # multinet::add_vertices_ml( + # n = mln_object, vertices = igraph::V(igraph_object)) multinet::add_igraph_layer_ml( n = mln_object, g = igraph_object, name = name) From f547ecf0549882c929dd5226bd184b7ee221f85e Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 11:21:01 -0400 Subject: [PATCH 053/150] Update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index ad52e9273..fccc69684 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ * Add catch in `runPCAprojectionBatch()` for when ncp requested exceeds number of feats used * Make `spatCellCellcom()` respect `verbose` flag [#949](https://github.com/drieslab/Giotto/issues/949) by rbutleriii +## Changes +* require GiottoUtils (>= 0.1.9) # Giotto 4.0.8 (2024/05/22) From 54b90b6e34fb030ac777f5870e58b1cd177a128b Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 11:22:53 -0400 Subject: [PATCH 054/150] enh: `verbose` for `addStatistics()` --- R/auxiliary_giotto.R | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 3ce6d6296..fb13d1b17 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -1387,6 +1387,7 @@ processGiotto <- function(gobject, #' @param expression_values expression values to use #' @param detection_threshold detection threshold to consider a gene detected #' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param verbose be verbose #' @returns giotto object if return_gobject = TRUE #' @details #' This function will add the following statistics to feature metadata: @@ -1411,7 +1412,8 @@ addFeatStatistics <- function(gobject, spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE) { + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1475,8 +1477,8 @@ addFeatStatistics <- function(gobject, metadata_names <- colnames(feat_metadata[]) if ("nr_cells" %in% metadata_names) { - message("feat statistics has already been applied once, will be - overwritten") + vmsg(.v = verbose, "feat statistics has already been applied", + "once; overwriting") feat_metadata[][, c( "nr_cells", "perc_cells", "total_expr", "mean_expr", "mean_expr_det") := NULL] @@ -1553,6 +1555,7 @@ addFeatStatistics <- function(gobject, #' @param expression_values expression values to use #' @param detection_threshold detection threshold to consider a gene detected #' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param verbose be verbose #' @returns giotto object if return_gobject = TRUE #' @details #' This function will add the following statistics to cell metadata: @@ -1574,7 +1577,8 @@ addCellStatistics <- function(gobject, spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE) { + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1628,8 +1632,8 @@ addCellStatistics <- function(gobject, metadata_names <- colnames(cell_metadata[]) if ("nr_feats" %in% metadata_names) { - message("cells statistics has already been applied once, will be - overwritten") + vmsg(.v = verbose, "cells statistics has already been applied", + "once; overwriting") cell_metadata[][, c("nr_feats", "perc_feats", "total_expr") := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### gobject <- set_cell_metadata(gobject, @@ -1703,6 +1707,7 @@ addCellStatistics <- function(gobject, #' @param expression_values expression values to use #' @param detection_threshold detection threshold to consider a feature detected #' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param verbose be verbose #' @returns giotto object if return_gobject = TRUE, else a list with results #' @details See \code{\link{addFeatStatistics}} and #' \code{\link{addCellStatistics}} @@ -1716,7 +1721,8 @@ addStatistics <- function(gobject, spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE) { + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1735,7 +1741,8 @@ addStatistics <- function(gobject, spat_unit = spat_unit, expression_values = expression_values, detection_threshold = detection_threshold, - return_gobject = return_gobject + return_gobject = return_gobject, + verbose = verbose ) if (return_gobject == TRUE) { @@ -1749,7 +1756,8 @@ addStatistics <- function(gobject, spat_unit = spat_unit, expression_values = expression_values, detection_threshold = detection_threshold, - return_gobject = return_gobject + return_gobject = return_gobject, + verbose = verbose ) if (return_gobject == TRUE) { From 82fade8a58e4116a1a16187750fb5be19c64cef8 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 11:24:39 -0400 Subject: [PATCH 055/150] fix: subclustering and combine docs - also deprecations of `gene` -> `feat` args --- R/clustering.R | 776 ++++++++++++++++++++++--------------------------- 1 file changed, 340 insertions(+), 436 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index ae7d1d553..33cdc9d16 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -1816,73 +1816,210 @@ clusterCells <- function(gobject, +# subclustering #### -#' @title doLeidenSubCluster -#' @name doLeidenSubCluster -#' @description Further subcluster cells using a NN-network and the Leiden -#' algorithm -#' @param gobject giotto object -#' @param feat_type feature type +#' @title Cell subclustering +#' @name subClusterCells +#' @description Perform cell subclustering by taking an annotated group of +#' cells and performing another round of clustering on just that subset. +#' Several methods are implemented. `subClusterCells()` is the main wrapper +#' function. `doLeidenSubCluster()` and `doLouvainSubCluster()` are more +#' specific implementations. +#' @param gobject `giotto` object #' @param name name for new clustering result +#' @param cluster_method clustering method to use. Currently one of "leiden" +#' (default), "louvain_community", "louvain_multinet" #' @param cluster_column cluster column to subcluster #' @param selected_clusters only do subclustering on these clusters -#' @param hvf_param parameters for calculateHVf -#' @param hvg_param deprecatd, use hvf_param +#' @param hvf_param list of parameters for [calculateHVF()] +#' @param hvg_param deprecated #' @param hvf_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_min_perc_cells deprecated, use hvf_min_perc_cells +#' @param hvg_min_perc_cells deprecated #' @param hvf_mean_expr_det threshold for mean expression level in cells with #' detection -#' @param hvg_mean_expr_det deprecated, use hvf_mean_expr_det +#' @param hvg_mean_expr_det deprecated #' @param use_all_feats_as_hvf forces all features to be HVF and to be used as #' input for PCA -#' @param use_all_genes_as_hvg deprecated, use use_all_feats_as_hvf +#' @param use_all_genes_as_hvg deprecated #' @param min_nr_of_hvf minimum number of HVF, or all features will be used as #' input for PCA -#' @param min_nr_of_hvg deprecated, use min_nr_of_hvf -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param resolution resolution of Leiden clustering -#' @param n_iterations number of interations to run the Leiden algorithm. +#' @param min_nr_of_hvg deprecated +#' @param pca_param list of parameters for [runPCA()] +#' @param nn_param list of parameters for [createNearestNetwork()] +#' @param k_neighbors number of k for [createNearestNetwork()] +#' @param resolution resolution for community algorithm +#' @param n_iterations number of iterations to run the Leiden algorithm. +#' @param gamma gamma +#' @param omega omega #' @param python_path specify specific path to python if required #' @param nn_network_to_use type of NN network to use (kNN vs sNN) #' @param network_name name of NN network to use -#' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param return_gobject logical. return `giotto` object (default = TRUE) #' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering using the Leiden algorithm on -#' selected clusters. +#' @returns `giotto` object with new subclusters appended to cell metadata +#' @details This function performs subclustering on selected clusters. #' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable fetures} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do Leiden clustering} -#' } -#' @seealso \code{\link{doLeidenCluster}} +#' 1. subset Giotto object +#' 2. identify highly variable genes +#' 3. run PCA +#' 4. create nearest neighbouring network +#' 5. do clustering +#' #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' +#' # Run some subclusterings based on "leiden_clus" annotations that already +#' # exist in the visium mini object +#' +#' # default method is leiden subclustering +#' subClusterCells(g, cluster_column = "leiden_clus") +#' +#' # use louvain instead +#' subClusterCells(g, cluster_column = "leiden_clus", +#' cluster_method = "louvain_community") +#' +#' # directly call the more specific functions #' doLeidenSubCluster(g, cluster_column = "leiden_clus") +#' +#' doLouvainSubCluster(g, cluster_column = "leiden_clus") +#' @md +NULL + + + + +#' @rdname subClusterCells +#' @export +subClusterCells <- function( + gobject, + name = "sub_clus", + cluster_method = c( + "leiden", + "louvain_community", + "louvain_multinet" + ), + cluster_column = NULL, + selected_clusters = NULL, + hvg_param = deprecated(), + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 1, + n_iterations = 1000, + gamma = 1, + omega = 1, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE +) { + ## select cluster method + cluster_method <- match.arg(arg = cluster_method, choices = c( + "leiden", + "louvain_community", + "louvain_multinet" + )) + + # deprecations + .dep_param <- function(...) { + GiottoUtils::deprecate_param( + ..., fun = "subClusterCells", when = "4.0.9" + ) + } + + hvf_param <- .dep_param(hvg_param, hvf_param) + hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) + hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) + + # gather common args + common_args <- get_args_list(keep = c( + "gobject", + "cluster_column", + "selected_clusters", + "hvf_param", + "hvf_min_perc_cells", + "hvf_mean_expr_det", + "use_all_feats_as_hvf", + "min_nr_of_hvf", + "pca_param", + "nn_param", + "k_neighbors", + "nn_network_to_use", + "network_name", + "name", + "return_gobject", + "verbose" + )) + + result <- switch(cluster_method, + "leiden" = { + do.call(doLeidenSubCluster, args = c( + common_args, + list(resolution = resolution, + n_iterations = n_iterations, + python_path = python_path, + toplevel = 4) + )) + }, + "louvain_community" = { + do.call(.doLouvainSubCluster_community, args = c( + common_args, + list(resolution = resolution, + python_path = python_path) + )) + }, + "louvain_multinet" = { + do.call(.doLouvainSubCluster_multinet, args = c( + common_args, + list(gamma = gamma, + omega = omega) + )) + } + ) + + return(result) +} + + + + + +#' @describeIn subClusterCells Further subcluster cells using a NN-network and +#' the Leiden algorithm +#' @param toplevel do not use #' @export doLeidenSubCluster <- function(gobject, feat_type = NULL, - name = "sub_pleiden_clus", + name = "sub_leiden_clus", cluster_column = NULL, selected_clusters = NULL, hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), - hvg_param = NULL, + hvg_param = deprecated(), hvf_min_perc_cells = 5, - hvg_min_perc_cells = NULL, + hvg_min_perc_cells = deprecated(), hvf_mean_expr_det = 1, - hvg_mean_expr_det = NULL, + hvg_mean_expr_det = deprecated(), use_all_feats_as_hvf = FALSE, - use_all_genes_as_hvg = NULL, + use_all_genes_as_hvg = deprecated(), min_nr_of_hvf = 5, - min_nr_of_hvg = NULL, + min_nr_of_hvg = deprecated(), pca_param = list(expression_values = "normalized", scale_unit = TRUE), nn_param = list(dimensions_to_use = 1:20), k_neighbors = 10, @@ -1892,37 +2029,25 @@ doLeidenSubCluster <- function(gobject, nn_network_to_use = "sNN", network_name = "sNN.pca", return_gobject = TRUE, + toplevel = 2, verbose = TRUE) { # specify feat_type if (is.null(feat_type)) { feat_type <- gobject@expression_feat[[1]] } - ## deprecated arguments - if (!is.null(hvg_param)) { - hvf_param <- hvg_param - warning("hvg_param is deprecated, use hvf_param in the future") - } - if (!is.null(hvg_min_perc_cells)) { - hvf_min_perc_cells <- hvg_min_perc_cells - warning("hvg_min_perc_cells is deprecated, use hvf_min_perc_cells in - the future") - } - if (!is.null(hvg_mean_expr_det)) { - hvf_mean_expr_det <- hvg_mean_expr_det - warning("hvg_mean_expr_det is deprecated, use hvf_mean_expr_det in the - future") - } - if (!is.null(use_all_genes_as_hvg)) { - use_all_feats_as_hvf <- use_all_genes_as_hvg - warning("use_all_genes_as_hvg is deprecated, use use_all_feats_as_hvf - in the future") - } - if (!is.null(min_nr_of_hvg)) { - min_nr_of_hvf <- min_nr_of_hvg - warning("min_nr_of_hvg is deprecated, use min_nr_of_hvf in the future") + # deprecated arguments + .dep_param <- function(x, y) { + GiottoUtils::deprecate_param( + x, y, fun = "doLeidenSubCluster", when = "4.0.9" + ) } + hvf_param <- .dep_param(hvg_param, hvf_param) + hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) + hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) iter_list <- list() @@ -1943,7 +2068,7 @@ doLeidenSubCluster <- function(gobject, for (cluster in unique_clusters) { - if (verbose == TRUE) cat("start with cluster: ", cluster) + vmsg(.v = verbose, "start with cluster: ", cluster, "\n") ## get subset subset_cell_IDs <- cell_metadata[ @@ -1968,10 +2093,12 @@ doLeidenSubCluster <- function(gobject, ## calculate stats temp_giotto <- addStatistics( gobject = temp_giotto, - feat_type = feat_type + feat_type = feat_type, + verbose = FALSE ) ## calculate variable feats + hvf_param$verbose <- FALSE temp_giotto <- do.call( "calculateHVF", c(gobject = temp_giotto, hvf_param)) @@ -1979,31 +2106,33 @@ doLeidenSubCluster <- function(gobject, feat_metadata <- fDataDT(temp_giotto, feat_type = feat_type ) - featfeats <- feat_metadata[ + usefeats <- feat_metadata[ hvf == "yes" & perc_cells >= hvf_min_perc_cells & mean_expr_det >= hvf_mean_expr_det]$feat_ID ## catch too low number of hvg if (use_all_feats_as_hvf == TRUE) { - featfeats == feat_metadata$feat_ID + usefeats == feat_metadata$feat_ID } else { if (verbose == TRUE) - cat(length(featfeats), - "highly variable feats have been selected") - if (length(featfeats) <= min_nr_of_hvf) { + cat(length(usefeats), + "highly variable feats have been selected\n") + if (length(usefeats) <= min_nr_of_hvf) { message("too few feats, will continue with all feats instead") - featfeats <- feat_metadata$feat_ID + usefeats <- feat_metadata$feat_ID } } ## run PCA + pca_param$verbose = FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, feats_to_use = list(featfeats), + c(gobject = temp_giotto, feats_to_use = list(usefeats), pca_param)) ## nearest neighbor and clustering + nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", c(gobject = temp_giotto, k = k_neighbors, nn_param)) @@ -2049,7 +2178,9 @@ doLeidenSubCluster <- function(gobject, ) ## update parameters used ## - gobject <- update_giotto_params(gobject, description = "_sub_cluster") + gobject <- update_giotto_params( + gobject, description = "_sub_cluster", toplevel = toplevel + ) return(gobject) } else { return(together) @@ -2057,54 +2188,21 @@ doLeidenSubCluster <- function(gobject, } -#' @title doLouvainSubCluster community -#' @name .doLouvainSubCluster_community -#' @description subcluster cells using a NN-network and the Louvain community -#' detection algorithm -#' @param gobject giotto object -#' @param name name for new clustering result -#' @param cluster_column cluster column to subcluster -#' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG -#' @param hvg_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_mean_expr_det threshold for mean expression level in cells with -#' detection -#' @param use_all_genes_as_hvg forces all genes to be HVG and to be used as -#' input for PCA -#' @param min_nr_of_hvg minimum number of HVG, or all genes will be used as -#' input for PCA -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param resolution resolution -#' @param python_path specify specific path to python if required -#' @param nn_network_to_use type of NN network to use (kNN vs sNN) -#' @param network_name name of NN network to use -#' @param return_gobject Boolean: return giotto object (default = TRUE) -#' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering using the Louvain community -#' algorithm on selected clusters. -#' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable genes} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do Louvain community clustering} -#' } -#' @seealso \code{\link{.doLouvainCluster_community}} -#' @keywords internal +# subcluster cells using a NN-network and the Louvain community +# detection algorithm .doLouvainSubCluster_community <- function(gobject, name = "sub_louvain_comm_clus", cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, + hvf_param = list( + reverse_log_scale = TRUE, + difference_in_cov = 1, + expression_values = "normalized" + ), + hvf_min_perc_cells = 5, + hvf_mean_expr_det = 1, + use_all_feats_as_hvf = FALSE, + min_nr_of_hvf = 5, pca_param = list(expression_values = "normalized", scale_unit = TRUE), nn_param = list(dimensions_to_use = 1:20), k_neighbors = 10, @@ -2127,7 +2225,7 @@ doLeidenSubCluster <- function(gobject, index_offset <- ifelse(0 %in% unique_clusters, 1, 0) for (cluster in unique_clusters) { - if (verbose == TRUE) cat("start with cluster: ", cluster) + if (verbose == TRUE) cat("start with cluster: ", cluster, "\n") ## get subset subset_cell_IDs <- cell_metadata[ @@ -2147,48 +2245,53 @@ doLeidenSubCluster <- function(gobject, # selection ## calculate stats - temp_giotto <- addStatistics(gobject = temp_giotto) + temp_giotto <- addStatistics( + gobject = temp_giotto, verbose = FALSE + ) ## calculate variable genes + hvf_param$verbose <- FALSE temp_giotto <- do.call( - "calculateHVG", c(gobject = temp_giotto, hvg_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param)) - ## get hvg - gene_metadata <- fDataDT(temp_giotto) + ## get hvf + feat_metadata <- fDataDT(temp_giotto) - # data.table variables - hvg <- perc_cells <- mean_expr_det <- NULL + # NSE variables + hvf <- perc_cells <- mean_expr_det <- NULL - featgenes <- gene_metadata[ - hvg == "yes" & perc_cells >= hvg_min_perc_cells & - mean_expr_det >= hvg_mean_expr_det]$gene_ID + usefeats <- feat_metadata[ + hvf == "yes" & + perc_cells >= hvf_min_perc_cells & + mean_expr_det >= hvf_mean_expr_det]$feat_ID - ## catch too low number of hvg - if (use_all_genes_as_hvg == TRUE) { - featgenes == gene_metadata$gene_ID + ## catch too low number of hvf + if (isTRUE(use_all_feats_as_hvf)) { + usefeats == feat_metadata$feat_ID } else { - if (verbose == TRUE) - cat(length(featgenes), - "highly variable genes have been selected") - if (length(featgenes) <= min_nr_of_hvg) { - message("too few genes, will continue with all genes - instead") - featgenes <- gene_metadata$gene_ID + if (isTRUE(verbose)) + cat(length(usefeats), + "highly variable features have been selected\n") + if (length(usefeats) <= min_nr_of_hvf) { + wrap_msg("too few features + will continue with all features instead") + usefeats <- feat_metadata$feat_ID } } ## run PCA + pca_param$verbose <- FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, genes_to_use = list(featgenes), + c(gobject = temp_giotto, feats_to_use = list(usefeats), pca_param)) ## nearest neighbor and clustering + nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", c(gobject = temp_giotto, k = k_neighbors, nn_param)) - ## Leiden Cluster ## TO DO: expand to all clustering options temp_cluster <- .doLouvainCluster_community( gobject = temp_giotto, @@ -2256,54 +2359,20 @@ doLeidenSubCluster <- function(gobject, -#' @title doLouvainSubCluster multinet -#' @name .doLouvainSubCluster_multinet -#' @description subcluster cells using a NN-network and the Louvain multinet -#' detection algorithm -#' @param gobject giotto object -#' @param name name for new clustering result -#' @param cluster_column cluster column to subcluster -#' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG -#' @param hvg_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_mean_expr_det threshold for mean expression level in cells with -#' detection -#' @param use_all_genes_as_hvg forces all genes to be HVG and to be used as -#' input for PCA -#' @param min_nr_of_hvg minimum number of HVG, or all genes will be used as -#' input for PCA -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param gamma gamma -#' @param omega omega -#' @param nn_network_to_use type of NN network to use (kNN vs sNN) -#' @param network_name name of NN network to use -#' @param return_gobject boolean: return giotto object (default = TRUE) -#' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering using the Louvain multinet -#' algorithm on selected clusters. -#' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable genes} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do Louvain multinet clustering} -#' } -#' @seealso \code{\link{.doLouvainCluster_multinet}} -#' @keywords internal +# subcluster cells using a NN-network and the Louvain multinet +# detection algorithm .doLouvainSubCluster_multinet <- function(gobject, name = "sub_louvain_mult_clus", cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvf_min_perc_cells = 5, + hvf_mean_expr_det = 1, + use_all_feats_as_hvf = FALSE, + min_nr_of_hvf = 5, pca_param = list(expression_values = "normalized", scale_unit = TRUE), nn_param = list(dimensions_to_use = 1:20), k_neighbors = 10, @@ -2335,12 +2404,12 @@ doLeidenSubCluster <- function(gobject, # data.table variables - hvg <- perc_cells <- mean_expr_det <- parent_cluster <- cell_ID <- + hvf <- perc_cells <- mean_expr_det <- parent_cluster <- cell_ID <- comb <- tempclus <- NULL for (cluster in unique_clusters) { - if (verbose == TRUE) cat("start with cluster: ", cluster) + if (verbose == TRUE) cat("start with cluster: ", cluster, "\n") ## get subset subset_cell_IDs <- cell_metadata[ @@ -2360,44 +2429,48 @@ doLeidenSubCluster <- function(gobject, # selection ## calculate stats - temp_giotto <- addStatistics(gobject = temp_giotto) + temp_giotto <- addStatistics( + gobject = temp_giotto, verbose = FALSE + ) ## calculate variable genes + hvf_param$verbose <- FALSE temp_giotto <- do.call( - "calculateHVG", c(gobject = temp_giotto, hvg_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param)) - ## get hvg - gene_metadata <- fDataDT(temp_giotto) - featgenes <- gene_metadata[ - hvg == "yes" & perc_cells >= hvg_min_perc_cells & - mean_expr_det >= hvg_mean_expr_det]$gene_ID + ## get hvf + feat_metadata <- fDataDT(temp_giotto) + usefeats <- feat_metadata[ + hvf == "yes" & perc_cells >= hvf_min_perc_cells & + mean_expr_det >= hvf_mean_expr_det]$feat_ID - ## catch too low number of hvg - if (use_all_genes_as_hvg == TRUE) { - featgenes == gene_metadata$gene_ID + ## catch too low number of hvf + if (use_all_feats_as_hvf == TRUE) { + usefeats == feat_metadata$feat_ID } else { if (verbose == TRUE) - cat(length(featgenes), - "highly variable genes have been selecteds") - if (length(featgenes) <= min_nr_of_hvg) { - message("too few genes, will continue with all genes + cat(length(usefeats), + "highly variable features have been selected\n") + if (length(usefeats) <= min_nr_of_hvf) { + message("too few features, will continue with all features instead") - featgenes <- gene_metadata$gene_ID + usefeats <- feat_metadata$feat_ID } } ## run PCA + pca_param$verbose <- FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, genes_to_use = list(featgenes), + c(gobject = temp_giotto, feats_to_use = list(usefeats), pca_param)) ## nearest neighbor and clustering + nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", c(gobject = temp_giotto, k = k_neighbors, nn_param)) - ## Leiden Cluster ## TO DO: expand to all clustering options temp_cluster <- .doLouvainCluster_multinet( gobject = temp_giotto, @@ -2460,62 +2533,30 @@ doLeidenSubCluster <- function(gobject, -#' @title doLouvainSubCluster -#' @name doLouvainSubCluster -#' @description subcluster cells using a NN-network and the Louvain algorithm -#' @param gobject giotto object -#' @param name name for new clustering result -#' @param version version of Louvain algorithm to use -#' @param cluster_column cluster column to subcluster -#' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG -#' @param hvg_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_mean_expr_det threshold for mean expression level in cells with -#' detection -#' @param use_all_genes_as_hvg forces all genes to be HVG and to be used as -#' input for PCA -#' @param min_nr_of_hvg minimum number of HVG, or all genes will be used as -#' input for PCA -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param resolution resolution for community algorithm -#' @param gamma gamma -#' @param omega omega -#' @param python_path specify specific path to python if required -#' @param nn_network_to_use type of NN network to use (kNN vs sNN) -#' @param network_name name of NN network to use -#' @param return_gobject boolean: return giotto object (default = TRUE) -#' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering using the Louvain algorithm on -#' selected clusters. -#' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable genes} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do Louvain clustering} -#' } -#' @seealso \code{\link{.doLouvainCluster_multinet}} and -#' \code{\link{.doLouvainCluster_community}} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' doLouvainSubCluster(g, cluster_column = "leiden_clus") + +#' @describeIn subClusterCells subcluster cells using a NN-network and the +#' Louvain algorithm +#' @param version version of Louvain algorithm to use. One of "community" or +#' "multinet", with the default being "community" #' @export doLouvainSubCluster <- function(gobject, name = "sub_louvain_clus", version = c("community", "multinet"), cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, + hvg_param = deprecated(), + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, pca_param = list(expression_values = "normalized", scale_unit = TRUE), nn_param = list(dimensions_to_use = 1:20), k_neighbors = 10, @@ -2530,199 +2571,59 @@ doLouvainSubCluster <- function(gobject, ## louvain clustering version to use version <- match.arg(version, c("community", "multinet")) - - # python community implementation - if (version == "community") { - result <- .doLouvainSubCluster_community( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_mean_expr_det = hvg_mean_expr_det, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - resolution = resolution, - python_path = python_path, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose - ) - } else if (version == "multinet") { - result <- .doLouvainSubCluster_multinet( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_mean_expr_det = hvg_mean_expr_det, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - gamma = gamma, - omega = omega, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose + # deprecations + .dep_param <- function(x, y) { + GiottoUtils::deprecate_param( + x, y, fun = "doLouvainSubCluster", when = "4.0.9" ) } - return(result) -} - - - - - -#' @title subClusterCells -#' @name subClusterCells -#' @description subcluster cells -#' @param gobject giotto object -#' @param name name for new clustering result -#' @param cluster_method clustering method to use -#' @param cluster_column cluster column to subcluster -#' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG -#' @param hvg_min_perc_cells threshold for detection in min percentage of cells -#' @param hvg_mean_expr_det threshold for mean expression level in cells with -#' detection -#' @param use_all_genes_as_hvg forces all genes to be HVG and to be used as -#' input for PCA -#' @param min_nr_of_hvg minimum number of HVG, or all genes will be used as -#' input for PCA -#' @param pca_param parameters for runPCA -#' @param nn_param parameters for parameters for createNearestNetwork -#' @param k_neighbors number of k for createNearestNetwork -#' @param resolution resolution -#' @param n_iterations number of interations to run the Leiden algorithm. -#' @param gamma gamma -#' @param omega omega -#' @param python_path specify specific path to python if required -#' @param nn_network_to_use type of NN network to use (kNN vs sNN) -#' @param network_name name of NN network to use -#' @param return_gobject boolean: return giotto object (default = TRUE) -#' @param verbose verbose -#' @returns giotto object with new subclusters appended to cell metadata -#' @details This function performs subclustering on selected clusters. -#' The systematic steps are: -#' \itemize{ -#' \item{1. subset Giotto object} -#' \item{2. identify highly variable genes} -#' \item{3. run PCA} -#' \item{4. create nearest neighbouring network} -#' \item{5. do clustering} -#' } -#' @seealso \code{\link{.doLouvainCluster_multinet}}, -#' \code{\link{.doLouvainCluster_community}} -#' and @seealso \code{\link{doLeidenCluster}} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' subClusterCells(g, cluster_column = "leiden_clus") -#' @export -subClusterCells <- function(gobject, - name = "sub_clus", - cluster_method = c( - "leiden", - "louvain_community", - "louvain_multinet" - ), - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 1, - n_iterations = 1000, - gamma = 1, - omega = 1, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { - ## select cluster method - cluster_method <- match.arg(arg = cluster_method, choices = c( - "leiden", - "louvain_community", - "louvain_multinet" + hvf_param <- .dep_param(hvg_param, hvf_param) + hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) + hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) + + # get common args + common_args <- get_args_list(keep = c( + "gobject", + "cluster_column", + "selected_clusters", + "hvf_param", + "hvf_min_perc_cells", + "hvf_mean_expr_det", + "use_all_feats_as_hvf", + "min_nr_of_hvf", + "pca_param", + "nn_param", + "k_neighbors", + "nn_network_to_use", + "network_name", + "name", + "return_gobject", + "verbose" )) - - if (cluster_method == "leiden") { - result <- doLeidenSubCluster( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_min_perc_cells = hvg_min_perc_cells, - hvg_mean_expr_det = hvg_mean_expr_det, - use_all_genes_as_hvg = use_all_genes_as_hvg, - min_nr_of_hvg = min_nr_of_hvg, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - resolution = resolution, - n_iterations = n_iterations, - python_path = python_path, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose - ) - } else if (cluster_method == "louvain_community") { - result <- .doLouvainSubCluster_community( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_min_perc_cells = hvg_min_perc_cells, - hvg_mean_expr_det = hvg_mean_expr_det, - use_all_genes_as_hvg = use_all_genes_as_hvg, - min_nr_of_hvg = min_nr_of_hvg, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - resolution = resolution, - python_path = python_path, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose - ) - } else if (cluster_method == "louvain_multinet") { - result <- .doLouvainSubCluster_multinet( - gobject = gobject, - cluster_column = cluster_column, - selected_clusters = selected_clusters, - hvg_param = hvg_param, - hvg_min_perc_cells = hvg_min_perc_cells, - hvg_mean_expr_det = hvg_mean_expr_det, - use_all_genes_as_hvg = use_all_genes_as_hvg, - min_nr_of_hvg = min_nr_of_hvg, - pca_param = pca_param, - nn_param = nn_param, - k_neighbors = k_neighbors, - gamma = gamma, - omega = omega, - nn_network_to_use = nn_network_to_use, - network_name = network_name, - name = name, - return_gobject = return_gobject, - verbose = verbose - ) - } + result <- switch(version, + "community" = { + do.call(.doLouvainSubCluster_community, args = c( + common_args, + list( + resolution = resolution, + python_path = python_path + ) + )) + }, + "multinet" = { + do.call(.doLouvainSubCluster_multinet, args = c( + common_args, + list( + gamma = gamma, + omega = omega + ) + )) + } + ) return(result) } @@ -2734,6 +2635,9 @@ subClusterCells <- function(gobject, +# cluster manipulation #### + + #' @title getClusterSimilarity #' @name getClusterSimilarity #' @description Creates data.table with pairwise correlation scores between @@ -2813,7 +2717,7 @@ getClusterSimilarity <- function(gobject, cor_table[, c("group1", "group2") := list( as.character(group1), as.character(group2))] cor_table[, unified_group := paste( - sort(c(group1, group2)), collapse = "--"), + sort(c(group1, group2)), collapse = "--"), by = 1:nrow(cor_table)] cor_table <- cor_table[!duplicated(cor_table[, .(value, unified_group)])] From e7f0422495f1abfd86e515e6169391e062d17864 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 11:25:32 -0400 Subject: [PATCH 056/150] chore: document --- man/addCellStatistics.Rd | 5 +- man/addFeatStatistics.Rd | 5 +- man/addStatistics.Rd | 5 +- man/calculateHVF.Rd | 5 +- man/doLeidenSubCluster.Rd | 116 -------------- man/doLouvainSubCluster.Rd | 104 ------------- man/dot-doLouvainSubCluster_community.Rd | 91 ----------- man/dot-doLouvainSubCluster_multinet.Rd | 91 ----------- man/jackstrawPlot.Rd | 4 +- man/runGiottoHarmony.Rd | 2 +- man/runPCA.Rd | 2 +- man/runPCAprojection.Rd | 6 +- man/runPCAprojectionBatch.Rd | 16 +- man/runUMAPprojection.Rd | 2 +- man/runtSNE.Rd | 2 +- man/screePlot.Rd | 6 +- man/signPCA.Rd | 8 +- man/specificCellCellcommunicationScores.Rd | 2 +- man/subClusterCells.Rd | 168 +++++++++++++++++---- 19 files changed, 178 insertions(+), 462 deletions(-) delete mode 100644 man/doLeidenSubCluster.Rd delete mode 100644 man/doLouvainSubCluster.Rd delete mode 100644 man/dot-doLouvainSubCluster_community.Rd delete mode 100644 man/dot-doLouvainSubCluster_multinet.Rd diff --git a/man/addCellStatistics.Rd b/man/addCellStatistics.Rd index 9abfb793b..42707b57f 100644 --- a/man/addCellStatistics.Rd +++ b/man/addCellStatistics.Rd @@ -10,7 +10,8 @@ addCellStatistics( spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE + return_gobject = TRUE, + verbose = TRUE ) } \arguments{ @@ -25,6 +26,8 @@ addCellStatistics( \item{detection_threshold}{detection threshold to consider a gene detected} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object if return_gobject = TRUE diff --git a/man/addFeatStatistics.Rd b/man/addFeatStatistics.Rd index 84b5f4e91..c532b733b 100644 --- a/man/addFeatStatistics.Rd +++ b/man/addFeatStatistics.Rd @@ -10,7 +10,8 @@ addFeatStatistics( spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE + return_gobject = TRUE, + verbose = TRUE ) } \arguments{ @@ -25,6 +26,8 @@ addFeatStatistics( \item{detection_threshold}{detection threshold to consider a gene detected} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object if return_gobject = TRUE diff --git a/man/addStatistics.Rd b/man/addStatistics.Rd index 95df75bda..9fee79234 100644 --- a/man/addStatistics.Rd +++ b/man/addStatistics.Rd @@ -10,7 +10,8 @@ addStatistics( spat_unit = NULL, expression_values = c("normalized", "scaled", "custom"), detection_threshold = 0, - return_gobject = TRUE + return_gobject = TRUE, + verbose = TRUE ) } \arguments{ @@ -25,6 +26,8 @@ addStatistics( \item{detection_threshold}{detection threshold to consider a feature detected} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object if return_gobject = TRUE, else a list with results diff --git a/man/calculateHVF.Rd b/man/calculateHVF.Rd index 9c826f008..d7d3a2de6 100644 --- a/man/calculateHVF.Rd +++ b/man/calculateHVF.Rd @@ -27,7 +27,8 @@ calculateHVF( save_plot = NULL, save_param = list(), default_save_name = "HVFplot", - return_gobject = TRUE + return_gobject = TRUE, + verbose = TRUE ) } \arguments{ @@ -84,6 +85,8 @@ Passing \code{NULL} runs HVF on all cells.} save_name in save_param} \item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{verbose}{be verbose} } \value{ giotto object highly variable features appended to feature metadata diff --git a/man/doLeidenSubCluster.Rd b/man/doLeidenSubCluster.Rd deleted file mode 100644 index 51fcb0dd1..000000000 --- a/man/doLeidenSubCluster.Rd +++ /dev/null @@ -1,116 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering.R -\name{doLeidenSubCluster} -\alias{doLeidenSubCluster} -\title{doLeidenSubCluster} -\usage{ -doLeidenSubCluster( - gobject, - feat_type = NULL, - name = "sub_pleiden_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = - "normalized"), - hvg_param = NULL, - hvf_min_perc_cells = 5, - hvg_min_perc_cells = NULL, - hvf_mean_expr_det = 1, - hvg_mean_expr_det = NULL, - use_all_feats_as_hvf = FALSE, - use_all_genes_as_hvg = NULL, - min_nr_of_hvf = 5, - min_nr_of_hvg = NULL, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - n_iterations = 500, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{feat_type}{feature type} - -\item{name}{name for new clustering result} - -\item{cluster_column}{cluster column to subcluster} - -\item{selected_clusters}{only do subclustering on these clusters} - -\item{hvf_param}{parameters for calculateHVf} - -\item{hvg_param}{deprecatd, use hvf_param} - -\item{hvf_min_perc_cells}{threshold for detection in min percentage of cells} - -\item{hvg_min_perc_cells}{deprecated, use hvf_min_perc_cells} - -\item{hvf_mean_expr_det}{threshold for mean expression level in cells with -detection} - -\item{hvg_mean_expr_det}{deprecated, use hvf_mean_expr_det} - -\item{use_all_feats_as_hvf}{forces all features to be HVF and to be used as -input for PCA} - -\item{use_all_genes_as_hvg}{deprecated, use use_all_feats_as_hvf} - -\item{min_nr_of_hvf}{minimum number of HVF, or all features will be used as -input for PCA} - -\item{min_nr_of_hvg}{deprecated, use min_nr_of_hvf} - -\item{pca_param}{parameters for runPCA} - -\item{nn_param}{parameters for parameters for createNearestNetwork} - -\item{k_neighbors}{number of k for createNearestNetwork} - -\item{resolution}{resolution of Leiden clustering} - -\item{n_iterations}{number of interations to run the Leiden algorithm.} - -\item{python_path}{specify specific path to python if required} - -\item{nn_network_to_use}{type of NN network to use (kNN vs sNN)} - -\item{network_name}{name of NN network to use} - -\item{return_gobject}{boolean: return giotto object (default = TRUE)} - -\item{verbose}{verbose} -} -\value{ -giotto object with new subclusters appended to cell metadata -} -\description{ -Further subcluster cells using a NN-network and the Leiden -algorithm -} -\details{ -This function performs subclustering using the Leiden algorithm on -selected clusters. -The systematic steps are: -\itemize{ - \item{1. subset Giotto object} - \item{2. identify highly variable fetures} - \item{3. run PCA} - \item{4. create nearest neighbouring network} - \item{5. do Leiden clustering} -} -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -doLeidenSubCluster(g, cluster_column = "leiden_clus") -} -\seealso{ -\code{\link{doLeidenCluster}} -} diff --git a/man/doLouvainSubCluster.Rd b/man/doLouvainSubCluster.Rd deleted file mode 100644 index 0b2c5daf5..000000000 --- a/man/doLouvainSubCluster.Rd +++ /dev/null @@ -1,104 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering.R -\name{doLouvainSubCluster} -\alias{doLouvainSubCluster} -\title{doLouvainSubCluster} -\usage{ -doLouvainSubCluster( - gobject, - name = "sub_louvain_clus", - version = c("community", "multinet"), - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = - "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - gamma = 1, - omega = 1, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{name}{name for new clustering result} - -\item{version}{version of Louvain algorithm to use} - -\item{cluster_column}{cluster column to subcluster} - -\item{selected_clusters}{only do subclustering on these clusters} - -\item{hvg_param}{parameters for calculateHVG} - -\item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} - -\item{hvg_mean_expr_det}{threshold for mean expression level in cells with -detection} - -\item{use_all_genes_as_hvg}{forces all genes to be HVG and to be used as -input for PCA} - -\item{min_nr_of_hvg}{minimum number of HVG, or all genes will be used as -input for PCA} - -\item{pca_param}{parameters for runPCA} - -\item{nn_param}{parameters for parameters for createNearestNetwork} - -\item{k_neighbors}{number of k for createNearestNetwork} - -\item{resolution}{resolution for community algorithm} - -\item{gamma}{gamma} - -\item{omega}{omega} - -\item{python_path}{specify specific path to python if required} - -\item{nn_network_to_use}{type of NN network to use (kNN vs sNN)} - -\item{network_name}{name of NN network to use} - -\item{return_gobject}{boolean: return giotto object (default = TRUE)} - -\item{verbose}{verbose} -} -\value{ -giotto object with new subclusters appended to cell metadata -} -\description{ -subcluster cells using a NN-network and the Louvain algorithm -} -\details{ -This function performs subclustering using the Louvain algorithm on -selected clusters. -The systematic steps are: -\itemize{ - \item{1. subset Giotto object} - \item{2. identify highly variable genes} - \item{3. run PCA} - \item{4. create nearest neighbouring network} - \item{5. do Louvain clustering} -} -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -doLouvainSubCluster(g, cluster_column = "leiden_clus") -} -\seealso{ -\code{\link{.doLouvainCluster_multinet}} and -\code{\link{.doLouvainCluster_community}} -} diff --git a/man/dot-doLouvainSubCluster_community.Rd b/man/dot-doLouvainSubCluster_community.Rd deleted file mode 100644 index c120d7fa0..000000000 --- a/man/dot-doLouvainSubCluster_community.Rd +++ /dev/null @@ -1,91 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering.R -\name{.doLouvainSubCluster_community} -\alias{.doLouvainSubCluster_community} -\title{doLouvainSubCluster community} -\usage{ -.doLouvainSubCluster_community( - gobject, - name = "sub_louvain_comm_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = - "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{name}{name for new clustering result} - -\item{cluster_column}{cluster column to subcluster} - -\item{selected_clusters}{only do subclustering on these clusters} - -\item{hvg_param}{parameters for calculateHVG} - -\item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} - -\item{hvg_mean_expr_det}{threshold for mean expression level in cells with -detection} - -\item{use_all_genes_as_hvg}{forces all genes to be HVG and to be used as -input for PCA} - -\item{min_nr_of_hvg}{minimum number of HVG, or all genes will be used as -input for PCA} - -\item{pca_param}{parameters for runPCA} - -\item{nn_param}{parameters for parameters for createNearestNetwork} - -\item{k_neighbors}{number of k for createNearestNetwork} - -\item{resolution}{resolution} - -\item{python_path}{specify specific path to python if required} - -\item{nn_network_to_use}{type of NN network to use (kNN vs sNN)} - -\item{network_name}{name of NN network to use} - -\item{return_gobject}{Boolean: return giotto object (default = TRUE)} - -\item{verbose}{verbose} -} -\value{ -giotto object with new subclusters appended to cell metadata -} -\description{ -subcluster cells using a NN-network and the Louvain community -detection algorithm -} -\details{ -This function performs subclustering using the Louvain community -algorithm on selected clusters. -The systematic steps are: -\itemize{ - \item{1. subset Giotto object} - \item{2. identify highly variable genes} - \item{3. run PCA} - \item{4. create nearest neighbouring network} - \item{5. do Louvain community clustering} -} -} -\seealso{ -\code{\link{.doLouvainCluster_community}} -} -\keyword{internal} diff --git a/man/dot-doLouvainSubCluster_multinet.Rd b/man/dot-doLouvainSubCluster_multinet.Rd deleted file mode 100644 index ae36d9afd..000000000 --- a/man/dot-doLouvainSubCluster_multinet.Rd +++ /dev/null @@ -1,91 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clustering.R -\name{.doLouvainSubCluster_multinet} -\alias{.doLouvainSubCluster_multinet} -\title{doLouvainSubCluster multinet} -\usage{ -.doLouvainSubCluster_multinet( - gobject, - name = "sub_louvain_mult_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = - "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - gamma = 1, - omega = 1, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{name}{name for new clustering result} - -\item{cluster_column}{cluster column to subcluster} - -\item{selected_clusters}{only do subclustering on these clusters} - -\item{hvg_param}{parameters for calculateHVG} - -\item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} - -\item{hvg_mean_expr_det}{threshold for mean expression level in cells with -detection} - -\item{use_all_genes_as_hvg}{forces all genes to be HVG and to be used as -input for PCA} - -\item{min_nr_of_hvg}{minimum number of HVG, or all genes will be used as -input for PCA} - -\item{pca_param}{parameters for runPCA} - -\item{nn_param}{parameters for parameters for createNearestNetwork} - -\item{k_neighbors}{number of k for createNearestNetwork} - -\item{gamma}{gamma} - -\item{omega}{omega} - -\item{nn_network_to_use}{type of NN network to use (kNN vs sNN)} - -\item{network_name}{name of NN network to use} - -\item{return_gobject}{boolean: return giotto object (default = TRUE)} - -\item{verbose}{verbose} -} -\value{ -giotto object with new subclusters appended to cell metadata -} -\description{ -subcluster cells using a NN-network and the Louvain multinet -detection algorithm -} -\details{ -This function performs subclustering using the Louvain multinet -algorithm on selected clusters. -The systematic steps are: -\itemize{ - \item{1. subset Giotto object} - \item{2. identify highly variable genes} - \item{3. run PCA} - \item{4. create nearest neighbouring network} - \item{5. do Louvain multinet clustering} -} -} -\seealso{ -\code{\link{.doLouvainCluster_multinet}} -} -\keyword{internal} diff --git a/man/jackstrawPlot.Rd b/man/jackstrawPlot.Rd index 6dd082e9c..6a757e5c7 100644 --- a/man/jackstrawPlot.Rd +++ b/man/jackstrawPlot.Rd @@ -69,8 +69,8 @@ ggplot object for jackstraw method identify significant prinicipal components (PCs) } \details{ -The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} - function. By systematically permuting genes it identifies robust, and thus +The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} + function. By systematically permuting genes it identifies robust, and thus significant, PCs. } \examples{ diff --git a/man/runGiottoHarmony.Rd b/man/runGiottoHarmony.Rd index 274b3bb23..35348339b 100644 --- a/man/runGiottoHarmony.Rd +++ b/man/runGiottoHarmony.Rd @@ -70,7 +70,7 @@ giotto object with updated Harmony dimension reduction run UMAP } \details{ -This is a simple wrapper for the HarmonyMatrix function in the +This is a simple wrapper for the HarmonyMatrix function in the Harmony package \doi{10.1038/s41592-019-0619-0}. } \examples{ diff --git a/man/runPCA.Rd b/man/runPCA.Rd index 58073b9d0..2e3ca25e1 100644 --- a/man/runPCA.Rd +++ b/man/runPCA.Rd @@ -78,7 +78,7 @@ dimension reduction and clusterings are based on your features of interest. \item feats_to_use = NULL: will use all features from the selected matrix \item feats_to_use = : can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } By default the number of principle components that we calculate is 100, which diff --git a/man/runPCAprojection.Rd b/man/runPCAprojection.Rd index 31d0251f6..1492e60ad 100644 --- a/man/runPCAprojection.Rd +++ b/man/runPCAprojection.Rd @@ -69,11 +69,11 @@ runPCAprojection( giotto object with updated PCA dimension recuction } \description{ -runs a Principal Component Analysis on a random +runs a Principal Component Analysis on a random subset + projection } \details{ -See \code{\link[BiocSingular]{runPCA}} and +See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters. This PCA implementation is similar to \code{\link{runPCA}}, except that it performs PCA on a subset of the cells or features, and predict on the others. @@ -82,7 +82,7 @@ This can significantly increase speed without sacrificing accuracy too much. \item feats_to_use = NULL: will use all features from the selected matrix \item feats_to_use = : can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } } diff --git a/man/runPCAprojectionBatch.Rd b/man/runPCAprojectionBatch.Rd index 926001375..518ff46c0 100644 --- a/man/runPCAprojectionBatch.Rd +++ b/man/runPCAprojectionBatch.Rd @@ -72,28 +72,30 @@ runPCAprojectionBatch( giotto object with updated PCA dimension reduction } \description{ -runs a Principal Component Analysis on multiple random +runs a Principal Component Analysis on multiple random batches + projection } \details{ -See \code{\link[BiocSingular]{runPCA}} and +See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters. -This PCA implementation is similar to \code{\link{runPCA}} and +This PCA implementation is similar to \code{\link{runPCA}} and \code{\link{runPCAprojection}}, -except that it performs PCA on multiple subsets (batches) of the cells or +except that it performs PCA on multiple subsets (batches) of the cells or features, -and predict on the others. This can significantly increase speed without +and predict on the others. This can significantly increase speed without sacrificing accuracy too much. \itemize{ \item feats_to_use = NULL: will use all features from the selected matrix \item feats_to_use = : can be used to select a column name of highly variable features, created by (see \code{\link{calculateHVF}}) - \item feats_to_use = c('geneA', 'geneB', ...): will use all manually + \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features } } \examples{ g <- GiottoData::loadGiottoMini("visium") -runPCAprojectionBatch(g) +# set feats_to_use to NULL since there are not many hvfs +# (only 48 in this mini dataset) +runPCAprojectionBatch(g, feats_to_use = NULL) } diff --git a/man/runUMAPprojection.Rd b/man/runUMAPprojection.Rd index ad99c7cda..4bc1e2abd 100644 --- a/man/runUMAPprojection.Rd +++ b/man/runUMAPprojection.Rd @@ -84,7 +84,7 @@ giotto object with updated UMAP dimension reduction run UMAP on subset and project on the rest } \details{ -See \code{\link[uwot]{umap}} for more information about these and +See \code{\link[uwot]{umap}} for more information about these and other parameters. \itemize{ \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca') diff --git a/man/runtSNE.Rd b/man/runtSNE.Rd index ac280eba2..ff8cddfee 100644 --- a/man/runtSNE.Rd +++ b/man/runtSNE.Rd @@ -72,7 +72,7 @@ giotto object with updated tSNE dimension recuction run tSNE } \details{ -See \code{\link[Rtsne]{Rtsne}} for more information about these and +See \code{\link[Rtsne]{Rtsne}} for more information about these and other parameters. \cr \itemize{ \item Input for tSNE dimension reduction can be another dimension reduction (default = 'pca') diff --git a/man/screePlot.Rd b/man/screePlot.Rd index d4f0a542f..2f65fb4f9 100644 --- a/man/screePlot.Rd +++ b/man/screePlot.Rd @@ -72,14 +72,14 @@ screePlot( ggplot object for scree method } \description{ -identify significant principal components (PCs) using an +identify significant principal components (PCs) using an screeplot (a.k.a. elbowplot) } \details{ Screeplot works by plotting the explained variance of each - individual PC in a barplot allowing you to identify which PC provides a + individual PC in a barplot allowing you to identify which PC provides a significant contribution (a.k.a 'elbow method'). \cr - Screeplot will use an available pca object, based on the parameter 'name', + Screeplot will use an available pca object, based on the parameter 'name', or it will create it if it's not available (see \code{\link{runPCA}}) } \examples{ diff --git a/man/signPCA.Rd b/man/signPCA.Rd index 6245bde35..0df0b6379 100644 --- a/man/signPCA.Rd +++ b/man/signPCA.Rd @@ -84,14 +84,14 @@ ggplot object for scree method and maxtrix of p-values for jackstraw identify significant prinicipal components (PCs) } \details{ -Two different methods can be used to assess the number of relevant +Two different methods can be used to assess the number of relevant or significant prinicipal components (PC's). \cr 1. Screeplot works by plotting the explained variance of each - individual PC in a barplot allowing you to identify which PC provides a + individual PC in a barplot allowing you to identify which PC provides a significant contribution (a.k.a. 'elbow method'). \cr - 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} - function. By systematically permuting genes it identifies robust, and thus + 2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} + function. By systematically permuting genes it identifies robust, and thus significant, PCs. \cr } diff --git a/man/specificCellCellcommunicationScores.Rd b/man/specificCellCellcommunicationScores.Rd index ac12f78b9..917da1a3f 100644 --- a/man/specificCellCellcommunicationScores.Rd +++ b/man/specificCellCellcommunicationScores.Rd @@ -73,7 +73,7 @@ considered} \item{verbose}{verbose} } \value{ -Cell-Cell communication scores for feature pairs based on spatial +Cell-Cell communication scores for feature pairs based on spatial interaction } \description{ diff --git a/man/subClusterCells.Rd b/man/subClusterCells.Rd index 4eca454f8..7d81a1613 100644 --- a/man/subClusterCells.Rd +++ b/man/subClusterCells.Rd @@ -2,7 +2,9 @@ % Please edit documentation in R/clustering.R \name{subClusterCells} \alias{subClusterCells} -\title{subClusterCells} +\alias{doLeidenSubCluster} +\alias{doLouvainSubCluster} +\title{Cell subclustering} \usage{ subClusterCells( gobject, @@ -10,12 +12,17 @@ subClusterCells( cluster_method = c("leiden", "louvain_community", "louvain_multinet"), cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + hvg_param = deprecated(), + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), - hvg_min_perc_cells = 5, - hvg_mean_expr_det = 1, - use_all_genes_as_hvg = FALSE, - min_nr_of_hvg = 5, + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, pca_param = list(expression_values = "normalized", scale_unit = TRUE), nn_param = list(dimensions_to_use = 1:20), k_neighbors = 10, @@ -29,40 +36,111 @@ subClusterCells( return_gobject = TRUE, verbose = TRUE ) + +doLeidenSubCluster( + gobject, + feat_type = NULL, + name = "sub_leiden_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + "normalized"), + hvg_param = deprecated(), + hvf_min_perc_cells = 5, + hvg_min_perc_cells = deprecated(), + hvf_mean_expr_det = 1, + hvg_mean_expr_det = deprecated(), + use_all_feats_as_hvf = FALSE, + use_all_genes_as_hvg = deprecated(), + min_nr_of_hvf = 5, + min_nr_of_hvg = deprecated(), + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + n_iterations = 500, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + toplevel = 2, + verbose = TRUE +) + +doLouvainSubCluster( + gobject, + name = "sub_louvain_clus", + version = c("community", "multinet"), + cluster_column = NULL, + selected_clusters = NULL, + hvg_param = deprecated(), + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + "normalized"), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + gamma = 1, + omega = 1, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE +) } \arguments{ -\item{gobject}{giotto object} +\item{gobject}{\code{giotto} object} \item{name}{name for new clustering result} -\item{cluster_method}{clustering method to use} +\item{cluster_method}{clustering method to use. Currently one of "leiden" +(default), "louvain_community", "louvain_multinet"} \item{cluster_column}{cluster column to subcluster} \item{selected_clusters}{only do subclustering on these clusters} -\item{hvg_param}{parameters for calculateHVG} +\item{hvg_param}{deprecated} + +\item{hvf_param}{list of parameters for \code{\link[=calculateHVF]{calculateHVF()}}} + +\item{hvg_min_perc_cells}{deprecated} -\item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} +\item{hvf_min_perc_cells}{threshold for detection in min percentage of cells} -\item{hvg_mean_expr_det}{threshold for mean expression level in cells with +\item{hvg_mean_expr_det}{deprecated} + +\item{hvf_mean_expr_det}{threshold for mean expression level in cells with detection} -\item{use_all_genes_as_hvg}{forces all genes to be HVG and to be used as +\item{use_all_genes_as_hvg}{deprecated} + +\item{use_all_feats_as_hvf}{forces all features to be HVF and to be used as input for PCA} -\item{min_nr_of_hvg}{minimum number of HVG, or all genes will be used as +\item{min_nr_of_hvg}{deprecated} + +\item{min_nr_of_hvf}{minimum number of HVF, or all features will be used as input for PCA} -\item{pca_param}{parameters for runPCA} +\item{pca_param}{list of parameters for \code{\link[=runPCA]{runPCA()}}} -\item{nn_param}{parameters for parameters for createNearestNetwork} +\item{nn_param}{list of parameters for \code{\link[=createNearestNetwork]{createNearestNetwork()}}} -\item{k_neighbors}{number of k for createNearestNetwork} +\item{k_neighbors}{number of k for \code{\link[=createNearestNetwork]{createNearestNetwork()}}} -\item{resolution}{resolution} +\item{resolution}{resolution for community algorithm} -\item{n_iterations}{number of interations to run the Leiden algorithm.} +\item{n_iterations}{number of iterations to run the Leiden algorithm.} \item{gamma}{gamma} @@ -74,34 +152,60 @@ input for PCA} \item{network_name}{name of NN network to use} -\item{return_gobject}{boolean: return giotto object (default = TRUE)} +\item{return_gobject}{logical. return \code{giotto} object (default = TRUE)} \item{verbose}{verbose} + +\item{toplevel}{do not use} + +\item{version}{version of Louvain algorithm to use. One of "community" or +"multinet", with the default being "community"} } \value{ -giotto object with new subclusters appended to cell metadata +\code{giotto} object with new subclusters appended to cell metadata } \description{ -subcluster cells +Perform cell subclustering by taking an annotated group of +cells and performing another round of clustering on just that subset. +Several methods are implemented. \code{subClusterCells()} is the main wrapper +function. \code{doLeidenSubCluster()} and \code{doLouvainSubCluster()} are more +specific implementations. } \details{ This function performs subclustering on selected clusters. The systematic steps are: -\itemize{ - \item{1. subset Giotto object} - \item{2. identify highly variable genes} - \item{3. run PCA} - \item{4. create nearest neighbouring network} - \item{5. do clustering} +\enumerate{ +\item subset Giotto object +\item identify highly variable genes +\item run PCA +\item create nearest neighbouring network +\item do clustering } } +\section{Functions}{ +\itemize{ +\item \code{doLeidenSubCluster()}: Further subcluster cells using a NN-network and +the Leiden algorithm + +\item \code{doLouvainSubCluster()}: subcluster cells using a NN-network and the +Louvain algorithm + +}} \examples{ g <- GiottoData::loadGiottoMini("visium") +# Run some subclusterings based on "leiden_clus" annotations that already +# exist in the visium mini object + +# default method is leiden subclustering subClusterCells(g, cluster_column = "leiden_clus") -} -\seealso{ -\code{\link{.doLouvainCluster_multinet}}, -\code{\link{.doLouvainCluster_community}} -and @seealso \code{\link{doLeidenCluster}} + +# use louvain instead +subClusterCells(g, cluster_column = "leiden_clus", + cluster_method = "louvain_community") + +# directly call the more specific functions +doLeidenSubCluster(g, cluster_column = "leiden_clus") + +doLouvainSubCluster(g, cluster_column = "leiden_clus") } From 312c8d52481acc9530ce45d39add0b915536c1b5 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 12:19:01 -0400 Subject: [PATCH 057/150] fix: `doRandomWalkCluster()` - retrieved network must be igraph - remove overlapping meta name checking (already performed in `addCellMetadata()`) - update example --- R/clustering.R | 15 ++++----------- man/doRandomWalkCluster.Rd | 3 ++- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index 33cdc9d16..d57c227ef 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -957,7 +957,8 @@ doLouvainCluster <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' doRandomWalkCluster(g) +#' g <- doRandomWalkCluster(g) +#' pDataDT(g) #' @export doRandomWalkCluster <- function(gobject, name = "random_walk_clus", @@ -976,7 +977,8 @@ doRandomWalkCluster <- function(gobject, igraph_object <- getNearestNetwork( gobject, nn_type = nn_network_to_use, - name = network_name + name = network_name, + output = "igraph" ) @@ -1000,17 +1002,8 @@ doRandomWalkCluster <- function(gobject, set.seed(Sys.time()) } - ## return if (return_gobject == TRUE) { - cluster_names <- names(gobject@cell_metadata) - if (name %in% cluster_names) { - cat(name, " has already been used, will be overwritten") - cell_metadata <- gobject@cell_metadata - cell_metadata[, eval(name) := NULL] - gobject@cell_metadata <- cell_metadata - } - gobject <- addCellMetadata( gobject = gobject, new_metadata = ident_clusters_DT[, c("cell_ID", name), diff --git a/man/doRandomWalkCluster.Rd b/man/doRandomWalkCluster.Rd index bf0d36cbb..626be7fd4 100644 --- a/man/doRandomWalkCluster.Rd +++ b/man/doRandomWalkCluster.Rd @@ -52,5 +52,6 @@ package in R for more information. \examples{ g <- GiottoData::loadGiottoMini("visium") -doRandomWalkCluster(g) +g <- doRandomWalkCluster(g) +pDataDT(g) } From 376d10df20f3bc5eff43cc45eb56c1c77d8bcb04 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 12:32:12 -0400 Subject: [PATCH 058/150] Update python_scrublet.R - example should be updated to a single cell dataset in the future --- R/python_scrublet.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/R/python_scrublet.R b/R/python_scrublet.R index 200305ec1..2ed213c8d 100644 --- a/R/python_scrublet.R +++ b/R/python_scrublet.R @@ -1,6 +1,7 @@ #' @title doScrubletDetect #' @name doScrubletDetect -#' @description run *scrublet* doublet detection for raw expression. +#' @description Run *scrublet* doublet detection for raw expression. Intended +#' for single cell data #' @param gobject giotto object containing expression data #' @param feat_type feature type #' @param spat_unit spatial unit @@ -21,13 +22,15 @@ #' @seealso This function wraps the python package scrublet #' \doi{10.1016/j.cels.2018.11.005} #' @returns if `return_gobject = FALSE`, a `data.table` cell_ID, doublet scores, -#' and classifications are returned. If `TRUE`, that information is appended -#' into the input `giotto` object's metadata and the `giotto` object is +#' and classifications are returned. If `TRUE`, that information is appended +#' into the input `giotto` object's metadata and the `giotto` object is #' returned. #' @md #' @examples +#' # Should only be done with single cell data, but this is just a +#' # convenient dataset #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' doScrubletDetect(g) #' @export doScrubletDetect <- function(gobject, @@ -48,10 +51,10 @@ doScrubletDetect <- function(gobject, ) # print message with information # - message("using 'scrublet' to detect doublets. If used in published + message("using 'scrublet' to detect doublets. If used in published research, please cite: \n Wolock, S. L., Lopez, R. & Klein, A. M. - Scrublet: Computational Identification of Cell Doublets in Single-Cell + Scrublet: Computational Identification of Cell Doublets in Single-Cell Transcriptomic Data. Cell Syst. 8, 281-291.e9 (2019). https://doi.org/10.1016/j.cels.2018.11.005") From 8ed5588e2a45befd154f868445cbf0190208e193 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 24 May 2024 12:35:36 -0400 Subject: [PATCH 059/150] chore: docs --- R/python_scrublet.R | 7 +++++-- man/doScrubletDetect.Rd | 10 ++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/R/python_scrublet.R b/R/python_scrublet.R index 2ed213c8d..03d608fb5 100644 --- a/R/python_scrublet.R +++ b/R/python_scrublet.R @@ -28,10 +28,13 @@ #' @md #' @examples #' # Should only be done with single cell data, but this is just a -#' # convenient dataset +#' # convenient example. #' g <- GiottoData::loadGiottoMini("visium") #' -#' doScrubletDetect(g) +#' g <- doScrubletDetect(g) +#' +#' pDataDT(g) # doublet_scores and doublet cols are added +#' dimPlot2D(g, cell_color = "doublet_scores", color_as_factor = FALSE) #' @export doScrubletDetect <- function(gobject, feat_type = NULL, diff --git a/man/doScrubletDetect.Rd b/man/doScrubletDetect.Rd index b6937c579..fb68af241 100644 --- a/man/doScrubletDetect.Rd +++ b/man/doScrubletDetect.Rd @@ -54,12 +54,18 @@ into the input \code{giotto} object's metadata and the \code{giotto} object is returned. } \description{ -run \emph{scrublet} doublet detection for raw expression. +Run \emph{scrublet} doublet detection for raw expression. Intended +for single cell data } \examples{ +# Should only be done with single cell data, but this is just a +# convenient example. g <- GiottoData::loadGiottoMini("visium") -doScrubletDetect(g) +g <- doScrubletDetect(g) + +pDataDT(g) # doublet_scores and doublet cols are added +dimPlot2D(g, cell_color = "doublet_scores", color_as_factor = FALSE) } \seealso{ This function wraps the python package scrublet From 415ab9e7e3483dcbaf473bded71f712818de9a6e Mon Sep 17 00:00:00 2001 From: josschavezf Date: Fri, 24 May 2024 15:16:55 -0400 Subject: [PATCH 060/150] fix accessor output --- R/clustering.R | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index ab0e0a138..52aff9252 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -976,7 +976,8 @@ doRandomWalkCluster <- function(gobject, igraph_object <- getNearestNetwork( gobject, nn_type = nn_network_to_use, - name = network_name + name = network_name, + output = "igraph" ) @@ -2065,7 +2066,7 @@ doLeidenSubCluster <- function(gobject, #' @param name name for new clustering result #' @param cluster_column cluster column to subcluster #' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG +#' @param hvf_param parameters for calculateHVF #' @param hvg_min_perc_cells threshold for detection in min percentage of cells #' @param hvg_mean_expr_det threshold for mean expression level in cells with #' detection @@ -2099,7 +2100,7 @@ doLeidenSubCluster <- function(gobject, name = "sub_louvain_comm_clus", cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), hvg_min_perc_cells = 5, hvg_mean_expr_det = 1, @@ -2151,7 +2152,7 @@ doLeidenSubCluster <- function(gobject, ## calculate variable genes temp_giotto <- do.call( - "calculateHVG", c(gobject = temp_giotto, hvg_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param)) ## get hvg gene_metadata <- fDataDT(temp_giotto) @@ -2264,7 +2265,7 @@ doLeidenSubCluster <- function(gobject, #' @param name name for new clustering result #' @param cluster_column cluster column to subcluster #' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG +#' @param hvf_param parameters for calculateHVF #' @param hvg_min_perc_cells threshold for detection in min percentage of cells #' @param hvg_mean_expr_det threshold for mean expression level in cells with #' detection @@ -2298,7 +2299,7 @@ doLeidenSubCluster <- function(gobject, name = "sub_louvain_mult_clus", cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), hvg_min_perc_cells = 5, hvg_mean_expr_det = 1, @@ -2364,7 +2365,7 @@ doLeidenSubCluster <- function(gobject, ## calculate variable genes temp_giotto <- do.call( - "calculateHVG", c(gobject = temp_giotto, hvg_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param)) ## get hvg gene_metadata <- fDataDT(temp_giotto) @@ -2468,7 +2469,7 @@ doLeidenSubCluster <- function(gobject, #' @param version version of Louvain algorithm to use #' @param cluster_column cluster column to subcluster #' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG +#' @param hvf_param parameters for calculateHVF #' @param hvg_min_perc_cells threshold for detection in min percentage of cells #' @param hvg_mean_expr_det threshold for mean expression level in cells with #' detection @@ -2510,7 +2511,7 @@ doLouvainSubCluster <- function(gobject, version = c("community", "multinet"), cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), hvg_min_perc_cells = 5, hvg_mean_expr_det = 1, @@ -2537,7 +2538,7 @@ doLouvainSubCluster <- function(gobject, gobject = gobject, cluster_column = cluster_column, selected_clusters = selected_clusters, - hvg_param = hvg_param, + hvf_param = hvf_param, hvg_mean_expr_det = hvg_mean_expr_det, pca_param = pca_param, nn_param = nn_param, @@ -2555,7 +2556,7 @@ doLouvainSubCluster <- function(gobject, gobject = gobject, cluster_column = cluster_column, selected_clusters = selected_clusters, - hvg_param = hvg_param, + hvf_param = hvf_param, hvg_mean_expr_det = hvg_mean_expr_det, pca_param = pca_param, nn_param = nn_param, @@ -2585,7 +2586,7 @@ doLouvainSubCluster <- function(gobject, #' @param cluster_method clustering method to use #' @param cluster_column cluster column to subcluster #' @param selected_clusters only do subclustering on these clusters -#' @param hvg_param parameters for calculateHVG +#' @param hvf_param parameters for calculateHVF #' @param hvg_min_perc_cells threshold for detection in min percentage of cells #' @param hvg_mean_expr_det threshold for mean expression level in cells with #' detection @@ -2632,7 +2633,7 @@ subClusterCells <- function(gobject, ), cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), hvg_min_perc_cells = 5, hvg_mean_expr_det = 1, @@ -2663,7 +2664,7 @@ subClusterCells <- function(gobject, gobject = gobject, cluster_column = cluster_column, selected_clusters = selected_clusters, - hvg_param = hvg_param, + hvf_param = hvf_param, hvg_min_perc_cells = hvg_min_perc_cells, hvg_mean_expr_det = hvg_mean_expr_det, use_all_genes_as_hvg = use_all_genes_as_hvg, @@ -2685,7 +2686,7 @@ subClusterCells <- function(gobject, gobject = gobject, cluster_column = cluster_column, selected_clusters = selected_clusters, - hvg_param = hvg_param, + hvf_param = hvf_param, hvg_min_perc_cells = hvg_min_perc_cells, hvg_mean_expr_det = hvg_mean_expr_det, use_all_genes_as_hvg = use_all_genes_as_hvg, @@ -2706,7 +2707,7 @@ subClusterCells <- function(gobject, gobject = gobject, cluster_column = cluster_column, selected_clusters = selected_clusters, - hvg_param = hvg_param, + hvf_param = hvf_param, hvg_min_perc_cells = hvg_min_perc_cells, hvg_mean_expr_det = hvg_mean_expr_det, use_all_genes_as_hvg = use_all_genes_as_hvg, From 726885dec10549c96118e46773e943e09f4e989d Mon Sep 17 00:00:00 2001 From: josschavezf Date: Fri, 24 May 2024 15:17:17 -0400 Subject: [PATCH 061/150] fix items list --- R/spatial_genes.R | 14 ++-- R/spatial_interaction.R | 103 ++++++++++++++------------ R/spatial_interaction_spot.R | 135 +++++++++++++++++++++-------------- 3 files changed, 146 insertions(+), 106 deletions(-) diff --git a/R/spatial_genes.R b/R/spatial_genes.R index bcfadc3c7..754d46abd 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -2484,9 +2484,10 @@ spark <- function(gobject, #' @details #' Steps to identify spatial patterns: #' \itemize{ -#' \item{1. average gene expression for cells within a grid, see createSpatialGrid} -#' \item{2. perform PCA on the average grid expression profiles} -#' \item{3. convert variance of principlal components (PCs) to z-scores and select PCs based on a z-score threshold} +#' * 1. average gene expression for cells within a grid, see createSpatialGrid +#' * 2. perform PCA on the average grid expression profiles +#' * 3. convert variance of principal components (PCs) to z-scores and +#' select PCs based on a z-score threshold #' } #' @export detectSpatialPatterns <- function(gobject, @@ -4068,9 +4069,10 @@ rankSpatialCorGroups <- function(gobject, #' @details There are 3 different ways of selecting features from the spatial #' co-expression modules #' \itemize{ -#' \item{1. weighted: }{Features are ranked based on summarized pairwise co-expression scores} -#' \item{2. random: }{A random selection of features, set seed for reproducibility} -#' \item{3. informed: }{Features are selected based on prior information/ranking} +#' * 1. weighted: Features are ranked based on summarized pairwise +#' co-expression scores +#' * 2. random: A random selection of features, set seed for reproducibility +#' * 3. informed: Features are selected based on prior information/ranking #' } #' @export getBalancedSpatCoexpressionFeats <- function(spatCorObject, diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index b6075e35f..25744e8e0 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -1080,20 +1080,22 @@ NULL #' other cell types. The results data.table in the icfObject contains #' - at least - the following columns: #' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } -#' \item{log2fc:}{ log2 fold-change between sel and other} -#' \item{diff:}{ spatial expression difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{nr_other:}{ number of other cells of selected target cell type} -#' \item{int_nr_other:}{ number of other cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} +#' * features: All or selected list of tested features +#' * sel: average feature expression in the interacting cells from the target +#' cell type +#' * other: average feature expression in the NOT-interacting cells from the +#' target cell type +#' * log2fc: log2 fold-change between sel and other +#' * diff: spatial expression difference between sel and other +#' * p.value: associated p-value +#' * p.adj: adjusted p-value +#' * cell_type: target cell type +#' * int_cell_type: interacting cell type +#' * nr_select: number of cells for selected target cell type +#' * int_nr_select: number of cells for interacting cell type +#' * nr_other: number of other cells of selected target cell type +#' * int_nr_other: number of other cells for interacting cell type +#' * unif_int: cell-cell interaction #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") @@ -1335,20 +1337,22 @@ findCellProximityGenes <- function(...) { #' other cell types. The results data.table in the `icfObject` contains #' - at least - the following columns: #' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } -#' \item{log2fc:}{ log2 fold-change between sel and other} -#' \item{diff:}{ spatial expression difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{nr_other:}{ number of other cells of selected target cell type} -#' \item{int_nr_other:}{ number of other cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} +#' * features: All or selected list of tested features +#' * sel: average feature expression in the interacting cells from the target +#' cell type +#' * other: average feature expression in the NOT-interacting cells from the +#' target cell type +#' * log2fc: log2 fold-change between sel and other +#' * diff: spatial expression difference between sel and other +#' * p.value: associated p-value +#' * p.adj: adjusted p-value +#' * cell_type: target cell type +#' * int_cell_type: interacting cell type +#' * nr_select: number of cells for selected target cell type +#' * int_nr_select: number of cells for interacting cell type +#' * nr_other: number of other cells of selected target cell type +#' * int_nr_other: number of other cells for interacting cell type +#' * unif_int: cell-cell interaction #' } #' @seealso \code{\link{findInteractionChangedFeats}} #' @examples @@ -2628,25 +2632,30 @@ exprCellCellcom <- function(gobject, #' distribution of feature expression values in cells that are spatially in #' proximity to each other. #' \itemize{ -#' \item{LR_comb:}{Pair of ligand and receptor} -#' \item{lig_cell_type:}{ cell type to assess expression level of ligand } -#' \item{lig_expr:}{ average expression of ligand in lig_cell_type } -#' \item{ligand:}{ ligand name } -#' \item{rec_cell_type:}{ cell type to assess expression level of receptor } -#' \item{rec_expr:}{ average expression of receptor in rec_cell_type} -#' \item{receptor:}{ receptor name } -#' \item{LR_expr:}{ combined average ligand and receptor expression } -#' \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } -#' \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } -#' \item{rand_expr:}{ average combined ligand and receptor expression from random spatial permutations } -#' \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{z_score:}{ (optinal) z-score } -#' \item{log2fc:}{ log2 fold-change (LR_expr/rand_expr) } -#' \item{pvalue:}{ p-value } -#' \item{LR_cell_comb:}{ cell type pair combination } -#' \item{p.adj:}{ adjusted p-value } -#' \item{PI:}{ significanc score: log2fc * -log10(p.adj) } +#' * LR_comb: Pair of ligand and receptor +#' * lig_cell_type: cell type to assess expression level of ligand +#' * lig_expr: average expression of ligand in lig_cell_type +#' * ligand: ligand name +#' * rec_cell_type: cell type to assess expression level of receptor +#' * rec_expr: average expression of receptor in rec_cell_type +#' * receptor: receptor name +#' * LR_expr: combined average ligand and receptor expression +#' * lig_nr: total number of cells from lig_cell_type that spatially interact +#' with cells from rec_cell_type +#' * rec_nr: total number of cells from rec_cell_type that spatially interact +#' with cells from lig_cell_type +#' * rand_expr: average combined ligand and receptor expression from random +#' spatial permutations +#' * av_diff: average difference between LR_expr and rand_expr over all +#' random spatial permutations +#' * sd_diff: (optional) standard deviation of the difference between LR_expr +#' and rand_expr over all random spatial permutations +#' * z_score: (optional) z-score +#' * log2fc: log2 fold-change (LR_expr/rand_expr) +#' * pvalue: p-value +#' * LR_cell_comb: cell type pair combination +#' * p.adj: adjusted p-value +#' * PI: significanec score: log2fc \* -log10(p.adj) #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") diff --git a/R/spatial_interaction_spot.R b/R/spatial_interaction_spot.R index 16cdc59aa..ebd7652a9 100644 --- a/R/spatial_interaction_spot.R +++ b/R/spatial_interaction_spot.R @@ -1100,25 +1100,40 @@ NULL #' The results data.table in the icfObject contains - at least - #' the following columns: #' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression residual in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression residual in the NOT-interacting cells from the target cell type } -#' \item{pcc_sel:}{ correlation between cell proximity score and expression residual in the interacting cells from the target cell type} -#' \item{pcc_other:}{ correlation between cell proximity score and expression residual in the NOT-interacting cells from the target cell type } -#' \item{pcc_diff:}{ correlation difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} +#' * features: All or selected list of tested features +#' * sel: average feature expression residual in the interacting cells from +#' the target cell type +#' * other: average feature expression residual in the NOT-interacting cells +#' from the target cell type +#' * pcc_sel: correlation between cell proximity score and expression residual +#' in the interacting cells from the target cell type +#' * pcc_other: correlation between cell proximity score and expression +#' residual in the NOT-interacting cells from the target cell type +#' * pcc_diff: correlation difference between sel and other +#' * p.value: associated p-value +#' * p.adj: adjusted p-value +#' * cell_type: target cell type +#' * int_cell_type: interacting cell type +#' * nr_select: number of cells for selected target cell type +#' * int_nr_select: number of cells for interacting cell type +#' * unif_int: cell-cell interaction #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' g_expression <- getExpression(g, output = "matrix") +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20) +#' sign_gene <- x$feats #' -#' findICFSpot(g, spat_unit = "cell", feat_type = "rna", ave_celltype_exp = g_expression, spatial_network_name = "spatial_network") +#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), +#' nrow = length(sign_gene)) +#' rownames(sign_matrix) <- sign_gene +#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +#' +#' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) +#' g_expression <- getExpression(g, output = "matrix") +#' +#' findICFSpot(g, spat_unit = "cell", feat_type = "rna", +#' ave_celltype_exp = g_expression, spatial_network_name = "spatial_network") #' @export findICFSpot <- function(gobject, spat_unit = NULL, @@ -1823,25 +1838,32 @@ plotCellProximityFeatSpot <- function(gobject, #' expected based on a reshuffled null distribution of feature expression #' values in cells that are spatially in proximity to each other. #' \itemize{ -#' \item{LR_comb:}{Pair of ligand and receptor} -#' \item{lig_cell_type:}{ cell type to assess expression level of ligand } -#' \item{lig_expr:}{ average expressionresidual(observed - DWLS_predicted) of ligand in lig_cell_type } -#' \item{ligand:}{ ligand name } -#' \item{rec_cell_type:}{ cell type to assess expression level of receptor } -#' \item{rec_expr:}{ average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type} -#' \item{receptor:}{ receptor name } -#' \item{LR_expr:}{ combined average ligand and receptor expression } -#' \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } -#' \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } -#' \item{rand_expr:}{ average combined ligand and receptor expression residual from random spatial permutations } -#' \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{z_score:}{ (optinal) z-score } -#' \item{log2fc:}{ LR_expr - rand_expr } -#' \item{pvalue:}{ p-value } -#' \item{LR_cell_comb:}{ cell type pair combination } -#' \item{p.adj:}{ adjusted p-value } -#' \item{PI:}{ significanc score: log2fc * -log10(p.adj) } +#' * LR_comb: Pair of ligand and receptor +#' * lig_cell_type: cell type to assess expression level of ligand +#' * lig_expr: average expression residual (observed - DWLS_predicted) of +#' ligand in lig_cell_type +#' * ligand: ligand name +#' * rec_cell_type: cell type to assess expression level of receptor +#' * rec_expr: average expression residual(observed - DWLS_predicted) of +#' receptor in rec_cell_type +#' * receptor: receptor name +#' * LR_expr: combined average ligand and receptor expression +#' * lig_nr: total number of cells from lig_cell_type that spatially interact +#' with cells from rec_cell_type +#' * rec_nr: total number of cells from rec_cell_type that spatially interact +#' with cells from lig_cell_type +#' * rand_expr: average combined ligand and receptor expression residual from +#' random spatial permutations +#' * av_diff: average difference between LR_expr and rand_expr over all random +#' spatial permutations +#' * sd_diff: (optional) standard deviation of the difference between LR_expr +#' and rand_expr over all random spatial permutations +#' * z_score: (optinal) z-score +#' * log2fc: LR_expr - rand_expr +#' * pvalue: p-value +#' * LR_cell_comb: cell type pair combination +#' * p.adj: adjusted p-value +#' * PI: significance score: log2fc \* -log10(p.adj) #' } #' @keywords internal .specific_CCCScores_spots <- function(gobject, @@ -2072,25 +2094,32 @@ plotCellProximityFeatSpot <- function(gobject, #' expected based on a reshuffled null distribution of feature expression #' values in cells that are spatially in proximity to each other. #' \itemize{ -#' \item{LR_comb:}{Pair of ligand and receptor} -#' \item{lig_cell_type:}{ cell type to assess expression level of ligand } -#' \item{lig_expr:}{ average expression residual(observed - DWLS_predicted) of ligand in lig_cell_type } -#' \item{ligand:}{ ligand name } -#' \item{rec_cell_type:}{ cell type to assess expression level of receptor } -#' \item{rec_expr:}{ average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type} -#' \item{receptor:}{ receptor name } -#' \item{LR_expr:}{ combined average ligand and receptor expression residual} -#' \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } -#' \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } -#' \item{rand_expr:}{ average combined ligand and receptor expression residual from random spatial permutations } -#' \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{z_score:}{ (optinal) z-score } -#' \item{log2fc:}{ LR_expr - rand_expr } -#' \item{pvalue:}{ p-value } -#' \item{LR_cell_comb:}{ cell type pair combination } -#' \item{p.adj:}{ adjusted p-value } -#' \item{PI:}{ significanc score: log2fc * -log10(p.adj) } +#' * LR_comb:Pair of ligand and receptor +#' * lig_cell_type: cell type to assess expression level of ligand +#' * lig_expr: average expression residual(observed - DWLS_predicted) of +#' ligand in lig_cell_type +#' * ligand: ligand name +#' * rec_cell_type: cell type to assess expression level of receptor +#' * rec_expr: average expression residual(observed - DWLS_predicted) of +#' receptor in rec_cell_type +#' * receptor: receptor name +#' * LR_expr: combined average ligand and receptor expression residual +#' * lig_nr: total number of cells from lig_cell_type that spatially interact +#' with cells from rec_cell_type +#' * rec_nr: total number of cells from rec_cell_type that spatially interact +#' with cells from lig_cell_type +#' * rand_expr: average combined ligand and receptor expression residual from +#' random spatial permutations +#' * av_diff: average difference between LR_expr and rand_expr over all random +#' spatial permutations +#' * sd_diff: (optional) standard deviation of the difference between LR_expr +#' and rand_expr over all random spatial permutations +#' * z_score: (optional) z-score +#' * log2fc: LR_expr - rand_expr +#' * pvalue: p-value +#' * LR_cell_comb: cell type pair combination +#' * p.adj: adjusted p-value +#' * PI: significanc score: log2fc \* -log10(p.adj) #' } #' @export spatCellCellcomSpots <- function(gobject, From 623f3a4c16c7f9cd4be344e8e2802e3b05fb3d13 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Fri, 24 May 2024 15:17:34 -0400 Subject: [PATCH 062/150] run devtools::document --- man/detectSpatialPatterns.Rd | 7 ++-- man/doLouvainSubCluster.Rd | 4 +- man/dot-doLouvainSubCluster_community.Rd | 4 +- man/dot-doLouvainSubCluster_multinet.Rd | 4 +- man/dot-specific_CCCScores_spots.Rd | 45 +++++++++++++--------- man/findICF.Rd | 30 ++++++++------- man/findICFSpot.Rd | 30 ++++++++------- man/findInteractionChangedFeats.Rd | 30 ++++++++------- man/getBalancedSpatCoexpressionFeats.Rd | 7 ++-- man/spatCellCellcomSpots.Rd | 45 +++++++++++++--------- man/specificCellCellcommunicationScores.Rd | 43 ++++++++++++--------- man/subClusterCells.Rd | 4 +- 12 files changed, 141 insertions(+), 112 deletions(-) diff --git a/man/detectSpatialPatterns.Rd b/man/detectSpatialPatterns.Rd index f45d0592c..242811b5f 100644 --- a/man/detectSpatialPatterns.Rd +++ b/man/detectSpatialPatterns.Rd @@ -43,8 +43,9 @@ in a spatial grid. \details{ Steps to identify spatial patterns: \itemize{ - \item{1. average gene expression for cells within a grid, see createSpatialGrid} - \item{2. perform PCA on the average grid expression profiles} - \item{3. convert variance of principlal components (PCs) to z-scores and select PCs based on a z-score threshold} + * 1. average gene expression for cells within a grid, see createSpatialGrid + * 2. perform PCA on the average grid expression profiles + * 3. convert variance of principal components (PCs) to z-scores and + select PCs based on a z-score threshold } } diff --git a/man/doLouvainSubCluster.Rd b/man/doLouvainSubCluster.Rd index 0b2c5daf5..1aaa94114 100644 --- a/man/doLouvainSubCluster.Rd +++ b/man/doLouvainSubCluster.Rd @@ -10,7 +10,7 @@ doLouvainSubCluster( version = c("community", "multinet"), cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), hvg_min_perc_cells = 5, hvg_mean_expr_det = 1, @@ -40,7 +40,7 @@ doLouvainSubCluster( \item{selected_clusters}{only do subclustering on these clusters} -\item{hvg_param}{parameters for calculateHVG} +\item{hvf_param}{parameters for calculateHVF} \item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} diff --git a/man/dot-doLouvainSubCluster_community.Rd b/man/dot-doLouvainSubCluster_community.Rd index c120d7fa0..79b7d39a7 100644 --- a/man/dot-doLouvainSubCluster_community.Rd +++ b/man/dot-doLouvainSubCluster_community.Rd @@ -9,7 +9,7 @@ name = "sub_louvain_comm_clus", cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), hvg_min_perc_cells = 5, hvg_mean_expr_det = 1, @@ -35,7 +35,7 @@ \item{selected_clusters}{only do subclustering on these clusters} -\item{hvg_param}{parameters for calculateHVG} +\item{hvf_param}{parameters for calculateHVF} \item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} diff --git a/man/dot-doLouvainSubCluster_multinet.Rd b/man/dot-doLouvainSubCluster_multinet.Rd index ae36d9afd..5af3f443b 100644 --- a/man/dot-doLouvainSubCluster_multinet.Rd +++ b/man/dot-doLouvainSubCluster_multinet.Rd @@ -9,7 +9,7 @@ name = "sub_louvain_mult_clus", cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), hvg_min_perc_cells = 5, hvg_mean_expr_det = 1, @@ -35,7 +35,7 @@ \item{selected_clusters}{only do subclustering on these clusters} -\item{hvg_param}{parameters for calculateHVG} +\item{hvf_param}{parameters for calculateHVF} \item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} diff --git a/man/dot-specific_CCCScores_spots.Rd b/man/dot-specific_CCCScores_spots.Rd index e609a4444..67fbc794b 100644 --- a/man/dot-specific_CCCScores_spots.Rd +++ b/man/dot-specific_CCCScores_spots.Rd @@ -79,25 +79,32 @@ Statistical framework to identify if pairs of features expected based on a reshuffled null distribution of feature expression values in cells that are spatially in proximity to each other. \itemize{ - \item{LR_comb:}{Pair of ligand and receptor} - \item{lig_cell_type:}{ cell type to assess expression level of ligand } - \item{lig_expr:}{ average expressionresidual(observed - DWLS_predicted) of ligand in lig_cell_type } - \item{ligand:}{ ligand name } - \item{rec_cell_type:}{ cell type to assess expression level of receptor } - \item{rec_expr:}{ average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type} - \item{receptor:}{ receptor name } - \item{LR_expr:}{ combined average ligand and receptor expression } - \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } - \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } - \item{rand_expr:}{ average combined ligand and receptor expression residual from random spatial permutations } - \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } - \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } - \item{z_score:}{ (optinal) z-score } - \item{log2fc:}{ LR_expr - rand_expr } - \item{pvalue:}{ p-value } - \item{LR_cell_comb:}{ cell type pair combination } - \item{p.adj:}{ adjusted p-value } - \item{PI:}{ significanc score: log2fc * -log10(p.adj) } + * LR_comb: Pair of ligand and receptor + * lig_cell_type: cell type to assess expression level of ligand + * lig_expr: average expression residual (observed - DWLS_predicted) of + ligand in lig_cell_type + * ligand: ligand name + * rec_cell_type: cell type to assess expression level of receptor + * rec_expr: average expression residual(observed - DWLS_predicted) of + receptor in rec_cell_type + * receptor: receptor name + * LR_expr: combined average ligand and receptor expression + * lig_nr: total number of cells from lig_cell_type that spatially interact + with cells from rec_cell_type + * rec_nr: total number of cells from rec_cell_type that spatially interact + with cells from lig_cell_type + * rand_expr: average combined ligand and receptor expression residual from + random spatial permutations + * av_diff: average difference between LR_expr and rand_expr over all random + spatial permutations + * sd_diff: (optional) standard deviation of the difference between LR_expr + and rand_expr over all random spatial permutations + * z_score: (optinal) z-score + * log2fc: LR_expr - rand_expr + * pvalue: p-value + * LR_cell_comb: cell type pair combination + * p.adj: adjusted p-value + * PI: significance score: log2fc \* -log10(p.adj) } } \keyword{internal} diff --git a/man/findICF.Rd b/man/findICF.Rd index 86e830342..fa9d2ec13 100644 --- a/man/findICF.Rd +++ b/man/findICF.Rd @@ -78,20 +78,22 @@ cell types when they interact (approximated by physical proximity) with other cell types. The results data.table in the `icfObject` contains - at least - the following columns: \itemize{ - \item{features:}{ All or selected list of tested features} - \item{sel:}{ average feature expression in the interacting cells from the target cell type } - \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } - \item{log2fc:}{ log2 fold-change between sel and other} - \item{diff:}{ spatial expression difference between sel and other} - \item{p.value:}{ associated p-value} - \item{p.adj:}{ adjusted p-value} - \item{cell_type:}{ target cell type} - \item{int_cell_type:}{ interacting cell type} - \item{nr_select:}{ number of cells for selected target cell type} - \item{int_nr_select:}{ number of cells for interacting cell type} - \item{nr_other:}{ number of other cells of selected target cell type} - \item{int_nr_other:}{ number of other cells for interacting cell type} - \item{unif_int:}{ cell-cell interaction} + * features: All or selected list of tested features + * sel: average feature expression in the interacting cells from the target + cell type + * other: average feature expression in the NOT-interacting cells from the + target cell type + * log2fc: log2 fold-change between sel and other + * diff: spatial expression difference between sel and other + * p.value: associated p-value + * p.adj: adjusted p-value + * cell_type: target cell type + * int_cell_type: interacting cell type + * nr_select: number of cells for selected target cell type + * int_nr_select: number of cells for interacting cell type + * nr_other: number of other cells of selected target cell type + * int_nr_other: number of other cells for interacting cell type + * unif_int: cell-cell interaction } } \examples{ diff --git a/man/findICFSpot.Rd b/man/findICFSpot.Rd index 5f13d7dc4..3dc5e99af 100644 --- a/man/findICFSpot.Rd +++ b/man/findICFSpot.Rd @@ -83,19 +83,23 @@ average_expressed_in_cell_type) The results data.table in the icfObject contains - at least - the following columns: \itemize{ - \item{features:}{ All or selected list of tested features} - \item{sel:}{ average feature expression residual in the interacting cells from the target cell type } - \item{other:}{ average feature expression residual in the NOT-interacting cells from the target cell type } - \item{pcc_sel:}{ correlation between cell proximity score and expression residual in the interacting cells from the target cell type} - \item{pcc_other:}{ correlation between cell proximity score and expression residual in the NOT-interacting cells from the target cell type } - \item{pcc_diff:}{ correlation difference between sel and other} - \item{p.value:}{ associated p-value} - \item{p.adj:}{ adjusted p-value} - \item{cell_type:}{ target cell type} - \item{int_cell_type:}{ interacting cell type} - \item{nr_select:}{ number of cells for selected target cell type} - \item{int_nr_select:}{ number of cells for interacting cell type} - \item{unif_int:}{ cell-cell interaction} + * features: All or selected list of tested features + * sel: average feature expression residual in the interacting cells from + the target cell type + * other: average feature expression residual in the NOT-interacting cells + from the target cell type + * pcc_sel: correlation between cell proximity score and expression residual + in the interacting cells from the target cell type + * pcc_other: correlation between cell proximity score and expression + residual in the NOT-interacting cells from the target cell type + * pcc_diff: correlation difference between sel and other + * p.value: associated p-value + * p.adj: adjusted p-value + * cell_type: target cell type + * int_cell_type: interacting cell type + * nr_select: number of cells for selected target cell type + * int_nr_select: number of cells for interacting cell type + * unif_int: cell-cell interaction } } \examples{ diff --git a/man/findInteractionChangedFeats.Rd b/man/findInteractionChangedFeats.Rd index 0252ce158..985c84dce 100644 --- a/man/findInteractionChangedFeats.Rd +++ b/man/findInteractionChangedFeats.Rd @@ -78,20 +78,22 @@ cell types when they interact (approximated by physical proximity) with other cell types. The results data.table in the icfObject contains - at least - the following columns: \itemize{ - \item{features:}{ All or selected list of tested features} - \item{sel:}{ average feature expression in the interacting cells from the target cell type } - \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } - \item{log2fc:}{ log2 fold-change between sel and other} - \item{diff:}{ spatial expression difference between sel and other} - \item{p.value:}{ associated p-value} - \item{p.adj:}{ adjusted p-value} - \item{cell_type:}{ target cell type} - \item{int_cell_type:}{ interacting cell type} - \item{nr_select:}{ number of cells for selected target cell type} - \item{int_nr_select:}{ number of cells for interacting cell type} - \item{nr_other:}{ number of other cells of selected target cell type} - \item{int_nr_other:}{ number of other cells for interacting cell type} - \item{unif_int:}{ cell-cell interaction} + * features: All or selected list of tested features + * sel: average feature expression in the interacting cells from the target + cell type + * other: average feature expression in the NOT-interacting cells from the + target cell type + * log2fc: log2 fold-change between sel and other + * diff: spatial expression difference between sel and other + * p.value: associated p-value + * p.adj: adjusted p-value + * cell_type: target cell type + * int_cell_type: interacting cell type + * nr_select: number of cells for selected target cell type + * int_nr_select: number of cells for interacting cell type + * nr_other: number of other cells of selected target cell type + * int_nr_other: number of other cells for interacting cell type + * unif_int: cell-cell interaction } } \examples{ diff --git a/man/getBalancedSpatCoexpressionFeats.Rd b/man/getBalancedSpatCoexpressionFeats.Rd index 674807070..0a8595780 100644 --- a/man/getBalancedSpatCoexpressionFeats.Rd +++ b/man/getBalancedSpatCoexpressionFeats.Rd @@ -38,8 +38,9 @@ balanced manner There are 3 different ways of selecting features from the spatial co-expression modules \itemize{ - \item{1. weighted: }{Features are ranked based on summarized pairwise co-expression scores} - \item{2. random: }{A random selection of features, set seed for reproducibility} - \item{3. informed: }{Features are selected based on prior information/ranking} + * 1. weighted: Features are ranked based on summarized pairwise + co-expression scores + * 2. random: A random selection of features, set seed for reproducibility + * 3. informed: Features are selected based on prior information/ranking } } diff --git a/man/spatCellCellcomSpots.Rd b/man/spatCellCellcomSpots.Rd index 033757291..212a6179b 100644 --- a/man/spatCellCellcomSpots.Rd +++ b/man/spatCellCellcomSpots.Rd @@ -84,24 +84,31 @@ Statistical framework to identify if pairs of features expected based on a reshuffled null distribution of feature expression values in cells that are spatially in proximity to each other. \itemize{ - \item{LR_comb:}{Pair of ligand and receptor} - \item{lig_cell_type:}{ cell type to assess expression level of ligand } - \item{lig_expr:}{ average expression residual(observed - DWLS_predicted) of ligand in lig_cell_type } - \item{ligand:}{ ligand name } - \item{rec_cell_type:}{ cell type to assess expression level of receptor } - \item{rec_expr:}{ average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type} - \item{receptor:}{ receptor name } - \item{LR_expr:}{ combined average ligand and receptor expression residual} - \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } - \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } - \item{rand_expr:}{ average combined ligand and receptor expression residual from random spatial permutations } - \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } - \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } - \item{z_score:}{ (optinal) z-score } - \item{log2fc:}{ LR_expr - rand_expr } - \item{pvalue:}{ p-value } - \item{LR_cell_comb:}{ cell type pair combination } - \item{p.adj:}{ adjusted p-value } - \item{PI:}{ significanc score: log2fc * -log10(p.adj) } + * LR_comb:Pair of ligand and receptor + * lig_cell_type: cell type to assess expression level of ligand + * lig_expr: average expression residual(observed - DWLS_predicted) of + ligand in lig_cell_type + * ligand: ligand name + * rec_cell_type: cell type to assess expression level of receptor + * rec_expr: average expression residual(observed - DWLS_predicted) of + receptor in rec_cell_type + * receptor: receptor name + * LR_expr: combined average ligand and receptor expression residual + * lig_nr: total number of cells from lig_cell_type that spatially interact + with cells from rec_cell_type + * rec_nr: total number of cells from rec_cell_type that spatially interact + with cells from lig_cell_type + * rand_expr: average combined ligand and receptor expression residual from + random spatial permutations + * av_diff: average difference between LR_expr and rand_expr over all random + spatial permutations + * sd_diff: (optional) standard deviation of the difference between LR_expr + and rand_expr over all random spatial permutations + * z_score: (optional) z-score + * log2fc: LR_expr - rand_expr + * pvalue: p-value + * LR_cell_comb: cell type pair combination + * p.adj: adjusted p-value + * PI: significanc score: log2fc \* -log10(p.adj) } } diff --git a/man/specificCellCellcommunicationScores.Rd b/man/specificCellCellcommunicationScores.Rd index 917da1a3f..a118f6cbc 100644 --- a/man/specificCellCellcommunicationScores.Rd +++ b/man/specificCellCellcommunicationScores.Rd @@ -87,25 +87,30 @@ are expressed at higher levels than expected based on a reshuffled null distribution of feature expression values in cells that are spatially in proximity to each other. \itemize{ - \item{LR_comb:}{Pair of ligand and receptor} - \item{lig_cell_type:}{ cell type to assess expression level of ligand } - \item{lig_expr:}{ average expression of ligand in lig_cell_type } - \item{ligand:}{ ligand name } - \item{rec_cell_type:}{ cell type to assess expression level of receptor } - \item{rec_expr:}{ average expression of receptor in rec_cell_type} - \item{receptor:}{ receptor name } - \item{LR_expr:}{ combined average ligand and receptor expression } - \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } - \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } - \item{rand_expr:}{ average combined ligand and receptor expression from random spatial permutations } - \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } - \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } - \item{z_score:}{ (optinal) z-score } - \item{log2fc:}{ log2 fold-change (LR_expr/rand_expr) } - \item{pvalue:}{ p-value } - \item{LR_cell_comb:}{ cell type pair combination } - \item{p.adj:}{ adjusted p-value } - \item{PI:}{ significanc score: log2fc * -log10(p.adj) } + * LR_comb: Pair of ligand and receptor + * lig_cell_type: cell type to assess expression level of ligand + * lig_expr: average expression of ligand in lig_cell_type + * ligand: ligand name + * rec_cell_type: cell type to assess expression level of receptor + * rec_expr: average expression of receptor in rec_cell_type + * receptor: receptor name + * LR_expr: combined average ligand and receptor expression + * lig_nr: total number of cells from lig_cell_type that spatially interact + with cells from rec_cell_type + * rec_nr: total number of cells from rec_cell_type that spatially interact + with cells from lig_cell_type + * rand_expr: average combined ligand and receptor expression from random + spatial permutations + * av_diff: average difference between LR_expr and rand_expr over all + random spatial permutations + * sd_diff: (optional) standard deviation of the difference between LR_expr + and rand_expr over all random spatial permutations + * z_score: (optional) z-score + * log2fc: log2 fold-change (LR_expr/rand_expr) + * pvalue: p-value + * LR_cell_comb: cell type pair combination + * p.adj: adjusted p-value + * PI: significanec score: log2fc \* -log10(p.adj) } } \examples{ diff --git a/man/subClusterCells.Rd b/man/subClusterCells.Rd index 4eca454f8..c7a6e8f8c 100644 --- a/man/subClusterCells.Rd +++ b/man/subClusterCells.Rd @@ -10,7 +10,7 @@ subClusterCells( cluster_method = c("leiden", "louvain_community", "louvain_multinet"), cluster_column = NULL, selected_clusters = NULL, - hvg_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = + hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, expression_values = "normalized"), hvg_min_perc_cells = 5, hvg_mean_expr_det = 1, @@ -41,7 +41,7 @@ subClusterCells( \item{selected_clusters}{only do subclustering on these clusters} -\item{hvg_param}{parameters for calculateHVG} +\item{hvf_param}{parameters for calculateHVF} \item{hvg_min_perc_cells}{threshold for detection in min percentage of cells} From fa4917447256f3901a726348a59d037494f0721e Mon Sep 17 00:00:00 2001 From: josschavezf Date: Fri, 24 May 2024 15:40:12 -0400 Subject: [PATCH 063/150] run biocstyle --- R/auxiliary_giotto.R | 565 +++-- R/cell_segmentation.R | 13 +- R/clustering.R | 1241 +++++----- R/convenience.R | 1079 +++++---- R/cross_section.R | 410 ++-- R/differential_expression.R | 390 +-- R/dimension_reduction.R | 721 +++--- R/feature_set_enrichment.R | 107 +- R/general_help.R | 625 ++--- R/giotto_viewer.R | 128 +- R/gstop.R | 17 +- R/image_registration.R | 392 +-- R/interactivity.R | 158 +- R/kriging.R | 48 +- R/poly_influence.R | 166 +- R/python_hmrf.R | 515 ++-- R/python_scrublet.R | 27 +- R/spatial_clusters.R | 20 +- R/spatial_enrichment.R | 988 +++++--- R/spatial_enrichment_visuals.R | 83 +- R/spatial_genes.R | 1690 +++++++------ R/spatial_interaction.R | 1271 +++++----- R/spatial_interaction_spot.R | 1075 +++++---- R/spatial_interaction_visuals.R | 2129 ++++++++++------- R/spdep.R | 41 +- R/variable_genes.R | 193 +- R/wnn.R | 150 +- R/zzz.R | 1 - man/addCellIntMetadata.Rd | 6 +- man/addHMRF.Rd | 9 +- man/cellProximityBarplot.Rd | 13 +- man/cellProximityEnrichmentEachSpot.Rd | 7 +- man/cellProximityEnrichmentSpots.Rd | 10 +- man/cellProximityHeatmap.Rd | 2 +- man/cellProximityNetwork.Rd | 4 +- man/cellProximitySpatPlot.Rd | 4 +- man/cellProximitySpatPlot2D.Rd | 8 +- man/cellProximitySpatPlot3D.Rd | 2 +- man/cellProximityVisPlot.Rd | 8 +- man/cellProximityVisPlot_internals.Rd | 6 +- man/clusterSpatialCorFeats.Rd | 4 +- man/combCCcom.Rd | 14 +- man/combineICF.Rd | 6 +- man/combineInteractionChangedFeats.Rd | 5 +- man/compareCellAbundance.Rd | 9 +- man/comparePolygonExpression.Rd | 9 +- man/convertEnsemblToGeneSymbol.Rd | 2 +- man/createArchRProj.Rd | 10 +- man/createCrossSection.Rd | 2 +- man/createGiottoCosMxObject.Rd | 28 +- man/createGiottoMerscopeObject.Rd | 16 +- man/createGiottoObjectfromArchR.Rd | 4 +- man/createGiottoVisiumObject.Rd | 10 +- man/createGiottoXeniumObject.Rd | 28 +- man/createSpatialGenomicsObject.Rd | 2 +- man/detectSpatialCorFeats.Rd | 4 +- man/detectSpatialPatterns.Rd | 2 +- man/doClusterProjection.Rd | 8 +- man/doFeatureSetEnrichment.Rd | 24 +- man/doGiottoClustree.Rd | 6 +- man/doHMRF.Rd | 6 +- man/dot-createGiottoCosMxObject_all.Rd | 10 +- ...dot-createGiottoCosMxObject_subcellular.Rd | 2 +- ...ot-createGiottoXeniumObject_subcellular.Rd | 2 +- man/dot-determine_switch_string_equal.Rd | 2 +- man/dot-determine_switch_string_unequal.Rd | 2 +- man/dot-get_img_corners.Rd | 2 +- man/dot-kmeans_arma_subset_binarize.Rd | 2 +- man/dot-load_cosmx_folder_subcellular.Rd | 2 +- man/dot-plotRecovery_sub.Rd | 2 +- man/dot-read_xenium_folder.Rd | 4 +- man/dot-rigid_transform_spatial_locations.Rd | 2 +- man/dot-specific_CCCScores_spots.Rd | 48 +- man/dot-trakem2_rigid_transforms.Rd | 2 +- man/exprCellCellcom.Rd | 6 +- man/findCellTypesFromEnrichment.Rd | 2 +- man/findICF.Rd | 14 +- man/findICFSpot.Rd | 30 +- man/findInteractionChangedFeats.Rd | 14 +- man/findMastMarkers.Rd | 6 +- man/findNetworkNeighbors.Rd | 6 +- man/get10Xmatrix.Rd | 12 +- man/get10Xmatrix_h5.Rd | 10 +- man/getBalancedSpatCoexpressionFeats.Rd | 2 +- man/getCellsFromPolygon.Rd | 9 +- man/loadHMRF.Rd | 12 +- man/load_merscope_folder.Rd | 4 +- man/load_xenium_folder.Rd | 4 +- man/makeSignMatrixDWLS.Rd | 16 +- man/makeSignMatrixDWLSfromMatrix.Rd | 18 +- man/makeSignMatrixPAGE.Rd | 26 +- man/makeSignMatrixRank.Rd | 18 +- man/pieCellTypesFromEnrichment.Rd | 2 +- man/plotCCcomDotplot.Rd | 16 +- man/plotCCcomHeatmap.Rd | 14 +- man/plotCPF.Rd | 14 +- man/plotCellProximityFeatSpot.Rd | 8 +- man/plotCellProximityFeats.Rd | 8 +- man/plotCellTypesFromEnrichment.Rd | 4 +- man/plotCombineCCcom.Rd | 30 +- man/plotCombineCellCellCommunication.Rd | 30 +- man/plotCombineICF.Rd | 15 +- man/plotCombineInteractionChangedFeats.Rd | 17 +- man/plotICF.Rd | 14 +- man/plotICFSpot.Rd | 14 +- man/plotInteractionChangedFeats.Rd | 14 +- man/plotPolygons.Rd | 9 +- man/plotRankSpatvsExpr.Rd | 18 +- man/plotRecovery.Rd | 16 +- man/processGiotto.Rd | 6 +- man/rankSpatialCorGroups.Rd | 6 +- man/readPolygonFilesVizgen.Rd | 2 +- man/readPolygonFilesVizgenHDF5.Rd | 10 +- man/readPolygonFilesVizgenHDF5_old.Rd | 6 +- man/readPolygonVizgenParquet.Rd | 4 +- man/registerGiottoObjectList.Rd | 16 +- man/registerGiottoObjectListFiji.Rd | 20 +- man/registerGiottoObjectListRvision.Rd | 6 +- man/registerImagesFIJI.Rd | 10 +- man/runDWLSDeconv.Rd | 10 +- man/runHyperGeometricEnrich.Rd | 10 +- man/runPAGEEnrich.Rd | 15 +- man/runPatternSimulation.Rd | 10 +- man/runRankEnrich.Rd | 18 +- man/runSpatialDeconv.Rd | 12 +- man/runSpatialEnrich.Rd | 12 +- man/runWNN.Rd | 4 +- man/showCellProportionSwitchedPie.Rd | 4 +- man/showCellProportionSwitchedSanKey.Rd | 4 +- man/showPolygonSizeInfluence.Rd | 8 +- man/simulateOneGenePatternGiottoObject.Rd | 12 +- man/spatCellCellcomSpots.Rd | 46 +- man/spdepAutoCorr.Rd | 4 +- man/specificCellCellcommunicationScores.Rd | 44 +- man/subClusterCells.Rd | 6 +- man/visium_micron_scalefactor.Rd | 2 +- man/write_giotto_viewer_annotation.Rd | 2 +- vignettes/intro_to_giotto.Rmd | 18 +- 138 files changed, 8887 insertions(+), 6495 deletions(-) diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index fb13d1b17..11dafbe7a 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -113,24 +113,25 @@ #' #' filterDistributions(g) #' @export -filterDistributions <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - method = c("threshold", "sum", "mean"), - expression_threshold = 1, - detection = c("feats", "cells"), - plot_type = c("histogram", "violin"), - scale_y = NULL, - nr_bins = 30, - fill_color = "lightblue", - scale_axis = "identity", - axis_offset = 0, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "filterDistributions") { +filterDistributions <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + method = c("threshold", "sum", "mean"), + expression_threshold = 1, + detection = c("feats", "cells"), + plot_type = c("histogram", "violin"), + scale_y = NULL, + nr_bins = 30, + fill_color = "lightblue", + scale_axis = "identity", + axis_offset = 0, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "filterDistributions") { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -145,7 +146,8 @@ filterDistributions <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values))) + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -170,15 +172,18 @@ filterDistributions <- function(gobject, if (detection == "feats") { if (method == "threshold") { feat_detection_levels <- data.table::as.data.table( - rowSums_flex(expr_values >= expression_threshold)) + rowSums_flex(expr_values >= expression_threshold) + ) mytitle <- "feat detected in # of cells" } else if (method == "sum") { feat_detection_levels <- data.table::as.data.table( - rowSums_flex(expr_values)) + rowSums_flex(expr_values) + ) mytitle <- "total sum of feature detected in all cells" } else if (method == "mean") { feat_detection_levels <- data.table::as.data.table( - rowMeans_flex(expr_values)) + rowMeans_flex(expr_values) + ) mytitle <- "average of feature detected in all cells" } @@ -216,15 +221,18 @@ filterDistributions <- function(gobject, } else if (detection == "cells") { if (method == "threshold") { cell_detection_levels <- data.table::as.data.table( - colSums_flex(expr_values >= expression_threshold)) + colSums_flex(expr_values >= expression_threshold) + ) mytitle <- "feats detected per cell" } else if (method == "sum") { cell_detection_levels <- data.table::as.data.table( - colSums_flex(expr_values)) + colSums_flex(expr_values) + ) mytitle <- "total features per cell" } else if (method == "mean") { cell_detection_levels <- data.table::as.data.table( - colMeans_flex(expr_values)) + colMeans_flex(expr_values) + ) mytitle <- "average number of features per cell" } @@ -302,22 +310,23 @@ filterDistributions <- function(gobject, #' #' filterCombinations(g) #' @export -filterCombinations <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - expression_thresholds = c(1, 2), - feat_det_in_min_cells = c(5, 50), - min_det_feats_per_cell = c(200, 400), - scale_x_axis = "identity", - x_axis_offset = 0, - scale_y_axis = "identity", - y_axis_offset = 0, - show_plot = TRUE, - return_plot = FALSE, - save_plot = NULL, - save_param = list(), - default_save_name = "filterCombinations") { +filterCombinations <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + expression_thresholds = c(1, 2), + feat_det_in_min_cells = c(5, 50), + min_det_feats_per_cell = c(200, 400), + scale_x_axis = "identity", + x_axis_offset = 0, + scale_y_axis = "identity", + y_axis_offset = 0, + show_plot = TRUE, + return_plot = FALSE, + save_plot = NULL, + save_param = list(), + default_save_name = "filterCombinations") { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -333,7 +342,8 @@ filterCombinations <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values))) + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -361,16 +371,20 @@ filterCombinations <- function(gobject, # first remove feats filter_index_feats <- rowSums_flex( - expr_values >= threshold) >= min_cells_for_feat + expr_values >= threshold + ) >= min_cells_for_feat removed_feats <- length(filter_index_feats[ - filter_index_feats == FALSE]) + filter_index_feats == FALSE + ]) det_cells_res[[combn_i]] <- removed_feats # then remove cells filter_index_cells <- colSums_flex(expr_values[ - filter_index_feats, ] >= threshold) >= min_feats_per_cell + filter_index_feats, + ] >= threshold) >= min_feats_per_cell removed_cells <- length(filter_index_cells[ - filter_index_cells == FALSE]) + filter_index_cells == FALSE + ]) det_feats_res[[combn_i]] <- removed_cells } @@ -393,7 +407,8 @@ filterCombinations <- function(gobject, result_DT[["min_detected_feats_per_cell"]] <- min_det_feats_per_cell result_DT[["combination"]] <- paste0( result_DT$feat_detected_in_min_cells, "-", - result_DT$min_detected_feats_per_cell) + result_DT$min_detected_feats_per_cell + ) result_DT <- result_DT[, .( threshold, @@ -420,18 +435,22 @@ filterCombinations <- function(gobject, color = as.factor(threshold) )) pl <- pl + scale_color_discrete( - guide = guide_legend(title = "threshold(s)")) + guide = guide_legend(title = "threshold(s)") + ) pl <- pl + ggrepel::geom_text_repel(data = result_DT, aes( x = removed_cells + x_axis_offset, y = removed_feats + y_axis_offset, label = combination )) pl <- pl + ggplot2::scale_x_continuous( - trans = scale_x_axis, limits = c(0, maximum_x_value)) + trans = scale_x_axis, limits = c(0, maximum_x_value) + ) pl <- pl + ggplot2::scale_y_continuous( - trans = scale_y_axis, limits = c(0, maximum_y_value)) + trans = scale_y_axis, limits = c(0, maximum_y_value) + ) pl <- pl + ggplot2::labs( - x = "number of removed cells", y = "number of removed feats") + x = "number of removed cells", y = "number of removed feats" + ) return(plot_output_handler( @@ -491,23 +510,24 @@ filterCombinations <- function(gobject, #' #' filterGiotto(g) #' @export -filterGiotto <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - expression_threshold = 1, - feat_det_in_min_cells = 100, - min_det_feats_per_cell = 100, - spat_unit_fsub = ":all:", - feat_type_ssub = ":all:", - all_spat_units = NULL, - all_feat_types = NULL, - poly_info = NULL, - tag_cells = FALSE, - tag_cell_name = "tag", - tag_feats = FALSE, - tag_feats_name = "tag", - verbose = TRUE) { +filterGiotto <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + expression_threshold = 1, + feat_det_in_min_cells = 100, + min_det_feats_per_cell = 100, + spat_unit_fsub = ":all:", + feat_type_ssub = ":all:", + all_spat_units = NULL, + all_feat_types = NULL, + poly_info = NULL, + tag_cells = FALSE, + tag_cell_name = "tag", + tag_feats = FALSE, + tag_feats_name = "tag", + verbose = TRUE) { # data.table vars cell_ID <- feat_ID <- NULL @@ -574,7 +594,8 @@ filterGiotto <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values))) + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) # get expression values to perform filtering on # Only the first spat_unit and feat_type provided are filtered. @@ -596,14 +617,16 @@ filterGiotto <- function(gobject, ## filter features filter_index_feats <- rowSums_flex( - expr_values >= expression_threshold) >= feat_det_in_min_cells + expr_values >= expression_threshold + ) >= feat_det_in_min_cells selected_feat_ids <- names(filter_index_feats[filter_index_feats == TRUE]) ## filter cells filter_index_cells <- colSums_flex(expr_values[ - filter_index_feats, ] >= expression_threshold) >= min_det_feats_per_cell + filter_index_feats, + ] >= expression_threshold) >= min_det_feats_per_cell selected_cell_ids <- names(filter_index_cells[filter_index_cells == TRUE]) @@ -612,7 +635,8 @@ filterGiotto <- function(gobject, if (isTRUE(tag_cells)) { cell_meta <- getCellMetadata(gobject = gobject, copy_obj = TRUE) cell_meta[][, c(tag_cell_name) := ifelse( - cell_ID %in% selected_cell_ids, 0, 1)] + cell_ID %in% selected_cell_ids, 0, 1 + )] gobject <- setCellMetadata( gobject = gobject, x = cell_meta, initialize = FALSE ) @@ -624,7 +648,8 @@ filterGiotto <- function(gobject, if (isTRUE(tag_feats)) { feat_meta <- getFeatureMetadata(gobject = gobject, copy_obj = TRUE) feat_meta[][, c(tag_feats_name) := ifelse( - feat_ID %in% selected_feat_ids, 0, 1)] + feat_ID %in% selected_feat_ids, 0, 1 + )] gobject <- setFeatureMetadata( gobject = gobject, x = feat_meta, initialize = FALSE ) @@ -660,19 +685,27 @@ filterGiotto <- function(gobject, cat("Feature type: ", feat_type, "\n") if (isTRUE(tag_cells)) { - cat("Number of cells tagged: ", removed_cells, " out of ", - total_cells, "\n") + cat( + "Number of cells tagged: ", removed_cells, " out of ", + total_cells, "\n" + ) } else { - cat("Number of cells removed: ", removed_cells, " out of ", - total_cells, "\n") + cat( + "Number of cells removed: ", removed_cells, " out of ", + total_cells, "\n" + ) } if (isTRUE(tag_feats)) { - cat("Number of feats tagged: ", removed_feats, " out of ", - total_feats, "\n") + cat( + "Number of feats tagged: ", removed_feats, " out of ", + total_feats, "\n" + ) } else { - cat("Number of feats removed: ", removed_feats, " out of ", - total_feats, "\n") + cat( + "Number of feats removed: ", removed_feats, " out of ", + total_feats, "\n" + ) } } @@ -695,7 +728,9 @@ filterGiotto <- function(gobject, # If this function call is not downstream of processGiotto, update normally newGiottoObject <- update_giotto_params( - newGiottoObject, description = "_filter") + newGiottoObject, + description = "_filter" + ) return(newGiottoObject) } @@ -711,19 +746,20 @@ filterGiotto <- function(gobject, #' @description standard function for RNA normalization #' @returns giotto object #' @keywords internal -.rna_standard_normalization <- function(gobject, - raw_expr, - feat_type, - spat_unit, - library_size_norm = TRUE, - scalefactor = 6e3, - log_norm = TRUE, - log_offset = 1, - logbase = 2, - scale_feats = TRUE, - scale_cells = TRUE, - scale_order = c("first_feats", "first_cells"), - verbose = TRUE) { +.rna_standard_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + library_size_norm = TRUE, + scalefactor = 6e3, + log_norm = TRUE, + log_offset = 1, + logbase = 2, + scale_feats = TRUE, + scale_cells = TRUE, + scale_order = c("first_feats", "first_cells"), + verbose = TRUE) { # check feature type compatibility if (!feat_type %in% c("rna", "RNA")) { warning("Caution: Standard normalization was developed for RNA data \n") @@ -765,37 +801,42 @@ filterGiotto <- function(gobject, ## 3. scale if (scale_feats == TRUE & scale_cells == TRUE) { scale_order <- match.arg( - arg = scale_order, choices = c("first_feats", "first_cells")) + arg = scale_order, choices = c("first_feats", "first_cells") + ) if (scale_order == "first_feats") { - if (isTRUE(verbose)) + if (isTRUE(verbose)) { wrap_msg("\n first scale feats and then cells \n") + } norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_expr), center = TRUE, scale = TRUE)) + x = t_flex(norm_expr), center = TRUE, scale = TRUE + )) norm_scaled_expr <- standardise_flex( - x = norm_scaled_expr, center = TRUE, scale = TRUE) - + x = norm_scaled_expr, center = TRUE, scale = TRUE + ) } else if (scale_order == "first_cells") { - if (isTRUE(verbose)) + if (isTRUE(verbose)) { wrap_msg("\n first scale cells and then feats \n") + } norm_scaled_expr <- standardise_flex( - x = norm_expr, center = TRUE, scale = TRUE) + x = norm_expr, center = TRUE, scale = TRUE + ) norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_scaled_expr), center = TRUE, scale = TRUE)) - + x = t_flex(norm_scaled_expr), center = TRUE, scale = TRUE + )) } else { stop("\n scale order must be given \n") } } else if (scale_feats == TRUE) { norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_expr), center = TRUE, scale = TRUE)) - + x = t_flex(norm_expr), center = TRUE, scale = TRUE + )) } else if (scale_cells == TRUE) { norm_scaled_expr <- standardise_flex( - x = norm_expr, center = TRUE, scale = TRUE) - + x = norm_expr, center = TRUE, scale = TRUE + ) } else { norm_scaled_expr <- NULL } @@ -853,12 +894,13 @@ filterGiotto <- function(gobject, #' @description function for RNA normalization according to osmFISH paper #' @returns giotto object #' @keywords internal -.rna_osmfish_normalization <- function(gobject, - raw_expr, - feat_type, - spat_unit, - name = "custom", - verbose = TRUE) { +.rna_osmfish_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + name = "custom", + verbose = TRUE) { # check feature type compatibility if (!feat_type %in% c("rna", "RNA")) { warning("Caution: osmFISH normalization was developed for RNA in situ @@ -869,12 +911,15 @@ filterGiotto <- function(gobject, norm_feats <- (raw_expr[] / rowSums_flex(raw_expr[])) * nrow(raw_expr[]) # 2. normalize per cells with scale-factor equal to number of cells norm_feats_cells <- t_flex((t_flex(norm_feats) / - colSums_flex(norm_feats)) * ncol(raw_expr[])) + colSums_flex(norm_feats)) * ncol(raw_expr[])) # return results to Giotto object - if (verbose == TRUE) - message("\n osmFISH-like normalized data will be returned to the", - name, "Giotto slot \n") + if (verbose == TRUE) { + message( + "\n osmFISH-like normalized data will be returned to the", + name, "Giotto slot \n" + ) + } norm_feats_cells <- create_expr_obj( name = name, @@ -903,20 +948,22 @@ filterGiotto <- function(gobject, #' Adapted from https://gist.github.com/hypercompetent/51a3c428745e1c06d826d76c3671797c#file-pearson_residuals-r #' @returns giotto object #' @keywords internal -.rna_pears_resid_normalization <- function(gobject, - raw_expr, - feat_type, - spat_unit, - theta = 100, - name = "scaled", - verbose = TRUE) { +.rna_pears_resid_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + theta = 100, + name = "scaled", + verbose = TRUE) { # print message with information # - if (verbose) - message("using 'Lause/Kobak' method to normalize count matrix If used in + if (verbose) { + message("using 'Lause/Kobak' method to normalize count matrix If used in published research, please cite: Jan Lause, Philipp Berens, Dmitry Kobak (2020). 'Analytic Pearson residuals for normalization of single-cell RNA-seq UMI data' ") + } # check feature type compatibility @@ -927,9 +974,13 @@ filterGiotto <- function(gobject, if (methods::is(raw_expr[], "HDF5Matrix")) { counts_sum0 <- methods::as(matrix( - MatrixGenerics::colSums2(raw_expr[]), nrow = 1), "HDF5Matrix") + MatrixGenerics::colSums2(raw_expr[]), + nrow = 1 + ), "HDF5Matrix") counts_sum1 <- methods::as(matrix( - MatrixGenerics::rowSums2(raw_expr[]), ncol = 1), "HDF5Matrix") + MatrixGenerics::rowSums2(raw_expr[]), + ncol = 1 + ), "HDF5Matrix") counts_sum <- sum(raw_expr[]) # get residuals @@ -942,9 +993,11 @@ filterGiotto <- function(gobject, z[z < -sqrt(n)] <- -sqrt(n) } else { counts_sum0 <- methods::as(matrix(Matrix::colSums( - raw_expr[]), nrow = 1), "dgCMatrix") + raw_expr[] + ), nrow = 1), "dgCMatrix") counts_sum1 <- methods::as(matrix(Matrix::rowSums( - raw_expr[]), ncol = 1), "dgCMatrix") + raw_expr[] + ), ncol = 1), "dgCMatrix") counts_sum <- sum(raw_expr[]) # get residuals @@ -958,9 +1011,12 @@ filterGiotto <- function(gobject, } # return results to Giotto object - if (verbose == TRUE) - message("\n Pearson residual normalized data will be returned to the ", - name, " Giotto slot \n") + if (verbose == TRUE) { + message( + "\n Pearson residual normalized data will be returned to the ", + name, " Giotto slot \n" + ) + } z <- create_expr_obj( name = name, @@ -1033,23 +1089,24 @@ filterGiotto <- function(gobject, #' #' normalizeGiotto(g) #' @export -normalizeGiotto <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = "raw", - norm_methods = c("standard", "pearson_resid", "osmFISH"), - library_size_norm = TRUE, - scalefactor = 6e3, - log_norm = TRUE, - log_offset = 1, - logbase = 2, - scale_feats = TRUE, - scale_genes = NULL, - scale_cells = TRUE, - scale_order = c("first_feats", "first_cells"), - theta = 100, - update_slot = "scaled", - verbose = TRUE) { +normalizeGiotto <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = "raw", + norm_methods = c("standard", "pearson_resid", "osmFISH"), + library_size_norm = TRUE, + scalefactor = 6e3, + log_norm = TRUE, + log_offset = 1, + logbase = 2, + scale_feats = TRUE, + scale_genes = NULL, + scale_cells = TRUE, + scale_order = c("first_feats", "first_cells"), + theta = 100, + update_slot = "scaled", + verbose = TRUE) { ## deprecated arguments if (!is.null(scale_genes)) { scale_feats <- scale_genes @@ -1078,7 +1135,8 @@ normalizeGiotto <- function(gobject, ) norm_methods <- match.arg( - arg = norm_methods, choices = c("standard", "pearson_resid", "osmFISH")) + arg = norm_methods, choices = c("standard", "pearson_resid", "osmFISH") + ) # normalization according to standard methods if (norm_methods == "standard") { @@ -1163,14 +1221,15 @@ normalizeGiotto <- function(gobject, #' #' adjustGiottoMatrix(g, covariate_columns = "leiden_clus") #' @export -adjustGiottoMatrix <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - batch_columns = NULL, - covariate_columns = NULL, - return_gobject = TRUE, - update_slot = c("custom")) { +adjustGiottoMatrix <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + batch_columns = NULL, + covariate_columns = NULL, + return_gobject = TRUE, + update_slot = c("custom")) { # Catch for both batch and covariate being null if (is.null(batch_columns) & is.null(covariate_columns)) { stop("Metadata for either different batches or covariates must be @@ -1210,12 +1269,14 @@ adjustGiottoMatrix <- function(gobject, } update_slot <- match.arg( - update_slot, c("normalized", "scaled", "custom", update_slot)) + update_slot, c("normalized", "scaled", "custom", update_slot) + ) # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1241,7 +1302,8 @@ adjustGiottoMatrix <- function(gobject, # covariate columns if (!is.null(covariate_columns)) { covariates <- as.matrix( - cell_metadata[, covariate_columns, with = FALSE]) + cell_metadata[, covariate_columns, with = FALSE] + ) } else { covariates <- NULL } @@ -1318,43 +1380,51 @@ adjustGiottoMatrix <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' processGiotto(gobject = g, -#' adjust_params = list(covariate_columns = "leiden_clus")) +#' processGiotto( +#' gobject = g, +#' adjust_params = list(covariate_columns = "leiden_clus") +#' ) #' @export -processGiotto <- function(gobject, - filter_params = list(), - norm_params = list(), - stat_params = list(), - adjust_params = list(), - verbose = TRUE) { +processGiotto <- function( + gobject, + filter_params = list(), + norm_params = list(), + stat_params = list(), + adjust_params = list(), + verbose = TRUE) { # filter Giotto if (verbose == TRUE) message("1. start filter step") - if (!inherits(filter_params, "list")) + if (!inherits(filter_params, "list")) { stop("filter_params need to be a list of parameters for filterGiotto") + } gobject <- do.call("filterGiotto", c(gobject = gobject, filter_params)) # normalize Giotto if (verbose == TRUE) message("2. start normalization step") - if (!inherits(norm_params, "list")) + if (!inherits(norm_params, "list")) { stop("norm_params need to be a list of parameters for normalizeGiotto") + } gobject <- do.call("normalizeGiotto", c(gobject = gobject, norm_params)) # add Statistics if (verbose == TRUE) message("3. start cell and gene statistics step") - if (!inherits(stat_params, "list")) + if (!inherits(stat_params, "list")) { stop("stat_params need to be a list of parameters for addStatistics ") + } stat_params[["return_gobject"]] <- TRUE # force this to be true gobject <- do.call("addStatistics", c(gobject = gobject, stat_params)) # adjust Giotto, if applicable if (!is.null(adjust_params)) { if (verbose == TRUE) message("4. start adjusted matrix step") - if (!inherits(adjust_params, "list")) + if (!inherits(adjust_params, "list")) { stop("adjust_params need to be a list of parameters for adjustGiottoMatrix") + } adjust_params[["return_gobject"]] <- TRUE # force this to be true gobject <- do.call( - "adjustGiottoMatrix", c(gobject = gobject, adjust_params)) + "adjustGiottoMatrix", c(gobject = gobject, adjust_params) + ) } gobject <- update_giotto_params(gobject, description = "_process") @@ -1407,13 +1477,14 @@ processGiotto <- function(gobject, #' #' addFeatStatistics(g) #' @export -addFeatStatistics <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - detection_threshold = 0, - return_gobject = TRUE, - verbose = TRUE) { +addFeatStatistics <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + detection_threshold = 0, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1428,7 +1499,8 @@ addFeatStatistics <- function(gobject, # expression values to be used expression_values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1443,7 +1515,7 @@ addFeatStatistics <- function(gobject, feats = rownames(expr_data[]), nr_cells = rowSums_flex(expr_data[] > detection_threshold), perc_cells = (rowSums_flex(expr_data[] > detection_threshold) / - ncol(expr_data[])) * 100, + ncol(expr_data[])) * 100, total_expr = rowSums_flex(expr_data[]), mean_expr = rowMeans_flex(expr_data[]) ) @@ -1452,7 +1524,9 @@ addFeatStatistics <- function(gobject, mean_expr_det <- NULL mean_expr_detected <- .mean_expr_det_test( - expr_data[], detection_threshold = detection_threshold) + expr_data[], + detection_threshold = detection_threshold + ) feat_stats[, mean_expr_det := mean_expr_detected] @@ -1477,11 +1551,14 @@ addFeatStatistics <- function(gobject, metadata_names <- colnames(feat_metadata[]) if ("nr_cells" %in% metadata_names) { - vmsg(.v = verbose, "feat statistics has already been applied", - "once; overwriting") + vmsg( + .v = verbose, "feat statistics has already been applied", + "once; overwriting" + ) feat_metadata[][, c( "nr_cells", "perc_cells", "total_expr", "mean_expr", - "mean_expr_det") := NULL] + "mean_expr_det" + ) := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### gobject <- set_feature_metadata(gobject, metadata = feat_metadata, @@ -1522,16 +1599,19 @@ addFeatStatistics <- function(gobject, # normally if (is.null(cl)) { gobject <- update_giotto_params(gobject, - description = "_feat_stats") + description = "_feat_stats" + ) } else { fname <- as.character(cl[[1]]) if (fname == "addStatistics") { gobject <- update_giotto_params(gobject, - description = "_feat_stats", - toplevel = 3) + description = "_feat_stats", + toplevel = 3 + ) } else { gobject <- update_giotto_params(gobject, - description = "_feat_stats") + description = "_feat_stats" + ) } } @@ -1572,13 +1652,14 @@ addFeatStatistics <- function(gobject, #' #' addCellStatistics(g) #' @export -addCellStatistics <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - detection_threshold = 0, - return_gobject = TRUE, - verbose = TRUE) { +addCellStatistics <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + detection_threshold = 0, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1593,7 +1674,8 @@ addCellStatistics <- function(gobject, # expression values to be used expression_values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1609,7 +1691,7 @@ addCellStatistics <- function(gobject, cells = colnames(expr_data[]), nr_feats = colSums_flex(expr_data[] > detection_threshold), perc_feats = (colSums_flex(expr_data[] > detection_threshold) / - nrow(expr_data[])) * 100, + nrow(expr_data[])) * 100, total_expr = colSums_flex(expr_data[]) ) @@ -1632,8 +1714,10 @@ addCellStatistics <- function(gobject, metadata_names <- colnames(cell_metadata[]) if ("nr_feats" %in% metadata_names) { - vmsg(.v = verbose, "cells statistics has already been applied", - "once; overwriting") + vmsg( + .v = verbose, "cells statistics has already been applied", + "once; overwriting" + ) cell_metadata[][, c("nr_feats", "perc_feats", "total_expr") := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### gobject <- set_cell_metadata(gobject, @@ -1677,16 +1761,19 @@ addCellStatistics <- function(gobject, # normally if (is.null(cl)) { gobject <- update_giotto_params(gobject, - description = "_cell_stats") + description = "_cell_stats" + ) } else { fname <- as.character(cl[[1]]) if (fname == "addStatistics") { gobject <- update_giotto_params(gobject, - description = "_cell_stats", - toplevel = 3) + description = "_cell_stats", + toplevel = 3 + ) } else { gobject <- update_giotto_params(gobject, - description = "_cell_stats") + description = "_cell_stats" + ) } } @@ -1716,13 +1803,14 @@ addCellStatistics <- function(gobject, #' #' addStatistics(g) #' @export -addStatistics <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - detection_threshold = 0, - return_gobject = TRUE, - verbose = TRUE) { +addStatistics <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + detection_threshold = 0, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1789,13 +1877,14 @@ addStatistics <- function(gobject, #' #' addFeatsPerc(g, feats = c("Gm19935", "9630013A20Rik", "2900040C04Rik")) #' @export -addFeatsPerc <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - feats = NULL, - vector_name = "feat_perc", - return_gobject = TRUE) { +addFeatsPerc <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + feats = NULL, + vector_name = "feat_perc", + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1820,7 +1909,8 @@ addFeatsPerc <- function(gobject, # expression values to be used expression_values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1846,7 +1936,8 @@ addFeatsPerc <- function(gobject, ## update parameters used ## temp_gobj <- update_giotto_params(temp_gobj, - description = "_feats_perc") + description = "_feats_perc" + ) return(temp_gobj) } else { @@ -1878,14 +1969,17 @@ addFeatsPerc <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' findNetworkNeighbors(gobject = g, spatial_network_name = "spatial_network", -#' source_cell_ids = c("AACTCGATGGCGCAGT-1", "GGCTGGCTAGCTTAAA-1")) +#' findNetworkNeighbors( +#' gobject = g, spatial_network_name = "spatial_network", +#' source_cell_ids = c("AACTCGATGGCGCAGT-1", "GGCTGGCTAGCTTAAA-1") +#' ) #' @export -findNetworkNeighbors <- function(gobject, - spat_unit = NULL, - spatial_network_name = NULL, - source_cell_ids = NULL, - name = "nb_cells") { +findNetworkNeighbors <- function( + gobject, + spat_unit = NULL, + spatial_network_name = NULL, + source_cell_ids = NULL, + name = "nb_cells") { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit @@ -1913,11 +2007,14 @@ findNetworkNeighbors <- function(gobject, full_network_DT <- convert_to_full_spatial_network(spatial_network) potential_target_cells <- full_network_DT[ - source %in% source_cells][["target"]] + source %in% source_cells + ][["target"]] source_and_target_cells <- potential_target_cells[ - potential_target_cells %in% source_cells] + potential_target_cells %in% source_cells + ] target_cells <- potential_target_cells[ - !potential_target_cells %in% source_and_target_cells] + !potential_target_cells %in% source_and_target_cells + ] cell_meta <- pDataDT(gobject) diff --git a/R/cell_segmentation.R b/R/cell_segmentation.R index db82d5806..05ff56040 100644 --- a/R/cell_segmentation.R +++ b/R/cell_segmentation.R @@ -16,14 +16,15 @@ #' of the tile: sx (start x), ex (end x), sy, and ey. #' #' @export -doCellSegmentation <- function(raster_img, - folder_path, - reduce_resolution = 4, - overlapping_pixels = 50, - python_path = NULL) { +doCellSegmentation <- function( + raster_img, + folder_path, + reduce_resolution = 4, + overlapping_pixels = 50, + python_path = NULL) { package_check("deepcell", repository = "pip") package_check("PIL", repository = "pip") - + # prepare python path and segmentation script reticulate::use_python(required = TRUE, python = python_path) python_segmentation_function <- system.file("python", diff --git a/R/clustering.R b/R/clustering.R index d57c227ef..e39ab3e06 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -47,24 +47,25 @@ #' #' doLeidenCluster(g) #' @export -doLeidenCluster <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = "leiden_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = "weight", - partition_type = c( - "RBConfigurationVertexPartition", - "ModularityVertexPartition" - ), - init_membership = NULL, - n_iterations = 1000, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +doLeidenCluster <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = "leiden_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = "weight", + partition_type = c( + "RBConfigurationVertexPartition", + "ModularityVertexPartition" + ), + init_membership = NULL, + n_iterations = 1000, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -94,7 +95,8 @@ doLeidenCluster <- function(gobject, ## select partition type partition_type <- match.arg(partition_type, choices = c( - "RBConfigurationVertexPartition", "ModularityVertexPartition") + "RBConfigurationVertexPartition", "ModularityVertexPartition" + ) ) ## check or make paths @@ -106,7 +108,8 @@ doLeidenCluster <- function(gobject, ## prepare python path and louvain script reticulate::use_python(required = TRUE, python = python_path) python_leiden_function <- system.file("python", "python_leiden.py", - package = "Giotto") + package = "Giotto" + ) reticulate::source_python(file = python_leiden_function) ## set seed @@ -118,7 +121,8 @@ doLeidenCluster <- function(gobject, ## extract NN network network_edge_dt <- data.table::as.data.table( - igraph::as_data_frame(x = igraph_object, what = "edges")) + igraph::as_data_frame(x = igraph_object, what = "edges") + ) # data.table variables weight <- NULL @@ -130,7 +134,9 @@ doLeidenCluster <- function(gobject, } else { # weight is defined by attribute of igraph object network_edge_dt <- network_edge_dt[ - , c("from", "to", weight_col), with = FALSE] + , c("from", "to", weight_col), + with = FALSE + ] data.table::setnames(network_edge_dt, weight_col, "weight") } } else { @@ -143,8 +149,10 @@ doLeidenCluster <- function(gobject, ## do python leiden clustering - reticulate::py_set_seed(seed = seed_number, - disable_hash_randomization = TRUE) + reticulate::py_set_seed( + seed = seed_number, + disable_hash_randomization = TRUE + ) pyth_leid_result <- python_leiden( df = network_edge_dt, partition_type = partition_type, @@ -156,7 +164,8 @@ doLeidenCluster <- function(gobject, ) ident_clusters_DT <- data.table::data.table( - cell_ID = pyth_leid_result[[1]], "name" = pyth_leid_result[[2]]) + cell_ID = pyth_leid_result[[1]], "name" = pyth_leid_result[[2]] + ) data.table::setnames(ident_clusters_DT, "name", name) @@ -252,22 +261,23 @@ doLeidenCluster <- function(gobject, #' #' doLeidenClusterIgraph(g) #' @export -doLeidenClusterIgraph <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = "leiden_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - objective_function = c("modularity", "CPM"), - weights = NULL, - resolution_parameter = 1, - beta = 0.01, - initial_membership = NULL, - n_iterations = 1000, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234, - ...) { +doLeidenClusterIgraph <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = "leiden_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + objective_function = c("modularity", "CPM"), + weights = NULL, + resolution_parameter = 1, + beta = 0.01, + initial_membership = NULL, + n_iterations = 1000, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -321,7 +331,8 @@ doLeidenClusterIgraph <- function(gobject, # summarize results ident_clusters_DT <- data.table::data.table( - "cell_ID" = leiden_clusters$names, "name" = leiden_clusters$membership) + "cell_ID" = leiden_clusters$names, "name" = leiden_clusters$membership + ) data.table::setnames(ident_clusters_DT, "name", name) @@ -410,20 +421,23 @@ doLeidenClusterIgraph <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' doGiottoClustree(gobject = g, res_vector = c(0.5, 0.8), return_plot = FALSE, -#' show_plot = FALSE, save_plot = FALSE) +#' doGiottoClustree( +#' gobject = g, res_vector = c(0.5, 0.8), return_plot = FALSE, +#' show_plot = FALSE, save_plot = FALSE +#' ) #' @export -doGiottoClustree <- function(gobject, - res_vector = NULL, - res_seq = NULL, - return_gobject = FALSE, - show_plot = NULL, - save_plot = NULL, - return_plot = NULL, - save_param = list(), - default_save_name = "clustree", - verbose = TRUE, - ...) { +doGiottoClustree <- function( + gobject, + res_vector = NULL, + res_seq = NULL, + return_gobject = FALSE, + show_plot = NULL, + save_plot = NULL, + return_plot = NULL, + save_param = list(), + default_save_name = "clustree", + verbose = TRUE, + ...) { package_check(pkg_name = "clustree", repository = "CRAN") ## setting resolutions to use if (is.null(res_vector)) { @@ -496,20 +510,21 @@ doGiottoClustree <- function(gobject, #' Set \emph{weight_col = NULL} to give equal weight (=1) to each edge. #' @md #' @keywords internal -.doLouvainCluster_community <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = NULL, - louv_random = FALSE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234, - ...) { +.doLouvainCluster_community <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = NULL, + louv_random = FALSE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -544,7 +559,9 @@ doGiottoClustree <- function(gobject, # prepare python path and louvain script reticulate::use_python(required = TRUE, python = python_path) python_louvain_function <- system.file( - "python", "python_louvain.py", package = "Giotto") + "python", "python_louvain.py", + package = "Giotto" + ) reticulate::source_python(file = python_louvain_function) # set seed @@ -555,7 +572,8 @@ doGiottoClustree <- function(gobject, } network_edge_dt <- data.table::as.data.table(igraph::as_data_frame( - x = igraph_object, what = "edges")) + x = igraph_object, what = "edges" + )) # data.table variables weight <- NULL @@ -566,7 +584,9 @@ doGiottoClustree <- function(gobject, } else { # weight is defined by attribute of igraph object network_edge_dt <- network_edge_dt[ - , c("from", "to", weight_col), with = FALSE] + , c("from", "to", weight_col), + with = FALSE + ] setnames(network_edge_dt, weight_col, "weight") } } else { @@ -578,19 +598,24 @@ doGiottoClustree <- function(gobject, # do python louvain clustering if (louv_random == FALSE) { reticulate::py_set_seed( - seed = seed_number, disable_hash_randomization = TRUE) + seed = seed_number, disable_hash_randomization = TRUE + ) pyth_louv_result <- python_louvain( - df = network_edge_dt, resolution = resolution, randomize = FALSE) + df = network_edge_dt, resolution = resolution, randomize = FALSE + ) } else { reticulate::py_set_seed( - seed = seed_number, disable_hash_randomization = TRUE) + seed = seed_number, disable_hash_randomization = TRUE + ) pyth_louv_result <- python_louvain( df = network_edge_dt, resolution = resolution, - random_state = seed_number) + random_state = seed_number + ) } ident_clusters_DT <- data.table::data.table( - cell_ID = rownames(pyth_louv_result), "name" = pyth_louv_result[[1]]) + cell_ID = rownames(pyth_louv_result), "name" = pyth_louv_result[[1]] + ) data.table::setnames(ident_clusters_DT, "name", name) @@ -647,11 +672,13 @@ doGiottoClustree <- function(gobject, fname <- as.character(cl[[1]]) if (fname == "doLouvainCluster") { gobject <- update_giotto_params(gobject, - description = "_cluster", - toplevel = 3) + description = "_cluster", + toplevel = 3 + ) } else { gobject <- update_giotto_params(gobject, - description = "_cluster") + description = "_cluster" + ) } } @@ -686,17 +713,18 @@ doGiottoClustree <- function(gobject, #' in R for more information. #' #' @keywords internal -.doLouvainCluster_multinet <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - gamma = 1, - omega = 1, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +.doLouvainCluster_multinet <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + gamma = 1, + omega = 1, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { if ("multinet" %in% rownames(installed.packages()) == FALSE) { stop( "package 'multinet' is not yet installed \n", @@ -734,7 +762,8 @@ doGiottoClustree <- function(gobject, # multinet::add_vertices_ml( # n = mln_object, vertices = igraph::V(igraph_object)) multinet::add_igraph_layer_ml( - n = mln_object, g = igraph_object, name = name) + n = mln_object, g = igraph_object, name = name + ) # start seed if (isTRUE(set_seed)) { @@ -745,7 +774,8 @@ doGiottoClustree <- function(gobject, cell_ID <- actor <- weight_col <- NULL louvain_clusters <- multinet::glouvain_ml( - n = mln_object, gamma = gamma, omega = omega) + n = mln_object, gamma = gamma, omega = omega + ) ident_clusters_DT <- data.table::as.data.table(louvain_clusters) ident_clusters_DT[, cell_ID := actor] data.table::setnames(ident_clusters_DT, "cid", name) @@ -808,11 +838,13 @@ doGiottoClustree <- function(gobject, fname <- as.character(cl[[1]]) if (fname == "doLouvainCluster") { gobject <- update_giotto_params(gobject, - description = "_cluster", - toplevel = 3) + description = "_cluster", + toplevel = 3 + ) } else { gobject <- update_giotto_params(gobject, - description = "_cluster") + description = "_cluster" + ) } } return(gobject) @@ -861,23 +893,24 @@ doGiottoClustree <- function(gobject, #' #' doLouvainCluster(g) #' @export -doLouvainCluster <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - version = c("community", "multinet"), - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = NULL, - gamma = 1, - omega = 1, - louv_random = FALSE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234, - ...) { +doLouvainCluster <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + version = c("community", "multinet"), + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = NULL, + gamma = 1, + omega = 1, + louv_random = FALSE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -960,16 +993,17 @@ doLouvainCluster <- function(gobject, #' g <- doRandomWalkCluster(g) #' pDataDT(g) #' @export -doRandomWalkCluster <- function(gobject, - name = "random_walk_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - walk_steps = 4, - walk_clusters = 10, - walk_weights = NA, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +doRandomWalkCluster <- function( + gobject, + name = "random_walk_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + walk_steps = 4, + walk_clusters = 10, + walk_weights = NA, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { ## get cell IDs ## cell_ID_vec <- gobject@cell_ID @@ -988,13 +1022,16 @@ doRandomWalkCluster <- function(gobject, } randomwalk_clusters <- igraph::cluster_walktrap( - graph = igraph_object, steps = walk_steps, weights = walk_weights) + graph = igraph_object, steps = walk_steps, weights = walk_weights + ) randomwalk_clusters <- as.factor(igraph::cut_at( - communities = randomwalk_clusters, no = walk_clusters)) + communities = randomwalk_clusters, no = walk_clusters + )) ident_clusters_DT <- data.table::data.table( "cell_ID" = igraph::V(igraph_object)$name, - "name" = randomwalk_clusters) + "name" = randomwalk_clusters + ) data.table::setnames(ident_clusters_DT, "name", name) # exit seed @@ -1007,7 +1044,8 @@ doRandomWalkCluster <- function(gobject, gobject <- addCellMetadata( gobject = gobject, new_metadata = ident_clusters_DT[, c("cell_ID", name), - with = FALSE], + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -1015,7 +1053,8 @@ doRandomWalkCluster <- function(gobject, ## update parameters used ## gobject <- update_giotto_params(gobject, - description = "_randomwalk_cluster") + description = "_randomwalk_cluster" + ) return(gobject) } else { # else return clustering result @@ -1051,17 +1090,18 @@ doRandomWalkCluster <- function(gobject, #' #' doSNNCluster(g) #' @export -doSNNCluster <- function(gobject, - name = "sNN_clus", - nn_network_to_use = "kNN", - network_name = "kNN.pca", - k = 20, - eps = 4, - minPts = 16, - borderPoints = TRUE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +doSNNCluster <- function( + gobject, + name = "sNN_clus", + nn_network_to_use = "kNN", + network_name = "kNN.pca", + k = 20, + eps = 4, + minPts = 16, + borderPoints = TRUE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { ## get cell IDs ## cell_ID_vec <- gobject@cell_ID @@ -1089,18 +1129,24 @@ doSNNCluster <- function(gobject, ## SNN clust igraph_DT <- data.table::as.data.table(igraph::as_data_frame( - igraph_object, what = "edges")) + igraph_object, + what = "edges" + )) igraph_DT <- igraph_DT[order(from)] cell_id_numeric <- unique(x = c(igraph_DT$from, igraph_DT$to)) names(cell_id_numeric) <- seq_along(cell_id_numeric) igraph_DT[, from_T := as.numeric(names(cell_id_numeric[ - cell_id_numeric == from])), by = 1:nrow(igraph_DT)] + cell_id_numeric == from + ])), by = 1:nrow(igraph_DT)] igraph_DT[, to_T := as.numeric(names(cell_id_numeric[ - cell_id_numeric == to])), by = 1:nrow(igraph_DT)] + cell_id_numeric == to + ])), by = 1:nrow(igraph_DT)] temp_igraph_DT <- igraph_DT[, .(from_T, to_T, weight, distance)] data.table::setnames( - temp_igraph_DT, old = c("from_T", "to_T"), new = c("from", "to")) + temp_igraph_DT, + old = c("from_T", "to_T"), new = c("from", "to") + ) kNN_object <- nnDT_to_kNN(nnDT = temp_igraph_DT) sNN_clusters <- dbscan::sNNclust( @@ -1110,7 +1156,8 @@ doSNNCluster <- function(gobject, ident_clusters_DT <- data.table::data.table( "cell_ID" = cell_id_numeric[seq_len(nrow(kNN_object$dist))], - "name" = sNN_clusters$cluster) + "name" = sNN_clusters$cluster + ) data.table::setnames(ident_clusters_DT, "name", name) # exit seed @@ -1131,7 +1178,8 @@ doSNNCluster <- function(gobject, gobject <- addCellMetadata( gobject = gobject, new_metadata = ident_clusters_DT[, c("cell_ID", name), - with = FALSE], + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -1182,27 +1230,28 @@ doSNNCluster <- function(gobject, #' #' doKmeans(g) #' @export -doKmeans <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "original", "pearson", "spearman", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - centers = 10, - iter_max = 100, - nstart = 1000, - algorithm = "Hartigan-Wong", - name = "kmeans", - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +doKmeans <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "original", "pearson", "spearman", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + centers = 10, + iter_max = 100, + nstart = 1000, + algorithm = "Hartigan-Wong", + name = "kmeans", + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1216,7 +1265,9 @@ doKmeans <- function(gobject, dim_reduction_to_use <- match.arg( - dim_reduction_to_use, choices = c("cells", "pca", "umap", "tsne")) + dim_reduction_to_use, + choices = c("cells", "pca", "umap", "tsne") + ) distance_method <- match.arg(distance_method, choices = c( "original", "pearson", "spearman", "euclidean", "maximum", "manhattan", @@ -1240,12 +1291,14 @@ doKmeans <- function(gobject, ) dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% seq_len(ncol(dim_coord[]))] + dimensions_to_use %in% seq_len(ncol(dim_coord[])) + ] matrix_to_use <- dim_coord[][, dimensions_to_use] } else { values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) ## using original matrix ## expr_values <- getExpression( @@ -1259,7 +1312,8 @@ doKmeans <- function(gobject, # subset expression matrix if (!is.null(feats_to_use)) { expr_values[] <- expr_values[][ - rownames(expr_values[]) %in% feats_to_use, ] + rownames(expr_values[]) %in% feats_to_use, + ] } # features as columns @@ -1273,7 +1327,8 @@ doKmeans <- function(gobject, celldist <- matrix_to_use } else if (distance_method %in% c("spearman", "pearson")) { celldist <- stats::as.dist(1 - cor_flex( - x = t_flex(matrix_to_use), method = distance_method)) + x = t_flex(matrix_to_use), method = distance_method + )) } else if (distance_method %in% c( "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski" @@ -1310,7 +1365,6 @@ doKmeans <- function(gobject, ## add clusters to metadata ## if (isTRUE(return_gobject)) { - cluster_names <- names(pDataDT( gobject = gobject, spat_unit = spat_unit, @@ -1350,7 +1404,8 @@ doKmeans <- function(gobject, ## update parameters used ## gobject <- update_giotto_params(gobject, - description = "_kmeans_cluster") + description = "_kmeans_cluster" + ) return(gobject) } else { return(ident_clusters_DT) @@ -1388,30 +1443,31 @@ doKmeans <- function(gobject, #' #' doHclust(g) #' @export -doHclust <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "pearson", "spearman", "original", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - agglomeration_method = c( - "ward.D2", "ward.D", "single", - "complete", "average", "mcquitty", - "median", "centroid" - ), - k = 10, - h = NULL, - name = "hclust", - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +doHclust <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "pearson", "spearman", "original", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + agglomeration_method = c( + "ward.D2", "ward.D", "single", + "complete", "average", "mcquitty", + "median", "centroid" + ), + k = 10, + h = NULL, + name = "hclust", + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1463,7 +1519,8 @@ doHclust <- function(gobject, ) dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% seq_len(ncol(dim_coord))] + dimensions_to_use %in% seq_len(ncol(dim_coord)) + ] matrix_to_use <- dim_coord[, dimensions_to_use] } else { ## using original matrix ## @@ -1478,7 +1535,8 @@ doHclust <- function(gobject, # subset expression matrix if (!is.null(feats_to_use)) { expr_values <- expr_values[ - rownames(expr_values) %in% feats_to_use, ] + rownames(expr_values) %in% feats_to_use, + ] } # features as columns @@ -1491,7 +1549,8 @@ doHclust <- function(gobject, celldist <- matrix_to_use } else if (distance_method %in% c("spearman", "pearson")) { celldist <- stats::as.dist(1 - cor_flex(x = t_flex( - matrix_to_use), method = distance_method)) + matrix_to_use + ), method = distance_method)) } else if (distance_method %in% c( "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski" @@ -1562,7 +1621,8 @@ doHclust <- function(gobject, ## update parameters used ## gobject <- update_giotto_params(gobject, - description = "_hierarchical_cluster") + description = "_hierarchical_cluster" + ) return(gobject) } else { return(list("hclust" = hclusters, "DT" = ident_clusters_DT)) @@ -1628,59 +1688,62 @@ doHclust <- function(gobject, #' #' clusterCells(g) #' @export -clusterCells <- function(gobject, - cluster_method = c( - "leiden", - "louvain_community", "louvain_multinet", - "randomwalk", "sNNclust", - "kmeans", "hierarchical" - ), - name = "cluster_name", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - pyth_leid_resolution = 1, - pyth_leid_weight_col = "weight", - pyth_leid_part_type = c("RBConfigurationVertexPartition", - "ModularityVertexPartition"), - pyth_leid_init_memb = NULL, - pyth_leid_iterations = 1000, - pyth_louv_resolution = 1, - pyth_louv_weight_col = NULL, - python_louv_random = FALSE, - python_path = NULL, - louvain_gamma = 1, - louvain_omega = 1, - walk_steps = 4, - walk_clusters = 10, - walk_weights = NA, - sNNclust_k = 20, - sNNclust_eps = 4, - sNNclust_minPts = 16, - borderPoints = TRUE, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "original", "pearson", "spearman", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - km_centers = 10, - km_iter_max = 100, - km_nstart = 1000, - km_algorithm = "Hartigan-Wong", - hc_agglomeration_method = c( - "ward.D2", "ward.D", "single", - "complete", "average", "mcquitty", - "median", "centroid" - ), - hc_k = 10, - hc_h = NULL, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +clusterCells <- function( + gobject, + cluster_method = c( + "leiden", + "louvain_community", "louvain_multinet", + "randomwalk", "sNNclust", + "kmeans", "hierarchical" + ), + name = "cluster_name", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + pyth_leid_resolution = 1, + pyth_leid_weight_col = "weight", + pyth_leid_part_type = c( + "RBConfigurationVertexPartition", + "ModularityVertexPartition" + ), + pyth_leid_init_memb = NULL, + pyth_leid_iterations = 1000, + pyth_louv_resolution = 1, + pyth_louv_weight_col = NULL, + python_louv_random = FALSE, + python_path = NULL, + louvain_gamma = 1, + louvain_omega = 1, + walk_steps = 4, + walk_clusters = 10, + walk_weights = NA, + sNNclust_k = 20, + sNNclust_eps = 4, + sNNclust_minPts = 16, + borderPoints = TRUE, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "original", "pearson", "spearman", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + km_centers = 10, + km_iter_max = 100, + km_nstart = 1000, + km_algorithm = "Hartigan-Wong", + hc_agglomeration_method = c( + "ward.D2", "ward.D", "single", + "complete", "average", "mcquitty", + "median", "centroid" + ), + hc_k = 10, + hc_h = NULL, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { ## select cluster method cluster_method <- match.arg( arg = cluster_method, @@ -1869,8 +1932,10 @@ clusterCells <- function(gobject, #' subClusterCells(g, cluster_column = "leiden_clus") #' #' # use louvain instead -#' subClusterCells(g, cluster_column = "leiden_clus", -#' cluster_method = "louvain_community") +#' subClusterCells(g, +#' cluster_column = "leiden_clus", +#' cluster_method = "louvain_community" +#' ) #' #' # directly call the more specific functions #' doLeidenSubCluster(g, cluster_column = "leiden_clus") @@ -1884,42 +1949,40 @@ NULL #' @rdname subClusterCells #' @export -subClusterCells <- function( - gobject, - name = "sub_clus", - cluster_method = c( - "leiden", - "louvain_community", - "louvain_multinet" - ), - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = deprecated(), - hvf_param = list( - reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized" - ), - hvg_min_perc_cells = deprecated(), - hvf_min_perc_cells = 5, - hvg_mean_expr_det = deprecated(), - hvf_mean_expr_det = 1, - use_all_genes_as_hvg = deprecated(), - use_all_feats_as_hvf = FALSE, - min_nr_of_hvg = deprecated(), - min_nr_of_hvf = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 1, - n_iterations = 1000, - gamma = 1, - omega = 1, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE -) { +subClusterCells <- function(gobject, + name = "sub_clus", + cluster_method = c( + "leiden", + "louvain_community", + "louvain_multinet" + ), + cluster_column = NULL, + selected_clusters = NULL, + hvg_param = deprecated(), + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 1, + n_iterations = 1000, + gamma = 1, + omega = 1, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { ## select cluster method cluster_method <- match.arg(arg = cluster_method, choices = c( "leiden", @@ -1930,7 +1993,8 @@ subClusterCells <- function( # deprecations .dep_param <- function(...) { GiottoUtils::deprecate_param( - ..., fun = "subClusterCells", when = "4.0.9" + ..., + fun = "subClusterCells", when = "4.0.9" ) } @@ -1961,29 +2025,35 @@ subClusterCells <- function( )) result <- switch(cluster_method, - "leiden" = { - do.call(doLeidenSubCluster, args = c( - common_args, - list(resolution = resolution, - n_iterations = n_iterations, - python_path = python_path, - toplevel = 4) - )) - }, - "louvain_community" = { - do.call(.doLouvainSubCluster_community, args = c( - common_args, - list(resolution = resolution, - python_path = python_path) - )) - }, - "louvain_multinet" = { - do.call(.doLouvainSubCluster_multinet, args = c( - common_args, - list(gamma = gamma, - omega = omega) - )) - } + "leiden" = { + do.call(doLeidenSubCluster, args = c( + common_args, + list( + resolution = resolution, + n_iterations = n_iterations, + python_path = python_path, + toplevel = 4 + ) + )) + }, + "louvain_community" = { + do.call(.doLouvainSubCluster_community, args = c( + common_args, + list( + resolution = resolution, + python_path = python_path + ) + )) + }, + "louvain_multinet" = { + do.call(.doLouvainSubCluster_multinet, args = c( + common_args, + list( + gamma = gamma, + omega = omega + ) + )) + } ) return(result) @@ -1997,33 +2067,36 @@ subClusterCells <- function( #' the Leiden algorithm #' @param toplevel do not use #' @export -doLeidenSubCluster <- function(gobject, - feat_type = NULL, - name = "sub_leiden_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvf_param = list(reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized"), - hvg_param = deprecated(), - hvf_min_perc_cells = 5, - hvg_min_perc_cells = deprecated(), - hvf_mean_expr_det = 1, - hvg_mean_expr_det = deprecated(), - use_all_feats_as_hvf = FALSE, - use_all_genes_as_hvg = deprecated(), - min_nr_of_hvf = 5, - min_nr_of_hvg = deprecated(), - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - n_iterations = 500, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - toplevel = 2, - verbose = TRUE) { +doLeidenSubCluster <- function( + gobject, + feat_type = NULL, + name = "sub_leiden_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_param = deprecated(), + hvf_min_perc_cells = 5, + hvg_min_perc_cells = deprecated(), + hvf_mean_expr_det = 1, + hvg_mean_expr_det = deprecated(), + use_all_feats_as_hvf = FALSE, + use_all_genes_as_hvg = deprecated(), + min_nr_of_hvf = 5, + min_nr_of_hvg = deprecated(), + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + n_iterations = 500, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + toplevel = 2, + verbose = TRUE) { # specify feat_type if (is.null(feat_type)) { feat_type <- gobject@expression_feat[[1]] @@ -2032,7 +2105,8 @@ doLeidenSubCluster <- function(gobject, # deprecated arguments .dep_param <- function(x, y) { GiottoUtils::deprecate_param( - x, y, fun = "doLeidenSubCluster", when = "4.0.9" + x, y, + fun = "doLeidenSubCluster", when = "4.0.9" ) } @@ -2065,7 +2139,8 @@ doLeidenSubCluster <- function(gobject, ## get subset subset_cell_IDs <- cell_metadata[ - get(cluster_column) == cluster][["cell_ID"]] + get(cluster_column) == cluster + ][["cell_ID"]] temp_giotto <- subsetGiotto( gobject = gobject, feat_type = feat_type, @@ -2077,7 +2152,8 @@ doLeidenSubCluster <- function(gobject, temp_cluster <- data.table( "cell_ID" = subset_cell_IDs, "tempclus" = 1, - "parent_cluster" = cluster) + "parent_cluster" = cluster + ) iter_list[[cluster]] <- temp_cluster } else { # continue for selected clusters or all clusters if there is no @@ -2093,7 +2169,8 @@ doLeidenSubCluster <- function(gobject, ## calculate variable feats hvf_param$verbose <- FALSE temp_giotto <- do.call( - "calculateHVF", c(gobject = temp_giotto, hvf_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param) + ) ## get hvg feat_metadata <- fDataDT(temp_giotto, @@ -2101,15 +2178,19 @@ doLeidenSubCluster <- function(gobject, ) usefeats <- feat_metadata[ hvf == "yes" & perc_cells >= hvf_min_perc_cells & - mean_expr_det >= hvf_mean_expr_det]$feat_ID + mean_expr_det >= hvf_mean_expr_det + ]$feat_ID ## catch too low number of hvg if (use_all_feats_as_hvf == TRUE) { usefeats == feat_metadata$feat_ID } else { - if (verbose == TRUE) - cat(length(usefeats), - "highly variable feats have been selected\n") + if (verbose == TRUE) { + cat( + length(usefeats), + "highly variable feats have been selected\n" + ) + } if (length(usefeats) <= min_nr_of_hvf) { message("too few feats, will continue with all feats instead") @@ -2118,17 +2199,21 @@ doLeidenSubCluster <- function(gobject, } ## run PCA - pca_param$verbose = FALSE + pca_param$verbose <- FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, feats_to_use = list(usefeats), - pca_param)) + c( + gobject = temp_giotto, feats_to_use = list(usefeats), + pca_param + ) + ) ## nearest neighbor and clustering nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", - c(gobject = temp_giotto, k = k_neighbors, nn_param)) + c(gobject = temp_giotto, k = k_neighbors, nn_param) + ) ## Leiden Cluster ## TO DO: expand to all clustering options @@ -2172,7 +2257,8 @@ doLeidenSubCluster <- function(gobject, ## update parameters used ## gobject <- update_giotto_params( - gobject, description = "_sub_cluster", toplevel = toplevel + gobject, + description = "_sub_cluster", toplevel = toplevel ) return(gobject) } else { @@ -2183,28 +2269,29 @@ doLeidenSubCluster <- function(gobject, # subcluster cells using a NN-network and the Louvain community # detection algorithm -.doLouvainSubCluster_community <- function(gobject, - name = "sub_louvain_comm_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvf_param = list( - reverse_log_scale = TRUE, - difference_in_cov = 1, - expression_values = "normalized" - ), - hvf_min_perc_cells = 5, - hvf_mean_expr_det = 1, - use_all_feats_as_hvf = FALSE, - min_nr_of_hvf = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +.doLouvainSubCluster_community <- function( + gobject, + name = "sub_louvain_comm_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, + difference_in_cov = 1, + expression_values = "normalized" + ), + hvf_min_perc_cells = 5, + hvf_mean_expr_det = 1, + use_all_feats_as_hvf = FALSE, + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { iter_list <- list() cell_metadata <- pDataDT(gobject) @@ -2222,16 +2309,20 @@ doLeidenSubCluster <- function(gobject, ## get subset subset_cell_IDs <- cell_metadata[ - get(cluster_column) == cluster][["cell_ID"]] - temp_giotto <- subsetGiotto(gobject = gobject, - cell_ids = subset_cell_IDs) + get(cluster_column) == cluster + ][["cell_ID"]] + temp_giotto <- subsetGiotto( + gobject = gobject, + cell_ids = subset_cell_IDs + ) ## if cluster is not selected if (!is.null(selected_clusters) & !cluster %in% selected_clusters) { temp_cluster <- data.table( "cell_ID" = subset_cell_IDs, "tempclus" = 1, - "parent_cluster" = cluster) + "parent_cluster" = cluster + ) iter_list[[cluster + index_offset]] <- temp_cluster } else { # continue for selected clusters or all clusters if there is no @@ -2245,7 +2336,8 @@ doLeidenSubCluster <- function(gobject, ## calculate variable genes hvf_param$verbose <- FALSE temp_giotto <- do.call( - "calculateHVF", c(gobject = temp_giotto, hvf_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param) + ) ## get hvf feat_metadata <- fDataDT(temp_giotto) @@ -2256,15 +2348,19 @@ doLeidenSubCluster <- function(gobject, usefeats <- feat_metadata[ hvf == "yes" & perc_cells >= hvf_min_perc_cells & - mean_expr_det >= hvf_mean_expr_det]$feat_ID + mean_expr_det >= hvf_mean_expr_det + ]$feat_ID ## catch too low number of hvf if (isTRUE(use_all_feats_as_hvf)) { usefeats == feat_metadata$feat_ID } else { - if (isTRUE(verbose)) - cat(length(usefeats), - "highly variable features have been selected\n") + if (isTRUE(verbose)) { + cat( + length(usefeats), + "highly variable features have been selected\n" + ) + } if (length(usefeats) <= min_nr_of_hvf) { wrap_msg("too few features will continue with all features instead") @@ -2276,14 +2372,18 @@ doLeidenSubCluster <- function(gobject, pca_param$verbose <- FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, feats_to_use = list(usefeats), - pca_param)) + c( + gobject = temp_giotto, feats_to_use = list(usefeats), + pca_param + ) + ) ## nearest neighbor and clustering nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", - c(gobject = temp_giotto, k = k_neighbors, nn_param)) + c(gobject = temp_giotto, k = k_neighbors, nn_param) + ) ## TO DO: expand to all clustering options temp_cluster <- .doLouvainCluster_community( @@ -2354,27 +2454,28 @@ doLeidenSubCluster <- function(gobject, # subcluster cells using a NN-network and the Louvain multinet # detection algorithm -.doLouvainSubCluster_multinet <- function(gobject, - name = "sub_louvain_mult_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvf_param = list( - reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized" - ), - hvf_min_perc_cells = 5, - hvf_mean_expr_det = 1, - use_all_feats_as_hvf = FALSE, - min_nr_of_hvf = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - gamma = 1, - omega = 1, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +.doLouvainSubCluster_multinet <- function( + gobject, + name = "sub_louvain_mult_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvf_min_perc_cells = 5, + hvf_mean_expr_det = 1, + use_all_feats_as_hvf = FALSE, + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + gamma = 1, + omega = 1, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { if ("multinet" %in% rownames(installed.packages()) == FALSE) { stop( "package 'multinet' is not yet installed \n", @@ -2406,16 +2507,20 @@ doLeidenSubCluster <- function(gobject, ## get subset subset_cell_IDs <- cell_metadata[ - get(cluster_column) == cluster][["cell_ID"]] - temp_giotto <- subsetGiotto(gobject = gobject, - cell_ids = subset_cell_IDs) + get(cluster_column) == cluster + ][["cell_ID"]] + temp_giotto <- subsetGiotto( + gobject = gobject, + cell_ids = subset_cell_IDs + ) ## if cluster is not selected if (!is.null(selected_clusters) & !cluster %in% selected_clusters) { temp_cluster <- data.table( "cell_ID" = subset_cell_IDs, "tempclus" = 1, - "parent_cluster" = cluster) + "parent_cluster" = cluster + ) iter_list[[cluster + index_offset]] <- temp_cluster } else { # continue for selected clusters or all clusters if there is no @@ -2429,21 +2534,26 @@ doLeidenSubCluster <- function(gobject, ## calculate variable genes hvf_param$verbose <- FALSE temp_giotto <- do.call( - "calculateHVF", c(gobject = temp_giotto, hvf_param)) + "calculateHVF", c(gobject = temp_giotto, hvf_param) + ) ## get hvf feat_metadata <- fDataDT(temp_giotto) usefeats <- feat_metadata[ hvf == "yes" & perc_cells >= hvf_min_perc_cells & - mean_expr_det >= hvf_mean_expr_det]$feat_ID + mean_expr_det >= hvf_mean_expr_det + ]$feat_ID ## catch too low number of hvf if (use_all_feats_as_hvf == TRUE) { usefeats == feat_metadata$feat_ID } else { - if (verbose == TRUE) - cat(length(usefeats), - "highly variable features have been selected\n") + if (verbose == TRUE) { + cat( + length(usefeats), + "highly variable features have been selected\n" + ) + } if (length(usefeats) <= min_nr_of_hvf) { message("too few features, will continue with all features instead") @@ -2455,14 +2565,18 @@ doLeidenSubCluster <- function(gobject, pca_param$verbose <- FALSE temp_giotto <- do.call( "runPCA", - c(gobject = temp_giotto, feats_to_use = list(usefeats), - pca_param)) + c( + gobject = temp_giotto, feats_to_use = list(usefeats), + pca_param + ) + ) ## nearest neighbor and clustering nn_param$verbose <- FALSE temp_giotto <- do.call( "createNearestNetwork", - c(gobject = temp_giotto, k = k_neighbors, nn_param)) + c(gobject = temp_giotto, k = k_neighbors, nn_param) + ) ## TO DO: expand to all clustering options temp_cluster <- .doLouvainCluster_multinet( @@ -2532,42 +2646,44 @@ doLeidenSubCluster <- function(gobject, #' @param version version of Louvain algorithm to use. One of "community" or #' "multinet", with the default being "community" #' @export -doLouvainSubCluster <- function(gobject, - name = "sub_louvain_clus", - version = c("community", "multinet"), - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = deprecated(), - hvf_param = list( - reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized" - ), - hvg_min_perc_cells = deprecated(), - hvf_min_perc_cells = 5, - hvg_mean_expr_det = deprecated(), - hvf_mean_expr_det = 1, - use_all_genes_as_hvg = deprecated(), - use_all_feats_as_hvf = FALSE, - min_nr_of_hvg = deprecated(), - min_nr_of_hvf = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - gamma = 1, - omega = 1, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +doLouvainSubCluster <- function( + gobject, + name = "sub_louvain_clus", + version = c("community", "multinet"), + cluster_column = NULL, + selected_clusters = NULL, + hvg_param = deprecated(), + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + gamma = 1, + omega = 1, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { ## louvain clustering version to use version <- match.arg(version, c("community", "multinet")) # deprecations .dep_param <- function(x, y) { GiottoUtils::deprecate_param( - x, y, fun = "doLouvainSubCluster", when = "4.0.9" + x, y, + fun = "doLouvainSubCluster", when = "4.0.9" ) } @@ -2651,12 +2767,13 @@ doLouvainSubCluster <- function(gobject, #' #' getClusterSimilarity(g, cluster_column = "leiden_clus") #' @export -getClusterSimilarity <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", "spearman")) { +getClusterSimilarity <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman")) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2674,7 +2791,8 @@ getClusterSimilarity <- function(gobject, cor <- match.arg(cor, c("pearson", "spearman")) values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) metadata <- pDataDT(gobject, feat_type = feat_type, @@ -2699,26 +2817,38 @@ getClusterSimilarity <- function(gobject, metadata_cols = cluster_column ) dcast_metatable <- data.table::dcast.data.table( - metatable, formula = variable ~ uniq_ID, value.var = "value") + metatable, + formula = variable ~ uniq_ID, value.var = "value" + ) testmatrix <- dt_to_matrix(x = dcast_metatable) # correlation matrix cormatrix <- cor_flex(x = testmatrix, method = cor) cor_table <- data.table::as.data.table(reshape2::melt(cormatrix)) data.table::setnames( - cor_table, old = c("Var1", "Var2"), c("group1", "group2")) + cor_table, + old = c("Var1", "Var2"), c("group1", "group2") + ) cor_table[, c("group1", "group2") := list( - as.character(group1), as.character(group2))] + as.character(group1), as.character(group2) + )] cor_table[, unified_group := paste( - sort(c(group1, group2)), collapse = "--"), - by = 1:nrow(cor_table)] + sort(c(group1, group2)), + collapse = "--" + ), + by = 1:nrow(cor_table) + ] cor_table <- cor_table[!duplicated(cor_table[, .(value, unified_group)])] cor_table <- merge( - cor_table, by.x = "group1", clustersize, by.y = "clusters") + cor_table, + by.x = "group1", clustersize, by.y = "clusters" + ) setnames(cor_table, "size", "group1_size") cor_table <- merge( - cor_table, by.x = "group2", clustersize, by.y = "clusters") + cor_table, + by.x = "group2", clustersize, by.y = "clusters" + ) setnames(cor_table, "size", "group2_size") return(cor_table) @@ -2762,19 +2892,20 @@ getClusterSimilarity <- function(gobject, #' #' mergeClusters(g, cluster_column = "leiden_clus") #' @export -mergeClusters <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", "spearman"), - new_cluster_name = "merged_cluster", - min_cor_score = 0.8, - max_group_size = 20, - force_min_group_size = 10, - max_sim_clusters = 10, - return_gobject = TRUE, - verbose = TRUE) { +mergeClusters <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman"), + new_cluster_name = "merged_cluster", + min_cor_score = 0.8, + max_group_size = 20, + force_min_group_size = 10, + max_sim_clusters = 10, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2789,7 +2920,8 @@ mergeClusters <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) # correlation score to be used cor <- match.arg(cor, c("pearson", "spearman")) @@ -2812,12 +2944,16 @@ mergeClusters <- function(gobject, min_reached <- cumsum_reached <- NULL filter_set_first <- similarityDT[group1 != group2][ - group1_size < max_group_size][value >= min_cor_score] + group1_size < max_group_size + ][value >= min_cor_score] # 2. small clusters minimum_set <- similarityDT[group1 != group2][ - group1_size < force_min_group_size][order(-value)][ - , head(.SD, max_sim_clusters), by = group1] + group1_size < force_min_group_size + ][order(-value)][ + , head(.SD, max_sim_clusters), + by = group1 + ] # 2.1 take all clusters necessary to reach force_min_group_size minimum_set[, cumsum_val := cumsum(group2_size) + group1_size, by = group1] @@ -2847,7 +2983,8 @@ mergeClusters <- function(gobject, } else { who <- which(res == TRUE)[[1]] finallist[[who]] <- unique( - c(finallist[[who]], first_clus, second_clus)) + c(finallist[[who]], first_clus, second_clus) + ) } } @@ -2894,7 +3031,9 @@ mergeClusters <- function(gobject, spat_unit = spat_unit, feat_type = feat_type, new_metadata = metadata[ - , c("cell_ID", new_cluster_name), with = FALSE], + , c("cell_ID", new_cluster_name), + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -2932,10 +3071,12 @@ mergeClusters <- function(gobject, dend_1 <- dendextend::find_dendrogram( dend = dend, - selected_labels = names(numerical_leaves[selected_labels_ind_1])) + selected_labels = names(numerical_leaves[selected_labels_ind_1]) + ) dend_2 <- dendextend::find_dendrogram( dend = dend, - selected_labels = names(numerical_leaves[selected_labels_ind_2])) + selected_labels = names(numerical_leaves[selected_labels_ind_2]) + ) return(list(theight = top_height, dend1 = dend_1, dend2 = dend_2)) } @@ -2972,7 +3113,9 @@ mergeClusters <- function(gobject, # check which heights are available available_h <- as.numeric(unlist(lapply( - dend_list, FUN = function(x) attributes(x)$height))) + dend_list, + FUN = function(x) attributes(x)$height + ))) # get dendrogram associated with height and split in two select_dend_ind <- which.min(abs(available_h - n_height)) @@ -2982,13 +3125,19 @@ mergeClusters <- function(gobject, # find leave labels toph <- tempres[[1]] first_group <- dendextend::get_leaves_attr( - tempres[[2]], attribute = "label") + tempres[[2]], + attribute = "label" + ) second_group <- dendextend::get_leaves_attr( - tempres[[3]], attribute = "label") + tempres[[3]], + attribute = "label" + ) - result_list[[j]] <- list("height" = toph, - "first" = first_group, - "sec" = second_group) + result_list[[j]] <- list( + "height" = toph, + "first" = first_group, + "sec" = second_group + ) j <- j + 1 @@ -3034,17 +3183,18 @@ mergeClusters <- function(gobject, #' #' getDendrogramSplits(g, cluster_column = "leiden_clus") #' @export -getDendrogramSplits <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", "spearman"), - distance = "ward.D", - h = NULL, - h_color = "red", - show_dend = TRUE, - verbose = TRUE) { +getDendrogramSplits <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman"), + distance = "ward.D", + h = NULL, + h_color = "red", + show_dend = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -3065,7 +3215,8 @@ getDendrogramSplits <- function(gobject, cor <- match.arg(cor, c("pearson", "spearman")) values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) # create average expression matrix per cluster metatable <- calculateMetaTable( @@ -3076,7 +3227,9 @@ getDendrogramSplits <- function(gobject, metadata_cols = cluster_column ) dcast_metatable <- data.table::dcast.data.table( - metatable, formula = variable ~ uniq_ID, value.var = "value") + metatable, + formula = variable ~ uniq_ID, value.var = "value" + ) testmatrix <- dt_to_matrix(x = dcast_metatable) # correlation @@ -3101,7 +3254,8 @@ getDendrogramSplits <- function(gobject, splitList <- .node_clusters(hclus_obj = corclus, verbose = verbose) splitDT <- data.table::as.data.table(t_flex( - data.table::as.data.table(splitList[[2]]))) + data.table::as.data.table(splitList[[2]]) + )) colnames(splitDT) <- c("node_h", "tree_1", "tree_2") splitDT[, nodeID := paste0("node_", seq_len(.N))] @@ -3154,27 +3308,30 @@ getDendrogramSplits <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- pDataDT(g) -#' g_small <- subsetGiotto(g, cell_ids = sample(x$cell_ID,300)) -#' doClusterProjection(target_gobject = g, source_gobject = g_small, -#' source_cluster_labels = "leiden_clus") +#' g_small <- subsetGiotto(g, cell_ids = sample(x$cell_ID, 300)) +#' doClusterProjection( +#' target_gobject = g, source_gobject = g_small, +#' source_cluster_labels = "leiden_clus" +#' ) #' @export -doClusterProjection <- function(target_gobject, - target_cluster_label_name = "knn_labels", - spat_unit = NULL, - feat_type = NULL, - source_gobject, - source_cluster_labels = NULL, - reduction = "cells", - reduction_method = "pca", - reduction_name = "pca", - dimensions_to_use = 1:10, - knn_k = 10, - prob = FALSE, - algorithm = c( - "kd_tree", - "cover_tree", "brute" - ), - return_gobject = TRUE) { +doClusterProjection <- function( + target_gobject, + target_cluster_label_name = "knn_labels", + spat_unit = NULL, + feat_type = NULL, + source_gobject, + source_cluster_labels = NULL, + reduction = "cells", + reduction_method = "pca", + reduction_name = "pca", + dimensions_to_use = 1:10, + knn_k = 10, + prob = FALSE, + algorithm = c( + "kd_tree", + "cover_tree", "brute" + ), + return_gobject = TRUE) { # NSE vars cell_ID <- temp_name_prob <- NULL @@ -3215,7 +3372,8 @@ doClusterProjection <- function(target_gobject, dim_coord <- dim_obj[] dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% seq_len(ncol(dim_coord))] + dimensions_to_use %in% seq_len(ncol(dim_coord)) + ] matrix_to_use <- dim_coord[, dimensions_to_use] ## create the training and testset from the matrix @@ -3224,12 +3382,14 @@ doClusterProjection <- function(target_gobject, # (w/ labels) # and target giotto object train <- matrix_to_use[ - rownames(matrix_to_use) %in% names(source_annot_vec), ] + rownames(matrix_to_use) %in% names(source_annot_vec), + ] train <- train[match(names(source_annot_vec), rownames(train)), ] # the test set are the remaining cell_IDs that need a label test <- matrix_to_use[ - !rownames(matrix_to_use) %in% names(source_annot_vec), ] + !rownames(matrix_to_use) %in% names(source_annot_vec), + ] cl <- source_annot_vec # make prediction @@ -3267,14 +3427,18 @@ doClusterProjection <- function(target_gobject, if (isTRUE(prob)) { cell_meta_target[, temp_name_prob := probs[cell_ID]] cell_meta_target <- cell_meta_target[ - , .(cell_ID, temp_name, temp_name_prob)] + , .(cell_ID, temp_name, temp_name_prob) + ] cell_meta_target[, temp_name_prob := ifelse( - is.na(temp_name_prob), 1, temp_name_prob)] + is.na(temp_name_prob), 1, temp_name_prob + )] data.table::setnames(cell_meta_target, old = c("temp_name", "temp_name_prob"), - new = c(target_cluster_label_name, - paste0(target_cluster_label_name, "_prob")) + new = c( + target_cluster_label_name, + paste0(target_cluster_label_name, "_prob") + ) ) } else { cell_meta_target <- cell_meta_target[, .(cell_ID, temp_name)] @@ -3295,7 +3459,8 @@ doClusterProjection <- function(target_gobject, feat_type = feat_type, new_metadata = cell_meta_target[ , c("cell_ID", target_cluster_label_name, prob_label), - with = FALSE], + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -3305,7 +3470,9 @@ doClusterProjection <- function(target_gobject, spat_unit = spat_unit, feat_type = feat_type, new_metadata = cell_meta_target[ - , c("cell_ID", target_cluster_label_name), with = FALSE], + , c("cell_ID", target_cluster_label_name), + with = FALSE + ], by_column = TRUE, column_cell_ID = "cell_ID" ) diff --git a/R/convenience.R b/R/convenience.R index e94f8e20c..69fe5feee 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -13,12 +13,12 @@ #' reader functions should be built using it as a base. #' @param spat_method spatial method for which the data is being read #' @param data_dir exported data directory to read from -#' @param dir_items named list of directory items to expect and keywords to +#' @param dir_items named list of directory items to expect and keywords to #' match #' @param data_to_use character. Which type(s) of expression data to build the #' gobject with. Values should match with a *workflow* item in require_data_DT #' (see details) -#' @param require_data_DT data.table detailing if expected data items are +#' @param require_data_DT data.table detailing if expected data items are #' required or optional for each \code{data_to_use} *workflow* #' @param cores cores to use #' @param verbose be verbose @@ -31,10 +31,10 @@ #' \item{1. detection of items within \code{data_dir} by looking for keywords #' assigned through \code{dir_items}} #' \item{2. check of detected items to see if everything needed has been found. -#' Dictionary of necessary vs optional items for each \code{data_to_use} +#' Dictionary of necessary vs optional items for each \code{data_to_use} #' *workflow* is provided through \code{require_data_DT}} -#' \item{3. if multiple filepaths are found to be matching then select the -#' first one. This function is only intended to find the first level +#' \item{3. if multiple filepaths are found to be matching then select the +#' first one. This function is only intended to find the first level #' subdirectories and files.} #' } #' @@ -82,27 +82,32 @@ NULL #' @describeIn read_data_folder Should not be used directly #' @keywords internal -.read_data_folder <- function(spat_method = NULL, - data_dir = NULL, - dir_items, - data_to_use, - load_format = NULL, - require_data_DT, - cores = NA, - verbose = NULL, - toplevel = 2L) { +.read_data_folder <- function( + spat_method = NULL, + data_dir = NULL, + dir_items, + data_to_use, + load_format = NULL, + require_data_DT, + cores = NA, + verbose = NULL, + toplevel = 2L) { ch <- box_chars() # 0. check params if (is.null(data_dir) || !dir.exists(data_dir)) { - .gstop(.n = toplevel, "The full path to a", spat_method, - "directory must be given.") + .gstop( + .n = toplevel, "The full path to a", spat_method, + "directory must be given." + ) } vmsg(.v = verbose, "A structured", spat_method, "directory will be used") if (!data_to_use %in% require_data_DT$workflow) { - .gstop(.n = toplevel, - "Data requirements for data_to_use not found in require_data_DT") + .gstop( + .n = toplevel, + "Data requirements for data_to_use not found in require_data_DT" + ) } # 1. detect items @@ -126,10 +131,12 @@ NULL .initial = paste0(ch$s, "> "), item, " found" ) - for (item_i in seq_along(dir_items[[item]])) { + for (item_i in seq_along(dir_items[[item]])) { # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) + subItem <- gsub( + pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]] + ) vmsg( .v = verbose, .is_debug = TRUE, .initial = paste0(ch$s, ch$s, ch$l, ch$h, ch$h), @@ -147,13 +154,16 @@ NULL require_data_DT <- require_data_DT[workflow == data_to_use, ] - if (!is.null(load_format)) + if (!is.null(load_format)) { require_data_DT <- require_data_DT[filetype == load_format, ] + } - if (item %in% require_data_DT[needed == TRUE, item]) + if (item %in% require_data_DT[needed == TRUE, item]) { stop(item, " is missing") - if (item %in% require_data_DT[needed == FALSE, item]) + } + if (item %in% require_data_DT[needed == FALSE, item]) { warning(item, "is missing (optional)") + } } } @@ -193,7 +203,7 @@ NULL #' @title Create a giotto object from 10x visium data #' @name createGiottoVisiumObject -#' @description Create Giotto object directly from a 10X visium folder. Also +#' @description Create Giotto object directly from a 10X visium folder. Also #' accepts visium H5 outputs. #' #' @param visium_dir path to the 10X visium directory [required] @@ -202,7 +212,7 @@ NULL #' @param h5_visium_path path to visium 10X .h5 file #' @param h5_gene_ids gene names as symbols (default) or ensemble gene ids #' @param h5_tissue_positions_path path to tissue locations (.csv file) -#' @param h5_image_png_path path to tissue .png file (optional). Image +#' @param h5_image_png_path path to tissue .png file (optional). Image #' autoscaling looks for matches in the filename for either 'hires' or 'lowres' #' @param h5_json_scalefactors_path path to .json scalefactors (optional) #' @param png_name select name of png to use (see details) @@ -211,11 +221,11 @@ NULL #' @param xmin_adj deprecated #' @param ymax_adj deprecated #' @param ymin_adj deprecated -#' @param instructions list of instructions or output result from +#' @param instructions list of instructions or output result from #' \code{\link[GiottoClass]{createGiottoInstructions}} -#' @param cores how many cores or threads to use to read data if paths are +#' @param cores how many cores or threads to use to read data if paths are #' provided -#' @param expression_matrix_class class of expression matrix to use +#' @param expression_matrix_class class of expression matrix to use #' (e.g. 'dgCMatrix', 'DelayedArray') #' @param h5_file optional path to create an on-disk h5 file #' @param verbose be verbose @@ -239,31 +249,32 @@ NULL #' } #' #' @export -createGiottoVisiumObject <- function(visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - h5_visium_path = NULL, - h5_gene_ids = c("symbols", "ensembl"), - h5_tissue_positions_path = NULL, - h5_image_png_path = NULL, - h5_json_scalefactors_path = NULL, - png_name = NULL, - do_manual_adj = FALSE, # deprecated - xmax_adj = 0, # deprecated - xmin_adj = 0, # deprecated - ymax_adj = 0, # deprecated - ymin_adj = 0, # deprecated - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - cores = NA, - verbose = NULL) { +createGiottoVisiumObject <- function( + visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + h5_visium_path = NULL, + h5_gene_ids = c("symbols", "ensembl"), + h5_tissue_positions_path = NULL, + h5_image_png_path = NULL, + h5_json_scalefactors_path = NULL, + png_name = NULL, + do_manual_adj = FALSE, # deprecated + xmax_adj = 0, # deprecated + xmin_adj = 0, # deprecated + ymax_adj = 0, # deprecated + ymin_adj = 0, # deprecated + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + cores = NA, + verbose = NULL) { # NSE vars barcode <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL # handle deprecations - img_dep_msg <- "The params 'do_manual_adj', 'xmax_adj', 'xmin_adj', - 'ymax_adj', 'ymin_adj' are no longer used. + img_dep_msg <- "The params 'do_manual_adj', 'xmax_adj', 'xmin_adj', + 'ymax_adj', 'ymin_adj' are no longer used. Please use the automated workflow." if (!isFALSE(do_manual_adj) || xmax_adj != 0 || @@ -316,18 +327,17 @@ createGiottoVisiumObject <- function(visium_dir = NULL, -.visium_create <- function( - expr_counts_path, - h5_gene_ids = NULL, # h5 - gene_column_index = NULL, # folder - tissue_positions_path, - image_path = NULL, - scale_json_path = NULL, - png_name = NULL, - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - verbose = NULL) { +.visium_create <- function(expr_counts_path, + h5_gene_ids = NULL, # h5 + gene_column_index = NULL, # folder + tissue_positions_path, + image_path = NULL, + scale_json_path = NULL, + png_name = NULL, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + verbose = NULL) { # NSE vars barcode <- cell_ID <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL @@ -348,12 +358,16 @@ createGiottoVisiumObject <- function(visium_dir = NULL, } # if expr_results is not a list, make it a list compatible with downstream - if (!is.list(expr_results)) expr_results <- list( - "Gene Expression" = expr_results) + if (!is.list(expr_results)) { + expr_results <- list( + "Gene Expression" = expr_results + ) + } # format expected data into list to be used with readExprData() raw_matrix_list <- list("cell" = list("rna" = list( - "raw" = expr_results[["Gene Expression"]]))) + "raw" = expr_results[["Gene Expression"]] + ))) # add protein expression data to list if it exists if ("Antibody Capture" %in% names(expr_results)) { @@ -363,12 +377,15 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # 2. spatial locations spatial_results <- data.table::fread(tissue_positions_path) - colnames(spatial_results) <- c("barcode", "in_tissue", "array_row", - "array_col", "col_pxl", "row_pxl") + colnames(spatial_results) <- c( + "barcode", "in_tissue", "array_row", + "array_col", "col_pxl", "row_pxl" + ) spatial_results <- spatial_results[match(colnames( - raw_matrix_list$cell[[1]]$raw), barcode)] + raw_matrix_list$cell[[1]]$raw + ), barcode)] data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") - spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] + spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] # flip x and y colnames(spatial_locs) <- c("cell_ID", "sdimx", "sdimy") @@ -388,7 +405,8 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # 5. metadata meta_results <- spatial_results[ - , .(cell_ID, in_tissue, array_row, array_col)] + , .(cell_ID, in_tissue, array_row, array_col) + ] expr_types <- names(raw_matrix_list$cell) meta_list <- list() for (etype in expr_types) { @@ -427,17 +445,17 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # Find and check the filepaths within a structured visium directory -.visium_read_folder <- function( - visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - png_name = NULL, - verbose = NULL) { +.visium_read_folder <- function(visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + png_name = NULL, + verbose = NULL) { vmsg(.v = verbose, "A structured visium directory will be used") ## check arguments - if (is.null(visium_dir)) + if (is.null(visium_dir)) { .gstop("visium_dir needs to be a path to a visium directory") + } visium_dir <- path.expand(visium_dir) if (!dir.exists(visium_dir)) .gstop(visium_dir, " does not exist!") expr_data <- match.arg(expr_data, choices = c("raw", "filter")) @@ -448,14 +466,16 @@ createGiottoVisiumObject <- function(visium_dir = NULL, "raw" = paste0(visium_dir, "/", "raw_feature_bc_matrix/"), "filter" = paste0(visium_dir, "/", "filtered_feature_bc_matrix/") ) - if (!file.exists(expr_counts_path)) + if (!file.exists(expr_counts_path)) { .gstop(expr_counts_path, "does not exist!") + } ## 2. check spatial locations spatial_dir <- paste0(visium_dir, "/", "spatial/") tissue_positions_path <- Sys.glob( - paths = file.path(spatial_dir, "tissue_positions*")) + paths = file.path(spatial_dir, "tissue_positions*") + ) ## 3. check spatial image @@ -469,8 +489,9 @@ createGiottoVisiumObject <- function(visium_dir = NULL, ## 4. check scalefactors scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") - if (!file.exists(scalefactors_path)) + if (!file.exists(scalefactors_path)) { .gstop(scalefactors_path, "does not exist!") + } list( @@ -484,36 +505,44 @@ createGiottoVisiumObject <- function(visium_dir = NULL, -.visium_read_h5 <- function( - h5_visium_path = h5_visium_path, # expression matrix - h5_gene_ids = h5_gene_ids, - h5_tissue_positions_path = h5_tissue_positions_path, - h5_image_png_path = h5_image_png_path, - h5_json_scalefactors_path = h5_json_scalefactors_path, - verbose = NULL) { +.visium_read_h5 <- function(h5_visium_path = h5_visium_path, # expression matrix + h5_gene_ids = h5_gene_ids, + h5_tissue_positions_path = h5_tissue_positions_path, + h5_image_png_path = h5_image_png_path, + h5_json_scalefactors_path = h5_json_scalefactors_path, + verbose = NULL) { # 1. filepaths - vmsg(.v = verbose, - "A path to an .h5 10X file was provided and will be used") - if (!file.exists(h5_visium_path)) + vmsg( + .v = verbose, + "A path to an .h5 10X file was provided and will be used" + ) + if (!file.exists(h5_visium_path)) { .gstop("The provided path ", h5_visium_path, " does not exist") - if (is.null(h5_tissue_positions_path)) - .gstop("A path to the tissue positions (.csv) needs to be provided to + } + if (is.null(h5_tissue_positions_path)) { + .gstop("A path to the tissue positions (.csv) needs to be provided to h5_tissue_positions_path") - if (!file.exists(h5_tissue_positions_path)) - .gstop("The provided path ", h5_tissue_positions_path, - " does not exist") + } + if (!file.exists(h5_tissue_positions_path)) { + .gstop( + "The provided path ", h5_tissue_positions_path, + " does not exist" + ) + } if (!is.null(h5_image_png_path)) { if (!file.exists(h5_image_png_path)) { - .gstop("The provided h5 image path ", h5_image_png_path, - "does not exist. - Set to NULL to exclude or provide the correct path.") + .gstop( + "The provided h5 image path ", h5_image_png_path, + "does not exist. + Set to NULL to exclude or provide the correct path." + ) } } if (!is.null(h5_json_scalefactors_path)) { if (!file.exists(h5_json_scalefactors_path)) { warning(wrap_txt( "No file found at h5_json_scalefactors_path. - Scalefactors are needed for proper image alignment and + Scalefactors are needed for proper image alignment and polygon generation" )) } @@ -549,8 +578,9 @@ createGiottoVisiumObject <- function(visium_dir = NULL, #' Adds circular giottoPolygons to the spatial_info slot of a Giotto Object #' for the "cell" spatial unit. #' @export -addVisiumPolygons <- function(gobject, - scalefactor_path = NULL) { +addVisiumPolygons <- function( + gobject, + scalefactor_path = NULL) { assert_giotto(gobject) visium_spat_locs <- getSpatialLocations( @@ -591,8 +621,10 @@ addVisiumPolygons <- function(gobject, .visium_read_scalefactors <- function(json_path = NULL) { if (!checkmate::test_file_exists(json_path)) { if (!is.null(json_path)) { - warning("scalefactors not discovered at: \n", - json_path, call. = FALSE) + warning("scalefactors not discovered at: \n", + json_path, + call. = FALSE + ) } return(NULL) } @@ -640,7 +672,7 @@ addVisiumPolygons <- function(gobject, #' @title Calculate Pixel to Micron Scalefactor #' @name visium_micron_scalefactor -#' @param json_scalefactors list of scalefactors from +#' @param json_scalefactors list of scalefactors from #' .visium_read_scalefactors() #' @returns scale factor for converting pixel to micron #' @details @@ -662,7 +694,7 @@ addVisiumPolygons <- function(gobject, #' @name .visium_spot_poly #' @param spatlocs spatial locations data.table or `spatLocsObj` containing #' centroid locations of visium spots -#' @param json_scalefactors list of scalefactors from +#' @param json_scalefactors list of scalefactors from #' .visium_read_scalefactors() #' @returns giottoPolygon object #' @details @@ -670,8 +702,9 @@ addVisiumPolygons <- function(gobject, #' Visium spots. #' @keywords internal #' @md -.visium_spot_poly <- function(spatlocs = NULL, - json_scalefactors) { +.visium_spot_poly <- function( + spatlocs = NULL, + json_scalefactors) { if (inherits(spatlocs, "spatLocsObj")) { spatlocs <- spatlocs[] } @@ -699,11 +732,10 @@ addVisiumPolygons <- function(gobject, # json_info expects the list read output from .visium_read_scalefactors # image_path should be expected to be full filepath # should only be used when do_manual_adj (deprecated) is FALSE -.visium_image <- function( - image_path, - json_info = NULL, - micron_scale = FALSE, - verbose = NULL) { +.visium_image <- function(image_path, + json_info = NULL, + micron_scale = FALSE, + verbose = NULL) { # assume image already checked vmsg(.v = verbose, .initial = " - ", "found image") @@ -730,8 +762,8 @@ addVisiumPolygons <- function(gobject, if (is.null(visium_img_type)) { # if not recognized visium image type .gstop( - "\'image_path\' filename did not partial match either - \'lowres\' or \'hires\'. Ensure specified image is either the + "\'image_path\' filename did not partial match either + \'lowres\' or \'hires\'. Ensure specified image is either the Visium lowres or hires image and rename it accordingly" ) } @@ -793,9 +825,10 @@ addVisiumPolygons <- function(gobject, #' if image_file is a list. #' @returns giottoLargeImage #' @export -createMerscopeLargeImage <- function(image_file, - transforms_file, - name = "image") { +createMerscopeLargeImage <- function( + image_file, + transforms_file, + name = "image") { checkmate::assert_character(transforms_file) tfsDT <- data.table::fread(transforms_file) if (inherits(image_file, "character")) { @@ -836,12 +869,12 @@ createMerscopeLargeImage <- function(image_file, #' @title Create Vizgen MERSCOPE Giotto Object #' @name createGiottoMerscopeObject -#' @description Given the path to a MERSCOPE experiment directory, creates a +#' @description Given the path to a MERSCOPE experiment directory, creates a #' Giotto object. #' @param merscope_dir full path to the exported merscope directory -#' @param data_to_use which of either the 'subcellular' or 'aggregate' +#' @param data_to_use which of either the 'subcellular' or 'aggregate' #' information to use for object creation -#' @param FOVs which FOVs to use when building the subcellular object. +#' @param FOVs which FOVs to use when building the subcellular object. #' (default is NULL) #' NULL loads all FOVs (very slow) #' @param calculate_overlap whether to run \code{\link{calculateOverlapRaster}} @@ -851,9 +884,9 @@ createMerscopeLargeImage <- function(image_file, #' @inheritParams GiottoClass::createGiottoObjectSubcellular #' @returns a giotto object #' @details -#' [\strong{Expected Directory}] This function generates a giotto object when -#' given a link to a MERSCOPE output directory. It expects the following items -#' within the directory where the \strong{bolded} portions are what this +#' [\strong{Expected Directory}] This function generates a giotto object when +#' given a link to a MERSCOPE output directory. It expects the following items +#' within the directory where the \strong{bolded} portions are what this #' function matches against: #' \itemize{ #' \item{\strong{cell_boundaries} (folder .hdf5 files)} @@ -863,21 +896,22 @@ createMerscopeLargeImage <- function(image_file, #' \item{detected_transcripts\strong{metadata_file}.csv (file)} #' } #' @export -createGiottoMerscopeObject <- function(merscope_dir, - data_to_use = c("subcellular", "aggregate"), - FOVs = NULL, - poly_z_indices = 1:7, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - instructions = NULL, - cores = NA, - verbose = TRUE) { +createGiottoMerscopeObject <- function( + merscope_dir, + data_to_use = c("subcellular", "aggregate"), + FOVs = NULL, + poly_z_indices = 1:7, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + instructions = NULL, + cores = NA, + verbose = TRUE) { fovs <- NULL # 0. setup @@ -893,7 +927,8 @@ createGiottoMerscopeObject <- function(merscope_dir, # determine data to use data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) + arg = data_to_use, choices = c("subcellular", "aggregate") + ) # 1. test if folder structure exists and is as expected dir_items <- .read_merscope_folder( @@ -931,8 +966,10 @@ createGiottoMerscopeObject <- function(merscope_dir, verbose = verbose ) } else { - stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) + stop(wrap_txt('data_to_use "', data_to_use, + '" not implemented', + sep = "" + )) } return(merscope_gobject) @@ -941,21 +978,22 @@ createGiottoMerscopeObject <- function(merscope_dir, -#' @describeIn createGiottoMerscopeObject Create giotto object with +#' @describeIn createGiottoMerscopeObject Create giotto object with #' 'subcellular' workflow #' @param data_list list of loaded data from \code{\link{load_merscope_folder}} #' @keywords internal -.createGiottoMerscopeObject_subcellular <- function(data_list, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - cores = NA, - verbose = TRUE) { +.createGiottoMerscopeObject_subcellular <- function( + data_list, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + cores = NA, + verbose = TRUE) { feat_coord <- neg_coord <- cellLabel_dir <- instructions <- NULL # unpack data_list @@ -977,10 +1015,12 @@ createGiottoMerscopeObject <- function(merscope_dir, blank_dt <- tx_dt[gene %in% blank_id, ] # extract transcript_id col and store as feature meta - feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) - blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) + feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), + with = FALSE + ]) + blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), + with = FALSE + ]) feat_dt[, c("transcript_id", "barcode_id") := NULL] blank_dt[, c("transcript_id", "barcode_id") := NULL] @@ -1011,13 +1051,14 @@ createGiottoMerscopeObject <- function(merscope_dir, -#' @describeIn createGiottoMerscopeObject Create giotto object with 'aggregate' +#' @describeIn createGiottoMerscopeObject Create giotto object with 'aggregate' #' workflow #' @param data_list list of loaded data from \code{\link{load_merscope_folder}} #' @keywords internal -.createGiottoMerscopeObject_aggregate <- function(data_list, - cores = NA, - verbose = TRUE) { +.createGiottoMerscopeObject_aggregate <- function( + data_list, + cores = NA, + verbose = TRUE) { # unpack data_list micronToPixelScale <- data_list$micronToPixelScale expr_dt <- data_list$expr_dt @@ -1037,14 +1078,15 @@ createGiottoMerscopeObject <- function(merscope_dir, #' @title Create Spatial Genomics Giotto Object #' @name createSpatialGenomicsObject #' @param sg_dir full path to the exported Spatial Genomics directory -#' @param instructions new instructions +#' @param instructions new instructions #' (e.g. result from createGiottoInstructions) #' @returns giotto object #' @description Given the path to a Spatial Genomics data directory, creates a #' Giotto object. #' @export -createSpatialGenomicsObject <- function(sg_dir = NULL, - instructions = NULL) { +createSpatialGenomicsObject <- function( + sg_dir = NULL, + instructions = NULL) { # Find files in Spatial Genomics directory dapi <- list.files(sg_dir, full.names = TRUE, pattern = "DAPI") mask <- list.files(sg_dir, full.names = TRUE, pattern = "mask") @@ -1085,20 +1127,20 @@ createSpatialGenomicsObject <- function(sg_dir = NULL, #' object. #' @param cosmx_dir full path to the exported cosmx directory #' @param data_to_use which type(s) of expression data to build the gobject with -#' Default is \code{'all'} information available. \code{'subcellular'} loads -#' the transcript coordinates only. \code{'aggregate'} loads the provided +#' Default is \code{'all'} information available. \code{'subcellular'} loads +#' the transcript coordinates only. \code{'aggregate'} loads the provided #' aggregated expression matrix. #' @param FOVs field of views to load (only affects subcellular data and images) -#' @param remove_background_polygon try to remove background polygon +#' @param remove_background_polygon try to remove background polygon #' (default: FALSE) #' @param background_algo algorithm to remove background polygon #' @param remove_unvalid_polygons remove unvalid polygons (default: TRUE) #' @inheritParams GiottoClass::createGiottoObjectSubcellular #' @returns a giotto object #' @details -#' [\strong{Expected Directory}] This function generates a giotto object when -#' given a link to a cosmx output directory. It expects the following items -#' within the directory where the \strong{bolded} portions are what this +#' [\strong{Expected Directory}] This function generates a giotto object when +#' given a link to a cosmx output directory. It expects the following items +#' within the directory where the \strong{bolded} portions are what this #' function matches against: #' \itemize{ #' \item{\strong{CellComposite} (folder of images)} @@ -1113,49 +1155,51 @@ createSpatialGenomicsObject <- function(sg_dir = NULL, #' #' [\strong{Workflows}] Workflow to use is accessed through the data_to_use param #' \itemize{ -#' \item{'all' - loads and requires subcellular information from tx_file and +#' \item{'all' - loads and requires subcellular information from tx_file and #' fov_positions_file -#' and also the existing aggregated information +#' and also the existing aggregated information #' (expression, spatial locations, and metadata) #' from exprMat_file and metadata_file.} -#' \item{'subcellular' - loads and requires subcellular information from +#' \item{'subcellular' - loads and requires subcellular information from #' tx_file and #' fov_positions_file only.} -#' \item{'aggregate' - loads and requires the existing aggregate information -#' (expression, spatial locations, and metadata) from exprMat_file and +#' \item{'aggregate' - loads and requires the existing aggregate information +#' (expression, spatial locations, and metadata) from exprMat_file and #' metadata_file.} #' } #' -#' [\strong{Images}] Images in the default CellComposite, CellLabels, +#' [\strong{Images}] Images in the default CellComposite, CellLabels, #' CompartmentLabels, and CellOverlay -#' folders will be loaded as giotto largeImage objects in all workflows as -#' long as they are available. Additionally, CellComposite images will be +#' folders will be loaded as giotto largeImage objects in all workflows as +#' long as they are available. Additionally, CellComposite images will be #' converted to giotto image objects, making plotting with #' these image objects more responsive when accessing them from a server. #' \code{\link{showGiottoImageNames}} can be used to see the available images. #' @export -createGiottoCosMxObject <- function(cosmx_dir = NULL, - data_to_use = c("all", "subcellular", "aggregate"), - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - FOVs = NULL, - instructions = NULL, - cores = determine_cores(), - verbose = TRUE) { +createGiottoCosMxObject <- function( + cosmx_dir = NULL, + data_to_use = c("all", "subcellular", "aggregate"), + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + FOVs = NULL, + instructions = NULL, + cores = determine_cores(), + verbose = TRUE) { # 0. setup cosmx_dir <- path.expand(cosmx_dir) # determine data to use data_to_use <- match.arg( - arg = data_to_use, choices = c("all", "subcellular", "aggregate")) + arg = data_to_use, choices = c("all", "subcellular", "aggregate") + ) if (data_to_use %in% c("all", "aggregate")) { - stop(wrap_txt('Convenience workflows "all" and "aggregate" are not + stop(wrap_txt('Convenience workflows "all" and "aggregate" are not available yet')) } # Define for data.table - fov <- target <- x_local_px <- y_local_px <- z <- cell_ID <- + fov <- target <- x_local_px <- y_local_px <- z <- cell_ID <- CenterX_global_px <- CenterY_global_px <- CenterX_local_px <- CenterY_local_px <- NULL @@ -1219,15 +1263,14 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' @inheritParams createGiottoCosMxObject #' @returns giotto object #' @keywords internal -.createGiottoCosMxObject_subcellular <- function( - dir_items, - FOVs = NULL, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL) { +.createGiottoCosMxObject_subcellular <- function(dir_items, + FOVs = NULL, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL) { target <- fov <- NULL # load tx detections and FOV offsets ------------------------------------- # @@ -1247,7 +1290,8 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, tx_coord_all[, c("x_global_px", "y_global_px", "cell_ID") := NULL] data.table::setcolorder( - tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov")) + tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov") + ) # feature detection type splitting --------------------------------------- # @@ -1273,13 +1317,17 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, if (isTRUE(verbose)) message("Loading image information...") composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("*", x, "*"))) + dir_items$`CellComposite folder`, paste0("*", x, "*") + )) cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("*", x, "*"))) + dir_items$`CellLabels folder`, paste0("*", x, "*") + )) compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("*", x, "*"))) + dir_items$`CompartmentLabels folder`, paste0("*", x, "*") + )) cellOverlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("*", x, "*"))) + dir_items$`CellOverlay folder`, paste0("*", x, "*") + )) # Missing warnings if (length(composite_dir) == 0) { @@ -1314,11 +1362,15 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, feat_coord <- feat_coords_all[fov == as.numeric(x)] data.table::setnames( - feat_coord, old = coord_oldnames, new = coord_newnames) + feat_coord, + old = coord_oldnames, new = coord_newnames + ) # neg probe info neg_coord <- neg_coords_all[fov == as.numeric(x)] data.table::setnames( - neg_coord, old = coord_oldnames, new = coord_newnames) + neg_coord, + old = coord_oldnames, new = coord_newnames + ) # build giotto object -------------------------------------- # @@ -1344,8 +1396,9 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, # find centroids as spatial locations ---------------------- # - if (isTRUE(verbose)) + if (isTRUE(verbose)) { message("Finding polygon centroids as cell spatial locations...") + } fov_subset <- addSpatialCentroidLocations( fov_subset, poly_info = "cell", @@ -1394,7 +1447,7 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, images = gImage_list ) - # convert to MG for faster loading (particularly relevant for + # convert to MG for faster loading (particularly relevant for # pulling from server) # TODO remove this fov_subset <- convertGiottoLargeImageToMG( @@ -1439,10 +1492,11 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' @inheritParams createGiottoCosMxObject #' @returns giotto object #' @keywords internal -.createGiottoCosMxObject_aggregate <- function(dir_items, - cores, - verbose = TRUE, - instructions = NULL) { +.createGiottoCosMxObject_aggregate <- function( + dir_items, + cores, + verbose = TRUE, + instructions = NULL) { data_to_use <- fov <- NULL data_list <- .load_cosmx_folder_aggregate( @@ -1479,19 +1533,25 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, # load in images img_ID <- data.table::data.table( fov = fov_shifts[, fov], - img_name = paste0("fov", - sprintf("%03d", fov_shifts[, fov]), "-image") + img_name = paste0( + "fov", + sprintf("%03d", fov_shifts[, fov]), "-image" + ) ) if (isTRUE(verbose)) message("Attaching image files...") composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("/*"))) + dir_items$`CellComposite folder`, paste0("/*") + )) cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("/*"))) + dir_items$`CellLabels folder`, paste0("/*") + )) compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("/*"))) + dir_items$`CompartmentLabels folder`, paste0("/*") + )) overlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("/*"))) + dir_items$`CellOverlay folder`, paste0("/*") + )) if (length(cellLabel_imgList) > 0) { cellLabel_imgList <- lapply(cellLabel_dir, function(x) { @@ -1506,8 +1566,9 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, if (length(compartmentLabel_dir) > 0) { compartmentLabel_imgList <- lapply( compartmentLabel_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) + createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + } + ) } if (length(overlay_dir) > 0) { overlay_imgList <- lapply(overlay_dir, function(x) { @@ -1520,30 +1581,31 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, -#' @title Load and create a CosMx Giotto object from subcellular and aggregate +#' @title Load and create a CosMx Giotto object from subcellular and aggregate #' info #' @name .createGiottoCosMxObject_all #' @param dir_items list of full directory paths from \code{.read_cosmx_folder} #' @inheritParams createGiottoCosMxObject #' @returns giotto object -#' @details Both \emph{subcellular} +#' @details Both \emph{subcellular} #' (subellular transcript detection information) and -#' \emph{aggregate} (aggregated detection count matrices by cell polygon from +#' \emph{aggregate} (aggregated detection count matrices by cell polygon from #' NanoString) #' data will be loaded in. The two will be separated into 'cell' and 'cell_agg' #' spatial units in order to denote the difference in origin of the two. #' @seealso createGiottoCosMxObject .createGiottoCosMxObject_aggregate #' .createGiottoCosMxObject_subcellular #' @keywords internal -.createGiottoCosMxObject_all <- function(dir_items, - FOVs, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL, - ...) { +.createGiottoCosMxObject_all <- function( + dir_items, + FOVs, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL, + ...) { # 1. create subcellular giotto as spat_unit 'cell' cosmx_gobject <- .createGiottoCosMxObject_subcellular( dir_items = dir_items, @@ -1570,15 +1632,18 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, protM <- agg_data$protM spM <- agg_data$spM - # add in pre-generated aggregated expression matrix information for 'all' + # add in pre-generated aggregated expression matrix information for 'all' # workflow # Add aggregate expression information - if (isTRUE(verbose)) wrap_msg( - 'Appending provided aggregate expression data as... + if (isTRUE(verbose)) { + wrap_msg( + 'Appending provided aggregate expression data as... spat_unit: "cell_agg" feat_type: "rna" - name: "raw"') + name: "raw"' + ) + } # add expression data to expression slot s4_expr <- createExprObj( name = "raw", @@ -1591,13 +1656,19 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, cosmx_gobject <- set_expression_values(cosmx_gobject, values = s4_expr) # Add spatial locations - if (isTRUE(verbose)) wrap_msg( - 'Appending metadata provided spatial locations data as... + if (isTRUE(verbose)) { + wrap_msg( + 'Appending metadata provided spatial locations data as... --> spat_unit: "cell_agg" name: "raw" - --> spat_unit: "cell" name: "raw_fov"') - if (isTRUE(verbose)) wrap_msg( - 'Polygon centroid derived spatial locations assigned as... - --> spat_unit: "cell" name: "raw" (default)') + --> spat_unit: "cell" name: "raw_fov"' + ) + } + if (isTRUE(verbose)) { + wrap_msg( + 'Polygon centroid derived spatial locations assigned as... + --> spat_unit: "cell" name: "raw" (default)' + ) + } locsObj <- create_spat_locs_obj( name = "raw", @@ -1613,8 +1684,9 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, ) cosmx_gobject <- set_spatial_locations(cosmx_gobject, spatlocs = locsObj) - cosmx_gobject <- set_spatial_locations(cosmx_gobject, - spatlocs = locsObj_fov) + cosmx_gobject <- set_spatial_locations(cosmx_gobject, + spatlocs = locsObj_fov + ) # initialize cell and feat IDs and metadata slots for 'cell_agg' spat_unit agg_cell_ID <- colnames(s4_expr[]) @@ -1658,24 +1730,24 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' @title Create 10x Xenium Giotto Object #' @name createGiottoXeniumObject -#' @description Given the path to a Xenium experiment output folder, creates a +#' @description Given the path to a Xenium experiment output folder, creates a #' Giotto object #' @param xenium_dir full path to the exported xenium directory #' @param data_to_use which type(s) of expression data to build the gobject with #' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') #' @param load_format files formats from which to load the data. Either `csv` or #' `parquet` currently supported. -#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 +#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 #' file. Default is \code{TRUE} #' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene #' expression matrix -#' @param bounds_to_load vector of boundary information to load +#' @param bounds_to_load vector of boundary information to load #' (e.g. \code{'cell'} #' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both #' at the same time.) -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included #' as a subcellular transcript detection (default = 20) -#' @param key_list (advanced) list of grep-based keywords to split the +#' @param key_list (advanced) list of grep-based keywords to split the #' subcellular feature detections by feature type. See details #' @inheritParams get10Xmatrix #' @inheritParams GiottoClass::createGiottoObjectSubcellular @@ -1686,20 +1758,20 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' Xenium provides info on feature detections that include more than only the #' Gene Expression specific probes. Additional probes for QC are included: #' \emph{blank codeword}, \emph{negative control codeword}, and -#' \emph{negative control probe}. These additional QC probes each occupy and -#' are treated as their own feature types so that they can largely remain +#' \emph{negative control probe}. These additional QC probes each occupy and +#' are treated as their own feature types so that they can largely remain #' independent of the gene expression information. #' #' [\strong{key_list}] #' Related to \code{data_to_use = 'subcellular'} workflow only: -#' Additional QC probe information is in the subcellular feature detections -#' information and must be separated from the gene expression information +#' Additional QC probe information is in the subcellular feature detections +#' information and must be separated from the gene expression information #' during processing. -#' The QC probes have prefixes that allow them to be selected from the rest of +#' The QC probes have prefixes that allow them to be selected from the rest of #' the feature IDs. -#' Giotto uses a named list of keywords (\code{key_list}) to select these QC -#' probes, with the list names being the names that will be assigned as the -#' feature type of these feature detections. The default list is used when +#' Giotto uses a named list of keywords (\code{key_list}) to select these QC +#' probes, with the list names being the names that will be assigned as the +#' feature type of these feature detections. The default list is used when #' \code{key_list} = NULL. #' #' Default list: @@ -1713,30 +1785,33 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, #' map to any of the keys. #' #' @export -createGiottoXeniumObject <- function(xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), - qv_threshold = 20, - key_list = NULL, - instructions = NULL, - cores = NA, - verbose = TRUE) { +createGiottoXeniumObject <- function( + xenium_dir, + data_to_use = c("subcellular", "aggregate"), + load_format = "csv", + h5_expression = TRUE, + h5_gene_ids = c("symbols", "ensembl"), + gene_column_index = 1, + bounds_to_load = c("cell"), + qv_threshold = 20, + key_list = NULL, + instructions = NULL, + cores = NA, + verbose = TRUE) { # 0. setup xenium_dir <- path.expand(xenium_dir) # Determine data to load data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) + arg = data_to_use, choices = c("subcellular", "aggregate") + ) # Determine load formats - load_format <- "csv" # TODO Remove this and add as param once other options + load_format <- "csv" # TODO Remove this and add as param once other options # are available load_format <- match.arg( - arg = load_format, choices = c("csv", "parquet", "zarr")) + arg = load_format, choices = c("csv", "parquet", "zarr") + ) # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) @@ -1837,19 +1912,20 @@ createGiottoXeniumObject <- function(xenium_dir, #' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} #' @param key_list regex-based search keys for feature IDs to allow separation #' into separate giottoPoints objects by feat_type -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included #' as a subcellular transcript detection (default = 20) #' @inheritParams get10Xmatrix #' @inheritParams GiottoClass::createGiottoObjectSubcellular #' @returns giotto object #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate #' @keywords internal -.createGiottoXeniumObject_subcellular <- function(data_list, - key_list = NULL, - qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE) { +.createGiottoXeniumObject_subcellular <- function( + data_list, + key_list = NULL, + qv_threshold = 20, + instructions = NULL, + cores = NA, + verbose = TRUE) { # data.table vars qv <- NULL @@ -1866,8 +1942,10 @@ createGiottoXeniumObject <- function(xenium_dir, vmsg("> points data prep...", .v = verbose) # filter by qv_threshold - vmsg("> filtering feature detections for Phred score >= ", - qv_threshold, .v = verbose) + vmsg("> filtering feature detections for Phred score >= ", + qv_threshold, + .v = verbose + ) n_before <- tx_dt[, .N] tx_dt_filtered <- tx_dt[qv >= qv_threshold] n_after <- tx_dt_filtered[, .N] @@ -1884,7 +1962,8 @@ createGiottoXeniumObject <- function(xenium_dir, # discover feat_IDs for each feat_type all_IDs <- tx_dt_filtered[, unique(feat_ID)] feat_types_IDs <- lapply( - key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) + key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)] + ) rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) feat_types_IDs <- append(rna, feat_types_IDs) @@ -1937,11 +2016,12 @@ createGiottoXeniumObject <- function(xenium_dir, #' @returns giotto object #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular #' @keywords internal -.createGiottoXeniumObject_aggregate <- function(data_list, - # include_analysis = FALSE, - instructions = NULL, - cores = NA, - verbose = TRUE) { +.createGiottoXeniumObject_aggregate <- function( + data_list, + # include_analysis = FALSE, + instructions = NULL, + cores = NA, + verbose = TRUE) { # Unpack data_list info feat_meta <- data_list$feat_meta cell_meta <- data_list$cell_meta @@ -2002,10 +2082,11 @@ createGiottoXeniumObject <- function(xenium_dir, #' @describeIn read_data_folder Read a structured MERSCOPE folder #' @keywords internal -.read_merscope_folder <- function(merscope_dir, - data_to_use, - cores = NA, - verbose = NULL) { +.read_merscope_folder <- function( + merscope_dir, + data_to_use, + cores = NA, + verbose = NULL) { # prepare dir_items list dir_items <- list( `boundary info` = "*cell_boundaries*", @@ -2064,12 +2145,14 @@ createGiottoXeniumObject <- function(xenium_dir, #' @returns path_list a list of cosmx files discovered and their filepaths. NULL #' values denote missing items #' @keywords internal -.read_cosmx_folder <- function(cosmx_dir, - verbose = TRUE) { +.read_cosmx_folder <- function( + cosmx_dir, + verbose = TRUE) { ch <- box_chars() - if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) + if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) { stop("The full path to a cosmx directory must be given.") + } vmsg("A structured CosMx directory will be used\n", .v = verbose) # find directories (length = 1 if present, length = 0 if missing) @@ -2084,7 +2167,8 @@ createGiottoXeniumObject <- function(xenium_dir, `metadata file` = "*metadata_file*" ) dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x))) + dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x)) + ) dir_items_lengths <- lengths(dir_items) if (isTRUE(verbose)) { @@ -2100,7 +2184,7 @@ createGiottoXeniumObject <- function(xenium_dir, # select first directory in list if multiple are detected if (any(dir_items_lengths > 1)) { - warning("Multiple matches for expected subdirectory item(s).\n + warning("Multiple matches for expected subdirectory item(s).\n First matching item selected") multiples <- which(dir_items_lengths > 1) @@ -2124,12 +2208,13 @@ createGiottoXeniumObject <- function(xenium_dir, #' @keywords internal #' @returns path_list a list of xenium files discovered and their filepaths. NULL #' values denote missing items -.read_xenium_folder <- function(xenium_dir, - data_to_use = "subcellular", - bounds_to_load = c("cell"), - load_format = "csv", - h5_expression = FALSE, - verbose = TRUE) { +.read_xenium_folder <- function( + xenium_dir, + data_to_use = "subcellular", + bounds_to_load = c("cell"), + load_format = "csv", + h5_expression = FALSE, + verbose = TRUE) { # Check needed packages if (load_format == "parquet") { package_check(pkg_name = "arrow", repository = "CRAN") @@ -2145,8 +2230,9 @@ createGiottoXeniumObject <- function(xenium_dir, # 0. test if folder structure exists and is as expected - if (is.null(xenium_dir) | !dir.exists(xenium_dir)) + if (is.null(xenium_dir) | !dir.exists(xenium_dir)) { stop("The full path to a xenium directory must be given.") + } vmsg("A structured Xenium directory will be used\n", .v = verbose) # find items (length = 1 if present, length = 0 if missing) @@ -2162,7 +2248,8 @@ createGiottoXeniumObject <- function(xenium_dir, ) dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x))) + dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x)) + ) dir_items_lengths <- lengths(dir_items) if (isTRUE(verbose)) { @@ -2172,10 +2259,12 @@ createGiottoXeniumObject <- function(xenium_dir, if (dir_items_lengths[[item]] > 0) { message(ch$s, "> ", item, " found") - for (item_i in seq_along(dir_items[[item]])) { + for (item_i in seq_along(dir_items[[item]])) { # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) + subItem <- gsub( + pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]] + ) message(ch$s, ch$s, ch$l, ch$h, ch$h, subItem) } } else { @@ -2186,24 +2275,30 @@ createGiottoXeniumObject <- function(xenium_dir, if (data_to_use == "subcellular") { # necessary items - if (item %in% c("boundary info", "raw transcript info")) + if (item %in% c("boundary info", "raw transcript info")) { stop(item, " is missing") + } # optional items if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata")) + "image info", "experiment info (.xenium)", + "panel metadata" + )) { warning(item, " is missing (optional)") - # items to ignore: analysis info, cell feature matrix, + } + # items to ignore: analysis info, cell feature matrix, # cell metadata } else if (data_to_use == "aggregate") { # necessary items - if (item %in% c("cell feature matrix", "cell metadata")) + if (item %in% c("cell feature matrix", "cell metadata")) { stop(item, " is missing") + } # optional items if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata", "analysis info")) + "image info", "experiment info (.xenium)", + "panel metadata", "analysis info" + )) { warning(item, " is missing (optional)") + } # items to ignore: boundary info, raw transcript info } } @@ -2217,45 +2312,55 @@ createGiottoXeniumObject <- function(xenium_dir, # **** transcript info **** tx_path <- NULL tx_path <- dir_items$`raw transcript info`[grepl( - pattern = load_format, dir_items$`raw transcript info`)] + pattern = load_format, dir_items$`raw transcript info` + )] # **** cell metadata **** cell_meta_path <- NULL cell_meta_path <- dir_items$`cell metadata`[grepl( - pattern = load_format, dir_items$`cell metadata`)] + pattern = load_format, dir_items$`cell metadata` + )] # **** boundary info **** # Select bound load format if (load_format != "zarr") { # No zarr available for boundary info dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = load_format, dir_items$`boundary info`)] + pattern = load_format, dir_items$`boundary info` + )] } else { dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = "csv", dir_items$`boundary info`)] + pattern = "csv", dir_items$`boundary info` + )] } # Organize bound paths by type of bound (bounds_to_load param) bound_paths <- NULL bound_names <- bounds_to_load bounds_to_load <- as.list(bounds_to_load) - bound_paths <- lapply(bounds_to_load, function(x) dir_items$`boundary info`[ - grepl(pattern = x, dir_items$`boundary info`)]) + bound_paths <- lapply(bounds_to_load, function(x) { + dir_items$`boundary info`[ + grepl(pattern = x, dir_items$`boundary info`) + ] + }) names(bound_paths) <- bound_names # **** aggregated expression info **** agg_expr_path <- NULL if (isTRUE(h5_expression)) { # h5 expression matrix loading is default agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "h5", dir_items$`cell feature matrix`)] + pattern = "h5", dir_items$`cell feature matrix` + )] } else if (load_format == "zarr") { agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "zarr", dir_items$`cell feature matrix`)] + pattern = "zarr", dir_items$`cell feature matrix` + )] } else { # No parquet for aggregated expression - default to normal 10x loading agg_expr_path <- dir_items$`cell feature matrix`[sapply( - dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x))] + dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x) + )] if (length(agg_expr_path) == 0) { stop(wrap_txt( "Expression matrix cannot be loaded.\n - Has cell_feature_matrix(.tar.gz) been unpacked into a + Has cell_feature_matrix(.tar.gz) been unpacked into a directory?" )) } @@ -2300,7 +2405,7 @@ createGiottoXeniumObject <- function(xenium_dir, #' @title Load MERSCOPE data from folder #' @name load_merscope_folder -#' @param dir_items list of full filepaths from +#' @param dir_items list of full filepaths from #' \code{\link{.read_merscope_folder}} #' @inheritParams createGiottoMerscopeObject #' @returns list of loaded-in MERSCOPE data @@ -2308,12 +2413,13 @@ NULL #' @rdname load_merscope_folder #' @keywords internal -.load_merscope_folder <- function(dir_items, - data_to_use, - fovs = NULL, - poly_z_indices = 1L:7L, - cores = NA, - verbose = TRUE) { +.load_merscope_folder <- function( + dir_items, + data_to_use, + fovs = NULL, + poly_z_indices = 1L:7L, + cores = NA, + verbose = TRUE) { # 1. load data_to_use-specific if (data_to_use == "subcellular") { data_list <- .load_merscope_folder_subcellular( @@ -2332,17 +2438,22 @@ NULL verbose = verbose ) } else { - stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) + stop(wrap_txt('data_to_use "', data_to_use, + '" not implemented', + sep = "" + )) } # 2. Load images if available if (!is.null(dir_items$`image info`)) { ## micron to px scaling factor micronToPixelScale <- Sys.glob(paths = file.path( - dir_items$`image info`, "*micron_to_mosaic_pixel_transform*"))[[1]] + dir_items$`image info`, "*micron_to_mosaic_pixel_transform*" + ))[[1]] micronToPixelScale <- data.table::fread( - micronToPixelScale, nThread = cores) + micronToPixelScale, + nThread = cores + ) # add to data_list data_list$micronToPixelScale <- micronToPixelScale @@ -2350,14 +2461,17 @@ NULL ## determine types of stains images_filenames <- list.files(dir_items$`image info`) bound_stains_filenames <- images_filenames[ - grep(pattern = ".tif", images_filenames)] + grep(pattern = ".tif", images_filenames) + ] bound_stains_types <- sapply(strsplit( - bound_stains_filenames, "_"), `[`, 2) + bound_stains_filenames, "_" + ), `[`, 2) bound_stains_types <- unique(bound_stains_types) img_list <- lapply_flex(bound_stains_types, function(stype) { img_paths <- Sys.glob(paths = file.path( - dir_items$`image info`, paste0("*", stype, "*"))) + dir_items$`image info`, paste0("*", stype, "*") + )) lapply_flex(img_paths, function(img) { createGiottoLargeImage(raster_object = img) @@ -2376,16 +2490,19 @@ NULL #' @describeIn load_merscope_folder Load items for 'subcellular' workflow #' @keywords internal -.load_merscope_folder_subcellular <- function(dir_items, - data_to_use, - cores = NA, - poly_z_indices = 1L:7L, - verbose = TRUE, - fovs = NULL) { +.load_merscope_folder_subcellular <- function( + dir_items, + data_to_use, + cores = NA, + poly_z_indices = 1L:7L, + verbose = TRUE, + fovs = NULL) { if (isTRUE(verbose)) message("Loading transcript level info...") if (is.null(fovs)) { tx_dt <- data.table::fread( - dir_items$`raw transcript info`, nThread = cores) + dir_items$`raw transcript info`, + nThread = cores + ) } else { message("Selecting FOV subset transcripts") tx_dt <- fread_colmatch( @@ -2398,7 +2515,8 @@ NULL } tx_dt[, c("x", "y") := NULL] # remove unneeded cols data.table::setcolorder( - tx_dt, c("gene", "global_x", "global_y", "global_z")) + tx_dt, c("gene", "global_x", "global_y", "global_z") + ) if (isTRUE(verbose)) message("Loading polygon info...") poly_info <- readPolygonFilesVizgenHDF5( @@ -2422,18 +2540,23 @@ NULL #' @describeIn load_merscope_folder Load items for 'aggregate' workflow #' @keywords internal -.load_merscope_folder_aggregate <- function(dir_items, - data_to_use, - cores = NA, - verbose = TRUE) { +.load_merscope_folder_aggregate <- function( + dir_items, + data_to_use, + cores = NA, + verbose = TRUE) { # metadata is polygon-related measurements vmsg("Loading cell metadata...", .v = verbose) cell_metadata_file <- data.table::fread( - dir_items$`cell metadata`, nThread = cores) + dir_items$`cell metadata`, + nThread = cores + ) vmsg("Loading expression matrix", .v = verbose) expr_dt <- data.table::fread( - dir_items$`cell feature matrix`, nThread = cores) + dir_items$`cell feature matrix`, + nThread = cores + ) data_list <- list( @@ -2457,15 +2580,16 @@ NULL #' @title Load CosMx folder subcellular info #' @name .load_cosmx_folder_subcellular #' @description loads in the feature detections information. Note that the mask -#' images are still required for a working subcellular object, and those are +#' images are still required for a working subcellular object, and those are #' loaded in \code{\link{.createGiottoCosMxObject_subcellular}} #' @inheritParams createGiottoCosMxObject #' @returns list #' @keywords internal -.load_cosmx_folder_subcellular <- function(dir_items, - FOVs = NULL, - cores, - verbose = TRUE) { +.load_cosmx_folder_subcellular <- function( + dir_items, + FOVs = NULL, + cores, + verbose = TRUE) { vmsg(.v = verbose, "Loading subcellular information...") # subcellular checks @@ -2479,7 +2603,8 @@ NULL # FOVs to load vmsg(.v = verbose, "Loading FOV offsets...") fov_offset_file <- fread( - input = dir_items$`fov positions file`, nThread = cores) + input = dir_items$`fov positions file`, nThread = cores + ) if (is.null(FOVs)) FOVs <- fov_offset_file$fov # default to ALL FOVs FOV_ID <- as.list(sprintf("%03d", FOVs)) @@ -2487,7 +2612,8 @@ NULL vmsg(.v = verbose, "Loading transcript level info...") tx_coord_all <- fread( - input = dir_items$`transcript locations file`, nThread = cores) + input = dir_items$`transcript locations file`, nThread = cores + ) vmsg(.v = verbose, "Subcellular load done") data_list <- list( @@ -2506,11 +2632,12 @@ NULL #' @inheritParams createGiottoCosMxObject #' @returns list #' @keywords internal -.load_cosmx_folder_aggregate <- function(dir_items, - cores, - verbose = TRUE) { +.load_cosmx_folder_aggregate <- function( + dir_items, + cores, + verbose = TRUE) { # data.table vars - fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- + fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- CenterY_global_px <- CenterX_local_px <- CenterY_local_px <- x_shift <- y_shift <- NULL @@ -2518,15 +2645,18 @@ NULL vmsg(.v = verbose, "Loading provided aggregated information...") # aggregate checks - if (!file.exists(dir_items$`expression matrix file`)) + if (!file.exists(dir_items$`expression matrix file`)) { stop(wrap_txt("No expression matrix file (.csv) detected")) - if (!file.exists(dir_items$`metadata file`)) - stop(wrap_txt("No metadata file (.csv) detected. Needed for cell + } + if (!file.exists(dir_items$`metadata file`)) { + stop(wrap_txt("No metadata file (.csv) detected. Needed for cell spatial locations.")) + } # read in aggregate data expr_mat <- fread( - input = dir_items$`expression matrix file`, nThread = cores) + input = dir_items$`expression matrix file`, nThread = cores + ) metadata <- fread(input = dir_items$`metadata file`, nThread = cores) # setorder expression and spatlocs @@ -2536,12 +2666,14 @@ NULL # generate unique cell IDs expr_mat[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID + )] expr_mat <- expr_mat[, fov := NULL] metadata[, fov_cell_ID := cell_ID] metadata[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID + )] # reorder data.table::setcolorder(x = metadata, c("cell_ID", "fov", "fov_cell_ID")) @@ -2562,11 +2694,15 @@ NULL spatloc_newnames <- c("sdimx", "sdimy", "cell_ID") data.table::setnames(spatlocs, old = spatloc_oldnames, new = spatloc_newnames) data.table::setnames( - spatlocs_fov, old = spatloc_oldnames_fov, new = spatloc_newnames) + spatlocs_fov, + old = spatloc_oldnames_fov, new = spatloc_newnames + ) # cleanup metadata and spatlocs - metadata <- metadata[, c("CenterX_global_px", "CenterY_global_px", - "CenterX_local_px", "CenterY_local_px") := NULL] + metadata <- metadata[, c( + "CenterX_global_px", "CenterY_global_px", + "CenterX_local_px", "CenterY_local_px" + ) := NULL] # find unique cell_IDs present in both expression and metadata giotto_cell_ID <- unique(intersect(expr_mat$cell_ID, metadata$cell_ID)) @@ -2579,27 +2715,35 @@ NULL # take all mean intensity protein information except for MembraneStain and DAPI protein_meta_cols <- colnames(metadata) protein_meta_cols <- protein_meta_cols[ - grepl(pattern = "Mean.*", x = protein_meta_cols)] + grepl(pattern = "Mean.*", x = protein_meta_cols) + ] protein_meta_cols <- protein_meta_cols[ - !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI")] + !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI") + ] protein_meta_cols <- c("cell_ID", protein_meta_cols) prot_expr <- metadata[, protein_meta_cols, with = FALSE] prot_cell_ID <- metadata[, cell_ID] - protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), - dimnames = list(prot_expr[[1]], - colnames(prot_expr[, -1])), - sparse = FALSE) + protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), + dimnames = list( + prot_expr[[1]], + colnames(prot_expr[, -1]) + ), + sparse = FALSE + ) protM <- t_flex(protM) # convert expression to sparse matrix - spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), - dimnames = list(expr_mat[[1]], - colnames(expr_mat[, -1])), - sparse = TRUE) + spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), + dimnames = list( + expr_mat[[1]], + colnames(expr_mat[, -1]) + ), + sparse = TRUE + ) spM <- t_flex(spM) - ## Ready for downstream aggregate gobject creation or appending into + ## Ready for downstream aggregate gobject creation or appending into # existing subcellular Giotto object ## data_list <- list( @@ -2631,14 +2775,15 @@ NULL #' @rdname load_xenium_folder #' @keywords internal -.load_xenium_folder <- function(path_list, - load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", - gene_column_index = 1, - cores, - verbose = TRUE) { +.load_xenium_folder <- function( + path_list, + load_format = "csv", + data_to_use = "subcellular", + h5_expression = "FALSE", + h5_gene_ids = "symbols", + gene_column_index = 1, + cores, + verbose = TRUE) { data_list <- switch(load_format, "csv" = .load_xenium_folder_csv( path_list = path_list, @@ -2667,13 +2812,14 @@ NULL #' @describeIn load_xenium_folder Load from csv files #' @keywords internal -.load_xenium_folder_csv <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { +.load_xenium_folder_csv <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE) { # initialize return vars feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL @@ -2682,8 +2828,10 @@ NULL fdata_path <- path_list$panel_meta_path[[1]] fdata_ext <- GiottoUtils::file_extension(fdata_path) if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json(path = fdata_path, - gene_ids = h5_gene_ids) + feat_meta <- .load_xenium_panel_json( + path = fdata_path, + gene_ids = h5_gene_ids + ) } else { feat_meta <- data.table::fread(fdata_path, nThread = cores) colnames(feat_meta)[[1]] <- "feat_ID" @@ -2715,7 +2863,9 @@ NULL } colnames(features_dt) <- c("id", "feat_ID", "feat_class") feat_meta <- merge( - features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + features_dt[, c(2, 3)], feat_meta, + all.x = TRUE, by = "feat_ID" + ) GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) @@ -2735,7 +2885,9 @@ NULL # **** aggregate info **** GiottoUtils::vmsg("loading cell metadata...", .v = verbose) cell_meta <- data.table::fread( - path_list$cell_meta_path[[1]], nThread = cores) + path_list$cell_meta_path[[1]], + nThread = cores + ) if (data_to_use == "aggregate") { GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) @@ -2772,13 +2924,14 @@ NULL #' @describeIn load_xenium_folder Load from parquet files #' @keywords internal -.load_xenium_folder_parquet <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { +.load_xenium_folder_parquet <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE) { # initialize return vars feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL # dplyr variable @@ -2790,7 +2943,8 @@ NULL fdata_ext <- GiottoUtils::file_extension(fdata_path) if ("json" %in% fdata_ext) { feat_meta <- .load_xenium_panel_json( - path = fdata_path, gene_ids = h5_gene_ids) + path = fdata_path, gene_ids = h5_gene_ids + ) } else { feat_meta <- data.table::fread(fdata_path, nThread = cores) colnames(feat_meta)[[1]] <- "feat_ID" @@ -2818,15 +2972,18 @@ NULL h5$close_all() }) } else { - features_dt <- arrow::read_tsv_arrow(paste0( - path_list$agg_expr_path, "/features.tsv.gz"), + features_dt <- arrow::read_tsv_arrow( + paste0( + path_list$agg_expr_path, "/features.tsv.gz" + ), col_names = FALSE ) %>% data.table::setDT() } colnames(features_dt) <- c("id", "feat_ID", "feat_class") feat_meta <- merge(features_dt[ - , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + , c(2, 3) + ], feat_meta, all.x = TRUE, by = "feat_ID") vmsg("Loading transcript level info...", .v = verbose) tx_dt <- arrow::read_parquet( @@ -2834,10 +2991,12 @@ NULL as_data_frame = FALSE ) %>% dplyr::mutate( - transcript_id = cast(transcript_id, arrow::string())) %>% + transcript_id = cast(transcript_id, arrow::string()) + ) %>% dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% dplyr::mutate( - feature_name = cast(feature_name, arrow::string())) %>% + feature_name = cast(feature_name, arrow::string()) + ) %>% as.data.frame() %>% data.table::setDT() data.table::setnames( @@ -2939,47 +3098,48 @@ NULL #' (ii) fragment files, or (iii) bam files. #' @param genome A string indicating the default genome to be used for all ArchR #' functions. Currently supported values include "hg19","hg38","mm9", and "mm10". -#' This value is stored as a global environment variable, not part of the +#' This value is stored as a global environment variable, not part of the #' ArchRProject. #' This can be overwritten on a per-function basis using the given function's #' geneAnnotationand genomeAnnotation parameter. For something other than one of -#' the currently supported, see createGeneAnnnotation() and +#' the currently supported, see createGeneAnnnotation() and #' createGenomeAnnnotation() -#' @param createArrowFiles_params list of parameters passed to +#' @param createArrowFiles_params list of parameters passed to #' `ArchR::createArrowFiles` #' @param ArchRProject_params list of parameters passed to `ArchR::ArchRProject` -#' @param addIterativeLSI_params list of parameters passed to +#' @param addIterativeLSI_params list of parameters passed to #' `ArchR::addIterativeLSI` #' @param threads number of threads to use. Default = `ArchR::getArchRThreads()` #' @param force Default = FALSE #' @param verbose Default = TRUE #' -#' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and +#' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and #' TileMatrix-based LSI #' @export -createArchRProj <- function(fragmentsPath, - genome = c("hg19", "hg38", "mm9", "mm10"), - createArrowFiles_params = list( - sampleNames = "sample1", - minTSS = 0, - minFrags = 0, - maxFrags = 1e+07, - minFragSize = 10, - maxFragSize = 2000, - offsetPlus = 0, - offsetMinus = 0, - TileMatParams = list(tileSize = 5000) - ), - ArchRProject_params = list( - outputDirectory = getwd(), - copyArrows = FALSE - ), - addIterativeLSI_params = list(), - threads = ArchR::getArchRThreads(), - force = FALSE, - verbose = TRUE) { +createArchRProj <- function( + fragmentsPath, + genome = c("hg19", "hg38", "mm9", "mm10"), + createArrowFiles_params = list( + sampleNames = "sample1", + minTSS = 0, + minFrags = 0, + maxFrags = 1e+07, + minFragSize = 10, + maxFragSize = 2000, + offsetPlus = 0, + offsetMinus = 0, + TileMatParams = list(tileSize = 5000) + ), + ArchRProject_params = list( + outputDirectory = getwd(), + copyArrows = FALSE + ), + addIterativeLSI_params = list(), + threads = ArchR::getArchRThreads(), + force = FALSE, + verbose = TRUE) { if (!requireNamespace("ArchR")) { - message('ArchR is needed. Install the package using + message('ArchR is needed. Install the package using remotes::install_github("GreenleafLab/ArchR")') } @@ -3029,27 +3189,32 @@ createArchRProj <- function(fragmentsPath, #' @param archRproj ArchR project #' @param expression expression information #' @param expression_feat Giotto object available features (e.g. atac, rna, ...) -#' @param spatial_locs data.table or data.frame with coordinates for cell +#' @param spatial_locs data.table or data.frame with coordinates for cell #' centroids -#' @param sampleNames A character vector containing the ArchR project sample +#' @param sampleNames A character vector containing the ArchR project sample #' name #' @param ... additional arguments passed to `createGiottoObject` #' #' @returns A Giotto object with at least an atac or epigenetic modality #' #' @export -createGiottoObjectfromArchR <- function(archRproj, - expression = NULL, - expression_feat = "atac", - spatial_locs = NULL, - sampleNames = "sample1", - ...) { +createGiottoObjectfromArchR <- function( + archRproj, + expression = NULL, + expression_feat = "atac", + spatial_locs = NULL, + sampleNames = "sample1", + ...) { # extract GeneScoreMatrix GeneScoreMatrix_summarizedExperiment <- ArchR::getMatrixFromProject( - archRproj) - GeneScoreMatrix <- slot(slot( - GeneScoreMatrix_summarizedExperiment, "assays"), - "data")[["GeneScoreMatrix"]] + archRproj + ) + GeneScoreMatrix <- slot( + slot( + GeneScoreMatrix_summarizedExperiment, "assays" + ), + "data" + )[["GeneScoreMatrix"]] ## get cell names cell_names <- colnames(GeneScoreMatrix) @@ -3057,8 +3222,10 @@ createGiottoObjectfromArchR <- function(archRproj, cell_names <- gsub("-1", "", cell_names) ## get gene names - gene_names <- slot(GeneScoreMatrix_summarizedExperiment, - "elementMetadata")[["name"]] + gene_names <- slot( + GeneScoreMatrix_summarizedExperiment, + "elementMetadata" + )[["name"]] ## replace colnames with cell names colnames(GeneScoreMatrix) <- cell_names diff --git a/R/cross_section.R b/R/cross_section.R index 5d0dcb0d9..fbf51fd86 100644 --- a/R/cross_section.R +++ b/R/cross_section.R @@ -31,20 +31,21 @@ #' @param cell_subset_projection_coords 2D PCA coordinates of selected cells #' in the cross section plane #' @returns crossSection object -create_crossSection_object <- function(name = NULL, - method = NULL, - thickness_unit = NULL, - slice_thickness = NULL, - cell_distance_estimate_method = NULL, - extend_ratio = NULL, - plane_equation = NULL, - mesh_grid_n = NULL, - mesh_obj = NULL, - cell_subset = NULL, - cell_subset_spatial_locations = NULL, - cell_subset_projection_locations = NULL, - cell_subset_projection_PCA = NULL, - cell_subset_projection_coords = NULL) { +create_crossSection_object <- function( + name = NULL, + method = NULL, + thickness_unit = NULL, + slice_thickness = NULL, + cell_distance_estimate_method = NULL, + extend_ratio = NULL, + plane_equation = NULL, + mesh_grid_n = NULL, + mesh_obj = NULL, + cell_subset = NULL, + cell_subset_spatial_locations = NULL, + cell_subset_projection_locations = NULL, + cell_subset_projection_PCA = NULL, + cell_subset_projection_coords = NULL) { crossSection_obj <- list( "method" = method, "thickness_unit" = thickness_unit, @@ -69,11 +70,11 @@ create_crossSection_object <- function(name = NULL, #' @param spatial_network_name spatial_network_name #' @returns crossSectionObjects #' @keywords internal -read_crossSection <- function(gobject, - spat_unit = NULL, - name = NULL, - spatial_network_name = NULL) { - +read_crossSection <- function( + gobject, + spat_unit = NULL, + name = NULL, + spatial_network_name = NULL) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -108,8 +109,10 @@ read_crossSection <- function(gobject, } if (!name %in% names(cs_list)) { - stop(sprintf("crossSectionObject '%s' has not been created.", - name)) + stop(sprintf( + "crossSectionObject '%s' has not been created.", + name + )) } crossSection_obj <- cs_list[[name]] @@ -128,11 +131,11 @@ read_crossSection <- function(gobject, #' @param method method #' @returns matrix #' @keywords internal -estimateCellCellDistance <- function(gobject, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - method = c("mean", "median")) { - +estimateCellCellDistance <- function( + gobject, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + method = c("mean", "median")) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -162,16 +165,17 @@ estimateCellCellDistance <- function(gobject, #' @param plane_equation plane_equation #' @returns numeric #' @keywords internal -get_sectionThickness <- function(gobject, - spat_unit = NULL, - thickness_unit = c("cell", "natural"), - slice_thickness = 2, - spatial_network_name = "Delaunay_network", - cell_distance_estimate_method = c("mean", "median"), - plane_equation = NULL) { +get_sectionThickness <- function( + gobject, + spat_unit = NULL, + thickness_unit = c("cell", "natural"), + slice_thickness = 2, + spatial_network_name = "Delaunay_network", + cell_distance_estimate_method = c("mean", "median"), + plane_equation = NULL) { thickness_unit <- match.arg(thickness_unit, c("cell", "natural")) - section_thickness = switch(thickness_unit, + section_thickness <- switch(thickness_unit, "cell" = { CellCellDistance <- estimateCellCellDistance( gobject = gobject, @@ -225,19 +229,23 @@ projection_fun <- function(point_to_project, plane_point, plane_norm) { #' @param mesh_obj mesh_obj #' @returns numeric #' @keywords internal -adapt_aspect_ratio <- function(current_ratio, cell_locations, - sdimx = NULL, sdimy = NULL, sdimz = NULL, - mesh_obj = NULL) { +adapt_aspect_ratio <- function( + current_ratio, cell_locations, + sdimx = NULL, sdimy = NULL, sdimz = NULL, + mesh_obj = NULL) { x_range <- max(cell_locations[[sdimx]]) - min(cell_locations[[sdimx]]) y_range <- max(cell_locations[[sdimy]]) - min(cell_locations[[sdimy]]) z_range <- max(cell_locations[[sdimz]]) - min(cell_locations[[sdimz]]) x_mesh_range <- max(mesh_obj$mesh_grid_lines$mesh_grid_lines_X) - min( - mesh_obj$mesh_grid_lines$mesh_grid_lines_X) + mesh_obj$mesh_grid_lines$mesh_grid_lines_X + ) y_mesh_range <- max(mesh_obj$mesh_grid_lines$mesh_grid_lines_Y) - min( - mesh_obj$mesh_grid_lines$mesh_grid_lines_Y) + mesh_obj$mesh_grid_lines$mesh_grid_lines_Y + ) z_mesh_range <- max(mesh_obj$mesh_grid_lines$mesh_grid_lines_Z) - min( - mesh_obj$mesh_grid_lines$mesh_grid_lines_Z) + mesh_obj$mesh_grid_lines$mesh_grid_lines_Z + ) if (x_mesh_range > x_range) { x_adapt <- x_mesh_range / x_range @@ -256,7 +264,8 @@ adapt_aspect_ratio <- function(current_ratio, cell_locations, } new_ratio <- as.numeric(current_ratio) * c( - as.numeric(x_adapt), as.numeric(y_adapt), as.numeric(z_adapt)) + as.numeric(x_adapt), as.numeric(y_adapt), as.numeric(z_adapt) + ) new_ratio <- new_ratio / min(new_ratio) return(new_ratio) } @@ -311,8 +320,7 @@ find_x_y_ranges <- function(data, extend_ratio) { #' @param mesh_grid_n mesh_grid_n #' @returns 2d mesh grid line object #' @keywords internal -create_2d_mesh_grid_line_obj <- function( - x_min, x_max, y_min, y_max, mesh_grid_n) { +create_2d_mesh_grid_line_obj <- function(x_min, x_max, y_min, y_max, mesh_grid_n) { x_grid <- seq(x_min, x_max, length.out = mesh_grid_n) y_grid <- seq(y_min, y_max, length.out = mesh_grid_n) @@ -366,9 +374,13 @@ reshape_to_data_point <- function(mesh_grid_obj) { reshape_to_mesh_grid_obj <- function(data_points, mesh_grid_n) { if (dim(data_points)[2] == 2) { mesh_grid_lines_X <- matrix( - data_points[, 1], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 1], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_lines_Y <- matrix( - data_points[, 2], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 2], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_obj <- list( "mesh_grid_lines_X" = mesh_grid_lines_X, @@ -376,11 +388,17 @@ reshape_to_mesh_grid_obj <- function(data_points, mesh_grid_n) { ) } else if (dim(data_points)[2] == 3) { mesh_grid_lines_X <- matrix( - data_points[, 1], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 1], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_lines_Y <- matrix( - data_points[, 2], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 2], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_lines_Z <- matrix( - data_points[, 3], nrow = mesh_grid_n, byrow = FALSE) + data_points[, 3], + nrow = mesh_grid_n, byrow = FALSE + ) mesh_grid_obj <- list( "mesh_grid_lines_X" = mesh_grid_lines_X, "mesh_grid_lines_Y" = mesh_grid_lines_Y, @@ -400,17 +418,19 @@ reshape_to_mesh_grid_obj <- function(data_points, mesh_grid_n) { #' @param mesh_grid_n mesh_grid_n #' @returns 3d mesh #' @keywords internal -transform_2d_mesh_to_3d_mesh <- function( - mesh_line_obj_2d, pca_out, center_vec, mesh_grid_n) { +transform_2d_mesh_to_3d_mesh <- function(mesh_line_obj_2d, pca_out, center_vec, mesh_grid_n) { data_point_2d <- reshape_to_data_point(mesh_line_obj_2d) center_mat <- matrix( rep(center_vec, dim(data_point_2d)[1]), - nrow = dim(data_point_2d)[1], byrow = TRUE) + nrow = dim(data_point_2d)[1], byrow = TRUE + ) data_point_3d <- cbind( data_point_2d, - rep(0, dim(data_point_2d)[1])) %*% t((pca_out$rotation)) + center_mat + rep(0, dim(data_point_2d)[1]) + ) %*% t((pca_out$rotation)) + center_mat mesh_grid_line_obj_3d <- reshape_to_mesh_grid_obj( - data_point_3d, mesh_grid_n) + data_point_3d, mesh_grid_n + ) return(mesh_grid_line_obj_3d) } @@ -423,10 +443,12 @@ transform_2d_mesh_to_3d_mesh <- function( #' @keywords internal get_cross_section_coordinates <- function(cell_subset_projection_locations) { cell_subset_projection_PCA <- stats::prcomp( - cell_subset_projection_locations) + cell_subset_projection_locations + ) cell_subset_projection_coords <- cell_subset_projection_PCA$x[ - , c("PC1", "PC2")] + , c("PC1", "PC2") + ] return(cell_subset_projection_coords) } @@ -439,13 +461,14 @@ get_cross_section_coordinates <- function(cell_subset_projection_locations) { #' @param mesh_grid_n mesh_grid_n #' @returns mesh grid lines #' @keywords internal -create_mesh_grid_lines <- function( - cell_subset_projection_locations, extend_ratio, mesh_grid_n) { +create_mesh_grid_lines <- function(cell_subset_projection_locations, extend_ratio, mesh_grid_n) { cell_subset_projection_PCA <- stats::prcomp( - cell_subset_projection_locations) + cell_subset_projection_locations + ) cell_subset_projection_coords <- cell_subset_projection_PCA$x[ - , c("PC1", "PC2")] + , c("PC1", "PC2") + ] x_y_ranges <- find_x_y_ranges(cell_subset_projection_coords, extend_ratio) @@ -457,7 +480,8 @@ create_mesh_grid_lines <- function( mesh_grid_n ) center_vec <- apply( - cell_subset_projection_locations, 2, function(x) mean(x)) + cell_subset_projection_locations, 2, function(x) mean(x) + ) mesh_grid_line_obj_3d <- transform_2d_mesh_to_3d_mesh( mesh_line_obj_2d, cell_subset_projection_PCA, @@ -527,7 +551,7 @@ create_mesh_grid_lines <- function( #' g <- createCrossSection( #' gobject = g, #' method = "equation", -#' equation=c(0,1,0,600), +#' equation = c(0, 1, 0, 600), #' extend_ratio = 0.6, #' name = "new_cs", #' return_gobject = TRUE @@ -535,25 +559,27 @@ create_mesh_grid_lines <- function( #' #' crossSectionPlot(g, name = "new_cs") #' @export -createCrossSection <- function(gobject, - spat_unit = NULL, - spat_loc_name = "raw", - name = "cross_section", - spatial_network_name = "Delaunay_network", - thickness_unit = c("cell", "natural"), - slice_thickness = 2, - cell_distance_estimate_method = "mean", - extend_ratio = 0.2, - method = c("equation", "3 points", "point and norm vector", - "point and two plane vectors"), - equation = NULL, - point1 = NULL, point2 = NULL, point3 = NULL, - normVector = NULL, - planeVector1 = NULL, planeVector2 = NULL, - mesh_grid_n = 20, - return_gobject = TRUE, - verbose = NULL) { - +createCrossSection <- function( + gobject, + spat_unit = NULL, + spat_loc_name = "raw", + name = "cross_section", + spatial_network_name = "Delaunay_network", + thickness_unit = c("cell", "natural"), + slice_thickness = 2, + cell_distance_estimate_method = "mean", + extend_ratio = 0.2, + method = c( + "equation", "3 points", "point and norm vector", + "point and two plane vectors" + ), + equation = NULL, + point1 = NULL, point2 = NULL, point3 = NULL, + normVector = NULL, + planeVector1 = NULL, planeVector2 = NULL, + mesh_grid_n = 20, + return_gobject = TRUE, + verbose = NULL) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -572,8 +598,11 @@ createCrossSection <- function(gobject, method <- match.arg( method, - c("equation", "3 points", "point and norm vector", - "point and two plane vectors")) + c( + "equation", "3 points", "point and norm vector", + "point and two plane vectors" + ) + ) switch(method, "equation" = { @@ -635,10 +664,12 @@ createCrossSection <- function(gobject, # calculate distances to cross section spatial_locations_mat <- cbind( - spatial_locations, as.matrix(rep(1, dim(spatial_locations)[1]))) + spatial_locations, as.matrix(rep(1, dim(spatial_locations)[1])) + ) norm_vec <- function(x) sqrt(sum(x^2)) distance_to_plane_vector <- abs(spatial_locations_mat %*% as.matrix( - plane_equation) / norm_vec(plane_equation[1:3])) + plane_equation + ) / norm_vec(plane_equation[1:3])) # select cells within section ### cell_subset <- distance_to_plane_vector <= max_distance_to_section_plane @@ -657,18 +688,26 @@ createCrossSection <- function(gobject, ## find the projection Xp,Yp,Zp coordinates ## cell_subset_projection_locations <- t(apply( cell_subset_spatial_locations, 1, - function(x) projection_fun(x, plane_point = plane_point, - plane_norm = plane_equation[1:3]))) + function(x) { + projection_fun(x, + plane_point = plane_point, + plane_norm = plane_equation[1:3] + ) + } + )) # get the local coordinates of selected cells on the section plane cell_subset_projection_PCA <- stats::prcomp( - cell_subset_projection_locations) + cell_subset_projection_locations + ) cell_subset_projection_coords <- get_cross_section_coordinates( - cell_subset_projection_locations) + cell_subset_projection_locations + ) # create mesh grid lines for the cross section ### mesh_grid_lines <- create_mesh_grid_lines( - cell_subset_projection_locations, extend_ratio, mesh_grid_n) + cell_subset_projection_locations, extend_ratio, mesh_grid_n + ) mesh_obj <- list("mesh_grid_lines" = mesh_grid_lines) ### save and update the spatial object ### @@ -689,7 +728,6 @@ createCrossSection <- function(gobject, if (return_gobject) { - sn <- getSpatialNetwork( gobject = gobject, spat_unit = spat_unit, @@ -704,8 +742,8 @@ createCrossSection <- function(gobject, if (name %in% cs_names) { vmsg(.v = verbose, sprintf( "name '%s' has already been used, will be overwritten", - name) - ) + name + )) } sn@crossSectionObjects[[name]] <- crossSection_obj @@ -743,17 +781,15 @@ createCrossSection <- function(gobject, #' @md #' @seealso [GiottoVisuals::spatGenePlot3D] and [GiottoVisuals::spatFeatPlot2D] #' @export -crossSectionFeatPlot <- function( - gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - default_save_name = "crossSectionGenePlot", - ...) { - +crossSectionFeatPlot <- function(gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + default_save_name = "crossSectionGenePlot", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -831,16 +867,16 @@ crossSectionFeatPlot <- function( #' @details Description of parameters. #' @export #' @seealso \code{\link{crossSectionPlot}} -crossSectionPlot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - default_save_name = "crossSectionPlot", - ...) { - +crossSectionPlot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + default_save_name = "crossSectionPlot", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -917,18 +953,17 @@ crossSectionPlot <- function(gobject, #' @return ggplot #' @details Description of parameters. #' @export -crossSectionFeatPlot3D <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - show_other_cells = TRUE, - other_cell_color = alpha("lightgrey", 0), - default_save_name = "crossSectionGenePlot3D", - ... -) { - +crossSectionFeatPlot3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + show_other_cells = TRUE, + other_cell_color = alpha("lightgrey", 0), + default_save_name = "crossSectionGenePlot3D", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -942,7 +977,8 @@ crossSectionFeatPlot3D <- function(gobject, gobject = gobject, spat_unit = spat_unit, name = name, - spatial_network_name = spatial_network_name) + spatial_network_name = spatial_network_name + ) } cell_subset <- crossSection_obj$cell_subset @@ -986,18 +1022,17 @@ crossSectionFeatPlot3D <- function(gobject, #' @returns ggplot #' @details Description of parameters. #' @export -crossSectionPlot3D <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - show_other_cells = TRUE, - other_cell_color = alpha("lightgrey", 0), - default_save_name = "crossSection3D", - ... -) { - +crossSectionPlot3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + show_other_cells = TRUE, + other_cell_color = alpha("lightgrey", 0), + default_save_name = "crossSection3D", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -1066,24 +1101,23 @@ crossSectionPlot3D <- function(gobject, #' @returns ggplot #' @details Description of parameters. #' @export -insertCrossSectionSpatPlot3D <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - mesh_grid_color = "#1f77b4", - mesh_grid_width = 3, - mesh_grid_style = "dot", - sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", - show_other_cells = FALSE, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - default_save_name = "spat3D_with_cross_section", - ... -) { - +insertCrossSectionSpatPlot3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + mesh_grid_color = "#1f77b4", + mesh_grid_width = 3, + mesh_grid_style = "dot", + sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", + show_other_cells = FALSE, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + default_save_name = "spat3D_with_cross_section", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -1119,14 +1153,17 @@ insertCrossSectionSpatPlot3D <- function(gobject, ) for (i in seq_len(dim( - crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X)[2])) { + crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X + )[2])) { pl <- pl %>% plotly::add_trace( x = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X[, i], y = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_Y[, i], z = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_Z[, i], mode = "lines", type = "scatter3d", - line = list(color = mesh_grid_color, - width = mesh_grid_width, dash = mesh_grid_style) + line = list( + color = mesh_grid_color, + width = mesh_grid_width, dash = mesh_grid_style + ) ) } @@ -1136,13 +1173,14 @@ insertCrossSectionSpatPlot3D <- function(gobject, set_defaults = TRUE ) - current_ratio <- plotly_axis_scale_3D(cell_locations = sl, + current_ratio <- plotly_axis_scale_3D( + cell_locations = sl, sdimx = sdimx, sdimy = sdimy, sdimz = sdimz, mode = axis_scale, custom_ratio = custom_ratio ) new_ratio <- adapt_aspect_ratio( - current_ratio = current_ratio, + current_ratio = current_ratio, cell_locations = sl, sdimx = sdimx, sdimy = sdimy, sdimz = sdimz, mesh_obj = crossSection_obj$mesh_obj @@ -1196,26 +1234,24 @@ insertCrossSectionSpatPlot3D <- function(gobject, #' @details Description of parameters. #' @md #' @export -insertCrossSectionFeatPlot3D <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - mesh_grid_color = "#1f77b4", - mesh_grid_width = 3, - mesh_grid_style = "dot", - sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", - show_other_cells = FALSE, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - show_plot = NULL, return_plot = NULL, save_plot = NULL, - save_param = list(), - default_save_name = "spatGenePlot3D_with_cross_section", - ...) { - +insertCrossSectionFeatPlot3D <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + mesh_grid_color = "#1f77b4", + mesh_grid_width = 3, + mesh_grid_style = "dot", + sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", + show_other_cells = FALSE, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + show_plot = NULL, return_plot = NULL, save_plot = NULL, + save_param = list(), + default_save_name = "spatGenePlot3D_with_cross_section", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -1247,15 +1283,18 @@ insertCrossSectionFeatPlot3D <- function( ) for (i in seq_len(dim( - crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X)[2])) { + crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X + )[2])) { pl <- pl %>% plotly::add_trace( x = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_X[, i], y = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_Y[, i], z = crossSection_obj$mesh_obj$mesh_grid_lines$mesh_grid_lines_Z[, i], mode = "lines+markers", type = "scatter3d", color = mesh_grid_color, marker = list(color = alpha(mesh_grid_color, 0)), - line = list(color = mesh_grid_color, - width = mesh_grid_width, dash = mesh_grid_style) + line = list( + color = mesh_grid_color, + width = mesh_grid_width, dash = mesh_grid_style + ) ) } @@ -1266,7 +1305,8 @@ insertCrossSectionFeatPlot3D <- function( ) - current_ratio <- plotly_axis_scale_3D(cell_locations = sl, + current_ratio <- plotly_axis_scale_3D( + cell_locations = sl, sdimx = sdimx, sdimy = sdimy, sdimz = sdimz, mode = axis_scale, custom_ratio = custom_ratio ) diff --git a/R/differential_expression.R b/R/differential_expression.R index 644f28c58..5eead79a0 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -35,18 +35,19 @@ #' #' findScranMarkers(g, cluster_column = "leiden_clus") #' @export -findScranMarkers <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - verbose = TRUE, - ...) { +findScranMarkers <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + verbose = TRUE, + ...) { # verify if optional package is installed package_check(pkg_name = "scran", repository = "Bioc") @@ -76,8 +77,11 @@ findScranMarkers <- function(gobject, # expression data values <- match.arg( expression_values, - choices = unique(c("normalized", "scaled", "custom", - expression_values))) + choices = unique(c( + "normalized", "scaled", "custom", + expression_values + )) + ) expr_data <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -104,20 +108,23 @@ findScranMarkers <- function(gobject, expr_data <- expr_data[, colnames(expr_data) %in% subset_cell_IDs] } else if (!is.null(group_1) & !is.null(group_2)) { cell_metadata <- cell_metadata[ - get(cluster_column) %in% c(group_1, group_2)] + get(cluster_column) %in% c(group_1, group_2) + ] # create new pairwise group if (!is.null(group_1_name)) { - if (!is.character(group_1_name)) + if (!is.character(group_1_name)) { stop("group_1_name needs to be a character") + } group_1_name <- group_1_name } else { group_1_name <- paste0(group_1, collapse = "_") } if (!is.null(group_2_name)) { - if (!is.character(group_2_name)) + if (!is.character(group_2_name)) { stop("group_2_name needs to be a character") + } group_2_name <- group_2_name } else { group_2_name <- paste0(group_2, collapse = "_") @@ -128,7 +135,8 @@ findScranMarkers <- function(gobject, pairwise_select_comp <- NULL cell_metadata[, pairwise_select_comp := ifelse( - get(cluster_column) %in% group_1, group_1_name, group_2_name)] + get(cluster_column) %in% group_1, group_1_name, group_2_name + )] cluster_column <- "pairwise_select_comp" @@ -140,7 +148,8 @@ findScranMarkers <- function(gobject, ## SCRAN ## marker_results <- scran::findMarkers( - x = expr_data, groups = cell_metadata[[cluster_column]], ...) + x = expr_data, groups = cell_metadata[[cluster_column]], ... + ) # data.table variables genes <- cluster <- feats <- NULL @@ -180,18 +189,19 @@ findScranMarkers <- function(gobject, #' #' findScranMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findScranMarkers_one_vs_all <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - pval = 0.01, - logFC = 0.5, - min_feats = 10, - min_genes = NULL, - verbose = TRUE, - ...) { +findScranMarkers_one_vs_all <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + pval = 0.01, + logFC = 0.5, + min_feats = 10, + min_genes = NULL, + verbose = TRUE, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -203,12 +213,13 @@ findScranMarkers_one_vs_all <- function(gobject, package_check(pkg_name = "scran", repository = "Bioc") # print message with information # - if (verbose) - message("using 'Scran' to detect marker feats. If used in published + if (verbose) { + message("using 'Scran' to detect marker feats. If used in published research, please cite: Lun ATL, McCarthy DJ, Marioni JC (2016). 'A step-by-step workflow for low-level analysis of single-cell RNA-seq data with Bioconductor.' F1000Res., 5, 2122. doi: 10.12688/f1000research.9501.2. ") + } # Set feat_type and spat_unit @@ -225,8 +236,11 @@ findScranMarkers_one_vs_all <- function(gobject, # expression data values <- match.arg( expression_values, - choices = unique(c("normalized", "scaled", "custom", - expression_values))) + choices = unique(c( + "normalized", "scaled", "custom", + expression_values + )) + ) # cluster column cell_metadata <- getCellMetadata(gobject, @@ -294,7 +308,8 @@ findScranMarkers_one_vs_all <- function(gobject, unique(x$cluster) == selected_clus })) selected_table <- data.table::as.data.table( - markers[select_bool]) + markers[select_bool] + ) # remove summary column from scran output if present col_ind_keep <- !grepl("summary", colnames(selected_table)) @@ -302,9 +317,11 @@ findScranMarkers_one_vs_all <- function(gobject, # change logFC.xxx name to logFC data.table::setnames( - selected_table, colnames(selected_table)[4], "logFC") + selected_table, colnames(selected_table)[4], "logFC" + ) data.table::setnames( - selected_table, colnames(selected_table)[5], "feats") + selected_table, colnames(selected_table)[5], "feats" + ) # filter selected table filtered_table <- selected_table[logFC > 0] @@ -314,7 +331,8 @@ findScranMarkers_one_vs_all <- function(gobject, p.value <- ranking <- NULL filtered_table <- filtered_table[ - (p.value <= pval & logFC >= logFC) | (ranking <= min_feats)] + (p.value <= pval & logFC >= logFC) | (ranking <= min_feats) + ] pb(message = c("cluster ", clus_i, "/", length(uniq_clusters))) return(filtered_table) @@ -385,22 +403,23 @@ findScranMarkers_one_vs_all <- function(gobject, #' #' findGiniMarkers(g, cluster_column = "leiden_clus") #' @export -findGiniMarkers <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - min_expr_gini_score = 0.2, - min_det_gini_score = 0.2, - detection_threshold = 0, - rank_score = 1, - min_feats = 5, - min_genes = NULL) { +findGiniMarkers <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + min_expr_gini_score = 0.2, + min_det_gini_score = 0.2, + detection_threshold = 0, + rank_score = 1, + min_feats = 5, + min_genes = NULL) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -422,7 +441,8 @@ findGiniMarkers <- function(gobject, ## select expression values values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) # cluster column @@ -441,7 +461,8 @@ findGiniMarkers <- function(gobject, # subset clusters if (!is.null(subset_clusters)) { cell_metadata[] <- cell_metadata[][ - get(cluster_column) %in% subset_clusters] + get(cluster_column) %in% subset_clusters + ] subset_cell_IDs <- cell_metadata[][["cell_ID"]] gobject <- subsetGiotto( gobject = gobject, @@ -451,20 +472,23 @@ findGiniMarkers <- function(gobject, ) } else if (!is.null(group_1) & !is.null(group_2)) { cell_metadata[] <- cell_metadata[][ - get(cluster_column) %in% c(group_1, group_2)] + get(cluster_column) %in% c(group_1, group_2) + ] # create new pairwise group if (!is.null(group_1_name)) { - if (!is.character(group_1_name)) + if (!is.character(group_1_name)) { stop("group_1_name needs to be a character") + } group_1_name <- group_1_name } else { group_1_name <- paste0(group_1, collapse = "_") } if (!is.null(group_2_name)) { - if (!is.character(group_2_name)) + if (!is.character(group_2_name)) { stop("group_2_name needs to be a character") + } group_2_name <- group_2_name } else { group_2_name <- paste0(group_2, collapse = "_") @@ -473,7 +497,8 @@ findGiniMarkers <- function(gobject, pairwise_select_comp <- NULL cell_metadata[][, pairwise_select_comp := ifelse( - get(cluster_column) %in% group_1, group_1_name, group_2_name)] + get(cluster_column) %in% group_1, group_1_name, group_2_name + )] cluster_column <- "pairwise_select_comp" @@ -526,9 +551,11 @@ findGiniMarkers <- function(gobject, detection_threshold = detection_threshold ) aggr_detection_sc_clusters_DT <- data.table::as.data.table( - aggr_detection_sc_clusters) + aggr_detection_sc_clusters + ) aggr_detection_sc_clusters_DT[, feats := rownames( - aggr_detection_sc_clusters)] + aggr_detection_sc_clusters + )] aggr_detection_sc_clusters_DT_melt <- data.table::melt.data.table( aggr_detection_sc_clusters_DT, variable.name = "cluster", @@ -541,15 +568,20 @@ findGiniMarkers <- function(gobject, expression_gini <- detection_gini <- detection <- NULL aggr_sc_clusters_DT_melt[, expression_gini := mygini_fun( - expression), by = feats] + expression + ), by = feats] aggr_detection_sc_clusters_DT_melt[, detection_gini := mygini_fun( - detection), by = feats] + detection + ), by = feats] ## combine - aggr_sc <- cbind(aggr_sc_clusters_DT_melt, - aggr_detection_sc_clusters_DT_melt[ - , .(detection, detection_gini)]) + aggr_sc <- cbind( + aggr_sc_clusters_DT_melt, + aggr_detection_sc_clusters_DT_melt[ + , .(detection, detection_gini) + ] + ) ## create combined rank @@ -561,13 +593,17 @@ findGiniMarkers <- function(gobject, aggr_sc[, expression_rank := rank(-expression), by = feats] aggr_sc[, expression_rank := scales::rescale( - expression_rank, to = c(1, 0.1)), by = cluster] + expression_rank, + to = c(1, 0.1) + ), by = cluster] # detection rank for each feat in all samples # rescale detection rank range between 1 and 0.1 aggr_sc[, detection_rank := rank(-detection), by = feats] aggr_sc[, detection_rank := scales::rescale( - detection_rank, to = c(1, 0.1)), by = cluster] + detection_rank, + to = c(1, 0.1) + ), by = cluster] # create combine score based on rescaled ranks and gini scores @@ -590,7 +626,8 @@ findGiniMarkers <- function(gobject, original_uniq_cluster_names <- unique(cell_metadata[][[cluster_column]]) if (sum(grepl("cluster_", original_uniq_cluster_names)) == 0) { top_feats_scores_filtered[, cluster := gsub( - x = cluster, "cluster_", "")] + x = cluster, "cluster_", "" + )] } return(top_feats_scores_filtered) @@ -623,19 +660,20 @@ findGiniMarkers <- function(gobject, #' #' findGiniMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findGiniMarkers_one_vs_all <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - min_feats = 4, - min_genes = NULL, - verbose = TRUE) { +findGiniMarkers_one_vs_all <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + min_feats = 4, + min_genes = NULL, + verbose = TRUE) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -657,7 +695,8 @@ findGiniMarkers_one_vs_all <- function(gobject, ## select expression values values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) # cluster column @@ -767,21 +806,24 @@ findGiniMarkers_one_vs_all <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' findMastMarkers(gobject = g, cluster_column = "leiden_clus", group_1 = 1, -#' group_2 = 2) +#' findMastMarkers( +#' gobject = g, cluster_column = "leiden_clus", group_1 = 1, +#' group_2 = 2 +#' ) #' @export -findMastMarkers <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - adjust_columns = NULL, - verbose = FALSE, - ...) { +findMastMarkers <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + adjust_columns = NULL, + verbose = FALSE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -797,16 +839,18 @@ findMastMarkers <- function(gobject, package_check(pkg_name = "MAST", repository = "Bioc") # print message with information # - if (verbose) - message("using 'MAST' to detect marker feats. If used in published + if (verbose) { + message("using 'MAST' to detect marker feats. If used in published research, please cite: McDavid A, Finak G, Yajima M (2020). MAST: Model-based Analysis of Single Cell Transcriptomics. R package version 1.14.0, https://github.com/RGLab/MAST/.") + } ## select expression values to use values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) ## cluster column cell_metadata <- getCellMetadata(gobject, @@ -826,7 +870,8 @@ findMastMarkers <- function(gobject, ## subset data based on group_1 and group_2 cell_metadata[] <- cell_metadata[][ - get(cluster_column) %in% c(group_1, group_2)] + get(cluster_column) %in% c(group_1, group_2) + ] if (nrow(cell_metadata[]) == 0) { stop("there are no cells for group_1 or group_2, check cluster column") } @@ -839,7 +884,8 @@ findMastMarkers <- function(gobject, pairwise_select_comp <- NULL cell_metadata[][, pairwise_select_comp := ifelse( - get(cluster_column) %in% group_1, group_1_name, group_2_name)] + get(cluster_column) %in% group_1, group_1_name, group_2_name + )] if (nrow(cell_metadata[][pairwise_select_comp == group_1_name]) == 0) { stop("there are no cells for group_1, check cluster column") @@ -873,8 +919,11 @@ findMastMarkers <- function(gobject, # expression data values <- match.arg( expression_values, - choices = unique(c("normalized", "scaled", "custom", - expression_values))) + choices = unique(c( + "normalized", "scaled", "custom", + expression_values + )) + ) expr_data <- getExpression( gobject = gobject, feat_type = feat_type, @@ -914,7 +963,8 @@ findMastMarkers <- function(gobject, if (!is.null(adjust_columns)) { myformula <- stats::as.formula(paste0( "~ 1 + ", cluster_column, " + ", - paste(adjust_columns, collapse = " + "))) + paste(adjust_columns, collapse = " + ") + )) } else { myformula <- stats::as.formula(paste0("~ 1 + ", cluster_column)) } @@ -929,12 +979,15 @@ findMastMarkers <- function(gobject, sample <- paste0(cluster_column, group_1_name) summaryCond <- MAST::summary(zlmCond, doLRT = sample) summaryDt <- summaryCond$datatable - fcHurdle <- merge(summaryDt[ - contrast == sample & component == "H", - .(primerid, `Pr(>Chisq)`)], # hurdle P values + fcHurdle <- merge( + summaryDt[ + contrast == sample & component == "H", + .(primerid, `Pr(>Chisq)`) + ], # hurdle P values summaryDt[ contrast == sample & component == "logFC", - .(primerid, coef, ci.hi, ci.lo)], + .(primerid, coef, ci.hi, ci.lo) + ], by = "primerid" ) # logFC coefficients fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")] @@ -976,19 +1029,20 @@ findMastMarkers <- function(gobject, #' #' findMastMarkers_one_vs_all(gobject = g, cluster_column = "leiden_clus") #' @export -findMastMarkers_one_vs_all <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - adjust_columns = NULL, - pval = 0.001, - logFC = 1, - min_feats = 10, - min_genes = NULL, - verbose = TRUE, - ...) { +findMastMarkers_one_vs_all <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + adjust_columns = NULL, + pval = 0.001, + logFC = 1, + min_feats = 10, + min_genes = NULL, + verbose = TRUE, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -1011,11 +1065,12 @@ findMastMarkers_one_vs_all <- function(gobject, package_check(pkg_name = "MAST", repository = "Bioc") # print message with information # - if (verbose) + if (verbose) { message("using 'MAST' to detect marker feats. If used in published research, please cite: McDavid A, Finak G, Yajima M (2020). MAST: Model-based Analysis of Single Cell Transcriptomics. R package version 1.14.0, https://github.com/RGLab/MAST/.") + } ## cluster column @@ -1087,7 +1142,8 @@ findMastMarkers_one_vs_all <- function(gobject, result_dt[, ranking := seq_len(.N), by = "cluster"] filtered_result_dt <- result_dt[ - ranking <= min_feats | (fdr < pval & coef > logFC)] + ranking <= min_feats | (fdr < pval & coef > logFC) + ] return(filtered_result_dt) } @@ -1134,25 +1190,26 @@ findMastMarkers_one_vs_all <- function(gobject, #' #' findMarkers(g, cluster_column = "leiden_clus") #' @export -findMarkers <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column = NULL, - method = c("scran", "gini", "mast"), - subset_clusters = NULL, - group_1 = NULL, - group_2 = NULL, - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - min_feats = 4, - min_genes = NULL, - group_1_name = NULL, - group_2_name = NULL, - adjust_columns = NULL, - ...) { +findMarkers <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column = NULL, + method = c("scran", "gini", "mast"), + subset_clusters = NULL, + group_1 = NULL, + group_2 = NULL, + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + min_feats = 4, + min_genes = NULL, + group_1_name = NULL, + group_2_name = NULL, + adjust_columns = NULL, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -1256,27 +1313,28 @@ findMarkers <- function(gobject, #' #' findMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findMarkers_one_vs_all <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - method = c("scran", "gini", "mast"), - # scran & mast - pval = 0.01, - logFC = 0.5, - min_feats = 10, - min_genes = NULL, - # gini - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - # mast specific - adjust_columns = NULL, - verbose = TRUE, - ...) { +findMarkers_one_vs_all <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + method = c("scran", "gini", "mast"), + # scran & mast + pval = 0.01, + logFC = 0.5, + min_feats = 10, + min_genes = NULL, + # gini + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + # mast specific + adjust_columns = NULL, + verbose = TRUE, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes diff --git a/R/dimension_reduction.R b/R/dimension_reduction.R index 1d5107a2c..0e4bcbf70 100644 --- a/R/dimension_reduction.R +++ b/R/dimension_reduction.R @@ -19,13 +19,14 @@ #' @param seed_number seed number to use #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_factominer <- function(x, - ncp = 100, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - ...) { +.run_pca_factominer <- function( + x, + ncp = 100, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + ...) { # verify if optional package is installed package_check(pkg_name = "FactoMineR", repository = "CRAN") @@ -47,7 +48,8 @@ } pca_res <- FactoMineR::PCA( - X = x, ncp = ncp, scale.unit = scale, graph = FALSE, ...) + X = x, ncp = ncp, scale.unit = scale, graph = FALSE, ... + ) # exit seed if (isTRUE(set_seed)) { @@ -64,12 +66,15 @@ # coordinates coords <- sweep(pca_res$var$coord, - 2, sqrt(eigenvalues[seq_len(ncp)]), FUN = "/") + 2, sqrt(eigenvalues[seq_len(ncp)]), + FUN = "/" + ) rownames(coords) <- colnames(x) colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } else { if (ncp > ncol(x)) { warning("ncp > ncol(x), will be set to ncol(x)") @@ -82,7 +87,8 @@ } pca_res <- FactoMineR::PCA( - X = x, ncp = ncp, scale.unit = scale, graph = FALSE, ...) + X = x, ncp = ncp, scale.unit = scale, graph = FALSE, ... + ) # exit seed if (isTRUE(set_seed)) { @@ -94,7 +100,9 @@ # PC loading loadings <- sweep( - pca_res$var$coord, 2, sqrt(eigenvalues[seq_len(ncp)]), FUN = "/") + pca_res$var$coord, 2, sqrt(eigenvalues[seq_len(ncp)]), + FUN = "/" + ) rownames(loadings) <- colnames(x) colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) @@ -104,11 +112,14 @@ colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } - vmsg(.is_debug = TRUE, - "finished .run_pca_factominer, method == factominer") + vmsg( + .is_debug = TRUE, + "finished .run_pca_factominer, method == factominer" + ) return(result) } @@ -128,16 +139,17 @@ #' @param BPPARAM BiocParallelParam object #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_biocsingular <- function(x, - ncp = 100, - center = TRUE, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - BSPARAM = c("irlba", "exact", "random"), - BPPARAM = BiocParallel::SerialParam(), - ...) { +.run_pca_biocsingular <- function( + x, + ncp = 100, + center = TRUE, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + BSPARAM = c("irlba", "exact", "random"), + BPPARAM = BiocParallel::SerialParam(), + ...) { BSPARAM <- match.arg(BSPARAM, choices = c("irlba", "exact", "random")) min_ncp <- min(dim(x)) @@ -195,7 +207,8 @@ rownames(coords) <- colnames(x) colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } else { if (BSPARAM == "irlba") { pca_res <- BiocSingular::runPCA( @@ -234,7 +247,8 @@ rownames(coords) <- rownames(x) colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } # exit seed @@ -263,12 +277,13 @@ #' @param verbose verbosity #' @keywords internal #' @returns subsetted matrix based on selected features -.create_feats_to_use_matrix <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - sel_matrix, - feats_to_use, - verbose = FALSE) { +.create_feats_to_use_matrix <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + sel_matrix, + feats_to_use, + verbose = FALSE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -295,7 +310,8 @@ used to select highly variable features" ) feats_to_use <- feat_metadata[ - get(feats_to_use) == "yes"][["feat_ID"]] + get(feats_to_use) == "yes" + ][["feat_ID"]] sel_matrix <- sel_matrix[rownames(sel_matrix) %in% feats_to_use, ] } else { vmsg( @@ -305,13 +321,17 @@ ) } } else { - vmsg(.v = verbose, - "a custom vector of genes will be used to subset the matrix") + vmsg( + .v = verbose, + "a custom vector of genes will be used to subset the matrix" + ) sel_matrix <- sel_matrix[rownames(sel_matrix) %in% feats_to_use, ] } - vmsg(.v = verbose, .is_debug = TRUE, - "class of selected matrix: ", class(sel_matrix)) + vmsg( + .v = verbose, .is_debug = TRUE, + "class of selected matrix: ", class(sel_matrix) + ) return(sel_matrix) } @@ -360,24 +380,25 @@ #' #' runPCA(g) #' @export -runPCA <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - name = NULL, - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba", "exact", "random", "factominer"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runPCA <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + name = NULL, + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba", "exact", "random", "factominer"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -401,7 +422,8 @@ runPCA <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, feat_type = feat_type, @@ -497,7 +519,6 @@ runPCA <- function(gobject, if (isTRUE(return_gobject)) { - if (reduction == "cells") { my_row_names <- colnames(expr_values) } else { @@ -561,17 +582,18 @@ runPCA <- function(gobject, #' @param verbose verbosity level #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_biocsingular_irlba_projection <- function(x, - ncp = 100, - center = TRUE, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - BPPARAM = BiocParallel::SerialParam(), - random_subset = 500, - verbose = TRUE, - ...) { +.run_pca_biocsingular_irlba_projection <- function( + x, + ncp = 100, + center = TRUE, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + BPPARAM = BiocParallel::SerialParam(), + random_subset = 500, + verbose = TRUE, + ...) { x <- scale(x, center = center, scale = scale) min_ncp <- min(dim(x)) @@ -646,7 +668,8 @@ runPCA <- function(gobject, colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } else { # store cell ID order information cell_ID_order <- rownames(x) @@ -696,7 +719,8 @@ runPCA <- function(gobject, colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords) + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) } return(result) @@ -751,25 +775,26 @@ runPCA <- function(gobject, #' #' runPCAprojection(g) #' @export -runPCAprojection <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - random_subset = 500, - name = "pca.projection", - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runPCAprojection <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + random_subset = 500, + name = "pca.projection", + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -793,7 +818,8 @@ runPCAprojection <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, feat_type = feat_type, @@ -983,27 +1009,26 @@ runPCAprojection <- function(gobject, #' runPCAprojectionBatch(g, feats_to_use = NULL) #' @export runPCAprojectionBatch <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - random_subset = 500, - batch_number = 5, - name = "pca.projection.batch", - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ... -) { + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + random_subset = 500, + batch_number = 5, + name = "pca.projection.batch", + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1027,7 +1052,8 @@ runPCAprojectionBatch <- function( # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, feat_type = feat_type, @@ -1129,7 +1155,8 @@ runPCAprojectionBatch <- function( } else { for (dimension in seq_len(ncol(pca_object[["coords"]]))) { sum_evaluation <- sum(sign(pca_batch_results[[1]][[ - "coords"]][seq_len(20), dimension]) * + "coords" + ]][seq_len(20), dimension]) * sign(pca_object[["coords"]][seq_len(20), dimension])) if (sum_evaluation < 0) { pca_object$coords[, dimension] <- -1 * pca_object$coords[, dimension] @@ -1148,7 +1175,9 @@ runPCAprojectionBatch <- function( # eigenvalues eigenvalues_list <- lapply( - pca_batch_results, FUN = function(x) x$eigenvalues) + pca_batch_results, + FUN = function(x) x$eigenvalues + ) eigenvalues_matrix <- do.call("cbind", eigenvalues_list) eigenvalues_mean <- rowMeans_flex(eigenvalues_matrix) @@ -1157,7 +1186,8 @@ runPCAprojectionBatch <- function( coords_vector <- do.call("c", coords_list) coords_array <- array( data = coords_vector, - dim = c(ncol(expr_values), ncp, length(pca_batch_results))) + dim = c(ncol(expr_values), ncp, length(pca_batch_results)) + ) coords_all <- apply(coords_array, MARGIN = seq_len(2), function(arr) { mean(arr, na.rm = TRUE) }) @@ -1169,18 +1199,22 @@ runPCAprojectionBatch <- function( loadings_vector <- do.call("c", loadings_list) loadings_array <- array( data = loadings_vector, - dim = c(nrow(expr_values), ncp, length(pca_batch_results))) + dim = c(nrow(expr_values), ncp, length(pca_batch_results)) + ) loadings_all <- apply( - loadings_array, MARGIN = seq_len(2), function(arr) { - mean(arr, na.rm = TRUE) - }) + loadings_array, + MARGIN = seq_len(2), function(arr) { + mean(arr, na.rm = TRUE) + } + ) rownames(loadings_all) <- rownames(pca_batch_results[[1]][["loadings"]]) colnames(loadings_all) <- colnames(pca_batch_results[[1]][["loadings"]]) pca_object <- list( eigenvalues = eigenvalues_mean, - loadings = loadings_all, coords = coords_all) + loadings = loadings_all, coords = coords_all + ) } else { pca_batch_results <- list() @@ -1217,7 +1251,8 @@ runPCAprojectionBatch <- function( } else { for (dimension in seq_len(ncol(pca_object[["coords"]]))) { sum_evaluation <- sum(sign(pca_batch_results[[1]][[ - "coords"]][seq_len(20), dimension]) * + "coords" + ]][seq_len(20), dimension]) * sign(pca_object[["coords"]][seq_len(20), dimension])) if (sum_evaluation < 0) { pca_object$coords[, dimension] <- -1 * pca_object$coords[, dimension] @@ -1236,7 +1271,9 @@ runPCAprojectionBatch <- function( # eigenvalues eigenvalues_list <- lapply( - pca_batch_results, FUN = function(x) x$eigenvalues) + pca_batch_results, + FUN = function(x) x$eigenvalues + ) eigenvalues_matrix <- do.call("cbind", eigenvalues_list) eigenvalues_mean <- rowMeans_flex(eigenvalues_matrix) @@ -1245,7 +1282,8 @@ runPCAprojectionBatch <- function( coords_vector <- do.call("c", coords_list) coords_array <- array( data = coords_vector, - dim = c(ncol(expr_values), ncp, length(pca_batch_results))) + dim = c(ncol(expr_values), ncp, length(pca_batch_results)) + ) coords_all <- apply(coords_array, MARGIN = seq_len(2), function(arr) { mean(arr, na.rm = TRUE) }) @@ -1257,18 +1295,22 @@ runPCAprojectionBatch <- function( loadings_vector <- do.call("c", loadings_list) loadings_array <- array( data = loadings_vector, - dim = c(nrow(expr_values), ncp, length(pca_batch_results))) + dim = c(nrow(expr_values), ncp, length(pca_batch_results)) + ) loadings_all <- apply( - loadings_array, MARGIN = seq_len(2), function(arr) { - mean(arr, na.rm = TRUE) - }) + loadings_array, + MARGIN = seq_len(2), function(arr) { + mean(arr, na.rm = TRUE) + } + ) rownames(loadings_all) <- rownames(pca_batch_results[[1]][["loadings"]]) colnames(loadings_all) <- colnames(pca_batch_results[[1]][["loadings"]]) pca_object <- list( eigenvalues = eigenvalues_mean, - loadings = loadings_all, coords = coords_all) + loadings = loadings_all, coords = coords_all + ) } @@ -1356,26 +1398,27 @@ runPCAprojectionBatch <- function( #' #' screePlot(g) #' @export -screePlot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - name = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - method = c("irlba", "exact", "random", "factominer"), - rev = FALSE, - feats_to_use = NULL, - center = FALSE, - scale_unit = FALSE, - ncp = 100, - ylim = c(0, 20), - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "screePlot", - ...) { +screePlot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + name = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + method = c("irlba", "exact", "random", "factominer"), + rev = FALSE, + feats_to_use = NULL, + center = FALSE, + scale_unit = FALSE, + ncp = 100, + ylim = c(0, 20), + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "screePlot", + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1410,22 +1453,30 @@ screePlot <- function(gobject, # if pca already exists plot if (!is.null(pca_obj)) { - if (isTRUE(verbose)) - wrap_msg("PCA with name: ", name, - " already exists and will be used for the screeplot") + if (isTRUE(verbose)) { + wrap_msg( + "PCA with name: ", name, + " already exists and will be used for the screeplot" + ) + } screeplot <- create_screeplot( - eigs = slot(pca_obj, "misc")$eigenvalues, ncp = ncp, ylim = ylim) + eigs = slot(pca_obj, "misc")$eigenvalues, ncp = ncp, ylim = ylim + ) } else { # if pca doesn't exists, then create pca and then plot - if (isTRUE(verbose)) - wrap_msg("PCA with name: ", name, - " does NOT exist, PCA will be done first") + if (isTRUE(verbose)) { + wrap_msg( + "PCA with name: ", name, + " does NOT exist, PCA will be done first" + ) + } # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1470,7 +1521,8 @@ screePlot <- function(gobject, } else if (method == "factominer") { pca_object <- .run_pca_factominer( x = t_flex(expr_values), - scale = scale_unit, ncp = ncp, rev = rev, ...) + scale = scale_unit, ncp = ncp, rev = rev, ... + ) } else { stop("only PCA methods from the irlba and factominer package have been implemented") @@ -1493,7 +1545,8 @@ screePlot <- function(gobject, screeplot <- create_screeplot( eigs = slot(dimObject, "misc")$eigenvalues, - ncp = ncp, ylim = ylim) + ncp = ncp, ylim = ylim + ) } } @@ -1554,19 +1607,23 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { pl <- pl + ggplot2::theme_bw() pl <- pl + ggplot2::geom_bar( data = screeDT[seq_len(ncp)], - ggplot2::aes(x = PC, y = var_expl), stat = "identity") + ggplot2::aes(x = PC, y = var_expl), stat = "identity" + ) pl <- pl + ggplot2::coord_cartesian(ylim = ylim) pl <- pl + ggplot2::theme( - axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1)) + axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::labs(x = "", y = "% of variance explained per PC") cpl <- ggplot2::ggplot() cpl <- cpl + ggplot2::theme_bw() cpl <- cpl + ggplot2::geom_bar( data = screeDT[seq_len(ncp)], - ggplot2::aes(x = PC, y = var_expl_cum), stat = "identity") + ggplot2::aes(x = PC, y = var_expl_cum), stat = "identity" + ) cpl <- cpl + ggplot2::theme(axis.text.x = ggplot2::element_text( - angle = 45, hjust = 1, vjust = 1)) + angle = 45, hjust = 1, vjust = 1 + )) cpl <- cpl + ggplot2::labs(x = "", y = "cumulative % of variance explained") savelist <- list(pl, cpl) @@ -1615,24 +1672,25 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { #' #' jackstrawPlot(gobject = g) #' @export -jackstrawPlot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - feats_to_use = NULL, - center = FALSE, - scale_unit = FALSE, - ncp = 20, - ylim = c(0, 1), - iter = 10, - threshold = 0.01, - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "jackstrawPlot") { +jackstrawPlot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + feats_to_use = NULL, + center = FALSE, + scale_unit = FALSE, + ncp = 20, + ylim = c(0, 1), + iter = 10, + threshold = 0.01, + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "jackstrawPlot") { package_check(pkg_name = "jackstraw", repository = "CRAN") # Set feat_type and spat_unit @@ -1647,12 +1705,13 @@ jackstrawPlot <- function(gobject, ) # print message with information # - if (verbose) + if (verbose) { message("using 'jackstraw' to identify significant PCs If used in published research, please cite: Neo Christopher Chung and John D. Storey (2014). 'Statistical significance of variables driving systematic variation in high-dimensional data. Bioinformatics") + } # select direction of reduction reduction <- match.arg(reduction, c("cells", "feats")) @@ -1660,7 +1719,8 @@ jackstrawPlot <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1686,22 +1746,29 @@ jackstrawPlot <- function(gobject, if (reduction == "cells") { if (scale_unit == TRUE | center == TRUE) { expr_values <- t_flex(scale( - t_flex(expr_values), center = center, scale = scale_unit)) + t_flex(expr_values), + center = center, scale = scale_unit + )) } jtest <- jackstraw::permutationPA( dat = as.matrix(expr_values), - B = iter, threshold = threshold, verbose = verbose) + B = iter, threshold = threshold, verbose = verbose + ) ## results ## nr_sign_components <- jtest$r - if (verbose) - cat("number of estimated significant components: ", - nr_sign_components) + if (verbose) { + cat( + "number of estimated significant components: ", + nr_sign_components + ) + } final_results <- jtest$p jackplot <- create_jackstrawplot( jackstraw_data = final_results, - ncp = ncp, ylim = ylim, threshold = threshold) + ncp = ncp, ylim = ylim, threshold = threshold + ) } return(plot_output_handler( @@ -1728,10 +1795,11 @@ jackstrawPlot <- function(gobject, #' @keywords internal #' @returns ggplot #' @export -create_jackstrawplot <- function(jackstraw_data, - ncp = 20, - ylim = c(0, 1), - threshold = 0.01) { +create_jackstrawplot <- function( + jackstraw_data, + ncp = 20, + ylim = c(0, 1), + threshold = 0.01) { checkmate::assert_numeric(ncp, len = 1L) checkmate::assert_numeric(ylim, len = 2L) checkmate::assert_numeric(threshold, len = 1L) @@ -1750,11 +1818,14 @@ create_jackstrawplot <- function(jackstraw_data, pl <- pl + ggplot2::theme_bw() pl <- pl + ggplot2::geom_point( data = testDT[seq_len(ncp)], - ggplot2::aes(x = PC, y = p.val, fill = sign), shape = 21) + ggplot2::aes(x = PC, y = p.val, fill = sign), shape = 21 + ) pl <- pl + ggplot2::scale_fill_manual( - values = c("n.s." = "lightgrey", "sign" = "darkorange")) + values = c("n.s." = "lightgrey", "sign" = "darkorange") + ) pl <- pl + ggplot2::theme( - axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1)) + axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::coord_cartesian(ylim = ylim) pl <- pl + ggplot2::theme(panel.grid.major.x = ggplot2::element_blank()) pl <- pl + ggplot2::labs(x = "", y = "p-value per PC") @@ -1805,29 +1876,30 @@ create_jackstrawplot <- function(jackstraw_data, #' #' signPCA(g) #' @export -signPCA <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - name = NULL, - method = c("screeplot", "jackstraw"), - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - pca_method = c("irlba", "factominer"), - rev = FALSE, - feats_to_use = NULL, - center = TRUE, - scale_unit = TRUE, - ncp = 50, - scree_ylim = c(0, 10), - jack_iter = 10, - jack_threshold = 0.01, - jack_ylim = c(0, 1), - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "signPCA") { +signPCA <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + name = NULL, + method = c("screeplot", "jackstraw"), + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + pca_method = c("irlba", "factominer"), + rev = FALSE, + feats_to_use = NULL, + center = TRUE, + scale_unit = TRUE, + ncp = 50, + scree_ylim = c(0, 10), + jack_iter = 10, + jack_threshold = 0.01, + jack_ylim = c(0, 1), + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "signPCA") { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2010,28 +2082,29 @@ signPCA <- function(gobject, #' #' runUMAP(g) #' @export -runUMAP <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - n_neighbors = 40, - n_components = 2, - n_epochs = 400, - min_dist = 0.01, - n_threads = NA, - spread = 5, - set_seed = TRUE, - seed_number = 1234L, - verbose = TRUE, - toplevel_params = 2L, - ...) { +runUMAP <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + n_neighbors = 40, + n_components = 2, + n_epochs = 400, + min_dist = 0.01, + n_threads = NA, + spread = 5, + set_seed = TRUE, + seed_number = 1234L, + verbose = TRUE, + toplevel_params = 2L, + ...) { # NSE vars cell_ID <- NULL @@ -2102,7 +2175,8 @@ runUMAP <- function(gobject, "Ignoring dimensions_to_use that are outside the range." )) dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% seq(ncol(matrix_to_use))] + dimensions_to_use %in% seq(ncol(matrix_to_use)) + ] } matrix_to_use <- matrix_to_use[, dimensions_to_use] @@ -2111,7 +2185,8 @@ runUMAP <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, @@ -2203,8 +2278,10 @@ runUMAP <- function(gobject, ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, - dimObject = dimObject) + gobject <- set_dimReduction( + gobject = gobject, + dimObject = dimObject + ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -2271,29 +2348,30 @@ runUMAP <- function(gobject, #' #' runUMAPprojection(g) #' @export -runUMAPprojection <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - random_subset = 500, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - n_neighbors = 40, - n_components = 2, - n_epochs = 400, - min_dist = 0.01, - n_threads = NA, - spread = 5, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - toplevel_params = 2, - ...) { +runUMAPprojection <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + random_subset = 500, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + n_neighbors = 40, + n_components = 2, + n_epochs = 400, + min_dist = 0.01, + n_threads = NA, + spread = 5, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel_params = 2, + ...) { # NSE vars cell_ID <- NULL @@ -2403,7 +2481,8 @@ runUMAPprojection <- function(gobject, # create random selection random_selection <- sort(sample( - seq_len(nrow(matrix_to_use)), random_subset)) + seq_len(nrow(matrix_to_use)), random_subset + )) subsample_matrix <- matrix_to_use[random_selection, ] uwot_clus_subset <- uwot::umap( @@ -2429,7 +2508,8 @@ runUMAPprojection <- function(gobject, # combine subset and prediction coords_umap <- rbind(uwot_clus_subset$embedding, uwot_clus_pred) coords_umap <- coords_umap[ - match(cell_ID_order, rownames(coords_umap)), ] + match(cell_ID_order, rownames(coords_umap)), + ] coords_umap_DT <- data.table::as.data.table(coords_umap) coords_umap_DT[, cell_ID := rownames(coords_umap)] @@ -2534,25 +2614,26 @@ runUMAPprojection <- function(gobject, #' #' runtSNE(g) #' @export -runtSNE <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - dims = 2, - perplexity = 30, - theta = 0.5, - do_PCA_first = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runtSNE <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + dims = 2, + perplexity = 30, + theta = 0.5, + do_PCA_first = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2616,7 +2697,8 @@ runtSNE <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2696,8 +2778,10 @@ runtSNE <- function(gobject, ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, - dimObject = dimObject) + gobject <- set_dimReduction( + gobject = gobject, + dimObject = dimObject + ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## @@ -2750,24 +2834,25 @@ runtSNE <- function(gobject, #' #' runGiottoHarmony(g, vars_use = "leiden_clus") #' @export -runGiottoHarmony <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - vars_use = "list_ID", - do_pca = FALSE, - expression_values = c("normalized", "scaled", "custom"), - reduction = "cells", - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - set_seed = TRUE, - seed_number = 1234, - toplevel_params = 2, - return_gobject = TRUE, - verbose = NULL, - ...) { +runGiottoHarmony <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + vars_use = "list_ID", + do_pca = FALSE, + expression_values = c("normalized", "scaled", "custom"), + reduction = "cells", + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + feats_to_use = NULL, + set_seed = TRUE, + seed_number = 1234, + toplevel_params = 2, + return_gobject = TRUE, + verbose = NULL, + ...) { # verify if optional package is installed package_check(pkg_name = "harmony", repository = "CRAN") @@ -2848,7 +2933,8 @@ runGiottoHarmony <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2911,7 +2997,6 @@ runGiottoHarmony <- function(gobject, # return giotto object or harmony results if (isTRUE(return_gobject)) { - harmony_names <- list_dim_reductions_names( gobject = gobject, data_type = reduction, @@ -2921,13 +3006,17 @@ runGiottoHarmony <- function(gobject, ) if (name %in% harmony_names) { - cat(name, - " has already been used with harmony, will be overwritten") + cat( + name, + " has already been used with harmony, will be overwritten" + ) } ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, - dimObject = harmdimObject) + gobject <- set_dimReduction( + gobject = gobject, + dimObject = harmdimObject + ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### diff --git a/R/feature_set_enrichment.R b/R/feature_set_enrichment.R index c30cb848a..c5bf382ea 100644 --- a/R/feature_set_enrichment.R +++ b/R/feature_set_enrichment.R @@ -4,28 +4,28 @@ #' @param dryrun do a dry run, default TRUE. #' @param path_to_GSEA path to GSEA command line executable, e.g. gsea-XXX.jar. #' See details (1.) for more information. -#' @param GSEA_dataset path to a Human/Mouse collection from GSEA, e.g. +#' @param GSEA_dataset path to a Human/Mouse collection from GSEA, e.g. #' Hallmarks C1. See details (2.) for more information. -#' @param GSEA_ranked_file path to .rnk file for GSEA. See details (3.) for +#' @param GSEA_ranked_file path to .rnk file for GSEA. See details (3.) for #' more information -#' @param output_folder path to which the GSEA results will be saved. Default +#' @param output_folder path to which the GSEA results will be saved. Default #' is current working directory. -#' @param name_analysis_folder default output subdirectory prefix to which +#' @param name_analysis_folder default output subdirectory prefix to which #' results are saved. -#' Will live within output_folder; equivalent of +#' Will live within output_folder; equivalent of #' "Analysis Name" in GSEA Application. -#' @param collapse only 'false' is supported. This will use your dataset as-is, +#' @param collapse only 'false' is supported. This will use your dataset as-is, #' in the original format. -#' @param mode option selected in Advanced Field "Collapsing Mode for +#' @param mode option selected in Advanced Field "Collapsing Mode for #' Probe Sets => 1 gene" #' @param norm normalization mode; only meandiv is supported. #' @param nperm number of permutations, default 1000 -#' @param scoring_scheme Default "weighted", equivalent of +#' @param scoring_scheme Default "weighted", equivalent of #' "enrichment statistic" in GSEA Application #' @param plot_top_x Default 20, number of enrichment plots to produce. -#' @param set_max default 500, equivalent to "max size; exclude larger sets" +#' @param set_max default 500, equivalent to "max size; exclude larger sets" #' in Basic Fields in GSEA Application -#' @param set_min default 15, equivalent to "min size; exclude smaller sets" +#' @param set_min default 15, equivalent to "min size; exclude smaller sets" #' in Basic Fields in GSEA Application #' @returns data.table #' @details @@ -33,11 +33,11 @@ #' 1. download and install the COMMAND line (all platforms) gsea-XXX.jar #' https://www.gsea-msigdb.org/gsea/downloads.jsp #' 1.1. download zip file -#' 1.2. unzip and move to known location +#' 1.2. unzip and move to known location #' (e.g. in path/to/your/applications/gsea/GSEA_4.3.2) #' #' 2. download the Human and Mouse collections -#' https://www.gsea-msigdb.org/gsea/msigdb/index.jsp or zipped folder +#' https://www.gsea-msigdb.org/gsea/msigdb/index.jsp or zipped folder #' https://www.gsea-msigdb.org/gsea/downloads.jsp (all downloaded) #' #' 3. create ranked gene lists @@ -50,37 +50,40 @@ #' please reference GSEA's documentation here: #' https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Syntax #' @export -doFeatureSetEnrichment <- function(dryrun = TRUE, - path_to_GSEA = NULL, - GSEA_dataset = NULL, - GSEA_ranked_file = NULL, - output_folder = NULL, - name_analysis_folder = "my_GSEA_analysis", - collapse = "false", - mode = c( - "Abs_max_of_probes", - "Max_probe", - "Median_of_probes", - "Mean_of_probes", - "Sum_of_probes" - ), - norm = "meandiv", - nperm = 1000, - scoring_scheme = "weighted", - plot_top_x = 20, - set_max = 500, - set_min = 15) { +doFeatureSetEnrichment <- function( + dryrun = TRUE, + path_to_GSEA = NULL, + GSEA_dataset = NULL, + GSEA_ranked_file = NULL, + output_folder = NULL, + name_analysis_folder = "my_GSEA_analysis", + collapse = "false", + mode = c( + "Abs_max_of_probes", + "Max_probe", + "Median_of_probes", + "Mean_of_probes", + "Sum_of_probes" + ), + norm = "meandiv", + nperm = 1000, + scoring_scheme = "weighted", + plot_top_x = 20, + set_max = 500, + set_min = 15) { # set don't run to false as a start dont_run <- FALSE # SYSTEM CHECK FOR JAVA java_not_installed <- as.logical(system("java -version")) - # returns 0 if java is installed (i.e., command runs successfully), + # returns 0 if java is installed (i.e., command runs successfully), # 1 otherwise - if (java_not_installed) - stop(wrap_txt("Java must be installed for doFeatureSetEnrichment() to + if (java_not_installed) { + stop(wrap_txt("Java must be installed for doFeatureSetEnrichment() to run. Please install Java: https://www.java.com/en/download/", - errWidth = TRUE)) + errWidth = TRUE + )) + } mode <- match.arg(mode, choices = c( @@ -91,26 +94,33 @@ doFeatureSetEnrichment <- function(dryrun = TRUE, "Sum_of_probes" )) - if (is.null(output_folder)) output_folder <- paste0( - getwd(), "/Feature_set_enrichment_results/") + if (is.null(output_folder)) { + output_folder <- paste0( + getwd(), "/Feature_set_enrichment_results/" + ) + } if (!dir.exists(output_folder)) { - wrap_msg(paste0("Directory does not yet exist. Creating directory at:", - output_folder)) + wrap_msg(paste0( + "Directory does not yet exist. Creating directory at:", + output_folder + )) dir.create(output_folder) } # check for path to GSEA tool - if (is.null(path_to_GSEA)) + if (is.null(path_to_GSEA)) { stop("Path to the GSEA directory needs to be provided") - if (!file.exists(path_to_GSEA)) + } + if (!file.exists(path_to_GSEA)) { stop("Path to the GSEA directory does not exist") + } path_to_GSEA <- paste0('"', path_to_GSEA, '"') # check for path to GSEA dataset .gmt if (is.null(GSEA_dataset)) { - warning("Path to a GSEA dataset needs to be provided, only dryrun will + warning("Path to a GSEA dataset needs to be provided, only dryrun will work for testing") dont_run <- TRUE GSEA_dataset <- "test.gmt" @@ -120,14 +130,15 @@ doFeatureSetEnrichment <- function(dryrun = TRUE, # check for GSRA ranked file (path or data.frame) if (is.null(GSEA_ranked_file)) { - warning("A ranked gene file needs to be provided, only dryrun will work + warning("A ranked gene file needs to be provided, only dryrun will work for testing") dont_run <- TRUE GSEA_ranked_file <- "my_ranked_file.rnk" } else if (inherits(GSEA_ranked_file, "character")) { message("The ranked list looks like a path to a file") - if (!file.exists(GSEA_ranked_file)) + if (!file.exists(GSEA_ranked_file)) { stop("Path to the ranked file does not exist") + } } else if (inherits(GSEA_ranked_file, "data.frame")) { message("The ranked list looks like a data.frame") @@ -155,10 +166,12 @@ doFeatureSetEnrichment <- function(dryrun = TRUE, if (my_os == "windows") { execution_path <- paste0( - path_to_GSEA, "/", "gsea-cli.bat", " ", operation) + path_to_GSEA, "/", "gsea-cli.bat", " ", operation + ) } else { execution_path <- paste0( - path_to_GSEA, "/", "gsea-cli.sh", " ", operation) + path_to_GSEA, "/", "gsea-cli.sh", " ", operation + ) } created_command <- sprintf( diff --git a/R/general_help.R b/R/general_help.R index e34c94a65..67e6e7c13 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -7,8 +7,9 @@ #' @description calculate gini coefficient #' @keywords internal #' @returns gini coefficient -mygini_fun <- function(x, - weights = rep(1, length(x))) { +mygini_fun <- function( + x, + weights = rep(1, length(x))) { # adapted from R package GiniWegNeg dataset <- cbind(x, weights) ord_x <- order(x) @@ -36,9 +37,10 @@ mygini_fun <- function(x, #' @description calculate gini coefficient on a minimum length vector #' @keywords internal #' @returns gini coefficient -extended_gini_fun <- function(x, - weights = rep(1, length = length(x)), - minimum_length = 16) { +extended_gini_fun <- function( + x, + weights = rep(1, length = length(x)), + minimum_length = 16) { if (length(x) < minimum_length) { difference <- minimum_length - length(x) min_value <- min(x) @@ -57,16 +59,19 @@ extended_gini_fun <- function(x, #' @description create binarized scores from a vector using kmeans #' @returns numeric #' @keywords internal -.kmeans_binarize <- function(x, - nstart = 3, - iter.max = 10, - seed = NULL) { +.kmeans_binarize <- function( + x, + nstart = 3, + iter.max = 10, + seed = NULL) { if (!is.null(seed)) { on.exit(random_seed(), add = TRUE) set.seed(seed) } sel_gene_km <- stats::kmeans( - x, centers = 2, nstart = nstart, iter.max = iter.max)$cluster + x, + centers = 2, nstart = nstart, iter.max = iter.max + )$cluster mean_1 <- mean(x[sel_gene_km == 1]) mean_2 <- mean(x[sel_gene_km == 2]) @@ -125,22 +130,25 @@ extended_gini_fun <- function(x, #' @title .kmeans_arma_subset_binarize #' @name .kmeans_arma_subset_binarize -#' @description create binarized scores from a subsetted vector using +#' @description create binarized scores from a subsetted vector using #' kmeans_arma #' @returns numeric #' @keywords internal -.kmeans_arma_subset_binarize <- function(x, - n_iter = 5, - extreme_nr = 20, - sample_nr = 200, - seed = NULL) { +.kmeans_arma_subset_binarize <- function( + x, + n_iter = 5, + extreme_nr = 20, + sample_nr = 200, + seed = NULL) { length_x <- length(x) vector_x <- sort(x) first_set <- vector_x[seq_len(extreme_nr)] last_set <- vector_x[(length_x - (extreme_nr - 1)):length_x] random_set <- sample( - vector_x[(extreme_nr + 1):(length_x - extreme_nr)], size = sample_nr) + vector_x[(extreme_nr + 1):(length_x - extreme_nr)], + size = sample_nr + ) testset <- c(first_set, last_set, random_set) if (!is.null(seed)) { @@ -182,15 +190,14 @@ extended_gini_fun <- function(x, #' @description wrapper for different binarization functions #' @returns matrix #' @keywords internal -kmeans_binarize_wrapper <- function( - expr_values, - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - seed = NULL) { +kmeans_binarize_wrapper <- function(expr_values, + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + seed = NULL) { # expression values if (!is.null(subset_feats)) { expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] @@ -247,9 +254,10 @@ kmeans_binarize_wrapper <- function( #' @description wrapper for rank binarization function #' @returns matrix #' @keywords internal -rank_binarize_wrapper <- function(expr_values, - subset_feats = NULL, - percentage_rank = 30) { +rank_binarize_wrapper <- function( + expr_values, + subset_feats = NULL, + percentage_rank = 30) { # expression values if (!is.null(subset_feats)) { expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] @@ -257,7 +265,8 @@ rank_binarize_wrapper <- function(expr_values, max_rank <- (ncol(expr_values) / 100) * percentage_rank bin_matrix <- t_flex(apply( - X = expr_values, MARGIN = 1, FUN = .rank_binarize, max_rank = max_rank)) + X = expr_values, MARGIN = 1, FUN = .rank_binarize, max_rank = max_rank + )) return(bin_matrix) } @@ -270,15 +279,16 @@ rank_binarize_wrapper <- function(expr_values, #' @title convertEnsemblToGeneSymbol #' @name convertEnsemblToGeneSymbol -#' @description This function convert ensembl gene IDs from a matrix to +#' @description This function convert ensembl gene IDs from a matrix to #' official gene symbols #' @param matrix an expression matrix with ensembl gene IDs as rownames #' @param species species to use for gene symbol conversion #' @returns expression matrix with gene symbols as rownames #' @details This function requires that the biomaRt library is installed #' @export -convertEnsemblToGeneSymbol <- function(matrix, - species = c("mouse", "human")) { +convertEnsemblToGeneSymbol <- function( + matrix, + species = c("mouse", "human")) { # data.table: set global variable dupes <- mgi_symbol <- gene_symbol <- ensembl_gene_id <- hgnc_symbol <- NULL @@ -306,11 +316,14 @@ convertEnsemblToGeneSymbol <- function(matrix, ifelse(mgi_symbol == "", ensembl_gene_id, "temporary") ), by = mgi_symbol] gene_names_DT[, gene_symbol := ifelse( - mgi_symbol == "", ensembl_gene_id, gene_symbol)] + mgi_symbol == "", ensembl_gene_id, gene_symbol + )] gene_names_DT[, gene_symbol := ifelse( - gene_symbol == "temporary", - paste0(mgi_symbol, "--", seq_len(.N)), gene_symbol), - by = mgi_symbol] + gene_symbol == "temporary", + paste0(mgi_symbol, "--", seq_len(.N)), gene_symbol + ), + by = mgi_symbol + ] # filter matrix <- matrix[rownames(matrix) %in% gene_names_DT$ensembl_gene_id, ] @@ -346,11 +359,14 @@ convertEnsemblToGeneSymbol <- function(matrix, ifelse(hgnc_symbol == "", ensembl_gene_id, "temporary") ), by = hgnc_symbol] gene_names_DT[, gene_symbol := ifelse( - hgnc_symbol == "", ensembl_gene_id, gene_symbol)] + hgnc_symbol == "", ensembl_gene_id, gene_symbol + )] gene_names_DT[, gene_symbol := ifelse( - gene_symbol == "temporary", - paste0(hgnc_symbol, "--", seq_len(.N)), gene_symbol), - by = hgnc_symbol] + gene_symbol == "temporary", + paste0(hgnc_symbol, "--", seq_len(.N)), gene_symbol + ), + by = hgnc_symbol + ] # filter matrix <- matrix[rownames(matrix) %in% gene_names_DT$ensembl_gene_id, ] @@ -385,17 +401,16 @@ convertEnsemblToGeneSymbol <- function(matrix, #' @name gpoly_from_dfr_smoothed_wrapped #' @returns giottoPolygon #' @keywords internal -gpoly_from_dfr_smoothed_wrapped <- function( - segmdfr, - name = "cell", - calc_centroids = FALSE, - smooth_polygons = FALSE, - vertices = 20L, - k = 3L, - set_neg_to_zero = TRUE, - skip_eval_dfr = FALSE, - copy_dt = TRUE, - verbose = TRUE) { +gpoly_from_dfr_smoothed_wrapped <- function(segmdfr, + name = "cell", + calc_centroids = FALSE, + smooth_polygons = FALSE, + vertices = 20L, + k = 3L, + set_neg_to_zero = TRUE, + skip_eval_dfr = FALSE, + copy_dt = TRUE, + verbose = TRUE) { gpoly <- createGiottoPolygonsFromDfr( segmdfr = segmdfr, name = name, @@ -412,13 +427,18 @@ gpoly_from_dfr_smoothed_wrapped <- function( set_neg_to_zero = set_neg_to_zero ) } - if (isTRUE(calc_centroids)) gpoly <- centroids( - gpoly, append_gpolygon = TRUE) + if (isTRUE(calc_centroids)) { + gpoly <- centroids( + gpoly, + append_gpolygon = TRUE + ) + } slot(gpoly, "spatVector") <- terra::wrap(slot(gpoly, "spatVector")) if (isTRUE(calc_centroids)) { slot(gpoly, "spatVectorCentroids") <- terra::wrap( - slot(gpoly, "spatVectorCentroids")) + slot(gpoly, "spatVectorCentroids") + ) } return(gpoly) } @@ -429,33 +449,34 @@ gpoly_from_dfr_smoothed_wrapped <- function( #' @title get10Xmatrix #' @name get10Xmatrix -#' @description This function creates an expression matrix from a 10X +#' @description This function creates an expression matrix from a 10X #' structured folder #' @param path_to_data path to the 10X folder -#' @param gene_column_index which column from the features or genes .tsv file +#' @param gene_column_index which column from the features or genes .tsv file #' to use for row ids #' @param remove_zero_rows removes rows with sum equal to zero -#' @param split_by_type split into multiple matrices based on 3rd column of +#' @param split_by_type split into multiple matrices based on 3rd column of #' features.tsv(.gz) #' @returns sparse expression matrix from 10X -#' @details A typical 10X folder is named raw_feature_bc_matrix or +#' @details A typical 10X folder is named raw_feature_bc_matrix or #' filtered_feature_bc_matrix and it has 3 files: #' \itemize{ #' \item{barcodes.tsv(.gz)} #' \item{features.tsv(.gz) or genes.tsv(.gz)} #' \item{matrix.mtx(.gz)} #' } -#' By default the first column of the features or genes .tsv file will be used, +#' By default the first column of the features or genes .tsv file will be used, #' however if multiple -#' annotations are provided (e.g. ensembl gene ids and gene symbols) the user +#' annotations are provided (e.g. ensembl gene ids and gene symbols) the user #' can select another column. #' @export -get10Xmatrix <- function(path_to_data, - gene_column_index = 1, - remove_zero_rows = TRUE, - split_by_type = TRUE) { +get10Xmatrix <- function( + path_to_data, + gene_column_index = 1, + remove_zero_rows = TRUE, + split_by_type = TRUE) { # data.table variables - total <- gene_symbol <- gene_id <- gene_id_num <- cell_id <- + total <- gene_symbol <- gene_id <- gene_id_num <- cell_id <- cell_id_num <- sort_gene_id_num <- NULL # data directory @@ -464,14 +485,16 @@ get10Xmatrix <- function(path_to_data, # get barcodes and create vector barcodes_file <- grep(files_10X, pattern = "barcodes", value = TRUE) barcodesDT <- data.table::fread( - input = paste0(path_to_data, "/", barcodes_file), header = FALSE) + input = paste0(path_to_data, "/", barcodes_file), header = FALSE + ) barcodes_vec <- barcodesDT$V1 names(barcodes_vec) <- seq_len(nrow(barcodesDT)) # get features and create vector features_file <- grep(files_10X, pattern = "features|genes", value = TRUE) featuresDT <- data.table::fread( - input = paste0(path_to_data, "/", features_file), header = FALSE) + input = paste0(path_to_data, "/", features_file), header = FALSE + ) g_name <- colnames(featuresDT)[gene_column_index] ## convert ensembl gene id to gene symbol ## @@ -479,8 +502,9 @@ get10Xmatrix <- function(path_to_data, featuresDT[, total := .N, by = get(g_name)] featuresDT[, gene_symbol := ifelse( - total > 1, paste0(get(g_name), "--", seq_len(.N)), - get(g_name)), by = get(g_name)] + total > 1, paste0(get(g_name), "--", seq_len(.N)), + get(g_name) + ), by = get(g_name)] features_vec <- featuresDT$gene_symbol names(features_vec) <- seq_len(nrow(featuresDT)) @@ -526,23 +550,24 @@ get10Xmatrix <- function(path_to_data, #' @title get10Xmatrix_h5 #' @name get10Xmatrix_h5 -#' @description This function creates an expression matrix from a 10X h5 file +#' @description This function creates an expression matrix from a 10X h5 file #' path #' @param path_to_data path to the 10X .h5 file -#' @param gene_ids use gene symbols (default) or ensembl ids for the gene +#' @param gene_ids use gene symbols (default) or ensembl ids for the gene #' expression matrix #' @inheritParams get10Xmatrix #' @returns (list of) sparse expression matrix from 10X -#' @details If the .h5 10x file has multiple classes of features -#' (e.g. expression vs QC probes) or modalities (e.g. RNA and protein), and +#' @details If the .h5 10x file has multiple classes of features +#' (e.g. expression vs QC probes) or modalities (e.g. RNA and protein), and #' \code{split_by_type} param is \code{TRUE}, multiple matrices will be returned #' @export -get10Xmatrix_h5 <- function(path_to_data, - gene_ids = c("symbols", "ensembl"), - remove_zero_rows = TRUE, - split_by_type = TRUE) { +get10Xmatrix_h5 <- function( + path_to_data, + gene_ids = c("symbols", "ensembl"), + remove_zero_rows = TRUE, + split_by_type = TRUE) { ## function inspired by and modified from the VISION package - ## see read_10x_h5_v3 in + ## see read_10x_h5_v3 in ## https://github.com/YosefLab/VISION/blob/master/R/Utilities.R # verify if optional package is installed @@ -597,7 +622,8 @@ get10Xmatrix_h5 <- function(path_to_data, features_dt[, nr_name := seq_len(.N), by = name] features_dt[, uniq_name := ifelse( - nr_name == 1, name, paste0(name, "_", (nr_name - 1)))] + nr_name == 1, name, paste0(name, "_", (nr_name - 1)) + )] # dimension names @@ -617,7 +643,8 @@ get10Xmatrix_h5 <- function(path_to_data, for (fclass in unique(feature_types)) { result_list[[fclass]] <- sparsemat[ - features_dt$feature_type == fclass, ] + features_dt$feature_type == fclass, + ] # change names to gene symbols if it's expression if (fclass == "Gene Expression" & gene_ids == "symbols") { @@ -662,11 +689,11 @@ get10Xmatrix_h5 <- function(path_to_data, #' @title readPolygonFilesVizgenHDF5 #' @name readPolygonFilesVizgenHDF5_old -#' @description Read and create polygons for all cells, or for only selected +#' @description Read and create polygons for all cells, or for only selected #' FOVs. #' @param boundaries_path path to the cell_boundaries folder #' @param fovs subset of fovs to use -#' @param custom_polygon_names a character vector to provide custom polygon +#' @param custom_polygon_names a character vector to provide custom polygon #' names (optional) #' @param polygon_feat_types a vector containing the polygon feature types #' @param flip_x_axis flip x axis of polygon coordinates (multiply by -1) @@ -679,21 +706,22 @@ get10Xmatrix_h5 <- function(path_to_data, #' @param verbose be verbose #' @seealso \code{\link{smoothGiottoPolygons}} #' @returns data.table -#' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission +#' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission #' issues. #' @export -readPolygonFilesVizgenHDF5_old <- function(boundaries_path, - fovs = NULL, - polygon_feat_types = 0:6, - custom_polygon_names = NULL, - flip_x_axis = FALSE, - flip_y_axis = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60, - set_neg_to_zero = FALSE, - H5Fopen_flags = "H5F_ACC_RDWR", - cores = NA, - verbose = TRUE) { +readPolygonFilesVizgenHDF5_old <- function( + boundaries_path, + fovs = NULL, + polygon_feat_types = 0:6, + custom_polygon_names = NULL, + flip_x_axis = FALSE, + flip_y_axis = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60, + set_neg_to_zero = FALSE, + H5Fopen_flags = "H5F_ACC_RDWR", + cores = NA, + verbose = TRUE) { # necessary pkgs package_check(pkg_name = "rhdf5", repository = "Bioc") @@ -709,12 +737,12 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, # provide your own custom names if (!is.null(custom_polygon_names)) { if (!is.character(custom_polygon_names)) { - stop(wrap_txt("If custom_polygon_names are provided, it needs to + stop(wrap_txt("If custom_polygon_names are provided, it needs to be a character vector")) } if (length(custom_polygon_names) != length(poly_feat_names)) { - stop(wrap_txt("length of custom names need to be same as + stop(wrap_txt("length of custom names need to be same as polygon_feat_types")) } else { poly_feat_names <- custom_polygon_names @@ -729,14 +757,17 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, selected_hdf5s <- paste0("feature_data_", fovs, ".hdf5") selected_hdf5s_concatenated <- paste0(selected_hdf5s, collapse = "|") hdf5_boundary_selected_list <- grep( - selected_hdf5s_concatenated, x = hdf5_boundary_list, value = TRUE) + selected_hdf5s_concatenated, + x = hdf5_boundary_list, value = TRUE + ) } else { hdf5_boundary_selected_list <- hdf5_boundary_list } - if (isTRUE(verbose)) - wrap_msg("finished listing .hdf5 files start extracting .hdf5 + if (isTRUE(verbose)) { + wrap_msg("finished listing .hdf5 files start extracting .hdf5 information") + } # open selected polygon files hdf5_list_length <- length(hdf5_boundary_selected_list) @@ -751,18 +782,21 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, function(bound_i) { # get feature data read_file <- rhdf5::H5Fopen( - hdf5_boundary_selected_list[[bound_i]][[1]], - flags = H5Fopen_flags) + hdf5_boundary_selected_list[[bound_i]][[1]], + flags = H5Fopen_flags + ) fov_info <- read_file$featuredata # update progress - if (verbose) + if (verbose) { print(basename(hdf5_boundary_selected_list[[bound_i]])) + } elapsed <- (proc.time() - init)[[3L]] step_time <- elapsed / bound_i est <- (hdf5_list_length * step_time) - elapsed pb(message = c( - "// E:", time_format(elapsed), "| R:", time_format(est))) + "// E:", time_format(elapsed), "| R:", time_format(est) + )) rhdf5::H5Fclose(read_file) return(fov_info) } @@ -776,57 +810,67 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, # extract values for each z index and cell from read_list result_list <- lapply_flex( - seq_along(poly_feat_indexes), cores = cores, function(z_i) { - lapply_flex(seq_along(read_list), cores = cores, function(cell_i) { - singlearray <- read_list[[cell_i]][[ - poly_feat_indexes[z_i]]]$p_0$coordinates - cell_name <- cell_names[[cell_i]] - if (!is.null(singlearray)) { - singlearraydt <- data.table::as.data.table(t_flex( - as.matrix(singlearray[, , 1]))) - data.table::setnames( - singlearraydt, old = c("V1", "V2"), new = c("x", "y")) - if (flip_x_axis) singlearraydt[, x := -1 * x] - if (flip_y_axis) singlearraydt[, y := -1 * y] - - singlearraydt[, cell_id := cell_name] - } - }) - }) + seq_along(poly_feat_indexes), + cores = cores, function(z_i) { + lapply_flex(seq_along(read_list), cores = cores, function(cell_i) { + singlearray <- read_list[[cell_i]][[ + poly_feat_indexes[z_i] + ]]$p_0$coordinates + cell_name <- cell_names[[cell_i]] + if (!is.null(singlearray)) { + singlearraydt <- data.table::as.data.table(t_flex( + as.matrix(singlearray[, , 1]) + )) + data.table::setnames( + singlearraydt, + old = c("V1", "V2"), new = c("x", "y") + ) + if (flip_x_axis) singlearraydt[, x := -1 * x] + if (flip_y_axis) singlearraydt[, y := -1 * y] + + singlearraydt[, cell_id := cell_name] + } + }) + } + ) result_list_rbind <- lapply_flex( - seq_along(result_list), cores = cores, function(z_i) { - data.table::rbindlist(result_list[[z_i]]) - }) + seq_along(result_list), + cores = cores, function(z_i) { + data.table::rbindlist(result_list[[z_i]]) + } + ) - if (isTRUE(verbose)) + if (isTRUE(verbose)) { wrap_msg("finished extracting .hdf5 files start creating polygons") + } # create Giotto polygons and add them to gobject progressr::with_progress({ pb <- progressr::progressor(along = result_list_rbind) - smooth_cell_polygons_list <- lapply_flex(seq_along(result_list_rbind), - cores = cores, function(i) { - dfr_subset <- result_list_rbind[[i]][, .(x, y, cell_id)] - cell_polygons <- createGiottoPolygonsFromDfr( - segmdfr = dfr_subset, - name = poly_feat_names[i], - verbose = verbose - ) + smooth_cell_polygons_list <- lapply_flex(seq_along(result_list_rbind), + cores = cores, function(i) { + dfr_subset <- result_list_rbind[[i]][, .(x, y, cell_id)] + cell_polygons <- createGiottoPolygonsFromDfr( + segmdfr = dfr_subset, + name = poly_feat_names[i], + verbose = verbose + ) - pb(message = poly_feat_names[i]) + pb(message = poly_feat_names[i]) - if (smooth_polygons == TRUE) { - return(smoothGiottoPolygons(cell_polygons, - vertices = smooth_vertices, - set_neg_to_zero = set_neg_to_zero - )) - } else { - return(cell_polygons) + if (smooth_polygons == TRUE) { + return(smoothGiottoPolygons(cell_polygons, + vertices = smooth_vertices, + set_neg_to_zero = set_neg_to_zero + )) + } else { + return(cell_polygons) + } } - }) + ) }) @@ -842,14 +886,14 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, #' @title readPolygonFilesVizgenHDF5 #' @name readPolygonFilesVizgenHDF5 #' @description Read polygon info for all cells or for only selected FOVs from -#' Vizgen HDF5 files. Data is returned as a list of giottoPolygons or +#' Vizgen HDF5 files. Data is returned as a list of giottoPolygons or #' data.tables of the requested z indices. #' @param boundaries_path path to the cell_boundaries folder #' @param fovs subset of fovs to use #' @param z_indices z indices of polygons to use #' @param segm_to_use segmentation results to use (usually = 1. Depends on if #' alternative segmentations were generated) -#' @param custom_polygon_names a character vector to provide custom polygon +#' @param custom_polygon_names a character vector to provide custom polygon #' names (optional) #' @param polygon_feat_types deprecated. Use \code{z_indices} #' @param flip_x_axis flip x axis of polygon coordinates (multiply by -1) @@ -860,36 +904,37 @@ readPolygonFilesVizgenHDF5_old <- function(boundaries_path, #' @param calc_centroids calculate centroids (default = FALSE) #' @param H5Fopen_flags see \code{\link[rhdf5]{H5Fopen}} for more details #' @param cores cores to use -#' @param create_gpoly_parallel (default = TRUE) Whether to run gpoly creation +#' @param create_gpoly_parallel (default = TRUE) Whether to run gpoly creation #' in parallel #' @param create_gpoly_bin (Optional, default = FALSE) Parallelization option. -#' Accepts integer values as an binning size when generating giottoPolygon +#' Accepts integer values as an binning size when generating giottoPolygon #' objects #' @param verbose be verbose #' @param output whether to return as list of giottoPolygon or data.table #' @seealso \code{\link{smoothGiottoPolygons}} #' @returns list of giottoPolygon or data.table -#' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission +#' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission #' issues. #' @export -readPolygonFilesVizgenHDF5 <- function(boundaries_path, - fovs = NULL, - z_indices = 1L:7L, - segm_to_use = 1L, - custom_polygon_names = NULL, - flip_x_axis = FALSE, - flip_y_axis = TRUE, - calc_centroids = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60L, - set_neg_to_zero = FALSE, - H5Fopen_flags = "H5F_ACC_RDWR", - cores = determine_cores(), - create_gpoly_parallel = TRUE, - create_gpoly_bin = FALSE, - verbose = TRUE, - output = c("giottoPolygon", "data.table"), - polygon_feat_types = NULL) { +readPolygonFilesVizgenHDF5 <- function( + boundaries_path, + fovs = NULL, + z_indices = 1L:7L, + segm_to_use = 1L, + custom_polygon_names = NULL, + flip_x_axis = FALSE, + flip_y_axis = TRUE, + calc_centroids = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60L, + set_neg_to_zero = FALSE, + H5Fopen_flags = "H5F_ACC_RDWR", + cores = determine_cores(), + create_gpoly_parallel = TRUE, + create_gpoly_bin = FALSE, + verbose = TRUE, + output = c("giottoPolygon", "data.table"), + polygon_feat_types = NULL) { # necessary pkgs package_check(pkg_name = "rhdf5", repository = "Bioc") @@ -909,13 +954,14 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, # provide your own custom names if (!is.null(custom_polygon_names)) { if (!is.character(custom_polygon_names)) { - stop(wrap_txt("If custom_polygon_names are provided, it needs to + stop(wrap_txt("If custom_polygon_names are provided, it needs to be a character vector")) } if (length(custom_polygon_names) != length(z_indices)) { stop(wrap_txt( - "length of custom names need to be same as z_indices")) + "length of custom names need to be same as z_indices" + )) } } @@ -927,14 +973,17 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, selected_hdf5s <- paste0("feature_data_", fovs, ".hdf5") selected_hdf5s_concatenated <- paste0(selected_hdf5s, collapse = "|") hdf5_boundary_selected_list <- grep( - selected_hdf5s_concatenated, x = hdf5_boundary_list, value = TRUE) + selected_hdf5s_concatenated, + x = hdf5_boundary_list, value = TRUE + ) } else { hdf5_boundary_selected_list <- hdf5_boundary_list } - if (isTRUE(verbose)) - message("finished listing .hdf5 files start extracting .hdf5 + if (isTRUE(verbose)) { + message("finished listing .hdf5 files start extracting .hdf5 information") + } # open selected polygon files @@ -953,8 +1002,9 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, ) # update progress - if (verbose) + if (verbose) { print(basename(hdf5_boundary_selected_list[[bound_i]])) + } if (bound_i %% 5 == 0) { pb() } @@ -1012,15 +1062,16 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, #' @keywords internal #' @noRd -.create_giotto_polygons_vizgen <- function(z_read_DT, - poly_names = names(z_read_DT), - set_neg_to_zero = FALSE, - calc_centroids = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60L, - create_gpoly_parallel = TRUE, - create_gpoly_bin = FALSE, - verbose = TRUE) { +.create_giotto_polygons_vizgen <- function( + z_read_DT, + poly_names = names(z_read_DT), + set_neg_to_zero = FALSE, + calc_centroids = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60L, + create_gpoly_parallel = TRUE, + create_gpoly_bin = FALSE, + verbose = TRUE) { checkmate::assert_list(z_read_DT) checkmate::assert_numeric(smooth_vertices) @@ -1035,34 +1086,40 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, pb <- progressr::progressor(along = z_read_DT) smooth_cell_polygons_list <- lapply( seq_along(z_read_DT), function(i) { - dfr_subset <- z_read_DT[[i]][, .(x, y, cell_id)] - data.table::setnames( - dfr_subset, old = "cell_id", new = "poly_ID") - cell_polygons <- createGiottoPolygonsFromDfr( - segmdfr = dfr_subset, - name = poly_names[i], - calc_centroids = FALSE, - skip_eval_dfr = TRUE, - copy_dt = FALSE, - verbose = verbose - ) - if (isTRUE(smooth_polygons)) { - cell_polygons <- smoothGiottoPolygons( - gpolygon = cell_polygons, - vertices = smooth_vertices, - k = 3L, - set_neg_to_zero = set_neg_to_zero + dfr_subset <- z_read_DT[[i]][, .(x, y, cell_id)] + data.table::setnames( + dfr_subset, + old = "cell_id", new = "poly_ID" ) + cell_polygons <- createGiottoPolygonsFromDfr( + segmdfr = dfr_subset, + name = poly_names[i], + calc_centroids = FALSE, + skip_eval_dfr = TRUE, + copy_dt = FALSE, + verbose = verbose + ) + if (isTRUE(smooth_polygons)) { + cell_polygons <- smoothGiottoPolygons( + gpolygon = cell_polygons, + vertices = smooth_vertices, + k = 3L, + set_neg_to_zero = set_neg_to_zero + ) + } + if (isTRUE(calc_centroids)) { + # NOTE: won't recalculate if centroids are already attached + cell_polygons <- centroids( + cell_polygons, + append_gpolygon = TRUE + ) + } + pb(message = c( + poly_names[i], " (", i, "/", length(z_read_DT), ")" + )) + return(cell_polygons) } - if (isTRUE(calc_centroids)) { - # NOTE: won't recalculate if centroids are already attached - cell_polygons <- centroids( - cell_polygons, append_gpolygon = TRUE) - } - pb(message = c( - poly_names[i], " (", i, "/", length(z_read_DT), ")")) - return(cell_polygons) - }) + ) }) return(smooth_cell_polygons_list) } @@ -1079,7 +1136,9 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, function(i) { dfr_subset <- z_read_DT[[i]][, .(x, y, cell_id)] data.table::setnames( - dfr_subset, old = "cell_id", new = "poly_ID") + dfr_subset, + old = "cell_id", new = "poly_ID" + ) cell_polygons <- gpoly_from_dfr_smoothed_wrapped( segmdfr = dfr_subset, name = poly_names[i], @@ -1093,7 +1152,8 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, ) pb(message = c( - poly_names[i], " (", i, "/", length(z_read_DT), ")")) + poly_names[i], " (", i, "/", length(z_read_DT), ")" + )) return(cell_polygons) } ) @@ -1102,13 +1162,15 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, # unwrap results smooth_cell_polygons_list <- lapply( smooth_cell_polygons_list, function(x) { - slot(x, "spatVector") <- terra::vect(slot(x, "spatVector")) - if (isTRUE(calc_centroids)) { - slot(x, "spatVectorCentroids") <- terra::vect( - slot(x, "spatVectorCentroids")) + slot(x, "spatVector") <- terra::vect(slot(x, "spatVector")) + if (isTRUE(calc_centroids)) { + slot(x, "spatVectorCentroids") <- terra::vect( + slot(x, "spatVectorCentroids") + ) + } + return(x) } - return(x) - }) + ) } else { # with binning @@ -1127,7 +1189,9 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, ) ) DT <- data.table::merge.data.table( - DT, bin_pid, by = "poly_ID", all.x = TRUE) + DT, bin_pid, + by = "poly_ID", all.x = TRUE + ) DT <- split(DT, DT$bin_ID) }, bin = create_gpoly_bin) @@ -1155,8 +1219,9 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, ) pb(message = c( - poly_names[i], " (", i, "/", - length(dfr_subset), ")")) + poly_names[i], " (", i, "/", + length(dfr_subset), ")" + )) return(cell_polygons) } ) @@ -1167,18 +1232,20 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, # unwrap results smooth_cell_polygons_list <- lapply( seq_along(smooth_cell_polygons_list), function(i) { - p_list <- lapply(smooth_cell_polygons_list[[i]], function(x) { - slot(x, "spatVector") <- terra::vect(slot(x, "spatVector")) - if (isTRUE(calc_centroids)) { - slot(x, "spatVectorCentroids") <- terra::vect( - slot(x, "spatVectorCentroids")) - } - return(x) - }) - # rbind results - names(p_list) <- NULL - return(do.call("rbind", p_list)) - }) + p_list <- lapply(smooth_cell_polygons_list[[i]], function(x) { + slot(x, "spatVector") <- terra::vect(slot(x, "spatVector")) + if (isTRUE(calc_centroids)) { + slot(x, "spatVectorCentroids") <- terra::vect( + slot(x, "spatVectorCentroids") + ) + } + return(x) + }) + # rbind results + names(p_list) <- NULL + return(do.call("rbind", p_list)) + } + ) } @@ -1194,20 +1261,19 @@ readPolygonFilesVizgenHDF5 <- function(boundaries_path, #' @title Read MERSCOPE polygons from parquet #' @name readPolygonVizgenParquet #' @description -#' Read Vizgen exported cell boundary parquet files as giottoPolyons. The z +#' Read Vizgen exported cell boundary parquet files as giottoPolyons. The z #' level can be selected. #' @param file parquet file to load -#' @param z_index either 'all' or a numeric vector of z_indices to get polygons +#' @param z_index either 'all' or a numeric vector of z_indices to get polygons #' for #' @param calc_centroids calculate centroids for the polygons (default = TRUE) #' @param verbose be verbose #' @returns giottoPolygons #' @export -readPolygonVizgenParquet <- function( - file, - z_index = "all", - calc_centroids = TRUE, - verbose = TRUE) { +readPolygonVizgenParquet <- function(file, + z_index = "all", + calc_centroids = TRUE, + verbose = TRUE) { # package checks package_check("arrow") package_check("sf") @@ -1228,7 +1294,7 @@ readPolygonVizgenParquet <- function( avail_z_idx <- arrow::open_dataset(file) %>% dplyr::distinct(ZIndex) %>% dplyr::pull() %>% - # dplyr::pull(as_vector = TRUE) %>% # switch to this in future and add + # dplyr::pull(as_vector = TRUE) %>% # switch to this in future and add # arrow version requirement sort() @@ -1237,13 +1303,14 @@ readPolygonVizgenParquet <- function( } else if (is.numeric(z_index)) { z_index <- as.integer(z_index) if (!all(z_index %in% avail_z_idx)) { - stop(paste("Not all z indices found in cell boundaries.\n + stop(paste("Not all z indices found in cell boundaries.\n Existing indices are:", paste(avail_z_idx, collapse = " "))) } z_index } - if (isTRUE(verbose)) + if (isTRUE(verbose)) { message("loading poly z_indices: ", paste(get_z_idx, collapse = " ")) + } # 2. collect by z index filter and convert WKB to multipolygon @@ -1263,7 +1330,8 @@ readPolygonVizgenParquet <- function( future.seed = TRUE ) names(multipolygons) <- lapply( - multipolygons, function(x) paste0("z", unique(x$ZIndex))) + multipolygons, function(x) paste0("z", unique(x$ZIndex)) + ) # 3. convert to giottoPolygons and append meta @@ -1315,17 +1383,18 @@ readPolygonVizgenParquet <- function( #' @returns giotto object or cell polygons list #' @seealso \code{\link{smoothGiottoPolygons}} #' @export -readPolygonFilesVizgen <- function(gobject, - boundaries_path, - fovs = NULL, - polygon_feat_types = 0:6, - flip_x_axis = FALSE, - flip_y_axis = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60, - set_neg_to_zero = FALSE, - return_gobject = TRUE, - verbose = TRUE) { +readPolygonFilesVizgen <- function( + gobject, + boundaries_path, + fovs = NULL, + polygon_feat_types = 0:6, + flip_x_axis = FALSE, + flip_y_axis = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60, + set_neg_to_zero = FALSE, + return_gobject = TRUE, + verbose = TRUE) { # define names poly_feat_names <- paste0("z", polygon_feat_types) poly_feat_indexes <- paste0("zIndex_", polygon_feat_types) @@ -1367,18 +1436,20 @@ readPolygonFilesVizgen <- function(gobject, -#' @describeIn readPolygonFilesVizgen (internal) Optimized .hdf5 reading for +#' @describeIn readPolygonFilesVizgen (internal) Optimized .hdf5 reading for #' vizgen merscope output. Returns a data.table of xyz coords and cell_id #' @keywords internal -.h5_read_vizgen <- function(h5File, - z_indices = 1L:7L, - segm_to_use = "p_0", - H5Fopen_flags = "H5F_ACC_RDWR") { +.h5_read_vizgen <- function( + h5File, + z_indices = 1L:7L, + segm_to_use = "p_0", + H5Fopen_flags = "H5F_ACC_RDWR") { # data.table vars group <- name <- cell <- z_name <- otype <- d_name <- cell_id <- NULL h5_ls <- data.table::setDT( - rhdf5::h5ls(h5File, recursive = 5, datasetinfo = FALSE)) + rhdf5::h5ls(h5File, recursive = 5, datasetinfo = FALSE) + ) cell_names <- as.character(h5_ls[group == "/featuredata", name]) z_names <- h5_ls[grep("zIndex", name), unique(name)] @@ -1387,10 +1458,12 @@ readPolygonFilesVizgen <- function(gobject, dset_names <- dset_names[grep(segm_to_use, group), ] # tag cellnames dset_names[, cell := gsub( - pattern = "/featuredata/|/zIndex.*$", replacement = "", x = group)] + pattern = "/featuredata/|/zIndex.*$", replacement = "", x = group + )] # tag z_names dset_names[, z_name := gsub( - pattern = "^.*/(zIndex_\\d*).*$", replacement = "\\1", x = group)] + pattern = "^.*/(zIndex_\\d*).*$", replacement = "\\1", x = group + )] # subset by z_indices dset_names <- dset_names[z_name %in% z_names[z_indices], ] # create full file location @@ -1403,7 +1476,9 @@ readPolygonFilesVizgen <- function(gobject, zvals <- .h5_read_bare( file = fid, name = paste0( - c("/featuredata", cell_name, "z_coordinates"), collapse = "/"), + c("/featuredata", cell_name, "z_coordinates"), + collapse = "/" + ), dapl = dapl ) names(zvals) <- z_names @@ -1413,13 +1488,16 @@ readPolygonFilesVizgen <- function(gobject, cell_data <- lapply( seq(nrow(cell_dsets)), function(fid, dapl, zvals, d_i) { - res <- .h5_read_bare( - file = fid, name = cell_dsets[d_i, d_name], dapl = dapl) - res <- t_flex(res[, , 1L]) - res <- cbind(res, zvals[cell_dsets[d_i, z_name]]) - colnames(res) <- c("x", "y", "z") - res - }, fid = fid, dapl = dapl, zvals = zvals) + res <- .h5_read_bare( + file = fid, name = cell_dsets[d_i, d_name], dapl = dapl + ) + res <- t_flex(res[, , 1L]) + res <- cbind(res, zvals[cell_dsets[d_i, z_name]]) + colnames(res) <- c("x", "y", "z") + res + }, + fid = fid, dapl = dapl, zvals = zvals + ) cell_data <- data.table::as.data.table(do.call("rbind", cell_data)) cell_data[, cell_id := cell_name] cell_data @@ -1446,7 +1524,7 @@ readPolygonFilesVizgen <- function(gobject, PACKAGE = "rhdf5" ) invisible(.Call("_H5Dclose", did, PACKAGE = "rhdf5")) - + res } @@ -1466,8 +1544,9 @@ readPolygonFilesVizgen <- function(gobject, #' @param bin_size bin size to select from .gef file #' @returns transcript with coordinates #' @export -getGEFtxCoords <- function(gef_file, - bin_size = "bin100") { +getGEFtxCoords <- function( + gef_file, + bin_size = "bin100") { # data.table vars genes <- NULL @@ -1489,9 +1568,9 @@ getGEFtxCoords <- function(gef_file, ) setDT(geneDT) - # Step 3: Combine read expression and gene data by repeating count + # Step 3: Combine read expression and gene data by repeating count # (match offset index) - # See STOMICS file format manual for more information about exprDT and + # See STOMICS file format manual for more information about exprDT and # geneDT exprDT[, genes := rep(x = geneDT$gene, geneDT$count)] diff --git a/R/giotto_viewer.R b/R/giotto_viewer.R index ae903aac4..c21f66f75 100644 --- a/R/giotto_viewer.R +++ b/R/giotto_viewer.R @@ -1,19 +1,21 @@ #' @title write_giotto_viewer_annotation -#' @description write out factor-like annotation data from a giotto object for +#' @description write out factor-like annotation data from a giotto object for #' the Viewer #' @param annotation annotation from the data.table from giotto object #' @param annot_name name of the annotation #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_annotation <- function(annotation, - annot_name = "test", - output_directory = getwd()) { +write_giotto_viewer_annotation <- function( + annotation, + annot_name = "test", + output_directory = getwd()) { if (is.numeric(annotation) == TRUE) { # annotation information and mapping sorted_unique_numbers <- sort(unique(annotation)) annot_map <- data.table::data.table( - num = sorted_unique_numbers, fac = sorted_unique_numbers) + num = sorted_unique_numbers, fac = sorted_unique_numbers + ) annot_information <- annotation } else { # factors to numerics @@ -54,9 +56,10 @@ write_giotto_viewer_annotation <- function(annotation, #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_numeric_annotation <- function(annotation, - annot_name = "test", - output_directory = getwd()) { +write_giotto_viewer_numeric_annotation <- function( + annotation, + annot_name = "test", + output_directory = getwd()) { # write to output directory annot_inf_map <- paste0(annot_name, "_num_annot_information", ".txt") write.table(annotation, @@ -79,14 +82,16 @@ write_giotto_viewer_numeric_annotation <- function(annotation, #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_dim_reduction <- function(dim_reduction_cell, - dim_red = NULL, - dim_red_name = NULL, - dim_red_rounding = NULL, - dim_red_rescale = c(-20, 20), - output_directory = getwd()) { +write_giotto_viewer_dim_reduction <- function( + dim_reduction_cell, + dim_red = NULL, + dim_red_name = NULL, + dim_red_rounding = NULL, + dim_red_rescale = c(-20, 20), + output_directory = getwd()) { dim_red_coord <- dim_reduction_cell[[dim_red]][[ - dim_red_name]]$coordinates[, seq_len(2)] + dim_red_name + ]]$coordinates[, seq_len(2)] if (is.null(dim_red_coord)) { cat("\n combination of ", dim_red, " and ", dim_red_name, " does not exist \n") @@ -99,7 +104,8 @@ write_giotto_viewer_dim_reduction <- function(dim_reduction_cell, # rescale dimension reduction coordinates if (!is.null(dim_red_rescale) & length(dim_red_rescale) == 2) { dim_red_coord <- scales::rescale( - x = dim_red_coord, to = dim_red_rescale) + x = dim_red_coord, to = dim_red_rescale + ) } dim_red_name <- paste0(dim_red, "_", dim_red_name, "_dim_coord.txt") @@ -137,33 +143,34 @@ write_giotto_viewer_dim_reduction <- function(dim_reduction_cell, #' include the provided spatial enrichment name (default PAGE or rank) #' and add the gene signature names (.e.g cell types) to the numeric annotations parameter. #' @export -exportGiottoViewer <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - output_directory = NULL, - spat_enr_names = NULL, - factor_annotations = NULL, - numeric_annotations = NULL, - dim_reductions, - dim_reduction_names, - expression_values = c("scaled", "normalized", "custom"), - dim_red_rounding = NULL, - dim_red_rescale = c(-20, 20), - expression_rounding = 2, - overwrite_dir = TRUE, - verbose = TRUE) { +exportGiottoViewer <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + output_directory = NULL, + spat_enr_names = NULL, + factor_annotations = NULL, + numeric_annotations = NULL, + dim_reductions, + dim_reduction_names, + expression_values = c("scaled", "normalized", "custom"), + dim_red_rounding = NULL, + dim_red_rescale = c(-20, 20), + expression_rounding = 2, + overwrite_dir = TRUE, + verbose = TRUE) { ## output directory ## if (file.exists(output_directory)) { if (overwrite_dir == TRUE) { - message("output directory already exists, files will be + message("output directory already exists, files will be overwritten") } else { - stop("output directory already exists, change overwrite_dir = TRUE + stop("output directory already exists, change overwrite_dir = TRUE to overwrite files \n") } } else if (is.null(output_directory)) { - message("no output directory is provided, defaults to current + message("no output directory is provided, defaults to current directory: ", getwd(), "\n") output_directory <- getwd() } else { @@ -265,10 +272,13 @@ exportGiottoViewer <- function(gobject, } annot_list <- data.table( - txtfiles = unlist(text_file_names), names = unlist(annot_names)) + txtfiles = unlist(text_file_names), names = unlist(annot_names) + ) write.table(annot_list, - file = paste0(output_directory, "/", - "annotation_list", "_", feat, ".txt"), + file = paste0( + output_directory, "/", + "annotation_list", "_", feat, ".txt" + ), quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " " ) } @@ -302,10 +312,13 @@ exportGiottoViewer <- function(gobject, } annot_list <- data.table( - txtfiles = unlist(text_file_names), names = unlist(annot_names)) + txtfiles = unlist(text_file_names), names = unlist(annot_names) + ) write.table(annot_list, - file = paste0(output_directory, "/", - "annotation_num_list", "_", feat, ".txt"), + file = paste0( + output_directory, "/", + "annotation_num_list", "_", feat, ".txt" + ), quote = FALSE, row.names = FALSE, col.names = FALSE, sep = " " ) } @@ -364,38 +377,41 @@ exportGiottoViewer <- function(gobject, } output_directory_norm <- normalizePath(output_directory) fileWrite_directory <- paste0( - output_directory_norm, "/", "giotto_expression.csv") + output_directory_norm, "/", "giotto_expression.csv" + ) data.table::fwrite( - data.table::as.data.table(expr_values, keep.rownames = "gene"), - file = fileWrite_directory, sep = ",", - quote = FALSE, row.names = FALSE, col.names = TRUE) + data.table::as.data.table(expr_values, keep.rownames = "gene"), + file = fileWrite_directory, sep = ",", + quote = FALSE, row.names = FALSE, col.names = TRUE + ) - if (verbose == TRUE) + if (verbose == TRUE) { cat("finished writing giotto viewer files to", output_directory) + } if (verbose == TRUE) { message("=========================================================") - message("Next steps. Please manually run the following in a SHELL + message("Next steps. Please manually run the following in a SHELL terminal:") message("=========================================================") message(paste("cd ", output_directory)) - message("giotto_setup_image --require-stitch=n --image=n - --image-multi-channel=n --segmentation=n --multi-fov=n + message("giotto_setup_image --require-stitch=n --image=n + --image-multi-channel=n --segmentation=n --multi-fov=n --output-json=step1.json") message("smfish_step1_setup -c step1.json") - message("giotto_setup_viewer --num-panel=2 - --input-preprocess-json=step1.json - --panel-1=PanelPhysicalSimple --panel-2=PanelTsne - --output-json=step2.json + message("giotto_setup_viewer --num-panel=2 + --input-preprocess-json=step1.json + --panel-1=PanelPhysicalSimple --panel-2=PanelTsne + --output-json=step2.json --input-annotation-list=annotation_list.txt") - message("smfish_read_config -c step2.json -o test.dec6.js + message("smfish_read_config -c step2.json -o test.dec6.js -p test.dec6.html -q test.dec6.css") message("giotto_copy_js_css --output .") message("python3 -m http.server") message("=========================================================") - message("Finally, open your browser, navigate to - http://localhost:8000/. Then click on the file + message("Finally, open your browser, navigate to + http://localhost:8000/. Then click on the file test.dec6.html to see the viewer.") message("For more information, http://spatialgiotto.rc.fas.harvard.edu/giotto.viewer.setup3.html", "\n") } diff --git a/R/gstop.R b/R/gstop.R index be2a805e1..d83ad98d8 100644 --- a/R/gstop.R +++ b/R/gstop.R @@ -2,14 +2,15 @@ # .n should be increased when called from a nested location if capturing the # original call is desired. # .n should be increased to 2L when within a generic method -.gstop <- function(..., - sep = " ", - strWidth = 100, - errWidth = FALSE, - .prefix = " ", - .initial = "", - .n = 1L, - .call = FALSE) { +.gstop <- function( + ..., + sep = " ", + strWidth = 100, + errWidth = FALSE, + .prefix = " ", + .initial = "", + .n = 1L, + .call = FALSE) { GiottoUtils::gstop( ..., sep = sep, diff --git a/R/image_registration.R b/R/image_registration.R index 677e66246..e671ae779 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -5,7 +5,7 @@ #' @name .trakem2_rigid_transforms #' @title Read trakem2 rigid transforms -#' @description Extract rigid registration transformation values from FIJI +#' @description Extract rigid registration transformation values from FIJI #' TrakEM2 xml file. Generated through register_virtual_stack_slices. #' @param inputstring string read in from TrakeEM2 xml file #' @returns rigid registration transformation values @@ -56,18 +56,20 @@ out <- c(out, 0, 0) out <- data.table::data.table(t(matrix(out))) - colnames(out) <- c("Theta", "Xtransform", "Ytransform", "itx", "ity", - "XFinalTransform", "YFinalTransform") + colnames(out) <- c( + "Theta", "Xtransform", "Ytransform", "itx", "ity", + "XFinalTransform", "YFinalTransform" + ) - # itx and ity are additional values in the trakem2 xml files that must be - # added to Xtransform and Ytransform in order to get the final + # itx and ity are additional values in the trakem2 xml files that must be + # added to Xtransform and Ytransform in order to get the final # transformation values. - # only relevant for sampleset with more than 1 slice away from the + # only relevant for sampleset with more than 1 slice away from the # reference image out$XFinalTransform <- out$Xtransform + out$itx out$YFinalTransform <- out$Ytransform + out$ity - # Multiply theta by -1 due to differences in R and image plotting + # Multiply theta by -1 due to differences in R and image plotting # coordinates out$Theta <- -out$Theta @@ -78,7 +80,7 @@ #' @title Rigid transform spatial locations #' @name .rigid_transform_spatial_locations -#' @description Performs appropriate transforms to align spatial locations +#' @description Performs appropriate transforms to align spatial locations #' with registered images. #' @param spatlocs input spatial locations #' @param transform_values transformation values to use @@ -86,16 +88,18 @@ #' @returns spatlocs #' @keywords internal # Rotation is performed first, followed by XY transform. -.rigid_transform_spatial_locations <- function(spatlocs, - transform_values, - method) { +.rigid_transform_spatial_locations <- function( + spatlocs, + transform_values, + method) { if (method == "fiji") { spatlocsXY <- spatlocs[, c("sdimx", "sdimy")] # These functions must be performed in positive y values spatlocsXY$sdimy <- -1 * spatlocsXY$sdimy spatlocsXY <- spin(spatlocsXY, GiottoUtils::degrees( - transform_values$Theta)) %>% + transform_values$Theta + )) %>% spatShift( dx = transform_values$XFinalTransform, dy = transform_values$YFinalTransform @@ -118,7 +122,7 @@ return(spatlocs) } else { - stop('Image registration method must be provided. Only "fiji" and + stop('Image registration method must be provided. Only "fiji" and "rvision" methods currently supported.') } } @@ -135,34 +139,37 @@ #' @returns list #' @keywords internal # Automatically account for changes in image size due to alignment -.reg_img_minmax_finder <- function(gobject_list, - image_unreg = NULL, - largeImage_unreg = NULL, # TODO Currently unused - scale_factor, - transform_values, - method) { +.reg_img_minmax_finder <- function( + gobject_list, + image_unreg = NULL, + largeImage_unreg = NULL, # TODO Currently unused + scale_factor, + transform_values, + method) { # Find image spatial info from original image if possible - # Check to make sure that image_unreg finds an existing image in each + # Check to make sure that image_unreg finds an existing image in each # gobject to be registered imgPresent <- function(gobject, image, img_type) { image %in% list_images_names(gobject = gobject, img_type = img_type) } if (!is.null(image_unreg)) img_type <- "image" # TODO needs reworking - if (!is.null(largeImage_unreg)) img_type <- "largeImage" # TODO needs - # reworking - currently only pays attention to 'image' and not + if (!is.null(largeImage_unreg)) img_type <- "largeImage" # TODO needs + # reworking - currently only pays attention to 'image' and not # 'largeImage' types if (all(as.logical(lapply( - X = gobject_list, FUN = imgPresent, image = image_unreg, - img_type = img_type)))) { + X = gobject_list, FUN = imgPresent, image = image_unreg, + img_type = img_type + )))) { giottoImage_list <- lapply( - X = gobject_list, FUN = get_giottoImage, name = image_unreg, - image_type = img_type) + X = gobject_list, FUN = get_giottoImage, name = image_unreg, + image_type = img_type + ) image_corners <- lapply(giottoImage_list, .get_img_corners) # Infer image corners of registered images PRIOR TO REGISTRATION - # scale unreg_image corners to registered image (use + # scale unreg_image corners to registered image (use # reg_scalefactor/unreg_scalefactor as scale factor) image_corners <- lapply_flex( seq_along(gobject_list), @@ -175,7 +182,7 @@ } ) - # register corners based on transform values (only possible at + # register corners based on transform values (only possible at # reg_image scaling) image_corners_reg <- lapply( seq_along(image_corners), @@ -193,7 +200,9 @@ seq_along(image_corners_reg), function(x) { rescale( - image_corners_reg[[x]], (1 / scale_factor[[x]]), x0 = 0, y0 = 0) + image_corners_reg[[x]], (1 / scale_factor[[x]]), + x0 = 0, y0 = 0 + ) } ) @@ -209,7 +218,7 @@ # return the minmax values - already scaled to spatlocs return(minmaxRegVals) } else { - warning("Original images must be supplied for registered images to be + warning("Original images must be supplied for registered images to be aligned.") } } @@ -217,7 +226,7 @@ #' @title Get image corners #' @name .get_img_corners -#' @description finds four corner spatial coords of giottoImages or +#' @description finds four corner spatial coords of giottoImages or #' magick-images #' @param img_object giottoImage or magick-image to use #' @returns data.frame @@ -253,46 +262,47 @@ #' @title registerGiottoObjectList #' @name registerGiottoObjectList -#' @description Wrapper function for registerGiottoObjectListFiji and +#' @description Wrapper function for registerGiottoObjectListFiji and #' registerGiottoObjectListRvision #' @param gobject_list List of gobjects to register #' @param spat_unit spatial unit -#' @param method Method used to align gobjects. Current options are either +#' @param method Method used to align gobjects. Current options are either #' using FIJI register_virtual_stack_slices output or rvision #' @param image_unreg Gobject image slot to use. Defaults to 'image' (optional) -#' @param image_reg_name Arbitrary image slot name for registered images to +#' @param image_reg_name Arbitrary image slot name for registered images to #' occupy. Defaults to replacement of 'image' slot (optional) #' @param image_list RVISION - under construction #' @param save_dir RVISION - under construction -#' @param spatloc_unreg Unregistered spatial locations to align. Defaults to +#' @param spatloc_unreg Unregistered spatial locations to align. Defaults to #' 'raw' slot (optional) -#' @param spatloc_reg_name Arbitrary name for registered spatial locations. +#' @param spatloc_reg_name Arbitrary name for registered spatial locations. #' Defaults to replacement of 'raw' slot (optional) #' @param fiji_xml_files Filepaths to FIJI registration XML outputs -#' @param fiji_registered_images Registered images output by FIJI +#' @param fiji_registered_images Registered images output by FIJI #' register_virtual_stack_slices #' @param scale_factor Scaling to be applied to spatial coordinates -#' @param allow_rvision_autoscale Whether or not to allow rvision to +#' @param allow_rvision_autoscale Whether or not to allow rvision to #' automatically scale the images when performing image registration #' @param verbose Be verbose -#' @returns List of registered giotto objects where the registered images and +#' @returns List of registered giotto objects where the registered images and #' spatial locations #' @export -registerGiottoObjectList <- function(gobject_list, - spat_unit = NULL, - method = c("fiji", "rvision"), - image_unreg = "image", - image_reg_name = "image", - image_list = NULL, # Rvision - save_dir = NULL, # Rvision - spatloc_unreg = "raw", - spatloc_reg_name = "raw", - fiji_xml_files, - fiji_registered_images, - scale_factor = NULL, - allow_rvision_autoscale = TRUE, # Rvision - # auto_comp_reg_border = TRUE, - verbose = TRUE) { +registerGiottoObjectList <- function( + gobject_list, + spat_unit = NULL, + method = c("fiji", "rvision"), + image_unreg = "image", + image_reg_name = "image", + image_list = NULL, # Rvision + save_dir = NULL, # Rvision + spatloc_unreg = "raw", + spatloc_reg_name = "raw", + fiji_xml_files, + fiji_registered_images, + scale_factor = NULL, + allow_rvision_autoscale = TRUE, # Rvision + # auto_comp_reg_border = TRUE, + verbose = TRUE) { method <- match.arg(method, choices = c("fiji", "rvision")) if (method == "fiji") { @@ -318,7 +328,7 @@ registerGiottoObjectList <- function(gobject_list, verbose = verbose ) } else { - stop("Invalid method input\n Only fiji and rvision methods are + stop("Invalid method input\n Only fiji and rvision methods are currently supported.") } @@ -328,43 +338,44 @@ registerGiottoObjectList <- function(gobject_list, #' @title registerGiottoObjectListFiji #' @name registerGiottoObjectListFiji -#' @description Function to spatially align gobject data based on FIJI image +#' @description Function to spatially align gobject data based on FIJI image #' registration. #' @param gobject_list list of gobjects to register #' @param spat_unit spatial unit -#' @param image_unreg name of original unregistered images. Defaults to +#' @param image_unreg name of original unregistered images. Defaults to #' 'image' (optional) -#' @param image_reg_name arbitrary name for registered images to occupy. +#' @param image_reg_name arbitrary name for registered images to occupy. #' Defaults to replacement of 'image' (optional) -#' @param image_replace_name arbitrary name for any images replaced due to +#' @param image_replace_name arbitrary name for any images replaced due to #' image_reg_name argument (optional) -#' @param registered_images registered images output by FIJI +#' @param registered_images registered images output by FIJI #' register_virtual_stack_slices #' @param spatloc_unreg spatial locations to use. Defaults to 'raw' (optional) -#' @param spatloc_reg_name name for registered spatial locations. Defaults to +#' @param spatloc_reg_name name for registered spatial locations. Defaults to #' replacement of 'raw' (optional) -#' @param spatloc_replace_name arbitrary name for any spatial locations +#' @param spatloc_replace_name arbitrary name for any spatial locations #' replaced due to spatloc_reg_name argument (optional) -#' @param xml_files atomic vector of filepaths to xml outputs from FIJI +#' @param xml_files atomic vector of filepaths to xml outputs from FIJI #' register_virtual_stack_slices -#' @param scale_factor vector of scaling factors of images used in registration +#' @param scale_factor vector of scaling factors of images used in registration #' vs spatlocs #' @param verbose be verbose -#' @returns list of registered giotto objects where the registered images and +#' @returns list of registered giotto objects where the registered images and #' spatial locations #' @export -registerGiottoObjectListFiji <- function(gobject_list, - spat_unit = NULL, - image_unreg = "image", - image_reg_name = "image", - image_replace_name = "unregistered", - registered_images = NULL, - spatloc_unreg = "raw", - spatloc_reg_name = "raw", - spatloc_replace_name = "unregistered", - xml_files, - scale_factor = NULL, - verbose = TRUE) { +registerGiottoObjectListFiji <- function( + gobject_list, + spat_unit = NULL, + image_unreg = "image", + image_reg_name = "image", + image_replace_name = "unregistered", + registered_images = NULL, + spatloc_unreg = "raw", + spatloc_reg_name = "raw", + spatloc_replace_name = "unregistered", + xml_files, + scale_factor = NULL, + verbose = TRUE) { # set spat_unit based on first gobject spat_unit <- set_default_spat_unit( gobject = gobject_list[[1]], @@ -373,20 +384,22 @@ registerGiottoObjectListFiji <- function(gobject_list, ## 0. Check Params ## if (length(gobject_list) != length(xml_files)) { - stop("xml spatial transforms must be supplied for every gobject to be + stop("xml spatial transforms must be supplied for every gobject to be registered.") } if (is.null(registered_images) == FALSE) { - # If there are not the same number of registered images as gobjects, + # If there are not the same number of registered images as gobjects, # stop if (length(registered_images) != length(gobject_list)) { - stop("A registered image should be supplied for every gobject to + stop("A registered image should be supplied for every gobject to align") } if (sum(as.logical(lapply( - registered_images, methods::is, class2 = "giottoImage"))) > 0) { - stop("Registered images should be supplied as either magick-objects + registered_images, methods::is, + class2 = "giottoImage" + ))) > 0) { + stop("Registered images should be supplied as either magick-objects or filepaths") } } @@ -395,15 +408,15 @@ registerGiottoObjectListFiji <- function(gobject_list, if (!is.numeric(scale_factor)) { stop("scale_factor only accepts numerics") } - if ((length(scale_factor) != length(gobject_list)) && + if ((length(scale_factor) != length(gobject_list)) && (length(scale_factor) != 1)) { - stop("If more than one scale_factor is given, there must be one for + stop("If more than one scale_factor is given, there must be one for each gobject to be registered.") } } - # scale_factors will always be given externally. Registered images do not + # scale_factors will always be given externally. Registered images do not # have gobjects yet. # expand scale_factor if given as a single value scale_list <- c() @@ -435,7 +448,9 @@ registerGiottoObjectListFiji <- function(gobject_list, t_file <- xml_files[[file_i]] #------ Put all transform files together transf_list[[file_i]] <- paste( - readLines(t_file, warn = FALSE), collapse = "\n") + readLines(t_file, warn = FALSE), + collapse = "\n" + ) } # Select useful info out of the TrakEM2 files @@ -492,19 +507,23 @@ registerGiottoObjectListFiji <- function(gobject_list, # Params check for conflicting names if (verbose == TRUE) { if (image_unreg == image_reg_name) { - cat("Registered image name already used. Previous image named ", - image_reg_name, " renamed to ", image_replace_name) + cat( + "Registered image name already used. Previous image named ", + image_reg_name, " renamed to ", image_replace_name + ) } if (spatloc_unreg == spatloc_reg_name) { - cat("Registered spatloc name already used. - Previous spatloc named ", spatloc_reg_name, - " renamed to ", spatloc_replace_name) + cat( + "Registered spatloc name already used. + Previous spatloc named ", spatloc_reg_name, + " renamed to ", spatloc_replace_name + ) } } # Update Spatial - # Rename original spatial locations to 'unregistered' if conflicting + # Rename original spatial locations to 'unregistered' if conflicting # with output if (spatloc_unreg == spatloc_reg_name) { gobj <- set_spatial_locations( @@ -531,7 +550,7 @@ registerGiottoObjectListFiji <- function(gobject_list, # Update images - # If there is an existing image with the image_reg_name, rename it + # If there is an existing image with the image_reg_name, rename it # "unregistered" # Move the original image to 'unregistered' if (image_unreg == image_reg_name) { @@ -571,7 +590,8 @@ registerGiottoObjectListFiji <- function(gobject_list, )) names(boundaries) <- c( - "xmax_adj", "xmin_adj", "ymax_adj", "ymin_adj") + "xmax_adj", "xmin_adj", "ymax_adj", "ymin_adj" + ) gobj@images[[image_reg_name]]@boundaries <- boundaries } @@ -581,30 +601,31 @@ registerGiottoObjectListFiji <- function(gobject_list, return(gobject_list) } -# TODO check if spatloc is actually provided in createGiottoImage() and ignore +# TODO check if spatloc is actually provided in createGiottoImage() and ignore # auto align if not. #' @title registerGiottoObjectListRvision #' @name registerGiottoObjectListRvision -#' @description Function to spatially align gobject data based on Rvision image +#' @description Function to spatially align gobject data based on Rvision image #' registration. #' @param gobject_list list of gobjects to register #' @param image_list Filepaths to unregistered images #' @param save_dir (Optional) If given, save registered images to this directory #' @param spatloc_unreg spatial locations to use -#' @param spatloc_reg_name name for registered spatial locations to. Defaults +#' @param spatloc_reg_name name for registered spatial locations to. Defaults #' to replacement of spat_unreg (optional) #' @param verbose be verbose -#' @returns list of registered giotto objects where the registered images and +#' @returns list of registered giotto objects where the registered images and #' spatial locations #' @export # Register giotto objects when given raw images and spatial locations -registerGiottoObjectListRvision <- function(gobject_list = gobject_list, - image_list = NULL, - save_dir = NULL, - spatloc_unreg = NULL, - spatloc_reg_name = "raw", - verbose = TRUE) { # Not used +registerGiottoObjectListRvision <- function( + gobject_list = gobject_list, + image_list = NULL, + save_dir = NULL, + spatloc_unreg = NULL, + spatloc_reg_name = "raw", + verbose = TRUE) { # Not used package_check( pkg_name = "Rvision", @@ -635,11 +656,13 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, color_images <- c() for (path in image_list) { unreg_images <- append( - unreg_images, Rvision::image(filename = path), - after = length(unreg_images)) + unreg_images, Rvision::image(filename = path), + after = length(unreg_images) + ) color_images <- append( - color_images, Rvision::image(filename = path), - after = length(color_images)) + color_images, Rvision::image(filename = path), + after = length(color_images) + ) } ## 3. Perform preprocessing @@ -648,7 +671,9 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, for (image_i in seq_along(unreg_images)) { # Make images grayscale Rvision::changeColorSpace( - unreg_images[[image_i]], colorspace = "GRAY", target = "self") + unreg_images[[image_i]], + colorspace = "GRAY", target = "self" + ) # Retrieve image dimensions dims <- dim(unreg_images[[image_i]]) rows <- append(rows, dims[[1]], after = length(rows)) @@ -662,16 +687,24 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, for (i in seq_along(unreg_images)) { # Add border so all images have same square dimensions Rvision::border( - unreg_images[[i]], squmax - rows[[i]], 0, - squmax - cols[[i]], 0, border_color = "white", target = "self") + unreg_images[[i]], squmax - rows[[i]], 0, + squmax - cols[[i]], 0, + border_color = "white", target = "self" + ) Rvision::border( - color_images[[i]], squmax - rows[[i]], 0, - squmax - cols[[i]], 0, border_color = "white", target = "self") + color_images[[i]], squmax - rows[[i]], 0, + squmax - cols[[i]], 0, + border_color = "white", target = "self" + ) # Apply scaling so all images of reasonable size for processing unreg_images[[i]] <- Rvision::resize( - unreg_images[[i]], height = enddim, width = enddim, target = "new") + unreg_images[[i]], + height = enddim, width = enddim, target = "new" + ) color_images[[i]] <- Rvision::resize( - color_images[[i]], height = enddim, width = enddim, target = "new") + color_images[[i]], + height = enddim, width = enddim, target = "new" + ) } rm(cols, rows) @@ -683,8 +716,10 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, transfs <- base::vector(mode = "list", length = length(unreg_images)) for (i in seq_along(unreg_images)) { transfs[[i]] <- Rvision::findTransformECC( - refImage, unreg_images[[i]], warp_mode = "euclidean", - filt_size = 101) + refImage, unreg_images[[i]], + warp_mode = "euclidean", + filt_size = 101 + ) } rm(refImage) @@ -693,10 +728,14 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, for (i in seq_along(unreg_images)) { # Apply scaling spatloc_list[[i]][] <- rescale( - spatloc_list[[i]][], enddim / squmax, x0 = 0, y0 = 0) + spatloc_list[[i]][], enddim / squmax, + x0 = 0, y0 = 0 + ) # Apply transform to spatlocs spatloc_list[[i]][] <- .rigid_transform_spatial_locations( - spatloc_list[[i]][], transfs[[i]], method = "rvision") + spatloc_list[[i]][], transfs[[i]], + method = "rvision" + ) } rm(squmax, enddim) @@ -733,9 +772,13 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, # Apply transform to image transf_images <- c() for (i in seq_along(unreg_images)) { - transf_images <- append(transf_images, Rvision::warpAffine( - color_images[[i]], transfs[[i]], target = "new"), - length(transf_images)) + transf_images <- append( + transf_images, Rvision::warpAffine( + color_images[[i]], transfs[[i]], + target = "new" + ), + length(transf_images) + ) } # Save images to save directory for (image_i in seq_along(transf_images)) { @@ -777,8 +820,10 @@ fiji <- function(fijiPath = NULL) { fijiPath <- getOption("giotto.fiji") if (!is.null(fijiPath)) { if (!file.exists(fijiPath)) { - stop("fiji is not at: ", fijiPath, - " as specified by options('giotto.fiji')!") + stop( + "fiji is not at: ", fijiPath, + " as specified by options('giotto.fiji')!" + ) } } else { # look for it in sensible places @@ -789,7 +834,7 @@ fiji <- function(fijiPath = NULL) { } else { stop( "Unable to find fiji! ", - "Set options('giotto.fiji') to point to the fiji + "Set options('giotto.fiji') to point to the fiji command line executable!" ) } @@ -804,13 +849,13 @@ fiji <- function(fijiPath = NULL) { #' @title registerImagesFIJI #' @name registerImagesFIJI -#' @description Wrapper function for Register Virtual Stack Slices plugin in +#' @description Wrapper function for Register Virtual Stack Slices plugin in #' FIJI #' @param source_img_dir Folder containing images to be registered #' @param output_img_dir Folder to save registered images to -#' @param transforms_save_dir (jython implementation only) Folder to save +#' @param transforms_save_dir (jython implementation only) Folder to save #' transforms to -#' @param ref_img_name (jython implementation only) File name of reference +#' @param ref_img_name (jython implementation only) File name of reference #' image for the registration #' @param init_gauss_blur Point detector option: initial image blurring #' @param steps_per_scale_octave Point detector option @@ -834,41 +879,42 @@ fiji <- function(fijiPath = NULL) { #' \code{options(giotto.fiji="/some/path")}) #' @param DryRun Whether to return the command to be run rather than actually #' executing it. -#' @returns list of registered giotto objects where the registered images and +#' @returns list of registered giotto objects where the registered images and #' spatial locations -#' @details This function was adapted from runFijiMacro function in +#' @details This function was adapted from runFijiMacro function in #' jimpipeline by jefferislab #' #' @export -registerImagesFIJI <- function(source_img_dir, - output_img_dir, - transforms_save_dir, - ref_img_name, - # Scale Invariant Interest Point Detector Options - init_gauss_blur = 1.6, - steps_per_scale_octave = 3, - min_img_size = 64, - max_img_size = 1024, - # Feature Descriptor Options - feat_desc_size = 8, - feat_desc_orient_bins = 8, - closest_next_closest_ratio = 0.92, - # Geometric Consensus Filter Options - max_align_err = 25, - inlier_ratio = 0.05, - # FIJI Options - headless = FALSE, - batch = TRUE, - MinMem = MaxMem, - MaxMem = 2500, - IncrementalGC = TRUE, - Threads = NULL, - fijiArgs = NULL, - javaArgs = NULL, - ijArgs = NULL, - jython = FALSE, - fijiPath = fiji(), - DryRun = FALSE) { +registerImagesFIJI <- function( + source_img_dir, + output_img_dir, + transforms_save_dir, + ref_img_name, + # Scale Invariant Interest Point Detector Options + init_gauss_blur = 1.6, + steps_per_scale_octave = 3, + min_img_size = 64, + max_img_size = 1024, + # Feature Descriptor Options + feat_desc_size = 8, + feat_desc_orient_bins = 8, + closest_next_closest_ratio = 0.92, + # Geometric Consensus Filter Options + max_align_err = 25, + inlier_ratio = 0.05, + # FIJI Options + headless = FALSE, + batch = TRUE, + MinMem = MaxMem, + MaxMem = 2500, + IncrementalGC = TRUE, + Threads = NULL, + fijiArgs = NULL, + javaArgs = NULL, + ijArgs = NULL, + jython = FALSE, + fijiPath = fiji(), + DryRun = FALSE) { # Check if output directory exists. If not, create the directory if (!file.exists(output_img_dir)) { dir.create(output_img_dir) @@ -882,20 +928,24 @@ registerImagesFIJI <- function(source_img_dir, if (headless) fijiArgs <- c(fijiArgs, "--headless") fijiArgs <- paste(fijiArgs, collapse = " ") - javaArgs <- c(paste("-Xms", MinMem, "m", sep = ""), - paste("-Xmx", MaxMem, "m", sep = ""), javaArgs) + javaArgs <- c( + paste("-Xms", MinMem, "m", sep = ""), + paste("-Xmx", MaxMem, "m", sep = ""), javaArgs + ) if (IncrementalGC) javaArgs <- c(javaArgs, "-Xincgc") javaArgs <- paste(javaArgs, collapse = " ") threadAdjust <- ifelse( - is.null(Threads), "", - paste("run(\"Memory & Threads...\", \"parallel=", Threads, "\");", - sep = "")) + is.null(Threads), "", + paste("run(\"Memory & Threads...\", \"parallel=", Threads, "\");", + sep = "" + ) + ) if (jython == TRUE) { # TODO Add check to see if jython script is installed. - message('jython implementation requires Headless_RVSS.py in - "/Giotto/inst/fiji/" to be copied to + message('jython implementation requires Headless_RVSS.py in + "/Giotto/inst/fiji/" to be copied to "/Applications/Fiji.app/plugins/Scripts/MyScripts/Headless_RVSS.py"') macroCall <- paste(" -eval '", @@ -1009,15 +1059,15 @@ parse_affine <- function(x) { # install_FIJI_scripts = function(fiji = fiji()) {} # TODO These things require a correct set of boundary values -# - Subset images in Giotto using Magick and followup reassignment as the +# - Subset images in Giotto using Magick and followup reassignment as the # default 'image' # - Follow this up with potential registration -# - Need a way to determine the pixel distances between spots to get an idea of +# - Need a way to determine the pixel distances between spots to get an idea of # which regions of image 'belong' to a spot -# - Would be nice to be able to put together an image mask even in magick and -# apply it to the image to aid with img_reg and take care of jagged lines after +# - Would be nice to be able to put together an image mask even in magick and +# apply it to the image to aid with img_reg and take care of jagged lines after # image subsetting # - A shiny app to subset tissue regions would be nice # The shiny app should be able to select spots in a 2d plane by default -# If given the ability, it should also select spots of a single plane or within +# If given the ability, it should also select spots of a single plane or within # a certain range of z values and plot them as a 2D for selection purposes diff --git a/R/interactivity.R b/R/interactivity.R index eb0deca31..c9815439f 100644 --- a/R/interactivity.R +++ b/R/interactivity.R @@ -11,10 +11,11 @@ #' @returns A `data.table` containing x,y coordinates from the plotted polygons. #' #' @export -plotInteractivePolygons <- function(x, - width = "auto", - height = "auto", - ...) { +plotInteractivePolygons <- function( + x, + width = "auto", + height = "auto", + ...) { package_check(pkg_name = "miniUI", repository = "CRAN") package_check(pkg_name = "shiny", repository = "CRAN") @@ -29,8 +30,10 @@ plotInteractivePolygons <- function(x, miniUI::gadgetTitleBar("Plot Interactive Polygons"), miniUI::miniContentPanel( shiny::textInput( - "polygon_name", label = "Polygon name", - value = "polygon 1"), + "polygon_name", + label = "Polygon name", + value = "polygon 1" + ), shiny::sliderInput("xrange", label = "x coordinates", min = min(terra::ext(x))[1], @@ -57,8 +60,10 @@ plotInteractivePolygons <- function(x, miniUI::gadgetTitleBar("Plot Interactive Polygons"), miniUI::miniContentPanel( shiny::textInput( - "polygon_name", label = "Polygon name", - value = "polygon 1"), + "polygon_name", + label = "Polygon name", + value = "polygon 1" + ), shiny::sliderInput("xrange", label = "x coordinates", min = min(x[["layers"]][[1]]$data$sdimx), @@ -100,8 +105,10 @@ plotInteractivePolygons <- function(x, theme(legend.position = "none") } else { terra::plot(x) - lapply(split(clicklist(), by = "name"), - function(x) graphics::polygon(x$x, x$y, ...)) + lapply( + split(clicklist(), by = "name"), + function(x) graphics::polygon(x$x, x$y, ...) + ) } }, res = 96, @@ -110,14 +117,16 @@ plotInteractivePolygons <- function(x, ) clicklist <- shiny::reactiveVal(data.table::data.table( - x = numeric(), y = numeric(), name = character())) # empty table + x = numeric(), y = numeric(), name = character() + )) # empty table shiny::observeEvent(input$plot_click, { click_x <- input$plot_click$x click_y <- input$plot_click$y polygon_name <- input$polygon_name temp <- clicklist() # get the table of past clicks temp <- rbind(temp, data.table::data.table( - x = click_x, y = click_y, name = polygon_name)) + x = click_x, y = click_y, name = polygon_name + )) clicklist(temp) }) @@ -147,12 +156,15 @@ plotInteractivePolygons <- function(x, #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object #' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' name = "selections" +#' ) #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) @@ -165,11 +177,12 @@ plotInteractivePolygons <- function(x, #' getCellsFromPolygon(g) #' #' @export -getCellsFromPolygon <- function(gobject, - polygon_name = "selections", - spat_unit = "cell", - spat_loc_name = "raw", - polygons = NULL) { +getCellsFromPolygon <- function( + gobject, + polygon_name = "selections", + spat_unit = "cell", + spat_loc_name = "raw", + polygons = NULL) { if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") } @@ -201,7 +214,8 @@ getCellsFromPolygon <- function(gobject, if (!is.null(polygons)) { polygonCells <- terra::subset( - polygonCells, polygonCells$poly_ID %in% polygons) + polygonCells, polygonCells$poly_ID %in% polygons + ) } return(polygonCells) @@ -247,13 +261,14 @@ getCellsFromPolygon <- function(gobject, #' g <- addPolygonCells(g) #' pDataDT(g) #' @export -addPolygonCells <- function(gobject, - polygon_name = "selections", - spat_unit = "cell", - spat_loc_name = "raw", - feat_type = "rna", - polygons = NULL, - na.label = "no_polygon") { +addPolygonCells <- function( + gobject, + polygon_name = "selections", + spat_unit = "cell", + spat_loc_name = "raw", + feat_type = "rna", + polygons = NULL, + na.label = "no_polygon") { ## verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -291,7 +306,8 @@ addPolygonCells <- function(gobject, ## assign a default ID to cells outside of polygons selection_values <- new_cell_metadata[[polygon_name]] selection_values <- ifelse( - is.na(selection_values), na.label, selection_values) + is.na(selection_values), na.label, selection_values + ) new_cell_metadata[, c(polygon_name) := selection_values] ## keep original order of cells @@ -328,12 +344,15 @@ addPolygonCells <- function(gobject, #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object #' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' name = "selections" +#' ) #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) @@ -344,14 +363,15 @@ addPolygonCells <- function(gobject, #' #' comparePolygonExpression(g) #' @export -comparePolygonExpression <- function(gobject, - polygon_name = "selections", - spat_unit = "cell", - feat_type = "rna", - selected_feats = "top_genes", - expression_values = "normalized", - method = "scran", - ...) { +comparePolygonExpression <- function( + gobject, + polygon_name = "selections", + spat_unit = "cell", + feat_type = "rna", + selected_feats = "top_genes", + expression_values = "normalized", + method = "scran", + ...) { # verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -446,12 +466,15 @@ comparePolygonExpression <- function(gobject, #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object #' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' name = "selections" +#' ) #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) @@ -462,12 +485,13 @@ comparePolygonExpression <- function(gobject, #' #' compareCellAbundance(g) #' @export -compareCellAbundance <- function(gobject, - polygon_name = "selections", - spat_unit = "cell", - feat_type = "rna", - cell_type_column = "leiden_clus", - ...) { +compareCellAbundance <- function( + gobject, + polygon_name = "selections", + spat_unit = "cell", + feat_type = "rna", + cell_type_column = "leiden_clus", + ...) { # verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -521,12 +545,15 @@ compareCellAbundance <- function(gobject, #' @examples #' ## Plot interactive polygons #' g <- GiottoData::loadGiottoMini("visium") -#' my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +#' my_polygon_coords <- data.frame( +#' poly_ID = rep("polygon1", 3), +#' sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +#' ) #' #' ## Add polygon coordinates to Giotto object #' my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -#' name = "selections") +#' name = "selections" +#' ) #' g <- addGiottoPolygons( #' gobject = g, #' gpolygons = list(my_giotto_polygons) @@ -540,12 +567,13 @@ compareCellAbundance <- function(gobject, #' #' plotPolygons(g, x = x) #' @export -plotPolygons <- function(gobject, - polygon_name = "selections", - x, - spat_unit = "cell", - polygons = NULL, - ...) { +plotPolygons <- function( + gobject, + polygon_name = "selections", + x, + spat_unit = "cell", + polygons = NULL, + ...) { ## verify gobject if (!inherits(gobject, "giotto")) { stop("gobject must be a Giotto object") @@ -614,10 +642,11 @@ plotPolygons <- function(gobject, #' @returns data.table with selected cell_IDs, spatial coordinates, and #' cluster_ID. #' @export -plotInteractive3D <- function(gobject, spat_unit = "cell", feat_type = "rna", - cell_color = "leiden_clus", - cell_color_code = NULL, point_size = 0.5, - width = "100%", height = "400px") { +plotInteractive3D <- function( + gobject, spat_unit = "cell", feat_type = "rna", + cell_color = "leiden_clus", + cell_color_code = NULL, point_size = 0.5, + width = "100%", height = "400px") { # NSE vars sdimx <- sdimy <- sdimz <- cell_ID <- NULL @@ -684,8 +713,9 @@ plotInteractive3D <- function(gobject, spat_unit = "cell", feat_type = "rna", data[data[[cell_color]] %in% input$clusters, ] %>% plotly::filter( sdimx >= input$xrange[1] & sdimx <= input$xrange[2] & - sdimy >= input$yrange[1] & sdimy <= input$yrange[2] & - sdimz >= input$zrange[1] & sdimz <= input$zrange[2]) %>% + sdimy >= input$yrange[1] & sdimy <= input$yrange[2] & + sdimz >= input$zrange[1] & sdimz <= input$zrange[2] + ) %>% plotly::select(cell_ID, sdimx, sdimy, sdimz, cell_color) }) diff --git a/R/kriging.R b/R/kriging.R index 44841edb4..53ef9d159 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -43,21 +43,22 @@ NULL #' @export setMethod( "interpolateFeature", signature(x = "giotto", y = "missing"), - function(x, - spat_unit = NULL, - feat_type = NULL, - feats, - spatvalues_params = list(), - spat_loc_name = "raw", - ext = NULL, - buffer = 50, - name_fmt = "%s", - savedir = file.path(getwd(), "interp_rasters"), - overwrite = FALSE, - verbose = NULL, - ...) { + function( + x, + spat_unit = NULL, + feat_type = NULL, + feats, + spatvalues_params = list(), + spat_loc_name = "raw", + ext = NULL, + buffer = 50, + name_fmt = "%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + verbose = NULL, + ...) { sl <- NULL - + # This method prepares the data from the giotto object to pass # downstream where the actual interpolation happens @@ -144,15 +145,16 @@ setMethod( setMethod( "interpolateFeature", signature(x = "spatLocsObj", y = "data.frame"), - function(x, y, - ext = NULL, - buffer = 50, - rastersize = 500, - name_fmt = "%s", - savedir = file.path(getwd(), "interp_rasters"), - overwrite = FALSE, - # cores = GiottoUtils::determine_cores(), - ...) { + function( + x, y, + ext = NULL, + buffer = 50, + rastersize = 500, + name_fmt = "%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + # cores = GiottoUtils::determine_cores(), + ...) { checkmate::assert_character(savedir) checkmate::assert_character(name_fmt) checkmate::assert_logical(overwrite) diff --git a/R/poly_influence.R b/R/poly_influence.R index a0bffa8bb..cf5a2a031 100644 --- a/R/poly_influence.R +++ b/R/poly_influence.R @@ -2,10 +2,10 @@ #' @name showPolygonSizeInfluence #' @param gobject giotto object #' @param spat_unit spatial unit -#' @param alt_spat_unit alternaitve spatial unit which represents resized +#' @param alt_spat_unit alternaitve spatial unit which represents resized #' polygon data #' @param feat_type feature type -#' @param clus_name name of cluster column in cell_metadata for given spat_unit +#' @param clus_name name of cluster column in cell_metadata for given spat_unit #' and alt_spat_unit, i.e. "kmeans" #' @param return_plot logical. whether to return the plot object #' @param verbose be verbose @@ -16,29 +16,31 @@ #' New columns, resize_switch and cluster_interaction, will be created within #' cell_metadata for spat_unit-feat_type. #' -#' These new columns will describe if a given cell switched cluster number when +#' These new columns will describe if a given cell switched cluster number when #' resized. #' If the same amount of clusters exist for spat_unit-feat_type and #' alt_spat_unit-feat_type, then clusters are determined to be #' corresponding based on % overlap in cell_IDs in each cluster. #' -#' Otherwise, multiple clusters from the spatial unit feature type pair are +#' Otherwise, multiple clusters from the spatial unit feature type pair are #' condensed to align with the smaller number of clusters and ensure overlap. #' #' @export -showPolygonSizeInfluence <- function(gobject = NULL, - spat_unit = NULL, - alt_spat_unit = NULL, - feat_type = NULL, - clus_name = "kmeans", - return_plot = FALSE, - verbose = FALSE) { +showPolygonSizeInfluence <- function( + gobject = NULL, + spat_unit = NULL, + alt_spat_unit = NULL, + feat_type = NULL, + clus_name = "kmeans", + return_plot = FALSE, + verbose = FALSE) { # NSE vars cell_ID <- total_expr <- cluster_interactions <- N <- resize_switch <- NULL # Guards - if (!c("giotto") %in% class(gobject)) + if (!c("giotto") %in% class(gobject)) { stop(wrap_txt("Please provide a valid Giotto Object.", errWidth = TRUE)) + } spat_unit <- set_default_spat_unit( gobject = gobject, @@ -52,28 +54,35 @@ showPolygonSizeInfluence <- function(gobject = NULL, if (!alt_spat_unit %in% names(gobject@expression)) { stop(wrap_txt(paste0( - "Alternative spatial unit ", alt_spat_unit, - " not found. Please ensure it exists."), errWidth = TRUE)) + "Alternative spatial unit ", alt_spat_unit, + " not found. Please ensure it exists." + ), errWidth = TRUE)) } meta_cols <- names(getCellMetadata( - gobject, - spat_unit = spat_unit, - feat_type = feat_type, - output = "data.table")) + gobject, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table" + )) if (!clus_name %in% meta_cols) { - stop(wrap_txt(paste0( - "Cluster name ", clus_name, - " not found within cell metadata. Please ensure it exists."), - errWidth = TRUE)) + stop(wrap_txt( + paste0( + "Cluster name ", clus_name, + " not found within cell metadata. Please ensure it exists." + ), + errWidth = TRUE + )) } if (c("cluster_interactions") %in% meta_cols) { - warning((wrap_txt(paste0("Switch interactions already found within - cell_metadata for spat_unit feat_type pair:`", spat_unit, "-", - feat_type, "`. They will be overwritten."), errWidth = TRUE))) + warning((wrap_txt(paste0( + "Switch interactions already found within + cell_metadata for spat_unit feat_type pair:`", spat_unit, "-", + feat_type, "`. They will be overwritten." + ), errWidth = TRUE))) } ## Compare clustering results between cell and smallcell data ####### # ----------------------------------------------------------------- # @@ -86,7 +95,8 @@ showPolygonSizeInfluence <- function(gobject = NULL, cell_meta <- merge.data.table(cell_meta, new_clus_table, by = "cell_ID") cell_meta[, cluster_interactions := paste0(cell_meta[[ - paste0(clus_name, ".x")]], "-", cell_meta[[paste0(clus_name, ".y")]])] + paste0(clus_name, ".x") + ]], "-", cell_meta[[paste0(clus_name, ".y")]])] switches2 <- cell_meta[, .N, by = "cluster_interactions"] setorder(switches2, N) @@ -116,13 +126,15 @@ showPolygonSizeInfluence <- function(gobject = NULL, } cell_meta[, resize_switch := ifelse( - cluster_interactions %in% switch_strs, "same", "switch")] + cluster_interactions %in% switch_strs, "same", "switch" + )] gobject <- addCellMetadata( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, new_metadata = cell_meta[ - , .(cell_ID, resize_switch, cluster_interactions)], + , .(cell_ID, resize_switch, cluster_interactions) + ], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -144,11 +156,13 @@ showPolygonSizeInfluence <- function(gobject = NULL, ) num_cells_switched <- sum( - getCellMetadata(gobject)$resize_switch == "switch") + getCellMetadata(gobject)$resize_switch == "switch" + ) num_cells_same <- sum(getCellMetadata(gobject)$resize_switch == "same") if (verbose) print(paste0(num_cells_switched, " cells switched clusters.")) - if (verbose) + if (verbose) { print(paste0(num_cells_same, " cells remained in the same cluster.")) + } if (return_plot) { return(poly_plot) @@ -169,13 +183,14 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' y_m is a cluster number from the resized spatial unit #' n is the number of clusters #' -#' Clusters are determined to be corresponding based on % overlap in cell_IDs +#' Clusters are determined to be corresponding based on % overlap in cell_IDs #' in each cluster. #' #' @keywords internal -.determine_switch_string_equal <- function(cell_meta = NULL, - cell_meta_new = NULL, - clus_name = NULL) { +.determine_switch_string_equal <- function( + cell_meta = NULL, + cell_meta_new = NULL, + clus_name = NULL) { k_clusters <- sort(unique(cell_meta[[clus_name]])) num_clusters <- k_clusters[length(k_clusters)] @@ -212,7 +227,7 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' @param num_orig sorted vector of cluster numbers in the original metadata #' @param num_new sorted vector of cluster numbers in the new, resized metadata #' @returns switch_str, a vector of corresponding cluster numbers in strings -#' @details determines how to create a string in the format +#' @details determines how to create a string in the format #' c("x_1-y_1", "x_2-y_2"..."x_n, y_m") #' Where: #' x_n is a cluster number from the original spatial unit @@ -223,8 +238,9 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' Essentially determines iteration order for .create_switch_string_unequal() #' #' @keywords internal -.determine_switch_string_unequal <- function(num_orig = NULL, - num_new = NULL) { +.determine_switch_string_unequal <- function( + num_orig = NULL, + num_new = NULL) { switch_strs <- c() orig_first <- TRUE @@ -261,15 +277,22 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' n is the number of clusters in the original spatial unit #' m is the number of clusters in the new spatial unit #' @keywords internal -.create_switch_string_unequal <- function(num_first = NULL, - num_second = NULL, - switch_strs = NULL) { +.create_switch_string_unequal <- function( + num_first = NULL, + num_second = NULL, + switch_strs = NULL) { for (o in num_first) { for (n in num_second) { - if (as.integer(o) == as.integer(n)) switch_strs <- c( - switch_strs, paste0(as.character(o), "-", as.character(n))) - if (o > n && n == num_second[length(num_second)]) switch_strs <- c( - switch_strs, paste0(as.character(o), "-", as.character(n))) + if (as.integer(o) == as.integer(n)) { + switch_strs <- c( + switch_strs, paste0(as.character(o), "-", as.character(n)) + ) + } + if (o > n && n == num_second[length(num_second)]) { + switch_strs <- c( + switch_strs, paste0(as.character(o), "-", as.character(n)) + ) + } } } @@ -284,20 +307,22 @@ showPolygonSizeInfluence <- function(gobject = NULL, #' @param spat_unit spatial unit #' @param feat_type feature type #' @returns ggplot -#' @details Creates a pie chart showing how many cells switched clusters after +#' @details Creates a pie chart showing how many cells switched clusters after #' annotation resizing. -#' The function showPolygonSizeInfluence() must have been run on the Giotto +#' The function showPolygonSizeInfluence() must have been run on the Giotto #' Object for this function to run. #' @export -showCellProportionSwitchedPie <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL) { +showCellProportionSwitchedPie <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL) { # NSE vars cluster_status <- num_cells <- resize_switch <- perc <- ypos <- NULL # Guards - if (!"giotto" %in% class(gobject)) + if (!"giotto" %in% class(gobject)) { stop(wrap_txt("Please provide a valid Giotto Object.", errWidth = TRUE)) + } spat_unit <- set_default_spat_unit( gobject = gobject, @@ -319,14 +344,15 @@ showCellProportionSwitchedPie <- function(gobject = NULL, ) if (!c("resize_switch") %in% names(cmeta)) { - stop(wrap_txt("Column 'resize_switch' not found in cell metadata. + stop(wrap_txt("Column 'resize_switch' not found in cell metadata. Ensure showPolygonSizeInfluence() has been run.", errWidth = TRUE)) } plotdf <- data.table::data.table() plotdf[, cluster_status := c("switch", "same")] plotdf[, num_cells := c(sum(cmeta[ - , resize_switch == "switch"]), sum(cmeta[, resize_switch == "same"]))] + , resize_switch == "switch" + ]), sum(cmeta[, resize_switch == "same"]))] per_switch <- plotdf$num_cells[[1]] / sum(plotdf$num_cells) * 100 per_same <- plotdf$num_cells[[2]] / sum(plotdf$num_cells) * 100 @@ -341,7 +367,8 @@ showCellProportionSwitchedPie <- function(gobject = NULL, print(plotdf) ggplot( - as.data.frame(plotdf), aes(x = "", y = perc, fill = cluster_status)) + + as.data.frame(plotdf), aes(x = "", y = perc, fill = cluster_status) + ) + coord_polar("y", start = 0) + geom_bar(stat = "identity", width = 1) + theme_void() + @@ -350,26 +377,28 @@ showCellProportionSwitchedPie <- function(gobject = NULL, #' @title showCellProportionSwitchedSanKey #' @name showCellProportionSwitchedSanKey -#' @param gobject giotto object which contains metadata for both spat_unit and +#' @param gobject giotto object which contains metadata for both spat_unit and #' alt_spat_unit #' @param spat_unit spatial unit -#' @param alt_spat_unit alternative spatial unit which stores data after +#' @param alt_spat_unit alternative spatial unit which stores data after #' resizing annotations #' @param feat_type feature type #' @returns D3 JavaScript Sankey diagram #' @details Creates a Sankey Diagram to illustrate cluster switching behavior. #' Currently only supports displaying cluster switching for kmeans clusters. #' @export -showCellProportionSwitchedSanKey <- function(gobject = NULL, - spat_unit = NULL, - alt_spat_unit = NULL, - feat_type = NULL) { +showCellProportionSwitchedSanKey <- function( + gobject = NULL, + spat_unit = NULL, + alt_spat_unit = NULL, + feat_type = NULL) { # NSE vars kmeans_small <- cell_ID <- NULL # Guards - if (!"giotto" %in% class(gobject)) + if (!"giotto" %in% class(gobject)) { stop(wrap_txt("Please provide a valid Giotto Object.", errWidth = TRUE)) + } spat_unit <- set_default_spat_unit( gobject = gobject, @@ -382,8 +411,9 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, ) if (!alt_spat_unit %in% names(gobject@expression)) { stop(wrap_txt(paste0( - "Alternative spatial unit ", alt_spat_unit, - " not found. Please ensure it exists."), errWidth = TRUE)) + "Alternative spatial unit ", alt_spat_unit, + " not found. Please ensure it exists." + ), errWidth = TRUE)) } package_check("networkD3") @@ -397,7 +427,7 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, ) if (!c("resize_switch") %in% names(cmeta)) { - stop(wrap_txt("Column 'resize_switch' not found in cell metadata. + stop(wrap_txt("Column 'resize_switch' not found in cell metadata. Ensure showPolygonSizeInfluence() has been run.", errWidth = TRUE)) } @@ -422,7 +452,9 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, small_cmeta_clus$kmeans <- NULL merged_cmeta <- data.table::merge.data.table( - cmeta, small_cmeta_clus, by.x = "cell_ID", by.y = "cell_ID") + cmeta, small_cmeta_clus, + by.x = "cell_ID", by.y = "cell_ID" + ) k1 <- unique(merged_cmeta$kmeans) @@ -449,7 +481,8 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, for (i in seq_len(flen)) { num_occ[i] <- dim(na.omit(merged_cmeta[kmeans == (c_k1[i] + 1)][ - merged_cmeta[kmeans_small == (c_k2[i] + 1)]]))[[1]] + merged_cmeta[kmeans_small == (c_k2[i] + 1)] + ]))[[1]] } fdt[, "k1"] <- c_k1 @@ -459,7 +492,8 @@ showCellProportionSwitchedSanKey <- function(gobject = NULL, label_dt <- data.table::data.table() label_dt[, "name"] <- c(paste0("original_", as.character(sort(k1))), paste0( - "resized_", as.character(sort(k2)))) + "resized_", as.character(sort(k2)) + )) label_dt master <- list(fdt, label_dt) diff --git a/R/python_hmrf.R b/R/python_hmrf.R index 2f7504179..b152244bd 100644 --- a/R/python_hmrf.R +++ b/R/python_hmrf.R @@ -32,36 +32,39 @@ #' g <- GiottoData::loadGiottoMini("visium") #' spat_genes <- binSpect(g) #' -#' doHMRF(g, spatial_genes = spat_genes[seq_len(10)]$feats, -#' output_folder = tempdir()) +#' doHMRF(g, +#' spatial_genes = spat_genes[seq_len(10)]$feats, +#' output_folder = tempdir() +#' ) #' @export -doHMRF <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - spatial_network_name = "Delaunay_network", - spat_loc_name = "raw", - spatial_genes = NULL, - spatial_dimensions = c("sdimx", "sdimy", "sdimz"), - dim_reduction_to_use = NULL, - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - seed = 100, - name = "test", - k = 10, - betas = c(0, 2, 50), - tolerance = 1e-10, - zscore = c("none", "rowcol", "colrow"), - numinit = 100, - python_path = NULL, - output_folder = NULL, - overwrite_output = TRUE) { +doHMRF <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + spatial_network_name = "Delaunay_network", + spat_loc_name = "raw", + spatial_genes = NULL, + spatial_dimensions = c("sdimx", "sdimy", "sdimz"), + dim_reduction_to_use = NULL, + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + seed = 100, + name = "test", + k = 10, + betas = c(0, 2, 50), + tolerance = 1e-10, + zscore = c("none", "rowcol", "colrow"), + numinit = 100, + python_path = NULL, + output_folder = NULL, + overwrite_output = TRUE) { if (!requireNamespace("smfishHmrf", quietly = TRUE)) { stop("package ", "smfishHmrf", " is not yet installed \n", - "To install: \n", - "remotes::install_bitbucket(repo = 'qzhudfci/smfishhmrf-r', ref='master')", - "see http://spatial.rc.fas.harvard.edu/install.html for more information", - call. = FALSE + "To install: \n", + "remotes::install_bitbucket(repo = 'qzhudfci/smfishhmrf-r', ref='master')", + "see http://spatial.rc.fas.harvard.edu/install.html for more information", + call. = FALSE ) } @@ -95,7 +98,8 @@ doHMRF <- function(gobject, output_folder <- paste0(getwd(), "/", "HMRF_output") if (!file.exists(output_folder)) { dir.create( - path = paste0(getwd(), "/", "HMRF_output"), recursive = TRUE) + path = paste0(getwd(), "/", "HMRF_output"), recursive = TRUE + ) } } # folder path specified @@ -113,7 +117,6 @@ doHMRF <- function(gobject, ## 1. expression values if (!is.null(dim_reduction_to_use)) { - expr_values <- getDimReduction( gobject = gobject, spat_unit = spat_unit, @@ -128,7 +131,8 @@ doHMRF <- function(gobject, } else { values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -154,8 +158,8 @@ doHMRF <- function(gobject, data.table::fwrite( data.table::as.data.table(expr_values, keep.rownames = "gene"), file = expression_file, quote = FALSE, col.names = TRUE, - row.names = FALSE, sep = " ") - + row.names = FALSE, sep = " " + ) } else if (file.exists(expression_file) & overwrite_output == FALSE) { message("expression_matrix.txt already exists at this location, will be used again") @@ -163,7 +167,8 @@ doHMRF <- function(gobject, data.table::fwrite( data.table::as.data.table(expr_values, keep.rownames = "gene"), file = expression_file, quote = FALSE, col.names = TRUE, - row.names = FALSE, sep = " ") + row.names = FALSE, sep = " " + ) } @@ -176,13 +181,15 @@ doHMRF <- function(gobject, dimred_rownames <- rownames(expr_values) spatial_genes_detected <- dimred_rownames[dimensions_to_use] spatial_genes_detected <- spatial_genes_detected[ - !is.na(spatial_genes_detected)] + !is.na(spatial_genes_detected) + ] } else { if (is.null(spatial_genes)) { stop("you need to provide a vector of spatial genes (~500)") } spatial_genes_detected <- spatial_genes[ - spatial_genes %in% rownames(expr_values)] + spatial_genes %in% rownames(expr_values) + ] } spatial_genes_file <- paste0(output_folder, "/", "spatial_genes.txt") @@ -248,11 +255,15 @@ doHMRF <- function(gobject, # select spatial dimensions that are available # spatial_dimensions <- spatial_dimensions[ - spatial_dimensions %in% colnames(spatial_location)] + spatial_dimensions %in% colnames(spatial_location) + ] spatial_location <- spatial_location[ - , c(spatial_dimensions, "cell_ID"), with = FALSE] + , c(spatial_dimensions, "cell_ID"), + with = FALSE + ] spatial_location_file <- paste0( - output_folder, "/", "spatial_cell_locations.txt") + output_folder, "/", "spatial_cell_locations.txt" + ) if (file.exists(spatial_location_file) & overwrite_output == TRUE) { message("spatial_cell_locations.txt already exists at this location, @@ -348,18 +359,23 @@ doHMRF <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- tempdir() -#' doHMRF(g, spatial_genes = c("Gna12", "Ccnd2"), output_folder = x, -#' betas = c(0, 2, 50)) +#' doHMRF(g, +#' spatial_genes = c("Gna12", "Ccnd2"), output_folder = x, +#' betas = c(0, 2, 50) +#' ) #' -#' loadHMRF(output_folder_used = x, betas_used = c(0, 2, 50), -#' python_path_used = NULL) +#' loadHMRF( +#' output_folder_used = x, betas_used = c(0, 2, 50), +#' python_path_used = NULL +#' ) #' #' @export -loadHMRF <- function(name_used = "test", - output_folder_used, - k_used = 10, - betas_used, - python_path_used) { +loadHMRF <- function( + name_used = "test", + output_folder_used, + k_used = 10, + betas_used, + python_path_used) { output_data <- paste0(output_folder_used, "/", "result.spatial.zscore") if (!file.exists(output_data)) { stop("doHMRF was not run in this output directory") @@ -395,12 +411,13 @@ loadHMRF <- function(name_used = "test", #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot2D}} and \code{\link{spatPlot3D}} #' @export -viewHMRFresults <- function(gobject, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - third_dim = FALSE, - ...) { +viewHMRFresults <- function( + gobject, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + third_dim = FALSE, + ...) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } @@ -408,7 +425,9 @@ viewHMRFresults <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -423,8 +442,10 @@ viewHMRFresults <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_view_detected <- betas_to_view[betas_to_view %in% possible_betas] @@ -451,14 +472,16 @@ viewHMRFresults <- function(gobject, cell_color = output, show_plot = TRUE, title = title_name, - ...) + ... + ) if (third_dim == TRUE) { spatPlot3D( gobject = gobject, cell_color = output, show_plot = TRUE, - ...) + ... + ) } } } @@ -475,11 +498,12 @@ viewHMRFresults <- function(gobject, #' @param print_command see the python command #' @returns data.table with HMRF results for each b and the selected k #' @export -writeHMRFresults <- function(gobject, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - print_command = FALSE) { +writeHMRFresults <- function( + gobject, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + print_command = FALSE) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } @@ -487,7 +511,9 @@ writeHMRFresults <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -502,8 +528,10 @@ writeHMRFresults <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_view_detected <- betas_to_view[betas_to_view %in% possible_betas] @@ -533,7 +561,8 @@ writeHMRFresults <- function(gobject, result_DT <- data.table::as.data.table(do.call("cbind", result_list)) result_DT <- cbind(data.table::data.table( - "cell_ID" = gobject@cell_ID), result_DT) + "cell_ID" = gobject@cell_ID + ), result_DT) return(result_DT) } @@ -555,11 +584,12 @@ writeHMRFresults <- function(gobject, #' g <- GiottoData::loadGiottoMini("visium") #' spat_genes <- binSpect(g) #' -#' output_folder <- file.path(tempdir(), 'HMRF') -#' if(!file.exists(output_folder)) dir.create(output_folder, recursive = TRUE) +#' output_folder <- file.path(tempdir(), "HMRF") +#' if (!file.exists(output_folder)) dir.create(output_folder, recursive = TRUE) #' #' out <- doHMRF( -#' g, spatial_genes = spat_genes[seq_len(20)]$feats, +#' g, +#' spatial_genes = spat_genes[seq_len(20)]$feats, #' expression_values = "scaled", #' spatial_network_name = "Delaunay_network", #' k = 6, betas = c(0, 10, 5), @@ -575,16 +605,17 @@ writeHMRFresults <- function(gobject, #' ) #' #' spatPlot( -#' gobject = g, cell_color = 'HMRF_k6_b.20', +#' gobject = g, cell_color = "HMRF_k6_b.20", #' ) #' @export -addHMRF <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - HMRFoutput, - k = NULL, - betas_to_add = NULL, - hmrf_name = NULL) { +addHMRF <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_add = NULL, + hmrf_name = NULL) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } @@ -606,7 +637,9 @@ addHMRF <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -621,8 +654,10 @@ addHMRF <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_add_detected <- betas_to_add[betas_to_add %in% possible_betas] @@ -700,14 +735,14 @@ addHMRF <- function(gobject, #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot2D}} #' @export -viewHMRFresults2D <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - ...) { - +viewHMRFresults2D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -722,7 +757,9 @@ viewHMRFresults2D <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -737,8 +774,10 @@ viewHMRFresults2D <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_view_detected <- betas_to_view[betas_to_view %in% possible_betas] @@ -794,7 +833,8 @@ viewHMRFresults2D <- function(gobject, show_plot = TRUE, save_plot = FALSE, title = title_name, - ...) + ... + ) } } @@ -812,13 +852,14 @@ viewHMRFresults2D <- function(gobject, #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot3D}} #' @export -viewHMRFresults3D <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - ...) { +viewHMRFresults3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + ...) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("HMRFoutput needs to be output from doHMRFextend") } @@ -833,7 +874,9 @@ viewHMRFresults3D <- function(gobject, ## reader.py and get_result.py paths # TODO: part of the package get_result_path <- system.file( - "python", "get_result2.py", package = "Giotto") + "python", "get_result2.py", + package = "Giotto" + ) # paths and name name <- HMRFoutput$name @@ -848,8 +891,10 @@ viewHMRFresults3D <- function(gobject, # betas betas <- HMRFoutput$betas - possible_betas <- seq(betas[1], to = betas[1] + (betas[2] * (betas[3] - 1)), - by = betas[2]) + possible_betas <- seq(betas[1], + to = betas[1] + (betas[2] * (betas[3] - 1)), + by = betas[2] + ) betas_to_view_detected <- betas_to_view[betas_to_view %in% possible_betas] @@ -905,7 +950,8 @@ viewHMRFresults3D <- function(gobject, show_plot = TRUE, save_plot = FALSE, title = title_name, - ...) + ... + ) } } @@ -930,10 +976,11 @@ viewHMRFresults3D <- function(gobject, #' Changing from equal size by setting sample_rate = 1 to with exact proportion #' of each cluster by setting sample_rate = +Inf #' @keywords internal -sampling_sp_genes <- function(clust, - sample_rate = 2, - target = 500, - seed = 10) { +sampling_sp_genes <- function( + clust, + sample_rate = 2, + target = 500, + seed = 10) { tot <- 0 num_cluster <- length(unique(clust)) gene_list <- list() @@ -968,7 +1015,8 @@ sampling_sp_genes <- function(clust, return(list( union_genes = union_genes, num_sample = num_sample, - num_gene = genes, gene_list = gene_list)) + num_gene = genes, gene_list = gene_list + )) } @@ -986,9 +1034,10 @@ sampling_sp_genes <- function(clust, #' This function calculates the number of data points in a sorted sequence #' below a line with given slope through a certain point on this sequence. #' @keywords internal -numPts_below_line <- function(myVector, - slope, - x) { +numPts_below_line <- function( + myVector, + slope, + x) { yPt <- myVector[x] b <- yPt - (slope * x) xPts <- seq_along(myVector) @@ -1017,13 +1066,13 @@ numPts_below_line <- function(myVector, #' #' filterSpatialGenes(g, spatial_genes = "Gm19935") #' @export -filterSpatialGenes <- function( - gobject, spat_unit = NULL, feat_type = NULL, spatial_genes, max = 2500, - name = c("binSpect", "silhouetteRank", "silhouetteRankTest"), - method = c("none", "elbow")) { +filterSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL, spatial_genes, max = 2500, + name = c("binSpect", "silhouetteRank", "silhouetteRankTest"), + method = c("none", "elbow")) { name <- match.arg( name, - unique(c("binSpect", "silhouetteRank", "silhouetteRankTest", name))) + unique(c("binSpect", "silhouetteRank", "silhouetteRankTest", name)) + ) method <- match.arg(method, unique(c("none", "elbow", method))) @@ -1070,14 +1119,18 @@ filterSpatialGenes <- function( slope <- (max(y0s) - min(y0s)) / length(y0s) # This is the slope of the # line we want to slide. This is the diagonal. xPt <- floor(optimize( - numPts_below_line, lower = 1, upper = length(y0s), - myVector = y0s, slope = slope)$minimum) + numPts_below_line, + lower = 1, upper = length(y0s), + myVector = y0s, slope = slope + )$minimum) xPt <- length(y0s) - xPt y_cutoff <- y0[xPt] # The y-value at this x point. This is our y_cutoff. gx_sorted <- head(gx_sorted, n = xPt) message("Elbow method chosen to determine number of spatial genes.") - cat(paste0("Elbow point determined to be at x=", xPt, " genes", - " y=", y_cutoff)) + cat(paste0( + "Elbow point determined to be at x=", xPt, " genes", + " y=", y_cutoff + )) } # filter user's gene list (spatial_genes) @@ -1086,7 +1139,8 @@ filterSpatialGenes <- function( num_genes_removed <- length(spatial_genes) - nrow(gx_sorted) return(list( - genes = gx_sorted$feat_ID, num_genes_removed = num_genes_removed)) + genes = gx_sorted$feat_ID, num_genes_removed = num_genes_removed + )) } @@ -1106,8 +1160,7 @@ filterSpatialGenes <- function( #' Priorities for showing the spatial gene test names are ‘binSpect’ > #' ‘silhouetteRankTest’ > ‘silhouetteRank’. #' @keywords internal -chooseAvailableSpatialGenes <- function( - gobject, spat_unit = NULL, feat_type = NULL) { +chooseAvailableSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL) { gx <- fDataDT(gobject, spat_unit = NULL, feat_type = NULL) eval1 <- "binSpect.pval" %in% names(gx) eval2 <- "silhouetteRankTest.pval" %in% names(gx) @@ -1141,11 +1194,12 @@ chooseAvailableSpatialGenes <- function( #' SilhouetteRank works only with score, and SilhouetteRankTest works only #' with pval. Use parameter use_score to specify. #' @keywords internal -checkAndFixSpatialGenes <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - use_spatial_genes, - use_score = FALSE) { +checkAndFixSpatialGenes <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + use_spatial_genes, + use_score = FALSE) { gx <- fDataDT(gobject, spat_unit = NULL, feat_type = NULL) if (use_spatial_genes == "silhouetteRank") { @@ -1179,12 +1233,14 @@ checkAndFixSpatialGenes <- function(gobject, if (eval1 == FALSE) { stop(paste0("use_spatial_genes is set to binSpect, but it has not been run yet. Run binSpect first."), - call. = FALSE) + call. = FALSE + ) } return(use_spatial_genes) } else { stop(paste0("use_spatial_genes is set to one that is not supported."), - call. = FALSE) + call. = FALSE + ) } } @@ -1277,39 +1333,40 @@ checkAndFixSpatialGenes <- function(gobject, #' initHMRF_V2(gobject = g, cl.method = "km") #' @export initHMRF_V2 <- - function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("scaled", "normalized", "custom"), - spatial_network_name = "Delaunay_network", - use_spatial_genes = c("binSpect", "silhouetteRank"), - use_score = FALSE, - gene_list_from_top = 2500, - filter_method = c("none", "elbow"), - user_gene_list = NULL, - use_pca = FALSE, - use_pca_dim = 1:20, - gene_samples = 500, - gene_sampling_rate = 2, - gene_sampling_seed = 10, - use_metagene = FALSE, - cluster_metagene = 50, - top_metagene = 20, - existing_spatial_enrichm_to_use = NULL, - use_neighborhood_composition = FALSE, - spatial_network_name_for_neighborhood = NULL, - metadata_to_use = NULL, - hmrf_seed = 100, - cl.method = c("km", "leiden", "louvain"), - resolution.cl = 1, - k = 10, - tolerance = 1e-05, - zscore = c("none", "rowcol", "colrow"), - nstart = 1000, - factor_step = 1.05, - python_path = NULL) { + function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("scaled", "normalized", "custom"), + spatial_network_name = "Delaunay_network", + use_spatial_genes = c("binSpect", "silhouetteRank"), + use_score = FALSE, + gene_list_from_top = 2500, + filter_method = c("none", "elbow"), + user_gene_list = NULL, + use_pca = FALSE, + use_pca_dim = 1:20, + gene_samples = 500, + gene_sampling_rate = 2, + gene_sampling_seed = 10, + use_metagene = FALSE, + cluster_metagene = 50, + top_metagene = 20, + existing_spatial_enrichm_to_use = NULL, + use_neighborhood_composition = FALSE, + spatial_network_name_for_neighborhood = NULL, + metadata_to_use = NULL, + hmrf_seed = 100, + cl.method = c("km", "leiden", "louvain"), + resolution.cl = 1, + k = 10, + tolerance = 1e-05, + zscore = c("none", "rowcol", "colrow"), + nstart = 1000, + factor_step = 1.05, + python_path = NULL) { wrap_msg( - "If used in published research, please cite: + "If used in published research, please cite: Q Zhu, S Shah, R Dries, L Cai, GC Yuan. 'Identification of spatially associated subpopulations by combining scRNAseq and sequential fluorescence in situ hybridization data' @@ -1351,7 +1408,8 @@ initHMRF_V2 <- spat_unit = spat_unit, name = spatial_network_name, output = "networkDT", - copy_obj = FALSE) + copy_obj = FALSE + ) spatial_network <- spatial_network[, .(to, from)] if (use_neighborhood_composition) { @@ -1372,8 +1430,10 @@ initHMRF_V2 <- ) } - cat(paste0("use spatial network composition of \'", - metadata_to_use, "\' for domain clustering")) + cat(paste0( + "use spatial network composition of \'", + metadata_to_use, "\' for domain clustering" + )) name.cl <- as.character(sort(unique(cx[[metadata_to_use]]))) @@ -1385,13 +1445,19 @@ initHMRF_V2 <- copy_obj = FALSE ) - from.all <- c(spatial_network_for_neighborhood$from, - spatial_network_for_neighborhood$to) - to.all <- c(spatial_network_for_neighborhood$to, - spatial_network_for_neighborhood$from) + from.all <- c( + spatial_network_for_neighborhood$from, + spatial_network_for_neighborhood$to + ) + to.all <- c( + spatial_network_for_neighborhood$to, + spatial_network_for_neighborhood$from + ) - ct.tab <- aggregate(cx[[metadata_to_use]][match( - to.all, cx[["cell_ID"]])], + ct.tab <- aggregate( + cx[[metadata_to_use]][match( + to.all, cx[["cell_ID"]] + )], by = list(cell_ID = from.all), function(y) { table(y)[name.cl] } @@ -1401,7 +1467,6 @@ initHMRF_V2 <- y0[is.na(y0)] <- 0 rownames(y0) <- ct.tab$cell_ID y0 <- y0 / rowSums(y0) - } else if (!is.null(existing_spatial_enrichm_to_use)) { y0 <- getSpatialEnrichment( gobject, @@ -1413,8 +1478,10 @@ initHMRF_V2 <- y0 <- as.data.frame(y0[, -"cell_ID"]) rownames(y0) <- cell_ID_enrich - cat(paste0("Spatial enrichment result: \'", - existing_spatial_enrichm_to_use, "\' is used.")) + cat(paste0( + "Spatial enrichment result: \'", + existing_spatial_enrichm_to_use, "\' is used." + )) if (sum(!rownames(y0) %in% cx$cell_ID) > 0) { stop("Rownames of selected spatial enrichment result do not @@ -1494,8 +1561,10 @@ initHMRF_V2 <- " from user's input gene list due to being absent or non-spatial genes." )) - cat(paste0("Kept ", length(filtered$genes), - " spatial genes for next step")) + cat(paste0( + "Kept ", length(filtered$genes), + " spatial genes for next step" + )) } spatial_genes <- filtered$genes @@ -1527,8 +1596,10 @@ initHMRF_V2 <- name = use_spatial_genes, method = filter_method ) - cat(paste0("Kept ", length(filtered$genes), - " top spatial genes for next step")) + cat(paste0( + "Kept ", length(filtered$genes), + " top spatial genes for next step" + )) spatial_genes <- filtered$genes } @@ -1536,7 +1607,8 @@ initHMRF_V2 <- expr_values <- expr_values[spatial_genes, ] pc.expr <- prcomp(expr_values)[[2]] use_pca_dim <- use_pca_dim[ - use_pca_dim %in% seq_len(ncol(pc.expr))] + use_pca_dim %in% seq_len(ncol(pc.expr)) + ] y0 <- (pc.expr[, use_pca_dim]) } else { message("Computing spatial coexpression modules...") @@ -1582,11 +1654,13 @@ initHMRF_V2 <- expr_values <- expr_values[spatial_genes_selected, ] } else { k.sp <- min( - ceiling(length(spatial_genes) / 20), cluster_metagene) + ceiling(length(spatial_genes) / 20), cluster_metagene + ) if (k.sp < cluster_metagene) { cat(paste0( - "construct ", k.sp, - " coexpression modules due to limited gene size...")) + "construct ", k.sp, + " coexpression modules due to limited gene size..." + )) } spat_cor_netw_DT <- clusterSpatialCorFeats(spat_cor_netw_DT, name = "spat_netw_clus", k = k.sp @@ -1601,7 +1675,9 @@ initHMRF_V2 <- metagenes from ", k.sp, " coexpression modules...")) top_per_module <- cluster_genes_DT[ - , head(.SD, top_metagene), by = clus] + , head(.SD, top_metagene), + by = clus + ] cluster_genes <- top_per_module$clus names(cluster_genes) <- top_per_module$feat_ID @@ -1616,9 +1692,11 @@ initHMRF_V2 <- expr_values <- t(meta.genes@enrichDT[, seq_len(k.sp)]) colnames(expr_values) <- unlist( - meta.genes@enrichDT[, "cell_ID"]) + meta.genes@enrichDT[, "cell_ID"] + ) rownames(expr_values) <- paste0( - "metagene_", rownames(expr_values)) + "metagene_", rownames(expr_values) + ) } y0 <- t(as.matrix(expr_values)) @@ -1674,9 +1752,11 @@ initHMRF_V2 <- } message("Parsing neighborhood graph...") pp <- tidygraph::tbl_graph( - edges = as.data.frame(edgelist), directed = FALSE) + edges = as.data.frame(edgelist), directed = FALSE + ) yy <- pp %>% dplyr::mutate( - color = as.factor(graphcoloring::color_dsatur())) + color = as.factor(graphcoloring::color_dsatur()) + ) colors <- as.list(yy)$nodes$color cl_color <- sort(unique(colors)) blocks <- lapply(cl_color, function(cl) { @@ -1725,7 +1805,8 @@ initHMRF_V2 <- resolution = resolution.cl ) cl.match <- leiden.cl$leiden_clus[ - match(rownames(y), leiden.cl$cell_ID)] + match(rownames(y), leiden.cl$cell_ID) + ] mu <- aggregate(y, by = list(cl.match), FUN = mean) } else if (cl.method == "louvain") { message("Louvain clustering initialization...") @@ -1739,7 +1820,8 @@ initHMRF_V2 <- resolution = resolution.cl ) cl.match <- louvain.cl$louvain_clus[ - match(rownames(y), louvain.cl$cell_ID)] + match(rownames(y), louvain.cl$cell_ID) + ] mu <- aggregate(y, by = list(cl.match), FUN = mean) } @@ -1811,7 +1893,7 @@ initHMRF_V2 <- #' @export doHMRF_V2 <- function(HMRF_init_obj, betas = NULL) { message( - "If used in published research, please cite: + "If used in published research, please cite: Q Zhu, S Shah, R Dries, L Cai, GC Yuan. 'Identification of spatially associated subpopulations by combining scRNAseq and sequential fluorescence in situ hybridization data' @@ -1901,7 +1983,7 @@ doHMRF_V2 <- function(HMRF_init_obj, betas = NULL) { tc.hmrfem$mu <- NULL rownames(tc.hmrfem$prob) <- rownames(y) rownames(tc.hmrfem$unnormprob) <- rownames(y) - #names(tc.hmrfem$class) <- rownames(y) + # names(tc.hmrfem$class) <- rownames(y) res[[t_key]] <- tc.hmrfem } result.hmrf <- res @@ -1962,9 +2044,9 @@ addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { column_cell_ID = "cell_ID", # new_metadata = HMRFoutput[[i]]$class[match( # ordered_cell_IDs, names(HMRFoutput[[i]]$class))], - new_metadata = HMRFoutput[[i]]$prob[ordered_cell_IDs,], + new_metadata = HMRFoutput[[i]]$prob[ordered_cell_IDs, ], vector_name = paste(name, names(HMRFoutput)[i]) - #by_column = TRUE + # by_column = TRUE ) } return(gobject) @@ -2000,34 +2082,39 @@ addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { #' (for example name of ‘hmrf1 k=8 b=0.00’ is ‘hmrf1’) #' @export viewHMRFresults_V2 <- - function(gobject, k, betas, - hmrf_name, - spat_unit = NULL, - feat_type = NULL, - third_dim = FALSE, - cow_n_col = 2, - cow_rel_h = 1, - cow_rel_w = 1, - cow_align = "h", - show_plot = TRUE, - save_plot = TRUE, - return_plot = TRUE, - default_save_name = "HMRF_result", - save_param = list(), - ...) { + function( + gobject, k, betas, + hmrf_name, + spat_unit = NULL, + feat_type = NULL, + third_dim = FALSE, + cow_n_col = 2, + cow_rel_h = 1, + cow_rel_w = 1, + cow_align = "h", + show_plot = TRUE, + save_plot = TRUE, + return_plot = TRUE, + default_save_name = "HMRF_result", + save_param = list(), + ...) { # beta_seq = round(betas,digits = 2) # t_key = paste0(hmrf_name,'_k', k, '_b.',beta_seq) t_key <- paste(hmrf_name, sprintf("k=%d b=%.2f", k, betas)) meta_names <- colnames(combineMetadata( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type)) + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + )) if (length(setdiff(t_key, meta_names)) > 0) { beta_null <- paste(betas[which(!t_key %in% meta_names)], - collapse = ",") - stop(paste0('\n HMRF result "', hmrf_name, '" of k = ', k, - ", beta = ", beta_null, - " was not found in the Giotto object.")) + collapse = "," + ) + stop(paste0( + '\n HMRF result "', hmrf_name, '" of k = ', k, + ", beta = ", beta_null, + " was not found in the Giotto object." + )) } savelist <- list() diff --git a/R/python_scrublet.R b/R/python_scrublet.R index 03d608fb5..0f422ceae 100644 --- a/R/python_scrublet.R +++ b/R/python_scrublet.R @@ -36,17 +36,18 @@ #' pDataDT(g) # doublet_scores and doublet cols are added #' dimPlot2D(g, cell_color = "doublet_scores", color_as_factor = FALSE) #' @export -doScrubletDetect <- function(gobject, - feat_type = NULL, - spat_unit = "cell", - expression_values = "raw", - expected_doublet_rate = 0.06, - min_counts = 1, - min_cells = 1, - min_gene_variability_pctl = 85, - n_prin_comps = 30, - return_gobject = TRUE, - seed = 1234) { +doScrubletDetect <- function( + gobject, + feat_type = NULL, + spat_unit = "cell", + expression_values = "raw", + expected_doublet_rate = 0.06, + min_counts = 1, + min_cells = 1, + min_gene_variability_pctl = 85, + n_prin_comps = 30, + return_gobject = TRUE, + seed = 1234) { # verify if optional package is installed package_check( pkg_name = "scrublet", @@ -65,7 +66,9 @@ doScrubletDetect <- function(gobject, python_path <- readGiottoInstructions(gobject, param = "python_path") reticulate::use_python(required = TRUE, python = python_path) python_scrublet_function <- system.file( - "python", "python_scrublet.py", package = "Giotto") + "python", "python_scrublet.py", + package = "Giotto" + ) reticulate::source_python(file = python_scrublet_function, convert = TRUE) # set seed diff --git a/R/spatial_clusters.R b/R/spatial_clusters.R index b9fef3f44..ca7fbd6cb 100644 --- a/R/spatial_clusters.R +++ b/R/spatial_clusters.R @@ -77,12 +77,13 @@ #' # don't show legend since there are too many categories generated #' spatPlot2D(g, cell_color = "new", show_legend = FALSE) #' @export -spatialSplitCluster <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "Delaunay_network", - cluster_col, - split_clus_name = paste0(cluster_col, "_split")) { +spatialSplitCluster <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + cluster_col, + split_clus_name = paste0(cluster_col, "_split")) { # NSE vars cell_ID <- NULL @@ -113,14 +114,15 @@ spatialSplitCluster <- function(gobject, verbose = FALSE, ) - clus_info <- cell_meta[, c("cell_ID", cluster_col), with = FALSE] + clus_info <- cell_meta[, c("cell_ID", cluster_col), with = FALSE] # subset to needed cols - g <- GiottoClass::spat_net_to_igraph(sn) + g <- GiottoClass::spat_net_to_igraph(sn) # convert spatialNetworkObject to igraph # assign cluster info to igraph nodes clus_values <- clus_info[ - match(igraph::V(g)$name, cell_ID), get(cluster_col)] + match(igraph::V(g)$name, cell_ID), get(cluster_col) + ] igraph::V(g)$cluster <- clus_values # split cluster by spatial igraph diff --git a/R/spatial_enrichment.R b/R/spatial_enrichment.R index e9e0d7848..ffcb3b801 100644 --- a/R/spatial_enrichment.R +++ b/R/spatial_enrichment.R @@ -14,18 +14,29 @@ #' @returns matrix #' @seealso \code{\link{PAGEEnrich}} #' @examples -#' sign_list <- list(cell_type1 = c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", -#' "Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd"), -#' cell_type2 = c("Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", "Tmbim1", -#' "Carhsp1", "Tmem88b", "Ugt8a"), -#' cell_type2 = c("Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", -#' "Cygb", "Ttc9b","Ipcef1")) +#' sign_list <- list( +#' cell_type1 = c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", +#' "Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd" +#' ), +#' cell_type2 = c( +#' "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", "Tmbim1", +#' "Carhsp1", "Tmem88b", "Ugt8a" +#' ), +#' cell_type2 = c( +#' "Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", +#' "Cygb", "Ttc9b", "Ipcef1" +#' ) +#' ) #' -#' makeSignMatrixPAGE(sign_names = c("cell_type1", "cell_type2", "cell_type3"), -#' sign_list = sign_list) +#' makeSignMatrixPAGE( +#' sign_names = c("cell_type1", "cell_type2", "cell_type3"), +#' sign_list = sign_list +#' ) #' @export -makeSignMatrixPAGE <- function(sign_names, - sign_list) { +makeSignMatrixPAGE <- function( + sign_names, + sign_list) { ## check input if (!inherits(sign_list, "list")) { stop("sign_list needs to be a list of signatures for each cell type / @@ -45,11 +56,14 @@ makeSignMatrixPAGE <- function(sign_names, res <- rep(x = name_subset, length(subset)) }) mydt <- data.table::data.table( - genes = genes, types = unlist(types), value = 1) + genes = genes, types = unlist(types), value = 1 + ) # convert data.table to signature matrix dtmatrix <- data.table::dcast.data.table( - mydt, formula = genes ~ types, value.var = "value", fill = 0) + mydt, + formula = genes ~ types, value.var = "value", fill = 0 + ) final_sig_matrix <- Matrix::as.matrix(dtmatrix[, -1]) rownames(final_sig_matrix) <- dtmatrix$genes @@ -70,21 +84,26 @@ makeSignMatrixPAGE <- function(sign_names, #' @returns matrix #' @seealso \code{\link{runDWLSDeconv}} #' @examples -#' sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +#' sign_gene <- c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", +#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", +#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", +#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +#' ) #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*3), nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 3), nrow = length(sign_gene)) #' rownames(sign_matrix) <- sign_gene #' colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") #' -#' makeSignMatrixDWLSfromMatrix(matrix = sign_matrix, sign_gene = sign_gene, -#' cell_type_vector = c("cell_type1", "cell_type2", "cell_type3")) +#' makeSignMatrixDWLSfromMatrix( +#' matrix = sign_matrix, sign_gene = sign_gene, +#' cell_type_vector = c("cell_type1", "cell_type2", "cell_type3") +#' ) #' @export -makeSignMatrixDWLSfromMatrix <- function(matrix, - sign_gene, - cell_type_vector) { +makeSignMatrixDWLSfromMatrix <- function( + matrix, + sign_gene, + cell_type_vector) { # 1. check if cell_type_vector and matrix are compatible if (ncol(matrix) != length(cell_type_vector)) { stop("ncol(matrix) needs to be the same as length(cell_type_vector)") @@ -114,7 +133,8 @@ makeSignMatrixDWLSfromMatrix <- function(matrix, cell_type <- unique(cell_type_vector)[cell_type_i] selected_cells <- colnames(matrix_subset)[cell_type_vector == cell_type] mean_expr_in_selected_cells <- rowMeans_flex(matrix_subset[ - , selected_cells]) + , selected_cells + ]) signMatrix[, cell_type_i] <- mean_expr_in_selected_cells } @@ -145,23 +165,28 @@ makeSignMatrixDWLSfromMatrix <- function(matrix, #' @seealso \code{\link{runDWLSDeconv}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +#' sign_gene <- c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", +#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", +#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", +#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +#' ) #' -#' makeSignMatrixDWLS(gobject = g, sign_gene = sign_gene, -#' cell_type_vector = pDataDT(g)[["leiden_clus"]]) +#' makeSignMatrixDWLS( +#' gobject = g, sign_gene = sign_gene, +#' cell_type_vector = pDataDT(g)[["leiden_clus"]] +#' ) #' @export -makeSignMatrixDWLS <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reverse_log = TRUE, - log_base = 2, - sign_gene, - cell_type_vector, - cell_type = NULL) { +makeSignMatrixDWLS <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reverse_log = TRUE, + log_base = 2, + sign_gene, + cell_type_vector, + cell_type = NULL) { ## deprecated arguments if (!is.null(cell_type)) { warning("cell_type is deprecated, use cell_type_vector in the future") @@ -182,8 +207,9 @@ makeSignMatrixDWLS <- function(gobject, ## 1. expression matrix values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- get_expression_values( gobject = gobject, spat_unit = spat_unit, @@ -223,22 +249,27 @@ makeSignMatrixDWLS <- function(gobject, #' @returns matrix #' @seealso \code{\link{rankEnrich}} #' @examples -#' sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +#' sign_gene <- c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", +#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", +#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", +#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +#' ) #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*3), nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 3), nrow = length(sign_gene)) #' rownames(sign_matrix) <- sign_gene #' colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") #' -#' makeSignMatrixRank(sc_matrix = sign_matrix, -#' sc_cluster_ids = c("cell_type1", "cell_type2", "cell_type3")) +#' makeSignMatrixRank( +#' sc_matrix = sign_matrix, +#' sc_cluster_ids = c("cell_type1", "cell_type2", "cell_type3") +#' ) #' @export -makeSignMatrixRank <- function(sc_matrix, - sc_cluster_ids, - ties_method = c("random", "max"), - gobject = NULL) { +makeSignMatrixRank <- function( + sc_matrix, + sc_cluster_ids, + ties_method = c("random", "max"), + gobject = NULL) { if (inherits(sc_matrix, "exprObj")) { sc_matrix <- sc_matrix[] } @@ -293,14 +324,18 @@ makeSignMatrixRank <- function(sc_matrix, # calculate fold change and rank of fold-change comb_dt[, fold := log2(mean_expr + 1) - log2(av_expr + 1)] comb_dt[, rankFold := data.table::frank( - -fold, ties.method = ties_method), by = clusters] + -fold, + ties.method = ties_method + ), by = clusters] # create matrix comb_rank_mat <- data.table::dcast.data.table( - data = comb_dt, genes ~ clusters, value.var = "rankFold") + data = comb_dt, genes ~ clusters, value.var = "rankFold" + ) comb_rank_matrix <- dt_to_matrix(comb_rank_mat) comb_rank_matrix <- comb_rank_matrix[ - rownames(sc_matrix), unique(sc_cluster_ids)] + rownames(sc_matrix), unique(sc_cluster_ids) + ] return(comb_rank_matrix) } @@ -315,19 +350,22 @@ makeSignMatrixRank <- function(sc_matrix, #' @description creates permutation for the PAGEEnrich test #' @returns PAGEEnrich test #' @keywords internal -.do_page_permutation <- function(gobject, - sig_gene, - ntimes) { +.do_page_permutation <- function( + gobject, + sig_gene, + ntimes) { # check available gene available_ct <- c() for (i in colnames(sig_gene)) { gene_i <- rownames(sig_gene)[which(sig_gene[, i] == 1)] overlap_i <- intersect( - gene_i, rownames(gobject@expression$rna$normalized)) + gene_i, rownames(gobject@expression$rna$normalized) + ) if (length(overlap_i) <= 5) { output <- paste0( "Warning, ", i, " only has ", length(overlap_i), - " overlapped genes. Will remove it.") + " overlapped genes. Will remove it." + ) } else { available_ct <- c(available_ct, i) } @@ -337,7 +375,8 @@ makeSignMatrixRank <- function(sc_matrix, } # only continue with genes present in both datasets interGene <- intersect( - rownames(sig_gene), rownames(gobject@expression$rna$normalized)) + rownames(sig_gene), rownames(gobject@expression$rna$normalized) + ) sign_matrix <- sig_gene[interGene, available_ct] ct_gene_counts <- NULL @@ -347,7 +386,8 @@ makeSignMatrixRank <- function(sc_matrix, } uniq_ct_gene_counts <- unique(ct_gene_counts) background_mean_sd <- matrix( - data = NA, nrow = length(uniq_ct_gene_counts) + 1, ncol = 3) + data = NA, nrow = length(uniq_ct_gene_counts) + 1, ncol = 3 + ) for (i in seq_along(uniq_ct_gene_counts)) { gene_num <- uniq_ct_gene_counts[i] all_sample_names <- NULL @@ -355,16 +395,18 @@ makeSignMatrixRank <- function(sc_matrix, for (j in seq_len(ntimes)) { set.seed(j) random_gene <- sample(rownames( - gobject@expression$rna$normalized), gene_num, replace = FALSE) + gobject@expression$rna$normalized + ), gene_num, replace = FALSE) ct_name <- paste("ct", j, sep = "") all_sample_names <- c(all_sample_names, ct_name) all_sample_list <- c(all_sample_list, list(random_gene)) } random_sig <- makeSignMatrixPAGE(all_sample_names, all_sample_list) random_DT <- runPAGEEnrich( - gobject, - sign_matrix = random_sig, - p_value = FALSE) + gobject, + sign_matrix = random_sig, + p_value = FALSE + ) background <- unlist(random_DT[, 2:dim(random_DT)[2]]) df_row_name <- paste("gene_num_", uniq_ct_gene_counts[i], sep = "") list_back_i <- c(df_row_name, mean(background), stats::sd(background)) @@ -407,16 +449,17 @@ makeSignMatrixRank <- function(sc_matrix, #' gene set. #' @seealso \code{\link{makeSignMatrixPAGE}} #' @export -runPAGEEnrich_OLD <- function(gobject, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - n_times = 1000, - name = NULL, - return_gobject = TRUE) { +runPAGEEnrich_OLD <- function( + gobject, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + n_times = 1000, + name = NULL, + return_gobject = TRUE) { # expression values to be used values <- match.arg(expression_values, c("normalized", "scaled", "custom")) expr_values <- get_expression_values(gobject = gobject, values = values) @@ -432,7 +475,8 @@ runPAGEEnrich_OLD <- function(gobject, if (length(overlap_i) <= 5) { output <- paste0( "Warning, ", i, " only has ", length(overlap_i), - " overlapped genes. Will remove it.") + " overlapped genes. Will remove it." + ) } else { available_ct <- c(available_ct, i) } @@ -444,7 +488,9 @@ runPAGEEnrich_OLD <- function(gobject, # output enrichment output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) # only continue with genes present in both datasets interGene <- intersect(rownames(sign_matrix), rownames(expr_values)) @@ -465,7 +511,8 @@ runPAGEEnrich_OLD <- function(gobject, # get enrichment scores enrichment <- matrix( - data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean)) + data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean) + ) for (i in seq_len(dim(filterSig)[2])) { signames <- rownames(filterSig)[which(filterSig[, i] == 1)] sigColMean <- apply(geneFold[signames, ], 2, mean) @@ -501,12 +548,14 @@ runPAGEEnrich_OLD <- function(gobject, for (i in colnames(sign_matrix)) { gene_i <- rownames(sign_matrix)[which(sign_matrix[, i] == 1)] overlap_i <- intersect( - gene_i, rownames(gobject@expression$rna$normalized)) + gene_i, rownames(gobject@expression$rna$normalized) + ) if (length(overlap_i) <= 5) { output <- paste0( "Warning, ", i, " only has ", length(overlap_i), - " overlapped genes. It will be removed.") + " overlapped genes. It will be removed." + ) } else { available_ct <- c(available_ct, i) } @@ -518,7 +567,8 @@ runPAGEEnrich_OLD <- function(gobject, # only continue with genes present in both datasets interGene <- intersect( - rownames(sign_matrix), rownames(gobject@expression$rna$normalized)) + rownames(sign_matrix), rownames(gobject@expression$rna$normalized) + ) filter_sign_matrix <- sign_matrix[interGene, available_ct] background_mean_sd <- .do_page_permutation( @@ -531,13 +581,17 @@ runPAGEEnrich_OLD <- function(gobject, length_gene <- length(which(filter_sign_matrix[, i] == 1)) join_gene_with_length <- paste("gene_num_", length_gene, sep = "") mean_i <- as.numeric(as.character( - background_mean_sd[join_gene_with_length, ][[1]])) + background_mean_sd[join_gene_with_length, ][[1]] + )) sd_i <- as.numeric(as.character( - background_mean_sd[join_gene_with_length, ][[2]])) + background_mean_sd[join_gene_with_length, ][[2]] + )) j <- i + 1 enrichmentDT[[j]] <- stats::pnorm( - enrichmentDT[[j]], mean = mean_i, sd = sd_i, - lower.tail = FALSE, log.p = FALSE) + enrichmentDT[[j]], + mean = mean_i, sd = sd_i, + lower.tail = FALSE, log.p = FALSE + ) } } @@ -584,17 +638,18 @@ runPAGEEnrich_OLD <- function(gobject, #' @param expr_values matrix of expression values #' @returns data.table #' @keywords internal -.page_dt_method <- function(sign_matrix, - expr_values, - min_overlap_genes = 5, - logbase = 2, - reverse_log_scale = TRUE, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - include_depletion = FALSE, - n_times = 1000, - max_block = 20e6, - verbose = TRUE) { +.page_dt_method <- function( + sign_matrix, + expr_values, + min_overlap_genes = 5, + logbase = 2, + reverse_log_scale = TRUE, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + include_depletion = FALSE, + n_times = 1000, + max_block = 20e6, + verbose = TRUE) { # data.table variables Var1 <- value <- Var2 <- V1 <- marker <- nr_markers <- fc <- cell_ID <- zscore <- colmean <- colSd <- pval <- NULL @@ -602,7 +657,9 @@ runPAGEEnrich_OLD <- function(gobject, # output enrichment output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) ## identify available cell types all_genes <- rownames(expr_values) @@ -615,9 +672,10 @@ runPAGEEnrich_OLD <- function(gobject, if (nrow(lost_cell_types_DT) > 0) { for (row in seq_len(nrow(lost_cell_types_DT))) { output <- paste0( - "Warning, ", lost_cell_types_DT[row][["Var2"]], " only has ", - lost_cell_types_DT[row][["V1"]], - " overlapping genes. Will be removed.") + "Warning, ", lost_cell_types_DT[row][["Var2"]], " only has ", + lost_cell_types_DT[row][["V1"]], + " overlapping genes. Will be removed." + ) if (verbose) print(output) } } @@ -659,13 +717,17 @@ runPAGEEnrich_OLD <- function(gobject, colnames(geneFold_DT) <- c("gene", "cell_ID", "fc") mergetest <- data.table::merge.data.table( - sub_ct_DT, geneFold_DT, by = "gene") + sub_ct_DT, geneFold_DT, + by = "gene" + ) mergetest <- mergetest[, mean(fc), by = .(cell_type, cell_ID, nr_markers)] if (is.integer(mergetest$cell_ID) && is.character(cellColMeanSd$cell_ID)) { mergetest$cell_ID <- as.character(mergetest$cell_ID) } mergetest <- data.table::merge.data.table( - mergetest, cellColMeanSd, by = "cell_ID") + mergetest, cellColMeanSd, + by = "cell_ID" + ) mergetest[, zscore := ((V1 - colmean) * nr_markers^(1 / 2)) / colSd] if (output_enrichment == "zscore") { @@ -730,7 +792,9 @@ runPAGEEnrich_OLD <- function(gobject, names(all_perms_num) <- all_perms group_labels <- paste0("group_", seq_len(nr_groups)) groups_vec <- cut( - all_perms_num, breaks = nr_groups, labels = group_labels) + all_perms_num, + breaks = nr_groups, labels = group_labels + ) names(all_perms) <- groups_vec @@ -742,16 +806,24 @@ runPAGEEnrich_OLD <- function(gobject, cell_type_perm_DT_sub <- cell_type_perm_DT[round %in% sub_perms] mergetest_perm_sub <- data.table::merge.data.table( - cell_type_perm_DT_sub, geneFold_DT, allow.cartesian = TRUE) + cell_type_perm_DT_sub, geneFold_DT, + allow.cartesian = TRUE + ) mergetest_perm_sub <- mergetest_perm_sub[ - , mean(fc), by = .(cell_type, cell_ID, nr_markers, round)] + , mean(fc), + by = .(cell_type, cell_ID, nr_markers, round) + ] if (is.integer(mergetest_perm_sub$cell_ID) && is.character( - cellColMeanSd$cell_ID)) { + cellColMeanSd$cell_ID + )) { mergetest_perm_sub$cell_ID <- as.character( - mergetest_perm_sub$cell_ID) + mergetest_perm_sub$cell_ID + ) } mergetest_perm_sub <- data.table::merge.data.table( - mergetest_perm_sub, cellColMeanSd, by = "cell_ID") + mergetest_perm_sub, cellColMeanSd, + by = "cell_ID" + ) mergetest_perm_sub[, zscore := (( V1 - colmean) * nr_markers^(1 / 2)) / colSd] @@ -761,19 +833,26 @@ runPAGEEnrich_OLD <- function(gobject, res_list_comb <- do.call("rbind", res_list) res_list_comb_average <- res_list_comb[ , .(mean_zscore = mean(zscore), sd_zscore = stats::sd(zscore)), - by = c("cell_ID", "cell_type")] + by = c("cell_ID", "cell_type") + ] mergetest_final <- data.table::merge.data.table( - mergetest, res_list_comb_average, by = c("cell_ID", "cell_type")) + mergetest, res_list_comb_average, + by = c("cell_ID", "cell_type") + ) ## calculate p.values based on normal distribution if (include_depletion == TRUE) { mergetest_final[, pval := stats::pnorm( - abs(zscore), mean = mean_zscore, sd = sd_zscore, - lower.tail = FALSE, log.p = FALSE)] + abs(zscore), + mean = mean_zscore, sd = sd_zscore, + lower.tail = FALSE, log.p = FALSE + )] } else { mergetest_final[, pval := stats::pnorm( - zscore, mean = mean_zscore, sd = sd_zscore, - lower.tail = FALSE, log.p = FALSE)] + zscore, + mean = mean_zscore, sd = sd_zscore, + lower.tail = FALSE, log.p = FALSE + )] } data.table::setorder(mergetest_final, pval) @@ -787,12 +866,16 @@ runPAGEEnrich_OLD <- function(gobject, resultmatrix <- data.table::dcast( - mergetest_final, formula = cell_ID ~ cell_type, - value.var = "pval_score") + mergetest_final, + formula = cell_ID ~ cell_type, + value.var = "pval_score" + ) return(list(DT = mergetest_final, matrix = resultmatrix)) } else { resultmatrix <- data.table::dcast( - mergetest, formula = cell_ID ~ cell_type, value.var = "zscore") + mergetest, + formula = cell_ID ~ cell_type, value.var = "zscore" + ) return(list(DT = mergetest, matrix = resultmatrix)) } } @@ -836,34 +919,38 @@ runPAGEEnrich_OLD <- function(gobject, #' @seealso \code{\link{makeSignMatrixPAGE}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*3, mean = 10), -#' nrow = length(sign_gene)) +#' sign_gene <- c( +#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", +#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", +#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", +#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +#' ) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 3, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene #' colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") -#' +#' #' runPAGEEnrich(gobject = g, sign_matrix = sign_matrix) #' @export -runPAGEEnrich <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - min_overlap_genes = 5, - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - include_depletion = FALSE, - n_times = 1000, - max_block = 20e6, - name = NULL, - verbose = TRUE, - return_gobject = TRUE) { +runPAGEEnrich <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + min_overlap_genes = 5, + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + include_depletion = FALSE, + n_times = 1000, + max_block = 20e6, + name = NULL, + verbose = TRUE, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -878,7 +965,8 @@ runPAGEEnrich <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom"), expression_values)) + unique(c("normalized", "scaled", "custom"), expression_values) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1002,7 +1090,9 @@ PAGEEnrich <- function(...) { for (i in seq_len(n)) { set.seed(i) random_rank <- sample( - seq_along(sc_gene), length(sc_gene), replace = FALSE) + seq_along(sc_gene), length(sc_gene), + replace = FALSE + ) random_df[, i] <- random_rank } rownames(random_df) <- sc_gene @@ -1044,33 +1134,38 @@ PAGEEnrich <- function(...) { #' @seealso \code{\link{makeSignMatrixRank}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) -#' -#' runRankEnrich(gobject = g, sign_matrix = sign_matrix, -#' expression_values = "normalized") +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' +#' runRankEnrich( +#' gobject = g, sign_matrix = sign_matrix, +#' expression_values = "normalized" +#' ) #' @export -runRankEnrich <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "raw", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - ties_method = c("average", "max"), - p_value = FALSE, - n_times = 1000, - rbp_p = 0.99, - num_agg = 100, - name = NULL, - return_gobject = TRUE) { +runRankEnrich <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "raw", "scaled", "custom"), + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + ties_method = c("average", "max"), + p_value = FALSE, + n_times = 1000, + rbp_p = 0.99, + num_agg = 100, + name = NULL, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1088,7 +1183,8 @@ runRankEnrich <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1113,7 +1209,9 @@ runRankEnrich <- function(gobject, # output enrichment output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) enrichment <- matrix( data = NA, @@ -1124,7 +1222,9 @@ runRankEnrich <- function(gobject, # calculate mean gene expression if (reverse_log_scale == TRUE) { mean_gene_expr <- log(Matrix::rowMeans( - logbase^expr_values[] - 1, dims = 1) + 1) + logbase^expr_values[] - 1, + dims = 1 + ) + 1) } else { mean_gene_expr <- Matrix::rowMeans(expr_values[]) } @@ -1199,14 +1299,19 @@ runRankEnrich <- function(gobject, background <- unlist(random_DT[, 2:dim(random_DT)[2]]) fit.gamma <- fitdistrplus::fitdist( - background, distr = "gamma", method = "mle") + background, + distr = "gamma", method = "mle" + ) pvalue_DT <- enrichmentDT enrichmentDT[, 2:dim(enrichmentDT)[2]] <- lapply( enrichmentDT[, 2:dim(enrichmentDT)[2]], function(x) { - stats::pgamma( - x, fit.gamma$estimate[1], rate = fit.gamma$estimate[2], - lower.tail = FALSE, log.p = FALSE) - }) + stats::pgamma( + x, fit.gamma$estimate[1], + rate = fit.gamma$estimate[2], + lower.tail = FALSE, log.p = FALSE + ) + } + ) } # create spatial enrichment object @@ -1230,7 +1335,8 @@ runRankEnrich <- function(gobject, ## return object or results ## if (return_gobject == TRUE) { spenr_names <- list_spatial_enrichments_names( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type) + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) if (name %in% spenr_names) { @@ -1308,28 +1414,31 @@ rankEnrich <- function(...) { #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) #' #' runHyperGeometricEnrich(gobject = g, sign_matrix = sign_matrix) #' @export -runHyperGeometricEnrich <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - top_percentage = 5, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - name = NULL, - return_gobject = TRUE) { +runHyperGeometricEnrich <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + reverse_log_scale = TRUE, + logbase = 2, + top_percentage = 5, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + name = NULL, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1343,7 +1452,8 @@ runHyperGeometricEnrich <- function(gobject, values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1358,7 +1468,9 @@ runHyperGeometricEnrich <- function(gobject, # output enrichment output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) # calculate mean gene expression if (reverse_log_scale == TRUE) { @@ -1375,7 +1487,9 @@ runHyperGeometricEnrich <- function(gobject, top_q <- 1 - top_percentage / 100 quantilecut <- apply( - foldChange, 2, stats::quantile, probs = top_q, na.rm = TRUE) + foldChange, 2, stats::quantile, + probs = top_q, na.rm = TRUE + ) expbinary <- t_flex(1 * t_flex(foldChange > quantilecut)) markerGenes <- rownames(inter_sign_matrix) @@ -1389,19 +1503,24 @@ runHyperGeometricEnrich <- function(gobject, for (i in seq_len(dim(inter_sign_matrix)[2])) { signames <- rownames(inter_sign_matrix)[ - which(inter_sign_matrix[, i] == 1)] + which(inter_sign_matrix[, i] == 1) + ] vectorX <- NULL for (j in seq_len(dim(expbinaryOverlap)[2])) { cellsiggene <- names(expbinaryOverlap[ - which(expbinaryOverlap[, j] == 1), j]) + which(expbinaryOverlap[, j] == 1), j + ]) x <- length(intersect(cellsiggene, signames)) m <- length(rownames(inter_sign_matrix)[which( - inter_sign_matrix[, i] == 1)]) + inter_sign_matrix[, i] == 1 + )]) n <- total - m k <- length(intersect(cellsiggene, markerGenes)) enrich <- (0 - log10(stats::phyper( - x, m, n, k, log.p = FALSE, lower.tail = FALSE))) + x, m, n, k, + log.p = FALSE, lower.tail = FALSE + ))) vectorX <- append(vectorX, enrich) } enrichment[i, ] <- vectorX @@ -1424,8 +1543,9 @@ runHyperGeometricEnrich <- function(gobject, if (p_value == TRUE) { enrichmentDT[, 2:dim(enrichmentDT)[2]] <- lapply( enrichmentDT[, 2:dim(enrichmentDT)[2]], function(x) { - 10^(-x) - }) + 10^(-x) + } + ) } # create spatial enrichment object @@ -1449,7 +1569,8 @@ runHyperGeometricEnrich <- function(gobject, ## return object or results ## if (return_gobject == TRUE) { spenr_names <- list_spatial_enrichments_names( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type) + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type + ) if (name %in% spenr_names) { cat(name, " has already been used, will be overwritten") @@ -1537,41 +1658,48 @@ hyperGeometricEnrich <- function(...) { #' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) -#' +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' #' runSpatialEnrich(gobject = g, sign_matrix = sign_matrix) #' @export -runSpatialEnrich <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - enrich_method = c("PAGE", "rank", "hypergeometric"), - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - min_overlap_genes = 5, - reverse_log_scale = TRUE, - logbase = 2, - p_value = FALSE, - n_times = 1000, - rbp_p = 0.99, - num_agg = 100, - max_block = 20e6, - top_percentage = 5, - output_enrichment = c("original", "zscore"), - name = NULL, - verbose = TRUE, - return_gobject = TRUE) { +runSpatialEnrich <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + enrich_method = c("PAGE", "rank", "hypergeometric"), + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + min_overlap_genes = 5, + reverse_log_scale = TRUE, + logbase = 2, + p_value = FALSE, + n_times = 1000, + rbp_p = 0.99, + num_agg = 100, + max_block = 20e6, + top_percentage = 5, + output_enrichment = c("original", "zscore"), + name = NULL, + verbose = TRUE, + return_gobject = TRUE) { enrich_method <- match.arg( - enrich_method, choices = c("PAGE", "rank", "hypergeometric")) + enrich_method, + choices = c("PAGE", "rank", "hypergeometric") + ) output_enrichment <- match.arg( - output_enrichment, choices = c("original", "zscore")) + output_enrichment, + choices = c("original", "zscore") + ) if (enrich_method == "PAGE") { @@ -1710,29 +1838,32 @@ NULL #' \item{\emph{Geary's C} 'geary'} #' } #' @export -spatialAutoCorGlobal <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - feats = NULL, - method = c("moran", "geary"), - data_to_use = c("expression", "cell_meta"), - expression_values = c("normalized", "scaled", "custom"), - meta_cols = NULL, - spatial_network_to_use = "kNN_network", - wm_method = c("distance", "adjacency"), - wm_name = "spat_weights", - node_values = NULL, - weight_matrix = NULL, - test_method = c("none", "monte_carlo"), - mc_nsim = 99, - cor_name = NULL, - return_gobject = FALSE, - verbose = TRUE) { +spatialAutoCorGlobal <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + feats = NULL, + method = c("moran", "geary"), + data_to_use = c("expression", "cell_meta"), + expression_values = c("normalized", "scaled", "custom"), + meta_cols = NULL, + spatial_network_to_use = "kNN_network", + wm_method = c("distance", "adjacency"), + wm_name = "spat_weights", + node_values = NULL, + weight_matrix = NULL, + test_method = c("none", "monte_carlo"), + mc_nsim = 99, + cor_name = NULL, + return_gobject = FALSE, + verbose = TRUE) { # 0. determine inputs method <- match.arg(method, choices = c("moran", "geary")) test_method <- match.arg(test_method, choices = c("none", "monte_carlo")) data_to_use <- match.arg( - data_to_use, choices = c("expression", "cell_meta")) + data_to_use, + choices = c("expression", "cell_meta") + ) if (is.null(cor_name)) cor_name <- method if (!is.null(node_values)) { if (is.numeric(node_values)) { @@ -1743,9 +1874,13 @@ spatialAutoCorGlobal <- function(gobject = NULL, } use_ext_vals <- data.table::fifelse( - !is.null(node_values), yes = TRUE, no = FALSE) + !is.null(node_values), + yes = TRUE, no = FALSE + ) use_sn <- data.table::fifelse( - !is.null(weight_matrix), yes = FALSE, no = TRUE) + !is.null(weight_matrix), + yes = FALSE, no = TRUE + ) use_expr <- data.table::fcase( isTRUE(use_ext_vals), FALSE, @@ -1837,9 +1972,12 @@ spatialAutoCorGlobal <- function(gobject = NULL, # return info if (isTRUE(return_gobject)) { - if (isTRUE(verbose)) - wrap_msg("Appending", method, - "results to feature metadata: fDataDT()") + if (isTRUE(verbose)) { + wrap_msg( + "Appending", method, + "results to feature metadata: fDataDT()" + ) + } gobject <- addFeatMetadata( gobject = gobject, spat_unit = spat_unit, @@ -1873,30 +2011,35 @@ spatialAutoCorGlobal <- function(gobject = NULL, #' \item{\emph{Local mean} 'mean'} #' } #' @export -spatialAutoCorLocal <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - feats = NULL, - method = c("moran", "gi", "gi*", "mean"), - data_to_use = c("expression", "cell_meta"), - expression_values = c("normalized", "scaled", "custom"), - meta_cols = NULL, - spatial_network_to_use = "kNN_network", - wm_method = c("distance", "adjacency"), - wm_name = "spat_weights", - node_values = NULL, - weight_matrix = NULL, - test_method = c("none"), - # cor_name = NULL, - enrich_name = NULL, - return_gobject = TRUE, - output = c("spatEnrObj", "data.table"), - verbose = TRUE) { +spatialAutoCorLocal <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + feats = NULL, + method = c("moran", "gi", "gi*", "mean"), + data_to_use = c("expression", "cell_meta"), + expression_values = c("normalized", "scaled", "custom"), + meta_cols = NULL, + spatial_network_to_use = "kNN_network", + wm_method = c("distance", "adjacency"), + wm_name = "spat_weights", + node_values = NULL, + weight_matrix = NULL, + test_method = c("none"), + # cor_name = NULL, + enrich_name = NULL, + return_gobject = TRUE, + output = c("spatEnrObj", "data.table"), + verbose = TRUE) { # 0. determine inputs method_select <- match.arg( - method, choices = c("moran", "gi", "gi*", "mean")) + method, + choices = c("moran", "gi", "gi*", "mean") + ) data_to_use <- match.arg( - data_to_use, choices = c("expression", "cell_meta")) + data_to_use, + choices = c("expression", "cell_meta") + ) output <- match.arg(output, choices = c("spatEnrObj", "data.table")) # if(is.null(cor_name)) cor_name = method @@ -1916,9 +2059,13 @@ spatialAutoCorLocal <- function(gobject = NULL, use_ext_vals <- data.table::fifelse( - !is.null(node_values), yes = TRUE, no = FALSE) + !is.null(node_values), + yes = TRUE, no = FALSE + ) use_sn <- data.table::fifelse( - !is.null(weight_matrix), yes = FALSE, no = TRUE) + !is.null(weight_matrix), + yes = FALSE, no = TRUE + ) use_expr <- data.table::fcase( isTRUE(use_ext_vals), FALSE, @@ -2021,7 +2168,7 @@ spatialAutoCorLocal <- function(gobject = NULL, if (isTRUE(return_gobject)) { if (isTRUE(verbose)) { wrap_msg("Attaching ", method_select, - ' results as spatial enrichment: "', + ' results as spatial enrichment: "', enrich_name, '"', sep = "" ) @@ -2053,13 +2200,14 @@ spatialAutoCorLocal <- function(gobject = NULL, #' .run_spat_autocor_global #' @returns data.table #' @keywords internal -.run_spat_autocor_global <- function(use_values, - feats, - weight_matrix, - method, - test_method, - mc_nsim, - cor_name) { +.run_spat_autocor_global <- function( + use_values, + feats, + weight_matrix, + method, + test_method, + mc_nsim, + cor_name) { # data.table vars cell_ID <- nsim <- NULL @@ -2071,8 +2219,11 @@ spatialAutoCorLocal <- function(gobject = NULL, } progressr::with_progress({ - if (step_size > 1) pb <- progressr::progressor( - steps = nfeats / step_size) + if (step_size > 1) { + pb <- progressr::progressor( + steps = nfeats / step_size + ) + } res_list <- lapply_flex( seq_along(feats), # future.packages = c('terra', 'data.table'), @@ -2125,7 +2276,8 @@ spatialAutoCorLocal <- function(gobject = NULL, colnames(res_dt) <- c("feat_ID", cor_name) } else { colnames(res_dt) <- c("feat_ID", cor_name, paste0( - cor_name, "_", test_method)) + cor_name, "_", test_method + )) } return(res_dt) } @@ -2133,12 +2285,13 @@ spatialAutoCorLocal <- function(gobject = NULL, #' .run_spat_autocor_local #' @returns data.table #' @keywords internal -.run_spat_autocor_local <- function(use_values, - feats, - weight_matrix, - method, - test_method, - IDs) { +.run_spat_autocor_local <- function( + use_values, + feats, + weight_matrix, + method, + test_method, + IDs) { cell_ID <- NULL nfeats <- length(feats) @@ -2149,8 +2302,11 @@ spatialAutoCorLocal <- function(gobject = NULL, } progressr::with_progress({ - if (step_size > 1) pb <- progressr::progressor( - steps = nfeats / step_size) + if (step_size > 1) { + pb <- progressr::progressor( + steps = nfeats / step_size + ) + } res_list <- lapply_flex( seq_along(feats), # future.packages = c('terra', 'data.table'), @@ -2216,23 +2372,24 @@ spatialAutoCorLocal <- function(gobject = NULL, # 4, IDs - cell_IDs if available # Some additional information about information used in specific workflows are # also returned -.evaluate_autocor_input <- function(gobject, - use_ext_vals, - use_sn, - use_expr, - use_meta, - spat_unit, - feat_type, - feats, - data_to_use, - expression_values, - meta_cols, - spatial_network_to_use, - wm_method, - wm_name, - node_values, - weight_matrix, - verbose = TRUE) { +.evaluate_autocor_input <- function( + gobject, + use_ext_vals, + use_sn, + use_expr, + use_meta, + spat_unit, + feat_type, + feats, + data_to_use, + expression_values, + meta_cols, + spatial_network_to_use, + wm_method, + wm_name, + node_values, + weight_matrix, + verbose = TRUE) { cell_ID <- NULL # 1. Get spatial network to either get or generate a spatial weight matrix @@ -2250,7 +2407,9 @@ spatialAutoCorLocal <- function(gobject = NULL, # if no weight_matrix already generated... if (is.null(weight_matrix)) { wm_method <- match.arg( - wm_method, choices = c("distance", "adjacency")) + wm_method, + choices = c("distance", "adjacency") + ) if (isTRUE(verbose)) { wrap_msg( "No spatial weight matrix found in selected spatial network @@ -2294,7 +2453,8 @@ spatialAutoCorLocal <- function(gobject = NULL, # EXPR=================================================================# values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) use_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2358,7 +2518,8 @@ spatialAutoCorLocal <- function(gobject = NULL, (nrow(use_values) != nrow(weight_matrix))) { stop(wrap_txt("Number of values to correlate do not match number of weight matrix entries", - errWidth = TRUE)) + errWidth = TRUE + )) } @@ -2391,11 +2552,12 @@ spatialAutoCorLocal <- function(gobject = NULL, #' @description Rui to fill in #' @returns matrix #' @keywords internal -enrich_deconvolution <- function(expr, - log_expr, - cluster_info, - ct_exp, - cutoff) { +enrich_deconvolution <- function( + expr, + log_expr, + cluster_info, + ct_exp, + cutoff) { ##### generate enrich 0/1 matrix based on expression matrix ct_exp <- ct_exp[rowSums(ct_exp) > 0, ] enrich_matrix <- matrix(0, nrow = dim(ct_exp)[1], ncol = dim(ct_exp)[2]) @@ -2423,7 +2585,8 @@ enrich_deconvolution <- function(expr, cluster_info <- cluster_info for (i in seq_along(cluster_sort)) { cluster_i_enrich <- enrich_result[ - , which(cluster_info == cluster_sort[i])] + , which(cluster_info == cluster_sort[i]) + ] row_i_max <- Rfast::rowMaxs(cluster_i_enrich, value = TRUE) ct <- rownames(enrich_result)[which(row_i_max > cutoff)] if (length(ct) < 2) { @@ -2433,7 +2596,8 @@ enrich_deconvolution <- function(expr, ct_gene <- c() for (j in seq_along(ct)) { sig_gene_j <- rownames(enrich_matrix)[ - which(enrich_matrix[, ct[j]] == 1)] + which(enrich_matrix[, ct[j]] == 1) + ] ct_gene <- c(ct_gene, sig_gene_j) } uniq_ct_gene <- intersect(rownames(expr), unique(ct_gene)) @@ -2442,7 +2606,8 @@ enrich_deconvolution <- function(expr, cluster_cell_exp <- expr[uniq_ct_gene, cluster_i_cell] cluster_i_dwls <- optimize_deconvolute_dwls( - cluster_cell_exp, select_sig_exp) + cluster_cell_exp, select_sig_exp + ) dwls_results[ct, cluster_i_cell] <- cluster_i_dwls } ##### remove negative values @@ -2459,10 +2624,11 @@ enrich_deconvolution <- function(expr, #' @description Rui to fill in #' @returns matrix #' @keywords internal -spot_deconvolution <- function(expr, - cluster_info, - ct_exp, - binary_matrix) { +spot_deconvolution <- function( + expr, + cluster_info, + ct_exp, + binary_matrix) { ##### generate enrich 0/1 matrix based on expression matrix enrich_matrix <- matrix(0, nrow = dim(ct_exp)[1], ncol = dim(ct_exp)[2]) rowmax_col <- Rfast::rowMaxs(ct_exp) @@ -2480,7 +2646,8 @@ spot_deconvolution <- function(expr, for (i in seq_along(cluster_sort)) { cluster_i_matrix <- binary_matrix[ - , which(cluster_info == cluster_sort[i])] + , which(cluster_info == cluster_sort[i]) + ] row_i_max <- Rfast::rowMaxs(cluster_i_matrix, value = TRUE) ct_i <- rownames(cluster_i_matrix)[which(row_i_max == 1)] ######## calculate proportion based on binarized deconvolution @@ -2491,7 +2658,8 @@ spot_deconvolution <- function(expr, ct_gene <- c() for (j in seq_along(ct_i)) { sig_gene_j <- rownames(enrich_matrix)[ - which(enrich_matrix[, ct_i[j]] == 1)] + which(enrich_matrix[, ct_i[j]] == 1) + ] ct_gene <- c(ct_gene, sig_gene_j) } uniq_ct_gene <- intersect(rownames(expr), unique(ct_gene)) @@ -2503,12 +2671,14 @@ spot_deconvolution <- function(expr, all_exp <- Matrix::rowMeans(cluster_cell_exp) solution_all_exp <- solve_OLS_internal(select_sig_exp, all_exp) constant_J <- find_dampening_constant( - select_sig_exp, all_exp, solution_all_exp) + select_sig_exp, all_exp, solution_all_exp + ) ###### deconvolution for each spot for (k in seq_len(dim(cluster_cell_exp)[2])) { B <- Matrix::as.matrix(cluster_cell_exp[, k]) ct_spot_k <- rownames(cluster_i_matrix)[ - which(cluster_i_matrix[, k] == 1)] + which(cluster_i_matrix[, k] == 1) + ] if (sum(B) == 0 || length(ct_spot_k) == 0) { ####* must include the case where all genes are 0 dwls_results[, colnames(cluster_cell_exp)[k]] <- NA @@ -2517,16 +2687,19 @@ spot_deconvolution <- function(expr, } if (length(ct_spot_k) == 1) { dwls_results[ - ct_spot_k[1], colnames(cluster_cell_exp)[k]] <- 1 + ct_spot_k[1], colnames(cluster_cell_exp)[k] + ] <- 1 } else { ct_k_gene <- c() for (m in seq_along(ct_spot_k)) { sig_gene_k <- rownames(enrich_matrix)[which( - enrich_matrix[, ct_spot_k[m]] == 1)] + enrich_matrix[, ct_spot_k[m]] == 1 + )] ct_k_gene <- c(ct_k_gene, sig_gene_k) } uniq_ct_k_gene <- intersect( - rownames(ct_exp), unique(ct_k_gene)) + rownames(ct_exp), unique(ct_k_gene) + ) S_k <- Matrix::as.matrix(ct_exp[uniq_ct_k_gene, ct_spot_k]) if (sum(B[uniq_ct_k_gene, ]) == 0) { ####* must include the case all genes are 0 @@ -2534,7 +2707,8 @@ spot_deconvolution <- function(expr, ####* will produce NAs for some spots in the output } else { solDWLS <- optimize_solveDampenedWLS(S_k, B[ - uniq_ct_k_gene, ], constant_J) + uniq_ct_k_gene, + ], constant_J) dwls_results[names(solDWLS), colnames(cluster_cell_exp)[k]] <- solDWLS } } @@ -2555,9 +2729,10 @@ spot_deconvolution <- function(expr, #' @description Rui to fill in #' @returns enrichment values #' @keywords internal -cluster_enrich_analysis <- function(exp_matrix, - cluster_info, - enrich_sig_matrix) { +cluster_enrich_analysis <- function( + exp_matrix, + cluster_info, + enrich_sig_matrix) { uniq_cluster <- mixedsort(unique(cluster_info)) if (length(uniq_cluster) == 1) { stop("Only one cluster identified, need at least two.") @@ -2566,8 +2741,11 @@ cluster_enrich_analysis <- function(exp_matrix, for (i in uniq_cluster) { cluster_exp <- cbind( cluster_exp, - (apply(exp_matrix, 1, - function(y) mean(y[which(cluster_info == i)])))) + (apply( + exp_matrix, 1, + function(y) mean(y[which(cluster_info == i)]) + )) + ) } log_cluster_exp <- log2(cluster_exp + 1) colnames(log_cluster_exp) <- uniq_cluster @@ -2580,8 +2758,9 @@ cluster_enrich_analysis <- function(exp_matrix, #' @description Rui to fill in #' @returns enrichment matrix #' @keywords internal -enrich_analysis <- function(expr_values, - sign_matrix) { +enrich_analysis <- function( + expr_values, + sign_matrix) { # output enrichment # only continue with genes present in both datasets interGene <- intersect(rownames(sign_matrix), rownames(expr_values)) @@ -2597,7 +2776,8 @@ enrich_analysis <- function(expr_values, # get enrichment scores enrichment <- matrix( - data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean)) + data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean) + ) for (i in seq_len(dim(filterSig)[2])) { signames <- rownames(filterSig)[which(filterSig[, i] == 1)] sigColMean <- apply(geneFold[signames, ], 2, mean) @@ -2623,8 +2803,9 @@ enrich_analysis <- function(expr_values, #' @description Rui to fill in #' @returns matrix #' @keywords internal -optimize_deconvolute_dwls <- function(exp, - Signature) { +optimize_deconvolute_dwls <- function( + exp, + Signature) { ###### overlap signature with spatial genes Genes <- intersect(rownames(Signature), rownames(exp)) S <- Signature[Genes, ] @@ -2658,9 +2839,10 @@ optimize_deconvolute_dwls <- function(exp, #' @title optimize_solveDampenedWLS #' @returns numeric #' @keywords internal -optimize_solveDampenedWLS <- function(S, - B, - constant_J) { +optimize_solveDampenedWLS <- function( + S, + B, + constant_J) { # first solve OLS, use this solution to find a starting point for the # weights solution <- solve_OLS_internal(S, B) @@ -2696,9 +2878,10 @@ optimize_solveDampenedWLS <- function(S, #' @description find a dampening constant for the weights using cross-validation #' @returns numeric #' @keywords internal -find_dampening_constant <- function(S, - B, - goldStandard) { +find_dampening_constant <- function( + S, + B, + goldStandard) { solutionsSd <- NULL # goldStandard is used to define the weights @@ -2728,7 +2911,9 @@ find_dampening_constant <- function(S, # solve dampened weighted least squares for subset fit <- stats::lm( - B[subset] ~ -1 + S[subset, ], weights = wsDampened[subset]) + B[subset] ~ -1 + S[subset, ], + weights = wsDampened[subset] + ) sol <- fit$coef * sum(goldStandard) / sum(fit$coef) solutions <- cbind(solutions, sol) } @@ -2745,8 +2930,9 @@ find_dampening_constant <- function(S, #' @description basic functions for dwls #' @returns numeric #' @keywords internal -solve_OLS_internal <- function(S, - B) { +solve_OLS_internal <- function( + S, + B) { D <- t(S) %*% S d <- t(S) %*% B A <- cbind(diag(dim(S)[2])) @@ -2811,10 +2997,11 @@ solve_OLS_internal <- function(S, #' @description solve WLS given a dampening constant #' @returns matrix #' @keywords internal -solve_dampened_WLSj <- function(S, - B, - goldStandard, - j) { +solve_dampened_WLSj <- function( + S, + B, + goldStandard, + j) { multiplier <- 1 * 2^(j - 1) sol <- goldStandard ws <- as.vector((1 / (S %*% sol))^2) @@ -2863,27 +3050,30 @@ solve_dampened_WLSj <- function(S, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) #' #' runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) #' @export -runDWLSDeconv <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized"), - logbase = 2, - cluster_column = "leiden_clus", - sign_matrix, - n_cell = 50, - cutoff = 2, - name = NULL, - return_gobject = TRUE) { +runDWLSDeconv <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized"), + logbase = 2, + cluster_column = "leiden_clus", + sign_matrix, + n_cell = 50, + cutoff = 2, + name = NULL, + return_gobject = TRUE) { # verify if optional package is installed package_check(pkg_name = "quadprog", repository = "CRAN") package_check(pkg_name = "Rfast", repository = "CRAN") @@ -2905,7 +3095,8 @@ runDWLSDeconv <- function(gobject, values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2954,9 +3145,11 @@ runDWLSDeconv <- function(gobject, binary_matrix = binarize_proportion ) deconvolutionDT <- data.table::data.table( - cell_ID = colnames(spot_proportion)) + cell_ID = colnames(spot_proportion) + ) deconvolutionDT <- cbind( - deconvolutionDT, data.table::as.data.table(t(spot_proportion))) + deconvolutionDT, data.table::as.data.table(t(spot_proportion)) + ) # create spatial enrichment object enrObj <- create_spat_enr_obj( @@ -3039,29 +3232,32 @@ runDWLSDeconv <- function(gobject, #' @seealso \code{\link{runDWLSDeconv}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' x <- findMarkers_one_vs_all(g, +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) -#' +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) +#' #' runSpatialDeconv(gobject = g, sign_matrix = sign_matrix) #' @export -runSpatialDeconv <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - deconv_method = c("DWLS"), - expression_values = c("normalized"), - logbase = 2, - cluster_column = "leiden_clus", - sign_matrix, - n_cell = 50, - cutoff = 2, - name = NULL, - return_gobject = TRUE) { +runSpatialDeconv <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + deconv_method = c("DWLS"), + expression_values = c("normalized"), + logbase = 2, + cluster_column = "leiden_clus", + sign_matrix, + n_cell = 50, + cutoff = 2, + name = NULL, + return_gobject = TRUE) { deconv_method <- match.arg(deconv_method, choices = c("DWLS")) diff --git a/R/spatial_enrichment_visuals.R b/R/spatial_enrichment_visuals.R index e898eb5fc..fec02713c 100644 --- a/R/spatial_enrichment_visuals.R +++ b/R/spatial_enrichment_visuals.R @@ -2,7 +2,7 @@ #' @name findCellTypesFromEnrichment #' @param gobject Giotto Object #' @param spat_unit spatial unit in which the enrichment information is stored -#' @param feat_type feature type for which the enrichment information was +#' @param feat_type feature type for which the enrichment information was #' calculated #' @param enrichment_name name of the spatial enrichment #' i.e. output from GiottoClass::list_spatial_enrichment_names() @@ -24,15 +24,17 @@ #' the associated cell types from the enrichment. #' #' @export -findCellTypesFromEnrichment <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - return_frequency_table = FALSE) { +findCellTypesFromEnrichment <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + return_frequency_table = FALSE) { # guard clauses - if (!inherits(gobject, "giotto")) + if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") + } spat_unit <- set_default_spat_unit( gobject = gobject, @@ -69,17 +71,20 @@ findCellTypesFromEnrichment <- function(gobject = NULL, # new column, mapping a cell to it's most likely type if (enrich_is_p_value) { pz_enrich[, probable_cell_type := names( - .SD)[max.col(-.SD)], .SDcols = 2:n_c] + .SD + )[max.col(-.SD)], .SDcols = 2:n_c] } else { pz_enrich[, probable_cell_type := names( - .SD)[max.col(.SD)], .SDcols = 2:n_c] + .SD + )[max.col(.SD)], .SDcols = 2:n_c] } cell_ID_and_types_pz_enrich <- pz_enrich[, .(cell_ID, probable_cell_type)] if (return_frequency_table) { pz_enrich_cell_type_frequencies <- table( - cell_ID_and_types_pz_enrich$probable_cell_type) + cell_ID_and_types_pz_enrich$probable_cell_type + ) return(pz_enrich_cell_type_frequencies) } @@ -90,7 +95,7 @@ findCellTypesFromEnrichment <- function(gobject = NULL, #' @name plotCellTypesFromEnrichment #' @param gobject Giotto Object #' @param spat_unit spatial unit in which the enrichment information is stored -#' @param feat_type feature type for which the enrichment information was +#' @param feat_type feature type for which the enrichment information was #' calculated #' @param enrichment_name name of the spatial enrichment #' i.e. output from GiottoClass::list_spatial_enrichment_names() @@ -104,20 +109,21 @@ findCellTypesFromEnrichment <- function(gobject = NULL, #' This function generates a bar plot of cell types vs the frequency #' of that cell type in the data. These cell type results are #' based on the provided `enrichment_name`, and will be determined -#' by the maximum value of the z-score or p-value for a given cell or +#' by the maximum value of the z-score or p-value for a given cell or #' annotation. #' #' @export -plotCellTypesFromEnrichment <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - title = NULL, - save_param = list(), - default_save_name = "cell_types_from_enrichment", - save_plot = NULL, - show_plot = NULL, - return_plot = NULL) { +plotCellTypesFromEnrichment <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + title = NULL, + save_param = list(), + default_save_name = "cell_types_from_enrichment", + save_plot = NULL, + show_plot = NULL, + return_plot = NULL) { # guard clauses handled at first step downstream # therefore, omitting here. id_and_types <- findCellTypesFromEnrichment( @@ -131,8 +137,11 @@ plotCellTypesFromEnrichment <- function(gobject = NULL, # data.table column probable_cell_type <- NULL - if (is.null(title)) title <- paste0( - spat_unit, "cell types (maximum", enrichment_name, ")") + if (is.null(title)) { + title <- paste0( + spat_unit, "cell types (maximum", enrichment_name, ")" + ) + } pl <- ggplot2::ggplot(id_and_types, aes(x = probable_cell_type)) + ggplot2::geom_bar() + @@ -163,7 +172,7 @@ plotCellTypesFromEnrichment <- function(gobject = NULL, #' @name pieCellTypesFromEnrichment #' @param gobject Giotto Object #' @param spat_unit spatial unit in which the enrichment information is stored -#' @param feat_type feature type for which the enrichment information was +#' @param feat_type feature type for which the enrichment information was #' calculated #' @param enrichment_name name of the spatial enrichment #' i.e. output from GiottoClass::list_spatial_enrichment_names() @@ -179,16 +188,17 @@ plotCellTypesFromEnrichment <- function(gobject = NULL, #' and will be determined by the maximum value of the z-score #' or p-value for a given cell or annotation. #' @export -pieCellTypesFromEnrichment <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - title = NULL, - save_param = list(), - default_save_name = "cell_types_from_enrichment_pie", - save_plot = NULL, - show_plot = NULL, - return_plot = NULL) { +pieCellTypesFromEnrichment <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + title = NULL, + save_param = list(), + default_save_name = "cell_types_from_enrichment_pie", + save_plot = NULL, + show_plot = NULL, + return_plot = NULL) { # guard clauses handled one step downstream freq_table <- findCellTypesFromEnrichment( @@ -211,7 +221,8 @@ pieCellTypesFromEnrichment <- function(gobject = NULL, for (i in cell_types) { # hackish, admittedly nullvar <- freq_dt[cell_type == i, perc := num_cells / sum( - freq_dt$num_cells) * 100] + freq_dt$num_cells + ) * 100] } rm(nullvar) # saves memory diff --git a/R/spatial_genes.R b/R/spatial_genes.R index 754d46abd..45e446bff 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -8,11 +8,12 @@ NULL #' @rdname spat_fisher_exact #' @keywords internal -.spat_fish_func <- function(feat, - bin_matrix, - spat_mat, - calc_hub = FALSE, - hub_min_int = 3) { +.spat_fish_func <- function( + feat, + bin_matrix, + spat_mat, + calc_hub = FALSE, + hub_min_int = 3) { feat_vector <- bin_matrix[rownames(bin_matrix) == feat, ] feat_vectorA <- feat_vector[names(feat_vector) %in% rownames(spat_mat)] @@ -47,19 +48,22 @@ NULL high_cells <- names(feat_vector[feat_vector == 1]) subset_spat_mat <- spat_mat[ rownames(spat_mat) %in% high_cells, colnames(spat_mat) %in% - high_cells] + high_cells + ] if (length(subset_spat_mat) == 1) { hub_nr <- 0 } else { subset_spat_mat <- spat_mat[ rownames(spat_mat) %in% high_cells, colnames(spat_mat) %in% - high_cells] + high_cells + ] rowhubs <- rowSums_flex(subset_spat_mat) colhubs <- colSums_flex(subset_spat_mat) hub_nr <- length(unique(c( names(colhubs[colhubs > hub_min_int]), - names(rowhubs[colhubs > hub_min_int])))) + names(rowhubs[colhubs > hub_min_int]) + ))) } fish_res <- stats::fisher.test(table_matrix)[c("p.value", "estimate")] @@ -72,11 +76,12 @@ NULL #' @describeIn spat_fisher_exact data.table implementation #' @keywords internal -.spat_fish_func_dt <- function(bin_matrix_DTm, - spat_netw_min, - calc_hub = FALSE, - hub_min_int = 3, - cores = NA) { +.spat_fish_func_dt <- function( + bin_matrix_DTm, + spat_netw_min, + calc_hub = FALSE, + hub_min_int = 3, + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -91,12 +96,15 @@ NULL bin_matrix_DTm, by.x = "from", by.y = "variable", - allow.cartesian = TRUE) + allow.cartesian = TRUE + ) data.table::setnames(spatial_network_min_ext, "value", "from_value") spatial_network_min_ext <- data.table::merge.data.table( - spatial_network_min_ext, by.x = c("to", "feat_ID"), - bin_matrix_DTm, by.y = c("variable", "feat_ID")) + spatial_network_min_ext, + by.x = c("to", "feat_ID"), + bin_matrix_DTm, by.y = c("variable", "feat_ID") + ) data.table::setnames(spatial_network_min_ext, "value", "to_value") @@ -121,7 +129,8 @@ NULL # sort the combinations and run fisher test data.table::setorder(freq_summary2, feat_ID, combn, -N) fish_results <- freq_summary2[, stats::fisher.test( - matrix(N, nrow = 2))[c(1, 3)], by = feat_ID] + matrix(N, nrow = 2) + )[c(1, 3)], by = feat_ID] ## hubs ## @@ -140,14 +149,17 @@ NULL # get hubs and add 0's hub_DT <- double_pos_both[V1 > hub_min_int, .N, by = feat_ID] hub_DT_zeroes <- data.table::data.table(feat_ID = unique( - spatial_network_min_ext$feat_ID), N = 0) + spatial_network_min_ext$feat_ID + ), N = 0) hub_DT2 <- rbind(hub_DT, hub_DT_zeroes) hub_DT2 <- hub_DT2[, sum(N), by = feat_ID] data.table::setnames(hub_DT2, "V1", "hub_nr") fish_results <- data.table::merge.data.table( - fish_results, hub_DT2, by = "feat_ID") + fish_results, hub_DT2, + by = "feat_ID" + ) } return(fish_results) @@ -164,11 +176,12 @@ NULL #' @rdname spat_odds_ratio #' @keywords internal -.spat_or_func <- function(feat, - bin_matrix, - spat_mat, - calc_hub = FALSE, - hub_min_int = 3) { +.spat_or_func <- function( + feat, + bin_matrix, + spat_mat, + calc_hub = FALSE, + hub_min_int = 3) { feat_vector <- bin_matrix[rownames(bin_matrix) == feat, ] feat_vectorA <- feat_vector[names(feat_vector) %in% rownames(spat_mat)] @@ -204,7 +217,8 @@ NULL high_cells <- names(feat_vector[feat_vector == 1]) subset_spat_mat <- spat_mat[ rownames(spat_mat) %in% high_cells, colnames(spat_mat) %in% - high_cells] + high_cells + ] if (length(subset_spat_mat) == 1) { hub_nr <- 0 @@ -212,14 +226,16 @@ NULL rowhubs <- rowSums_flex(subset_spat_mat) colhubs <- colSums_flex(subset_spat_mat) hub_nr <- length(unique(c(names( - colhubs[colhubs > hub_min_int]), names( - rowhubs[colhubs > hub_min_int])))) + colhubs[colhubs > hub_min_int] + ), names( + rowhubs[colhubs > hub_min_int] + )))) } fish_matrix <- table_matrix fish_matrix <- fish_matrix / 1000 OR <- ((fish_matrix[1] * fish_matrix[4]) / - (fish_matrix[2] * fish_matrix[3])) + (fish_matrix[2] * fish_matrix[3])) return(c(feats = list(feat), OR, hubs = list(hub_nr))) } @@ -234,11 +250,12 @@ NULL #' @describeIn spat_odds_ratio data.table implementation #' @keywords internal -.spat_or_func_dt <- function(bin_matrix_DTm, - spat_netw_min, - calc_hub = FALSE, - hub_min_int = 3, - cores = NA) { +.spat_or_func_dt <- function( + bin_matrix_DTm, + spat_netw_min, + calc_hub = FALSE, + hub_min_int = 3, + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -251,12 +268,15 @@ NULL spatial_network_min_ext <- data.table::merge.data.table( spat_netw_min, bin_matrix_DTm, by.x = "from", by.y = "variable", - allow.cartesian = TRUE) + allow.cartesian = TRUE + ) data.table::setnames(spatial_network_min_ext, "value", "from_value") spatial_network_min_ext <- data.table::merge.data.table( - spatial_network_min_ext, by.x = c("to", "feat_ID"), - bin_matrix_DTm, by.y = c("variable", "feat_ID")) + spatial_network_min_ext, + by.x = c("to", "feat_ID"), + bin_matrix_DTm, by.y = c("variable", "feat_ID") + ) data.table::setnames(spatial_network_min_ext, "value", "to_value") @@ -281,7 +301,9 @@ NULL # sort the combinations and run fisher test setorder(freq_summary2, feat_ID, combn, -N) or_results <- freq_summary2[ - , .or_test_func(matrix(N, nrow = 2)), by = feat_ID] + , .or_test_func(matrix(N, nrow = 2)), + by = feat_ID + ] ## hubs ## @@ -300,14 +322,17 @@ NULL # get hubs and add 0's hub_DT <- double_pos_both[V1 > hub_min_int, .N, by = feat_ID] hub_DT_zeroes <- data.table::data.table( - feat_ID = unique(spatial_network_min_ext$feat_ID), N = 0) + feat_ID = unique(spatial_network_min_ext$feat_ID), N = 0 + ) hub_DT2 <- rbind(hub_DT, hub_DT_zeroes) hub_DT2 <- hub_DT2[, sum(N), by = feat_ID] data.table::setnames(hub_DT2, "V1", "hub_nr") or_results <- data.table::merge.data.table( - or_results, hub_DT2, by = "feat_ID") + or_results, hub_DT2, + by = "feat_ID" + ) } return(or_results) @@ -336,10 +361,11 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using a 'simple' and #' efficient for loop #' @keywords internal -.calc_spatial_enrichment_minimum <- function(spatial_network, - bin_matrix, - adjust_method = "fdr", - do_fisher_test = TRUE) { +.calc_spatial_enrichment_minimum <- function( + spatial_network, + bin_matrix, + adjust_method = "fdr", + do_fisher_test = TRUE) { # data.table variables from <- to <- feats <- variable <- value <- p.value <- adj.p.value <- score <- estimate <- NULL @@ -355,7 +381,8 @@ NULL # preallocate final matrix for results matrix_res <- matrix( - data = NA, nrow = nrow(bin_matrix), ncol = nrow(spatial_network_min)) + data = NA, nrow = nrow(bin_matrix), ncol = nrow(spatial_network_min) + ) ## 1. summarize results for each edge in the network for (row_i in seq_len(nrow(spatial_network_min))) { @@ -363,7 +390,8 @@ NULL to_id <- spatial_network_min[row_i][["to"]] sumres <- data.table::as.data.table(bin_matrix[ - , all_colindex[c(from_id, to_id)]]) + , all_colindex[c(from_id, to_id)] + ]) sumres[, combn := paste0(get(from_id), "-", get(to_id))] code_res <- convert_code[sumres$combn] @@ -398,20 +426,26 @@ NULL ## run fisher test ## if (do_fisher_test == TRUE) { results <- rable_resDTm[, stats::fisher.test(matrix( - value, nrow = 2))[c(1, 3)], by = feats] + value, + nrow = 2 + ))[c(1, 3)], by = feats] # replace zero p-values with lowest p-value min_pvalue <- min(results$p.value[results$p.value > 0]) results[, p.value := ifelse(p.value == 0, min_pvalue, p.value)] results[, adj.p.value := stats::p.adjust( - p.value, method = adjust_method)] + p.value, + method = adjust_method + )] # sort feats based on p-value and estimate results[, score := -log(p.value) * estimate] data.table::setorder(results, -score) } else { results <- rable_resDTm[, .or_test_func(matrix( - value, nrow = 2)), by = feats] + value, + nrow = 2 + )), by = feats] data.table::setorder(results, -estimate) } @@ -421,21 +455,24 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using 'matrix' #' implementation #' @keywords internal -.calc_spatial_enrichment_matrix <- function(spatial_network, - bin_matrix, - adjust_method = "fdr", - do_fisher_test = TRUE, - do_parallel = TRUE, - cores = NA, - calc_hub = FALSE, - hub_min_int = 3, - verbose = TRUE) { +.calc_spatial_enrichment_matrix <- function( + spatial_network, + bin_matrix, + adjust_method = "fdr", + do_fisher_test = TRUE, + do_parallel = TRUE, + cores = NA, + calc_hub = FALSE, + hub_min_int = 3, + verbose = TRUE) { # data.table variables verbose <- feats <- p.value <- estimate <- adj.p.value <- score <- NULL # convert spatial network data.table to spatial matrix dc_spat_network <- data.table::dcast.data.table( - spatial_network, formula = to ~ from, value.var = "distance", fill = 0) + spatial_network, + formula = to ~ from, value.var = "distance", fill = 0 + ) spat_mat <- dt_to_matrix(dc_spat_network) spat_mat[spat_mat > 0] <- 1 @@ -486,13 +523,16 @@ NULL if (do_fisher_test == TRUE) { result[, c("p.value", "estimate") := list( - as.numeric(p.value), as.numeric(estimate))] + as.numeric(p.value), as.numeric(estimate) + )] # convert p.value = 0 to lowest p-value min_pvalue <- min(result$p.value[result$p.value > 0]) result[, p.value := ifelse(p.value == 0, min_pvalue, p.value)] result[, adj.p.value := stats::p.adjust( - p.value, method = adjust_method)] + p.value, + method = adjust_method + )] result[, score := -log(p.value) * estimate] data.table::setorder(result, -score) @@ -508,15 +548,14 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using 'data.table' #' implementation #' @keywords internal -.calc_spatial_enrichment_dt <- function( - bin_matrix, - spatial_network, - calc_hub = FALSE, - hub_min_int = 3, - group_size = "automatic", - do_fisher_test = TRUE, - adjust_method = "fdr", - cores = NA) { +.calc_spatial_enrichment_dt <- function(bin_matrix, + spatial_network, + calc_hub = FALSE, + hub_min_int = 3, + group_size = "automatic", + do_fisher_test = TRUE, + adjust_method = "fdr", + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -542,12 +581,16 @@ NULL } groups <- ceiling(nrow(bin_matrix) / group_size) - cut_groups <- cut(seq_len(nrow(bin_matrix)), breaks = groups, - labels = seq_len(groups)) + cut_groups <- cut(seq_len(nrow(bin_matrix)), + breaks = groups, + labels = seq_len(groups) + ) if (any(table(cut_groups) == 1)) { - stop("With group size = ", group_size, + stop( + "With group size = ", group_size, " you have a single gene in a group. Manually pick another group - size") + size" + ) } indexes <- seq_len(nrow(bin_matrix)) names(indexes) <- cut_groups @@ -560,7 +603,9 @@ NULL bin_matrix_DT <- data.table::as.data.table(bin_matrix[sel_indices, ]) bin_matrix_DT[, feat_ID := rownames(bin_matrix[sel_indices, ])] bin_matrix_DTm <- data.table::melt.data.table( - bin_matrix_DT, id.vars = "feat_ID") + bin_matrix_DT, + id.vars = "feat_ID" + ) if (do_fisher_test == TRUE) { test <- .spat_fish_func_dt( @@ -590,7 +635,9 @@ NULL min_pvalue <- min(result$p.value[result$p.value > 0]) result[, p.value := ifelse(p.value == 0, min_pvalue, p.value)] result[, adj.p.value := stats::p.adjust( - p.value, method = adjust_method)] + p.value, + method = adjust_method + )] result[, score := -log(p.value) * estimate] data.table::setorder(result, -score) @@ -716,40 +763,38 @@ NULL #' @rdname binSpect #' @export -binSpect <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_name = "Delaunay_network", - spatial_network_k = NULL, - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - bin_matrix = NULL, - summarize = c("p.value", "adj.p.value"), - return_gobject = FALSE -) { +binSpect <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + spatial_network_k = NULL, + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + bin_matrix = NULL, + summarize = c("p.value", "adj.p.value"), + return_gobject = FALSE) { # TODO align set.seed, set_seed, seed_number naming and usage across # packages # use only param seed. If NULL, set no seed. If !NULL set value as seed @@ -770,7 +815,7 @@ binSpect <- function( "subset_feats", "reduce_network", "kmeans_algo", "nstart", "iter_max", "extreme_nr", "sample_nr", "percentage_rank", "do_fisher_test", "adjust_method", - "calc_hub" , "hub_min_int", "get_av_expr", "get_high_expr", + "calc_hub", "hub_min_int", "get_av_expr", "get_high_expr", "implementation", "group_size", "do_parallel", "cores", "seed", "verbose" )) @@ -791,9 +836,9 @@ binSpect <- function( } if (isTRUE(return_gobject)) { - result_dt <- data.table::data.table( - feats = output$feats, pval = output$adj.p.value) + feats = output$feats, pval = output$adj.p.value + ) data.table::setnames(result_dt, old = "pval", new = "binSpect.pval") gobject <- addFeatMetadata( gobject, @@ -818,30 +863,31 @@ binSpect <- function( #' @param expression_matrix expression matrix #' @param spatial_network spatial network in data.table format #' @export -binSpectSingleMatrix <- function(expression_matrix, - spatial_network = NULL, - bin_matrix = NULL, - bin_method = c("kmeans", "rank"), - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = FALSE, - set.seed = deprecated(), - seed = 1234) { +binSpectSingleMatrix <- function( + expression_matrix, + spatial_network = NULL, + bin_matrix = NULL, + bin_method = c("kmeans", "rank"), + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = FALSE, + set.seed = deprecated(), + seed = 1234) { if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( when = "4.0.3", @@ -875,11 +921,14 @@ binSpectSingleMatrix <- function(expression_matrix, # kmeans algorithm kmeans_algo <- match.arg( kmeans_algo, - choices = c("kmeans", "kmeans_arma", "kmeans_arma_subset")) + choices = c("kmeans", "kmeans_arma", "kmeans_arma_subset") + ) # implementation implementation <- match.arg( - implementation, choices = c("data.table", "simple", "matrix")) + implementation, + choices = c("data.table", "simple", "matrix") + ) # spatial network @@ -975,7 +1024,8 @@ binSpectSingleMatrix <- function(expression_matrix, # expression if (!is.null(subset_feats)) { expr_values <- expression_matrix[ - rownames(expression_matrix) %in% subset_feats, ] + rownames(expression_matrix) %in% subset_feats, + ] } else { expr_values <- expression_matrix } @@ -985,7 +1035,8 @@ binSpectSingleMatrix <- function(expression_matrix, mean(x[x > 0]) }) av_expr_DT <- data.table::data.table( - feats = names(av_expr), av_expr = av_expr) + feats = names(av_expr), av_expr = av_expr + ) result <- merge(result, av_expr_DT, by = "feats") vmsg(.v = verbose, "\n 3. (optional) average expression of high @@ -1000,7 +1051,8 @@ binSpectSingleMatrix <- function(expression_matrix, if (get_high_expr) { high_expr <- rowSums(bin_matrix) high_expr_DT <- data.table::data.table( - feats = names(high_expr), high_expr = high_expr) + feats = names(high_expr), high_expr = high_expr + ) result <- merge(result, high_expr_DT, by = "feats") vmsg(.v = verbose, "\n 4. (optional) number of high expressing cells @@ -1022,34 +1074,35 @@ binSpectSingleMatrix <- function(expression_matrix, #' @describeIn binSpect binSpect for a single spatial network #' @export -binSpectSingle <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_name = "Delaunay_network", - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - set.seed = deprecated(), - seed = 1234, - bin_matrix = NULL) { +binSpectSingle <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + set.seed = deprecated(), + seed = 1234, + bin_matrix = NULL) { ## deprecated arguments if (is_present(set.seed) && !is.function(set.seed)) { @@ -1077,7 +1130,8 @@ binSpectSingle <- function(gobject, ## 1. expression matrix values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1095,15 +1149,18 @@ binSpectSingle <- function(gobject, output = "networkDT" ) if (is.null(spatial_network)) { - stop("spatial_network_name: ", spatial_network_name, - " does not exist, create a spatial network first") + stop( + "spatial_network_name: ", spatial_network_name, + " does not exist, create a spatial network first" + ) } # convert to full network if (reduce_network == FALSE) { spatial_network <- convert_to_full_spatial_network(spatial_network) data.table::setnames( - spatial_network, c("source", "target"), c("from", "to")) + spatial_network, c("source", "target"), c("from", "to") + ) } @@ -1140,35 +1197,36 @@ binSpectSingle <- function(gobject, #' @describeIn binSpect binSpect for multiple spatial kNN networks #' @export -binSpectMulti <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_k = c(5, 10, 20), - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = c(10, 30), - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - summarize = c("adj.p.value", "p.value")) { +binSpectMulti <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_k = c(5, 10, 20), + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = c(10, 30), + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + summarize = c("adj.p.value", "p.value")) { ## deprecated arguments if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( @@ -1192,8 +1250,9 @@ binSpectMulti <- function(gobject, feat_type = feat_type ) - if (verbose == TRUE) + if (verbose == TRUE) { message("This is the multi parameter version of binSpect") + } # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) @@ -1224,8 +1283,9 @@ binSpectMulti <- function(gobject, )) for (rank_i in percentage_rank) { - if (verbose == TRUE) + if (verbose == TRUE) { cat("Run for k = ", k, " and rank % = ", rank_i) + } result <- binSpectSingle( gobject = temp_gobject, @@ -1267,7 +1327,8 @@ binSpectMulti <- function(gobject, ## expression matrix values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1348,10 +1409,12 @@ binSpectMulti <- function(gobject, simple_result <- combined_result[, sum(log(get(summarize))), by = feats] simple_result[, V1 := V1 * -2] simple_result[, p.val := stats::pchisq( - q = V1, df = total_trials, log.p = FALSE, lower.tail = FALSE)] + q = V1, df = total_trials, log.p = FALSE, lower.tail = FALSE + )] return(list( - combined = combined_result, simple = simple_result[, .(feats, p.val)])) + combined = combined_result, simple = simple_result[, .(feats, p.val)] + )) } @@ -1393,31 +1456,32 @@ binSpectMulti <- function(gobject, #' is set. #' @param summarize summarize the p-values or adjusted p-values #' @returns data.table with results -binSpectMultiMatrix <- function(expression_matrix, - spatial_networks, - bin_method = c("kmeans", "rank"), - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = c(10, 30), - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - summarize = c("adj.p.value", "p.value")) { +binSpectMultiMatrix <- function( + expression_matrix, + spatial_networks, + bin_method = c("kmeans", "rank"), + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = c(10, 30), + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + summarize = c("adj.p.value", "p.value")) { if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( when = "4.0.3", @@ -1430,8 +1494,9 @@ binSpectMultiMatrix <- function(expression_matrix, } - if (verbose == TRUE) + if (verbose == TRUE) { message("This is the multi parameter version of binSpect") + } # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) @@ -1452,8 +1517,9 @@ binSpectMultiMatrix <- function(expression_matrix, for (k in seq_along(spatial_networks)) { for (rank_i in percentage_rank) { - if (verbose == TRUE) + if (verbose == TRUE) { cat("Run for spatial network ", k, " and rank % = ", rank_i) + } result <- binSpectSingleMatrix( expression_matrix = expression_matrix, @@ -1546,10 +1612,12 @@ binSpectMultiMatrix <- function(expression_matrix, simple_result <- combined_result[, sum(log(get(summarize))), by = feats] simple_result[, V1 := V1 * -2] simple_result[, p.val := stats::pchisq( - q = V1, df = total_trials, log.p = FALSE, lower.tail = FALSE)] + q = V1, df = total_trials, log.p = FALSE, lower.tail = FALSE + )] return(list( - combined = combined_result, simple = simple_result[, .(feats, p.val)])) + combined = combined_result, simple = simple_result[, .(feats, p.val)] + )) } @@ -1580,13 +1648,14 @@ binSpectMultiMatrix <- function(expression_matrix, #' #' silhouetteRank(g) #' @export -silhouetteRank <- function(gobject, - expression_values = c("normalized", "scaled", "custom"), - metric = "euclidean", - subset_genes = NULL, - rbp_p = 0.95, - examine_top = 0.3, - python_path = NULL) { +silhouetteRank <- function( + gobject, + expression_values = c("normalized", "scaled", "custom"), + metric = "euclidean", + subset_genes = NULL, + rbp_p = 0.95, + examine_top = 0.3, + python_path = NULL) { # expression values values <- match.arg(expression_values, c("normalized", "scaled", "custom")) expr_values <- getExpression( @@ -1622,7 +1691,9 @@ silhouetteRank <- function(gobject, ## prepare python path and louvain script reticulate::use_python(required = TRUE, python = python_path) python_silh_function <- system.file( - "python", "python_spatial_genes.py", package = "Giotto") + "python", "python_spatial_genes.py", + package = "Giotto" + ) reticulate::source_python(file = python_silh_function) output_python <- python_spatial_genes( @@ -1671,18 +1742,19 @@ silhouetteRank <- function(gobject, #' #' silhouetteRankTest(g) #' @export -silhouetteRankTest <- function(gobject, - expression_values = c("normalized", "scaled", "custom"), - subset_genes = NULL, - overwrite_input_bin = TRUE, - rbp_ps = c(0.95, 0.99), - examine_tops = c(0.005, 0.010, 0.050, 0.100, 0.300), - matrix_type = "dissim", - num_core = 4, - parallel_path = "/usr/bin", - output = NULL, - query_sizes = 10L, - verbose = FALSE) { +silhouetteRankTest <- function( + gobject, + expression_values = c("normalized", "scaled", "custom"), + subset_genes = NULL, + overwrite_input_bin = TRUE, + rbp_ps = c(0.95, 0.99), + examine_tops = c(0.005, 0.010, 0.050, 0.100, 0.300), + matrix_type = "dissim", + num_core = 4, + parallel_path = "/usr/bin", + output = NULL, + query_sizes = 10L, + verbose = FALSE) { # data.table variables cell_ID <- sdimx <- sdimy <- sdimz <- NULL @@ -1697,7 +1769,6 @@ silhouetteRankTest <- function(gobject, "To install: \n", "install.packages('eva')" ) - } ## test if python package is installed @@ -1748,16 +1819,28 @@ silhouetteRankTest <- function(gobject, if (is.null(output)) { save_dir <- readGiottoInstructions(gobject, param = "save_dir") silh_output_dir <- paste0(save_dir, "/", "silhouetteRank_output/") - if (!file.exists(silh_output_dir)) dir.create( - silh_output_dir, recursive = TRUE) + if (!file.exists(silh_output_dir)) { + dir.create( + silh_output_dir, + recursive = TRUE + ) + } } else if (file.exists(output)) { silh_output_dir <- paste0(output, "/", "silhouetteRank_output/") - if (!file.exists(silh_output_dir)) dir.create( - silh_output_dir, recursive = TRUE) + if (!file.exists(silh_output_dir)) { + dir.create( + silh_output_dir, + recursive = TRUE + ) + } } else { silh_output_dir <- paste0(output, "/", "silhouetteRank_output/") - if (!file.exists(silh_output_dir)) dir.create( - silh_output_dir, recursive = TRUE) + if (!file.exists(silh_output_dir)) { + dir.create( + silh_output_dir, + recursive = TRUE + ) + } } # log directory @@ -1786,8 +1869,11 @@ silhouetteRankTest <- function(gobject, silh_output_dir_norm <- normalizePath(silh_output_dir) expr_values_path_norm <- paste0(silh_output_dir_norm, "/", "expression.txt") - data.table::fwrite(data.table::as.data.table( - expr_values, keep.rownames = "gene"), + data.table::fwrite( + data.table::as.data.table( + expr_values, + keep.rownames = "gene" + ), file = expr_values_path_norm, quote = FALSE, sep = "\t", @@ -1801,7 +1887,9 @@ silhouetteRankTest <- function(gobject, python_path <- readGiottoInstructions(gobject, param = "python_path") reticulate::use_python(required = TRUE, python = python_path) python_silh_function <- system.file( - "python", "silhouette_rank_wrapper.py", package = "Giotto") + "python", "silhouette_rank_wrapper.py", + package = "Giotto" + ) reticulate::source_python(file = python_silh_function) @@ -1856,21 +1944,22 @@ silhouetteRankTest <- function(gobject, #' #' spatialDE(g) #' @export -spatialDE <- function(gobject = NULL, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - expression_values = c("raw", "normalized", "scaled", "custom"), - size = c(4, 2, 1), - color = c("blue", "green", "red"), - sig_alpha = 0.5, - unsig_alpha = 0.5, - python_path = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "SpatialDE") { +spatialDE <- function( + gobject = NULL, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + expression_values = c("raw", "normalized", "scaled", "custom"), + size = c(4, 2, 1), + color = c("blue", "green", "red"), + sig_alpha = 0.5, + unsig_alpha = 0.5, + python_path = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "SpatialDE") { # test if SPARK is installed ## module_test <- reticulate::py_module_available("SpatialDE") @@ -1920,7 +2009,8 @@ spatialDE <- function(gobject = NULL, # expression values <- match.arg( - expression_values, c("raw", "normalized", "scaled", "custom")) + expression_values, c("raw", "normalized", "scaled", "custom") + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1937,7 +2027,9 @@ spatialDE <- function(gobject = NULL, ## source python file reticulate::use_python(required = TRUE, python = python_path) reader_path <- system.file( - "python", "SpatialDE_wrapper.py", package = "Giotto") + "python", "SpatialDE_wrapper.py", + package = "Giotto" + ) reticulate::source_python(file = reader_path) ## get spatial locations @@ -1952,13 +2044,15 @@ spatialDE <- function(gobject = NULL, ## run spatialDE Spatial_DE_results <- Spatial_DE( - as.data.frame(t(as.matrix(expr_values))), spatial_locs) + as.data.frame(t(as.matrix(expr_values))), spatial_locs + ) results <- as.data.frame(reticulate::py_to_r(Spatial_DE_results[[1]])) if (length(Spatial_DE_results) == 2) { ms_results <- as.data.frame( - reticulate::py_to_r(Spatial_DE_results[[2]])) + reticulate::py_to_r(Spatial_DE_results[[2]]) + ) spatial_genes_results <- list(results, ms_results) names(spatial_genes_results) <- c("results", "ms_results") } else { @@ -1969,11 +2063,17 @@ spatialDE <- function(gobject = NULL, # print, return and save parameters show_plot <- ifelse(is.na(show_plot), readGiottoInstructions( - gobject, param = "show_plot"), show_plot) + gobject, + param = "show_plot" + ), show_plot) save_plot <- ifelse(is.na(save_plot), readGiottoInstructions( - gobject, param = "save_plot"), save_plot) + gobject, + param = "save_plot" + ), save_plot) return_plot <- ifelse(is.na(return_plot), readGiottoInstructions( - gobject, param = "return_plot"), return_plot) + gobject, + param = "return_plot" + ), return_plot) ## create plot if (isTRUE(show_plot) || @@ -1998,8 +2098,11 @@ spatialDE <- function(gobject = NULL, if (save_plot == TRUE) { do.call( "all_plots_save_function", - c(list(gobject = gobject, plot_object = FSV_plot, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = FSV_plot, + default_save_name = default_save_name + ), save_param) + ) } ## return results and plot (optional) @@ -2033,17 +2136,18 @@ spatialDE <- function(gobject = NULL, #' #' spatialAEH(g) #' @export -spatialAEH <- function(gobject = NULL, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - SpatialDE_results = NULL, - name_pattern = "AEH_patterns", - expression_values = c("raw", "normalized", "scaled", "custom"), - pattern_num = 6, - l = 1.05, - python_path = NULL, - return_gobject = TRUE) { +spatialAEH <- function( + gobject = NULL, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + SpatialDE_results = NULL, + name_pattern = "AEH_patterns", + expression_values = c("raw", "normalized", "scaled", "custom"), + pattern_num = 6, + l = 1.05, + python_path = NULL, + return_gobject = TRUE) { # data.table variables cell_ID <- NULL @@ -2060,7 +2164,8 @@ spatialAEH <- function(gobject = NULL, # expression values <- match.arg( - expression_values, c("raw", "normalized", "scaled", "custom")) + expression_values, c("raw", "normalized", "scaled", "custom") + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -2076,7 +2181,9 @@ spatialAEH <- function(gobject = NULL, ## source python file reticulate::use_python(required = TRUE, python = python_path) reader_path <- system.file( - "python", "SpatialDE_wrapper.py", package = "Giotto") + "python", "SpatialDE_wrapper.py", + package = "Giotto" + ) reticulate::source_python(file = reader_path) @@ -2105,14 +2212,17 @@ spatialAEH <- function(gobject = NULL, spatial_pattern_results <- list(histology_results, cell_pattern_score) names(spatial_pattern_results) <- c( - "histology_results", "cell_pattern_score") + "histology_results", "cell_pattern_score" + ) if (return_gobject == TRUE) { dt_res <- data.table::as.data.table( - spatial_pattern_results[["cell_pattern_score"]]) + spatial_pattern_results[["cell_pattern_score"]] + ) dt_res[["cell_ID"]] <- rownames( - spatial_pattern_results[["cell_pattern_score"]]) + spatial_pattern_results[["cell_pattern_score"]] + ) gobject@spatial_enrichment[[name_pattern]] <- dt_res return(gobject) } else { @@ -2132,15 +2242,18 @@ spatialAEH <- function(gobject = NULL, #' @param unsig_alpha transparency of unsignificant genes #' @returns ggplot object #' @keywords internal -FSV_show <- function(results, - ms_results = NULL, - size = c(4, 2, 1), - color = c("blue", "green", "red"), - sig_alpha = 0.5, - unsig_alpha = 0.5) { +FSV_show <- function( + results, + ms_results = NULL, + size = c(4, 2, 1), + color = c("blue", "green", "red"), + sig_alpha = 0.5, + unsig_alpha = 0.5) { results$FSV95conf <- 2 * sqrt(results$s2_FSV) results$intervals <- cut( - results$FSV95conf, c(0, 1e-1, 1e0, Inf), label = FALSE) + results$FSV95conf, c(0, 1e-1, 1e0, Inf), + label = FALSE + ) results$log_pval <- log10(results$pval) if (is.null(ms_results)) { @@ -2161,7 +2274,8 @@ FSV_show <- function(results, pl <- pl + ggplot2::geom_point( data = results[results$qval < 0.05, ], ggplot2::aes_string( - x = "FSV", y = "log_pval", fill = "model_bic", size = "intervals"), + x = "FSV", y = "log_pval", fill = "model_bic", size = "intervals" + ), show.legend = TRUE, shape = 21, alpha = sig_alpha, stroke = 0.1, color = "black" ) + @@ -2180,10 +2294,12 @@ FSV_show <- function(results, labels = c("linear", "periodical", "general") ) + ggplot2::geom_hline(yintercept = max(results[ - results$qval < 0.05, ]$log_pval), linetype = "dashed") + + results$qval < 0.05, + ]$log_pval), linetype = "dashed") + ggplot2::geom_text(ggplot2::aes(0.9, max(results[ - results$qval < 0.05, ]$log_pval), - label = "FDR = 0.05", vjust = -1 + results$qval < 0.05, + ]$log_pval), + label = "FDR = 0.05", vjust = -1 )) + ggplot2::scale_y_reverse() @@ -2217,15 +2333,16 @@ FSV_show <- function(results, #' #' trendSceek(g) #' @export -trendSceek <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - expression_values = c("normalized", "raw"), - subset_genes = NULL, - nrand = 100, - ncores = 8, - ...) { +trendSceek <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + expression_values = c("normalized", "raw"), + subset_genes = NULL, + nrand = 100, + ncores = 8, + ...) { # verify if optional package is installed package_check( pkg_name = "trendsceek", @@ -2295,7 +2412,9 @@ trendSceek <- function(gobject, ## run trendsceek trendsceektest <- trendsceek::trendsceek_test( - pp, nrand = nrand, ncores = ncores, ...) + pp, + nrand = nrand, ncores = ncores, ... + ) ## get final results trendsceektest <- trendsceektest$supstats_wide @@ -2338,17 +2457,18 @@ trendSceek <- function(gobject, #' #' spark(g) #' @export -spark <- function(gobject, - spat_loc_name = "raw", - feat_type = NULL, - spat_unit = NULL, - percentage = 0.1, - min_count = 10, - expression_values = "raw", - num_core = 5, - covariates = NULL, - return_object = c("data.table", "spark"), - ...) { +spark <- function( + gobject, + spat_loc_name = "raw", + feat_type = NULL, + spat_unit = NULL, + percentage = 0.1, + min_count = 10, + expression_values = "raw", + num_core = 5, + covariates = NULL, + return_object = c("data.table", "spark"), + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2486,25 +2606,26 @@ spark <- function(gobject, #' \itemize{ #' * 1. average gene expression for cells within a grid, see createSpatialGrid #' * 2. perform PCA on the average grid expression profiles -#' * 3. convert variance of principal components (PCs) to z-scores and +#' * 3. convert variance of principal components (PCs) to z-scores and #' select PCs based on a z-score threshold #' } #' @export -detectSpatialPatterns <- function(gobject, - expression_values = c("normalized", "scaled", "custom"), - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - scale_unit = FALSE, - ncp = 100, - show_plot = TRUE, - PC_zscore = 1.5) { +detectSpatialPatterns <- function( + gobject, + expression_values = c("normalized", "scaled", "custom"), + spatial_grid_name = "spatial_grid", + min_cells_per_grid = 4, + scale_unit = FALSE, + ncp = 100, + show_plot = TRUE, + PC_zscore = 1.5) { ############################################################################ stop(wrap_txt( - "This function has not been updated for use with the current version + "This function has not been updated for use with the current version of Giotto. See details: https://github.com/drieslab/Giotto/issues/666#issuecomment-1540447537", - errWidth = TRUE + errWidth = TRUE )) ############################################################################ # expression values to be used @@ -2537,10 +2658,12 @@ detectSpatialPatterns <- function(gobject, if (all(c("sdimx", "sdimy", "sdimz") %in% colnames(spatial_locs))) { spatial_locs <- annotate_spatlocs_with_spatgrid_3D( - spatloc = spatial_locs, spatgrid = spatial_grid) + spatloc = spatial_locs, spatgrid = spatial_grid + ) } else if (all(c("sdimx", "sdimy") %in% colnames(spatial_locs))) { spatial_locs <- annotate_spatlocs_with_spatgrid_2D( - spatloc = spatial_locs, spatgrid = spatial_grid) + spatloc = spatial_locs, spatgrid = spatial_grid + ) } @@ -2575,7 +2698,8 @@ detectSpatialPatterns <- function(gobject, X = t(loc_av_expr_matrix), scale.unit = scale_unit, ncp = ncp, - graph = FALSE) + graph = FALSE + ) # screeplot screeplot <- factoextra::fviz_eig(mypca, addlabels = TRUE, ylim = c(0, 50)) @@ -2607,7 +2731,8 @@ detectSpatialPatterns <- function(gobject, data.table::setnames(pca_matrix_DT, old = "dimkeep", dims_to_keep) } else { pca_matrix_DT <- data.table::as.data.table(pca_matrix[ - , seq_along(dims_to_keep)]) + , seq_along(dims_to_keep) + ]) pca_matrix_DT[, loc_ID := colnames(loc_av_expr_matrix)] } @@ -2622,7 +2747,8 @@ detectSpatialPatterns <- function(gobject, data.table::setnames(feat_matrix_DT, old = "featkeep", dims_to_keep) } else { feat_matrix_DT <- data.table::as.data.table(feat_matrix[ - , seq_along(dims_to_keep)]) + , seq_along(dims_to_keep) + ]) feat_matrix_DT[, gene_ID := rownames(loc_av_expr_matrix)] } @@ -2660,19 +2786,20 @@ detectSpatialPatterns <- function(gobject, #' change save_name in save_param #' @returns ggplot #' @export -showPattern2D <- function(gobject, - spatPatObj, - dimension = 1, - trim = c(0.02, 0.98), - background_color = "white", - grid_border_color = "grey", - show_legend = TRUE, - point_size = 1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPattern2D") { +showPattern2D <- function( + gobject, + spatPatObj, + dimension = 1, + trim = c(0.02, 0.98), + background_color = "white", + grid_border_color = "grey", + show_legend = TRUE, + point_size = 1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPattern2D") { if (!"spatPatObj" %in% class(spatPatObj)) { stop("spatPatObj needs to be the output from detectSpatialPatterns") } @@ -2687,16 +2814,21 @@ showPattern2D <- function(gobject, # annotate grid with PC values annotated_grid <- merge( - spatPatObj$spatial_grid, by.x = "gr_name", PC_DT, by.y = "loc_ID") + spatPatObj$spatial_grid, + by.x = "gr_name", PC_DT, by.y = "loc_ID" + ) # trim PC values if (!is.null(trim)) { boundaries <- stats::quantile(annotated_grid[[ - selected_PC]], probs = trim) + selected_PC + ]], probs = trim) annotated_grid[[selected_PC]][annotated_grid[[ - selected_PC]] < boundaries[1]] <- boundaries[1] + selected_PC + ]] < boundaries[1]] <- boundaries[1] annotated_grid[[selected_PC]][annotated_grid[[ - selected_PC]] > boundaries[2]] <- boundaries[2] + selected_PC + ]] > boundaries[2]] <- boundaries[2] } # 2D-plot @@ -2774,24 +2906,25 @@ showPattern <- function(gobject, spatPatObj, ...) { #' change save_name in save_param #' @returns plotly #' @export -showPattern3D <- function(gobject, - spatPatObj, - dimension = 1, - trim = c(0.02, 0.98), - background_color = "white", - grid_border_color = "grey", - show_legend = TRUE, - point_size = 1, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPattern3D") { +showPattern3D <- function( + gobject, + spatPatObj, + dimension = 1, + trim = c(0.02, 0.98), + background_color = "white", + grid_border_color = "grey", + show_legend = TRUE, + point_size = 1, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPattern3D") { # data.table variables center_x <- x_start <- x_end <- center_y <- y_start <- y_end <- center_z <- z_start <- z_end <- NULL @@ -2810,16 +2943,21 @@ showPattern3D <- function(gobject, # annotate grid with PC values annotated_grid <- merge( - spatPatObj$spatial_grid, by.x = "gr_name", PC_DT, by.y = "loc_ID") + spatPatObj$spatial_grid, + by.x = "gr_name", PC_DT, by.y = "loc_ID" + ) # trim PC values if (!is.null(trim)) { boundaries <- stats::quantile(annotated_grid[[ - selected_PC]], probs = trim) + selected_PC + ]], probs = trim) annotated_grid[[selected_PC]][annotated_grid[[ - selected_PC]] < boundaries[1]] <- boundaries[1] + selected_PC + ]] < boundaries[1]] <- boundaries[1] annotated_grid[[selected_PC]][annotated_grid[[ - selected_PC]] > boundaries[2]] <- boundaries[2] + selected_PC + ]] > boundaries[2]] <- boundaries[2] } @@ -2854,7 +2992,8 @@ showPattern3D <- function(gobject, ) )) dpl <- dpl %>% plotly::colorbar( - title = paste(paste("dim.", dimension, sep = ""), "genes", sep = " ")) + title = paste(paste("dim.", dimension, sep = ""), "genes", sep = " ") + ) # output plot return(GiottoVisuals::plot_output_handler( @@ -2892,18 +3031,19 @@ showPattern3D <- function(gobject, #' change save_name in save_param #' @returns ggplot #' @export -showPatternGenes <- function(gobject, - spatPatObj, - dimension = 1, - top_pos_genes = 5, - top_neg_genes = 5, - point_size = 1, - return_DT = FALSE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPatternGenes") { +showPatternGenes <- function( + gobject, + spatPatObj, + dimension = 1, + top_pos_genes = 5, + top_neg_genes = 5, + point_size = 1, + return_DT = FALSE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPatternGenes") { # data.table variables gene_ID <- NULL @@ -2923,11 +3063,14 @@ showPatternGenes <- function(gobject, # order and subset gene_cor_DT <- gene_cor_DT[ - !is.na(get(selected_PC))][order(get(selected_PC))] + !is.na(get(selected_PC)) + ][order(get(selected_PC))] subset <- gene_cor_DT[ c(seq_len(top_neg_genes), (nrow( - gene_cor_DT) - top_pos_genes):nrow(gene_cor_DT))] + gene_cor_DT + ) - top_pos_genes):nrow(gene_cor_DT)) + ] subset[, gene_ID := factor(gene_ID, gene_ID)] ## return DT and make not plot ## @@ -2939,7 +3082,8 @@ showPatternGenes <- function(gobject, pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_point( data = subset, - aes_string(x = selected_PC, y = "gene_ID"), size = point_size) + aes_string(x = selected_PC, y = "gene_ID"), size = point_size + ) pl <- pl + ggplot2::geom_vline(xintercept = 0, linetype = 2) pl <- pl + ggplot2::labs(x = "correlation", y = "", title = selected_PC) pl <- pl + ggplot2::theme(plot.title = element_text(hjust = 0.5)) @@ -2972,13 +3116,14 @@ showPatternGenes <- function(gobject, #' @returns Data.table with genes associated with selected dimension (PC). #' @details Description. #' @export -selectPatternGenes <- function(spatPatObj, - dimensions = 1:5, - top_pos_genes = 10, - top_neg_genes = 10, - min_pos_cor = 0.5, - min_neg_cor = -0.5, - return_top_selection = FALSE) { +selectPatternGenes <- function( + spatPatObj, + dimensions = 1:5, + top_pos_genes = 10, + top_neg_genes = 10, + min_pos_cor = 0.5, + min_neg_cor = -0.5, + return_top_selection = FALSE) { if (!"spatPatObj" %in% class(spatPatObj)) { stop("spatPatObj needs to be the output from detectSpatialPatterns") } @@ -2998,12 +3143,15 @@ selectPatternGenes <- function(spatPatObj, # melt and select gene_cor_DT_m <- data.table::melt.data.table( - gene_cor_DT, id.vars = "gene_ID") + gene_cor_DT, + id.vars = "gene_ID" + ) gene_cor_DT_m[, top_pos_rank := rank(value), by = "variable"] gene_cor_DT_m[, top_neg_rank := rank(-value), by = "variable"] selection <- gene_cor_DT_m[ top_pos_rank %in% seq_len(top_pos_genes) | - top_neg_rank %in% seq_len(top_neg_genes)] + top_neg_rank %in% seq_len(top_neg_genes) + ] # filter on min correlation selection <- selection[value > min_pos_cor | value < min_neg_cor] @@ -3020,9 +3168,11 @@ selectPatternGenes <- function(spatPatObj, # add other genes back output_selection <- uniq_selection[, .(gene_ID, variable)] other_genes <- gene_cor_DT[!gene_ID %in% output_selection$gene_ID][[ - "gene_ID"]] + "gene_ID" + ]] other_genes_DT <- data.table::data.table( - gene_ID = other_genes, variable = "noDim") + gene_ID = other_genes, variable = "noDim" + ) comb_output_genes <- rbind(output_selection, other_genes_DT) setnames(comb_output_genes, "variable", "patDim") @@ -3056,10 +3206,11 @@ selectPatternGenes <- function(spatPatObj, #' number of k-neighbors in the selected spatial network. Setting b = 0 means #' no smoothing and b = 1 means no contribution from its own expression. #' @keywords internal -do_spatial_knn_smoothing <- function(expression_matrix, - spatial_network, - subset_feats = NULL, - b = NULL) { +do_spatial_knn_smoothing <- function( + expression_matrix, + spatial_network, + subset_feats = NULL, + b = NULL) { # checks if (!is.null(b)) { if (b > 1 | b < 0) { @@ -3087,7 +3238,9 @@ do_spatial_knn_smoothing <- function(expression_matrix, expr_values_dt <- data.table::as.data.table(as.matrix(expr_values)) expr_values_dt[, feat_ID := rownames(expr_values)] expr_values_dt_m <- data.table::melt.data.table( - expr_values_dt, id.vars = "feat_ID", variable.name = "cell_ID") + expr_values_dt, + id.vars = "feat_ID", variable.name = "cell_ID" + ) # merge spatial network and matrix @@ -3101,13 +3254,16 @@ do_spatial_knn_smoothing <- function(expression_matrix, # exclude 0's? # trimmed mean? spatial_network_ext_smooth <- spatial_network_ext[ - , mean(value), by = c("to", "feat_ID")] + , mean(value), + by = c("to", "feat_ID") + ] # convert back to matrix spatial_smooth_dc <- data.table::dcast.data.table( data = spatial_network_ext_smooth, formula = feat_ID ~ to, - value.var = "V1") + value.var = "V1" + ) spatial_smooth_matrix <- dt_to_matrix(spatial_smooth_dc) # if network was not fully connected, some cells might be missing and @@ -3120,11 +3276,13 @@ do_spatial_knn_smoothing <- function(expression_matrix, if (length(missing_cells) > 0) { missing_matrix <- expr_values[, missing_cells] spatial_smooth_matrix <- cbind(spatial_smooth_matrix[ - rownames(expr_values), ], missing_matrix) + rownames(expr_values), + ], missing_matrix) } spatial_smooth_matrix <- spatial_smooth_matrix[ - rownames(expr_values), colnames(expr_values)] + rownames(expr_values), colnames(expr_values) + ] # combine original and smoothed values according to smoothening b # create best guess for b if not given @@ -3172,11 +3330,12 @@ evaluate_provided_spatial_locations <- function(spatial_locs) { #' @description smooth gene expression over a defined spatial grid #' @returns matrix with smoothened gene expression values based on spatial grid #' @keywords internal -do_spatial_grid_averaging <- function(expression_matrix, - spatial_grid, - spatial_locs, - subset_feats = NULL, - min_cells_per_grid = 4) { +do_spatial_grid_averaging <- function( + expression_matrix, + spatial_grid, + spatial_locs, + subset_feats = NULL, + min_cells_per_grid = 4) { # matrix expr_values <- expression_matrix if (!is.null(subset_feats)) { @@ -3194,10 +3353,12 @@ do_spatial_grid_averaging <- function(expression_matrix, # annoate spatial locations with spatial grid if (all(c("sdimx", "sdimy", "sdimz") %in% colnames(spatial_locs))) { spatial_locs <- annotate_spatlocs_with_spatgrid_3D( - spatloc = spatial_locs, spatgrid = spatial_grid) + spatloc = spatial_locs, spatgrid = spatial_grid + ) } else if (all(c("sdimx", "sdimy") %in% colnames(spatial_locs))) { spatial_locs <- annotate_spatlocs_with_spatgrid_2D( - spatloc = spatial_locs, spatgrid = spatial_grid) + spatloc = spatial_locs, spatgrid = spatial_grid + ) } @@ -3279,7 +3440,9 @@ do_spatial_grid_averaging <- function(expression_matrix, #' # This analysis can also be performed with data outside of the gobject #' detectSpatialCorFeatsMatrix( #' expression_matrix = getExpression( -#' g, output = "matrix"), +#' g, +#' output = "matrix" +#' ), #' method = "network", #' spatial_network = getSpatialNetwork(g, output = "networkDT") #' ) @@ -3290,20 +3453,18 @@ NULL #' @rdname detectSpatialCorFeats #' @export -detectSpatialCorFeats <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - method = c("grid", "network"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_name = "Delaunay_network", - network_smoothing = NULL, - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman") -) { +detectSpatialCorFeats <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + method = c("grid", "network"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + network_smoothing = NULL, + spatial_grid_name = "spatial_grid", + min_cells_per_grid = 4, + cor_method = c("pearson", "kendall", "spearman")) { # set default spat_unit and feat_type spat_unit <- set_default_spat_unit( gobject = gobject, @@ -3317,7 +3478,9 @@ detectSpatialCorFeats <- function( ## correlation method to be used cor_method <- match.arg( - cor_method, choices = c("pearson", "kendall", "spearman")) + cor_method, + choices = c("pearson", "kendall", "spearman") + ) ## method to be used method <- match.arg(method, choices = c("grid", "network")) @@ -3325,7 +3488,8 @@ detectSpatialCorFeats <- function( # get expression matrix values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -3372,7 +3536,8 @@ detectSpatialCorFeats <- function( feat_ID <- variable <- NULL cor_spat_matrix <- cor_flex(t_flex(as.matrix( - loc_av_expr_matrix)), method = cor_method) + loc_av_expr_matrix + )), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] cor_spat_DT <- data.table::melt.data.table( @@ -3401,7 +3566,8 @@ detectSpatialCorFeats <- function( cor_spat_matrix <- cor_flex(t_flex(as.matrix( - knn_av_expr_matrix)), method = cor_method) + knn_av_expr_matrix + )), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] cor_spat_DT <- data.table::melt.data.table( @@ -3435,9 +3601,13 @@ detectSpatialCorFeats <- function( # difference in rank scores doubleDT[, spatrank := frank( - -spat_cor, ties.method = "first"), by = feat_ID] + -spat_cor, + ties.method = "first" + ), by = feat_ID] doubleDT[, exprrank := frank( - -expr_cor, ties.method = "first"), by = feat_ID] + -expr_cor, + ties.method = "first" + ), by = feat_ID] doubleDT[, rankdiff := spatrank - exprrank] # sort data @@ -3458,18 +3628,21 @@ detectSpatialCorFeats <- function( #' @rdname detectSpatialCorFeats #' @export -detectSpatialCorFeatsMatrix <- function(expression_matrix, - method = c("grid", "network"), - spatial_network, - spatial_grid, - spatial_locs, - subset_feats = NULL, - network_smoothing = NULL, - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman")) { +detectSpatialCorFeatsMatrix <- function( + expression_matrix, + method = c("grid", "network"), + spatial_network, + spatial_grid, + spatial_locs, + subset_feats = NULL, + network_smoothing = NULL, + min_cells_per_grid = 4, + cor_method = c("pearson", "kendall", "spearman")) { ## correlation method to be used cor_method <- match.arg( - cor_method, choices = c("pearson", "kendall", "spearman")) + cor_method, + choices = c("pearson", "kendall", "spearman") + ) ## method to be used method <- match.arg(method, choices = c("grid", "network")) @@ -3488,7 +3661,8 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, feat_ID <- variable <- NULL cor_spat_matrix <- cor_flex(t_flex( - as.matrix(loc_av_expr_matrix)), method = cor_method) + as.matrix(loc_av_expr_matrix) + ), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] cor_spat_DT <- data.table::melt.data.table( @@ -3508,7 +3682,8 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, cor_spat_matrix <- cor_flex(t_flex(as.matrix( - knn_av_expr_matrix)), method = cor_method) + knn_av_expr_matrix + )), method = cor_method) cor_spat_matrixDT <- data.table::as.data.table(cor_spat_matrix) cor_spat_matrixDT[, feat_ID := rownames(cor_spat_matrix)] cor_spat_DT <- data.table::melt.data.table( @@ -3549,9 +3724,13 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, # difference in rank scores doubleDT[, spatrank := data.table::frank( - -spat_cor, ties.method = "first"), by = feat_ID] + -spat_cor, + ties.method = "first" + ), by = feat_ID] doubleDT[, exprrank := data.table::frank( - -expr_cor, ties.method = "first"), by = feat_ID] + -expr_cor, + ties.method = "first" + ), by = feat_ID] doubleDT[, rankdiff := spatrank - exprrank] # sort data @@ -3590,15 +3769,16 @@ detectSpatialCorFeatsMatrix <- function(expression_matrix, #' @param show_top_feats show top features per gene #' @returns data.table with filtered information #' @export -showSpatialCorFeats <- function(spatCorObject, - use_clus_name = NULL, - selected_clusters = NULL, - feats = NULL, - min_spat_cor = 0.5, - min_expr_cor = NULL, - min_cor_diff = NULL, - min_rank_diff = NULL, - show_top_feats = NULL) { +showSpatialCorFeats <- function( + spatCorObject, + use_clus_name = NULL, + selected_clusters = NULL, + feats = NULL, + min_spat_cor = 0.5, + min_expr_cor = NULL, + min_cor_diff = NULL, + min_rank_diff = NULL, + show_top_feats = NULL) { # data.table variables clus <- feat_ID <- spat_cor <- cor_diff <- rankdiff <- NULL @@ -3616,9 +3796,12 @@ showSpatialCorFeats <- function(spatCorObject, clusters <- clusters_part names_clusters <- names(clusters_part) clusters_DT <- data.table::data.table( - "feat_ID" = names_clusters, "clus" = clusters) + "feat_ID" = names_clusters, "clus" = clusters + ) filter_DT <- data.table::merge.data.table( - filter_DT, clusters_DT, by = "feat_ID") + filter_DT, clusters_DT, + by = "feat_ID" + ) } ## 0. subset clusters @@ -3678,15 +3861,16 @@ showSpatialCorFeats <- function(spatCorObject, #' @param show_top_genes show top genes per gene #' @returns data.table with filtered information #' @export -showSpatialCorGenes <- function(spatCorObject, - use_clus_name = NULL, - selected_clusters = NULL, - genes = NULL, - min_spat_cor = 0.5, - min_expr_cor = NULL, - min_cor_diff = NULL, - min_rank_diff = NULL, - show_top_genes = NULL) { +showSpatialCorGenes <- function( + spatCorObject, + use_clus_name = NULL, + selected_clusters = NULL, + genes = NULL, + min_spat_cor = 0.5, + min_expr_cor = NULL, + min_cor_diff = NULL, + min_rank_diff = NULL, + show_top_genes = NULL) { warning("Deprecated and replaced by showSpatialCorFeats") showSpatialCorFeats( @@ -3721,13 +3905,16 @@ showSpatialCorGenes <- function(spatCorObject, #' g <- GiottoData::loadGiottoMini("visium") #' #' clusterSpatialCorFeats(spatCorObject = detectSpatialCorFeats( -#' g, method = "network")) +#' g, +#' method = "network" +#' )) #' @export -clusterSpatialCorFeats <- function(spatCorObject, - name = "spat_clus", - hclust_method = "ward.D", - k = 10, - return_obj = TRUE) { +clusterSpatialCorFeats <- function( + spatCorObject, + name = "spat_clus", + hclust_method = "ward.D", + k = 10, + return_obj = TRUE) { # check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -3737,7 +3924,9 @@ clusterSpatialCorFeats <- function(spatCorObject, # create correlation matrix cor_DT <- spatCorObject[["cor_DT"]] cor_DT_dc <- data.table::dcast.data.table( - cor_DT, formula = feat_ID ~ variable, value.var = "spat_cor") + cor_DT, + formula = feat_ID ~ variable, value.var = "spat_cor" + ) cor_matrix <- dt_to_matrix(cor_DT_dc) # re-ordering matrix @@ -3773,11 +3962,12 @@ clusterSpatialCorFeats <- function(spatCorObject, #' @param return_obj return spatial correlation object (spatCorObject) #' @returns spatCorObject or cluster results #' @export -clusterSpatialCorGenes <- function(spatCorObject, - name = "spat_clus", - hclust_method = "ward.D", - k = 10, - return_obj = TRUE) { +clusterSpatialCorGenes <- function( + spatCorObject, + name = "spat_clus", + hclust_method = "ward.D", + k = 10, + return_obj = TRUE) { warning("Deprecated and replaced by clusterSpatialCorFeats") clusterSpatialCorFeats( @@ -3816,20 +4006,21 @@ clusterSpatialCorGenes <- function(spatCorObject, #' \code{\link[ComplexHeatmap]{Heatmap}} function from ComplexHeatmap #' @returns Heatmap generated by ComplexHeatmap #' @export -heatmSpatialCorFeats <- function(gobject, - spatCorObject, - use_clus_name = NULL, - show_cluster_annot = TRUE, - show_row_dend = TRUE, - show_column_dend = FALSE, - show_row_names = FALSE, - show_column_names = FALSE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "heatmSpatialCorFeats", - ...) { +heatmSpatialCorFeats <- function( + gobject, + spatCorObject, + use_clus_name = NULL, + show_cluster_annot = TRUE, + show_row_dend = TRUE, + show_column_dend = FALSE, + show_row_names = FALSE, + show_column_names = FALSE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "heatmSpatialCorFeats", + ...) { ## check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -3842,7 +4033,9 @@ heatmSpatialCorFeats <- function(gobject, ## create correlation matrix cor_DT <- spatCorObject[["cor_DT"]] cor_DT_dc <- data.table::dcast.data.table( - cor_DT, formula = feat_ID ~ variable, value.var = "spat_cor") + cor_DT, + formula = feat_ID ~ variable, value.var = "spat_cor" + ) cor_matrix <- dt_to_matrix(cor_DT_dc) # re-ordering matrix @@ -3950,18 +4143,21 @@ heatmSpatialCorGenes <- function(...) { #' spatCorObject <- detectSpatialCorFeats(g, method = "network") #' clusters <- clusterSpatialCorFeats(spatCorObject = spatCorObject) #' -#' rankSpatialCorGroups(gobject = g, spatCorObject = clusters, -#' use_clus_name = "spat_clus") +#' rankSpatialCorGroups( +#' gobject = g, spatCorObject = clusters, +#' use_clus_name = "spat_clus" +#' ) #' @md #' @export -rankSpatialCorGroups <- function(gobject, - spatCorObject, - use_clus_name = NULL, - show_plot = NULL, - return_plot = FALSE, - save_plot = NULL, - save_param = list(), - default_save_name = "rankSpatialCorGroups") { +rankSpatialCorGroups <- function( + gobject, + spatCorObject, + use_clus_name = NULL, + show_plot = NULL, + return_plot = FALSE, + save_plot = NULL, + save_param = list(), + default_save_name = "rankSpatialCorGroups") { ## check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -3981,7 +4177,9 @@ rankSpatialCorGroups <- function(gobject, ## create correlation matrix cor_DT <- spatCorObject[["cor_DT"]] cor_DT_dc <- data.table::dcast.data.table( - cor_DT, formula = feat_ID ~ variable, value.var = "spat_cor") + cor_DT, + formula = feat_ID ~ variable, value.var = "spat_cor" + ) cor_matrix <- dt_to_matrix(cor_DT_dc) # re-ordering matrix @@ -4001,13 +4199,15 @@ rankSpatialCorGroups <- function(gobject, sub_cor_matrix <- cor_matrix[ rownames(cor_matrix) %in% selected_feats, - colnames(cor_matrix) %in% selected_feats] + colnames(cor_matrix) %in% selected_feats + ] mean_score <- mean_flex(sub_cor_matrix) res_cor_list[[id]] <- mean_score sub_neg_cor_matrix <- cor_matrix[ rownames(cor_matrix) %in% selected_feats, - !colnames(cor_matrix) %in% selected_feats] + !colnames(cor_matrix) %in% selected_feats + ] mean_neg_score <- mean_flex(sub_neg_cor_matrix) res_neg_cor_list[[id]] <- mean_neg_score } @@ -4069,18 +4269,19 @@ rankSpatialCorGroups <- function(gobject, #' @details There are 3 different ways of selecting features from the spatial #' co-expression modules #' \itemize{ -#' * 1. weighted: Features are ranked based on summarized pairwise +#' * 1. weighted: Features are ranked based on summarized pairwise #' co-expression scores #' * 2. random: A random selection of features, set seed for reproducibility #' * 3. informed: Features are selected based on prior information/ranking #' } #' @export -getBalancedSpatCoexpressionFeats <- function(spatCorObject, - maximum = 50, - rank = c("weighted", "random", "informed"), - informed_ranking = NULL, - seed = NA, - verbose = TRUE) { +getBalancedSpatCoexpressionFeats <- function( + spatCorObject, + maximum = 50, + rank = c("weighted", "random", "informed"), + informed_ranking = NULL, + seed = NA, + verbose = TRUE) { # data.table vars feat_ID <- variable <- combo <- spat_cor <- rnk <- feat_id <- V1 <- NULL @@ -4109,7 +4310,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, wrap_msg( "There are only ", feat_length, " features for cluster ", clus, "\n", - "Maximum will be set to ", feat_length) + "Maximum will be set to ", feat_length + ) } else { maximum_to_use <- maximum } @@ -4140,7 +4342,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, selected_cluster_features <- names(clusters[clusters == clus]) subset_cor_data <- cor_data[ feat_ID %in% selected_cluster_features & - variable %in% selected_cluster_features] + variable %in% selected_cluster_features + ] subset_cor_data <- subset_cor_data[feat_ID != variable] subset_cor_data <- dt_sort_combine_two_columns( DT = subset_cor_data, @@ -4153,10 +4356,12 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, # create a ranked data.table rnk1DT <- data.table::data.table( feat_id = subset_cor_data$feat_ID, - rnk = seq_along(subset_cor_data$feat_ID)) + rnk = seq_along(subset_cor_data$feat_ID) + ) rnk2DT <- data.table::data.table( feat_id = subset_cor_data$variable, - rnk = seq_along(subset_cor_data$variable)) + rnk = seq_along(subset_cor_data$variable) + ) rnkDT <- data.table::rbindlist(list(rnk1DT, rnk2DT)) data.table::setorder(rnkDT, rnk) @@ -4170,7 +4375,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, wrap_msg( "There are only ", feat_length, " features for cluster ", clus, "\n", - "Maximum will be set to ", feat_length) + "Maximum will be set to ", feat_length + ) } else { maximum_to_use <- maximum } @@ -4208,7 +4414,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, wrap_msg( "There are only ", feat_length, " features for cluster ", clus, "\n", - "Maximum will be set to ", feat_length) + "Maximum will be set to ", feat_length + ) } else { maximum_to_use <- maximum } @@ -4216,7 +4423,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, informed_subset <- informed_ranking_numerical[ names(informed_ranking_numerical) %in% - selected_cluster_features] + selected_cluster_features + ] informed_subset <- sort(informed_subset) feat_length <- length(informed_subset) @@ -4225,7 +4433,8 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, wrap_msg( "There are only ", feat_length, " features for cluster ", clus, "\n", - "Maximum will be set to ", feat_length) + "Maximum will be set to ", feat_length + ) } else { maximum_to_use <- maximum } @@ -4271,20 +4480,25 @@ getBalancedSpatCoexpressionFeats <- function(spatCorObject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' simulateOneGenePatternGiottoObject(gobject = g, -#' pattern_cell_ids = c("AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", -#' "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1"), -#' gene_name = "Gna12") +#' simulateOneGenePatternGiottoObject( +#' gobject = g, +#' pattern_cell_ids = c( +#' "AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", +#' "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" +#' ), +#' gene_name = "Gna12" +#' ) #' @export -simulateOneGenePatternGiottoObject <- function(gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_prob = 0.95, - gradient_direction = NULL, - show_pattern = TRUE, - pattern_colors = c("in" = "green", "out" = "red"), - normalization_params = list()) { +simulateOneGenePatternGiottoObject <- function( + gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_prob = 0.95, + gradient_direction = NULL, + show_pattern = TRUE, + pattern_colors = c("in" = "green", "out" = "red"), + normalization_params = list()) { # data.table variables cell_ID <- sdimx_y <- sdimx <- sdimy <- NULL @@ -4295,7 +4509,8 @@ simulateOneGenePatternGiottoObject <- function(gobject, ## create and add annotation for pattern cell_meta <- pDataDT(gobject) cell_meta[, (pattern_name) := ifelse( - cell_ID %in% pattern_cell_ids, "in", "out")] + cell_ID %in% pattern_cell_ids, "in", "out" + )] newgobject <- addCellMetadata( gobject, @@ -4324,24 +4539,30 @@ simulateOneGenePatternGiottoObject <- function(gobject, copy_obj = TRUE ) cell_meta <- data.table::merge.data.table( - cell_meta, cell_coord, by = "cell_ID") + cell_meta, cell_coord, + by = "cell_ID" + ) ## get number of cells within pattern cell_number <- nrow(cell_meta[get(pattern_name) == "in"]) ## normalized expression - #expr_data <- newgobject@norm_expr - expr_data <- getExpression(gobject = newgobject, - values = "normalized", - output = "matrix") + # expr_data <- newgobject@norm_expr + expr_data <- getExpression( + gobject = newgobject, + values = "normalized", + output = "matrix" + ) result_list <- list() ## raw expression - #raw_expr_data <- newgobject@raw_exprs - raw_expr_data <- getExpression(gobject = newgobject, - values = "raw", - output = "matrix") + # raw_expr_data <- newgobject@raw_exprs + raw_expr_data <- getExpression( + gobject = newgobject, + values = "raw", + output = "matrix" + ) raw_result_list <- list() @@ -4369,15 +4590,19 @@ simulateOneGenePatternGiottoObject <- function(gobject, outside_prob <- 1 - spatial_prob prob_vector <- c( rep(spatial_prob, cell_number), - rep(outside_prob, remaining_cell_number)) + rep(outside_prob, remaining_cell_number) + ) # first get the 'in' pattern sample values randomly sample_values <- sample( - sort_expr_gene, replace = FALSE, size = cell_number, prob = prob_vector) + sort_expr_gene, + replace = FALSE, size = cell_number, prob = prob_vector + ) # then take the remaining 'out' pattern values randomly remain_values <- sort_expr_gene[ - !names(sort_expr_gene) %in% names(sample_values)] + !names(sort_expr_gene) %in% names(sample_values) + ] remain_values <- sample(remain_values, size = length(remain_values)) @@ -4427,18 +4652,22 @@ simulateOneGenePatternGiottoObject <- function(gobject, # change the original matrices raw_expr_data[rownames(raw_expr_data) == gene_name, ] <- new_sim_raw_values - #newgobject@raw_exprs <- raw_expr_data - newgobject <- setExpression(gobject = newgobject, - x = createExprObj( - expression_data = raw_expr_data, - name = "raw"), - name = "raw", - provenance = prov(getCellMetadata(newgobject))) + # newgobject@raw_exprs <- raw_expr_data + newgobject <- setExpression( + gobject = newgobject, + x = createExprObj( + expression_data = raw_expr_data, + name = "raw" + ), + name = "raw", + provenance = prov(getCellMetadata(newgobject)) + ) # recalculate normalized values newgobject <- do.call( "normalizeGiotto", - args = c(gobject = newgobject, normalization_params)) + args = c(gobject = newgobject, normalization_params) + ) newgobject <- addStatistics(gobject = newgobject) @@ -4455,25 +4684,30 @@ simulateOneGenePatternGiottoObject <- function(gobject, #' @description runs all spatial tests for 1 probability and 1 rep #' @returns data.table #' @keywords internal -run_spatial_sim_tests_one_rep <- function(gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_prob = 0.95, - show_pattern = FALSE, - spatial_network_name = "kNN_network", - spat_methods = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - save_plot = FALSE, - save_raw = FALSE, - save_norm = FALSE, - save_dir = "~", - save_name = "plot", - run_simulations = TRUE, - ...) { +run_spatial_sim_tests_one_rep <- function( + gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_prob = 0.95, + show_pattern = FALSE, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + save_plot = FALSE, + save_raw = FALSE, + save_norm = FALSE, + save_dir = "~", + save_name = "plot", + run_simulations = TRUE, + ...) { # data.table variables genes <- prob <- time <- adj.p.value <- method <- p.val <- sd <- qval <- pval <- g <- adjusted_pvalue <- feats <- NULL @@ -4525,9 +4759,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, write.table( x = as.matrix(getExpression( - gobject = simulate_patch, values = "raw", output = "matrix")), + gobject = simulate_patch, values = "raw", output = "matrix" + )), file = paste0( - save_dir, "/", pattern_name, "/", save_name, "_raw_data.txt"), + save_dir, "/", pattern_name, "/", save_name, "_raw_data.txt" + ), sep = "\t" ) } @@ -4541,9 +4777,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, x = as.matrix(getExpression( gobject = simulate_patch, values = "normalized", - output = "matrix")), + output = "matrix" + )), file = paste0( - save_dir, "/", pattern_name, "/", save_name, "_norm_data.txt"), + save_dir, "/", pattern_name, "/", save_name, "_norm_data.txt" + ), sep = "\t" ) } @@ -4557,8 +4795,10 @@ run_spatial_sim_tests_one_rep <- function(gobject, # method selected_method <- spat_methods[test] if (!selected_method %in% - c("binSpect_single", "binSpect_multi", "spatialDE", "spark", - "silhouetteRank")) { + c( + "binSpect_single", "binSpect_multi", "spatialDE", "spark", + "silhouetteRank" + )) { stop(selected_method, " is not a know spatial method") } @@ -4639,9 +4879,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, spatial_gene_results[, time := total_time[["elapsed"]]] spatial_gene_results <- spatial_gene_results[ - , .(feats, adj.p.value, prob, time)] + , .(feats, adj.p.value, prob, time) + ] colnames(spatial_gene_results) <- c( - "feats", "adj.p.value", "prob", "time") + "feats", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := selected_name] } else if (selected_method == "binSpect_multi") { @@ -4659,20 +4901,25 @@ run_spatial_sim_tests_one_rep <- function(gobject, spatial_gene_results[, time := total_time[["elapsed"]]] spatial_gene_results <- spatial_gene_results[ - , .(feats, p.val, prob, time)] + , .(feats, p.val, prob, time) + ] colnames(spatial_gene_results) <- c( - "feats", "adj.p.value", "prob", "time") + "feats", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := selected_name] } else if (selected_method == "spatialDE") { start <- proc.time() new_raw_sim_matrix <- getExpression(simulate_patch, - values = "raw", - output = "matrix") + values = "raw", + output = "matrix" + ) sd_cells <- apply(new_raw_sim_matrix, 2, sd) sd_non_zero_cells <- names(sd_cells[sd_cells != 0]) simulate_patch_fix <- subsetGiotto( - simulate_patch, cell_ids = sd_non_zero_cells) + simulate_patch, + cell_ids = sd_non_zero_cells + ) spatial_gene_results <- do.call("spatialDE", c( gobject = simulate_patch_fix, @@ -4680,14 +4927,17 @@ run_spatial_sim_tests_one_rep <- function(gobject, )) spatialDE_spatialgenes_sim_res <- spatial_gene_results$results$results - if (is.null(spatialDE_spatialgenes_sim_res)) + if (is.null(spatialDE_spatialgenes_sim_res)) { spatialDE_spatialgenes_sim_res <- spatial_gene_results$results + } spatialDE_spatialgenes_sim_res <- data.table::as.data.table( - spatialDE_spatialgenes_sim_res) + spatialDE_spatialgenes_sim_res + ) data.table::setorder(spatialDE_spatialgenes_sim_res, qval, pval) spatialDE_result <- spatialDE_spatialgenes_sim_res[ - g == gene_name] + g == gene_name + ] spatialDE_time <- proc.time() - start @@ -4695,9 +4945,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, spatialDE_result[, time := spatialDE_time[["elapsed"]]] spatial_gene_results <- spatialDE_result[ - , .(g, qval, prob, time)] + , .(g, qval, prob, time) + ] colnames(spatial_gene_results) <- c( - "feats", "adj.p.value", "prob", "time") + "feats", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := "spatialDE"] } else if (selected_method == "spark") { ## spark @@ -4714,9 +4966,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, spark_result[, time := spark_time[["elapsed"]]] spatial_gene_results <- spark_result[ - , .(genes, adjusted_pvalue, prob, time)] + , .(genes, adjusted_pvalue, prob, time) + ] colnames(spatial_gene_results) <- c( - "genes", "adj.p.value", "prob", "time") + "genes", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := "spark"] } else if (selected_method == "silhouetteRank") { ## silhouetterank @@ -4728,7 +4982,9 @@ run_spatial_sim_tests_one_rep <- function(gobject, )) data.table::setnames( - spatial_gene_results, old = "gene", new = "genes") + spatial_gene_results, + old = "gene", new = "genes" + ) spatial_gene_results <- spatial_gene_results[genes == gene_name] silh_time <- proc.time() - start @@ -4737,9 +4993,11 @@ run_spatial_sim_tests_one_rep <- function(gobject, # silhrank uses qval by default spatial_gene_results <- spatial_gene_results[ - , .(genes, qval, prob, time)] + , .(genes, qval, prob, time) + ] colnames(spatial_gene_results) <- c( - "genes", "adj.p.value", "prob", "time") + "genes", "adj.p.value", "prob", "time" + ) spatial_gene_results[, method := "silhouette"] } @@ -4763,25 +5021,30 @@ run_spatial_sim_tests_one_rep <- function(gobject, #' repetitions #' @returns data.table #' @keywords internal -run_spatial_sim_tests_multi <- function(gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_probs = c(0.5, 1), - reps = 2, - spatial_network_name = "kNN_network", - spat_methods = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - save_plot = FALSE, - save_raw = FALSE, - save_norm = FALSE, - save_dir = "~", - verbose = TRUE, - run_simulations = TRUE, - ...) { +run_spatial_sim_tests_multi <- function( + gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_probs = c(0.5, 1), + reps = 2, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + save_plot = FALSE, + save_raw = FALSE, + save_norm = FALSE, + save_dir = "~", + verbose = TRUE, + run_simulations = TRUE, + ...) { prob_list <- list() for (prob_ind in seq_along(spatial_probs)) { prob_i <- spatial_probs[prob_ind] @@ -4793,8 +5056,10 @@ run_spatial_sim_tests_multi <- function(gobject, if (verbose) message("repetition = ", rep_i) - plot_name <- paste0("plot_", gene_name, "_prob", - prob_i, "_rep", rep_i) + plot_name <- paste0( + "plot_", gene_name, "_prob", + prob_i, "_rep", rep_i + ) rep_res <- run_spatial_sim_tests_one_rep(gobject, @@ -4868,33 +5133,42 @@ run_spatial_sim_tests_multi <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' runPatternSimulation(gobject = g, pattern_cell_ids = c("AAAGGGATGTAGCAAG-1", -#' "TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1"), -#' spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2")) +#' runPatternSimulation( +#' gobject = g, pattern_cell_ids = c( +#' "AAAGGGATGTAGCAAG-1", +#' "TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" +#' ), +#' spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2") +#' ) #' @export -runPatternSimulation <- function(gobject, - pattern_name = "pattern", - pattern_colors = c("in" = "green", "out" = "red"), - pattern_cell_ids = NULL, - gene_names = NULL, - spatial_probs = c(0.5, 1), - reps = 2, - spatial_network_name = "kNN_network", - spat_methods = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c("binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank"), - scalefactor = 6000, - save_plot = TRUE, - save_raw = TRUE, - save_norm = TRUE, - save_dir = "~", - max_col = 4, - height = 7, - width = 7, - run_simulations = TRUE, - ...) { +runPatternSimulation <- function( + gobject, + pattern_name = "pattern", + pattern_colors = c("in" = "green", "out" = "red"), + pattern_cell_ids = NULL, + gene_names = NULL, + spatial_probs = c(0.5, 1), + reps = 2, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + scalefactor = 6000, + save_plot = TRUE, + save_raw = TRUE, + save_norm = TRUE, + save_dir = "~", + max_col = 4, + height = 7, + width = 7, + run_simulations = TRUE, + ...) { # data.table variables prob <- method <- adj.p.value <- time <- NULL @@ -4905,8 +5179,10 @@ runPatternSimulation <- function(gobject, pattern_cell_ids = pattern_cell_ids, gene_name = gene_names[1], spatial_prob = 1, - normalization_params = list(scalefactor = scalefactor, - verbose = TRUE) + normalization_params = list( + scalefactor = scalefactor, + verbose = TRUE + ) ) spatPlot2D(example_patch, @@ -4974,13 +5250,17 @@ runPatternSimulation <- function(gobject, if (save_plot == TRUE) { subdir <- paste0(save_dir, "/", pattern_name, "/") - if (!file.exists(subdir)) dir.create( - path = subdir, recursive = TRUE) + if (!file.exists(subdir)) { + dir.create( + path = subdir, recursive = TRUE + ) + } # write results data.table::fwrite( x = generesults, file = paste0(subdir, "/", gene, "_results.txt"), - sep = "\t", quote = FALSE) + sep = "\t", quote = FALSE + ) } all_results[[gene_ind]] <- generesults @@ -5002,21 +5282,28 @@ runPatternSimulation <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_boxplot( data = results, - ggplot2::aes(x = method, y = adj.p.value, color = prob)) + ggplot2::aes(x = method, y = adj.p.value, color = prob) + ) pl <- pl + ggplot2::geom_point( data = results, ggplot2::aes(x = method, y = adj.p.value, color = prob), - size = 2, position = ggplot2::position_jitterdodge()) + size = 2, position = ggplot2::position_jitterdodge() + ) pl <- pl + ggplot2::theme_bw() + ggplot2::theme(axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + )) pl <- pl + ggplot2::facet_wrap(~genes, nrow = nr_rows) pl <- pl + ggplot2::geom_hline( - yintercept = 0.05, color = "red", linetype = 2) + yintercept = 0.05, color = "red", linetype = 2 + ) - grDevices::pdf(file = paste0( - save_dir, "/", pattern_name, "_boxplot_pvalues.pdf"), - width = width, height = height) + grDevices::pdf( + file = paste0( + save_dir, "/", pattern_name, "_boxplot_pvalues.pdf" + ), + width = width, height = height + ) print(pl) grDevices::dev.off() @@ -5026,19 +5313,26 @@ runPatternSimulation <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_boxplot( data = results, - ggplot2::aes(x = method, y = -log10(adj.p.value), color = prob)) + ggplot2::aes(x = method, y = -log10(adj.p.value), color = prob) + ) pl <- pl + ggplot2::geom_point( data = results, ggplot2::aes(x = method, y = -log10(adj.p.value), color = prob), - size = 2, position = ggplot2::position_jitterdodge()) + size = 2, position = ggplot2::position_jitterdodge() + ) pl <- pl + ggplot2::theme_bw() + ggplot2::theme( axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + ) + ) pl <- pl + ggplot2::facet_wrap(~genes, nrow = nr_rows) - grDevices::pdf(file = paste0( - save_dir, "/", pattern_name, "_boxplot_log10pvalues.pdf"), - width = width, height = height) + grDevices::pdf( + file = paste0( + save_dir, "/", pattern_name, "_boxplot_log10pvalues.pdf" + ), + width = width, height = height + ) print(pl) grDevices::dev.off() @@ -5047,18 +5341,25 @@ runPatternSimulation <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_boxplot( data = results, - ggplot2::aes(x = method, y = time, color = prob)) + ggplot2::aes(x = method, y = time, color = prob) + ) pl <- pl + ggplot2::geom_point( data = results, ggplot2::aes(x = method, y = time, color = prob), size = 2, - position = ggplot2::position_jitterdodge()) + position = ggplot2::position_jitterdodge() + ) pl <- pl + ggplot2::theme_bw() + ggplot2::theme( axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + ) + ) - grDevices::pdf(file = paste0( - save_dir, "/", pattern_name, "_boxplot_time.pdf"), - width = width, height = height) + grDevices::pdf( + file = paste0( + save_dir, "/", pattern_name, "_boxplot_time.pdf" + ), + width = width, height = height + ) print(pl) grDevices::dev.off() } @@ -5068,7 +5369,8 @@ runPatternSimulation <- function(gobject, data.table::fwrite( x = results, file = paste0(save_dir, "/", pattern_name, "_results.txt"), - sep = "\t", quote = FALSE) + sep = "\t", quote = FALSE + ) return(results) } else { return(NULL) diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index 25744e8e0..0b9b7cce9 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -5,14 +5,15 @@ #' @description Simulate random network. #' @returns data.table #' @keywords internal -make_simulated_network <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "Delaunay_network", - cluster_column, - number_of_simulations = 100, - set_seed = TRUE, - seed_number = 1234) { +make_simulated_network <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + cluster_column, + number_of_simulations = 100, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -47,8 +48,10 @@ make_simulated_network <- function(gobject, s1_list <- list() s2_list <- list() - all_cell_type <- c(spatial_network_annot$from_cell_type, - spatial_network_annot$to_cell_type) + all_cell_type <- c( + spatial_network_annot$from_cell_type, + spatial_network_annot$to_cell_type + ) middle_point <- length(all_cell_type) / 2 for (sim in seq_len(number_of_simulations)) { @@ -58,13 +61,15 @@ make_simulated_network <- function(gobject, } reshuffled_all_cell_type <- sample( - x = all_cell_type, size = length(all_cell_type), replace = FALSE) + x = all_cell_type, size = length(all_cell_type), replace = FALSE + ) new_from_cell_type <- reshuffled_all_cell_type[seq_len(middle_point)] s1_list[[sim]] <- new_from_cell_type new_to_cell_type <- reshuffled_all_cell_type[ - (middle_point + 1):length(all_cell_type)] + (middle_point + 1):length(all_cell_type) + ] s2_list[[sim]] <- new_to_cell_type } @@ -77,12 +82,16 @@ make_simulated_network <- function(gobject, s1 <- s2 <- unified_int <- type_int <- NULL sample_dt <- data.table::data.table( - s1 = s1_vector, s2 = s2_vector, round = round_vector) + s1 = s1_vector, s2 = s2_vector, round = round_vector + ) uniq_sim_comb <- unique(sample_dt[, .(s1, s2)]) uniq_sim_comb[, unified_int := paste( - sort(c(s1, s2)), collapse = "--"), by = seq_len(nrow(uniq_sim_comb))] + sort(c(s1, s2)), + collapse = "--" + ), by = seq_len(nrow(uniq_sim_comb))] sample_dt[uniq_sim_comb, unified_int := unified_int, on = c( - s1 = "s1", s2 = "s2")] + s1 = "s1", s2 = "s2" + )] sample_dt[, type_int := ifelse(s1 == s2, "homo", "hetero")] return(sample_dt) @@ -118,19 +127,20 @@ make_simulated_network <- function(gobject, #' #' cellProximityEnrichment(g, cluster_column = "leiden_clus") #' @export -cellProximityEnrichment <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "Delaunay_network", - cluster_column, - number_of_simulations = 1000, - adjust_method = c( - "none", "fdr", "bonferroni", "BH", - "holm", "hochberg", "hommel", - "BY" - ), - set_seed = TRUE, - seed_number = 1234) { +cellProximityEnrichment <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + cluster_column, + number_of_simulations = 1000, + adjust_method = c( + "none", "fdr", "bonferroni", "BH", + "holm", "hochberg", "hommel", + "BY" + ), + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -164,7 +174,8 @@ cellProximityEnrichment <- function(gobject, unified_cells <- type_int <- N <- NULL spatial_network_annot <- dt_sort_combine_two_columns( - spatial_network_annot, "to", "from", "unified_cells") + spatial_network_annot, "to", "from", "unified_cells" + ) spatial_network_annot <- spatial_network_annot[!duplicated(unified_cells)] sample_dt <- make_simulated_network( @@ -180,7 +191,8 @@ cellProximityEnrichment <- function(gobject, # combine original and simulated network table_sim_results <- sample_dt[, .N, by = c( - "unified_int", "type_int", "round")] + "unified_int", "type_int", "round" + )] ## create complete simulations ## add 0 if no single interaction was found @@ -188,17 +200,21 @@ cellProximityEnrichment <- function(gobject, # data.table with 0's for all interactions minimum_simulations <- unique_ints[rep( - seq_len(nrow(unique_ints)), number_of_simulations), ] + seq_len(nrow(unique_ints)), number_of_simulations + ), ] minimum_simulations[, round := rep( paste0("sim", seq_len(number_of_simulations)), - each = nrow(unique_ints))] + each = nrow(unique_ints) + )] minimum_simulations[, N := 0] table_sim_minimum_results <- rbind(table_sim_results, minimum_simulations) table_sim_minimum_results[, V1 := sum(N), by = c( - "unified_int", "type_int", "round")] + "unified_int", "type_int", "round" + )] table_sim_minimum_results <- unique( - table_sim_minimum_results[, .(unified_int, type_int, round, V1)]) + table_sim_minimum_results[, .(unified_int, type_int, round, V1)] + ) table_sim_results <- table_sim_minimum_results @@ -209,7 +225,8 @@ cellProximityEnrichment <- function(gobject, spatial_network_annot[, round := "original"] table_orig_results <- spatial_network_annot[, .N, by = c( - "unified_int", "type_int", "round")] + "unified_int", "type_int", "round" + )] table_orig_results[, orig := "original"] data.table::setnames(table_orig_results, old = "N", new = "V1") @@ -220,27 +237,39 @@ cellProximityEnrichment <- function(gobject, # add missing combinations from original or simulations # probably not needed anymore all_simulation_ints <- as.character(unique(table_results[ - orig == "simulations"]$unified_int)) + orig == "simulations" + ]$unified_int)) all_original_ints <- as.character(unique(table_results[ - orig == "original"]$unified_int)) + orig == "original" + ]$unified_int)) missing_in_original <- all_simulation_ints[ - !all_simulation_ints %in% all_original_ints] + !all_simulation_ints %in% all_original_ints + ] missing_in_simulations <- all_original_ints[ - !all_original_ints %in% all_simulation_ints] + !all_original_ints %in% all_simulation_ints + ] create_missing_for_original <- table_results[ - unified_int %in% missing_in_original] + unified_int %in% missing_in_original + ] create_missing_for_original <- unique(create_missing_for_original[ - , c("orig", "V1") := list("original", 0)]) + , c("orig", "V1") := list("original", 0) + ]) create_missing_for_simulations <- table_results[ - unified_int %in% missing_in_simulations] + unified_int %in% missing_in_simulations + ] create_missing_for_simulations <- unique( create_missing_for_simulations[, c("orig", "V1") := list( - "simulations", 0)]) + "simulations", 0 + )] + ) table_results <- do.call( "rbind", - list(table_results, create_missing_for_original, - create_missing_for_simulations)) + list( + table_results, create_missing_for_original, + create_missing_for_simulations + ) + ) ## p-values @@ -264,9 +293,9 @@ cellProximityEnrichment <- function(gobject, } p_orig_higher <- 1 - (sum((orig_value + 1) > (sim_values + 1)) / - number_of_simulations) + number_of_simulations) p_orig_lower <- 1 - (sum((orig_value + 1) < (sim_values + 1)) / - number_of_simulations) + number_of_simulations) combo_list[[int_combo]] <- this_combo p_high[[int_combo]] <- p_orig_higher @@ -275,21 +304,26 @@ cellProximityEnrichment <- function(gobject, res_pvalue_DT <- data.table::data.table( unified_int = as.vector(combo_list), p_higher_orig = p_high, - p_lower_orig = p_low) + p_lower_orig = p_low + ) # depletion or enrichment in barplot format table_mean_results <- table_results[, .(mean(V1)), by = c( - "orig", "unified_int", "type_int")] + "orig", "unified_int", "type_int" + )] table_mean_results_dc <- data.table::dcast.data.table( data = table_mean_results, formula = type_int + unified_int ~ orig, - value.var = "V1") + value.var = "V1" + ) table_mean_results_dc[, original := ifelse(is.na(original), 0, original)] table_mean_results_dc[, enrichm := log2((original + 1) / (simulations + 1))] table_mean_results_dc <- merge( - table_mean_results_dc, res_pvalue_DT, by = "unified_int") + table_mean_results_dc, res_pvalue_DT, + by = "unified_int" + ) data.table::setorder(table_mean_results_dc, enrichm) table_mean_results_dc[, unified_int := factor(unified_int, unified_int)] @@ -300,9 +334,13 @@ cellProximityEnrichment <- function(gobject, PI_value <- int_ranking <- NULL table_mean_results_dc[, p.adj_higher := stats::p.adjust( - p_higher_orig, method = sel_adjust_method)] + p_higher_orig, + method = sel_adjust_method + )] table_mean_results_dc[, p.adj_lower := stats::p.adjust( - p_lower_orig, method = sel_adjust_method)] + p_lower_orig, + method = sel_adjust_method + )] table_mean_results_dc[, PI_value := ifelse(p.adj_higher <= p.adj_lower, @@ -315,8 +353,10 @@ cellProximityEnrichment <- function(gobject, table_mean_results_dc <- table_mean_results_dc[order(-PI_value)] table_mean_results_dc[, int_ranking := seq_len(.N)] - return(list(raw_sim_table = table_results, - enrichm_res = table_mean_results_dc)) + return(list( + raw_sim_table = table_results, + enrichm_res = table_mean_results_dc + )) } @@ -345,17 +385,20 @@ cellProximityEnrichment <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' addCellIntMetadata(g, cluster_column = "leiden_clus", -#' cell_interaction = "custom_leiden") +#' addCellIntMetadata(g, +#' cluster_column = "leiden_clus", +#' cell_interaction = "custom_leiden" +#' ) #' @export -addCellIntMetadata <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network = "spatial_network", - cluster_column, - cell_interaction, - name = "select_int", - return_gobject = TRUE) { +addCellIntMetadata <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network = "spatial_network", + cluster_column, + cell_interaction, + name = "select_int", + return_gobject = TRUE) { # set spatial unit and feature type spat_unit <- set_default_spat_unit( gobject = gobject, @@ -413,16 +456,18 @@ addCellIntMetadata <- function(gobject, cell_type_2 <- strsplit(cell_interaction, split = "--")[[1]][2] cell_metadata[][, c(name) := ifelse(!get(cluster_column) %in% c( - cell_type_1, cell_type_2), "other", - ifelse(get(cluster_column) == cell_type_1 & cell_ID %in% selected_cells, - paste0("select_", cell_type_1), - ifelse(get(cluster_column) == cell_type_2 & cell_ID %in% - selected_cells, paste0("select_", cell_type_2), - ifelse(get(cluster_column) == cell_type_1, - paste0("other_", cell_type_1), - paste0("other_", cell_type_2)) - ) + cell_type_1, cell_type_2 + ), "other", + ifelse(get(cluster_column) == cell_type_1 & cell_ID %in% selected_cells, + paste0("select_", cell_type_1), + ifelse(get(cluster_column) == cell_type_2 & cell_ID %in% + selected_cells, paste0("select_", cell_type_2), + ifelse(get(cluster_column) == cell_type_1, + paste0("other_", cell_type_1), + paste0("other_", cell_type_2) + ) ) + ) )] if (return_gobject == TRUE) { @@ -435,7 +480,8 @@ addCellIntMetadata <- function(gobject, ## update parameters used ## gobject <- update_giotto_params(gobject, - description = "_add_cell_int_info") + description = "_add_cell_int_info" + ) return(gobject) } else { @@ -460,21 +506,26 @@ NULL #' @describeIn cell_proximity_tests t.test #' @keywords internal -.do_ttest <- function(expr_values, - select_ind, - other_ind, - adjust_method, - mean_method, - offset = 0.1) { +.do_ttest <- function( + expr_values, + select_ind, + other_ind, + adjust_method, + mean_method, + offset = 0.1) { vmsg(.is_debug = TRUE, ".do_ttest") # data.table variables p.value <- p.adj <- NULL mean_sel <- my_rowMeans( - expr_values[, select_ind], method = mean_method, offset = offset) + expr_values[, select_ind], + method = mean_method, offset = offset + ) mean_all <- my_rowMeans( - expr_values[, other_ind], method = mean_method, offset = offset) + expr_values[, other_ind], + method = mean_method, offset = offset + ) if (length(select_ind) == 1 | length(other_ind) == 1) { results <- NaN @@ -492,7 +543,8 @@ NULL "feats" = rownames(expr_values), "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff, - "p.value" = unlist(results)) + "p.value" = unlist(results) + ) resultsDT[, p.value := ifelse(is.nan(p.value), 1, p.value)] resultsDT[, p.adj := stats::p.adjust(p.value, method = adjust_method)] setorder(resultsDT, p.adj) @@ -505,20 +557,24 @@ NULL #' @describeIn cell_proximity_tests limma t.test #' @keywords internal -.do_limmatest <- function(expr_values, - select_ind, - other_ind, - mean_method, - offset = 0.1) { +.do_limmatest <- function( + expr_values, + select_ind, + other_ind, + mean_method, + offset = 0.1) { vmsg(.is_debug = TRUE, ".do_limmatest") # data.table variables sel <- other <- feats <- P.Value <- adj.P.Val <- p.adj <- NULL expr_values_subset <- cbind( - expr_values[, select_ind], expr_values[, other_ind]) - mygroups <- c(rep("sel", length(select_ind)), - rep("other", length(other_ind))) + expr_values[, select_ind], expr_values[, other_ind] + ) + mygroups <- c( + rep("sel", length(select_ind)), + rep("other", length(other_ind)) + ) mygroups <- factor(mygroups, levels = unique(mygroups)) design <- stats::model.matrix(~ 0 + mygroups) @@ -536,15 +592,21 @@ NULL # limma to DT limma_result <- limma::topTable( - fitc_ebayes, coef = 1, number = 100000, confint = TRUE) + fitc_ebayes, + coef = 1, number = 100000, confint = TRUE + ) limmaDT <- data.table::as.data.table(limma_result) limmaDT[, feats := rownames(limma_result)] # other info mean_sel <- my_rowMeans( - expr_values[, select_ind], method = mean_method, offset = offset) + expr_values[, select_ind], + method = mean_method, offset = offset + ) mean_all <- my_rowMeans( - expr_values[, other_ind], method = mean_method, offset = offset) + expr_values[, other_ind], + method = mean_method, offset = offset + ) log2fc <- log2((mean_sel + offset) / (mean_all + offset)) diff <- mean_sel - mean_all @@ -558,9 +620,12 @@ NULL ) limmaDT <- data.table::merge.data.table(limmaDT, tempDT, by = "feats") limmaDT <- limmaDT[ - , .(feats, sel, other, log2fc, diff, P.Value, adj.P.Val)] - colnames(limmaDT) <- c("feats", "sel", "other", "log2fc", "diff", - "p.value", "p.adj") + , .(feats, sel, other, log2fc, diff, P.Value, adj.P.Val) + ] + colnames(limmaDT) <- c( + "feats", "sel", "other", "log2fc", "diff", + "p.value", "p.adj" + ) setorder(limmaDT, p.adj) @@ -572,21 +637,26 @@ NULL #' @describeIn cell_proximity_tests wilcoxon #' @keywords internal -.do_wilctest <- function(expr_values, - select_ind, - other_ind, - adjust_method, - mean_method, - offset = 0.1) { +.do_wilctest <- function( + expr_values, + select_ind, + other_ind, + adjust_method, + mean_method, + offset = 0.1) { vmsg(.is_debug = TRUE, ".do_wilctest") # data.table variables p.value <- p.adj <- NULL mean_sel <- my_rowMeans( - expr_values[, select_ind], method = mean_method, offset = offset) + expr_values[, select_ind], + method = mean_method, offset = offset + ) mean_all <- my_rowMeans( - expr_values[, other_ind], method = mean_method, offset = offset) + expr_values[, other_ind], + method = mean_method, offset = offset + ) if (length(select_ind) == 1 | length(other_ind) == 1) { results <- NaN @@ -606,7 +676,8 @@ NULL "other" = mean_all, "log2fc" = log2fc, "diff" = diff, - "p.value" = unlist(results)) + "p.value" = unlist(results) + ) resultsDT[, p.value := ifelse(is.nan(p.value), 1, p.value)] resultsDT[, p.adj := stats::p.adjust(p.value, method = adjust_method)] setorder(resultsDT, p.adj) @@ -616,25 +687,29 @@ NULL # calculate original values -.do_permuttest_original <- function(expr_values, - select_ind, - other_ind, - name = "orig", - mean_method, - offset = 0.1) { +.do_permuttest_original <- function( + expr_values, + select_ind, + other_ind, + name = "orig", + mean_method, + offset = 0.1) { # data.table variables feats <- NULL mean_sel <- my_rowMeans(expr_values[ - , select_ind], method = mean_method, offset = offset) + , select_ind + ], method = mean_method, offset = offset) mean_all <- my_rowMeans(expr_values[ - , other_ind], method = mean_method, offset = offset) + , other_ind + ], method = mean_method, offset = offset) log2fc <- log2((mean_sel + offset) / (mean_all + offset)) diff <- mean_sel - mean_all resultsDT <- data.table( - "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff) + "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff + ) resultsDT[, feats := rownames(expr_values)] resultsDT[, name := name] @@ -644,14 +719,15 @@ NULL # calculate random values -.do_permuttest_random <- function(expr_values, - select_ind, - other_ind, - name = "perm_1", - mean_method, - offset = 0.1, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest_random <- function( + expr_values, + select_ind, + other_ind, + name = "perm_1", + mean_method, + offset = 0.1, + set_seed = TRUE, + seed_number = 1234) { # data.table variables feats <- NULL @@ -668,15 +744,20 @@ NULL # alternative mean_sel <- my_rowMeans( - expr_values[, random_select], method = mean_method, offset = offset) + expr_values[, random_select], + method = mean_method, offset = offset + ) mean_all <- my_rowMeans( - expr_values[, random_other], method = mean_method, offset = offset) + expr_values[, random_other], + method = mean_method, offset = offset + ) log2fc <- log2((mean_sel + offset) / (mean_all + offset)) diff <- mean_sel - mean_all resultsDT <- data.table( - "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff) + "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff + ) resultsDT[, feats := rownames(expr_values)] resultsDT[, name := name] @@ -687,14 +768,15 @@ NULL # calculate multiple random values -.do_multi_permuttest_random <- function(expr_values, - select_ind, - other_ind, - mean_method, - offset = 0.1, - n = 100, - set_seed = TRUE, - seed_number = 1234) { +.do_multi_permuttest_random <- function( + expr_values, + select_ind, + other_ind, + mean_method, + offset = 0.1, + n = 100, + set_seed = TRUE, + seed_number = 1234) { if (set_seed == TRUE) { seed_number_list <- seed_number:(seed_number + (n - 1)) } @@ -720,14 +802,15 @@ NULL #' @describeIn cell_proximity_tests random permutation #' @keywords internal -.do_permuttest <- function(expr_values, - select_ind, other_ind, - n_perm = 1000, - adjust_method = "fdr", - mean_method, - offset = 0.1, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest <- function( + expr_values, + select_ind, other_ind, + n_perm = 1000, + adjust_method = "fdr", + mean_method, + offset = 0.1, + set_seed = TRUE, + seed_number = 1234) { # data.table variables log2fc_diff <- log2fc <- sel <- other <- feats <- p_higher <- p_lower <- perm_sel <- NULL @@ -759,9 +842,11 @@ NULL ## random_perms[, log2fc_diff := rep(original$log2fc, n_perm) - log2fc] random_perms[, - c("perm_sel", "perm_other", "perm_log2fc", "perm_diff") := list( - mean(sel), mean(other), mean(log2fc), mean(diff)), - by = feats] + c("perm_sel", "perm_other", "perm_log2fc", "perm_diff") := list( + mean(sel), mean(other), mean(log2fc), mean(diff) + ), + by = feats + ] ## get p-values random_perms[, p_higher := sum(log2fc_diff > 0), by = feats] @@ -771,19 +856,26 @@ NULL ## combine results permutation and original random_perms_res <- unique(random_perms[ - , .(feats, perm_sel, perm_other, perm_log2fc, perm_diff, p_higher, - p_lower)]) + , .( + feats, perm_sel, perm_other, perm_log2fc, perm_diff, p_higher, + p_lower + ) + ]) results_m <- data.table::merge.data.table( random_perms_res, original[, .(feats, sel, other, log2fc, diff)], - by = "feats") + by = "feats" + ) # select lowest p-value and perform p.adj results_m[, p.value := ifelse(p_higher <= p_lower, p_higher, p_lower)] results_m[, p.adj := stats::p.adjust(p.value, method = adjust_method)] results_m <- results_m[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, perm_sel, - perm_other, perm_log2fc, perm_diff)] + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, perm_sel, + perm_other, perm_log2fc, perm_diff + ) + ] setorder(results_m, p.adj, -log2fc) return(results_m) @@ -798,22 +890,25 @@ NULL #' @returns differential test on subsets of a matrix #' @keywords internal #' @seealso [cell_proximity_tests] -.do_cell_proximity_test <- function(expr_values, - select_ind, other_ind, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - n_perm = 100, - adjust_method = c( - "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "fdr", "none" - ), - set_seed = TRUE, - seed_number = 1234, - verbose = FALSE) { +.do_cell_proximity_test <- function( + expr_values, + select_ind, other_ind, + diff_test = c("permutation", "limma", "t.test", "wilcox"), + mean_method = c("arithmic", "geometric"), + offset = 0.1, + n_perm = 100, + adjust_method = c( + "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "fdr", "none" + ), + set_seed = TRUE, + seed_number = 1234, + verbose = FALSE) { # get parameters diff_test <- match.arg( - diff_test, choices = c("permutation", "limma", "t.test", "wilcox")) + diff_test, + choices = c("permutation", "limma", "t.test", "wilcox") + ) adjust_method <- match.arg(adjust_method, choices = c( "bonferroni", "BH", "holm", "hochberg", "hommel", "BY", "fdr", "none" @@ -867,21 +962,22 @@ NULL #' @returns data.table #' @keywords internal #' @seealso [.do_cell_proximity_test()] for specific tests -.findCellProximityFeats_per_interaction <- function(sel_int, - expr_values, - cell_metadata, - annot_spatnetwork, - cluster_column = NULL, - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - exclude_selected_cells_from_test = TRUE, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - adjust_method = "bonferroni", - nr_permutations = 100, - set_seed = TRUE, - seed_number = 1234) { +.findCellProximityFeats_per_interaction <- function( + sel_int, + expr_values, + cell_metadata, + annot_spatnetwork, + cluster_column = NULL, + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + exclude_selected_cells_from_test = TRUE, + diff_test = c("permutation", "limma", "t.test", "wilcox"), + mean_method = c("arithmic", "geometric"), + offset = 0.1, + adjust_method = "bonferroni", + nr_permutations = 100, + set_seed = TRUE, + seed_number = 1234) { # data.table variables unified_int <- to_cell_type <- from_cell_type <- cell_type <- int_cell_type <- NULL @@ -890,14 +986,16 @@ NULL # select test to perform diff_test <- match.arg( arg = diff_test, - choices = c("permutation", "limma", "t.test", "wilcox")) + choices = c("permutation", "limma", "t.test", "wilcox") + ) # select subnetwork sub_spatnetwork <- annot_spatnetwork[unified_int == sel_int] # unique cell types unique_cell_types <- unique( - c(sub_spatnetwork$to_cell_type, sub_spatnetwork$from_cell_type)) + c(sub_spatnetwork$to_cell_type, sub_spatnetwork$from_cell_type) + ) if (length(unique_cell_types) == 2) { first_cell_type <- unique_cell_types[1] @@ -915,9 +1013,11 @@ NULL ## all cell ids all_cell1 <- cell_metadata[get(cluster_column) == first_cell_type][[ - "cell_ID"]] + "cell_ID" + ]] all_cell2 <- cell_metadata[get(cluster_column) == second_cell_type][[ - "cell_ID"]] + "cell_ID" + ]] ## exclude selected if (exclude_selected_cells_from_test == TRUE) { @@ -1004,7 +1104,8 @@ NULL ## all cell ids all_cell1 <- cell_metadata[get(cluster_column) == first_cell_type][[ - "cell_ID"]] + "cell_ID" + ]] ## exclude selected if (exclude_selected_cells_from_test == TRUE) { @@ -1081,10 +1182,10 @@ NULL #' - at least - the following columns: #' \itemize{ #' * features: All or selected list of tested features -#' * sel: average feature expression in the interacting cells from the target -#' cell type -#' * other: average feature expression in the NOT-interacting cells from the -#' target cell type +#' * sel: average feature expression in the interacting cells from the target +#' cell type +#' * other: average feature expression in the NOT-interacting cells from the +#' target cell type #' * log2fc: log2 fold-change between sel and other #' * diff: spatial expression difference between sel and other #' * p.value: associated p-value @@ -1100,30 +1201,33 @@ NULL #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) #' @export -findInteractionChangedFeats <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = "normalized", - selected_feats = NULL, - cluster_column, - spatial_network_name = "Delaunay_network", - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - adjust_method = c( - "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "fdr", "none" - ), - nr_permutations = 1000, - exclude_selected_cells_from_test = TRUE, - do_parallel = TRUE, - set_seed = TRUE, - seed_number = 1234) { +findInteractionChangedFeats <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = "normalized", + selected_feats = NULL, + cluster_column, + spatial_network_name = "Delaunay_network", + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + diff_test = c("permutation", "limma", "t.test", "wilcox"), + mean_method = c("arithmic", "geometric"), + offset = 0.1, + adjust_method = c( + "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "fdr", "none" + ), + nr_permutations = 1000, + exclude_selected_cells_from_test = TRUE, + do_parallel = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1138,7 +1242,8 @@ findInteractionChangedFeats <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1156,7 +1261,9 @@ findInteractionChangedFeats <- function(gobject, # difference test diff_test <- match.arg( - diff_test, choices = c("permutation", "limma", "t.test", "wilcox")) + diff_test, + choices = c("permutation", "limma", "t.test", "wilcox") + ) # p.adj test adjust_method <- match.arg(adjust_method, choices = c( @@ -1184,25 +1291,25 @@ findInteractionChangedFeats <- function(gobject, if (do_parallel == TRUE) { fin_result <- lapply_flex( X = all_interactions, future.seed = TRUE, FUN = function(x) { - - tempres <- .findCellProximityFeats_per_interaction( - expr_values = expr_values, - cell_metadata = cell_metadata, - annot_spatnetwork = annot_spatnetwork, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - sel_int = x, - cluster_column = cluster_column, - exclude_selected_cells_from_test = exclude_selected_cells_from_test, - diff_test = diff_test, - mean_method = mean_method, - offset = offset, - adjust_method = adjust_method, - nr_permutations = nr_permutations, - set_seed = set_seed, - seed_number = seed_number - ) - }) + tempres <- .findCellProximityFeats_per_interaction( + expr_values = expr_values, + cell_metadata = cell_metadata, + annot_spatnetwork = annot_spatnetwork, + minimum_unique_cells = minimum_unique_cells, + minimum_unique_int_cells = minimum_unique_int_cells, + sel_int = x, + cluster_column = cluster_column, + exclude_selected_cells_from_test = exclude_selected_cells_from_test, + diff_test = diff_test, + mean_method = mean_method, + offset = offset, + adjust_method = adjust_method, + nr_permutations = nr_permutations, + set_seed = set_seed, + seed_number = seed_number + ) + } + ) } else { fin_result <- list() @@ -1241,13 +1348,15 @@ findInteractionChangedFeats <- function(gobject, final_result[, spec_int := paste0(cell_type, "--", int_cell_type)] final_result[, type_int := ifelse( - cell_type == int_cell_type, "homo", "hetero")] + cell_type == int_cell_type, "homo", "hetero" + )] # return(final_result) permutation_test <- ifelse( - diff_test == "permutation", nr_permutations, "no permutations") + diff_test == "permutation", nr_permutations, "no permutations" + ) icfObject <- list( ICFscores = final_result, @@ -1338,10 +1447,10 @@ findCellProximityGenes <- function(...) { #' - at least - the following columns: #' \itemize{ #' * features: All or selected list of tested features -#' * sel: average feature expression in the interacting cells from the target -#' cell type -#' * other: average feature expression in the NOT-interacting cells from the -#' target cell type +#' * sel: average feature expression in the interacting cells from the target +#' cell type +#' * other: average feature expression in the NOT-interacting cells from the +#' target cell type #' * log2fc: log2 fold-change between sel and other #' * diff: spatial expression difference between sel and other #' * p.value: associated p-value @@ -1358,30 +1467,33 @@ findCellProximityGenes <- function(...) { #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' findICF(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' findICF(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) #' @export -findICF <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = "normalized", - selected_feats = NULL, - cluster_column, - spatial_network_name = "Delaunay_network", - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - adjust_method = c( - "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "fdr", "none" - ), - nr_permutations = 100, - exclude_selected_cells_from_test = TRUE, - do_parallel = TRUE, - set_seed = TRUE, - seed_number = 1234) { +findICF <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = "normalized", + selected_feats = NULL, + cluster_column, + spatial_network_name = "Delaunay_network", + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + diff_test = c("permutation", "limma", "t.test", "wilcox"), + mean_method = c("arithmic", "geometric"), + offset = 0.1, + adjust_method = c( + "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "fdr", "none" + ), + nr_permutations = 100, + exclude_selected_cells_from_test = TRUE, + do_parallel = TRUE, + set_seed = TRUE, + seed_number = 1234) { findInteractionChangedFeats( gobject = gobject, feat_type = feat_type, @@ -1459,17 +1571,18 @@ findCPG <- function(...) { #' @param direction differential expression directions to keep #' @returns icfObject that contains the filtered differential feature scores #' @export -filterInteractionChangedFeats <- function(icfObject, - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down")) { +filterInteractionChangedFeats <- function( + icfObject, + min_cells = 4, + min_cells_expr = 1, + min_int_cells = 4, + min_int_cells_expr = 1, + min_fdr = 0.1, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down")) { # data.table variables nr_select <- int_nr_select <- zscores <- log2fc <- sel <- other <- p.adj <- NULL @@ -1480,7 +1593,9 @@ filterInteractionChangedFeats <- function(icfObject, } zscores_column <- match.arg( - zscores_column, choices = c("cell_type", "feats")) + zscores_column, + choices = c("cell_type", "feats") + ) ICFscore <- copy(icfObject[["ICFscores"]]) @@ -1491,7 +1606,7 @@ filterInteractionChangedFeats <- function(icfObject, ## sequential filter steps ## # 1. minimum number of source and target cells selection_scores <- ICFscore[nr_select >= min_cells & - int_nr_select >= min_int_cells] + int_nr_select >= min_int_cells] # 2. create z-scores for log2fc per cell type selection_scores[, zscores := scale(log2fc), by = c(zscores_column)] @@ -1499,12 +1614,12 @@ filterInteractionChangedFeats <- function(icfObject, # 3. filter based on z-scores and minimum levels comb_DT <- rbind( selection_scores[zscores >= min_zscore & - abs(diff) >= min_spat_diff & - log2fc >= min_log2_fc & sel >= min_cells_expr], + abs(diff) >= min_spat_diff & + log2fc >= min_log2_fc & sel >= min_cells_expr], selection_scores[zscores <= -min_zscore & - abs(diff) >= min_spat_diff & - log2fc <= -min_log2_fc & - other >= min_int_cells_expr] + abs(diff) >= min_spat_diff & + log2fc <= -min_log2_fc & + other >= min_int_cells_expr] ) # 4. filter based on adjusted p-value (fdr) @@ -1581,17 +1696,18 @@ filterCellProximityGenes <- function(...) { #' #' filterICF(g_icf) #' @export -filterICF <- function(icfObject, - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down")) { +filterICF <- function( + icfObject, + min_cells = 4, + min_cells_expr = 1, + min_int_cells = 4, + min_int_cells_expr = 1, + min_fdr = 0.1, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down")) { filterInteractionChangedFeats( icfObject = icfObject, min_cells = min_cells, @@ -1649,16 +1765,17 @@ filterCPG <- function(...) { #' @description Combine ICF scores per interaction #' @returns data.table #' @keywords internal -.combineInteractionChangedFeatures_per_interaction <- function(icfObject, - sel_int, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5) { +.combineInteractionChangedFeatures_per_interaction <- function( + icfObject, + sel_int, + selected_feats = NULL, + specific_feats_1 = NULL, + specific_feats_2 = NULL, + min_cells = 5, + min_int_cells = 3, + min_fdr = 0.05, + min_spat_diff = 0, + min_log2_fc = 0.5) { # data.table variables unif_int <- feats <- cell_type <- p.adj <- nr_select <- int_nr_select <- log2fc <- sel <- NULL @@ -1770,16 +1887,23 @@ filterCPG <- function(...) { } else { # make it specific subset_cell_1 <- subset_cell_1[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, cell_type, int_cell_type, nr_select, nr_other, - unif_int)] + unif_int + ) + ] data.table::setnames(subset_cell_1, - old = c("feats", "sel", "other", "log2fc", "diff", - "p.value", "p.adj", "cell_type", "int_cell_type", - "nr_select", "nr_other"), - new = c("feats_1", "sel_1", "other_1", "log2fc_1", - "diff_1", "p.value_1", "p.adj_1", "cell_type_1", - "int_cell_type_1", "nr_select_1", "nr_other_1") + old = c( + "feats", "sel", "other", "log2fc", "diff", + "p.value", "p.adj", "cell_type", "int_cell_type", + "nr_select", "nr_other" + ), + new = c( + "feats_1", "sel_1", "other_1", "log2fc_1", + "diff_1", "p.value_1", "p.adj_1", "cell_type_1", + "int_cell_type_1", "nr_select_1", "nr_other_1" + ) ) } } @@ -1858,23 +1982,32 @@ filterCPG <- function(...) { ) } else { subset_cell_2 <- subset_cell_2[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, cell_type, int_cell_type, nr_select, nr_other, - unif_int)] + unif_int + ) + ] data.table::setnames(subset_cell_2, - old = c("feats", "sel", "other", "log2fc", "diff", - "p.value", "p.adj", "cell_type", "int_cell_type", - "nr_select", "nr_other"), - new = c("feats_2", "sel_2", "other_2", "log2fc_2", - "diff_2", "p.value_2", "p.adj_2", "cell_type_2", - "int_cell_type_2", "nr_select_2", "nr_other_2") + old = c( + "feats", "sel", "other", "log2fc", "diff", + "p.value", "p.adj", "cell_type", "int_cell_type", + "nr_select", "nr_other" + ), + new = c( + "feats_2", "sel_2", "other_2", "log2fc_2", + "diff_2", "p.value_2", "p.adj_2", "cell_type_2", + "int_cell_type_2", "nr_select_2", "nr_other_2" + ) ) } } merge_subsets <- data.table::merge.data.table( - subset_cell_1, subset_cell_2, by = c("unif_int"), - allow.cartesian = TRUE) + subset_cell_1, subset_cell_2, + by = c("unif_int"), + allow.cartesian = TRUE + ) } else if (length(unique_cell_types) == 1) { ## CELL TYPE 1 subset_cell_1 <- subset[cell_type == unique_cell_types[1]] @@ -1912,15 +2045,22 @@ filterCPG <- function(...) { ) } else { subset_cell_1A <- subset_cell_1[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, - cell_type, int_cell_type, nr_select, nr_other, unif_int)] + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, + cell_type, int_cell_type, nr_select, nr_other, unif_int + ) + ] data.table::setnames(subset_cell_1A, - old = c("feats", "sel", "other", "log2fc", "diff", "p.value", - "p.adj", "cell_type", "int_cell_type", "nr_select", - "nr_other"), - new = c("feats_1", "sel_1", "other_1", "log2fc_1", "diff_1", - "p.value_1", "p.adj_1", "cell_type_1", - "int_cell_type_1", "nr_select_1", "nr_other_1") + old = c( + "feats", "sel", "other", "log2fc", "diff", "p.value", + "p.adj", "cell_type", "int_cell_type", "nr_select", + "nr_other" + ), + new = c( + "feats_1", "sel_1", "other_1", "log2fc_1", "diff_1", + "p.value_1", "p.adj_1", "cell_type_1", + "int_cell_type_1", "nr_select_1", "nr_other_1" + ) ) } @@ -1951,21 +2091,30 @@ filterCPG <- function(...) { ) } else { subset_cell_1B <- subset_cell_1[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, - cell_type, int_cell_type, nr_select, nr_other, unif_int)] + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, + cell_type, int_cell_type, nr_select, nr_other, unif_int + ) + ] data.table::setnames(subset_cell_1B, - old = c("feats", "sel", "other", "log2fc", "diff", "p.value", - "p.adj", "cell_type", "int_cell_type", "nr_select", - "nr_other"), - new = c("feats_2", "sel_2", "other_2", "log2fc_2", "diff_2", - "p.value_2", "p.adj_2", "cell_type_2", - "int_cell_type_2", "nr_select_2", "nr_other_2") + old = c( + "feats", "sel", "other", "log2fc", "diff", "p.value", + "p.adj", "cell_type", "int_cell_type", "nr_select", + "nr_other" + ), + new = c( + "feats_2", "sel_2", "other_2", "log2fc_2", "diff_2", + "p.value_2", "p.adj_2", "cell_type_2", + "int_cell_type_2", "nr_select_2", "nr_other_2" + ) ) } merge_subsets <- data.table::merge.data.table( - subset_cell_1A, subset_cell_1B, by = c("unif_int"), - allow.cartesian = TRUE) + subset_cell_1A, subset_cell_1B, + by = c("unif_int"), + allow.cartesian = TRUE + ) } # restrict to feature combinations if needed @@ -2005,23 +2154,25 @@ filterCPG <- function(...) { #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' g_icf <- findInteractionChangedFeats(g, -#' cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) #' #' combineInteractionChangedFeats(g_icf) #' @export -combineInteractionChangedFeats <- function(icfObject, - selected_ints = NULL, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5, - do_parallel = TRUE, - verbose = TRUE) { +combineInteractionChangedFeats <- function( + icfObject, + selected_ints = NULL, + selected_feats = NULL, + specific_feats_1 = NULL, + specific_feats_2 = NULL, + min_cells = 5, + min_int_cells = 3, + min_fdr = 0.05, + min_spat_diff = 0, + min_log2_fc = 0.5, + do_parallel = TRUE, + verbose = TRUE) { # data.table variables unif_int <- feat1_feat2 <- feats_1 <- feats_2 <- comb_logfc <- log2fc_1 <- log2fc_2 <- direction <- NULL @@ -2107,9 +2258,11 @@ combineInteractionChangedFeats <- function(icfObject, "p.adj" = icfObject[["test_info"]][["p.adj"]], "min cells" = icfObject[["test_info"]][["min cells"]], "min interacting cells" = icfObject[["test_info"]][[ - "min interacting cells"]], + "min interacting cells" + ]], "exclude selected cells" = icfObject[["test_info"]][[ - "exclude selected cells"]], + "exclude selected cells" + ]], "perm" = icfObject[["test_info"]][["perm"]] ) ) @@ -2165,23 +2318,26 @@ combineCellProximityGenes <- function(...) { #' @returns icfObject that contains the filtered differential feats scores #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' g_icf <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) #' #' combineICF(g_icf) #' @export -combineICF <- function(icfObject, - selected_ints = NULL, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5, - do_parallel = TRUE, - verbose = TRUE) { +combineICF <- function( + icfObject, + selected_ints = NULL, + selected_feats = NULL, + specific_feats_1 = NULL, + specific_feats_2 = NULL, + min_cells = 5, + min_int_cells = 3, + min_fdr = 0.05, + min_spat_diff = 0, + min_log2_fc = 0.5, + do_parallel = TRUE, + verbose = TRUE) { combineInteractionChangedFeats( icfObject = icfObject, selected_ints = selected_ints, @@ -2243,12 +2399,13 @@ combineCPG <- function(...) { #' @param feat_set_2 second specific feat set from feat pairs #' @returns data.table with average expression scores for each cluster #' @keywords internal -average_feat_feat_expression_in_groups <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - cluster_column = "cell_types", - feat_set_1, - feat_set_2) { +average_feat_feat_expression_in_groups <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + cluster_column = "cell_types", + feat_set_1, + feat_set_2) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2269,7 +2426,8 @@ average_feat_feat_expression_in_groups <- function(gobject, # change column names back to original new_colnames <- gsub( - pattern = "cluster_", replacement = "", colnames(average_DT)) + pattern = "cluster_", replacement = "", colnames(average_DT) + ) colnames(average_DT) <- new_colnames # keep order of colnames @@ -2289,9 +2447,13 @@ average_feat_feat_expression_in_groups <- function(gobject, # get ligand and receptor information ligand_match <- average_DT[ - match(feat_set_1, rownames(average_DT)), , drop = FALSE] + match(feat_set_1, rownames(average_DT)), , + drop = FALSE + ] receptor_match <- average_DT[ - match(feat_set_2, rownames(average_DT)), , drop = FALSE] + match(feat_set_2, rownames(average_DT)), , + drop = FALSE + ] # data.table variables ligand <- LR_comb <- receptor <- LR_expr <- lig_expr <- rec_expr <- @@ -2299,33 +2461,44 @@ average_feat_feat_expression_in_groups <- function(gobject, all_ligand_cols <- colnames(ligand_match) lig_test <- data.table::as.data.table( - reshape2::melt(ligand_match, measure.vars = all_ligand_cols)) + reshape2::melt(ligand_match, measure.vars = all_ligand_cols) + ) lig_test[, ligand := rep(rownames(ligand_match), ncol(ligand_match))] lig_test[, ligand := strsplit(ligand, "\\.")[[1]][1], - by = seq_len(nrow(lig_test))] + by = seq_len(nrow(lig_test)) + ] lig_test[, LR_comb := rep(LR_pairs, ncol(ligand_match))] setnames(lig_test, "value", "lig_expr") setnames(lig_test, "variable", "lig_cell_type") all_receptor_cols <- colnames(receptor_match) rec_test <- data.table::as.data.table(reshape2::melt( - receptor_match, measure.vars = all_receptor_cols)) + receptor_match, + measure.vars = all_receptor_cols + )) rec_test[, receptor := rep(rownames(receptor_match), ncol(receptor_match))] rec_test[, receptor := strsplit( - receptor, "\\.")[[1]][1], by = seq_len(nrow(rec_test))] + receptor, "\\." + )[[1]][1], by = seq_len(nrow(rec_test))] rec_test[, LR_comb := rep(LR_pairs, ncol(receptor_match))] setnames(rec_test, "value", "rec_expr") setnames(rec_test, "variable", "rec_cell_type") lig_rec_test <- merge( - lig_test, rec_test, by = "LR_comb", allow.cartesian = TRUE) + lig_test, rec_test, + by = "LR_comb", allow.cartesian = TRUE + ) lig_rec_test[, LR_expr := lig_expr + rec_expr] lig_rec_test[, lig_cell_type := factor( - lig_cell_type, levels = colnames_order)] + lig_cell_type, + levels = colnames_order + )] lig_rec_test[, rec_cell_type := factor( - rec_cell_type, levels = colnames_order)] + rec_cell_type, + levels = colnames_order + )] setorder(lig_rec_test, LR_comb, lig_cell_type, rec_cell_type) return(lig_rec_test) @@ -2361,26 +2534,29 @@ average_feat_feat_expression_in_groups <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") +#' exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) #' @export -exprCellCellcom <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - cluster_column = "cell_types", - random_iter = 1000, - feat_set_1, - feat_set_2, - log2FC_addendum = 0.1, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("feats", "cells"), - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE) { +exprCellCellcom <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + cluster_column = "cell_types", + random_iter = 1000, + feat_set_1, + feat_set_2, + log2FC_addendum = 0.1, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("feats", "cells"), + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2518,10 +2694,12 @@ exprCellCellcom <- function(gobject, if (adjust_target == "feats") { comScore[, p.adj := stats::p.adjust(pvalue, method = adjust_method), - by = .(LR_cell_comb)] + by = .(LR_cell_comb) + ] } else if (adjust_target == "cells") { comScore[, p.adj := stats::p.adjust(pvalue, method = adjust_method), - by = .(LR_comb)] + by = .(LR_comb) + ] } @@ -2529,7 +2707,8 @@ exprCellCellcom <- function(gobject, all_p.adj <- comScore[["p.adj"]] lowest_p.adj <- min(all_p.adj[all_p.adj != 0]) comScore[, PI := ifelse(p.adj == 0, log2fc * (-log10(lowest_p.adj)), - log2fc * (-log10(p.adj)))] + log2fc * (-log10(p.adj)) + )] data.table::setorder(comScore, LR_comb, -LR_expr) @@ -2550,13 +2729,14 @@ exprCellCellcom <- function(gobject, #' @param seed_number seed number #' @returns list of randomly sampled cell ids with same cell type composition #' @keywords internal -.create_cell_type_random_cell_IDs <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - cluster_column = "cell_types", - needed_cell_types, - set_seed = FALSE, - seed_number = 1234) { +.create_cell_type_random_cell_IDs <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + cluster_column = "cell_types", + needed_cell_types, + set_seed = FALSE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2574,7 +2754,8 @@ exprCellCellcom <- function(gobject, spat_unit = spat_unit ) possible_metadata <- full_metadata[get(cluster_column) %in% unique( - needed_cell_types)] + needed_cell_types + )] sample_ids <- list() @@ -2583,12 +2764,14 @@ exprCellCellcom <- function(gobject, for (i in seq_along(uniq_types)) { uniq_type <- uniq_types[i] length_random <- length(needed_cell_types[ - needed_cell_types == uniq_type]) + needed_cell_types == uniq_type + ]) if (set_seed == TRUE) { set.seed(seed = seed_number) } sub_sample_ids <- possible_metadata[get(cluster_column) == uniq_type][ - sample(x = seq_len(.N), size = length_random)][["cell_ID"]] + sample(x = seq_len(.N), size = length_random) + ][["cell_ID"]] sample_ids[[i]] <- sub_sample_ids } return(unlist(sample_ids)) @@ -2633,58 +2816,59 @@ exprCellCellcom <- function(gobject, #' proximity to each other. #' \itemize{ #' * LR_comb: Pair of ligand and receptor -#' * lig_cell_type: cell type to assess expression level of ligand -#' * lig_expr: average expression of ligand in lig_cell_type -#' * ligand: ligand name -#' * rec_cell_type: cell type to assess expression level of receptor +#' * lig_cell_type: cell type to assess expression level of ligand +#' * lig_expr: average expression of ligand in lig_cell_type +#' * ligand: ligand name +#' * rec_cell_type: cell type to assess expression level of receptor #' * rec_expr: average expression of receptor in rec_cell_type -#' * receptor: receptor name -#' * LR_expr: combined average ligand and receptor expression -#' * lig_nr: total number of cells from lig_cell_type that spatially interact -#' with cells from rec_cell_type -#' * rec_nr: total number of cells from rec_cell_type that spatially interact -#' with cells from lig_cell_type -#' * rand_expr: average combined ligand and receptor expression from random -#' spatial permutations -#' * av_diff: average difference between LR_expr and rand_expr over all -#' random spatial permutations -#' * sd_diff: (optional) standard deviation of the difference between LR_expr -#' and rand_expr over all random spatial permutations -#' * z_score: (optional) z-score -#' * log2fc: log2 fold-change (LR_expr/rand_expr) -#' * pvalue: p-value -#' * LR_cell_comb: cell type pair combination -#' * p.adj: adjusted p-value -#' * PI: significanec score: log2fc \* -log10(p.adj) +#' * receptor: receptor name +#' * LR_expr: combined average ligand and receptor expression +#' * lig_nr: total number of cells from lig_cell_type that spatially interact +#' with cells from rec_cell_type +#' * rec_nr: total number of cells from rec_cell_type that spatially interact +#' with cells from lig_cell_type +#' * rand_expr: average combined ligand and receptor expression from random +#' spatial permutations +#' * av_diff: average difference between LR_expr and rand_expr over all +#' random spatial permutations +#' * sd_diff: (optional) standard deviation of the difference between LR_expr +#' and rand_expr over all random spatial permutations +#' * z_score: (optional) z-score +#' * log2fc: log2 fold-change (LR_expr/rand_expr) +#' * pvalue: p-value +#' * LR_cell_comb: cell type pair combination +#' * p.adj: adjusted p-value +#' * PI: significanec score: log2fc \* -log10(p.adj) #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' #' specificCellCellcommunicationScores(g, cluster_column = "leiden_clus") #' @export -specificCellCellcommunicationScores <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", - random_iter = 100, - cell_type_1 = "astrocyte", - cell_type_2 = "endothelial", - feat_set_1, - feat_set_2, - gene_set_1 = NULL, - gene_set_2 = NULL, - log2FC_addendum = 0.1, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("feats", "cells"), - set_seed = FALSE, - seed_number = 1234, - verbose = TRUE) { +specificCellCellcommunicationScores <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + cluster_column = "cell_types", + random_iter = 100, + cell_type_1 = "astrocyte", + cell_type_2 = "endothelial", + feat_set_1, + feat_set_2, + gene_set_1 = NULL, + gene_set_2 = NULL, + log2FC_addendum = 0.1, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("feats", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2740,7 +2924,8 @@ specificCellCellcommunicationScores <- function(gobject, cell_direction_2 <- paste0(cell_type_2, "-", cell_type_1) subset_annot_network <- annot_network[from_to %in% c( - cell_direction_1, cell_direction_2)] + cell_direction_1, cell_direction_2 + )] # make sure that there are sufficient observations if (nrow(subset_annot_network) <= min_observations) { @@ -2748,7 +2933,8 @@ specificCellCellcommunicationScores <- function(gobject, } else { # subset giotto object to only interacting cells subset_ids <- unique(c( - subset_annot_network$to, subset_annot_network$from)) + subset_annot_network$to, subset_annot_network$from + )) subsetGiotto <- subsetGiotto( gobject = gobject, cell_ids = subset_ids, @@ -2762,7 +2948,9 @@ specificCellCellcommunicationScores <- function(gobject, spat_unit = spat_unit ) nr_cell_types <- temp_meta[cell_ID %in% subset_ids][ - , .N, by = c(cluster_column)] + , .N, + by = c(cluster_column) + ] nr_cells <- nr_cell_types$N names(nr_cells) <- nr_cell_types$cell_types @@ -2776,7 +2964,7 @@ specificCellCellcommunicationScores <- function(gobject, feat_set_2 = feat_set_2 ) comScore <- comScore[(lig_cell_type == cell_type_1 & - rec_cell_type == cell_type_2) | + rec_cell_type == cell_type_2) | (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] comScore[, lig_nr := nr_cells[lig_cell_type]] @@ -2836,7 +3024,7 @@ specificCellCellcommunicationScores <- function(gobject, feat_set_2 = feat_set_2 ) randomScore <- randomScore[(lig_cell_type == cell_type_1 & - rec_cell_type == cell_type_2) | + rec_cell_type == cell_type_2) | (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] @@ -2866,7 +3054,9 @@ specificCellCellcommunicationScores <- function(gobject, if (detailed == TRUE) { av_difference_scores <- rowMeans_flex(total_sum) sd_difference_scores <- apply( - total_sum, MARGIN = 1, FUN = stats::sd) + total_sum, + MARGIN = 1, FUN = stats::sd + ) comScore[, av_diff := av_difference_scores] comScore[, sd_diff := sd_difference_scores] @@ -2884,10 +3074,14 @@ specificCellCellcommunicationScores <- function(gobject, if (adjust_target == "feats") { comScore[, p.adj := stats::p.adjust( - pvalue, method = adjust_method), by = .(LR_cell_comb)] + pvalue, + method = adjust_method + ), by = .(LR_cell_comb)] } else if (adjust_target == "cells") { comScore[, p.adj := stats::p.adjust( - pvalue, method = adjust_method), by = .(LR_comb)] + pvalue, + method = adjust_method + ), by = .(LR_comb)] } # get minimum adjusted p.value that is not zero @@ -2985,29 +3179,30 @@ specificCellCellcommunicationScores <- function(gobject, #' random_iter = 10 #' ) #' @export -spatCellCellcom <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", - random_iter = 1000, - feat_set_1, - feat_set_2, - gene_set_1 = NULL, - gene_set_2 = NULL, - log2FC_addendum = 0.1, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("feats", "cells"), - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = c("a little", "a lot", "none")) { +spatCellCellcom <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + cluster_column = "cell_types", + random_iter = 1000, + feat_set_1, + feat_set_2, + gene_set_1 = NULL, + gene_set_2 = NULL, + log2FC_addendum = 0.1, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("feats", "cells"), + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, + seed_number = 1234, + verbose = c("a little", "a lot", "none")) { verbose <- match.arg(verbose, choices = c("a little", "a lot", "none")) # Set feat_type and spat_unit @@ -3053,7 +3248,8 @@ spatCellCellcom <- function(gobject, ## get all combinations between cell types all_uniq_values <- unique(cell_metadata[[cluster_column]]) same_DT <- data.table::data.table( - V1 = all_uniq_values, V2 = all_uniq_values) + V1 = all_uniq_values, V2 = all_uniq_values + ) combn_DT <- data.table::as.data.table(t(combn(all_uniq_values, m = 2))) combn_DT <- rbind(same_DT, combn_DT) @@ -3062,30 +3258,31 @@ spatCellCellcom <- function(gobject, savelist <- lapply_flex( X = seq_len(nrow(combn_DT)), future.seed = TRUE, cores = cores, fun = function(row) { - cell_type_1 <- combn_DT[row][["V1"]] - cell_type_2 <- combn_DT[row][["V2"]] - - specific_scores <- specificCellCellcommunicationScores( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - cluster_column = cluster_column, - random_iter = random_iter, - cell_type_1 = cell_type_1, - cell_type_2 = cell_type_2, - feat_set_1 = feat_set_1, - feat_set_2 = feat_set_2, - spatial_network_name = spatial_network_name, - log2FC_addendum = log2FC_addendum, - min_observations = min_observations, - detailed = detailed, - adjust_method = adjust_method, - adjust_target = adjust_target, - set_seed = set_seed, - seed_number = seed_number, - verbose = verbose %in% c("a lot") - ) - }) + cell_type_1 <- combn_DT[row][["V1"]] + cell_type_2 <- combn_DT[row][["V2"]] + + specific_scores <- specificCellCellcommunicationScores( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + cluster_column = cluster_column, + random_iter = random_iter, + cell_type_1 = cell_type_1, + cell_type_2 = cell_type_2, + feat_set_1 = feat_set_1, + feat_set_2 = feat_set_2, + spatial_network_name = spatial_network_name, + log2FC_addendum = log2FC_addendum, + min_observations = min_observations, + detailed = detailed, + adjust_method = adjust_method, + adjust_target = adjust_target, + set_seed = set_seed, + seed_number = seed_number, + verbose = verbose %in% c("a lot") + ) + } + ) } else { ## for loop over all combinations ## savelist <- list() @@ -3095,9 +3292,12 @@ spatCellCellcom <- function(gobject, cell_type_1 <- combn_DT[row][["V1"]] cell_type_2 <- combn_DT[row][["V2"]] - if (verbose == "a little" || verbose == "a lot") - cat(sprintf("[PROCESS nr %d : %d and %d] ", - countdown, cell_type_1, cell_type_2)) + if (verbose == "a little" || verbose == "a lot") { + cat(sprintf( + "[PROCESS nr %d : %d and %d] ", + countdown, cell_type_1, cell_type_2 + )) + } if (verbose %in% c("a little", "none")) { specific_verbose <- FALSE @@ -3160,28 +3360,33 @@ spatCellCellcom <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -#' random_iter = 10) +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", +#' random_iter = 10 +#' ) #' #' combCCcom(spatialCC = spatialCC, exprCC = exprCC) #' @export -combCCcom <- function(spatialCC, - exprCC, - min_lig_nr = 3, - min_rec_nr = 3, - min_padj_value = 1, - min_log2fc = 0, - min_av_diff = 0, - detailed = FALSE) { +combCCcom <- function( + spatialCC, + exprCC, + min_lig_nr = 3, + min_rec_nr = 3, + min_padj_value = 1, + min_log2fc = 0, + min_av_diff = 0, + detailed = FALSE) { # data.table variables lig_nr <- rec_nr <- p.adj <- log2fc <- av_diff <- NULL spatialCC <- spatialCC[lig_nr >= min_lig_nr & rec_nr >= min_rec_nr & p.adj <= min_padj_value & abs(log2fc) >= min_log2fc & - abs(av_diff) >= min_av_diff] + abs(av_diff) >= min_av_diff] if (detailed == TRUE) { diff --git a/R/spatial_interaction_spot.R b/R/spatial_interaction_spot.R index ebd7652a9..5ea6327d3 100644 --- a/R/spatial_interaction_spot.R +++ b/R/spatial_interaction_spot.R @@ -20,8 +20,9 @@ NULL #' value inner each spot #' @param cell_IDs cell_IDs #' @keywords internal -.cell_proximity_spots_internal <- function(cell_IDs, - dwls_values) { +.cell_proximity_spots_internal <- function( + cell_IDs, + dwls_values) { # data.table variables value <- unified_int <- Var1 <- Var2 <- internal <- NULL @@ -41,7 +42,8 @@ NULL unified_int_same <- names(same_ct) unified_int_same <- paste0(unified_int_same, "--", unified_int_same) same_ct <- data.table::data.table( - "unified_int" = unified_int_same, "internal" = same_ct) + "unified_int" = unified_int_same, "internal" = same_ct + ) } # calculate proximity of different cell type (A==B) @@ -55,13 +57,17 @@ NULL diff_ct <- data.table::as.data.table(reshape2::melt(diff_ct)) diff_ct <- diff_ct[value != "NA"] diff_ct[, c("Var1", "Var2") := lapply( - .SD, as.character), .SDcols = c("Var1", "Var2")] + .SD, as.character + ), .SDcols = c("Var1", "Var2")] diff_ct[, unified_int := ifelse( Var1 < Var2, paste0(Var1, "--", Var2), - paste0(Var2, "--", Var1))] + paste0(Var2, "--", Var1) + )] diff_ct <- diff_ct[, c("unified_int", "value")] data.table::setnames( - diff_ct, old = c("value"), new = c("internal")) + diff_ct, + old = c("value"), new = c("internal") + ) } # merge spot proximity to proximity data.table @@ -80,21 +86,23 @@ NULL #' value for interacted spots #' @param pairs data.table of paired spots. Format: cell_ID1, cell_ID2, N #' @keywords internal -.cell_proximity_spots_external <- function( - pairs, - dwls_values) { +.cell_proximity_spots_external <- function(pairs, + dwls_values) { cell_IDs <- unique(c(pairs$from, pairs$to)) pairs <- pairs[, .N, by = c("from", "to")] # add internal pairs to make full matrix pairs_spots <- data.table::data.table(from = cell_IDs, to = cell_IDs, N = 0) pairs_balance <- data.table::data.table( - from = pairs$to, to = pairs$from, N = pairs$N) + from = pairs$to, to = pairs$from, N = pairs$N + ) pairs_for_mat <- rbind(pairs_spots, pairs, pairs_balance) pairs_for_mat <- pairs_for_mat[, .N, by = c("from", "to")] # make square matrix of interaction between spots pairs_mat <- reshape2::acast( - pairs_for_mat, from ~ to, value.var = "N", fill = 0) + pairs_for_mat, from ~ to, + value.var = "N", fill = 0 + ) pairs_mat <- pairs_mat[cell_IDs, cell_IDs] # calculate cell-type/cell-type interactions @@ -133,9 +141,10 @@ NULL #' @param pairs_external data.table of paired spots. Format: cell_ID1, cell_ID2, #' N. Passes to `.cell_proximity_spots_external` `pairs` param #' @keywords internal -.cell_proximity_spots <- function(cell_IDs, - pairs_external, - dwls_values) { +.cell_proximity_spots <- function( + cell_IDs, + pairs_external, + dwls_values) { # data.table variables V1 <- internal <- external <- s1 <- s2 <- unified_int <- type_int <- NULL @@ -156,7 +165,9 @@ NULL if (length(cell_IDs) > 0) { proximity_dt <- merge( - proximity_ex, proximity_in, by = "unified_int", all = TRUE) + proximity_ex, proximity_in, + by = "unified_int", all = TRUE + ) } else { proximity_dt <- proximity_ex[, "internal" := 0] } @@ -164,12 +175,15 @@ NULL proximity_dt[, V1 := internal + external] proximity_dt[, s1 := strsplit(as.character( - unified_int), split = "--")[[1]][1], by = seq_len(nrow(proximity_dt))] + unified_int + ), split = "--")[[1]][1], by = seq_len(nrow(proximity_dt))] proximity_dt[, s2 := strsplit(as.character( - unified_int), split = "--")[[1]][2], by = seq_len(nrow(proximity_dt))] + unified_int + ), split = "--")[[1]][2], by = seq_len(nrow(proximity_dt))] proximity_dt[, type_int := ifelse(s1 == s2, "homo", "hetero")] proximity_dt <- proximity_dt[ - , c("unified_int", "type_int", "V1", "external", "internal")] + , c("unified_int", "type_int", "V1", "external", "internal") + ] return(proximity_dt) } @@ -207,33 +221,36 @@ NULL #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) #' #' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) #' #' cellProximityEnrichmentSpots(gobject = g) #' @export -cellProximityEnrichmentSpots <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "spatial_network", - cluster_column = "cell_ID", - cells_in_spot = 1, - number_of_simulations = 100, - adjust_method = c( - "none", "fdr", "bonferroni", "BH", - "holm", "hochberg", "hommel", - "BY" - ), - set_seed = TRUE, - seed_number = 1234, - verbose = FALSE) { +cellProximityEnrichmentSpots <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "spatial_network", + cluster_column = "cell_ID", + cells_in_spot = 1, + number_of_simulations = 100, + adjust_method = c( + "none", "fdr", "bonferroni", "BH", + "holm", "hochberg", "hommel", + "BY" + ), + set_seed = TRUE, + seed_number = 1234, + verbose = FALSE) { # p.adj test sel_adjust_method <- match.arg(adjust_method, choices = c( "none", "fdr", "bonferroni", "BH", @@ -254,7 +271,8 @@ cellProximityEnrichmentSpots <- function(gobject, V1 <- original <- enrichm <- simulations <- NULL spatial_network_annot <- dt_sort_combine_two_columns( - spatial_network_annot, "to", "from", "unified_cells") + spatial_network_annot, "to", "from", "unified_cells" + ) spatial_network_annot <- spatial_network_annot[!duplicated(unified_cells)] # exact spatial_enrichment matrix @@ -325,21 +343,26 @@ cellProximityEnrichmentSpots <- function(gobject, # add missing combinations from original or simulations # probably not needed anymore all_sim_ints <- as.character(unique(table_results[ - orig == "simulations"]$unified_int)) + orig == "simulations" + ]$unified_int)) all_orig_ints <- as.character(unique(table_results[ - orig == "original"]$unified_int)) + orig == "original" + ]$unified_int)) missing_in_orig <- all_sim_ints[!all_sim_ints %in% all_orig_ints] missing_in_sim <- all_orig_ints[!all_orig_ints %in% all_sim_ints] create_missing_for_orig <- table_results[unified_int %in% missing_in_orig] create_missing_for_orig <- unique(create_missing_for_orig[ - , c("orig", "V1") := list("original", 0)]) + , c("orig", "V1") := list("original", 0) + ]) create_missing_for_sim <- table_results[unified_int %in% missing_in_sim] create_missing_for_sim <- unique(create_missing_for_sim[ - , c("orig", "V1") := list("simulations", 0)]) + , c("orig", "V1") := list("simulations", 0) + ]) table_results <- do.call( "rbind", - list(table_results, create_missing_for_orig, create_missing_for_sim)) + list(table_results, create_missing_for_orig, create_missing_for_sim) + ) ## p-values if (verbose) message("3/5 Calculating p-values") @@ -364,9 +387,9 @@ cellProximityEnrichmentSpots <- function(gobject, } p_orig_higher <- 1 - (sum((orig_value + 1) > (sim_values + 1)) / - number_of_simulations) + number_of_simulations) p_orig_lower <- 1 - (sum((orig_value + 1) < (sim_values + 1)) / - number_of_simulations) + number_of_simulations) combo_list[[int_combo]] <- this_combo p_high[[int_combo]] <- p_orig_higher @@ -375,23 +398,29 @@ cellProximityEnrichmentSpots <- function(gobject, res_pvalue_DT <- data.table::data.table( unified_int = as.vector(combo_list), p_higher_orig = p_high, - p_lower_orig = p_low) + p_lower_orig = p_low + ) # depletion or enrichment in barplot format if (verbose) message("4/5 Depletion or enrichment in barplot format") table_mean_results <- table_results[ - , .(mean(V1)), by = c("orig", "unified_int", "type_int")] + , .(mean(V1)), + by = c("orig", "unified_int", "type_int") + ] table_mean_results_dc <- data.table::dcast.data.table( data = table_mean_results, - formula = type_int + unified_int ~ orig, value.var = "V1") + formula = type_int + unified_int ~ orig, value.var = "V1" + ) table_mean_results_dc[, original := ifelse(is.na(original), 0, original)] table_mean_results_dc[, enrichm := log2((original + 1) / (simulations + 1))] table_mean_results_dc <- merge( - table_mean_results_dc, res_pvalue_DT, by = "unified_int") + table_mean_results_dc, res_pvalue_DT, + by = "unified_int" + ) data.table::setorder(table_mean_results_dc, enrichm) table_mean_results_dc[, unified_int := factor(unified_int, unified_int)] @@ -404,9 +433,13 @@ cellProximityEnrichmentSpots <- function(gobject, PI_value <- int_ranking <- NULL table_mean_results_dc[, p.adj_higher := stats::p.adjust( - p_higher_orig, method = sel_adjust_method)] + p_higher_orig, + method = sel_adjust_method + )] table_mean_results_dc[, p.adj_lower := stats::p.adjust( - p_lower_orig, method = sel_adjust_method)] + p_lower_orig, + method = sel_adjust_method + )] table_mean_results_dc[, PI_value := ifelse(p.adj_higher <= p.adj_lower, @@ -420,7 +453,8 @@ cellProximityEnrichmentSpots <- function(gobject, table_mean_results_dc[, int_ranking := seq_len(.N)] return(list( - raw_sim_table = table_results, enrichm_res = table_mean_results_dc)) + raw_sim_table = table_results, enrichm_res = table_mean_results_dc + )) } @@ -442,10 +476,11 @@ cellProximityEnrichmentSpots <- function(gobject, #' #' @returns matrix #' @export -featExpDWLS <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - ave_celltype_exp) { +featExpDWLS <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + ave_celltype_exp) { # exact spatial_enrichment matrix dwls_values <- getSpatialEnrichment(gobject, spat_unit = spat_unit, @@ -502,17 +537,21 @@ featExpDWLS <- function(gobject, #' @param ave_celltype_exp average expression matrix in cell types #' @returns matrix #' @keywords internal -.cal_expr_residual <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - ave_celltype_exp) { +.cal_expr_residual <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + ave_celltype_exp) { # expression data values <- match.arg( - expression_values, choices = c("normalized", "scaled", "custom")) + expression_values, + choices = c("normalized", "scaled", "custom") + ) expr_observed <- slot(gobject@expression[[spat_unit]][[ - feat_type]][[values]], "exprMat") + feat_type + ]][[values]], "exprMat") # Compute predicted feature expression value expr_predicted <- featExpDWLS( @@ -525,9 +564,11 @@ featExpDWLS <- function(gobject, # Get the difference expression matrix between observed and predicted # expression intersect_feature <- intersect( - rownames(expr_predicted), rownames(expr_observed)) + rownames(expr_predicted), rownames(expr_observed) + ) expr_residual <- expr_observed[intersect_feature, ] - expr_predicted[ - intersect_feature, ] + intersect_feature, + ] expr_residual <- as.matrix(expr_residual) return(expr_residual) @@ -554,20 +595,22 @@ featExpDWLS <- function(gobject, #' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) #' #' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) #' #' cellProximityEnrichmentEachSpot(gobject = g) #' @export -cellProximityEnrichmentEachSpot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "spatial_network", - cluster_column = "cell_ID") { +cellProximityEnrichmentEachSpot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "spatial_network", + cluster_column = "cell_ID") { spatial_network_annot <- annotateSpatialNetwork( gobject = gobject, spat_unit = spat_unit, @@ -581,7 +624,8 @@ cellProximityEnrichmentEachSpot <- function(gobject, unified_cells <- type_int <- N <- NULL spatial_network_annot <- dt_sort_combine_two_columns( - spatial_network_annot, "to", "from", "unified_cells") + spatial_network_annot, "to", "from", "unified_cells" + ) spatial_network_annot <- spatial_network_annot[!duplicated(unified_cells)] # exact spatial_enrichment matrix @@ -602,9 +646,11 @@ cellProximityEnrichmentEachSpot <- function(gobject, # get cell-cell types pairs cts <- colnames(dwls_values) ct_pairs <- data.table::data.table( - V1 = rep(cts, each = length(cts)), V2 = rep(cts, length(cts))) + V1 = rep(cts, each = length(cts)), V2 = rep(cts, length(cts)) + ) ct_pairs[, unified_int := paste0(V1, "--", V2), - by = seq_len(nrow(ct_pairs))] + by = seq_len(nrow(ct_pairs)) + ] unified_int <- ct_pairs$unified_int @@ -672,7 +718,8 @@ cellProximityEnrichmentEachSpot <- function(gobject, spot_proximity <- reshape2::melt(spot_proximity) spot_proximity <- data.table::data.table(spot_proximity) spot_proximity[, c("Var1", "Var2") := lapply( - .SD, as.character), .SDcols = c("Var1", "Var2")] + .SD, as.character + ), .SDcols = c("Var1", "Var2")] spot_proximity[, unified_int := paste0(Var1, "--", Var2)] # add to proximityMat(matrix) @@ -687,12 +734,13 @@ cellProximityEnrichmentEachSpot <- function(gobject, #' cell proximity score of selected cell for spots #' @returns data.table #' @keywords internal -.cal_diff_per_interaction <- function(sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual) { +.cal_diff_per_interaction <- function( + sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual) { pcc_diff <- sel <- other <- NULL # get data @@ -731,7 +779,9 @@ cellProximityEnrichmentEachSpot <- function(gobject, expr_residual_dt[, diff := sel - other] results_dt <- data.table::merge.data.table( - expr_residual_dt, pcc_dt, by = "features") + expr_residual_dt, pcc_dt, + by = "features" + ) return(results_dt) } @@ -748,13 +798,14 @@ NULL #' @describeIn do_permuttest_spot Calculate original values for spots #' @keywords internal -.do_permuttest_original_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - name = "orig", - proximityMat, - expr_residual) { +.do_permuttest_original_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + name = "orig", + proximityMat, + expr_residual) { resultsDT <- .cal_diff_per_interaction( sel_int = sel_int, other_ints = other_ints, @@ -770,15 +821,16 @@ NULL #' @describeIn do_permuttest_spot Calculate random values for spots #' @keywords internal -.do_permuttest_random_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - name = "perm_1", - proximityMat, - expr_residual, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest_random_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + name = "perm_1", + proximityMat, + expr_residual, + set_seed = TRUE, + seed_number = 1234) { # data.table variables features <- NULL @@ -801,10 +853,16 @@ NULL prox <- proximityMat[random_sel_int, ] prox <- prox[prox > 0] random_select <- c(sample( - all_IDs, size = l_select_ind - 1, replace = FALSE), names(prox[1])) - random_other <- c(sample( - all_IDs, size = l_other_ind, replace = FALSE), - names(prox[length(prox)])) + all_IDs, + size = l_select_ind - 1, replace = FALSE + ), names(prox[1])) + random_other <- c( + sample( + all_IDs, + size = l_other_ind, replace = FALSE + ), + names(prox[length(prox)]) + ) resultsDT <- .cal_diff_per_interaction( sel_int = random_sel_int, @@ -823,16 +881,17 @@ NULL #' @describeIn do_permuttest_spot Calculate multiple random values for spots #' @keywords internal -.do_multi_permuttest_random_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - n = 100, - cores = NA, - set_seed = TRUE, - seed_number = 1234) { +.do_multi_permuttest_random_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + n = 100, + cores = NA, + set_seed = TRUE, + seed_number = 1234) { if (set_seed == TRUE) { seed_number_list <- seed_number:(seed_number + (n - 1)) } @@ -860,17 +919,18 @@ NULL #' @describeIn do_permuttest_spot Performs permutation test on subsets of a #' matrix for spots #' @keywords internal -.do_permuttest_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # data.table variables log2fc_diff <- log2fc <- sel <- other <- features <- p_higher <- p_lower <- perm_sel <- NULL @@ -906,9 +966,12 @@ NULL ## # random_perms[, log2fc_diff := rep(original$log2fc, n_perm) - log2fc] random_perms[, c( - "perm_sel", "perm_other", "perm_pcc_sel", "perm_pcc_diff") := list( - mean(sel), mean(other), mean(pcc_sel), mean(pcc_diff)), - by = features] + "perm_sel", "perm_other", "perm_pcc_sel", "perm_pcc_diff" + ) := list( + mean(sel), mean(other), mean(pcc_sel), mean(pcc_diff) + ), + by = features + ] ## get p-values random_perms[, p_higher := sum(pcc_diff > 0), by = features] @@ -919,11 +982,13 @@ NULL ## combine results permutation and original random_perms_res <- unique(random_perms[, .( features, perm_sel, perm_other, perm_pcc_sel, perm_pcc_diff, - p_higher, p_lower)]) + p_higher, p_lower + )]) results_m <- data.table::merge.data.table( random_perms_res, original[, .(features, sel, other, diff, pcc_sel, pcc_other, pcc_diff)], - by = "features") + by = "features" + ) # select lowest p-value and perform p.adj results_m[, p.value := ifelse(p_higher <= p_lower, p_higher, p_lower)] @@ -931,7 +996,8 @@ NULL results_m <- results_m[, .( features, sel, other, pcc_sel, pcc_other, pcc_diff, p.value, p.adj, - perm_sel, perm_other, perm_pcc_sel, perm_pcc_diff)] + perm_sel, perm_other, perm_pcc_sel, perm_pcc_diff + )] setorder(results_m, p.adj, -pcc_diff) return(results_m) @@ -944,21 +1010,24 @@ NULL #' for spots #' @returns differential test on subsets of a matrix #' @keywords internal -.do_cell_proximity_test_spot <- function(sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - diff_test, - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.do_cell_proximity_test_spot <- function( + sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + diff_test, + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # get parameters diff_test <- match.arg( - diff_test, choices = c("permutation", "limma", "t.test", "wilcox")) + diff_test, + choices = c("permutation", "limma", "t.test", "wilcox") + ) adjust_method <- match.arg(adjust_method, choices = c( "bonferroni", "BH", "holm", "hochberg", "hommel", "BY", "fdr", "none" @@ -989,21 +1058,22 @@ NULL #' proximity to other cell types for spots. #' @returns data.table #' @keywords internal -.findICF_per_interaction_spot <- function(sel_int, - all_ints, - proximityMat, - expr_residual, - dwls_values, - dwls_cutoff = 0.001, - CCI_cell_score = 0.01, - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = "permutation", - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.findICF_per_interaction_spot <- function( + sel_int, + all_ints, + proximityMat, + expr_residual, + dwls_values, + dwls_cutoff = 0.001, + CCI_cell_score = 0.01, + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + diff_test = "permutation", + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # data.table variables unified_int <- NULL @@ -1028,7 +1098,8 @@ NULL ## do not continue if too few cells ## if (length(spec_IDs) < minimum_unique_cells | length( - other_IDs) < minimum_unique_cells) { + other_IDs + ) < minimum_unique_cells) { result <- NULL } else { result <- .do_cell_proximity_test_spot( @@ -1101,14 +1172,14 @@ NULL #' the following columns: #' \itemize{ #' * features: All or selected list of tested features -#' * sel: average feature expression residual in the interacting cells from -#' the target cell type -#' * other: average feature expression residual in the NOT-interacting cells -#' from the target cell type -#' * pcc_sel: correlation between cell proximity score and expression residual +#' * sel: average feature expression residual in the interacting cells from +#' the target cell type +#' * other: average feature expression residual in the NOT-interacting cells +#' from the target cell type +#' * pcc_sel: correlation between cell proximity score and expression residual #' in the interacting cells from the target cell type -#' * pcc_other: correlation between cell proximity score and expression -#' residual in the NOT-interacting cells from the target cell type +#' * pcc_other: correlation between cell proximity score and expression +#' residual in the NOT-interacting cells from the target cell type #' * pcc_diff: correlation difference between sel and other #' * p.value: associated p-value #' * p.adj: adjusted p-value @@ -1121,47 +1192,55 @@ NULL #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' x <- findMarkers_one_vs_all(g, -#' cluster_column = "leiden_clus", min_feats = 20) +#' cluster_column = "leiden_clus", min_feats = 20 +#' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -#' nrow = length(sign_gene)) +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' nrow = length(sign_gene) +#' ) #' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +#' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) #' #' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) #' g_expression <- getExpression(g, output = "matrix") -#' -#' findICFSpot(g, spat_unit = "cell", feat_type = "rna", -#' ave_celltype_exp = g_expression, spatial_network_name = "spatial_network") +#' +#' findICFSpot(g, +#' spat_unit = "cell", feat_type = "rna", +#' ave_celltype_exp = g_expression, spatial_network_name = "spatial_network" +#' ) #' @export -findICFSpot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - ave_celltype_exp, - selected_features = NULL, - spatial_network_name = "Delaunay_network", - minimum_unique_cells = 5, - minimum_unique_int_cells = 5, - CCI_cell_score = 0.1, - dwls_cutoff = 0.001, - diff_test = "permutation", - nr_permutations = 100, - adjust_method = "fdr", - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = FALSE) { +findICFSpot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + ave_celltype_exp, + selected_features = NULL, + spatial_network_name = "Delaunay_network", + minimum_unique_cells = 5, + minimum_unique_int_cells = 5, + CCI_cell_score = 0.1, + dwls_cutoff = 0.001, + diff_test = "permutation", + nr_permutations = 100, + adjust_method = "fdr", + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, + seed_number = 1234, + verbose = FALSE) { # data.table variables unified_int <- NULL # expression data values <- match.arg( - expression_values, choices = c("normalized", "scaled", "custom")) + expression_values, + choices = c("normalized", "scaled", "custom") + ) features_overlap <- intersect( - slot(gobject, "feat_ID")[[feat_type]], rownames(ave_celltype_exp)) + slot(gobject, "feat_ID")[[feat_type]], rownames(ave_celltype_exp) + ) ave_celltype_exp_sel <- ave_celltype_exp[features_overlap, ] expr_residual <- .cal_expr_residual( gobject = gobject, @@ -1173,7 +1252,8 @@ findICFSpot <- function(gobject, ## test selected features ## if (!is.null(selected_features)) { expr_residual <- expr_residual[ - rownames(expr_residual) %in% selected_features, ] + rownames(expr_residual) %in% selected_features, + ] } # compute cell proximity for each spot @@ -1186,9 +1266,11 @@ findICFSpot <- function(gobject, # compute correlation between features and cell-types to find ICFs all_ints <- data.table::data.table(unified_int = rownames(proximityMat)) all_ints[, cell_type := strsplit( - as.character(unified_int), "--")[[1]][1], by = seq_len(nrow(all_ints))] + as.character(unified_int), "--" + )[[1]][1], by = seq_len(nrow(all_ints))] all_ints[, int_cell_type := strsplit( - as.character(unified_int), "--")[[1]][2], by = seq_len(nrow(all_ints))] + as.character(unified_int), "--" + )[[1]][2], by = seq_len(nrow(all_ints))] # exact spatial_enrichment matrix dwls_values <- getSpatialEnrichment( @@ -1207,23 +1289,24 @@ findICFSpot <- function(gobject, if (do_parallel == TRUE) { fin_result <- lapply_flex( X = all_ints$unified_int, cores = cores, fun = function(x) { - tempres <- .findICF_per_interaction_spot( - sel_int = x, - all_ints = all_ints, - proximityMat = proximityMat, - expr_residual = expr_residual, - dwls_values = dwls_values, - dwls_cutoff = dwls_cutoff, - CCI_cell_score = CCI_cell_score, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - n_perm = nr_permutations, - adjust_method = adjust_method, - cores = cores, - set_seed = set_seed, - seed_number = seed_number - ) - }) + tempres <- .findICF_per_interaction_spot( + sel_int = x, + all_ints = all_ints, + proximityMat = proximityMat, + expr_residual = expr_residual, + dwls_values = dwls_values, + dwls_cutoff = dwls_cutoff, + CCI_cell_score = CCI_cell_score, + minimum_unique_cells = minimum_unique_cells, + minimum_unique_int_cells = minimum_unique_int_cells, + n_perm = nr_permutations, + adjust_method = adjust_method, + cores = cores, + set_seed = set_seed, + seed_number = seed_number + ) + } + ) } else { fin_result <- list() @@ -1258,13 +1341,15 @@ findICFSpot <- function(gobject, final_result[, spec_int := paste0(cell_type, "--", int_cell_type)] final_result[, type_int := ifelse( - cell_type == int_cell_type, "homo", "hetero")] + cell_type == int_cell_type, "homo", "hetero" + )] # return(final_result) permutation_test <- ifelse( - diff_test == "permutation", nr_permutations, "no permutations") + diff_test == "permutation", nr_permutations, "no permutations" + ) icfObject <- list( ICFscores = final_result, @@ -1310,16 +1395,17 @@ findICFSpot <- function(gobject, #' #' filterICFSpot(icfObject = icfObject) #' @export -filterICFSpot <- function(icfObject, - min_cells = 4, - min_cells_expr_resi = 0.05, - min_int_cells = 4, - min_int_cells_expr_resi = 0.05, - min_fdr = 0.5, - min_pcc_diff = 0.05, - min_zscore = 0.05, - zscores_column = c("cell_type", "features"), - direction = c("both", "up", "down")) { +filterICFSpot <- function( + icfObject, + min_cells = 4, + min_cells_expr_resi = 0.05, + min_int_cells = 4, + min_int_cells_expr_resi = 0.05, + min_fdr = 0.5, + min_pcc_diff = 0.05, + min_zscore = 0.05, + zscores_column = c("cell_type", "features"), + direction = c("both", "up", "down")) { # data.table variables nr_select <- int_nr_select <- zscores <- perm_diff <- sel <- other <- p.adj <- NULL @@ -1331,7 +1417,9 @@ filterICFSpot <- function(icfObject, } zscores_column <- match.arg( - zscores_column, choices = c("cell_type", "features")) + zscores_column, + choices = c("cell_type", "features") + ) ICFscore <- copy(icfObject[["ICFscores"]]) @@ -1342,7 +1430,8 @@ filterICFSpot <- function(icfObject, ## sequential filter steps ## # 1. minimum number of source and target cells selection_scores <- ICFscore[ - nr_select >= min_cells & int_nr_select >= min_int_cells] + nr_select >= min_cells & int_nr_select >= min_int_cells + ] # 2. create z-scores for log2fc per cell type selection_scores[, zscores := scale(perm_diff), by = c(zscores_column)] @@ -1350,9 +1439,11 @@ filterICFSpot <- function(icfObject, # 3. filter based on z-scores and minimum levels comb_DT <- rbind( selection_scores[zscores >= min_zscore & abs( - perm_diff) >= min_pcc_diff & sel >= min_cells_expr_resi], + perm_diff + ) >= min_pcc_diff & sel >= min_cells_expr_resi], selection_scores[zscores <= -min_zscore & abs( - perm_diff) >= min_pcc_diff & other >= min_int_cells_expr_resi] + perm_diff + ) >= min_pcc_diff & other >= min_int_cells_expr_resi] ) # 4. filter based on adjusted p-value (fdr) @@ -1387,24 +1478,29 @@ filterICFSpot <- function(icfObject, #' @returns plot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' icfObject <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) #' -#' plotICFSpot(gobject = g, icfObject = icfObject, -#' source_type = "1", source_markers = "Ccnd2", -#' ICF_features = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +#' plotICFSpot( +#' gobject = g, icfObject = icfObject, +#' source_type = "1", source_markers = "Ccnd2", +#' ICF_features = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +#' ) #' @export -plotICFSpot <- function(gobject, - icfObject, - source_type, - source_markers, - ICF_features, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotICFSpot") { +plotICFSpot <- function( + gobject, + icfObject, + source_type, + source_markers, + ICF_features, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotICFSpot") { # data.table variables cell_type <- int_cell_type <- pcc_diff <- feats <- perm_diff <- NULL @@ -1435,16 +1531,20 @@ plotICFSpot <- function(gobject, features <- group <- NULL tempDT <- ICFscores[feats %in% all_features][ - cell_type == source_type][int_cell_type %in% neighbor_types] + cell_type == source_type + ][int_cell_type %in% neighbor_types] tempDT[, features := factor(feats, levels = detected_features)] tempDT[, group := names(ICF_features[ - ICF_features == feats]), by = seq_len(nrow(tempDT))] + ICF_features == feats + ]), by = seq_len(nrow(tempDT))] if (is.null(cell_color_code)) { mycolors <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = length(unique( - tempDT$int_cell_type))) + instrs = instructions(gobject) + )(n = length(unique( + tempDT$int_cell_type + ))) names(mycolors) <- unique(tempDT$int_cell_type) } else { mycolors <- cell_color_code @@ -1454,17 +1554,20 @@ plotICFSpot <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.text.x = ggplot2::element_text( - size = 14, angle = 45, vjust = 1, hjust = 1), + size = 14, angle = 45, vjust = 1, hjust = 1 + ), axis.text.y = ggplot2::element_text(size = 14), axis.title = ggplot2::element_text(size = 14) ) pl <- pl + ggplot2::geom_bar( data = tempDT, ggplot2::aes(x = feats, y = perm_diff, fill = int_cell_type), - stat = "identity", position = ggplot2::position_dodge()) + stat = "identity", position = ggplot2::position_dodge() + ) pl <- pl + ggplot2::scale_fill_manual(values = mycolors) pl <- pl + ggplot2::labs(x = "", title = paste0( - "fold-change z-scores in ", source_type)) + "fold-change z-scores in ", source_type + )) return(plot_output_handler( gobject = gobject, @@ -1500,29 +1603,34 @@ plotICFSpot <- function(gobject, #' g <- GiottoData::loadGiottoMini("visium") #' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") #' -#' plotCellProximityFeatSpot(gobject = g, icfObject = icfObject, -#' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE, -#' min_pcc_diff = 0.01) +#' plotCellProximityFeatSpot( +#' gobject = g, icfObject = icfObject, +#' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE, +#' min_pcc_diff = 0.01 +#' ) #' @export -plotCellProximityFeatSpot <- function(gobject, - icfObject, - method = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot"), - min_cells = 4, - min_cells_expr_resi = 0.05, - min_int_cells = 4, - min_int_cells_expr_resi = 0.05, - min_fdr = 0.5, - min_pcc_diff = 0.05, - min_zscore = 0.05, - zscores_column = c("cell_type", "features"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCellProximityFeats") { +plotCellProximityFeatSpot <- function( + gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 4, + min_cells_expr_resi = 0.05, + min_int_cells = 4, + min_int_cells_expr_resi = 0.05, + min_fdr = 0.5, + min_pcc_diff = 0.05, + min_zscore = 0.05, + zscores_column = c("cell_type", "features"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCellProximityFeats") { if (!"icfObject" %in% class(icfObject)) { stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") @@ -1532,14 +1640,17 @@ plotCellProximityFeatSpot <- function(gobject, show_plot <- ifelse( is.null(show_plot), readGiottoInstructions(gobject, param = "show_plot"), - show_plot) + show_plot + ) save_plot <- ifelse( is.null(save_plot), readGiottoInstructions(gobject, param = "save_plot"), - save_plot) + save_plot + ) return_plot <- ifelse( is.null(return_plot), - readGiottoInstructions(gobject, param = "return_plot"), return_plot) + readGiottoInstructions(gobject, param = "return_plot"), return_plot + ) ## first filter @@ -1562,8 +1673,11 @@ plotCellProximityFeatSpot <- function(gobject, ## other parameters method <- match.arg( method, - choices = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot")) + choices = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ) + ) # variables @@ -1577,7 +1691,9 @@ plotCellProximityFeatSpot <- function(gobject, data = complete_part, ggplot2::aes( x = perm_diff, - y = ifelse(is.infinite(-log10(p.adj)), 1000, -log10(p.adj)))) + y = ifelse(is.infinite(-log10(p.adj)), 1000, -log10(p.adj)) + ) + ) pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_vline(xintercept = 0, linetype = 2) pl <- pl + ggplot2::labs(x = "pcc diff", y = "-log10(p.adjusted)") @@ -1592,8 +1708,11 @@ plotCellProximityFeatSpot <- function(gobject, if (save_plot == TRUE) { do.call( "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1609,10 +1728,12 @@ plotCellProximityFeatSpot <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( data = complete_part, - ggplot2::aes(x = unif_int, fill = unif_int)) + ggplot2::aes(x = unif_int, fill = unif_int) + ) pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.text.x = ggplot2::element_text - (angle = 90, hjust = 1, vjust = 1)) + (angle = 90, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::coord_flip() ## print plot @@ -1624,8 +1745,11 @@ plotCellProximityFeatSpot <- function(gobject, if (save_plot == TRUE) { do.call( "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1642,14 +1766,17 @@ plotCellProximityFeatSpot <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( data = complete_part, - ggplot2::aes(x = cell_type, fill = int_cell_type)) + ggplot2::aes(x = cell_type, fill = int_cell_type) + ) if (!is.null(cell_color_code)) { pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) } pl <- pl + ggplot2::theme_classic() + ggplot2::theme( - axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::labs( - x = "", y = "# of features influenced by cell neighborhood") + x = "", y = "# of features influenced by cell neighborhood" + ) ## print plot @@ -1661,8 +1788,11 @@ plotCellProximityFeatSpot <- function(gobject, if (save_plot == TRUE) { do.call( "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1683,14 +1813,18 @@ plotCellProximityFeatSpot <- function(gobject, ) + ggalluvial::geom_alluvium(aes(fill = cell_type), width = 1 / 12) + ggalluvial::geom_stratum( - width = 1 / 12, fill = "black", color = "grey") + + width = 1 / 12, fill = "black", color = "grey" + ) + ggplot2::scale_x_discrete( - limits = c("cell type", "neighbours"), expand = c(.05, .05)) + + limits = c("cell type", "neighbours"), expand = c(.05, .05) + ) + ggplot2::geom_label( - tat = "stratum", label.strata = TRUE, size = 3) + + tat = "stratum", label.strata = TRUE, size = 3 + ) + ggplot2::theme_classic() + ggplot2::labs( - x = "", y = "# of features influenced by cell neighborhood") + x = "", y = "# of features influenced by cell neighborhood" + ) if (!is.null(cell_color_code)) { pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) @@ -1707,8 +1841,11 @@ plotCellProximityFeatSpot <- function(gobject, if (save_plot == TRUE) { do.call( "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1717,23 +1854,30 @@ plotCellProximityFeatSpot <- function(gobject, } } else if (method == "dotplot") { changed_features <- complete_part[ - , .N, by = c("cell_type", "int_cell_type")] + , .N, + by = c("cell_type", "int_cell_type") + ] changed_features[, cell_type := factor(cell_type, unique(cell_type))] changed_features[, int_cell_type := factor( - int_cell_type, unique(int_cell_type))] + int_cell_type, unique(int_cell_type) + )] pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_point( data = changed_features, - ggplot2::aes(x = cell_type, y = int_cell_type, size = N)) + ggplot2::aes(x = cell_type, y = int_cell_type, size = N) + ) pl <- pl + ggplot2::scale_size_continuous( - guide = guide_legend(title = "# of ICFs")) + guide = guide_legend(title = "# of ICFs") + ) pl <- pl + ggplot2::theme(axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + )) pl <- pl + ggplot2::labs( - x = "source cell type", y = "neighbor cell type") + x = "source cell type", y = "neighbor cell type" + ) ## print plot if (show_plot == TRUE) { @@ -1744,8 +1888,11 @@ plotCellProximityFeatSpot <- function(gobject, if (save_plot == TRUE) { do.call( "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1754,17 +1901,21 @@ plotCellProximityFeatSpot <- function(gobject, } } else if (method == "heatmap") { changed_features <- complete_part[ - , .N, by = c("cell_type", "int_cell_type")] + , .N, + by = c("cell_type", "int_cell_type") + ] changed_features[, cell_type := factor(cell_type, unique(cell_type))] changed_features[, int_cell_type := factor( - int_cell_type, unique(int_cell_type))] + int_cell_type, unique(int_cell_type) + )] changed_features_d <- data.table::dcast.data.table( changed_features, cell_type ~ int_cell_type, value.var = "N", - fill = 0) + fill = 0 + ) changed_features_m <- dt_to_matrix(changed_features_d) col_fun <- GiottoVisuals::colorRamp2( @@ -1772,8 +1923,10 @@ plotCellProximityFeatSpot <- function(gobject, colors = c("white", "white", "blue", "yellow", "red") ) - heatm <- ComplexHeatmap::Heatmap(as.matrix(log2( - changed_features_m + 1)), + heatm <- ComplexHeatmap::Heatmap( + as.matrix(log2( + changed_features_m + 1 + )), col = col_fun, row_title = "cell_type", column_title = "int_cell_type", @@ -1789,8 +1942,11 @@ plotCellProximityFeatSpot <- function(gobject, if (save_plot == TRUE) { do.call( "all_plots_save_function", - c(list(gobject = gobject, plot_object = heatm, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = heatm, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1839,54 +1995,55 @@ plotCellProximityFeatSpot <- function(gobject, #' values in cells that are spatially in proximity to each other. #' \itemize{ #' * LR_comb: Pair of ligand and receptor -#' * lig_cell_type: cell type to assess expression level of ligand -#' * lig_expr: average expression residual (observed - DWLS_predicted) of -#' ligand in lig_cell_type -#' * ligand: ligand name -#' * rec_cell_type: cell type to assess expression level of receptor -#' * rec_expr: average expression residual(observed - DWLS_predicted) of +#' * lig_cell_type: cell type to assess expression level of ligand +#' * lig_expr: average expression residual (observed - DWLS_predicted) of +#' ligand in lig_cell_type +#' * ligand: ligand name +#' * rec_cell_type: cell type to assess expression level of receptor +#' * rec_expr: average expression residual(observed - DWLS_predicted) of #' receptor in rec_cell_type -#' * receptor: receptor name -#' * LR_expr: combined average ligand and receptor expression -#' * lig_nr: total number of cells from lig_cell_type that spatially interact -#' with cells from rec_cell_type -#' * rec_nr: total number of cells from rec_cell_type that spatially interact -#' with cells from lig_cell_type -#' * rand_expr: average combined ligand and receptor expression residual from -#' random spatial permutations -#' * av_diff: average difference between LR_expr and rand_expr over all random -#' spatial permutations -#' * sd_diff: (optional) standard deviation of the difference between LR_expr -#' and rand_expr over all random spatial permutations -#' * z_score: (optinal) z-score -#' * log2fc: LR_expr - rand_expr -#' * pvalue: p-value -#' * LR_cell_comb: cell type pair combination -#' * p.adj: adjusted p-value -#' * PI: significance score: log2fc \* -log10(p.adj) +#' * receptor: receptor name +#' * LR_expr: combined average ligand and receptor expression +#' * lig_nr: total number of cells from lig_cell_type that spatially interact +#' with cells from rec_cell_type +#' * rec_nr: total number of cells from rec_cell_type that spatially interact +#' with cells from lig_cell_type +#' * rand_expr: average combined ligand and receptor expression residual from +#' random spatial permutations +#' * av_diff: average difference between LR_expr and rand_expr over all random +#' spatial permutations +#' * sd_diff: (optional) standard deviation of the difference between LR_expr +#' and rand_expr over all random spatial permutations +#' * z_score: (optinal) z-score +#' * log2fc: LR_expr - rand_expr +#' * pvalue: p-value +#' * LR_cell_comb: cell type pair combination +#' * p.adj: adjusted p-value +#' * PI: significance score: log2fc \* -log10(p.adj) #' } #' @keywords internal -.specific_CCCScores_spots <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expr_residual, - dwls_values, - proximityMat, - random_iter = 1000, - cell_type_1 = "astrocytes", - cell_type_2 = "endothelial", - feature_set_1, - feature_set_2, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", " BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("features", "cells"), - set_seed = FALSE, - seed_number = 1234, - verbose = FALSE) { +.specific_CCCScores_spots <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expr_residual, + dwls_values, + proximityMat, + random_iter = 1000, + cell_type_1 = "astrocytes", + cell_type_2 = "endothelial", + feature_set_1, + feature_set_2, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", " BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("features", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = FALSE) { # data.table variables from_to <- cell_ID <- lig_cell_type <- rec_cell_type <- lig_nr <- rec_nr <- rand_expr <- NULL @@ -1905,8 +2062,11 @@ plotCellProximityFeatSpot <- function(gobject, cell_direction_1 <- paste0(cell_type_1, "--", cell_type_2) cell_direction_2 <- paste0(cell_type_2, "--", cell_type_1) - if (verbose) print(paste0( - "Processing specific CCC Scores: ", cell_direction_1)) + if (verbose) { + print(paste0( + "Processing specific CCC Scores: ", cell_direction_1 + )) + } proxi_1 <- proximityMat[cell_direction_1, ] proxi_2 <- proximityMat[cell_direction_2, ] @@ -1977,9 +2137,13 @@ plotCellProximityFeatSpot <- function(gobject, } random_ids_1 <- sample( - all_cell_ids, size = length(ct1_cell_ids), replace = FALSE) + all_cell_ids, + size = length(ct1_cell_ids), replace = FALSE + ) random_ids_2 <- sample( - all_cell_ids, size = length(ct2_cell_ids), replace = FALSE) + all_cell_ids, + size = length(ct2_cell_ids), replace = FALSE + ) # get feature expression residual for ligand and receptor random_expr_res_L <- expr_residual[feature_set_1, random_ids_1] @@ -2022,7 +2186,9 @@ plotCellProximityFeatSpot <- function(gobject, if (detailed == TRUE) { av_difference_scores <- rowMeans_flex(total_sum) sd_difference_scores <- apply( - total_sum, MARGIN = 1, FUN = stats::sd) + total_sum, + MARGIN = 1, FUN = stats::sd + ) comScore[, av_diff := av_difference_scores] comScore[, sd_diff := sd_difference_scores] @@ -2038,10 +2204,14 @@ plotCellProximityFeatSpot <- function(gobject, if (adjust_target == "features") { comScore[, p.adj := stats::p.adjust( - pvalue, method = adjust_method), by = .(LR_cell_comb)] + pvalue, + method = adjust_method + ), by = .(LR_cell_comb)] } else if (adjust_target == "cells") { comScore[, p.adj := stats::p.adjust( - pvalue, method = adjust_method), by = .(LR_comb)] + pvalue, + method = adjust_method + ), by = .(LR_comb)] } # get minimum adjusted p.value that is not zero @@ -2050,7 +2220,8 @@ plotCellProximityFeatSpot <- function(gobject, comScore[, PI := ifelse( p.adj == 0, log2fc * (-log10(lowest_p.adj)), - log2fc * (-log10(p.adj)))] + log2fc * (-log10(p.adj)) + )] return(comScore) } @@ -2095,55 +2266,56 @@ plotCellProximityFeatSpot <- function(gobject, #' values in cells that are spatially in proximity to each other. #' \itemize{ #' * LR_comb:Pair of ligand and receptor -#' * lig_cell_type: cell type to assess expression level of ligand -#' * lig_expr: average expression residual(observed - DWLS_predicted) of -#' ligand in lig_cell_type -#' * ligand: ligand name -#' * rec_cell_type: cell type to assess expression level of receptor -#' * rec_expr: average expression residual(observed - DWLS_predicted) of +#' * lig_cell_type: cell type to assess expression level of ligand +#' * lig_expr: average expression residual(observed - DWLS_predicted) of +#' ligand in lig_cell_type +#' * ligand: ligand name +#' * rec_cell_type: cell type to assess expression level of receptor +#' * rec_expr: average expression residual(observed - DWLS_predicted) of #' receptor in rec_cell_type -#' * receptor: receptor name +#' * receptor: receptor name #' * LR_expr: combined average ligand and receptor expression residual -#' * lig_nr: total number of cells from lig_cell_type that spatially interact -#' with cells from rec_cell_type -#' * rec_nr: total number of cells from rec_cell_type that spatially interact -#' with cells from lig_cell_type -#' * rand_expr: average combined ligand and receptor expression residual from -#' random spatial permutations -#' * av_diff: average difference between LR_expr and rand_expr over all random -#' spatial permutations -#' * sd_diff: (optional) standard deviation of the difference between LR_expr -#' and rand_expr over all random spatial permutations -#' * z_score: (optional) z-score -#' * log2fc: LR_expr - rand_expr -#' * pvalue: p-value -#' * LR_cell_comb: cell type pair combination -#' * p.adj: adjusted p-value -#' * PI: significanc score: log2fc \* -log10(p.adj) +#' * lig_nr: total number of cells from lig_cell_type that spatially interact +#' with cells from rec_cell_type +#' * rec_nr: total number of cells from rec_cell_type that spatially interact +#' with cells from lig_cell_type +#' * rand_expr: average combined ligand and receptor expression residual from +#' random spatial permutations +#' * av_diff: average difference between LR_expr and rand_expr over all random +#' spatial permutations +#' * sd_diff: (optional) standard deviation of the difference between LR_expr +#' and rand_expr over all random spatial permutations +#' * z_score: (optional) z-score +#' * log2fc: LR_expr - rand_expr +#' * pvalue: p-value +#' * LR_cell_comb: cell type pair combination +#' * p.adj: adjusted p-value +#' * PI: significanc score: log2fc \* -log10(p.adj) #' } #' @export -spatCellCellcomSpots <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - ave_celltype_exp, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_ID", - random_iter = 1000, - feature_set_1, - feature_set_2, - min_observations = 2, - expression_values = c("normalized", "scaled", "custom"), - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("features", "cells"), - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = c("a little", "a lot", "none")) { +spatCellCellcomSpots <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + ave_celltype_exp, + spatial_network_name = "Delaunay_network", + cluster_column = "cell_ID", + random_iter = 1000, + feature_set_1, + feature_set_2, + min_observations = 2, + expression_values = c("normalized", "scaled", "custom"), + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("features", "cells"), + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, + seed_number = 1234, + verbose = c("a little", "a lot", "none")) { # data.table vars V1 <- V2 <- LR_cell_comb <- NULL @@ -2169,7 +2341,9 @@ spatCellCellcomSpots <- function(gobject, # expression data values <- match.arg( - expression_values, choices = c("normalized", "scaled", "custom")) + expression_values, + choices = c("normalized", "scaled", "custom") + ) expr_residual <- .cal_expr_residual( gobject = gobject, spat_unit = spat_unit, @@ -2187,7 +2361,8 @@ spatCellCellcomSpots <- function(gobject, # select overlapped spots intersect_cell_IDs <- intersect( - colnames(expr_residual), colnames(proximityMat)) + colnames(expr_residual), colnames(proximityMat) + ) expr_residual <- expr_residual[, intersect_cell_IDs] proximityMat <- proximityMat[, intersect_cell_IDs] @@ -2205,17 +2380,19 @@ spatCellCellcomSpots <- function(gobject, # check feature list LR_comb <- data.table::data.table( - ligand = feature_set_1, receptor = feature_set_2) + ligand = feature_set_1, receptor = feature_set_2 + ) # check LR pair not captured in giotto object LR_out <- LR_comb[!LR_comb$ligand %in% rownames( - expr_residual) | !LR_comb$receptor %in% rownames(expr_residual)] + expr_residual + ) | !LR_comb$receptor %in% rownames(expr_residual)] if (dim(LR_out)[1] > 0) { message("Ligand or receptor were removed after computing expresion residual.") print(LR_out) LR_comb <- LR_comb[LR_comb$ligand %in% rownames(expr_residual) & - LR_comb$receptor %in% rownames(expr_residual)] + LR_comb$receptor %in% rownames(expr_residual)] feature_set_1 <- LR_comb$ligand feature_set_2 <- LR_comb$receptor } @@ -2223,38 +2400,41 @@ spatCellCellcomSpots <- function(gobject, ## get all combinations between cell types combn_DT <- data.table::data.table(LR_cell_comb = rownames(proximityMat)) combn_DT[, V1 := strsplit( - LR_cell_comb, "--")[[1]][1], by = seq_len(nrow(combn_DT))] + LR_cell_comb, "--" + )[[1]][1], by = seq_len(nrow(combn_DT))] combn_DT[, V2 := strsplit( - LR_cell_comb, "--")[[1]][2], by = seq_len(nrow(combn_DT))] + LR_cell_comb, "--" + )[[1]][2], by = seq_len(nrow(combn_DT))] ## parallel option ## if (do_parallel == TRUE) { savelist <- lapply_flex( X = seq_len(nrow(combn_DT)), cores = cores, fun = function(row) { - cell_type_1 <- combn_DT[row][["V1"]] - cell_type_2 <- combn_DT[row][["V2"]] - - - specific_scores <- .specific_CCCScores_spots( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - expr_residual = expr_residual, - dwls_values = dwls_values, - proximityMat = proximityMat, - random_iter = random_iter, - cell_type_1 = cell_type_1, - cell_type_2 = cell_type_2, - feature_set_1 = feature_set_1, - feature_set_2 = feature_set_2, - min_observations = min_observations, - detailed = detailed, - adjust_method = adjust_method, - adjust_target = adjust_target, - set_seed = set_seed, - seed_number = seed_number - ) - }) + cell_type_1 <- combn_DT[row][["V1"]] + cell_type_2 <- combn_DT[row][["V2"]] + + + specific_scores <- .specific_CCCScores_spots( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + expr_residual = expr_residual, + dwls_values = dwls_values, + proximityMat = proximityMat, + random_iter = random_iter, + cell_type_1 = cell_type_1, + cell_type_2 = cell_type_2, + feature_set_1 = feature_set_1, + feature_set_2 = feature_set_2, + min_observations = min_observations, + detailed = detailed, + adjust_method = adjust_method, + adjust_target = adjust_target, + set_seed = set_seed, + seed_number = seed_number + ) + } + ) } else { ## for loop over all combinations ## savelist <- list() @@ -2264,9 +2444,12 @@ spatCellCellcomSpots <- function(gobject, cell_type_1 <- combn_DT[row][["V1"]] cell_type_2 <- combn_DT[row][["V2"]] - if (verbose == "a little" | verbose == "a lot") - cat("PROCESS nr ", countdown, ": ", - cell_type_1, " and ", cell_type_2) + if (verbose == "a little" | verbose == "a lot") { + cat( + "PROCESS nr ", countdown, ": ", + cell_type_1, " and ", cell_type_2 + ) + } specific_scores <- .specific_CCCScores_spots( gobject = gobject, diff --git a/R/spatial_interaction_visuals.R b/R/spatial_interaction_visuals.R index a373502b0..15769e7d9 100644 --- a/R/spatial_interaction_visuals.R +++ b/R/spatial_interaction_visuals.R @@ -10,40 +10,46 @@ #' @returns ggplot barplot #' @details This function creates a barplot that shows the spatial proximity #' enrichment or depletion of cell type pairs. -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' cellProximityBarplot(gobject = g, -#' CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus")) +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' cellProximityBarplot( +#' gobject = g, +#' CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus") +#' ) #' @export -cellProximityBarplot <- function(gobject, - CPscore, - min_orig_ints = 5, - min_sim_ints = 5, - p_val = 0.05, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximityBarplot") { +cellProximityBarplot <- function( + gobject, + CPscore, + min_orig_ints = 5, + min_sim_ints = 5, + p_val = 0.05, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityBarplot") { table_mean_results_dc <- CPscore$enrichm_res ## filter to remove low number of cell-cell proximity interactions ## # data.table variables - original <- simulations <- p_higher_orig <- p_lower_orig <- enrichm <- + original <- simulations <- p_higher_orig <- p_lower_orig <- enrichm <- type_int <- unified_int <- NULL table_mean_results_dc_filter <- table_mean_results_dc[ - original >= min_orig_ints & simulations >= min_sim_ints, ] + original >= min_orig_ints & simulations >= min_sim_ints, + ] table_mean_results_dc_filter <- table_mean_results_dc_filter[ - p_higher_orig <= p_val | p_lower_orig <= p_val, ] + p_higher_orig <= p_val | p_lower_orig <= p_val, + ] pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( - data = table_mean_results_dc_filter, - ggplot2::aes(x = unified_int, y = enrichm, fill = type_int), - stat = "identity", show.legend = FALSE) + data = table_mean_results_dc_filter, + ggplot2::aes(x = unified_int, y = enrichm, fill = type_int), + stat = "identity", show.legend = FALSE + ) pl <- pl + ggplot2::coord_flip() pl <- pl + ggplot2::theme_bw() pl <- pl + ggplot2::labs(y = "enrichment/depletion") @@ -51,18 +57,22 @@ cellProximityBarplot <- function(gobject, bpl <- ggplot2::ggplot() bpl <- bpl + ggplot2::geom_bar( - data = table_mean_results_dc_filter, - ggplot2::aes(x = unified_int, y = original, fill = type_int), - stat = "identity", show.legend = TRUE) + data = table_mean_results_dc_filter, + ggplot2::aes(x = unified_int, y = original, fill = type_int), + stat = "identity", show.legend = TRUE + ) bpl <- bpl + ggplot2::coord_flip() bpl <- bpl + ggplot2::theme_bw() + ggplot2::theme( - axis.text.y = element_blank()) + axis.text.y = element_blank() + ) bpl <- bpl + ggplot2::labs(y = "# of interactions") bpl combo_plot <- cowplot::plot_grid( - pl, bpl, ncol = 2, rel_heights = c(1), - rel_widths = c(3, 1.5), align = "h") + pl, bpl, + ncol = 2, rel_heights = c(1), + rel_widths = c(3, 1.5), align = "h" + ) # output plot return(GiottoVisuals::plot_output_handler( @@ -85,7 +95,7 @@ cellProximityBarplot <- function(gobject, #' @param CPscore CPscore, output from cellProximityEnrichment() #' @param scale scale cell-cell proximity interaction scores #' @param order_cell_types order cell types based on enrichment correlation -#' @param color_breaks numerical vector of length 3 to represent min, mean +#' @param color_breaks numerical vector of length 3 to represent min, mean #' and maximum #' @param color_names character color vector of length 3 #' @returns ggplot heatmap @@ -93,50 +103,60 @@ cellProximityBarplot <- function(gobject, #' enrichment or depletion of cell type pairs. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -#' +#' #' cellProximityHeatmap(gobject = g, CPscore = x) #' @export -cellProximityHeatmap <- function(gobject, - CPscore, - scale = TRUE, - order_cell_types = TRUE, - color_breaks = NULL, - color_names = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximityHeatmap") { +cellProximityHeatmap <- function( + gobject, + CPscore, + scale = TRUE, + order_cell_types = TRUE, + color_breaks = NULL, + color_names = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityHeatmap") { enrich_res <- CPscore$enrichm_res # data.table variables first_type <- second_type <- unified_int <- NULL enrich_res[, first_type := strsplit( - x = as.character(unified_int), split = "--")[[1]][1], - by = seq_len(nrow(enrich_res))] + x = as.character(unified_int), split = "--" + )[[1]][1], + by = seq_len(nrow(enrich_res)) + ] enrich_res[, second_type := strsplit( - x = as.character(unified_int), split = "--")[[1]][2], - by = seq_len(nrow(enrich_res))] + x = as.character(unified_int), split = "--" + )[[1]][2], + by = seq_len(nrow(enrich_res)) + ] # create matrix enrich_mat <- data.table::dcast.data.table( - data = enrich_res, - formula = first_type ~ second_type, - value.var = "enrichm") + data = enrich_res, + formula = first_type ~ second_type, + value.var = "enrichm" + ) matrix_d <- as.matrix(enrich_mat[, -1]) rownames(matrix_d) <- as.vector(enrich_mat[[1]]) t_matrix_d <- t_flex(matrix_d) # fill in NAs based on values in upper and lower matrix triangle t_matrix_d[upper.tri(t_matrix_d)][is.na(t_matrix_d[ - upper.tri(t_matrix_d)])] <- matrix_d[upper.tri(matrix_d)][ - is.na(t_matrix_d[upper.tri(t_matrix_d)])] + upper.tri(t_matrix_d) + ])] <- matrix_d[upper.tri(matrix_d)][ + is.na(t_matrix_d[upper.tri(t_matrix_d)]) + ] t_matrix_d[lower.tri(t_matrix_d)][is.na(t_matrix_d[ - lower.tri(t_matrix_d)])] <- matrix_d[lower.tri(matrix_d)][ - is.na(t_matrix_d[lower.tri(t_matrix_d)])] + lower.tri(t_matrix_d) + ])] <- matrix_d[lower.tri(matrix_d)][ + is.na(t_matrix_d[lower.tri(t_matrix_d)]) + ] t_matrix_d[is.na(t_matrix_d)] <- 0 final_matrix <- t_matrix_d @@ -145,7 +165,8 @@ cellProximityHeatmap <- function(gobject, final_matrix <- t_flex(scale(t_flex(final_matrix))) final_matrix <- t_flex(final_matrix) final_matrix[lower.tri(final_matrix)] <- t_flex(final_matrix)[ - lower.tri(final_matrix)] + lower.tri(final_matrix) + ] } # order cell types @@ -171,17 +192,19 @@ cellProximityHeatmap <- function(gobject, } heatm <- ComplexHeatmap::Heatmap( - matrix = final_matrix, - cluster_rows = FALSE, + matrix = final_matrix, + cluster_rows = FALSE, cluster_columns = FALSE, col = GiottoVisuals::colorRamp2( - breaks = color_breaks, colors = color_names) + breaks = color_breaks, colors = color_names + ) ) } else { heatm <- ComplexHeatmap::Heatmap( - matrix = final_matrix, - cluster_rows = FALSE, - cluster_columns = FALSE) + matrix = final_matrix, + cluster_rows = FALSE, + cluster_columns = FALSE + ) } return(plot_output_handler( @@ -208,9 +231,9 @@ cellProximityHeatmap <- function(gobject, #' @param color_depletion color for depleted cell-cell interactions #' @param color_enrichment color for enriched cell-cell interactions #' @param rescale_edge_weights rescale edge weights (boolean) -#' @param edge_weight_range_depletion numerical vector of length 2 to rescale +#' @param edge_weight_range_depletion numerical vector of length 2 to rescale #' depleted edge weights -#' @param edge_weight_range_enrichment numerical vector of length 2 to rescale +#' @param edge_weight_range_enrichment numerical vector of length 2 to rescale #' enriched edge weights #' @param layout layout algorithm to use to draw nodes and edges #' @param only_show_enrichment_edges show only the enriched pairwise scores @@ -223,31 +246,32 @@ cellProximityHeatmap <- function(gobject, #' enrichment or depletion of cell type pairs. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -#' +#' #' cellProximityNetwork(gobject = g, CPscore = x) #' @export -cellProximityNetwork <- function(gobject, - CPscore, - remove_self_edges = FALSE, - self_loop_strength = 0.1, - color_depletion = "lightgreen", - color_enrichment = "red", - rescale_edge_weights = TRUE, - edge_weight_range_depletion = c(0.1, 1), - edge_weight_range_enrichment = c(1, 5), - layout = c("Fruchterman", "DrL", "Kamada-Kawai"), - only_show_enrichment_edges = FALSE, - edge_width_range = c(0.1, 2), - node_size = 4, - node_color_code = NULL, - node_text_size = 6, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximityNetwork") { +cellProximityNetwork <- function( + gobject, + CPscore, + remove_self_edges = FALSE, + self_loop_strength = 0.1, + color_depletion = "lightgreen", + color_enrichment = "red", + rescale_edge_weights = TRUE, + edge_weight_range_depletion = c(0.1, 1), + edge_weight_range_enrichment = c(1, 5), + layout = c("Fruchterman", "DrL", "Kamada-Kawai"), + only_show_enrichment_edges = FALSE, + edge_width_range = c(0.1, 2), + node_size = 4, + node_color_code = NULL, + node_text_size = 6, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityNetwork") { # extract scores # data.table variables @@ -255,19 +279,27 @@ cellProximityNetwork <- function(gobject, CPscores <- CPscore[["enrichm_res"]] CPscores[, cell_1 := strsplit( - as.character(unified_int), split = "--")[[1]][1], - by = seq_len(nrow(CPscores))] + as.character(unified_int), + split = "--" + )[[1]][1], + by = seq_len(nrow(CPscores)) + ] CPscores[, cell_2 := strsplit( - as.character(unified_int), split = "--")[[1]][2], - by = seq_len(nrow(CPscores))] + as.character(unified_int), + split = "--" + )[[1]][2], + by = seq_len(nrow(CPscores)) + ] # create igraph with enrichm as weight edges igd <- igraph::graph_from_data_frame( - d = CPscores[, c("cell_1", "cell_2", "enrichm")], directed = FALSE) + d = CPscores[, c("cell_1", "cell_2", "enrichm")], directed = FALSE + ) if (remove_self_edges == TRUE) { igd <- igraph::simplify( - graph = igd, remove.loops = TRUE, remove.multiple = FALSE) + graph = igd, remove.loops = TRUE, remove.multiple = FALSE + ) } edges_sizes <- igraph::get.edge.attribute(igd, "enrichm") @@ -277,9 +309,11 @@ cellProximityNetwork <- function(gobject, # rescale if wanted if (rescale_edge_weights == TRUE) { pos_edges_sizes_resc <- scales::rescale( - x = post_edges_sizes, to = edge_weight_range_enrichment) + x = post_edges_sizes, to = edge_weight_range_enrichment + ) neg_edges_sizes_resc <- scales::rescale( - x = neg_edges_sizes, to = edge_weight_range_depletion) + x = neg_edges_sizes, to = edge_weight_range_depletion + ) edges_sizes_resc <- c(pos_edges_sizes_resc, neg_edges_sizes_resc) } else { edges_sizes_resc <- c(post_edges_sizes, neg_edges_sizes) @@ -300,15 +334,18 @@ cellProximityNetwork <- function(gobject, } } else { layout <- match.arg( - arg = layout, choices = c("Fruchterman", "DrL", "Kamada-Kawai")) + arg = layout, choices = c("Fruchterman", "DrL", "Kamada-Kawai") + ) } igd <- igraph::set.edge.attribute( - graph = igd, index = igraph::E(igd), name = "color", - value = edges_colors) + graph = igd, index = igraph::E(igd), name = "color", + value = edges_colors + ) igd <- igraph::set.edge.attribute( - graph = igd, index = igraph::E(igd), name = "size", - value = as.numeric(edges_sizes_resc)) + graph = igd, index = igraph::E(igd), name = "size", + value = as.numeric(edges_sizes_resc) + ) ## only show attractive edges if (only_show_enrichment_edges == TRUE) { @@ -323,13 +360,16 @@ cellProximityNetwork <- function(gobject, ## get coordinates layouts if (layout == "Fruchterman") { coords <- igraph::layout_with_fr( - graph = igd, weights = edges_sizes_resc) + graph = igd, weights = edges_sizes_resc + ) } else if (layout == "DrL") { coords <- igraph::layout_with_drl( - graph = igd, weights = edges_sizes_resc) + graph = igd, weights = edges_sizes_resc + ) } else if (layout == "Kamada-Kawai") { coords <- igraph::layout_with_kk( - graph = igd, weights = edges_sizes_resc) + graph = igd, weights = edges_sizes_resc + ) } else { stop("Currently no other layouts have been implemented") } @@ -337,25 +377,36 @@ cellProximityNetwork <- function(gobject, ## create plot gpl <- ggraph::ggraph(graph = igd, layout = coords) gpl <- gpl + ggraph::geom_edge_link( - ggplot2::aes(color = factor(color), - edge_width = size, edge_alpha = size), - show.legend = FALSE) + ggplot2::aes( + color = factor(color), + edge_width = size, edge_alpha = size + ), + show.legend = FALSE + ) if (remove_self_edges == FALSE) { gpl <- gpl + ggraph::geom_edge_loop( - ggplot2::aes(color = factor(color), edge_width = size, - edge_alpha = size, strength = self_loop_strength), - show.legend = FALSE) + ggplot2::aes( + color = factor(color), edge_width = size, + edge_alpha = size, strength = self_loop_strength + ), + show.legend = FALSE + ) } gpl <- gpl + ggraph::scale_edge_color_manual( - values = c("enriched" = color_enrichment, "depleted" = color_depletion)) + values = c("enriched" = color_enrichment, "depleted" = color_depletion) + ) gpl <- gpl + ggraph::scale_edge_width(range = edge_width_range) gpl <- gpl + ggraph::scale_edge_alpha(range = c(0.1, 1)) gpl <- gpl + ggraph::geom_node_text( - ggplot2::aes(label = name), repel = TRUE, size = node_text_size) + ggplot2::aes(label = name), + repel = TRUE, size = node_text_size + ) gpl <- gpl + ggraph::geom_node_point( - ggplot2::aes(color = name), size = node_size) + ggplot2::aes(color = name), + size = node_size + ) if (!is.null(node_color_code)) { gpl <- gpl + ggplot2::scale_color_manual(values = node_color_code) } @@ -392,52 +443,59 @@ cellProximityNetwork <- function(gobject, NULL -#' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell +#' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell #' interactions according to spatial coordinates in ggplot mode #' @keywords internal -.cellProximityVisPlot_2D_ggplot <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - ...) { +.cellProximityVisPlot_2D_ggplot <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + ...) { # data.table variables - unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- + unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- x_start <- x_end <- NULL y_start <- y_end <- cell_ID <- NULL if (is.null(interaction_name)) { - stop("you need to specific at least one interaction name, run + stop("you need to specific at least one interaction name, run cellProximityEnrichment") } - cell_locations <- getSpatialLocations(gobject = gobject, - output = "data.table") - spatial_grid <- getSpatialGrid(gobject = gobject, - name = spatial_grid_name) - cell_metadata <- getCellMetadata(gobject = gobject, - output = "data.table") + cell_locations <- getSpatialLocations( + gobject = gobject, + output = "data.table" + ) + spatial_grid <- getSpatialGrid( + gobject = gobject, + name = spatial_grid_name + ) + cell_metadata <- getCellMetadata( + gobject = gobject, + output = "data.table" + ) @@ -456,7 +514,8 @@ NULL if (show_other_cells) { CellType <- strsplit(interaction_name, "--") all_cell_IDs <- cell_metadata[cell_metadata[[ - cluster_column]] == CellType[[1]][1] | + cluster_column + ]] == CellType[[1]][1] | cell_metadata[[cluster_column]] == CellType[[1]][2], ]$cell_ID other_cell_IDs <- setdiff(all_cell_IDs, cell_IDs_to_keep) } @@ -467,13 +526,15 @@ NULL cell_locations_metadata <- cell_locations } else { cell_locations_metadata <- merge( - cell_locations, cell_metadata, by = "cell_ID") + cell_locations, cell_metadata, + by = "cell_ID" + ) } # first 2 dimensions need to be defined if (is.null(sdimx) | is.null(sdimy)) { - message("first and second dimenion need to be defined, default is + message("first and second dimenion need to be defined, default is first 2\n") sdimx <- "sdimx" sdimy <- "sdimy" @@ -487,15 +548,19 @@ NULL if (show_other_network) { pl <- pl + ggplot2::geom_segment( data = spatial_network[!unified_int %in% interaction_name], - aes(x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, - yend = sdimy_end), + aes( + x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, + yend = sdimy_end + ), color = "lightgrey", size = 0.5, alpha = 0.5 ) } pl <- pl + ggplot2::geom_segment( data = spatial_network[unified_int %in% interaction_name], - aes(x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, - yend = sdimy_end), + aes( + x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, + yend = sdimy_end + ), color = network_color, size = 0.5, alpha = 0.5 ) } @@ -503,7 +568,7 @@ NULL if (!is.null(spatial_grid) & show_grid == TRUE) { if (is.null(grid_color)) grid_color <- "black" pl <- pl + ggplot2::geom_rect( - data = spatial_grid, + data = spatial_grid, aes(xmin = x_start, xmax = x_end, ymin = y_start, ymax = y_end), color = grid_color, fill = NA ) @@ -513,22 +578,22 @@ NULL if (is.null(cell_color)) { cell_color <- "lightblue" pl <- pl + ggplot2::geom_point( - data = cell_locations[!cell_ID %in% cell_IDs_to_keep], + data = cell_locations[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = "lightgrey", + show.legend = show_legend, shape = 21, fill = "lightgrey", size = point_size_other ) pl <- pl + ggplot2::geom_point( - data = cell_locations[cell_ID %in% cell_IDs_to_keep], + data = cell_locations[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, size = point_size_select ) if (show_other_cells) { pl <- pl + ggplot2::geom_point( - data = cell_locations[cell_ID %in% other_cell_IDs], + data = cell_locations[cell_ID %in% other_cell_IDs], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, alpha = point_alpha_other, size = point_size_select * 0.5 ) @@ -541,24 +606,24 @@ NULL } pl <- pl + ggplot2::geom_point( - data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], + data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), fill = "lightgrey", shape = 21, size = point_size_other, - color = point_other_border_col, + color = point_other_border_col, stroke = point_other_border_stroke ) pl <- pl + ggplot2::geom_point( - data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], + data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy, fill = cell_color), show.legend = show_legend, shape = 21, size = point_size_select, - color = point_select_border_col, + color = point_select_border_col, stroke = point_select_border_stroke ) if (show_other_cells) { pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[cell_ID %in% other_cell_IDs], aes_string(x = sdimx, y = sdimy, fill = cell_color), - show.legend = show_legend, shape = 21, + show.legend = show_legend, shape = 21, alpha = point_alpha_other, size = point_size_select * 0.5 ) @@ -571,7 +636,8 @@ NULL } else if (color_as_factor == TRUE) { number_colors <- length(unique(factor_data)) cell_color_code <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = number_colors) + instrs = instructions(gobject) + )(n = number_colors) names(cell_color_code) <- unique(factor_data) pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) } else if (color_as_factor == FALSE) { @@ -588,19 +654,20 @@ NULL } else { pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[ - !cell_ID %in% cell_IDs_to_keep], + !cell_ID %in% cell_IDs_to_keep + ], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, + show.legend = show_legend, shape = 21, fill = "lightgrey", size = point_size_other, - color = point_other_border_col, + color = point_other_border_col, stroke = point_other_border_stroke ) pl <- pl + ggplot2::geom_point( - data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], + data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, size = point_size_select, - color = point_select_border_col, + color = point_select_border_col, stroke = point_select_border_stroke ) } @@ -624,39 +691,40 @@ NULL -#' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell +#' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell #' interactions according to spatial coordinates in plotly mode #' @keywords internal -.cellProximityVisPlot_2D_plotly <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 2, - point_size_other = 1, - point_alpha_other = 0.3, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - ...) { +.cellProximityVisPlot_2D_plotly <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 2, + point_size_other = 1, + point_alpha_other = 0.3, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + ...) { # data.table variables cell_ID <- unified_int <- NULL if (is.null(interaction_name)) { - stop("you need to specific at least one interaction name, run + stop("you need to specific at least one interaction name, run cellProximityEnrichment") } @@ -667,18 +735,21 @@ NULL spatial_network <- annotateSpatialNetwork( - gobject = gobject, - spatial_network_name = spatial_network_name, - cluster_column = cluster_column) + gobject = gobject, + spatial_network_name = spatial_network_name, + cluster_column = cluster_column + ) cell_IDs_to_keep <- unique(c( - spatial_network[unified_int %in% interaction_name]$to, - spatial_network[unified_int %in% interaction_name]$from)) + spatial_network[unified_int %in% interaction_name]$to, + spatial_network[unified_int %in% interaction_name]$from + )) if (show_other_cells) { CellType <- strsplit(interaction_name, "-") all_cell_IDs <- cell_metadata[cell_metadata[[ - cluster_column]] == CellType[[1]][1] | + cluster_column + ]] == CellType[[1]][1] | cell_metadata[[cluster_column]] == CellType[[1]][2], ]$cell_ID other_cell_IDs <- setdiff(all_cell_IDs, cell_IDs_to_keep) } @@ -688,7 +759,9 @@ NULL cell_locations_metadata <- cell_locations } else { cell_locations_metadata <- merge( - cell_locations, cell_metadata, by = "cell_ID") + cell_locations, cell_metadata, + by = "cell_ID" + ) } @@ -696,7 +769,7 @@ NULL # first 2 dimensions need to be defined if (is.null(sdimx) | is.null(sdimy)) { - message("first and second dimenion need to be defined, default is + message("first and second dimenion need to be defined, default is first 2") sdimx <- "sdimx" sdimy <- "sdimy" @@ -775,12 +848,15 @@ NULL if (cell_color %in% colnames(cell_locations_metadata)) { if (is.null(cell_color_code)) { number_colors <- length(unique(cell_locations_metadata[[ - cell_color]])) + cell_color + ]])) cell_color_code <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = number_colors) + instrs = instructions(gobject) + )(n = number_colors) } cell_locations_metadata[[cell_color]] <- as.factor( - cell_locations_metadata[[cell_color]]) + cell_locations_metadata[[cell_color]] + ) pl <- pl %>% plotly::add_trace( type = "scatter", mode = "markers", @@ -788,7 +864,8 @@ NULL data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, color = cell_locations_metadata[ - cell_ID %in% cell_IDs_to_keep][[cell_color]], + cell_ID %in% cell_IDs_to_keep + ][[cell_color]], colors = cell_color_code, marker = list(size = point_size_select) ) @@ -799,7 +876,8 @@ NULL data = cell_locations_metadata[cell_ID %in% other_cell_IDs], x = ~sdimx, y = ~sdimy, color = cell_locations_metadata[ - cell_ID %in% other_cell_IDs][[cell_color]], + cell_ID %in% other_cell_IDs + ][[cell_color]], colors = cell_color_code, opacity = point_alpha_other, marker = list(size = point_size_select * 0.7) @@ -811,9 +889,10 @@ NULL data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, marker = list( - size = point_size_other, - color = "lightgray", - colors = "lightgray"), + size = point_size_other, + color = "lightgray", + colors = "lightgray" + ), opacity = point_alpha_other ) } else { @@ -826,9 +905,10 @@ NULL data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, marker = list( - size = point_size_select, - color = "lightblue", - colors = "lightblue") + size = point_size_select, + color = "lightblue", + colors = "lightblue" + ) ) if (show_other_cells) { pl <- pl %>% plotly::add_trace( @@ -837,9 +917,10 @@ NULL x = ~sdimx, y = ~sdimy, name = "selected cells outside network", marker = list( - size = point_size_select * 0.7, - color = "lightblue", - colors = "lightblue"), + size = point_size_select * 0.7, + color = "lightblue", + colors = "lightblue" + ), opacity = point_alpha_other ) } @@ -849,9 +930,10 @@ NULL data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, marker = list( - size = point_size_other, - color = "lightgray", - colors = "lightgray"), + size = point_size_other, + color = "lightgray", + colors = "lightgray" + ), opacity = point_alpha_other ) } @@ -871,41 +953,42 @@ NULL } -#' @describeIn cellProximityVisPlot_internals Visualize 3D cell-cell +#' @describeIn cellProximityVisPlot_internals Visualize 3D cell-cell #' interactions according to spatial coordinates in plotly mode #' @keywords internal -.cellProximityVisPlot_3D_plotly <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - sdimz = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 2, - point_size_other = 1, - point_alpha_other = 0.5, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - ...) { +.cellProximityVisPlot_3D_plotly <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + sdimz = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 2, + point_size_other = 1, + point_alpha_other = 0.5, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + ...) { # data.table variables cell_ID <- unified_int <- NULL if (is.null(interaction_name)) { - stop("you need to specific at least one interaction name, + stop("you need to specific at least one interaction name, run cellProximityEnrichment") } @@ -916,18 +999,23 @@ NULL spatial_network <- annotateSpatialNetwork( - gobject = gobject, - spatial_network_name = spatial_network_name, - cluster_column = cluster_column) + gobject = gobject, + spatial_network_name = spatial_network_name, + cluster_column = cluster_column + ) - cell_IDs_to_keep <- unique(c(spatial_network[ - unified_int %in% interaction_name]$to, - spatial_network[unified_int %in% interaction_name]$from)) + cell_IDs_to_keep <- unique(c( + spatial_network[ + unified_int %in% interaction_name + ]$to, + spatial_network[unified_int %in% interaction_name]$from + )) if (show_other_cells) { CellType <- strsplit(interaction_name, "-") all_cell_IDs <- cell_metadata[cell_metadata[[ - cluster_column]] == CellType[[1]][1] | + cluster_column + ]] == CellType[[1]][1] | cell_metadata[[cluster_column]] == CellType[[1]][2], ]$cell_ID other_cell_IDs <- setdiff(all_cell_IDs, cell_IDs_to_keep) } @@ -937,7 +1025,9 @@ NULL cell_locations_metadata <- cell_locations } else { cell_locations_metadata <- merge( - cell_locations, cell_metadata, by = "cell_ID") + cell_locations, cell_metadata, + by = "cell_ID" + ) } @@ -945,7 +1035,7 @@ NULL # first 2 dimensions need to be defined if (is.null(sdimx) | is.null(sdimy)) { - message("first and second dimenion need to be defined, default is + message("first and second dimenion need to be defined, default is first 2") sdimx <- "sdimx" sdimy <- "sdimy" @@ -972,21 +1062,26 @@ NULL if (cell_color %in% colnames(cell_locations_metadata)) { if (is.null(cell_color_code)) { number_colors <- length(unique(cell_locations_metadata[[ - cell_color]])) + cell_color + ]])) cell_color_code <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = number_colors) + instrs = instructions(gobject) + )(n = number_colors) } cell_locations_metadata[[cell_color]] <- as.factor( - cell_locations_metadata[[cell_color]]) + cell_locations_metadata[[cell_color]] + ) pl <- pl %>% plotly::add_trace( type = "scatter3d", mode = "markers", data = cell_locations_metadata[ - cell_ID %in% cell_IDs_to_keep], + cell_ID %in% cell_IDs_to_keep + ], x = ~sdimx, y = ~sdimy, z = ~sdimz, color = cell_locations_metadata[ - cell_ID %in% cell_IDs_to_keep][[cell_color]], + cell_ID %in% cell_IDs_to_keep + ][[cell_color]], colors = cell_color_code, marker = list(size = point_size_select) ) %>% @@ -994,12 +1089,14 @@ NULL type = "scatter3d", mode = "markers", name = "unselected cells", data = cell_locations_metadata[ - !cell_ID %in% cell_IDs_to_keep], + !cell_ID %in% cell_IDs_to_keep + ], x = ~sdimx, y = ~sdimy, z = ~sdimz, marker = list( - size = point_size_other, - color = "lightgray", - colors = "lightgray"), + size = point_size_other, + color = "lightgray", + colors = "lightgray" + ), opacity = point_alpha_other ) if (show_other_cells) { @@ -1009,7 +1106,8 @@ NULL data = cell_locations_metadata[cell_ID %in% other_cell_IDs], x = ~sdimx, y = ~sdimy, z = ~sdimz, color = cell_locations_metadata[ - cell_ID %in% other_cell_IDs][[cell_color]], + cell_ID %in% other_cell_IDs + ][[cell_color]], colors = cell_color_code, opacity = point_alpha_other, marker = list(size = point_size_select * 0.7) @@ -1026,9 +1124,10 @@ NULL data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, z = ~sdimz, marker = list( - size = point_size_select, - color = "lightblue", - colors = "lightblue") + size = point_size_select, + color = "lightblue", + colors = "lightblue" + ) ) %>% plotly::add_trace( type = "scatter3d", mode = "markers", @@ -1036,9 +1135,10 @@ NULL data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], x = ~sdimx, y = ~sdimy, z = ~sdimz, marker = list( - size = point_size_other, - color = "lightgray", - colors = "lightgray"), + size = point_size_other, + color = "lightgray", + colors = "lightgray" + ), opacity = point_alpha_other ) if (show_other_cells) { @@ -1048,9 +1148,10 @@ NULL x = ~sdimx, y = ~sdimy, z = ~sdimz, name = "selected cells outside network", marker = list( - size = point_size_select * 0.7, - color = "lightblue", - colors = "lightblue"), + size = point_size_select * 0.7, + color = "lightblue", + colors = "lightblue" + ), opacity = point_alpha_other ) } @@ -1062,18 +1163,18 @@ NULL unselect_network <- spatial_network[!unified_int %in% interaction_name] select_network <- spatial_network[unified_int %in% interaction_name] pl <- pl %>% plotly::add_trace( - name = "sptial network", mode = "lines", + name = "sptial network", mode = "lines", type = "scatter3d", opacity = 0.5, data = plotly_network(select_network), - x = ~x, y = ~y, z = ~z, inherit = FALSE, + x = ~x, y = ~y, z = ~z, inherit = FALSE, line = list(color = network_color) ) if (show_other_network == TRUE) { pl <- pl %>% plotly::add_trace( - name = "unselected sptial network", mode = "lines", + name = "unselected sptial network", mode = "lines", type = "scatter3d", opacity = 0.1, data = plotly_network(unselect_network), - x = ~x, y = ~y, z = ~z, inherit = FALSE, + x = ~x, y = ~y, z = ~z, inherit = FALSE, line = list(color = "lightgray") ) } @@ -1095,7 +1196,7 @@ NULL #' @title cellProximityVisPlot #' @name cellProximityVisPlot -#' @description Visualize cell-cell interactions according to spatial +#' @description Visualize cell-cell interactions according to spatial #' coordinates #' @param gobject giotto object #' @param interaction_name cell-cell interaction name @@ -1136,43 +1237,46 @@ NULL #' g <- GiottoData::loadGiottoMini("visium") #' g <- createSpatialGrid(g, sdimx_stepsize = 5, sdimy_stepsize = 5) #' x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -#' -#' cellProximityVisPlot(gobject = g, interaction_name = x, -#' cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy") +#' +#' cellProximityVisPlot( +#' gobject = g, interaction_name = x, +#' cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy" +#' ) #' @export -cellProximityVisPlot <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - sdimz = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - plot_method = c("ggplot", "plotly"), - ...) { +cellProximityVisPlot <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + sdimz = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + plot_method = c("ggplot", "plotly"), + ...) { ## decide plot method plot_method <- match.arg(plot_method, choices = c("ggplot", "plotly")) axis_scale <- match.arg(axis_scale, c("cube", "real", "custom")) @@ -1180,7 +1284,7 @@ cellProximityVisPlot <- function(gobject, if (plot_method == "ggplot") { if (is.null(sdimx) | is.null(sdimy)) { - warning("plot_method = ggplot, but spatial dimensions for sdimx + warning("plot_method = ggplot, but spatial dimensions for sdimx and sdimy for 2D plotting are not given. \n It will default to the 'sdimx' and 'sdimy'") sdimx <- "sdimx" @@ -1188,7 +1292,7 @@ cellProximityVisPlot <- function(gobject, } if (length(c(sdimx, sdimy, sdimz)) == 3) { - warning("ggplot is not able to produce 3D plot! Please choose + warning("ggplot is not able to produce 3D plot! Please choose plotly method") } result <- .cellProximityVisPlot_2D_ggplot( @@ -1311,7 +1415,7 @@ cellProximityVisPlot <- function(gobject, #' @param min_cells minimum number of source cell type #' @param min_cells_expr minimum expression level for source cell type #' @param min_int_cells minimum number of interacting neighbor cell type -#' @param min_int_cells_expr minimum expression level for interacting neighbor +#' @param min_int_cells_expr minimum expression level for interacting neighbor #' cell type #' @param min_fdr minimum adjusted p-value #' @param min_spat_diff minimum absolute spatial expression difference @@ -1323,45 +1427,53 @@ cellProximityVisPlot <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") -#' -#' plotCellProximityFeats(gobject = g, icfObject = icfObject, -#' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE) +#' +#' plotCellProximityFeats( +#' gobject = g, icfObject = icfObject, +#' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE +#' ) #' @export -plotCellProximityFeats <- function(gobject, - icfObject, - method = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot"), - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCellProximityFeats") { +plotCellProximityFeats <- function( + gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 4, + min_cells_expr = 1, + min_int_cells = 4, + min_int_cells_expr = 1, + min_fdr = 0.1, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCellProximityFeats") { if (!"icfObject" %in% class(icfObject)) { - stop("icfObject needs to be the output from + stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") } # print, return and save parameters show_plot <- ifelse( - is.na(show_plot), - readGiottoInstructions(gobject, param = "show_plot"), show_plot) + is.na(show_plot), + readGiottoInstructions(gobject, param = "show_plot"), show_plot + ) save_plot <- ifelse( - is.na(save_plot), - readGiottoInstructions(gobject, param = "save_plot"), save_plot) + is.na(save_plot), + readGiottoInstructions(gobject, param = "save_plot"), save_plot + ) return_plot <- ifelse( - is.na(return_plot), - readGiottoInstructions(gobject, param = "return_plot"), return_plot) + is.na(return_plot), + readGiottoInstructions(gobject, param = "return_plot"), return_plot + ) ## first filter @@ -1383,9 +1495,12 @@ plotCellProximityFeats <- function(gobject, ## other parameters method <- match.arg( - method, - choices = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot")) + method, + choices = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ) + ) # variables @@ -1396,14 +1511,19 @@ plotCellProximityFeats <- function(gobject, ## volcanoplot pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_point( - data = complete_part, - ggplot2::aes(x = log2fc, - y = ifelse(is.infinite(-log10(p.adj)), - 1000, -log10(p.adj)))) + data = complete_part, + ggplot2::aes( + x = log2fc, + y = ifelse(is.infinite(-log10(p.adj)), + 1000, -log10(p.adj) + ) + ) + ) pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_vline(xintercept = 0, linetype = 2) pl <- pl + ggplot2::labs( - x = "log2 fold-change", y = "-log10(p.adjusted)") + x = "log2 fold-change", y = "-log10(p.adjusted)" + ) ## print plot @@ -1414,9 +1534,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1431,11 +1554,14 @@ plotCellProximityFeats <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( - data = complete_part, - ggplot2::aes(x = unif_int, fill = unif_int)) + data = complete_part, + ggplot2::aes(x = unif_int, fill = unif_int) + ) pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.text.x = ggplot2::element_text( - angle = 90, hjust = 1, vjust = 1)) + angle = 90, hjust = 1, vjust = 1 + ) + ) pl <- pl + ggplot2::coord_flip() ## print plot @@ -1446,9 +1572,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1465,14 +1594,17 @@ plotCellProximityFeats <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_bar( data = complete_part, - ggplot2::aes(x = cell_type, fill = int_cell_type)) + ggplot2::aes(x = cell_type, fill = int_cell_type) + ) if (!is.null(cell_color_code)) { pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) } pl <- pl + ggplot2::theme_classic() + ggplot2::theme( - axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1) + ) pl <- pl + ggplot2::labs( - x = "", y = "# of features influenced by cell neighborhood") + x = "", y = "# of features influenced by cell neighborhood" + ) ## print plot @@ -1483,9 +1615,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1505,14 +1640,18 @@ plotCellProximityFeats <- function(gobject, ) + ggalluvial::geom_alluvium(aes(fill = cell_type), width = 1 / 12) + ggalluvial::geom_stratum( - width = 1 / 12, fill = "black", color = "grey") + + width = 1 / 12, fill = "black", color = "grey" + ) + ggplot2::scale_x_discrete( - limits = c("cell type", "neighbours"), expand = c(.05, .05)) + + limits = c("cell type", "neighbours"), expand = c(.05, .05) + ) + ggplot2::geom_label( - stat = "stratum", label.strata = TRUE, size = 3) + + stat = "stratum", label.strata = TRUE, size = 3 + ) + ggplot2::theme_classic() + ggplot2::labs( - x = "", y = "# of features influenced by cell neighborhood") + x = "", y = "# of features influenced by cell neighborhood" + ) if (!is.null(cell_color_code)) { pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) @@ -1528,9 +1667,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1539,23 +1681,30 @@ plotCellProximityFeats <- function(gobject, } } else if (method == "dotplot") { changed_feats <- complete_part[ - , .N, by = c("cell_type", "int_cell_type")] + , .N, + by = c("cell_type", "int_cell_type") + ] changed_feats[, cell_type := factor(cell_type, unique(cell_type))] changed_feats[, int_cell_type := factor( - int_cell_type, unique(int_cell_type))] + int_cell_type, unique(int_cell_type) + )] pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_point( - data = changed_feats, - ggplot2::aes(x = cell_type, y = int_cell_type, size = N)) + data = changed_feats, + ggplot2::aes(x = cell_type, y = int_cell_type, size = N) + ) pl <- pl + ggplot2::scale_size_continuous( - guide = guide_legend(title = "# of ICFs")) + guide = guide_legend(title = "# of ICFs") + ) pl <- pl + ggplot2::theme(axis.text.x = ggplot2::element_text( - angle = 90, vjust = 1, hjust = 1)) + angle = 90, vjust = 1, hjust = 1 + )) pl <- pl + ggplot2::labs( - x = "source cell type", y = "neighbor cell type") + x = "source cell type", y = "neighbor cell type" + ) ## print plot if (show_plot == TRUE) { @@ -1565,9 +1714,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1576,14 +1728,19 @@ plotCellProximityFeats <- function(gobject, } } else if (method == "heatmap") { changed_feats <- complete_part[ - , .N, by = c("cell_type", "int_cell_type")] + , .N, + by = c("cell_type", "int_cell_type") + ] changed_feats[, cell_type := factor(cell_type, unique(cell_type))] changed_feats[, int_cell_type := factor( - int_cell_type, unique(int_cell_type))] + int_cell_type, unique(int_cell_type) + )] changed_feats_d <- data.table::dcast.data.table( - changed_feats, cell_type ~ int_cell_type, value.var = "N", fill = 0) + changed_feats, cell_type ~ int_cell_type, + value.var = "N", fill = 0 + ) changed_feats_m <- dt_to_matrix(changed_feats_d) col_fun <- GiottoVisuals::colorRamp2( @@ -1593,7 +1750,7 @@ plotCellProximityFeats <- function(gobject, heatm <- ComplexHeatmap::Heatmap(log2(changed_feats_m + 1), col = col_fun, - row_title = "cell_type", + row_title = "cell_type", column_title = "int_cell_type", heatmap_legend_param = list(title = "log2(# DEGs)") ) @@ -1606,9 +1763,12 @@ plotCellProximityFeats <- function(gobject, ## save plot if (save_plot == TRUE) { do.call( - "all_plots_save_function", - c(list(gobject = gobject, plot_object = heatm, - default_save_name = default_save_name), save_param)) + "all_plots_save_function", + c(list( + gobject = gobject, plot_object = heatm, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -1641,7 +1801,7 @@ plotCellProximityFeats <- function(gobject, #' @param min_cells minimum number of source cell type #' @param min_cells_expr minimum expression level for source cell type #' @param min_int_cells minimum number of interacting neighbor cell type -#' @param min_int_cells_expr minimum expression level for interacting neighbor +#' @param min_int_cells_expr minimum expression level for interacting neighbor #' cell type #' @param min_fdr minimum adjusted p-value #' @param min_spat_diff minimum absolute spatial expression difference @@ -1652,32 +1812,39 @@ plotCellProximityFeats <- function(gobject, #' @returns plot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' plotCPF(gobject = g, icfObject = icfObject, show_plot = TRUE, -#' save_plot = FALSE, return_plot = FALSE) +#' icfObject <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' +#' plotCPF( +#' gobject = g, icfObject = icfObject, show_plot = TRUE, +#' save_plot = FALSE, return_plot = FALSE +#' ) #' @export -plotCPF <- function(gobject, - icfObject, - method = c("volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot"), - min_cells = 5, - min_cells_expr = 1, - min_int_cells = 3, - min_int_cells_expr = 1, - min_fdr = 0.05, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCPG") { +plotCPF <- function( + gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 5, + min_cells_expr = 1, + min_int_cells = 3, + min_int_cells_expr = 1, + min_fdr = 0.05, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCPG") { plotCellProximityFeats( gobject = gobject, icfObject = icfObject, @@ -1716,30 +1883,35 @@ plotCPF <- function(gobject, #' @returns plot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' plotInteractionChangedFeats(gobject = g, icfObject = icfObject, -#' source_type = "1", source_markers = "Ccnd2", -#' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +#' icfObject <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' +#' plotInteractionChangedFeats( +#' gobject = g, icfObject = icfObject, +#' source_type = "1", source_markers = "Ccnd2", +#' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +#' ) #' @export -plotInteractionChangedFeats <- function(gobject, - icfObject, - source_type, - source_markers, - ICF_feats, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotInteractionChangedFeats") { +plotInteractionChangedFeats <- function( + gobject, + icfObject, + source_type, + source_markers, + ICF_feats, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotInteractionChangedFeats") { # data.table variables cell_type <- int_cell_type <- log2fc <- NULL if (!"icfObject" %in% class(icfObject)) { - stop("icfObject needs to be the output from + stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") } @@ -1756,23 +1928,28 @@ plotInteractionChangedFeats <- function(gobject, if (length(not_detected_feats) > 0) { cat( "These selected features are not in the icfObject: \n", - not_detected_feats) + not_detected_feats + ) } # data.table set column names feats <- group <- NULL tempDT <- ICFscores[feats %in% all_feats][cell_type == source_type][ - int_cell_type %in% neighbor_types] + int_cell_type %in% neighbor_types + ] tempDT[, feats := factor(feats, levels = detected_feats)] - tempDT[, group := names(ICF_feats[ICF_feats == feats]), - by = seq_len(nrow(tempDT))] + tempDT[, group := names(ICF_feats[ICF_feats == feats]), + by = seq_len(nrow(tempDT)) + ] if (is.null(cell_color_code)) { mycolors <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = length(unique( - tempDT$int_cell_type))) + instrs = instructions(gobject) + )(n = length(unique( + tempDT$int_cell_type + ))) names(mycolors) <- unique(tempDT$int_cell_type) } else { mycolors <- cell_color_code @@ -1782,17 +1959,20 @@ plotInteractionChangedFeats <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.text.x = ggplot2::element_text( - size = 14, angle = 45, vjust = 1, hjust = 1), + size = 14, angle = 45, vjust = 1, hjust = 1 + ), axis.text.y = ggplot2::element_text(size = 14), axis.title = ggplot2::element_text(size = 14) ) pl <- pl + ggplot2::geom_bar( - data = tempDT, - ggplot2::aes(x = feats, y = log2fc, fill = int_cell_type), - stat = "identity", position = ggplot2::position_dodge()) + data = tempDT, + ggplot2::aes(x = feats, y = log2fc, fill = int_cell_type), + stat = "identity", position = ggplot2::position_dodge() + ) pl <- pl + ggplot2::scale_fill_manual(values = mycolors) pl <- pl + ggplot2::labs(x = "", title = paste0( - "fold-change z-scores in ", source_type)) + "fold-change z-scores in ", source_type + )) # output plot return(GiottoVisuals::plot_output_handler( @@ -1826,24 +2006,29 @@ plotInteractionChangedFeats <- function(gobject, #' @returns plot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' plotICF(gobject = g, icfObject = icfObject, -#' source_type = "1", source_markers = "Ccnd2", -#' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +#' icfObject <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' +#' plotICF( +#' gobject = g, icfObject = icfObject, +#' source_type = "1", source_markers = "Ccnd2", +#' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +#' ) #' @export -plotICF <- function(gobject, - icfObject, - source_type, - source_markers, - ICF_feats, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotICF") { +plotICF <- function( + gobject, + icfObject, + source_type, + source_markers, + ICF_feats, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotICF") { plotInteractionChangedFeats( gobject = gobject, icfObject = icfObject, @@ -1884,58 +2069,64 @@ plotICF <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' g_icf <- findInteractionChangedFeats(g, -#' cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' +#' +#' g_icf <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' #' combIcfObject <- combineInteractionChangedFeats(g_icf) -#' -#' plotCombineInteractionChangedFeats(gobject = g, -#' combIcfObject = combIcfObject, -#' selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), -#' selected_interactions = "1--8") +#' +#' plotCombineInteractionChangedFeats( +#' gobject = g, +#' combIcfObject = combIcfObject, +#' selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), +#' selected_interactions = "1--8" +#' ) #' @export -plotCombineInteractionChangedFeats <- function(gobject, - combIcfObject, - selected_interactions = NULL, - selected_feat_to_feat = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "feats"), - facet_scales = "fixed", - facet_ncol = length(selected_feat_to_feat), - facet_nrow = length(selected_interactions), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineICF") { +plotCombineInteractionChangedFeats <- function( + gobject, + combIcfObject, + selected_interactions = NULL, + selected_feat_to_feat = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "feats"), + facet_scales = "fixed", + facet_ncol = length(selected_feat_to_feat), + facet_nrow = length(selected_interactions), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineICF") { ## check validity if (!"combIcfObject" %in% class(combIcfObject)) { - stop("combIcfObject needs to be the output from + stop("combIcfObject needs to be the output from combineInteractionChangedFeats() or combineICF()") } combIcfscore <- copy(combIcfObject[["combICFscores"]]) if (is.null(selected_interactions) | is.null(selected_feat_to_feat)) { - stop("You need to provide a selection of cell-cell interactions and + stop("You need to provide a selection of cell-cell interactions and features-features to plot") } # data.table variables - unif_feat_feat <- unif_int <- other_2 <- sel_2 <- other_1 <- sel_1 <- + unif_feat_feat <- unif_int <- other_2 <- sel_2 <- other_1 <- sel_1 <- cols <- NULL - subDT <- combIcfscore[unif_feat_feat %in% selected_feat_to_feat & - unif_int %in% selected_interactions] + subDT <- combIcfscore[unif_feat_feat %in% selected_feat_to_feat & + unif_int %in% selected_interactions] # order interactions and feat-to-feat according to input subDT[, unif_feat_feat := factor( - unif_feat_feat, levels = selected_feat_to_feat)] + unif_feat_feat, + levels = selected_feat_to_feat + )] subDT[, unif_int := factor(unif_int, levels = selected_interactions)] if (simple_plot == FALSE) { @@ -1944,31 +2135,37 @@ plotCombineInteractionChangedFeats <- function(gobject, if (detail_plot == TRUE) { pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = 0, y = other_2, colour = "other cell expression"), - shape = 1) + data = subDT, + aes(x = 0, y = other_2, colour = "other cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = 0, y = sel_2, colour = "selected cell expression"), - shape = 1) + data = subDT, + aes(x = 0, y = sel_2, colour = "selected cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = other_1, y = 0, colour = "other cell expression"), - shape = 1) + data = subDT, + aes(x = other_1, y = 0, colour = "other cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sel_1, y = 0, colour = "selected cell expression"), - shape = 1) + data = subDT, + aes(x = sel_1, y = 0, colour = "selected cell expression"), + shape = 1 + ) } pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = other_1, y = other_2, colour = "other cell expression"), - size = 2) + data = subDT, + aes(x = other_1, y = other_2, colour = "other cell expression"), + size = 2 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sel_1, y = sel_2, colour = "selected cell expression"), - size = 2) + data = subDT, + aes(x = sel_1, y = sel_2, colour = "selected cell expression"), + size = 2 + ) pl <- pl + ggplot2::geom_segment(data = subDT, aes( x = other_1, xend = sel_1, y = other_2, yend = sel_2 @@ -1978,14 +2175,16 @@ plotCombineInteractionChangedFeats <- function(gobject, y = paste(subDT$feats_2, subDT$cell_type_2, sep = " in ") ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = colors) + name = "expression source", values = colors + ) pl <- pl + ggplot2::facet_wrap(~ unif_feat_feat + unif_int, nrow = facet_nrow, ncol = facet_ncol, scales = facet_scales ) } else { simple_plot_facet <- match.arg( - arg = simple_plot_facet, choices = c("interaction", "feats")) + arg = simple_plot_facet, choices = c("interaction", "feats") + ) if (simple_plot_facet == "interaction") { pl <- ggplot2::ggplot() @@ -1995,15 +2194,22 @@ plotCombineInteractionChangedFeats <- function(gobject, y = unif_feat_feat, yend = unif_feat_feat ), linetype = 2) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sum(c(other_1, other_2)), y = unif_feat_feat, - colour = "other cell expression")) + data = subDT, + aes( + x = sum(c(other_1, other_2)), y = unif_feat_feat, + colour = "other cell expression" + ) + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sum(c(sel_1, sel_2)), y = unif_feat_feat, - colour = "selected cell expression")) + data = subDT, + aes( + x = sum(c(sel_1, sel_2)), y = unif_feat_feat, + colour = "selected cell expression" + ) + ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = cols) + name = "expression source", values = cols + ) pl <- pl + ggplot2::facet_wrap(~unif_int, scales = facet_scales) pl <- pl + ggplot2::labs(x = "interactions", y = "feat-feat") } else { @@ -2014,17 +2220,26 @@ plotCombineInteractionChangedFeats <- function(gobject, y = unif_int, yend = unif_int ), linetype = 2) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sum(c(other_1, other_2)), y = unif_int, - colour = "other cell expression")) + data = subDT, + aes( + x = sum(c(other_1, other_2)), y = unif_int, + colour = "other cell expression" + ) + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = sum(c(sel_1, sel_2)), y = unif_int, - colour = "selected cell expression")) + data = subDT, + aes( + x = sum(c(sel_1, sel_2)), y = unif_int, + colour = "selected cell expression" + ) + ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = cols) + name = "expression source", values = cols + ) pl <- pl + ggplot2::facet_wrap( - ~unif_feat_feat, scales = facet_scales) + ~unif_feat_feat, + scales = facet_scales + ) pl <- pl + ggplot2::labs(x = "feat-feat", y = "interactions") } } @@ -2066,33 +2281,37 @@ plotCombineInteractionChangedFeats <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' g_icf <- findInteractionChangedFeats(g, -#' cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' +#' +#' g_icf <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +#' ) +#' #' combIcfObject <- combineInteractionChangedFeats(g_icf) -#' -#' plotCombineICF(gobject = g, combIcfObject = combIcfObject, -#' selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), -#' selected_interactions = "1--8") +#' +#' plotCombineICF( +#' gobject = g, combIcfObject = combIcfObject, +#' selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), +#' selected_interactions = "1--8" +#' ) #' @export -plotCombineICF <- function(gobject, - combIcfObject, - selected_interactions = NULL, - selected_feat_to_feat = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "feats"), - facet_scales = "fixed", - facet_ncol = length(selected_feat_to_feat), - facet_nrow = length(selected_interactions), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineICF") { +plotCombineICF <- function( + gobject, + combIcfObject, + selected_interactions = NULL, + selected_feat_to_feat = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "feats"), + facet_scales = "fixed", + facet_ncol = length(selected_feat_to_feat), + facet_nrow = length(selected_interactions), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineICF") { plotCombineInteractionChangedFeats( gobject = gobject, combIcfObject = combIcfObject, @@ -2129,13 +2348,13 @@ plotCombineICF <- function(gobject, #' @title plotCombineCellCellCommunication #' @name plotCombineCellCellCommunication -#' @description Create visualization for combined (pairwise) cell proximity +#' @description Create visualization for combined (pairwise) cell proximity #' gene scores #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @param combCCcom combined communcation scores, output from combCCcom() #' @param selected_LR selected ligand-receptor pair -#' @param selected_cell_LR selected cell-cell interaction pair for +#' @param selected_cell_LR selected cell-cell interaction pair for #' ligand-receptor pair #' @param detail_plot show detailed info in both interacting cell types #' @param simple_plot show a simplified plot @@ -2147,49 +2366,59 @@ plotCombineICF <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot") -#' +#' +#' comScores <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), +#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +#' ) +#' +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot" +#' ) +#' #' combCCcom <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -#' -#' plotCombineCellCellCommunication(gobject = g, combCCcom = combCCcom, -#' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1")) +#' +#' plotCombineCellCellCommunication( +#' gobject = g, combCCcom = combCCcom, +#' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1") +#' ) #' @export -plotCombineCellCellCommunication <- function(gobject, - combCCcom, - selected_LR = NULL, - selected_cell_LR = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "genes"), - facet_scales = "fixed", - facet_ncol = length(selected_LR), - facet_nrow = length(selected_cell_LR), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineCellCellCommunication") { +plotCombineCellCellCommunication <- function( + gobject, + combCCcom, + selected_LR = NULL, + selected_cell_LR = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "genes"), + facet_scales = "fixed", + facet_ncol = length(selected_LR), + facet_nrow = length(selected_cell_LR), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineCellCellCommunication") { # data.table variables - LR_comb <- LR_cell_comb <- lig_expr <- lig_expr_spat <- rec_expr <- + LR_comb <- LR_cell_comb <- lig_expr <- lig_expr_spat <- rec_expr <- rec_expr_spat <- LR_expr <- LR_expr_spat <- NULL ## check validity if (is.null(selected_cell_LR) | is.null(selected_LR)) { - stop("You need to provide a selection of cell-cell interactions + stop("You need to provide a selection of cell-cell interactions and genes-genes to plot") } subDT <- combCCcom[ - LR_comb %in% selected_LR & LR_cell_comb %in% selected_cell_LR] + LR_comb %in% selected_LR & LR_cell_comb %in% selected_cell_LR + ] # order interactions and gene-to-gene according to input subDT[, LR_comb := factor(LR_comb, levels = selected_LR)] @@ -2201,31 +2430,43 @@ plotCombineCellCellCommunication <- function(gobject, if (detail_plot == TRUE) { pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = 0, y = lig_expr, colour = "overall cell expression"), - shape = 1) + data = subDT, + aes(x = 0, y = lig_expr, colour = "overall cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = 0, y = lig_expr_spat, - colour = "spatial cell expression"), shape = 1) + data = subDT, + aes( + x = 0, y = lig_expr_spat, + colour = "spatial cell expression" + ), shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = rec_expr, y = 0, colour = "overall cell expression"), - shape = 1) + data = subDT, + aes(x = rec_expr, y = 0, colour = "overall cell expression"), + shape = 1 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = rec_expr_spat, y = 0, - colour = "spatial cell expression"), shape = 1) + data = subDT, + aes( + x = rec_expr_spat, y = 0, + colour = "spatial cell expression" + ), shape = 1 + ) } pl <- pl + ggplot2::geom_point( - data = subDT, + data = subDT, aes(x = rec_expr, y = lig_expr, colour = "overall cell expression"), - size = 2) + size = 2 + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = rec_expr_spat, y = lig_expr_spat, - colour = "spatial cell expression"), size = 2) + data = subDT, + aes( + x = rec_expr_spat, y = lig_expr_spat, + colour = "spatial cell expression" + ), size = 2 + ) pl <- pl + ggplot2::geom_segment(data = subDT, aes( x = rec_expr, xend = rec_expr_spat, y = lig_expr, yend = lig_expr_spat @@ -2235,14 +2476,16 @@ plotCombineCellCellCommunication <- function(gobject, y = paste(subDT$ligand, subDT$lig_cell_type, sep = " in ") ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = colors) + name = "expression source", values = colors + ) pl <- pl + ggplot2::facet_wrap(~ LR_comb + LR_cell_comb, nrow = facet_nrow, ncol = facet_ncol, scales = facet_scales ) } else { simple_plot_facet <- match.arg( - arg = simple_plot_facet, choices = c("interaction", "genes")) + arg = simple_plot_facet, choices = c("interaction", "genes") + ) if (simple_plot_facet == "interaction") { pl <- ggplot2::ggplot() @@ -2252,15 +2495,22 @@ plotCombineCellCellCommunication <- function(gobject, y = LR_comb, yend = LR_comb ), linetype = 2) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = LR_expr, y = LR_comb, - colour = "overall cell expression")) + data = subDT, + aes( + x = LR_expr, y = LR_comb, + colour = "overall cell expression" + ) + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = LR_expr_spat, y = LR_comb, - colour = "spatial cell expression")) + data = subDT, + aes( + x = LR_expr_spat, y = LR_comb, + colour = "spatial cell expression" + ) + ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = colors) + name = "expression source", values = colors + ) pl <- pl + ggplot2::facet_wrap(~LR_cell_comb, scales = "fixed") pl <- pl + ggplot2::labs(x = "interactions", y = "gene-gene") pl @@ -2272,15 +2522,22 @@ plotCombineCellCellCommunication <- function(gobject, y = LR_cell_comb, yend = LR_cell_comb ), linetype = 2) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = LR_expr, y = LR_cell_comb, - colour = "overall cell expression")) + data = subDT, + aes( + x = LR_expr, y = LR_cell_comb, + colour = "overall cell expression" + ) + ) pl <- pl + ggplot2::geom_point( - data = subDT, - aes(x = LR_expr_spat, y = LR_cell_comb, - colour = "spatial cell expression")) + data = subDT, + aes( + x = LR_expr_spat, y = LR_cell_comb, + colour = "spatial cell expression" + ) + ) pl <- pl + ggplot2::scale_colour_manual( - name = "expression source", values = colors) + name = "expression source", values = colors + ) pl <- pl + ggplot2::facet_wrap(~LR_comb, scales = facet_scales) pl <- pl + ggplot2::labs(x = "gene-gene", y = "interactions") } @@ -2303,13 +2560,13 @@ plotCombineCellCellCommunication <- function(gobject, #' @title plotCombineCCcom #' @name plotCombineCCcom -#' @description Create visualization for combined (pairwise) cell proximity +#' @description Create visualization for combined (pairwise) cell proximity #' gene scores #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @param combCCcom combined communcation scores, output from combCCcom() #' @param selected_LR selected ligand-receptor pair -#' @param selected_cell_LR selected cell-cell interaction pair for +#' @param selected_cell_LR selected cell-cell interaction pair for #' ligand-receptor pair #' @param detail_plot show detailed info in both interacting cell types #' @param simple_plot show a simplified plot @@ -2321,37 +2578,46 @@ plotCombineCellCellCommunication <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot") -#' +#' +#' comScores <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), +#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +#' ) +#' +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot" +#' ) +#' #' combCCcom <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -#' -#' plotCombineCCcom(gobject = g, combCCcom = combCCcom, -#' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1")) +#' +#' plotCombineCCcom( +#' gobject = g, combCCcom = combCCcom, +#' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1") +#' ) #' @export -plotCombineCCcom <- function(gobject, - combCCcom, - selected_LR = NULL, - selected_cell_LR = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "genes"), - facet_scales = "fixed", - facet_ncol = length(selected_LR), - facet_nrow = length(selected_cell_LR), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineCCcom") { +plotCombineCCcom <- function( + gobject, + combCCcom, + selected_LR = NULL, + selected_cell_LR = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "genes"), + facet_scales = "fixed", + facet_ncol = length(selected_LR), + facet_nrow = length(selected_cell_LR), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineCCcom") { plotCombineCellCellCommunication( gobject = gobject, combCCcom = combCCcom, @@ -2376,15 +2642,15 @@ plotCombineCCcom <- function(gobject, #' @title plotCCcomHeatmap #' @name plotCCcomHeatmap -#' @description Plots heatmap for ligand-receptor communication scores in +#' @description Plots heatmap for ligand-receptor communication scores in #' cell-cell interactions #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @inheritParams plot_params -#' @param comScores communinication scores from \code{\link{exprCellCellcom}} +#' @param comScores communinication scores from \code{\link{exprCellCellcom}} #' or \code{\link{spatCellCellcom}} #' @param selected_LR selected ligand-receptor combinations -#' @param selected_cell_LR selected cell-cell combinations for ligand-receptor +#' @param selected_cell_LR selected cell-cell combinations for ligand-receptor #' combinations #' @param show_LR_names show ligand-receptor names #' @param show_cell_LR_names show cell-cell names @@ -2394,33 +2660,40 @@ plotCombineCCcom <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) -#' +#' +#' comScores <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), +#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +#' ) +#' #' plotCCcomHeatmap(gobject = g, comScores = comScores, show_plot = TRUE) #' @export -plotCCcomHeatmap <- function(gobject, - comScores, - selected_LR = NULL, - selected_cell_LR = NULL, - show_LR_names = TRUE, - show_cell_LR_names = TRUE, - show = c("PI", "LR_expr", "log2fc"), - cor_method = c("pearson", "kendall", "spearman"), - aggl_method = c("ward.D", "ward.D2", "single", "complete", "average", - "mcquitty", "median", "centroid"), - gradient_color = NULL, - gradient_style = c("divergent", "sequential"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCCcomHeatmap") { +plotCCcomHeatmap <- function( + gobject, + comScores, + selected_LR = NULL, + selected_cell_LR = NULL, + show_LR_names = TRUE, + show_cell_LR_names = TRUE, + show = c("PI", "LR_expr", "log2fc"), + cor_method = c("pearson", "kendall", "spearman"), + aggl_method = c( + "ward.D", "ward.D2", "single", "complete", "average", + "mcquitty", "median", "centroid" + ), + gradient_color = NULL, + gradient_style = c("divergent", "sequential"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCCcomHeatmap") { # get parameters cor_method <- match.arg( - cor_method, choices = c("pearson", "kendall", "spearman")) + cor_method, + choices = c("pearson", "kendall", "spearman") + ) aggl_method <- match.arg(aggl_method, choices = c( "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid" @@ -2432,8 +2705,8 @@ plotCCcomHeatmap <- function(gobject, # plot method if (!is.null(selected_LR) & !is.null(selected_cell_LR)) { - selDT <- comScores[LR_comb %in% selected_LR & LR_cell_comb %in% - selected_cell_LR] + selDT <- comScores[LR_comb %in% selected_LR & LR_cell_comb %in% + selected_cell_LR] } else if (!is.null(selected_LR)) { selDT <- comScores[LR_comb %in% selected_LR] } else if (!is.null(selected_cell_LR)) { @@ -2445,14 +2718,18 @@ plotCCcomHeatmap <- function(gobject, # creat matrix show <- match.arg(show, choices = c("PI", "LR_expr", "log2fc")) selDT_d <- data.table::dcast.data.table( - selDT, LR_cell_comb ~ LR_comb, value.var = show, fill = 0) + selDT, LR_cell_comb ~ LR_comb, + value.var = show, fill = 0 + ) selDT_m <- dt_to_matrix(selDT_d) ## cells corclus_cells_dist <- stats::as.dist( - 1 - cor_flex(x = t_flex(selDT_m), method = cor_method)) + 1 - cor_flex(x = t_flex(selDT_m), method = cor_method) + ) hclusters_cells <- stats::hclust( - d = corclus_cells_dist, method = aggl_method) + d = corclus_cells_dist, method = aggl_method + ) clus_names <- rownames(selDT_m) names(clus_names) <- seq_along(clus_names) clus_sort_names <- clus_names[hclusters_cells$order] @@ -2460,9 +2737,11 @@ plotCCcomHeatmap <- function(gobject, ## genes corclus_genes_dist <- stats::as.dist( - 1 - cor_flex(x = selDT_m, method = cor_method)) + 1 - cor_flex(x = selDT_m, method = cor_method) + ) hclusters_genes <- stats::hclust( - d = corclus_genes_dist, method = aggl_method) + d = corclus_genes_dist, method = aggl_method + ) clus_names <- colnames(selDT_m) names(clus_names) <- seq_along(clus_names) clus_sort_names <- clus_names[hclusters_genes$order] @@ -2516,19 +2795,19 @@ plotCCcomHeatmap <- function(gobject, #' @title plotCCcomDotplot #' @name plotCCcomDotplot -#' @description Plots dotplot for ligand-receptor communication scores in +#' @description Plots dotplot for ligand-receptor communication scores in #' cell-cell interactions #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @inheritParams plot_params -#' @param comScores communication scores from \code{\link{exprCellCellcom}} +#' @param comScores communication scores from \code{\link{exprCellCellcom}} #' or \code{\link{spatCellCellcom}} #' @param selected_LR selected ligand-receptor combinations -#' @param selected_cell_LR selected cell-cell combinations for ligand-receptor +#' @param selected_cell_LR selected cell-cell combinations for ligand-receptor #' combinations #' @param show_LR_names show ligand-receptor names #' @param show_cell_LR_names show cell-cell names -#' @param cluster_on values to use for clustering of cell-cell and +#' @param cluster_on values to use for clustering of cell-cell and #' ligand-receptor pairs #' @param cor_method correlation method used for clustering #' @param aggl_method agglomeration method used by hclust @@ -2537,33 +2816,40 @@ plotCCcomHeatmap <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) -#' +#' +#' comScores <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), +#' feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +#' ) +#' #' plotCCcomDotplot(gobject = g, comScores = comScores, show_plot = TRUE) #' @export -plotCCcomDotplot <- function(gobject, - comScores, - selected_LR = NULL, - selected_cell_LR = NULL, - show_LR_names = TRUE, - show_cell_LR_names = TRUE, - cluster_on = c("PI", "LR_expr", "log2fc"), - cor_method = c("pearson", "kendall", "spearman"), - aggl_method = c("ward.D", "ward.D2", "single", "complete", "average", - "mcquitty", "median", "centroid"), - dot_color_gradient = NULL, - gradient_style = c("divergent", "sequential"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCCcomDotplot") { +plotCCcomDotplot <- function( + gobject, + comScores, + selected_LR = NULL, + selected_cell_LR = NULL, + show_LR_names = TRUE, + show_cell_LR_names = TRUE, + cluster_on = c("PI", "LR_expr", "log2fc"), + cor_method = c("pearson", "kendall", "spearman"), + aggl_method = c( + "ward.D", "ward.D2", "single", "complete", "average", + "mcquitty", "median", "centroid" + ), + dot_color_gradient = NULL, + gradient_style = c("divergent", "sequential"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCCcomDotplot") { # get parameters cor_method <- match.arg( - cor_method, choices = c("pearson", "kendall", "spearman")) + cor_method, + choices = c("pearson", "kendall", "spearman") + ) aggl_method <- match.arg(aggl_method, choices = c( "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid" @@ -2575,8 +2861,8 @@ plotCCcomDotplot <- function(gobject, # plot method if (!is.null(selected_LR) & !is.null(selected_cell_LR)) { - selDT <- comScores[LR_comb %in% selected_LR & LR_cell_comb %in% - selected_cell_LR] + selDT <- comScores[LR_comb %in% selected_LR & LR_cell_comb %in% + selected_cell_LR] } else if (!is.null(selected_LR)) { selDT <- comScores[LR_comb %in% selected_LR] } else if (!is.null(selected_cell_LR)) { @@ -2588,27 +2874,37 @@ plotCCcomDotplot <- function(gobject, # creat matrix cluster_on <- match.arg(cluster_on, choices = c("PI", "LR_expr", "log2fc")) selDT_d <- data.table::dcast.data.table( - selDT, LR_cell_comb ~ LR_comb, value.var = cluster_on, fill = 0) + selDT, LR_cell_comb ~ LR_comb, + value.var = cluster_on, fill = 0 + ) selDT_m <- dt_to_matrix(selDT_d) # remove zero variance sd_rows <- apply(selDT_m, 1, sd) sd_rows_zero <- names(sd_rows[sd_rows == 0]) - if (length(sd_rows_zero) > 0) selDT_m <- selDT_m[ - !rownames(selDT_m) %in% sd_rows_zero, ] + if (length(sd_rows_zero) > 0) { + selDT_m <- selDT_m[ + !rownames(selDT_m) %in% sd_rows_zero, + ] + } sd_cols <- apply(selDT_m, 2, sd) sd_cols_zero <- names(sd_cols[sd_cols == 0]) - if (length(sd_cols_zero) > 0) selDT_m <- selDT_m[ - , !colnames(selDT_m) %in% sd_cols_zero] + if (length(sd_cols_zero) > 0) { + selDT_m <- selDT_m[ + , !colnames(selDT_m) %in% sd_cols_zero + ] + } ## cells corclus_cells_dist <- stats::as.dist( - 1 - cor_flex(x = t_flex(selDT_m), method = cor_method)) + 1 - cor_flex(x = t_flex(selDT_m), method = cor_method) + ) hclusters_cells <- stats::hclust( - d = corclus_cells_dist, method = aggl_method) + d = corclus_cells_dist, method = aggl_method + ) clus_names <- rownames(selDT_m) names(clus_names) <- seq_along(clus_names) clus_sort_names <- clus_names[hclusters_cells$order] @@ -2616,9 +2912,11 @@ plotCCcomDotplot <- function(gobject, ## genes corclus_genes_dist <- stats::as.dist( - 1 - cor_flex(x = selDT_m, method = cor_method)) + 1 - cor_flex(x = selDT_m, method = cor_method) + ) hclusters_genes <- stats::hclust( - d = corclus_genes_dist, method = aggl_method) + d = corclus_genes_dist, method = aggl_method + ) clus_names <- colnames(selDT_m) names(clus_names) <- seq_along(clus_names) clus_sort_names <- clus_names[hclusters_genes$order] @@ -2678,7 +2976,7 @@ plotCCcomDotplot <- function(gobject, #' @title plotRankSpatvsExpr #' @name plotRankSpatvsExpr -#' @description Plots dotplot to compare ligand-receptor rankings from +#' @description Plots dotplot to compare ligand-receptor rankings from #' spatial and expression information #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -2694,39 +2992,44 @@ plotCCcomDotplot <- function(gobject, #' @param size_range size ranges of dotplot #' @param xlims x-limits, numerical vector of 2 #' @param ylims y-limits, numerical vector of 2 -#' @param selected_ranks numerical vector, will be used to print out the +#' @param selected_ranks numerical vector, will be used to print out the #' percentage of top spatial ranks are recovered #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -#' random_iter = 10) -#' +#' +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", +#' random_iter = 10 +#' ) +#' #' combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -#' +#' #' plotRankSpatvsExpr(gobject = g, combCC = combCC) #' @export -plotRankSpatvsExpr <- function(gobject, - combCC, - expr_rnk_column = "LR_expr_rnk", - spat_rnk_column = "LR_spat_rnk", - dot_color_gradient = NULL, - midpoint = deprecated(), - gradient_midpoint = 10, - gradient_style = c("divergent", "sequential"), - size_range = c(0.01, 1.5), - xlims = NULL, - ylims = NULL, - selected_ranks = c(1, 10, 20), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotRankSpatvsExpr") { +plotRankSpatvsExpr <- function( + gobject, + combCC, + expr_rnk_column = "LR_expr_rnk", + spat_rnk_column = "LR_spat_rnk", + dot_color_gradient = NULL, + midpoint = deprecated(), + gradient_midpoint = 10, + gradient_style = c("divergent", "sequential"), + size_range = c(0.01, 1.5), + xlims = NULL, + ylims = NULL, + selected_ranks = c(1, 10, 20), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotRankSpatvsExpr") { # deprecate if (GiottoUtils::is_present(midpoint)) { deprecate_warn( @@ -2746,9 +3049,13 @@ plotRankSpatvsExpr <- function(gobject, rnk_list <- list() spt_list <- list() for (rnk in seq_len(total_rnks)) { - mytab <- table(cut(sort(combCC[get(expr_rnk_column) == rnk][[ - spat_rnk_column]]), breaks = seq(0, total_rnks, 1), - labels = seq_len(total_rnks))) + mytab <- table(cut( + sort(combCC[get(expr_rnk_column) == rnk][[ + spat_rnk_column + ]]), + breaks = seq(0, total_rnks, 1), + labels = seq_len(total_rnks) + )) rnk_list[[rnk]] <- mytab spt_list[[rnk]] <- names(mytab) } @@ -2763,20 +3070,24 @@ plotRankSpatvsExpr <- function(gobject, rnk_res_m[, diff := variable - spt_rank] for (i in selected_ranks) { - perc_recovered <- 100 * (sum(rnk_res_m[abs(diff) < i]$value) / - sum(rnk_res_m$value)) - cat("for top ", i, " expression ranks, you recover ", - round(perc_recovered, 2), "% of the highest spatial rank") + perc_recovered <- 100 * (sum(rnk_res_m[abs(diff) < i]$value) / + sum(rnk_res_m$value)) + cat( + "for top ", i, " expression ranks, you recover ", + round(perc_recovered, 2), "% of the highest spatial rank" + ) } # full plot pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( - axis.text = element_blank()) + axis.text = element_blank() + ) pl <- pl + ggplot2::geom_point( - data = rnk_res_m, - ggplot2::aes(x = variable, y = spt_rank, size = value, color = value)) + data = rnk_res_m, + ggplot2::aes(x = variable, y = spt_rank, size = value, color = value) + ) pl <- pl + set_default_color_continuous_CCcom_dotplot( colors = dot_color_gradient, instrs = instructions(gobject), @@ -2786,7 +3097,8 @@ plotRankSpatvsExpr <- function(gobject, guide = guide_legend(title = "") ) pl <- pl + ggplot2::scale_size_continuous( - range = size_range, guide = "none") + range = size_range, guide = "none" + ) pl <- pl + ggplot2::labs(x = "expression rank", y = "spatial rank") if (!is.null(xlims)) { @@ -2814,16 +3126,17 @@ plotRankSpatvsExpr <- function(gobject, #' @title Create recovery plot #' @name .plotRecovery_sub -#' @description Plots recovery plot to compare ligand-receptor rankings from +#' @description Plots recovery plot to compare ligand-receptor rankings from #' spatial and expression information #' @param combCC combined communinication scores from \code{\link{combCCcom}} #' @param first_col first column to use #' @param second_col second column to use #' @returns ggplot #' @keywords internal -.plotRecovery_sub <- function(combCC, - first_col = "LR_expr_rnk", - second_col = "LR_spat_rnk") { +.plotRecovery_sub <- function( + combCC, + first_col = "LR_expr_rnk", + second_col = "LR_spat_rnk") { # data.table variables concord <- perc <- not_concord <- secondrank <- secondrank_perc <- NULL @@ -2857,8 +3170,9 @@ plotRankSpatvsExpr <- function(gobject, pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() pl <- pl + ggplot2::geom_point( - data = mymatDT, - aes(x = secondrank_perc, y = perc)) + data = mymatDT, + aes(x = secondrank_perc, y = perc) + ) pl <- pl + ggplot2::scale_x_continuous(expand = c(0, 0), limits = c(0, 100)) pl <- pl + ggplot2::scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) pl <- pl + ggplot2::geom_abline(slope = 1, intercept = 0, color = "blue") @@ -2872,7 +3186,7 @@ plotRankSpatvsExpr <- function(gobject, #' @title plotRecovery #' @name plotRecovery -#' @description Plots recovery plot to compare ligand-receptor rankings from +#' @description Plots recovery plot to compare ligand-receptor rankings from #' spatial and expression information #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -2883,29 +3197,36 @@ plotRankSpatvsExpr <- function(gobject, #' @returns ggplot #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -#' random_iter = 10) -#' +#' +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +#' ) +#' spatialCC <- spatCellCellcom( +#' gobject = g, cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", +#' random_iter = 10 +#' ) +#' #' combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -#' +#' #' plotRecovery(gobject = g, combCC = combCC) #' @export -plotRecovery <- function(gobject, - combCC, - expr_rnk_column = "exprPI_rnk", - spat_rnk_column = "spatPI_rnk", - ground_truth = c("spatial", "expression"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotRecovery") { +plotRecovery <- function( + gobject, + combCC, + expr_rnk_column = "exprPI_rnk", + spat_rnk_column = "spatPI_rnk", + ground_truth = c("spatial", "expression"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotRecovery") { ground_truth <- match.arg( - ground_truth, choices = c("spatial", "expression")) + ground_truth, + choices = c("spatial", "expression") + ) if (ground_truth == "spatial") { @@ -2915,8 +3236,9 @@ plotRecovery <- function(gobject, second_col = expr_rnk_column ) pl <- pl + ggplot2::labs( - x = "% expression rank included", - y = "% highest spatial rank recovered") + x = "% expression rank included", + y = "% highest spatial rank recovered" + ) } else if (ground_truth == "expression") { pl <- .plotRecovery_sub( combCC = combCC, @@ -2924,8 +3246,9 @@ plotRecovery <- function(gobject, second_col = spat_rnk_column ) pl <- pl + ggplot2::labs( - x = "% spatial rank included", - y = "% highest expression rank recovered") + x = "% spatial rank included", + y = "% highest expression rank recovered" + ) } return(plot_output_handler( @@ -2953,7 +3276,7 @@ plotRecovery <- function(gobject, #' @title cellProximitySpatPlot2D #' @name cellProximitySpatPlot2D -#' @description Visualize 2D cell-cell interactions according to spatial +#' @description Visualize 2D cell-cell interactions according to spatial #' coordinates in ggplot mode #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -2986,45 +3309,48 @@ plotRecovery <- function(gobject, #' g <- GiottoData::loadGiottoMini("visium") #' g <- createSpatialGrid(g, sdimx_stepsize = 5, sdimy_stepsize = 5) #' x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -#' -#' cellProximitySpatPlot2D(gobject = g, cluster_column = "leiden_clus", -#' interaction_name = x) +#' +#' cellProximitySpatPlot2D( +#' gobject = g, cluster_column = "leiden_clus", +#' interaction_name = x +#' ) #' @export -cellProximitySpatPlot2D <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = NULL, - interaction_name = NULL, - cluster_column = NULL, - sdimx = "sdimx", - sdimy = "sdimy", - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximitySpatPlot2D") { +cellProximitySpatPlot2D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = NULL, + interaction_name = NULL, + cluster_column = NULL, + sdimx = "sdimx", + sdimy = "sdimy", + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximitySpatPlot2D") { if (is.null(interaction_name)) { - stop("you need to specific at least one interaction name, run + stop("you need to specific at least one interaction name, run cellProximityEnrichment") } @@ -3074,7 +3400,7 @@ cellProximitySpatPlot2D <- function(gobject, # data.table variables - unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- + unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- x_start <- x_end <- y_start <- y_end <- cell_ID <- NULL cell_IDs_to_keep <- unique(c( @@ -3085,7 +3411,8 @@ cellProximitySpatPlot2D <- function(gobject, if (show_other_cells) { CellType <- strsplit(interaction_name, "--") all_cell_IDs <- cell_metadata[cell_metadata[[ - cluster_column]] == CellType[[1]][1] | + cluster_column + ]] == CellType[[1]][1] | cell_metadata[[cluster_column]] == CellType[[1]][2], ]$cell_ID other_cell_IDs <- setdiff(all_cell_IDs, cell_IDs_to_keep) } @@ -3096,13 +3423,15 @@ cellProximitySpatPlot2D <- function(gobject, cell_locations_metadata <- cell_locations } else { cell_locations_metadata <- merge( - cell_locations, cell_metadata, by = "cell_ID") + cell_locations, cell_metadata, + by = "cell_ID" + ) } # first 2 dimensions need to be defined if (is.null(sdimx) | is.null(sdimy)) { - message("first and second dimension need to be defined, default is + message("first and second dimension need to be defined, default is first 2") sdimx <- "sdimx" sdimy <- "sdimy" @@ -3116,15 +3445,19 @@ cellProximitySpatPlot2D <- function(gobject, if (show_other_network) { pl <- pl + ggplot2::geom_segment( data = spatial_network[!unified_int %in% interaction_name], - aes(x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, - yend = sdimy_end), + aes( + x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, + yend = sdimy_end + ), color = "lightgrey", size = 0.5, alpha = 0.5 ) } pl <- pl + ggplot2::geom_segment( data = spatial_network[unified_int %in% interaction_name], - aes(x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, - yend = sdimy_end), + aes( + x = sdimx_begin, y = sdimy_begin, xend = sdimx_end, + yend = sdimy_end + ), color = network_color, size = 0.5, alpha = 0.5 ) } @@ -3132,7 +3465,7 @@ cellProximitySpatPlot2D <- function(gobject, if (!is.null(spatial_grid) & show_grid == TRUE) { if (is.null(grid_color)) grid_color <- "black" pl <- pl + ggplot2::geom_rect( - data = spatial_grid, + data = spatial_grid, aes(xmin = x_start, xmax = x_end, ymin = y_start, ymax = y_end), color = grid_color, fill = NA ) @@ -3142,22 +3475,22 @@ cellProximitySpatPlot2D <- function(gobject, if (is.null(cell_color)) { cell_color <- "lightblue" pl <- pl + ggplot2::geom_point( - data = cell_locations[!cell_ID %in% cell_IDs_to_keep], + data = cell_locations[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = "lightgrey", + show.legend = show_legend, shape = 21, fill = "lightgrey", size = point_size_other ) pl <- pl + ggplot2::geom_point( - data = cell_locations[cell_ID %in% cell_IDs_to_keep], + data = cell_locations[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, size = point_size_select ) if (show_other_cells) { pl <- pl + ggplot2::geom_point( - data = cell_locations[cell_ID %in% other_cell_IDs], + data = cell_locations[cell_ID %in% other_cell_IDs], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, alpha = point_alpha_other, size = point_size_select * 0.5 ) @@ -3173,21 +3506,21 @@ cellProximitySpatPlot2D <- function(gobject, data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), fill = "lightgrey", shape = 21, size = point_size_other, - color = point_other_border_col, + color = point_other_border_col, stroke = point_other_border_stroke ) pl <- pl + ggplot2::geom_point( - data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], + data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy, fill = cell_color), show.legend = show_legend, shape = 21, size = point_size_select, - color = point_select_border_col, + color = point_select_border_col, stroke = point_select_border_stroke ) if (show_other_cells) { pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[cell_ID %in% other_cell_IDs], aes_string(x = sdimx, y = sdimy, fill = cell_color), - show.legend = show_legend, shape = 21, + show.legend = show_legend, shape = 21, alpha = point_alpha_other, size = point_size_select * 0.5 ) @@ -3200,7 +3533,8 @@ cellProximitySpatPlot2D <- function(gobject, } else if (color_as_factor == TRUE) { number_colors <- length(unique(factor_data)) cell_color_code <- set_default_color_discrete_cell( - instrs = instructions(gobject))(n = number_colors) + instrs = instructions(gobject) + )(n = number_colors) names(cell_color_code) <- unique(factor_data) pl <- pl + ggplot2::scale_fill_manual(values = cell_color_code) } else if (color_as_factor == FALSE) { @@ -3218,17 +3552,17 @@ cellProximitySpatPlot2D <- function(gobject, pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[!cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = "lightgrey", + show.legend = show_legend, shape = 21, fill = "lightgrey", size = point_size_other, - color = point_other_border_col, + color = point_other_border_col, stroke = point_other_border_stroke ) pl <- pl + ggplot2::geom_point( data = cell_locations_metadata[cell_ID %in% cell_IDs_to_keep], aes_string(x = sdimx, y = sdimy), - show.legend = show_legend, shape = 21, fill = cell_color, + show.legend = show_legend, shape = 21, fill = cell_color, size = point_size_select, - color = point_select_border_col, + color = point_select_border_col, stroke = point_select_border_stroke ) } @@ -3263,14 +3597,14 @@ cellProximitySpatPlot2D <- function(gobject, #' @title cellProximitySpatPlot #' @name cellProximitySpatPlot -#' @description Visualize 2D cell-cell interactions according to spatial +#' @description Visualize 2D cell-cell interactions according to spatial #' coordinates in ggplot mode #' @param gobject giotto object #' @inheritDotParams cellProximitySpatPlot2D -gobject #' @returns ggplot #' @details Description of parameters. #' @export -#' @seealso \code{\link{cellProximitySpatPlot2D}} and +#' @seealso \code{\link{cellProximitySpatPlot2D}} and #' \code{\link{cellProximitySpatPlot3D}} for 3D cellProximitySpatPlot <- function(gobject, ...) { cellProximitySpatPlot2D(gobject = gobject, ...) @@ -3279,7 +3613,7 @@ cellProximitySpatPlot <- function(gobject, ...) { #' @title cellProximitySpatPlot3D #' @name cellProximitySpatPlot3D -#' @description Visualize 3D cell-cell interactions according to spatial +#' @description Visualize 3D cell-cell interactions according to spatial #' coordinates in plotly mode #' @inheritParams data_access_params #' @inheritParams plot_output_params @@ -3310,38 +3644,39 @@ cellProximitySpatPlot <- function(gobject, ...) { #' @returns plotly #' @details Description of parameters. #' @export -cellProximitySpatPlot3D <- function(gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = "sdimx", - sdimy = "sdimy", - sdimz = "sdimz", - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = TRUE, - show_network = TRUE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 4, - point_size_other = 2, - point_alpha_other = 0.5, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximitySpatPlot3D", - ...) { +cellProximitySpatPlot3D <- function( + gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = "sdimx", + sdimy = "sdimy", + sdimz = "sdimz", + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = TRUE, + show_network = TRUE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 4, + point_size_other = 2, + point_alpha_other = 0.5, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximitySpatPlot3D", + ...) { if (is.null(sdimz)) { pl <- .cellProximityVisPlot_2D_plotly( gobject = gobject, diff --git a/R/spdep.R b/R/spdep.R index d557c0021..6abbe9cad 100644 --- a/R/spdep.R +++ b/R/spdep.R @@ -2,12 +2,12 @@ #' #' @param gobject Input a Giotto object. #' @param method Specify a method name to compute auto correlation. -#' Available methods include +#' Available methods include #' \code{"geary.test", "lee.test", "lm.morantest","moran.test"}. #' @param spat_unit spatial unit #' @param feat_type feature type #' @param expression_values expression values to use, default = normalized -#' @param spatial_network_to_use spatial network to use, +#' @param spatial_network_to_use spatial network to use, #' default = spatial_network #' @param verbose be verbose #' @param return_gobject if FALSE, results are returned as data.table. @@ -15,17 +15,18 @@ #' @returns A data table with computed values for each feature. #' @examples #' g <- GiottoData::loadGiottoMini("visium") -#' +#' #' spdepAutoCorr(g) #' @export -spdepAutoCorr <- function(gobject, - method = c("geary.test", "lee.test", "lm.morantest", "moran.test"), - spat_unit = NULL, - feat_type = NULL, - expression_values = "normalized", - spatial_network_to_use = "spatial_network", - return_gobject = FALSE, - verbose = FALSE) { +spdepAutoCorr <- function( + gobject, + method = c("geary.test", "lee.test", "lm.morantest", "moran.test"), + spat_unit = NULL, + feat_type = NULL, + expression_values = "normalized", + spatial_network_to_use = "spatial_network", + return_gobject = FALSE, + verbose = FALSE) { # Check and match the specified method argument method <- match.arg(method) @@ -77,8 +78,11 @@ spdepAutoCorr <- function(gobject, result_list <- list() progressr::with_progress({ - if (step_size > 1) pb <- progressr::progressor( - steps = nfeats / step_size) + if (step_size > 1) { + pb <- progressr::progressor( + steps = nfeats / step_size + ) + } result_list <- lapply_flex( seq_along(feat), future.packages = c("data.table", "spdep"), @@ -91,7 +95,8 @@ spdepAutoCorr <- function(gobject, # Extract the estimated value from the result result_value <- callSpdepVar$estimate[1] temp_dt <- data.table( - feat_ID = feat[feat_value], value = result_value) + feat_ID = feat[feat_value], value = result_value + ) # increment progress if (exists("pb")) if (feat_value %% step_size == 0) pb() return(temp_dt) @@ -141,11 +146,11 @@ callSpdep <- function(method, ...) { # Check if 'method' argument is NULL, if so, stop with an error if (is.null(method)) { - stop("The 'method' argument has not been provided. Please specify a + stop("The 'method' argument has not been provided. Please specify a valid method.") } - # Check if 'method' exists in the 'spdep' package, if not, stop with an + # Check if 'method' exists in the 'spdep' package, if not, stop with an # error method <- try(eval(get(method, envir = loadNamespace("spdep"))), silent = TRUE @@ -186,7 +191,7 @@ callSpdep <- function(method, ...) { if (all(!(names(methodparam)) %in% allArgs)) { stop("Invalid or missing parameters.") } - # A vector of specified arguments that trigger + # A vector of specified arguments that trigger # 'spW <- spweights.constants()' requiredArgs <- c("n", "n1", "n2", "n3", "nn", "S0", "S1", "S2") @@ -194,7 +199,7 @@ callSpdep <- function(method, ...) { if (any(requiredArgs %in% allArgs)) { # Obtain arguments from 'spweights.constants' spW <- spdep::spweights.constants(listw = methodparam$listw) - # Combine user-provided arguments and 'spW', checking only against + # Combine user-provided arguments and 'spW', checking only against # 'feats' value combinedParams <- append(methodparam, spW) } else { diff --git a/R/variable_genes.R b/R/variable_genes.R index 85a04dea9..c65c0cb89 100644 --- a/R/variable_genes.R +++ b/R/variable_genes.R @@ -1,9 +1,10 @@ -.calc_cov_group_hvf <- function(feat_in_cells_detected, - nr_expression_groups = 20, - zscore_threshold = 1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL) { +.calc_cov_group_hvf <- function( + feat_in_cells_detected, + nr_expression_groups = 20, + zscore_threshold = 1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL) { # NSE vars cov_group_zscore <- cov <- selected <- mean_expr <- NULL @@ -11,13 +12,17 @@ prob_sequence <- seq(0, 1, steps) prob_sequence[length(prob_sequence)] <- 1 expr_group_breaks <- stats::quantile( - feat_in_cells_detected$mean_expr, probs = prob_sequence) + feat_in_cells_detected$mean_expr, + probs = prob_sequence + ) ## remove zero's from cuts if there are too many and make first group zero if (any(duplicated(expr_group_breaks))) { m_expr_vector <- feat_in_cells_detected$mean_expr expr_group_breaks <- stats::quantile( - m_expr_vector[m_expr_vector > 0], probs = prob_sequence) + m_expr_vector[m_expr_vector > 0], + probs = prob_sequence + ) expr_group_breaks[[1]] <- 0 } @@ -30,11 +35,13 @@ feat_in_cells_detected[, expr_groups := expr_groups] feat_in_cells_detected[, cov_group_zscore := scale(cov), by = expr_groups] feat_in_cells_detected[, selected := ifelse( - cov_group_zscore > zscore_threshold, "yes", "no")] + cov_group_zscore > zscore_threshold, "yes", "no" + )] if (any(isTRUE(show_plot), isTRUE(return_plot), isTRUE(save_plot))) { pl <- .create_cov_group_hvf_plot( - feat_in_cells_detected, nr_expression_groups) + feat_in_cells_detected, nr_expression_groups + ) return(list(dt = feat_in_cells_detected, pl = pl)) } else { @@ -48,11 +55,12 @@ -.calc_cov_loess_hvf <- function(feat_in_cells_detected, - difference_in_cov = 0.1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL) { +.calc_cov_loess_hvf <- function( + feat_in_cells_detected, + difference_in_cov = 0.1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL) { # NSE vars cov_diff <- pred_cov_feats <- selected <- NULL @@ -61,18 +69,25 @@ var_col <- "cov" loess_model_sample <- stats::loess( - loess_formula, data = feat_in_cells_detected) + loess_formula, + data = feat_in_cells_detected + ) feat_in_cells_detected$pred_cov_feats <- stats::predict( - loess_model_sample, newdata = feat_in_cells_detected) + loess_model_sample, + newdata = feat_in_cells_detected + ) feat_in_cells_detected[, cov_diff := get(var_col) - pred_cov_feats, - by = seq_len(nrow(feat_in_cells_detected))] + by = seq_len(nrow(feat_in_cells_detected)) + ] data.table::setorder(feat_in_cells_detected, -cov_diff) feat_in_cells_detected[, selected := ifelse( - cov_diff > difference_in_cov, "yes", "no")] + cov_diff > difference_in_cov, "yes", "no" + )] if (any(isTRUE(show_plot), isTRUE(return_plot), isTRUE(save_plot))) { pl <- .create_cov_loess_hvf_plot( - feat_in_cells_detected, difference_in_cov, var_col) + feat_in_cells_detected, difference_in_cov, var_col + ) return(list(dt = feat_in_cells_detected, pl = pl)) } else { @@ -82,13 +97,14 @@ -.calc_var_hvf <- function(scaled_matrix, - var_threshold = 1.5, - var_number = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - use_parallel = FALSE) { +.calc_var_hvf <- function( + scaled_matrix, + var_threshold = 1.5, + var_number = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + use_parallel = FALSE) { # NSE vars var <- selected <- NULL @@ -167,10 +183,9 @@ } -.calc_expr_cov_stats_parallel <- function( - expr_values, - expression_threshold, - cores = GiottoUtils::determine_cores()) { +.calc_expr_cov_stats_parallel <- function(expr_values, + expression_threshold, + cores = GiottoUtils::determine_cores()) { # NSE vars cov <- sd <- mean_expr <- NULL @@ -269,30 +284,31 @@ #' #' calculateHVF(g) #' @export -calculateHVF <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - method = c("cov_groups", "cov_loess", "var_p_resid"), - reverse_log_scale = FALSE, - logbase = 2, - expression_threshold = 0, - nr_expression_groups = 20, - zscore_threshold = 1.5, - HVFname = "hvf", - difference_in_cov = 0.1, - var_threshold = 1.5, - var_number = NULL, - random_subset = NULL, - set_seed = TRUE, - seed_number = 1234, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "HVFplot", - return_gobject = TRUE, - verbose = TRUE) { +calculateHVF <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + method = c("cov_groups", "cov_loess", "var_p_resid"), + reverse_log_scale = FALSE, + logbase = 2, + expression_threshold = 0, + nr_expression_groups = 20, + zscore_threshold = 1.5, + HVFname = "hvf", + difference_in_cov = 0.1, + var_threshold = 1.5, + var_number = NULL, + random_subset = NULL, + set_seed = TRUE, + seed_number = 1234, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "HVFplot", + return_gobject = TRUE, + verbose = TRUE) { # NSE vars selected <- feats <- var <- NULL @@ -320,7 +336,8 @@ calculateHVF <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -339,7 +356,8 @@ calculateHVF <- function(gobject, if (isTRUE(set_seed)) set.seed(seed = seed_number) random_selection <- sort(sample( - seq_len(ncol(expr_values)), random_subset)) + seq_len(ncol(expr_values)), random_subset + )) expr_values <- expr_values[, random_selection] if (isTRUE(set_seed)) GiottoUtils::random_seed() @@ -349,19 +367,24 @@ calculateHVF <- function(gobject, # print, return and save parameters show_plot <- ifelse(is.na(show_plot), - readGiottoInstructions(gobject, param = "show_plot"), - show_plot) + readGiottoInstructions(gobject, param = "show_plot"), + show_plot + ) save_plot <- ifelse(is.na(save_plot), - readGiottoInstructions(gobject, param = "save_plot"), - save_plot) + readGiottoInstructions(gobject, param = "save_plot"), + save_plot + ) return_plot <- ifelse(is.na(return_plot), - readGiottoInstructions(gobject, param = "return_plot"), - return_plot) + readGiottoInstructions(gobject, param = "return_plot"), + return_plot + ) # method to use method <- match.arg( - method, choices = c("cov_groups", "cov_loess", "var_p_resid")) + method, + choices = c("cov_groups", "cov_loess", "var_p_resid") + ) # select function to use based on whether future parallelization is planned calc_cov_fun <- ifelse( use_parallel, @@ -418,8 +441,11 @@ calculateHVF <- function(gobject, if (isTRUE(save_plot)) { do.call( GiottoVisuals::all_plots_save_function, - c(list(gobject = gobject, plot_object = pl, - default_save_name = default_save_name), save_param)) + c(list( + gobject = gobject, plot_object = pl, + default_save_name = default_save_name + ), save_param) + ) } ## return plot @@ -446,8 +472,10 @@ calculateHVF <- function(gobject, column_names_feat_metadata <- colnames(feat_metadata[]) if (HVFname %in% column_names_feat_metadata) { - vmsg(.v = verbose, HVFname, - " has already been used, will be overwritten") + vmsg( + .v = verbose, HVFname, + " has already been used, will be overwritten" + ) feat_metadata[][, eval(HVFname) := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -494,8 +522,7 @@ calculateHVF <- function(gobject, # plot generation #### -.create_cov_group_hvf_plot <- function( - feat_in_cells_detected, nr_expression_groups) { +.create_cov_group_hvf_plot <- function(feat_in_cells_detected, nr_expression_groups) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( @@ -504,7 +531,8 @@ calculateHVF <- function(gobject, ) pl <- pl + ggplot2::geom_point( data = feat_in_cells_detected, - ggplot2::aes_string(x = "mean_expr", y = "cov", color = "selected")) + ggplot2::aes_string(x = "mean_expr", y = "cov", color = "selected") + ) pl <- pl + ggplot2::scale_color_manual( values = c(no = "lightgrey", yes = "orange"), guide = ggplot2::guide_legend( @@ -513,7 +541,9 @@ calculateHVF <- function(gobject, ) ) pl <- pl + ggplot2::facet_wrap( - ~expr_groups, ncol = nr_expression_groups, scales = "free_x") + ~expr_groups, + ncol = nr_expression_groups, scales = "free_x" + ) pl <- pl + ggplot2::theme( axis.text.x = ggplot2::element_blank(), strip.text = ggplot2::element_text(size = 4) @@ -523,8 +553,7 @@ calculateHVF <- function(gobject, } -.create_cov_loess_hvf_plot <- function( - feat_in_cells_detected, difference_in_cov, var_col) { +.create_cov_loess_hvf_plot <- function(feat_in_cells_detected, difference_in_cov, var_col) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( @@ -533,16 +562,21 @@ calculateHVF <- function(gobject, ) pl <- pl + ggplot2::geom_point( data = feat_in_cells_detected, - ggplot2::aes_string(x = "log(mean_expr)", y = var_col, - color = "selected")) + ggplot2::aes_string( + x = "log(mean_expr)", y = var_col, + color = "selected" + ) + ) pl <- pl + ggplot2::geom_line( data = feat_in_cells_detected, ggplot2::aes_string(x = "log(mean_expr)", y = "pred_cov_feats"), - color = "blue") + color = "blue" + ) hvg_line <- paste0("pred_cov_feats+", difference_in_cov) pl <- pl + ggplot2::geom_line( data = feat_in_cells_detected, - ggplot2::aes_string(x = "log(mean_expr)", y = hvg_line), linetype = 2) + ggplot2::aes_string(x = "log(mean_expr)", y = hvg_line), linetype = 2 + ) pl <- pl + ggplot2::labs(x = "log(mean expression)", y = var_col) pl <- pl + ggplot2::scale_color_manual( values = c(no = "lightgrey", yes = "orange"), @@ -558,7 +592,8 @@ calculateHVF <- function(gobject, .create_calc_var_hvf_plot <- function(dt_res) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::geom_point( - data = dt_res, aes_string(x = "rank", y = "var", color = "selected")) + data = dt_res, aes_string(x = "rank", y = "var", color = "selected") + ) pl <- pl + ggplot2::scale_x_reverse() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( axis.title = ggplot2::element_text(size = 14), diff --git a/R/wnn.R b/R/wnn.R index 293e5d645..588d11888 100644 --- a/R/wnn.R +++ b/R/wnn.R @@ -13,22 +13,23 @@ #' @param w_name_modality_2 name for modality 2 weights #' @param verbose be verbose #' -#' @returns A Giotto object with integrated UMAP (integrated.umap) within the -#' dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the +#' @returns A Giotto object with integrated UMAP (integrated.umap) within the +#' dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the #' cellular metadata. #' @export -runWNN <- function(gobject, - spat_unit = "cell", - modality_1 = "rna", - modality_2 = "protein", - pca_name_modality_1 = "rna.pca", - pca_name_modality_2 = "protein.pca", - k = 20, - integrated_feat_type = NULL, - matrix_result_name = NULL, - w_name_modality_1 = NULL, - w_name_modality_2 = NULL, - verbose = FALSE) { +runWNN <- function( + gobject, + spat_unit = "cell", + modality_1 = "rna", + modality_2 = "protein", + pca_name_modality_1 = "rna.pca", + pca_name_modality_2 = "protein.pca", + k = 20, + integrated_feat_type = NULL, + matrix_result_name = NULL, + w_name_modality_1 = NULL, + w_name_modality_2 = NULL, + verbose = FALSE) { # validate Giotto object if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -36,9 +37,9 @@ runWNN <- function(gobject, # validate modalities if (!modality_1 %in% names( - gobject@dimension_reduction$cells[[spat_unit]]) || - !modality_2 %in% names(gobject@dimension_reduction$cells[[spat_unit]])) - { + gobject@dimension_reduction$cells[[spat_unit]] + ) || + !modality_2 %in% names(gobject@dimension_reduction$cells[[spat_unit]])) { stop(paste(modality_1, "and", modality_2, " pca must exist")) } @@ -124,18 +125,24 @@ runWNN <- function(gobject, ## modality1 modality1 - if (verbose) - message(paste("Calculating low dimensional cell-cell distances for", - modality_1)) + if (verbose) { + message(paste( + "Calculating low dimensional cell-cell distances for", + modality_1 + )) + } all_cell_distances_1_1 <- dist(pca_1) all_cell_distances_1_1 <- as.matrix(all_cell_distances_1_1) ## modality2 modality2 - if (verbose) - message(paste("Calculating low dimensional cell-cell distances for", - modality_2)) + if (verbose) { + message(paste( + "Calculating low dimensional cell-cell distances for", + modality_2 + )) + } all_cell_distances_2_2 <- dist(pca_2) @@ -234,13 +241,15 @@ runWNN <- function(gobject, if (nrow(jaccard_values == 20)) { further_cell_cell_distances <- all_cell_distances_1_1[ - cell_a, jaccard_values$to] + cell_a, jaccard_values$to + ] } else { further_cell_cell_distances <- tail(sort(all_cell_distances_1_1[ - cell_a, ]), 20) + cell_a, + ]), 20) } - modality1_sigma_i[cell_a] <- mean(further_cell_cell_distances) + modality1_sigma_i[cell_a] <- mean(further_cell_cell_distances) # cell-specific kernel bandwidth. } @@ -254,13 +263,15 @@ runWNN <- function(gobject, if (nrow(jaccard_values == 20)) { further_cell_cell_distances <- all_cell_distances_2_2[ - cell_a, jaccard_values$to] + cell_a, jaccard_values$to + ] } else { further_cell_cell_distances <- tail(sort(all_cell_distances_2_2[ - cell_a, ]), 20) + cell_a, + ]), 20) } - modality2_sigma_i[cell_a] <- mean(further_cell_cell_distances) + modality2_sigma_i[cell_a] <- mean(further_cell_cell_distances) # cell-specific kernel bandwidth. } @@ -282,13 +293,13 @@ runWNN <- function(gobject, d_modality1_i_modality1_knn1 <- sqrt(sum(( modality1_i - modality1_knn1)^2)) - difference_distances <- d_modality1_i_modality2_predicted - + difference_distances <- d_modality1_i_modality2_predicted - d_modality1_i_modality1_knn1 max_value <- max(c(difference_distances, 0)) theta_1_1[[cell_a]] <- exp(( - -max_value) / (modality1_sigma_i[cell_a] - - d_modality1_i_modality1_knn1)) + -max_value) / (modality1_sigma_i[cell_a] - + d_modality1_i_modality1_knn1)) } ## modality2 modality2 @@ -304,13 +315,13 @@ runWNN <- function(gobject, d_modality2_i_modality2_knn1 <- sqrt(sum(( modality2_i - modality2_knn1)^2)) - difference_distances <- d_modality2_i_modality2_predicted - + difference_distances <- d_modality2_i_modality2_predicted - d_modality2_i_modality2_knn1 max_value <- max(c(difference_distances, 0)) theta_modality2_modality2[[cell_a]] <- exp(( - -max_value) / (modality2_sigma_i[cell_a] - - d_modality2_i_modality2_knn1)) + -max_value) / (modality2_sigma_i[cell_a] - + d_modality2_i_modality2_knn1)) } @@ -327,13 +338,13 @@ runWNN <- function(gobject, d_modality1_i_modality1_knn1 <- sqrt(sum(( modality1_i - modality1_knn1)^2)) - difference_distances <- d_modality1_i_modality2_predicted - + difference_distances <- d_modality1_i_modality2_predicted - d_modality1_i_modality1_knn1 max_value <- max(c(difference_distances, 0)) theta_modality1_modality2[[cell_a]] <- exp(( - -max_value) / (modality1_sigma_i[cell_a] - - d_modality1_i_modality1_knn1)) + -max_value) / (modality1_sigma_i[cell_a] - + d_modality1_i_modality1_knn1)) } @@ -350,13 +361,13 @@ runWNN <- function(gobject, d_modality2_i_modality2_knn1 <- sqrt(sum(( modality2_i - modality2_knn1)^2)) - difference_distances <- d_modality2_i_modality1_predicted - + difference_distances <- d_modality2_i_modality1_predicted - d_modality2_i_modality2_knn1 max_value <- max(c(difference_distances, 0)) theta_modality2_modality1[[cell_a]] <- exp(( - -max_value) / (modality2_sigma_i[cell_a] - - d_modality2_i_modality2_knn1)) + -max_value) / (modality2_sigma_i[cell_a] - + d_modality2_i_modality2_knn1)) } @@ -370,7 +381,7 @@ runWNN <- function(gobject, ratio_modality1 <- list() for (cell_a in cell_names) { - ratio_modality1[[cell_a]] <- theta_1_1[[cell_a]] / + ratio_modality1[[cell_a]] <- theta_1_1[[cell_a]] / (theta_modality1_modality2[[cell_a]] + epsilon) } @@ -379,7 +390,7 @@ runWNN <- function(gobject, ratio_modality2 <- list() for (cell_a in cell_names) { - ratio_modality2[[cell_a]] <- theta_modality2_modality2[[cell_a]] / + ratio_modality2[[cell_a]] <- theta_modality2_modality2[[cell_a]] / (theta_modality2_modality1[[cell_a]] + epsilon) } @@ -392,7 +403,7 @@ runWNN <- function(gobject, names(w_modality1) <- cell_names for (cell_a in cell_names) { - w_modality1[cell_a] <- exp(ratio_modality1[[cell_a]]) / + w_modality1[cell_a] <- exp(ratio_modality1[[cell_a]]) / (exp(ratio_modality1[[cell_a]]) + exp(ratio_modality2[[cell_a]])) } @@ -400,7 +411,7 @@ runWNN <- function(gobject, names(w_modality2) <- cell_names for (cell_a in cell_names) { - w_modality2[cell_a] <- exp(ratio_modality2[[cell_a]]) / + w_modality2[cell_a] <- exp(ratio_modality2[[cell_a]]) / (exp(ratio_modality1[[cell_a]]) + exp(ratio_modality2[[cell_a]])) } @@ -421,15 +432,15 @@ runWNN <- function(gobject, ## theta_modality1 - theta_modality1_cella_cellb <- exp(-1 * (all_cell_distances_1_1 / - modality1_sigma_i)**kernelpower) + theta_modality1_cella_cellb <- exp(-1 * (all_cell_distances_1_1 / + modality1_sigma_i)**kernelpower) ## theta_modality2 - theta_modality2_cella_cellb <- exp(-1 * (all_cell_distances_2_2 / - modality2_sigma_i)**kernelpower) + theta_modality2_cella_cellb <- exp(-1 * (all_cell_distances_2_2 / + modality2_sigma_i)**kernelpower) ## theta_weighted - theta_weighted <- w_modality1 * theta_modality1_cella_cellb + + theta_weighted <- w_modality1 * theta_modality1_cella_cellb + w_modality2 * theta_modality2_cella_cellb @@ -511,18 +522,19 @@ runWNN <- function(gobject, #' #' @returns A Giotto object with integrated UMAP #' @export -runIntegratedUMAP <- function(gobject, - spat_unit = "cell", - modality1 = "rna", - modality2 = "protein", - integrated_feat_type = NULL, - integration_method = "WNN", - matrix_result_name = "theta_weighted_matrix", - k = 20, - spread = 5, - min_dist = 0.01, - force = FALSE, - ...) { +runIntegratedUMAP <- function( + gobject, + spat_unit = "cell", + modality1 = "rna", + modality2 = "protein", + integrated_feat_type = NULL, + integration_method = "WNN", + matrix_result_name = "theta_weighted_matrix", + k = 20, + spread = 5, + min_dist = 0.01, + force = FALSE, + ...) { if (is.null(integrated_feat_type)) { integrated_feat_type <- paste0(modality1, "_", modality2) } @@ -537,7 +549,8 @@ runIntegratedUMAP <- function(gobject, theta_weighted[is.na(theta_weighted)] <- 0 if (is.null(gobject@nn_network[[spat_unit]][[ - modality1]]$kNN$integrated_kNN) || force == TRUE) { + modality1 + ]]$kNN$integrated_kNN) || force == TRUE) { ################# Calculate integrated Nearest Neighbors ############### message("Calculating integrated Nearest Neighbors") @@ -545,7 +558,7 @@ runIntegratedUMAP <- function(gobject, cell_names <- colnames(theta_weighted) nn_network <- dbscan::kNN(x = theta_weighted, k = k, sort = TRUE) - from <- to <- weight <- distance <- from_cell_ID <- to_cell_ID <- + from <- to <- weight <- distance <- from_cell_ID <- to_cell_ID <- shared <- NULL nn_network_dt <- data.table::data.table( from = rep( @@ -559,7 +572,8 @@ runIntegratedUMAP <- function(gobject, nn_network_dt[, `:=`(from_cell_ID, cell_names[from])] nn_network_dt[, `:=`(to_cell_ID, cell_names[to])] all_index <- unique( - x = c(nn_network_dt$from_cell_ID, nn_network_dt$to_cell_ID)) + x = c(nn_network_dt$from_cell_ID, nn_network_dt$to_cell_ID) + ) ################################ Create igraph ######################### @@ -649,7 +663,8 @@ runIntegratedUMAP <- function(gobject, ## add umap gobject@dimension_reduction$cells[[spat_unit]][[modality1]][["umap"]][[ - "integrated.umap"]] <- list( + "integrated.umap" + ]] <- list( name = "integrated.umap", feat_type = modality1, spat_unit = spat_unit, @@ -659,7 +674,8 @@ runIntegratedUMAP <- function(gobject, ) gobject@dimension_reduction$cells[[spat_unit]][[modality2]][["umap"]][[ - "integrated.umap"]] <- list( + "integrated.umap" + ]] <- list( name = "integrated.umap", feat_type = modality2, spat_unit = spat_unit, diff --git a/R/zzz.R b/R/zzz.R index ee9961f77..9cfa6b5af 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -42,5 +42,4 @@ # GiottoUtils # # ----------- # init_option("giotto.verbose", TRUE) - } diff --git a/man/addCellIntMetadata.Rd b/man/addCellIntMetadata.Rd index 67181b306..58907a270 100644 --- a/man/addCellIntMetadata.Rd +++ b/man/addCellIntMetadata.Rd @@ -52,6 +52,8 @@ all other cell types found within the selected cell type column. \examples{ g <- GiottoData::loadGiottoMini("visium") -addCellIntMetadata(g, cluster_column = "leiden_clus", -cell_interaction = "custom_leiden") +addCellIntMetadata(g, + cluster_column = "leiden_clus", + cell_interaction = "custom_leiden" +) } diff --git a/man/addHMRF.Rd b/man/addHMRF.Rd index 6a751fa6d..2e398efcb 100644 --- a/man/addHMRF.Rd +++ b/man/addHMRF.Rd @@ -39,11 +39,12 @@ Add selected results from doHMRF to the giotto object g <- GiottoData::loadGiottoMini("visium") spat_genes <- binSpect(g) -output_folder <- file.path(tempdir(), 'HMRF') -if(!file.exists(output_folder)) dir.create(output_folder, recursive = TRUE) +output_folder <- file.path(tempdir(), "HMRF") +if (!file.exists(output_folder)) dir.create(output_folder, recursive = TRUE) out <- doHMRF( - g, spatial_genes = spat_genes[seq_len(20)]$feats, + g, + spatial_genes = spat_genes[seq_len(20)]$feats, expression_values = "scaled", spatial_network_name = "Delaunay_network", k = 6, betas = c(0, 10, 5), @@ -59,6 +60,6 @@ g <- addHMRF( ) spatPlot( - gobject = g, cell_color = 'HMRF_k6_b.20', + gobject = g, cell_color = "HMRF_k6_b.20", ) } diff --git a/man/cellProximityBarplot.Rd b/man/cellProximityBarplot.Rd index aef957080..bebc29ba1 100644 --- a/man/cellProximityBarplot.Rd +++ b/man/cellProximityBarplot.Rd @@ -47,9 +47,12 @@ Create barplot from cell-cell proximity scores \details{ This function creates a barplot that shows the spatial proximity enrichment or depletion of cell type pairs. - @examples - g <- GiottoData::loadGiottoMini("visium") - - cellProximityBarplot(gobject = g, - CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus")) +} +\examples{ +g <- GiottoData::loadGiottoMini("visium") + +cellProximityBarplot( + gobject = g, + CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus") +) } diff --git a/man/cellProximityEnrichmentEachSpot.Rd b/man/cellProximityEnrichmentEachSpot.Rd index f0b68545c..0d269746b 100644 --- a/man/cellProximityEnrichmentEachSpot.Rd +++ b/man/cellProximityEnrichmentEachSpot.Rd @@ -38,10 +38,11 @@ x <- findMarkers_one_vs_all(g, ) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) diff --git a/man/cellProximityEnrichmentSpots.Rd b/man/cellProximityEnrichmentSpots.Rd index f3d41eff1..e618dbbc3 100644 --- a/man/cellProximityEnrichmentSpots.Rd +++ b/man/cellProximityEnrichmentSpots.Rd @@ -66,13 +66,15 @@ each node (spot) in the spatial network. \examples{ g <- GiottoData::loadGiottoMini("visium") x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) diff --git a/man/cellProximityHeatmap.Rd b/man/cellProximityHeatmap.Rd index 9cca766d1..7939e7d52 100644 --- a/man/cellProximityHeatmap.Rd +++ b/man/cellProximityHeatmap.Rd @@ -27,7 +27,7 @@ cellProximityHeatmap( \item{order_cell_types}{order cell types based on enrichment correlation} -\item{color_breaks}{numerical vector of length 3 to represent min, mean +\item{color_breaks}{numerical vector of length 3 to represent min, mean and maximum} \item{color_names}{character color vector of length 3} diff --git a/man/cellProximityNetwork.Rd b/man/cellProximityNetwork.Rd index faf220369..1a67385bd 100644 --- a/man/cellProximityNetwork.Rd +++ b/man/cellProximityNetwork.Rd @@ -42,10 +42,10 @@ cellProximityNetwork( \item{rescale_edge_weights}{rescale edge weights (boolean)} -\item{edge_weight_range_depletion}{numerical vector of length 2 to rescale +\item{edge_weight_range_depletion}{numerical vector of length 2 to rescale depleted edge weights} -\item{edge_weight_range_enrichment}{numerical vector of length 2 to rescale +\item{edge_weight_range_enrichment}{numerical vector of length 2 to rescale enriched edge weights} \item{layout}{layout algorithm to use to draw nodes and edges} diff --git a/man/cellProximitySpatPlot.Rd b/man/cellProximitySpatPlot.Rd index 8b5875886..d6bd596a5 100644 --- a/man/cellProximitySpatPlot.Rd +++ b/man/cellProximitySpatPlot.Rd @@ -53,13 +53,13 @@ named vector of colors} ggplot } \description{ -Visualize 2D cell-cell interactions according to spatial +Visualize 2D cell-cell interactions according to spatial coordinates in ggplot mode } \details{ Description of parameters. } \seealso{ -\code{\link{cellProximitySpatPlot2D}} and +\code{\link{cellProximitySpatPlot2D}} and \code{\link{cellProximitySpatPlot3D}} for 3D } diff --git a/man/cellProximitySpatPlot2D.Rd b/man/cellProximitySpatPlot2D.Rd index c3183fa5c..d6fbf5549 100644 --- a/man/cellProximitySpatPlot2D.Rd +++ b/man/cellProximitySpatPlot2D.Rd @@ -114,7 +114,7 @@ are used when this is TRUE. continuous colors when FALSE.} ggplot } \description{ -Visualize 2D cell-cell interactions according to spatial +Visualize 2D cell-cell interactions according to spatial coordinates in ggplot mode } \details{ @@ -125,6 +125,8 @@ g <- GiottoData::loadGiottoMini("visium") g <- createSpatialGrid(g, sdimx_stepsize = 5, sdimy_stepsize = 5) x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -cellProximitySpatPlot2D(gobject = g, cluster_column = "leiden_clus", -interaction_name = x) +cellProximitySpatPlot2D( + gobject = g, cluster_column = "leiden_clus", + interaction_name = x +) } diff --git a/man/cellProximitySpatPlot3D.Rd b/man/cellProximitySpatPlot3D.Rd index 7ee4885fb..79c56c1c2 100644 --- a/man/cellProximitySpatPlot3D.Rd +++ b/man/cellProximitySpatPlot3D.Rd @@ -111,7 +111,7 @@ are used when this is TRUE. continuous colors when FALSE.} plotly } \description{ -Visualize 3D cell-cell interactions according to spatial +Visualize 3D cell-cell interactions according to spatial coordinates in plotly mode } \details{ diff --git a/man/cellProximityVisPlot.Rd b/man/cellProximityVisPlot.Rd index f095c8629..7b13edc05 100644 --- a/man/cellProximityVisPlot.Rd +++ b/man/cellProximityVisPlot.Rd @@ -111,7 +111,7 @@ cellProximityVisPlot( ggplot or plotly } \description{ -Visualize cell-cell interactions according to spatial +Visualize cell-cell interactions according to spatial coordinates } \details{ @@ -122,6 +122,8 @@ g <- GiottoData::loadGiottoMini("visium") g <- createSpatialGrid(g, sdimx_stepsize = 5, sdimy_stepsize = 5) x <- cellProximityEnrichment(g, cluster_column = "leiden_clus") -cellProximityVisPlot(gobject = g, interaction_name = x, -cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy") +cellProximityVisPlot( + gobject = g, interaction_name = x, + cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy" +) } diff --git a/man/cellProximityVisPlot_internals.Rd b/man/cellProximityVisPlot_internals.Rd index 631140393..226436e52 100644 --- a/man/cellProximityVisPlot_internals.Rd +++ b/man/cellProximityVisPlot_internals.Rd @@ -102,13 +102,13 @@ Create the plots for `cellProximityVisPlot()` } \section{Functions}{ \itemize{ -\item \code{.cellProximityVisPlot_2D_ggplot()}: Visualize 2D cell-cell +\item \code{.cellProximityVisPlot_2D_ggplot()}: Visualize 2D cell-cell interactions according to spatial coordinates in ggplot mode -\item \code{.cellProximityVisPlot_2D_plotly()}: Visualize 2D cell-cell +\item \code{.cellProximityVisPlot_2D_plotly()}: Visualize 2D cell-cell interactions according to spatial coordinates in plotly mode -\item \code{.cellProximityVisPlot_3D_plotly()}: Visualize 3D cell-cell +\item \code{.cellProximityVisPlot_3D_plotly()}: Visualize 3D cell-cell interactions according to spatial coordinates in plotly mode }} diff --git a/man/clusterSpatialCorFeats.Rd b/man/clusterSpatialCorFeats.Rd index 7c85aeaff..467bb8c49 100644 --- a/man/clusterSpatialCorFeats.Rd +++ b/man/clusterSpatialCorFeats.Rd @@ -33,5 +33,7 @@ Cluster based on spatially correlated features g <- GiottoData::loadGiottoMini("visium") clusterSpatialCorFeats(spatCorObject = detectSpatialCorFeats( -g, method = "network")) + g, + method = "network" +)) } diff --git a/man/combCCcom.Rd b/man/combCCcom.Rd index 61d253f98..76ed08ff2 100644 --- a/man/combCCcom.Rd +++ b/man/combCCcom.Rd @@ -43,11 +43,15 @@ data.tables \examples{ g <- GiottoData::loadGiottoMini("visium") -exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -random_iter = 10) +exprCC <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +) +spatialCC <- spatCellCellcom( + gobject = g, cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", + random_iter = 10 +) combCCcom(spatialCC = spatialCC, exprCC = exprCC) } diff --git a/man/combineICF.Rd b/man/combineICF.Rd index 6cf6d1413..8f59fd35c 100644 --- a/man/combineICF.Rd +++ b/man/combineICF.Rd @@ -54,8 +54,10 @@ Combine ICF scores in a pairwise manner. } \examples{ g <- GiottoData::loadGiottoMini("visium") -g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +g_icf <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) combineICF(g_icf) } diff --git a/man/combineInteractionChangedFeats.Rd b/man/combineInteractionChangedFeats.Rd index 506685ce2..9f535f930 100644 --- a/man/combineInteractionChangedFeats.Rd +++ b/man/combineInteractionChangedFeats.Rd @@ -55,8 +55,9 @@ Combine ICF scores in a pairwise manner. \examples{ g <- GiottoData::loadGiottoMini("visium") g_icf <- findInteractionChangedFeats(g, -cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) combineInteractionChangedFeats(g_icf) } diff --git a/man/compareCellAbundance.Rd b/man/compareCellAbundance.Rd index 9fac528ad..0c96076de 100644 --- a/man/compareCellAbundance.Rd +++ b/man/compareCellAbundance.Rd @@ -35,12 +35,15 @@ Compare cell types percent per polygon \examples{ ## Plot interactive polygons g <- GiottoData::loadGiottoMini("visium") -my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +my_polygon_coords <- data.frame( + poly_ID = rep("polygon1", 3), + sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +) ## Add polygon coordinates to Giotto object my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -name = "selections") + name = "selections" +) g <- addGiottoPolygons( gobject = g, gpolygons = list(my_giotto_polygons) diff --git a/man/comparePolygonExpression.Rd b/man/comparePolygonExpression.Rd index ddbcfff98..238091577 100644 --- a/man/comparePolygonExpression.Rd +++ b/man/comparePolygonExpression.Rd @@ -43,12 +43,15 @@ Compare gene expression between polygon areas \examples{ ## Plot interactive polygons g <- GiottoData::loadGiottoMini("visium") -my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +my_polygon_coords <- data.frame( + poly_ID = rep("polygon1", 3), + sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +) ## Add polygon coordinates to Giotto object my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -name = "selections") + name = "selections" +) g <- addGiottoPolygons( gobject = g, gpolygons = list(my_giotto_polygons) diff --git a/man/convertEnsemblToGeneSymbol.Rd b/man/convertEnsemblToGeneSymbol.Rd index 88d106181..ab4148656 100644 --- a/man/convertEnsemblToGeneSymbol.Rd +++ b/man/convertEnsemblToGeneSymbol.Rd @@ -15,7 +15,7 @@ convertEnsemblToGeneSymbol(matrix, species = c("mouse", "human")) expression matrix with gene symbols as rownames } \description{ -This function convert ensembl gene IDs from a matrix to +This function convert ensembl gene IDs from a matrix to official gene symbols } \details{ diff --git a/man/createArchRProj.Rd b/man/createArchRProj.Rd index b43e48c6f..dd5971525 100644 --- a/man/createArchRProj.Rd +++ b/man/createArchRProj.Rd @@ -25,19 +25,19 @@ These files can be in one of the following formats: (i) scATAC tabix files, \item{genome}{A string indicating the default genome to be used for all ArchR functions. Currently supported values include "hg19","hg38","mm9", and "mm10". -This value is stored as a global environment variable, not part of the +This value is stored as a global environment variable, not part of the ArchRProject. This can be overwritten on a per-function basis using the given function's geneAnnotationand genomeAnnotation parameter. For something other than one of -the currently supported, see createGeneAnnnotation() and +the currently supported, see createGeneAnnnotation() and createGenomeAnnnotation()} -\item{createArrowFiles_params}{list of parameters passed to +\item{createArrowFiles_params}{list of parameters passed to `ArchR::createArrowFiles`} \item{ArchRProject_params}{list of parameters passed to `ArchR::ArchRProject`} -\item{addIterativeLSI_params}{list of parameters passed to +\item{addIterativeLSI_params}{list of parameters passed to `ArchR::addIterativeLSI`} \item{threads}{number of threads to use. Default = `ArchR::getArchRThreads()`} @@ -47,7 +47,7 @@ createGenomeAnnnotation()} \item{verbose}{Default = TRUE} } \value{ -An ArchR project with GeneScoreMatrix, TileMatrix, and +An ArchR project with GeneScoreMatrix, TileMatrix, and TileMatrix-based LSI } \description{ diff --git a/man/createCrossSection.Rd b/man/createCrossSection.Rd index c80dce48b..76df8790e 100644 --- a/man/createCrossSection.Rd +++ b/man/createCrossSection.Rd @@ -111,7 +111,7 @@ g <- GiottoData::loadGiottoMini("starmap") g <- createCrossSection( gobject = g, method = "equation", - equation=c(0,1,0,600), + equation = c(0, 1, 0, 600), extend_ratio = 0.6, name = "new_cs", return_gobject = TRUE diff --git a/man/createGiottoCosMxObject.Rd b/man/createGiottoCosMxObject.Rd index 5343dde5a..2d0a13235 100644 --- a/man/createGiottoCosMxObject.Rd +++ b/man/createGiottoCosMxObject.Rd @@ -20,11 +20,11 @@ createGiottoCosMxObject( \item{cosmx_dir}{full path to the exported cosmx directory} \item{data_to_use}{which type(s) of expression data to build the gobject with -Default is \code{'all'} information available. \code{'subcellular'} loads -the transcript coordinates only. \code{'aggregate'} loads the provided +Default is \code{'all'} information available. \code{'subcellular'} loads +the transcript coordinates only. \code{'aggregate'} loads the provided aggregated expression matrix.} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} @@ -49,9 +49,9 @@ Given the path to a CosMx experiment directory, creates a Giotto object. } \details{ -[\strong{Expected Directory}] This function generates a giotto object when -given a link to a cosmx output directory. It expects the following items -within the directory where the \strong{bolded} portions are what this +[\strong{Expected Directory}] This function generates a giotto object when +given a link to a cosmx output directory. It expects the following items +within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{CellComposite} (folder of images)} @@ -66,23 +66,23 @@ function matches against: [\strong{Workflows}] Workflow to use is accessed through the data_to_use param \itemize{ - \item{'all' - loads and requires subcellular information from tx_file and + \item{'all' - loads and requires subcellular information from tx_file and fov_positions_file - and also the existing aggregated information + and also the existing aggregated information (expression, spatial locations, and metadata) from exprMat_file and metadata_file.} - \item{'subcellular' - loads and requires subcellular information from + \item{'subcellular' - loads and requires subcellular information from tx_file and fov_positions_file only.} - \item{'aggregate' - loads and requires the existing aggregate information - (expression, spatial locations, and metadata) from exprMat_file and + \item{'aggregate' - loads and requires the existing aggregate information + (expression, spatial locations, and metadata) from exprMat_file and metadata_file.} } -[\strong{Images}] Images in the default CellComposite, CellLabels, +[\strong{Images}] Images in the default CellComposite, CellLabels, CompartmentLabels, and CellOverlay -folders will be loaded as giotto largeImage objects in all workflows as -long as they are available. Additionally, CellComposite images will be +folders will be loaded as giotto largeImage objects in all workflows as +long as they are available. Additionally, CellComposite images will be converted to giotto image objects, making plotting with these image objects more responsive when accessing them from a server. \code{\link{showGiottoImageNames}} can be used to see the available images. diff --git a/man/createGiottoMerscopeObject.Rd b/man/createGiottoMerscopeObject.Rd index d93a7caa5..23722c1de 100644 --- a/man/createGiottoMerscopeObject.Rd +++ b/man/createGiottoMerscopeObject.Rd @@ -37,10 +37,10 @@ createGiottoMerscopeObject( \arguments{ \item{merscope_dir}{full path to the exported merscope directory} -\item{data_to_use}{which of either the 'subcellular' or 'aggregate' +\item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} -\item{FOVs}{which FOVs to use when building the subcellular object. +\item{FOVs}{which FOVs to use when building the subcellular object. (default is NULL) NULL loads all FOVs (very slow)} @@ -66,13 +66,13 @@ provided} a giotto object } \description{ -Given the path to a MERSCOPE experiment directory, creates a +Given the path to a MERSCOPE experiment directory, creates a Giotto object. } \details{ -[\strong{Expected Directory}] This function generates a giotto object when -given a link to a MERSCOPE output directory. It expects the following items -within the directory where the \strong{bolded} portions are what this +[\strong{Expected Directory}] This function generates a giotto object when +given a link to a MERSCOPE output directory. It expects the following items +within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{cell_boundaries} (folder .hdf5 files)} @@ -84,10 +84,10 @@ function matches against: } \section{Functions}{ \itemize{ -\item \code{.createGiottoMerscopeObject_subcellular()}: Create giotto object with +\item \code{.createGiottoMerscopeObject_subcellular()}: Create giotto object with 'subcellular' workflow -\item \code{.createGiottoMerscopeObject_aggregate()}: Create giotto object with 'aggregate' +\item \code{.createGiottoMerscopeObject_aggregate()}: Create giotto object with 'aggregate' workflow }} diff --git a/man/createGiottoObjectfromArchR.Rd b/man/createGiottoObjectfromArchR.Rd index 35c8db106..1b7748a2b 100644 --- a/man/createGiottoObjectfromArchR.Rd +++ b/man/createGiottoObjectfromArchR.Rd @@ -20,10 +20,10 @@ createGiottoObjectfromArchR( \item{expression_feat}{Giotto object available features (e.g. atac, rna, ...)} -\item{spatial_locs}{data.table or data.frame with coordinates for cell +\item{spatial_locs}{data.table or data.frame with coordinates for cell centroids} -\item{sampleNames}{A character vector containing the ArchR project sample +\item{sampleNames}{A character vector containing the ArchR project sample name} \item{...}{additional arguments passed to `createGiottoObject`} diff --git a/man/createGiottoVisiumObject.Rd b/man/createGiottoVisiumObject.Rd index 6c7c17fae..3229754b9 100644 --- a/man/createGiottoVisiumObject.Rd +++ b/man/createGiottoVisiumObject.Rd @@ -39,7 +39,7 @@ createGiottoVisiumObject( \item{h5_tissue_positions_path}{path to tissue locations (.csv file)} -\item{h5_image_png_path}{path to tissue .png file (optional). Image +\item{h5_image_png_path}{path to tissue .png file (optional). Image autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \item{h5_json_scalefactors_path}{path to .json scalefactors (optional)} @@ -56,15 +56,15 @@ autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \item{ymin_adj}{deprecated} -\item{instructions}{list of instructions or output result from +\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{expression_matrix_class}{class of expression matrix to use +\item{expression_matrix_class}{class of expression matrix to use (e.g. 'dgCMatrix', 'DelayedArray')} \item{h5_file}{optional path to create an on-disk h5 file} -\item{cores}{how many cores or threads to use to read data if paths are +\item{cores}{how many cores or threads to use to read data if paths are provided} \item{verbose}{be verbose} @@ -73,7 +73,7 @@ provided} giotto object } \description{ -Create Giotto object directly from a 10X visium folder. Also +Create Giotto object directly from a 10X visium folder. Also accepts visium H5 outputs. } \details{ diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 0fddd0694..e738694d6 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -28,24 +28,24 @@ createGiottoXeniumObject( \item{load_format}{files formats from which to load the data. Either `csv` or `parquet` currently supported.} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene expression matrix} -\item{gene_column_index}{which column from the features or genes .tsv file +\item{gene_column_index}{which column from the features or genes .tsv file to use for row ids} -\item{bounds_to_load}{vector of boundary information to load +\item{bounds_to_load}{vector of boundary information to load (e.g. \code{'cell'} or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both at the same time.)} -\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included +\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} -\item{key_list}{(advanced) list of grep-based keywords to split the +\item{key_list}{(advanced) list of grep-based keywords to split the subcellular feature detections by feature type. See details} \item{instructions}{list of instructions or output result @@ -60,7 +60,7 @@ provided} giotto object } \description{ -Given the path to a Xenium experiment output folder, creates a +Given the path to a Xenium experiment output folder, creates a Giotto object } \details{ @@ -68,20 +68,20 @@ Giotto object Xenium provides info on feature detections that include more than only the Gene Expression specific probes. Additional probes for QC are included: \emph{blank codeword}, \emph{negative control codeword}, and -\emph{negative control probe}. These additional QC probes each occupy and -are treated as their own feature types so that they can largely remain +\emph{negative control probe}. These additional QC probes each occupy and +are treated as their own feature types so that they can largely remain independent of the gene expression information. [\strong{key_list}] Related to \code{data_to_use = 'subcellular'} workflow only: -Additional QC probe information is in the subcellular feature detections -information and must be separated from the gene expression information +Additional QC probe information is in the subcellular feature detections +information and must be separated from the gene expression information during processing. -The QC probes have prefixes that allow them to be selected from the rest of +The QC probes have prefixes that allow them to be selected from the rest of the feature IDs. -Giotto uses a named list of keywords (\code{key_list}) to select these QC -probes, with the list names being the names that will be assigned as the -feature type of these feature detections. The default list is used when +Giotto uses a named list of keywords (\code{key_list}) to select these QC +probes, with the list names being the names that will be assigned as the +feature type of these feature detections. The default list is used when \code{key_list} = NULL. Default list: diff --git a/man/createSpatialGenomicsObject.Rd b/man/createSpatialGenomicsObject.Rd index 1571bcf4b..98650b81f 100644 --- a/man/createSpatialGenomicsObject.Rd +++ b/man/createSpatialGenomicsObject.Rd @@ -9,7 +9,7 @@ createSpatialGenomicsObject(sg_dir = NULL, instructions = NULL) \arguments{ \item{sg_dir}{full path to the exported Spatial Genomics directory} -\item{instructions}{new instructions +\item{instructions}{new instructions (e.g. result from createGiottoInstructions)} } \value{ diff --git a/man/detectSpatialCorFeats.Rd b/man/detectSpatialCorFeats.Rd index 834197249..42b69eb39 100644 --- a/man/detectSpatialCorFeats.Rd +++ b/man/detectSpatialCorFeats.Rd @@ -102,7 +102,9 @@ detectSpatialCorFeats(g, method = "network") # This analysis can also be performed with data outside of the gobject detectSpatialCorFeatsMatrix( expression_matrix = getExpression( - g, output = "matrix"), + g, + output = "matrix" + ), method = "network", spatial_network = getSpatialNetwork(g, output = "networkDT") ) diff --git a/man/detectSpatialPatterns.Rd b/man/detectSpatialPatterns.Rd index 242811b5f..0547b4b78 100644 --- a/man/detectSpatialPatterns.Rd +++ b/man/detectSpatialPatterns.Rd @@ -45,7 +45,7 @@ Steps to identify spatial patterns: \itemize{ * 1. average gene expression for cells within a grid, see createSpatialGrid * 2. perform PCA on the average grid expression profiles - * 3. convert variance of principal components (PCs) to z-scores and + * 3. convert variance of principal components (PCs) to z-scores and select PCs based on a z-score threshold } } diff --git a/man/doClusterProjection.Rd b/man/doClusterProjection.Rd index 6ccea3858..7bc3dd27b 100644 --- a/man/doClusterProjection.Rd +++ b/man/doClusterProjection.Rd @@ -78,7 +78,9 @@ Giotto object. \examples{ g <- GiottoData::loadGiottoMini("visium") x <- pDataDT(g) -g_small <- subsetGiotto(g, cell_ids = sample(x$cell_ID,300)) -doClusterProjection(target_gobject = g, source_gobject = g_small, -source_cluster_labels = "leiden_clus") +g_small <- subsetGiotto(g, cell_ids = sample(x$cell_ID, 300)) +doClusterProjection( + target_gobject = g, source_gobject = g_small, + source_cluster_labels = "leiden_clus" +) } diff --git a/man/doFeatureSetEnrichment.Rd b/man/doFeatureSetEnrichment.Rd index 28387127f..359b7d3e5 100644 --- a/man/doFeatureSetEnrichment.Rd +++ b/man/doFeatureSetEnrichment.Rd @@ -28,39 +28,39 @@ doFeatureSetEnrichment( \item{path_to_GSEA}{path to GSEA command line executable, e.g. gsea-XXX.jar. See details (1.) for more information.} -\item{GSEA_dataset}{path to a Human/Mouse collection from GSEA, e.g. +\item{GSEA_dataset}{path to a Human/Mouse collection from GSEA, e.g. Hallmarks C1. See details (2.) for more information.} -\item{GSEA_ranked_file}{path to .rnk file for GSEA. See details (3.) for +\item{GSEA_ranked_file}{path to .rnk file for GSEA. See details (3.) for more information} -\item{output_folder}{path to which the GSEA results will be saved. Default +\item{output_folder}{path to which the GSEA results will be saved. Default is current working directory.} -\item{name_analysis_folder}{default output subdirectory prefix to which +\item{name_analysis_folder}{default output subdirectory prefix to which results are saved. - Will live within output_folder; equivalent of + Will live within output_folder; equivalent of "Analysis Name" in GSEA Application.} -\item{collapse}{only 'false' is supported. This will use your dataset as-is, +\item{collapse}{only 'false' is supported. This will use your dataset as-is, in the original format.} -\item{mode}{option selected in Advanced Field "Collapsing Mode for +\item{mode}{option selected in Advanced Field "Collapsing Mode for Probe Sets => 1 gene"} \item{norm}{normalization mode; only meandiv is supported.} \item{nperm}{number of permutations, default 1000} -\item{scoring_scheme}{Default "weighted", equivalent of +\item{scoring_scheme}{Default "weighted", equivalent of "enrichment statistic" in GSEA Application} \item{plot_top_x}{Default 20, number of enrichment plots to produce.} -\item{set_max}{default 500, equivalent to "max size; exclude larger sets" +\item{set_max}{default 500, equivalent to "max size; exclude larger sets" in Basic Fields in GSEA Application} -\item{set_min}{default 15, equivalent to "min size; exclude smaller sets" +\item{set_min}{default 15, equivalent to "min size; exclude smaller sets" in Basic Fields in GSEA Application} } \value{ @@ -74,11 +74,11 @@ NECESSARY PREREQUISITES 1. download and install the COMMAND line (all platforms) gsea-XXX.jar https://www.gsea-msigdb.org/gsea/downloads.jsp 1.1. download zip file -1.2. unzip and move to known location +1.2. unzip and move to known location (e.g. in path/to/your/applications/gsea/GSEA_4.3.2) 2. download the Human and Mouse collections -https://www.gsea-msigdb.org/gsea/msigdb/index.jsp or zipped folder +https://www.gsea-msigdb.org/gsea/msigdb/index.jsp or zipped folder https://www.gsea-msigdb.org/gsea/downloads.jsp (all downloaded) 3. create ranked gene lists diff --git a/man/doGiottoClustree.Rd b/man/doGiottoClustree.Rd index cc605fabf..ff99d4c6a 100644 --- a/man/doGiottoClustree.Rd +++ b/man/doGiottoClustree.Rd @@ -68,8 +68,10 @@ will be returned. \examples{ g <- GiottoData::loadGiottoMini("visium") -doGiottoClustree(gobject = g, res_vector = c(0.5, 0.8), return_plot = FALSE, -show_plot = FALSE, save_plot = FALSE) +doGiottoClustree( + gobject = g, res_vector = c(0.5, 0.8), return_plot = FALSE, + show_plot = FALSE, save_plot = FALSE +) } \seealso{ \code{\link{doLeidenCluster}} diff --git a/man/doHMRF.Rd b/man/doHMRF.Rd index 578ec8200..0e2975699 100644 --- a/man/doHMRF.Rd +++ b/man/doHMRF.Rd @@ -88,6 +88,8 @@ Description of HMRF parameters ... g <- GiottoData::loadGiottoMini("visium") spat_genes <- binSpect(g) -doHMRF(g, spatial_genes = spat_genes[seq_len(10)]$feats, -output_folder = tempdir()) +doHMRF(g, + spatial_genes = spat_genes[seq_len(10)]$feats, + output_folder = tempdir() +) } diff --git a/man/dot-createGiottoCosMxObject_all.Rd b/man/dot-createGiottoCosMxObject_all.Rd index 44e70f5d7..40c3a1ed6 100644 --- a/man/dot-createGiottoCosMxObject_all.Rd +++ b/man/dot-createGiottoCosMxObject_all.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/convenience.R \name{.createGiottoCosMxObject_all} \alias{.createGiottoCosMxObject_all} -\title{Load and create a CosMx Giotto object from subcellular and aggregate +\title{Load and create a CosMx Giotto object from subcellular and aggregate info} \usage{ .createGiottoCosMxObject_all( @@ -22,7 +22,7 @@ info} \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} @@ -41,13 +41,13 @@ from \code{\link[GiottoClass]{createGiottoInstructions}}} giotto object } \description{ -Load and create a CosMx Giotto object from subcellular and aggregate +Load and create a CosMx Giotto object from subcellular and aggregate info } \details{ -Both \emph{subcellular} +Both \emph{subcellular} (subellular transcript detection information) and -\emph{aggregate} (aggregated detection count matrices by cell polygon from +\emph{aggregate} (aggregated detection count matrices by cell polygon from NanoString) data will be loaded in. The two will be separated into 'cell' and 'cell_agg' spatial units in order to denote the difference in origin of the two. diff --git a/man/dot-createGiottoCosMxObject_subcellular.Rd b/man/dot-createGiottoCosMxObject_subcellular.Rd index 17d07ada9..cc5c273b2 100644 --- a/man/dot-createGiottoCosMxObject_subcellular.Rd +++ b/man/dot-createGiottoCosMxObject_subcellular.Rd @@ -18,7 +18,7 @@ \arguments{ \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} diff --git a/man/dot-createGiottoXeniumObject_subcellular.Rd b/man/dot-createGiottoXeniumObject_subcellular.Rd index 75013fe11..11f6b946b 100644 --- a/man/dot-createGiottoXeniumObject_subcellular.Rd +++ b/man/dot-createGiottoXeniumObject_subcellular.Rd @@ -19,7 +19,7 @@ \item{key_list}{regex-based search keys for feature IDs to allow separation into separate giottoPoints objects by feat_type} -\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included +\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} \item{instructions}{list of instructions or output result diff --git a/man/dot-determine_switch_string_equal.Rd b/man/dot-determine_switch_string_equal.Rd index 932a9c746..40ecefa0d 100644 --- a/man/dot-determine_switch_string_equal.Rd +++ b/man/dot-determine_switch_string_equal.Rd @@ -30,7 +30,7 @@ Where: y_m is a cluster number from the resized spatial unit n is the number of clusters -Clusters are determined to be corresponding based on % overlap in cell_IDs +Clusters are determined to be corresponding based on % overlap in cell_IDs in each cluster. } \keyword{internal} diff --git a/man/dot-determine_switch_string_unequal.Rd b/man/dot-determine_switch_string_unequal.Rd index c32b81af6..a1b921233 100644 --- a/man/dot-determine_switch_string_unequal.Rd +++ b/man/dot-determine_switch_string_unequal.Rd @@ -18,7 +18,7 @@ switch_str, a vector of corresponding cluster numbers in strings Determine switch string unequal } \details{ -determines how to create a string in the format +determines how to create a string in the format c("x_1-y_1", "x_2-y_2"..."x_n, y_m") Where: x_n is a cluster number from the original spatial unit diff --git a/man/dot-get_img_corners.Rd b/man/dot-get_img_corners.Rd index 84f5f0225..5b8100883 100644 --- a/man/dot-get_img_corners.Rd +++ b/man/dot-get_img_corners.Rd @@ -13,7 +13,7 @@ data.frame } \description{ -finds four corner spatial coords of giottoImages or +finds four corner spatial coords of giottoImages or magick-images } \keyword{internal} diff --git a/man/dot-kmeans_arma_subset_binarize.Rd b/man/dot-kmeans_arma_subset_binarize.Rd index 1ecde0ea0..abbb010ad 100644 --- a/man/dot-kmeans_arma_subset_binarize.Rd +++ b/man/dot-kmeans_arma_subset_binarize.Rd @@ -16,7 +16,7 @@ numeric } \description{ -create binarized scores from a subsetted vector using +create binarized scores from a subsetted vector using kmeans_arma } \keyword{internal} diff --git a/man/dot-load_cosmx_folder_subcellular.Rd b/man/dot-load_cosmx_folder_subcellular.Rd index 3f70253c6..e96bc86f8 100644 --- a/man/dot-load_cosmx_folder_subcellular.Rd +++ b/man/dot-load_cosmx_folder_subcellular.Rd @@ -19,7 +19,7 @@ list } \description{ loads in the feature detections information. Note that the mask -images are still required for a working subcellular object, and those are +images are still required for a working subcellular object, and those are loaded in \code{\link{.createGiottoCosMxObject_subcellular}} } \keyword{internal} diff --git a/man/dot-plotRecovery_sub.Rd b/man/dot-plotRecovery_sub.Rd index d6856cabe..ef8fa56f9 100644 --- a/man/dot-plotRecovery_sub.Rd +++ b/man/dot-plotRecovery_sub.Rd @@ -21,7 +21,7 @@ ggplot } \description{ -Plots recovery plot to compare ligand-receptor rankings from +Plots recovery plot to compare ligand-receptor rankings from spatial and expression information } \keyword{internal} diff --git a/man/dot-read_xenium_folder.Rd b/man/dot-read_xenium_folder.Rd index 255328100..f526ef2a8 100644 --- a/man/dot-read_xenium_folder.Rd +++ b/man/dot-read_xenium_folder.Rd @@ -19,7 +19,7 @@ \item{data_to_use}{which type(s) of expression data to build the gobject with (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} -\item{bounds_to_load}{vector of boundary information to load +\item{bounds_to_load}{vector of boundary information to load (e.g. \code{'cell'} or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both at the same time.)} @@ -27,7 +27,7 @@ at the same time.)} \item{load_format}{files formats from which to load the data. Either `csv` or `parquet` currently supported.} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{verbose}{be verbose when building Giotto object} diff --git a/man/dot-rigid_transform_spatial_locations.Rd b/man/dot-rigid_transform_spatial_locations.Rd index abaf84995..187c02dbb 100644 --- a/man/dot-rigid_transform_spatial_locations.Rd +++ b/man/dot-rigid_transform_spatial_locations.Rd @@ -17,7 +17,7 @@ spatlocs } \description{ -Performs appropriate transforms to align spatial locations +Performs appropriate transforms to align spatial locations with registered images. } \keyword{internal} diff --git a/man/dot-specific_CCCScores_spots.Rd b/man/dot-specific_CCCScores_spots.Rd index 67fbc794b..bd7daf6df 100644 --- a/man/dot-specific_CCCScores_spots.Rd +++ b/man/dot-specific_CCCScores_spots.Rd @@ -80,31 +80,31 @@ expected based on a reshuffled null distribution of feature expression values in cells that are spatially in proximity to each other. \itemize{ * LR_comb: Pair of ligand and receptor - * lig_cell_type: cell type to assess expression level of ligand - * lig_expr: average expression residual (observed - DWLS_predicted) of - ligand in lig_cell_type - * ligand: ligand name - * rec_cell_type: cell type to assess expression level of receptor - * rec_expr: average expression residual(observed - DWLS_predicted) of + * lig_cell_type: cell type to assess expression level of ligand + * lig_expr: average expression residual (observed - DWLS_predicted) of + ligand in lig_cell_type + * ligand: ligand name + * rec_cell_type: cell type to assess expression level of receptor + * rec_expr: average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type - * receptor: receptor name - * LR_expr: combined average ligand and receptor expression - * lig_nr: total number of cells from lig_cell_type that spatially interact - with cells from rec_cell_type - * rec_nr: total number of cells from rec_cell_type that spatially interact - with cells from lig_cell_type - * rand_expr: average combined ligand and receptor expression residual from - random spatial permutations - * av_diff: average difference between LR_expr and rand_expr over all random - spatial permutations - * sd_diff: (optional) standard deviation of the difference between LR_expr - and rand_expr over all random spatial permutations - * z_score: (optinal) z-score - * log2fc: LR_expr - rand_expr - * pvalue: p-value - * LR_cell_comb: cell type pair combination - * p.adj: adjusted p-value - * PI: significance score: log2fc \* -log10(p.adj) + * receptor: receptor name + * LR_expr: combined average ligand and receptor expression + * lig_nr: total number of cells from lig_cell_type that spatially interact + with cells from rec_cell_type + * rec_nr: total number of cells from rec_cell_type that spatially interact + with cells from lig_cell_type + * rand_expr: average combined ligand and receptor expression residual from + random spatial permutations + * av_diff: average difference between LR_expr and rand_expr over all random + spatial permutations + * sd_diff: (optional) standard deviation of the difference between LR_expr + and rand_expr over all random spatial permutations + * z_score: (optinal) z-score + * log2fc: LR_expr - rand_expr + * pvalue: p-value + * LR_cell_comb: cell type pair combination + * p.adj: adjusted p-value + * PI: significance score: log2fc \* -log10(p.adj) } } \keyword{internal} diff --git a/man/dot-trakem2_rigid_transforms.Rd b/man/dot-trakem2_rigid_transforms.Rd index bd267ec00..29ea8da33 100644 --- a/man/dot-trakem2_rigid_transforms.Rd +++ b/man/dot-trakem2_rigid_transforms.Rd @@ -13,7 +13,7 @@ rigid registration transformation values } \description{ -Extract rigid registration transformation values from FIJI +Extract rigid registration transformation values from FIJI TrakEM2 xml file. Generated through register_virtual_stack_slices. } \keyword{internal} diff --git a/man/exprCellCellcom.Rd b/man/exprCellCellcom.Rd index b571651d3..00d5dd3a9 100644 --- a/man/exprCellCellcom.Rd +++ b/man/exprCellCellcom.Rd @@ -70,6 +70,8 @@ More details will follow soon. \examples{ g <- GiottoData::loadGiottoMini("visium") -exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") +exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +) } diff --git a/man/findCellTypesFromEnrichment.Rd b/man/findCellTypesFromEnrichment.Rd index 093b2f84a..e919112a5 100644 --- a/man/findCellTypesFromEnrichment.Rd +++ b/man/findCellTypesFromEnrichment.Rd @@ -17,7 +17,7 @@ findCellTypesFromEnrichment( \item{spat_unit}{spatial unit in which the enrichment information is stored} -\item{feat_type}{feature type for which the enrichment information was +\item{feat_type}{feature type for which the enrichment information was calculated} \item{enrichment_name}{name of the spatial enrichment diff --git a/man/findICF.Rd b/man/findICF.Rd index fa9d2ec13..18ecb872f 100644 --- a/man/findICF.Rd +++ b/man/findICF.Rd @@ -79,10 +79,10 @@ other cell types. The results data.table in the `icfObject` contains - at least - the following columns: \itemize{ * features: All or selected list of tested features - * sel: average feature expression in the interacting cells from the target - cell type - * other: average feature expression in the NOT-interacting cells from the - target cell type + * sel: average feature expression in the interacting cells from the target + cell type + * other: average feature expression in the NOT-interacting cells from the + target cell type * log2fc: log2 fold-change between sel and other * diff: spatial expression difference between sel and other * p.value: associated p-value @@ -99,8 +99,10 @@ other cell types. The results data.table in the `icfObject` contains \examples{ g <- GiottoData::loadGiottoMini("visium") -findICF(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +findICF(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) } \seealso{ \code{\link{findInteractionChangedFeats}} diff --git a/man/findICFSpot.Rd b/man/findICFSpot.Rd index 184d4feb5..04389711c 100644 --- a/man/findICFSpot.Rd +++ b/man/findICFSpot.Rd @@ -84,14 +84,14 @@ The results data.table in the icfObject contains - at least - the following columns: \itemize{ * features: All or selected list of tested features - * sel: average feature expression residual in the interacting cells from - the target cell type - * other: average feature expression residual in the NOT-interacting cells - from the target cell type - * pcc_sel: correlation between cell proximity score and expression residual + * sel: average feature expression residual in the interacting cells from + the target cell type + * other: average feature expression residual in the NOT-interacting cells + from the target cell type + * pcc_sel: correlation between cell proximity score and expression residual in the interacting cells from the target cell type - * pcc_other: correlation between cell proximity score and expression - residual in the NOT-interacting cells from the target cell type + * pcc_other: correlation between cell proximity score and expression + residual in the NOT-interacting cells from the target cell type * pcc_diff: correlation difference between sel and other * p.value: associated p-value * p.adj: adjusted p-value @@ -105,17 +105,21 @@ the following columns: \examples{ g <- GiottoData::loadGiottoMini("visium") x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) g_expression <- getExpression(g, output = "matrix") -findICFSpot(g, spat_unit = "cell", feat_type = "rna", -ave_celltype_exp = g_expression, spatial_network_name = "spatial_network") +findICFSpot(g, + spat_unit = "cell", feat_type = "rna", + ave_celltype_exp = g_expression, spatial_network_name = "spatial_network" +) } diff --git a/man/findInteractionChangedFeats.Rd b/man/findInteractionChangedFeats.Rd index 985c84dce..701761c95 100644 --- a/man/findInteractionChangedFeats.Rd +++ b/man/findInteractionChangedFeats.Rd @@ -79,10 +79,10 @@ other cell types. The results data.table in the icfObject contains - at least - the following columns: \itemize{ * features: All or selected list of tested features - * sel: average feature expression in the interacting cells from the target - cell type - * other: average feature expression in the NOT-interacting cells from the - target cell type + * sel: average feature expression in the interacting cells from the target + cell type + * other: average feature expression in the NOT-interacting cells from the + target cell type * log2fc: log2 fold-change between sel and other * diff: spatial expression difference between sel and other * p.value: associated p-value @@ -99,6 +99,8 @@ other cell types. The results data.table in the icfObject contains \examples{ g <- GiottoData::loadGiottoMini("visium") -findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) } diff --git a/man/findMastMarkers.Rd b/man/findMastMarkers.Rd index fe2114267..09f100047 100644 --- a/man/findMastMarkers.Rd +++ b/man/findMastMarkers.Rd @@ -63,6 +63,8 @@ MAST might take a long time to run and finish \examples{ g <- GiottoData::loadGiottoMini("visium") -findMastMarkers(gobject = g, cluster_column = "leiden_clus", group_1 = 1, -group_2 = 2) +findMastMarkers( + gobject = g, cluster_column = "leiden_clus", group_1 = 1, + group_2 = 2 +) } diff --git a/man/findNetworkNeighbors.Rd b/man/findNetworkNeighbors.Rd index 827c3edd7..04fb0942a 100644 --- a/man/findNetworkNeighbors.Rd +++ b/man/findNetworkNeighbors.Rd @@ -34,6 +34,8 @@ the selected spatial network. \examples{ g <- GiottoData::loadGiottoMini("visium") -findNetworkNeighbors(gobject = g, spatial_network_name = "spatial_network", -source_cell_ids = c("AACTCGATGGCGCAGT-1", "GGCTGGCTAGCTTAAA-1")) +findNetworkNeighbors( + gobject = g, spatial_network_name = "spatial_network", + source_cell_ids = c("AACTCGATGGCGCAGT-1", "GGCTGGCTAGCTTAAA-1") +) } diff --git a/man/get10Xmatrix.Rd b/man/get10Xmatrix.Rd index 65883902b..70b4a6eaa 100644 --- a/man/get10Xmatrix.Rd +++ b/man/get10Xmatrix.Rd @@ -14,31 +14,31 @@ get10Xmatrix( \arguments{ \item{path_to_data}{path to the 10X folder} -\item{gene_column_index}{which column from the features or genes .tsv file +\item{gene_column_index}{which column from the features or genes .tsv file to use for row ids} \item{remove_zero_rows}{removes rows with sum equal to zero} -\item{split_by_type}{split into multiple matrices based on 3rd column of +\item{split_by_type}{split into multiple matrices based on 3rd column of features.tsv(.gz)} } \value{ sparse expression matrix from 10X } \description{ -This function creates an expression matrix from a 10X +This function creates an expression matrix from a 10X structured folder } \details{ -A typical 10X folder is named raw_feature_bc_matrix or +A typical 10X folder is named raw_feature_bc_matrix or filtered_feature_bc_matrix and it has 3 files: \itemize{ \item{barcodes.tsv(.gz)} \item{features.tsv(.gz) or genes.tsv(.gz)} \item{matrix.mtx(.gz)} } -By default the first column of the features or genes .tsv file will be used, +By default the first column of the features or genes .tsv file will be used, however if multiple -annotations are provided (e.g. ensembl gene ids and gene symbols) the user +annotations are provided (e.g. ensembl gene ids and gene symbols) the user can select another column. } diff --git a/man/get10Xmatrix_h5.Rd b/man/get10Xmatrix_h5.Rd index 81802562c..22950009c 100644 --- a/man/get10Xmatrix_h5.Rd +++ b/man/get10Xmatrix_h5.Rd @@ -14,23 +14,23 @@ get10Xmatrix_h5( \arguments{ \item{path_to_data}{path to the 10X .h5 file} -\item{gene_ids}{use gene symbols (default) or ensembl ids for the gene +\item{gene_ids}{use gene symbols (default) or ensembl ids for the gene expression matrix} \item{remove_zero_rows}{removes rows with sum equal to zero} -\item{split_by_type}{split into multiple matrices based on 3rd column of +\item{split_by_type}{split into multiple matrices based on 3rd column of features.tsv(.gz)} } \value{ (list of) sparse expression matrix from 10X } \description{ -This function creates an expression matrix from a 10X h5 file +This function creates an expression matrix from a 10X h5 file path } \details{ -If the .h5 10x file has multiple classes of features -(e.g. expression vs QC probes) or modalities (e.g. RNA and protein), and +If the .h5 10x file has multiple classes of features +(e.g. expression vs QC probes) or modalities (e.g. RNA and protein), and \code{split_by_type} param is \code{TRUE}, multiple matrices will be returned } diff --git a/man/getBalancedSpatCoexpressionFeats.Rd b/man/getBalancedSpatCoexpressionFeats.Rd index 0a8595780..ccc796273 100644 --- a/man/getBalancedSpatCoexpressionFeats.Rd +++ b/man/getBalancedSpatCoexpressionFeats.Rd @@ -38,7 +38,7 @@ balanced manner There are 3 different ways of selecting features from the spatial co-expression modules \itemize{ - * 1. weighted: Features are ranked based on summarized pairwise + * 1. weighted: Features are ranked based on summarized pairwise co-expression scores * 2. random: A random selection of features, set seed for reproducibility * 3. informed: Features are selected based on prior information/ranking diff --git a/man/getCellsFromPolygon.Rd b/man/getCellsFromPolygon.Rd index 4b7f12e08..56fa70eea 100644 --- a/man/getCellsFromPolygon.Rd +++ b/man/getCellsFromPolygon.Rd @@ -34,12 +34,15 @@ Get cells located within the polygons area \examples{ ## Plot interactive polygons g <- GiottoData::loadGiottoMini("visium") -my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +my_polygon_coords <- data.frame( + poly_ID = rep("polygon1", 3), + sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +) ## Add polygon coordinates to Giotto object my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -name = "selections") + name = "selections" +) g <- addGiottoPolygons( gobject = g, gpolygons = list(my_giotto_polygons) diff --git a/man/loadHMRF.Rd b/man/loadHMRF.Rd index 17ca2b838..000d323a0 100644 --- a/man/loadHMRF.Rd +++ b/man/loadHMRF.Rd @@ -32,10 +32,14 @@ load previous HMRF \examples{ g <- GiottoData::loadGiottoMini("visium") x <- tempdir() -doHMRF(g, spatial_genes = c("Gna12", "Ccnd2"), output_folder = x, -betas = c(0, 2, 50)) +doHMRF(g, + spatial_genes = c("Gna12", "Ccnd2"), output_folder = x, + betas = c(0, 2, 50) +) -loadHMRF(output_folder_used = x, betas_used = c(0, 2, 50), -python_path_used = NULL) +loadHMRF( + output_folder_used = x, betas_used = c(0, 2, 50), + python_path_used = NULL +) } diff --git a/man/load_merscope_folder.Rd b/man/load_merscope_folder.Rd index d796bfa5b..ab1f888ed 100644 --- a/man/load_merscope_folder.Rd +++ b/man/load_merscope_folder.Rd @@ -33,10 +33,10 @@ ) } \arguments{ -\item{dir_items}{list of full filepaths from +\item{dir_items}{list of full filepaths from \code{\link{.read_merscope_folder}}} -\item{data_to_use}{which of either the 'subcellular' or 'aggregate' +\item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} \item{cores}{how many cores or threads to use to read data if paths are diff --git a/man/load_xenium_folder.Rd b/man/load_xenium_folder.Rd index a6c07895d..73808b43e 100644 --- a/man/load_xenium_folder.Rd +++ b/man/load_xenium_folder.Rd @@ -47,13 +47,13 @@ \item{data_to_use}{which type(s) of expression data to build the gobject with (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene expression matrix} -\item{gene_column_index}{which column from the features or genes .tsv file +\item{gene_column_index}{which column from the features or genes .tsv file to use for row ids} \item{cores}{how many cores or threads to use to read data if paths are diff --git a/man/makeSignMatrixDWLS.Rd b/man/makeSignMatrixDWLS.Rd index 204b848b3..de71e35d3 100644 --- a/man/makeSignMatrixDWLS.Rd +++ b/man/makeSignMatrixDWLS.Rd @@ -46,13 +46,17 @@ from the cell metadata (\code{\link{pDataDT}}). } \examples{ g <- GiottoData::loadGiottoMini("visium") -sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -"Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -"Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -"Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +sign_gene <- c( + "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", + "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", + "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", + "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +) -makeSignMatrixDWLS(gobject = g, sign_gene = sign_gene, -cell_type_vector = pDataDT(g)[["leiden_clus"]]) +makeSignMatrixDWLS( + gobject = g, sign_gene = sign_gene, + cell_type_vector = pDataDT(g)[["leiden_clus"]] +) } \seealso{ \code{\link{runDWLSDeconv}} diff --git a/man/makeSignMatrixDWLSfromMatrix.Rd b/man/makeSignMatrixDWLSfromMatrix.Rd index fffe99932..4873980f1 100644 --- a/man/makeSignMatrixDWLSfromMatrix.Rd +++ b/man/makeSignMatrixDWLSfromMatrix.Rd @@ -21,17 +21,21 @@ Function to convert a single-cell RNAseq matrix into a format that can be used with \code{\link{runDWLSDeconv}}. } \examples{ -sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -"Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -"Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -"Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +sign_gene <- c( + "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", + "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", + "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", + "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +) -sign_matrix <- matrix(rnorm(length(sign_gene)*3), nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 3), nrow = length(sign_gene)) rownames(sign_matrix) <- sign_gene colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") -makeSignMatrixDWLSfromMatrix(matrix = sign_matrix, sign_gene = sign_gene, -cell_type_vector = c("cell_type1", "cell_type2", "cell_type3")) +makeSignMatrixDWLSfromMatrix( + matrix = sign_matrix, sign_gene = sign_gene, + cell_type_vector = c("cell_type1", "cell_type2", "cell_type3") +) } \seealso{ \code{\link{runDWLSDeconv}} diff --git a/man/makeSignMatrixPAGE.Rd b/man/makeSignMatrixPAGE.Rd index 07dcd6f5e..805dde245 100644 --- a/man/makeSignMatrixPAGE.Rd +++ b/man/makeSignMatrixPAGE.Rd @@ -25,15 +25,25 @@ The names of the cell types or processes that are provided in the list need to be given (sign_names). } \examples{ -sign_list <- list(cell_type1 = c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", -"Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd"), -cell_type2 = c("Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", "Tmbim1", -"Carhsp1", "Tmem88b", "Ugt8a"), -cell_type2 = c("Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", -"Cygb", "Ttc9b","Ipcef1")) +sign_list <- list( + cell_type1 = c( + "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", + "Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd" + ), + cell_type2 = c( + "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", "Tmbim1", + "Carhsp1", "Tmem88b", "Ugt8a" + ), + cell_type2 = c( + "Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", + "Cygb", "Ttc9b", "Ipcef1" + ) +) -makeSignMatrixPAGE(sign_names = c("cell_type1", "cell_type2", "cell_type3"), -sign_list = sign_list) +makeSignMatrixPAGE( + sign_names = c("cell_type1", "cell_type2", "cell_type3"), + sign_list = sign_list +) } \seealso{ \code{\link{PAGEEnrich}} diff --git a/man/makeSignMatrixRank.Rd b/man/makeSignMatrixRank.Rd index f96c404bf..f6008fdcf 100644 --- a/man/makeSignMatrixRank.Rd +++ b/man/makeSignMatrixRank.Rd @@ -30,17 +30,21 @@ and a corresponding single-cell cluster vector into a rank matrix that can be used with the Rank enrichment option. } \examples{ -sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -"Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -"Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -"Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +sign_gene <- c( + "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", + "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", + "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", + "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +) -sign_matrix <- matrix(rnorm(length(sign_gene)*3), nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 3), nrow = length(sign_gene)) rownames(sign_matrix) <- sign_gene colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") -makeSignMatrixRank(sc_matrix = sign_matrix, -sc_cluster_ids = c("cell_type1", "cell_type2", "cell_type3")) +makeSignMatrixRank( + sc_matrix = sign_matrix, + sc_cluster_ids = c("cell_type1", "cell_type2", "cell_type3") +) } \seealso{ \code{\link{rankEnrich}} diff --git a/man/pieCellTypesFromEnrichment.Rd b/man/pieCellTypesFromEnrichment.Rd index 0febd10be..86aac16bf 100644 --- a/man/pieCellTypesFromEnrichment.Rd +++ b/man/pieCellTypesFromEnrichment.Rd @@ -22,7 +22,7 @@ pieCellTypesFromEnrichment( \item{spat_unit}{spatial unit in which the enrichment information is stored} -\item{feat_type}{feature type for which the enrichment information was +\item{feat_type}{feature type for which the enrichment information was calculated} \item{enrichment_name}{name of the spatial enrichment diff --git a/man/plotCCcomDotplot.Rd b/man/plotCCcomDotplot.Rd index 3a2c48589..854d74f64 100644 --- a/man/plotCCcomDotplot.Rd +++ b/man/plotCCcomDotplot.Rd @@ -27,19 +27,19 @@ plotCCcomDotplot( \arguments{ \item{gobject}{giotto object} -\item{comScores}{communication scores from \code{\link{exprCellCellcom}} +\item{comScores}{communication scores from \code{\link{exprCellCellcom}} or \code{\link{spatCellCellcom}}} \item{selected_LR}{selected ligand-receptor combinations} -\item{selected_cell_LR}{selected cell-cell combinations for ligand-receptor +\item{selected_cell_LR}{selected cell-cell combinations for ligand-receptor combinations} \item{show_LR_names}{show ligand-receptor names} \item{show_cell_LR_names}{show cell-cell names} -\item{cluster_on}{values to use for clustering of cell-cell and +\item{cluster_on}{values to use for clustering of cell-cell and ligand-receptor pairs} \item{cor_method}{correlation method used for clustering} @@ -66,15 +66,17 @@ or 'sequential' (scaled based on data range)} ggplot } \description{ -Plots dotplot for ligand-receptor communication scores in +Plots dotplot for ligand-receptor communication scores in cell-cell interactions } \examples{ g <- GiottoData::loadGiottoMini("visium") -comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) +comScores <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), + feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +) plotCCcomDotplot(gobject = g, comScores = comScores, show_plot = TRUE) } diff --git a/man/plotCCcomHeatmap.Rd b/man/plotCCcomHeatmap.Rd index ebaf01ba7..cc370e675 100644 --- a/man/plotCCcomHeatmap.Rd +++ b/man/plotCCcomHeatmap.Rd @@ -27,12 +27,12 @@ plotCCcomHeatmap( \arguments{ \item{gobject}{giotto object} -\item{comScores}{communinication scores from \code{\link{exprCellCellcom}} +\item{comScores}{communinication scores from \code{\link{exprCellCellcom}} or \code{\link{spatCellCellcom}}} \item{selected_LR}{selected ligand-receptor combinations} -\item{selected_cell_LR}{selected cell-cell combinations for ligand-receptor +\item{selected_cell_LR}{selected cell-cell combinations for ligand-receptor combinations} \item{show_LR_names}{show ligand-receptor names} @@ -65,15 +65,17 @@ or 'sequential' (scaled based on data range)} ggplot } \description{ -Plots heatmap for ligand-receptor communication scores in +Plots heatmap for ligand-receptor communication scores in cell-cell interactions } \examples{ g <- GiottoData::loadGiottoMini("visium") -comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) +comScores <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), + feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +) plotCCcomHeatmap(gobject = g, comScores = comScores, show_plot = TRUE) } diff --git a/man/plotCPF.Rd b/man/plotCPF.Rd index 1b67ffcdc..8b5829673 100644 --- a/man/plotCPF.Rd +++ b/man/plotCPF.Rd @@ -39,7 +39,7 @@ plotCPF( \item{min_int_cells}{minimum number of interacting neighbor cell type} -\item{min_int_cells_expr}{minimum expression level for interacting neighbor +\item{min_int_cells_expr}{minimum expression level for interacting neighbor cell type} \item{min_fdr}{minimum adjusted p-value} @@ -75,9 +75,13 @@ Create visualization for cell proximity feature scores } \examples{ g <- GiottoData::loadGiottoMini("visium") -icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +icfObject <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) -plotCPF(gobject = g, icfObject = icfObject, show_plot = TRUE, -save_plot = FALSE, return_plot = FALSE) +plotCPF( + gobject = g, icfObject = icfObject, show_plot = TRUE, + save_plot = FALSE, return_plot = FALSE +) } diff --git a/man/plotCellProximityFeatSpot.Rd b/man/plotCellProximityFeatSpot.Rd index ac2c68d4a..d1b765942 100644 --- a/man/plotCellProximityFeatSpot.Rd +++ b/man/plotCellProximityFeatSpot.Rd @@ -72,7 +72,9 @@ Create visualization for cell proximity feature scores g <- GiottoData::loadGiottoMini("visium") icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") -plotCellProximityFeatSpot(gobject = g, icfObject = icfObject, -show_plot = TRUE, save_plot = FALSE, return_plot = FALSE, -min_pcc_diff = 0.01) +plotCellProximityFeatSpot( + gobject = g, icfObject = icfObject, + show_plot = TRUE, save_plot = FALSE, return_plot = FALSE, + min_pcc_diff = 0.01 +) } diff --git a/man/plotCellProximityFeats.Rd b/man/plotCellProximityFeats.Rd index f2e0bb574..bff1fd4fc 100644 --- a/man/plotCellProximityFeats.Rd +++ b/man/plotCellProximityFeats.Rd @@ -39,7 +39,7 @@ plotCellProximityFeats( \item{min_int_cells}{minimum number of interacting neighbor cell type} -\item{min_int_cells_expr}{minimum expression level for interacting neighbor +\item{min_int_cells_expr}{minimum expression level for interacting neighbor cell type} \item{min_fdr}{minimum adjusted p-value} @@ -77,6 +77,8 @@ Create visualization for cell proximity feature scores g <- GiottoData::loadGiottoMini("visium") icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") -plotCellProximityFeats(gobject = g, icfObject = icfObject, -show_plot = TRUE, save_plot = FALSE, return_plot = FALSE) +plotCellProximityFeats( + gobject = g, icfObject = icfObject, + show_plot = TRUE, save_plot = FALSE, return_plot = FALSE +) } diff --git a/man/plotCellTypesFromEnrichment.Rd b/man/plotCellTypesFromEnrichment.Rd index 25d8cf44d..31eaa8a8f 100644 --- a/man/plotCellTypesFromEnrichment.Rd +++ b/man/plotCellTypesFromEnrichment.Rd @@ -22,7 +22,7 @@ plotCellTypesFromEnrichment( \item{spat_unit}{spatial unit in which the enrichment information is stored} -\item{feat_type}{feature type for which the enrichment information was +\item{feat_type}{feature type for which the enrichment information was calculated} \item{enrichment_name}{name of the spatial enrichment @@ -52,6 +52,6 @@ plotCellTypesFromEnrichment This function generates a bar plot of cell types vs the frequency of that cell type in the data. These cell type results are based on the provided `enrichment_name`, and will be determined -by the maximum value of the z-score or p-value for a given cell or +by the maximum value of the z-score or p-value for a given cell or annotation. } diff --git a/man/plotCombineCCcom.Rd b/man/plotCombineCCcom.Rd index 0c2e16932..f5c014c70 100644 --- a/man/plotCombineCCcom.Rd +++ b/man/plotCombineCCcom.Rd @@ -30,7 +30,7 @@ plotCombineCCcom( \item{selected_LR}{selected ligand-receptor pair} -\item{selected_cell_LR}{selected cell-cell interaction pair for +\item{selected_cell_LR}{selected cell-cell interaction pair for ligand-receptor pair} \item{detail_plot}{show detailed info in both interacting cell types} @@ -61,23 +61,31 @@ ligand-receptor pair} ggplot } \description{ -Create visualization for combined (pairwise) cell proximity +Create visualization for combined (pairwise) cell proximity gene scores } \examples{ g <- GiottoData::loadGiottoMini("visium") -comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) +comScores <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), + feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +) -exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot") +exprCC <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +) +spatialCC <- spatCellCellcom( + gobject = g, cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot" +) combCCcom <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -plotCombineCCcom(gobject = g, combCCcom = combCCcom, -selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1")) +plotCombineCCcom( + gobject = g, combCCcom = combCCcom, + selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1") +) } diff --git a/man/plotCombineCellCellCommunication.Rd b/man/plotCombineCellCellCommunication.Rd index 85f61c833..6527a36d0 100644 --- a/man/plotCombineCellCellCommunication.Rd +++ b/man/plotCombineCellCellCommunication.Rd @@ -30,7 +30,7 @@ plotCombineCellCellCommunication( \item{selected_LR}{selected ligand-receptor pair} -\item{selected_cell_LR}{selected cell-cell interaction pair for +\item{selected_cell_LR}{selected cell-cell interaction pair for ligand-receptor pair} \item{detail_plot}{show detailed info in both interacting cell types} @@ -61,23 +61,31 @@ ligand-receptor pair} ggplot } \description{ -Create visualization for combined (pairwise) cell proximity +Create visualization for combined (pairwise) cell proximity gene scores } \examples{ g <- GiottoData::loadGiottoMini("visium") -comScores <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), -feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17")) +comScores <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = c("Gm19935", "2900040C04Rik", "Ccnd2"), + feat_set_2 = c("9630013A20Rik", "Gna12", "Btbd17") +) -exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot") +exprCC <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +) +spatialCC <- spatCellCellcom( + gobject = g, cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot" +) combCCcom <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) -plotCombineCellCellCommunication(gobject = g, combCCcom = combCCcom, -selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1")) +plotCombineCellCellCommunication( + gobject = g, combCCcom = combCCcom, + selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1") +) } diff --git a/man/plotCombineICF.Rd b/man/plotCombineICF.Rd index bc5c19f32..4c9a94f75 100644 --- a/man/plotCombineICF.Rd +++ b/man/plotCombineICF.Rd @@ -65,13 +65,16 @@ Create visualization for combined (pairwise) ICF scores \examples{ g <- GiottoData::loadGiottoMini("visium") -g_icf <- findInteractionChangedFeats(g, -cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +g_icf <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) combIcfObject <- combineInteractionChangedFeats(g_icf) -plotCombineICF(gobject = g, combIcfObject = combIcfObject, -selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), -selected_interactions = "1--8") +plotCombineICF( + gobject = g, combIcfObject = combIcfObject, + selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), + selected_interactions = "1--8" +) } diff --git a/man/plotCombineInteractionChangedFeats.Rd b/man/plotCombineInteractionChangedFeats.Rd index 651eafea0..901b07c1d 100644 --- a/man/plotCombineInteractionChangedFeats.Rd +++ b/man/plotCombineInteractionChangedFeats.Rd @@ -65,14 +65,17 @@ Create visualization for combined (pairwise) ICF scores \examples{ g <- GiottoData::loadGiottoMini("visium") -g_icf <- findInteractionChangedFeats(g, -cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +g_icf <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) combIcfObject <- combineInteractionChangedFeats(g_icf) -plotCombineInteractionChangedFeats(gobject = g, -combIcfObject = combIcfObject, -selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), -selected_interactions = "1--8") +plotCombineInteractionChangedFeats( + gobject = g, + combIcfObject = combIcfObject, + selected_feat_to_feat = c("Btbd17--Ccnd2", "Btbd17--Gna12"), + selected_interactions = "1--8" +) } diff --git a/man/plotICF.Rd b/man/plotICF.Rd index a81872f3c..c67c3628e 100644 --- a/man/plotICF.Rd +++ b/man/plotICF.Rd @@ -50,10 +50,14 @@ Create barplot to visualize interaction changed features } \examples{ g <- GiottoData::loadGiottoMini("visium") -icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +icfObject <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) -plotICF(gobject = g, icfObject = icfObject, -source_type = "1", source_markers = "Ccnd2", -ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +plotICF( + gobject = g, icfObject = icfObject, + source_type = "1", source_markers = "Ccnd2", + ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +) } diff --git a/man/plotICFSpot.Rd b/man/plotICFSpot.Rd index 408175e8d..7e5fca157 100644 --- a/man/plotICFSpot.Rd +++ b/man/plotICFSpot.Rd @@ -49,10 +49,14 @@ Create barplot to visualize interaction changed features } \examples{ g <- GiottoData::loadGiottoMini("visium") -icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +icfObject <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) -plotICFSpot(gobject = g, icfObject = icfObject, -source_type = "1", source_markers = "Ccnd2", -ICF_features = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +plotICFSpot( + gobject = g, icfObject = icfObject, + source_type = "1", source_markers = "Ccnd2", + ICF_features = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +) } diff --git a/man/plotInteractionChangedFeats.Rd b/man/plotInteractionChangedFeats.Rd index 8fe37185d..7395258f9 100644 --- a/man/plotInteractionChangedFeats.Rd +++ b/man/plotInteractionChangedFeats.Rd @@ -50,10 +50,14 @@ Create barplot to visualize interaction changed features } \examples{ g <- GiottoData::loadGiottoMini("visium") -icfObject <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +icfObject <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 +) -plotInteractionChangedFeats(gobject = g, icfObject = icfObject, -source_type = "1", source_markers = "Ccnd2", -ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17")) +plotInteractionChangedFeats( + gobject = g, icfObject = icfObject, + source_type = "1", source_markers = "Ccnd2", + ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") +) } diff --git a/man/plotPolygons.Rd b/man/plotPolygons.Rd index dcbe77bba..11454f434 100644 --- a/man/plotPolygons.Rd +++ b/man/plotPolygons.Rd @@ -37,12 +37,15 @@ Plot stored polygons \examples{ ## Plot interactive polygons g <- GiottoData::loadGiottoMini("visium") -my_polygon_coords <- data.frame(poly_ID = rep("polygon1", 3), -sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202)) +my_polygon_coords <- data.frame( + poly_ID = rep("polygon1", 3), + sdimx = c(5477, 5959, 4720), sdimy = c(-4125, -2808, -5202) +) ## Add polygon coordinates to Giotto object my_giotto_polygons <- createGiottoPolygonsFromDfr(my_polygon_coords, -name = "selections") + name = "selections" +) g <- addGiottoPolygons( gobject = g, gpolygons = list(my_giotto_polygons) diff --git a/man/plotRankSpatvsExpr.Rd b/man/plotRankSpatvsExpr.Rd index 0ebfbc8f8..fa91fe188 100644 --- a/man/plotRankSpatvsExpr.Rd +++ b/man/plotRankSpatvsExpr.Rd @@ -49,7 +49,7 @@ or 'sequential' (scaled based on data range)} \item{ylims}{y-limits, numerical vector of 2} -\item{selected_ranks}{numerical vector, will be used to print out the +\item{selected_ranks}{numerical vector, will be used to print out the percentage of top spatial ranks are recovered} \item{show_plot}{logical. show plot} @@ -66,17 +66,21 @@ percentage of top spatial ranks are recovered} ggplot } \description{ -Plots dotplot to compare ligand-receptor rankings from +Plots dotplot to compare ligand-receptor rankings from spatial and expression information } \examples{ g <- GiottoData::loadGiottoMini("visium") -exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -random_iter = 10) +exprCC <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +) +spatialCC <- spatCellCellcom( + gobject = g, cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", + random_iter = 10 +) combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) diff --git a/man/plotRecovery.Rd b/man/plotRecovery.Rd index 59c47fea0..4cba6b66b 100644 --- a/man/plotRecovery.Rd +++ b/man/plotRecovery.Rd @@ -42,17 +42,21 @@ plotRecovery( ggplot } \description{ -Plots recovery plot to compare ligand-receptor rankings from +Plots recovery plot to compare ligand-receptor rankings from spatial and expression information } \examples{ g <- GiottoData::loadGiottoMini("visium") -exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -random_iter = 10) +exprCC <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik" +) +spatialCC <- spatCellCellcom( + gobject = g, cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", + random_iter = 10 +) combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) diff --git a/man/processGiotto.Rd b/man/processGiotto.Rd index d1cbee687..4aebcb3e5 100644 --- a/man/processGiotto.Rd +++ b/man/processGiotto.Rd @@ -43,6 +43,8 @@ adjust_params must be set to NULL \examples{ g <- GiottoData::loadGiottoMini("visium") -processGiotto(gobject = g, -adjust_params = list(covariate_columns = "leiden_clus")) +processGiotto( + gobject = g, + adjust_params = list(covariate_columns = "leiden_clus") +) } diff --git a/man/rankSpatialCorGroups.Rd b/man/rankSpatialCorGroups.Rd index 67ca19916..1cb000fe6 100644 --- a/man/rankSpatialCorGroups.Rd +++ b/man/rankSpatialCorGroups.Rd @@ -49,6 +49,8 @@ g <- GiottoData::loadGiottoMini("visium") spatCorObject <- detectSpatialCorFeats(g, method = "network") clusters <- clusterSpatialCorFeats(spatCorObject = spatCorObject) -rankSpatialCorGroups(gobject = g, spatCorObject = clusters, -use_clus_name = "spat_clus") +rankSpatialCorGroups( + gobject = g, spatCorObject = clusters, + use_clus_name = "spat_clus" +) } diff --git a/man/readPolygonFilesVizgen.Rd b/man/readPolygonFilesVizgen.Rd index 228b6de32..24e856da0 100644 --- a/man/readPolygonFilesVizgen.Rd +++ b/man/readPolygonFilesVizgen.Rd @@ -58,7 +58,7 @@ object and add the smoothed polygons to the object } \section{Functions}{ \itemize{ -\item \code{.h5_read_vizgen()}: (internal) Optimized .hdf5 reading for +\item \code{.h5_read_vizgen()}: (internal) Optimized .hdf5 reading for vizgen merscope output. Returns a data.table of xyz coords and cell_id }} diff --git a/man/readPolygonFilesVizgenHDF5.Rd b/man/readPolygonFilesVizgenHDF5.Rd index 4e60c5839..58417691e 100644 --- a/man/readPolygonFilesVizgenHDF5.Rd +++ b/man/readPolygonFilesVizgenHDF5.Rd @@ -35,7 +35,7 @@ readPolygonFilesVizgenHDF5( \item{segm_to_use}{segmentation results to use (usually = 1. Depends on if alternative segmentations were generated)} -\item{custom_polygon_names}{a character vector to provide custom polygon +\item{custom_polygon_names}{a character vector to provide custom polygon names (optional)} \item{flip_x_axis}{flip x axis of polygon coordinates (multiply by -1)} @@ -54,11 +54,11 @@ names (optional)} \item{cores}{cores to use} -\item{create_gpoly_parallel}{(default = TRUE) Whether to run gpoly creation +\item{create_gpoly_parallel}{(default = TRUE) Whether to run gpoly creation in parallel} \item{create_gpoly_bin}{(Optional, default = FALSE) Parallelization option. -Accepts integer values as an binning size when generating giottoPolygon +Accepts integer values as an binning size when generating giottoPolygon objects} \item{verbose}{be verbose} @@ -72,11 +72,11 @@ list of giottoPolygon or data.table } \description{ Read polygon info for all cells or for only selected FOVs from -Vizgen HDF5 files. Data is returned as a list of giottoPolygons or +Vizgen HDF5 files. Data is returned as a list of giottoPolygons or data.tables of the requested z indices. } \details{ -Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission +Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission issues. } \seealso{ diff --git a/man/readPolygonFilesVizgenHDF5_old.Rd b/man/readPolygonFilesVizgenHDF5_old.Rd index f4faef7a1..1cdf2a8c8 100644 --- a/man/readPolygonFilesVizgenHDF5_old.Rd +++ b/man/readPolygonFilesVizgenHDF5_old.Rd @@ -26,7 +26,7 @@ readPolygonFilesVizgenHDF5_old( \item{polygon_feat_types}{a vector containing the polygon feature types} -\item{custom_polygon_names}{a character vector to provide custom polygon +\item{custom_polygon_names}{a character vector to provide custom polygon names (optional)} \item{flip_x_axis}{flip x axis of polygon coordinates (multiply by -1)} @@ -49,11 +49,11 @@ names (optional)} data.table } \description{ -Read and create polygons for all cells, or for only selected +Read and create polygons for all cells, or for only selected FOVs. } \details{ -Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission +Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission issues. } \seealso{ diff --git a/man/readPolygonVizgenParquet.Rd b/man/readPolygonVizgenParquet.Rd index a90faf9a1..3ee5f1a73 100644 --- a/man/readPolygonVizgenParquet.Rd +++ b/man/readPolygonVizgenParquet.Rd @@ -14,7 +14,7 @@ readPolygonVizgenParquet( \arguments{ \item{file}{parquet file to load} -\item{z_index}{either 'all' or a numeric vector of z_indices to get polygons +\item{z_index}{either 'all' or a numeric vector of z_indices to get polygons for} \item{calc_centroids}{calculate centroids for the polygons (default = TRUE)} @@ -25,6 +25,6 @@ for} giottoPolygons } \description{ -Read Vizgen exported cell boundary parquet files as giottoPolyons. The z +Read Vizgen exported cell boundary parquet files as giottoPolyons. The z level can be selected. } diff --git a/man/registerGiottoObjectList.Rd b/man/registerGiottoObjectList.Rd index 7cfc09b09..46eeeab63 100644 --- a/man/registerGiottoObjectList.Rd +++ b/man/registerGiottoObjectList.Rd @@ -26,41 +26,41 @@ registerGiottoObjectList( \item{spat_unit}{spatial unit} -\item{method}{Method used to align gobjects. Current options are either +\item{method}{Method used to align gobjects. Current options are either using FIJI register_virtual_stack_slices output or rvision} \item{image_unreg}{Gobject image slot to use. Defaults to 'image' (optional)} -\item{image_reg_name}{Arbitrary image slot name for registered images to +\item{image_reg_name}{Arbitrary image slot name for registered images to occupy. Defaults to replacement of 'image' slot (optional)} \item{image_list}{RVISION - under construction} \item{save_dir}{RVISION - under construction} -\item{spatloc_unreg}{Unregistered spatial locations to align. Defaults to +\item{spatloc_unreg}{Unregistered spatial locations to align. Defaults to 'raw' slot (optional)} -\item{spatloc_reg_name}{Arbitrary name for registered spatial locations. +\item{spatloc_reg_name}{Arbitrary name for registered spatial locations. Defaults to replacement of 'raw' slot (optional)} \item{fiji_xml_files}{Filepaths to FIJI registration XML outputs} -\item{fiji_registered_images}{Registered images output by FIJI +\item{fiji_registered_images}{Registered images output by FIJI register_virtual_stack_slices} \item{scale_factor}{Scaling to be applied to spatial coordinates} -\item{allow_rvision_autoscale}{Whether or not to allow rvision to +\item{allow_rvision_autoscale}{Whether or not to allow rvision to automatically scale the images when performing image registration} \item{verbose}{Be verbose} } \value{ -List of registered giotto objects where the registered images and +List of registered giotto objects where the registered images and spatial locations } \description{ -Wrapper function for registerGiottoObjectListFiji and +Wrapper function for registerGiottoObjectListFiji and registerGiottoObjectListRvision } diff --git a/man/registerGiottoObjectListFiji.Rd b/man/registerGiottoObjectListFiji.Rd index 456e3febd..3b7cf7ee6 100644 --- a/man/registerGiottoObjectListFiji.Rd +++ b/man/registerGiottoObjectListFiji.Rd @@ -24,39 +24,39 @@ registerGiottoObjectListFiji( \item{spat_unit}{spatial unit} -\item{image_unreg}{name of original unregistered images. Defaults to +\item{image_unreg}{name of original unregistered images. Defaults to 'image' (optional)} -\item{image_reg_name}{arbitrary name for registered images to occupy. +\item{image_reg_name}{arbitrary name for registered images to occupy. Defaults to replacement of 'image' (optional)} -\item{image_replace_name}{arbitrary name for any images replaced due to +\item{image_replace_name}{arbitrary name for any images replaced due to image_reg_name argument (optional)} -\item{registered_images}{registered images output by FIJI +\item{registered_images}{registered images output by FIJI register_virtual_stack_slices} \item{spatloc_unreg}{spatial locations to use. Defaults to 'raw' (optional)} -\item{spatloc_reg_name}{name for registered spatial locations. Defaults to +\item{spatloc_reg_name}{name for registered spatial locations. Defaults to replacement of 'raw' (optional)} -\item{spatloc_replace_name}{arbitrary name for any spatial locations +\item{spatloc_replace_name}{arbitrary name for any spatial locations replaced due to spatloc_reg_name argument (optional)} -\item{xml_files}{atomic vector of filepaths to xml outputs from FIJI +\item{xml_files}{atomic vector of filepaths to xml outputs from FIJI register_virtual_stack_slices} -\item{scale_factor}{vector of scaling factors of images used in registration +\item{scale_factor}{vector of scaling factors of images used in registration vs spatlocs} \item{verbose}{be verbose} } \value{ -list of registered giotto objects where the registered images and +list of registered giotto objects where the registered images and spatial locations } \description{ -Function to spatially align gobject data based on FIJI image +Function to spatially align gobject data based on FIJI image registration. } diff --git a/man/registerGiottoObjectListRvision.Rd b/man/registerGiottoObjectListRvision.Rd index ba85763e2..3b30843d3 100644 --- a/man/registerGiottoObjectListRvision.Rd +++ b/man/registerGiottoObjectListRvision.Rd @@ -22,16 +22,16 @@ registerGiottoObjectListRvision( \item{spatloc_unreg}{spatial locations to use} -\item{spatloc_reg_name}{name for registered spatial locations to. Defaults +\item{spatloc_reg_name}{name for registered spatial locations to. Defaults to replacement of spat_unreg (optional)} \item{verbose}{be verbose} } \value{ -list of registered giotto objects where the registered images and +list of registered giotto objects where the registered images and spatial locations } \description{ -Function to spatially align gobject data based on Rvision image +Function to spatially align gobject data based on Rvision image registration. } diff --git a/man/registerImagesFIJI.Rd b/man/registerImagesFIJI.Rd index d15bd8464..49326fc32 100644 --- a/man/registerImagesFIJI.Rd +++ b/man/registerImagesFIJI.Rd @@ -37,10 +37,10 @@ registerImagesFIJI( \item{output_img_dir}{Folder to save registered images to} -\item{transforms_save_dir}{(jython implementation only) Folder to save +\item{transforms_save_dir}{(jython implementation only) Folder to save transforms to} -\item{ref_img_name}{(jython implementation only) File name of reference +\item{ref_img_name}{(jython implementation only) File name of reference image for the registration} \item{init_gauss_blur}{Point detector option: initial image blurring} @@ -86,14 +86,14 @@ image for the registration} executing it.} } \value{ -list of registered giotto objects where the registered images and +list of registered giotto objects where the registered images and spatial locations } \description{ -Wrapper function for Register Virtual Stack Slices plugin in +Wrapper function for Register Virtual Stack Slices plugin in FIJI } \details{ -This function was adapted from runFijiMacro function in +This function was adapted from runFijiMacro function in jimpipeline by jefferislab } diff --git a/man/runDWLSDeconv.Rd b/man/runDWLSDeconv.Rd index 3530492e1..fa8670b4f 100644 --- a/man/runDWLSDeconv.Rd +++ b/man/runDWLSDeconv.Rd @@ -51,13 +51,15 @@ expression data \examples{ g <- GiottoData::loadGiottoMini("visium") x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) } diff --git a/man/runHyperGeometricEnrich.Rd b/man/runHyperGeometricEnrich.Rd index f04ee5273..ee0a43465 100644 --- a/man/runHyperGeometricEnrich.Rd +++ b/man/runHyperGeometricEnrich.Rd @@ -59,13 +59,15 @@ hypergeometric test, -log10(p-value). \examples{ g <- GiottoData::loadGiottoMini("visium") x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) runHyperGeometricEnrich(gobject = g, sign_matrix = sign_matrix) } diff --git a/man/runPAGEEnrich.Rd b/man/runPAGEEnrich.Rd index d81c5134c..8d1ec1818 100644 --- a/man/runPAGEEnrich.Rd +++ b/man/runPAGEEnrich.Rd @@ -80,13 +80,16 @@ gene set. } \examples{ g <- GiottoData::loadGiottoMini("visium") -sign_gene <- c("Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -"Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -"Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -"Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b","Ipcef1") +sign_gene <- c( + "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", + "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", + "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", + "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" +) -sign_matrix <- matrix(rnorm(length(sign_gene)*3, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 3, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") diff --git a/man/runPatternSimulation.Rd b/man/runPatternSimulation.Rd index 7a63a7ddd..c06ba28da 100644 --- a/man/runPatternSimulation.Rd +++ b/man/runPatternSimulation.Rd @@ -86,7 +86,11 @@ and runs the different spatial gene detection tests \examples{ g <- GiottoData::loadGiottoMini("visium") -runPatternSimulation(gobject = g, pattern_cell_ids = c("AAAGGGATGTAGCAAG-1", -"TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1"), -spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2")) +runPatternSimulation( + gobject = g, pattern_cell_ids = c( + "AAAGGGATGTAGCAAG-1", + "TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" + ), + spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2") +) } diff --git a/man/runRankEnrich.Rd b/man/runRankEnrich.Rd index efe7c02a8..4c1059e33 100644 --- a/man/runRankEnrich.Rd +++ b/man/runRankEnrich.Rd @@ -75,17 +75,21 @@ and the final enrichment score is then calculated as the sum of top 100 RBPs. } \examples{ g <- GiottoData::loadGiottoMini("visium") -x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) +x <- findMarkers_one_vs_all(g, + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) -runRankEnrich(gobject = g, sign_matrix = sign_matrix, -expression_values = "normalized") +runRankEnrich( + gobject = g, sign_matrix = sign_matrix, + expression_values = "normalized" +) } \seealso{ \code{\link{makeSignMatrixRank}} diff --git a/man/runSpatialDeconv.Rd b/man/runSpatialDeconv.Rd index 9b62f08d4..65cb4e709 100644 --- a/man/runSpatialDeconv.Rd +++ b/man/runSpatialDeconv.Rd @@ -53,14 +53,16 @@ expression data } \examples{ g <- GiottoData::loadGiottoMini("visium") -x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) +x <- findMarkers_one_vs_all(g, + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) runSpatialDeconv(gobject = g, sign_matrix = sign_matrix) } diff --git a/man/runSpatialEnrich.Rd b/man/runSpatialEnrich.Rd index 9946d4c02..4f1924cab 100644 --- a/man/runSpatialEnrich.Rd +++ b/man/runSpatialEnrich.Rd @@ -85,14 +85,16 @@ For details see the individual functions: } \examples{ g <- GiottoData::loadGiottoMini("visium") -x <- findMarkers_one_vs_all(g, -cluster_column = "leiden_clus", min_feats = 20) +x <- findMarkers_one_vs_all(g, + cluster_column = "leiden_clus", min_feats = 20 +) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene)*8, mean = 10), -nrow = length(sign_gene)) +sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), + nrow = length(sign_gene) +) rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- paste0("celltype_",unique(x$cluster)) +colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) runSpatialEnrich(gobject = g, sign_matrix = sign_matrix) } diff --git a/man/runWNN.Rd b/man/runWNN.Rd index 4a542f892..f58829f3f 100644 --- a/man/runWNN.Rd +++ b/man/runWNN.Rd @@ -45,8 +45,8 @@ runWNN( \item{verbose}{be verbose} } \value{ -A Giotto object with integrated UMAP (integrated.umap) within the -dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the +A Giotto object with integrated UMAP (integrated.umap) within the +dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the cellular metadata. } \description{ diff --git a/man/showCellProportionSwitchedPie.Rd b/man/showCellProportionSwitchedPie.Rd index 723b80486..e79ddc0b4 100644 --- a/man/showCellProportionSwitchedPie.Rd +++ b/man/showCellProportionSwitchedPie.Rd @@ -24,8 +24,8 @@ ggplot showCellProportionSwitchedPie } \details{ -Creates a pie chart showing how many cells switched clusters after +Creates a pie chart showing how many cells switched clusters after annotation resizing. -The function showPolygonSizeInfluence() must have been run on the Giotto +The function showPolygonSizeInfluence() must have been run on the Giotto Object for this function to run. } diff --git a/man/showCellProportionSwitchedSanKey.Rd b/man/showCellProportionSwitchedSanKey.Rd index 69e5fa050..c350901d1 100644 --- a/man/showCellProportionSwitchedSanKey.Rd +++ b/man/showCellProportionSwitchedSanKey.Rd @@ -12,12 +12,12 @@ showCellProportionSwitchedSanKey( ) } \arguments{ -\item{gobject}{giotto object which contains metadata for both spat_unit and +\item{gobject}{giotto object which contains metadata for both spat_unit and alt_spat_unit} \item{spat_unit}{spatial unit} -\item{alt_spat_unit}{alternative spatial unit which stores data after +\item{alt_spat_unit}{alternative spatial unit which stores data after resizing annotations} \item{feat_type}{feature type} diff --git a/man/showPolygonSizeInfluence.Rd b/man/showPolygonSizeInfluence.Rd index c3562b6a8..44d7469f2 100644 --- a/man/showPolygonSizeInfluence.Rd +++ b/man/showPolygonSizeInfluence.Rd @@ -19,12 +19,12 @@ showPolygonSizeInfluence( \item{spat_unit}{spatial unit} -\item{alt_spat_unit}{alternaitve spatial unit which represents resized +\item{alt_spat_unit}{alternaitve spatial unit which represents resized polygon data} \item{feat_type}{feature type} -\item{clus_name}{name of cluster column in cell_metadata for given spat_unit +\item{clus_name}{name of cluster column in cell_metadata for given spat_unit and alt_spat_unit, i.e. "kmeans"} \item{return_plot}{logical. whether to return the plot object} @@ -43,12 +43,12 @@ Compares cell metadata from spat_unit-feat_type pairs as provided. New columns, resize_switch and cluster_interaction, will be created within cell_metadata for spat_unit-feat_type. -These new columns will describe if a given cell switched cluster number when +These new columns will describe if a given cell switched cluster number when resized. If the same amount of clusters exist for spat_unit-feat_type and alt_spat_unit-feat_type, then clusters are determined to be corresponding based on % overlap in cell_IDs in each cluster. -Otherwise, multiple clusters from the spatial unit feature type pair are +Otherwise, multiple clusters from the spatial unit feature type pair are condensed to align with the smaller number of clusters and ensure overlap. } diff --git a/man/simulateOneGenePatternGiottoObject.Rd b/man/simulateOneGenePatternGiottoObject.Rd index 8e8a64ef9..b73185499 100644 --- a/man/simulateOneGenePatternGiottoObject.Rd +++ b/man/simulateOneGenePatternGiottoObject.Rd @@ -46,8 +46,12 @@ Create a simulated spatial pattern for one selected gnee \examples{ g <- GiottoData::loadGiottoMini("visium") -simulateOneGenePatternGiottoObject(gobject = g, -pattern_cell_ids = c("AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", -"ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1"), -gene_name = "Gna12") +simulateOneGenePatternGiottoObject( + gobject = g, + pattern_cell_ids = c( + "AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", + "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" + ), + gene_name = "Gna12" +) } diff --git a/man/spatCellCellcomSpots.Rd b/man/spatCellCellcomSpots.Rd index 212a6179b..ac6e46389 100644 --- a/man/spatCellCellcomSpots.Rd +++ b/man/spatCellCellcomSpots.Rd @@ -85,30 +85,30 @@ expected based on a reshuffled null distribution of feature expression values in cells that are spatially in proximity to each other. \itemize{ * LR_comb:Pair of ligand and receptor - * lig_cell_type: cell type to assess expression level of ligand - * lig_expr: average expression residual(observed - DWLS_predicted) of - ligand in lig_cell_type - * ligand: ligand name - * rec_cell_type: cell type to assess expression level of receptor - * rec_expr: average expression residual(observed - DWLS_predicted) of + * lig_cell_type: cell type to assess expression level of ligand + * lig_expr: average expression residual(observed - DWLS_predicted) of + ligand in lig_cell_type + * ligand: ligand name + * rec_cell_type: cell type to assess expression level of receptor + * rec_expr: average expression residual(observed - DWLS_predicted) of receptor in rec_cell_type - * receptor: receptor name + * receptor: receptor name * LR_expr: combined average ligand and receptor expression residual - * lig_nr: total number of cells from lig_cell_type that spatially interact - with cells from rec_cell_type - * rec_nr: total number of cells from rec_cell_type that spatially interact - with cells from lig_cell_type - * rand_expr: average combined ligand and receptor expression residual from - random spatial permutations - * av_diff: average difference between LR_expr and rand_expr over all random - spatial permutations - * sd_diff: (optional) standard deviation of the difference between LR_expr - and rand_expr over all random spatial permutations - * z_score: (optional) z-score - * log2fc: LR_expr - rand_expr - * pvalue: p-value - * LR_cell_comb: cell type pair combination - * p.adj: adjusted p-value - * PI: significanc score: log2fc \* -log10(p.adj) + * lig_nr: total number of cells from lig_cell_type that spatially interact + with cells from rec_cell_type + * rec_nr: total number of cells from rec_cell_type that spatially interact + with cells from lig_cell_type + * rand_expr: average combined ligand and receptor expression residual from + random spatial permutations + * av_diff: average difference between LR_expr and rand_expr over all random + spatial permutations + * sd_diff: (optional) standard deviation of the difference between LR_expr + and rand_expr over all random spatial permutations + * z_score: (optional) z-score + * log2fc: LR_expr - rand_expr + * pvalue: p-value + * LR_cell_comb: cell type pair combination + * p.adj: adjusted p-value + * PI: significanc score: log2fc \* -log10(p.adj) } } diff --git a/man/spdepAutoCorr.Rd b/man/spdepAutoCorr.Rd index 9364ef8af..31b226470 100644 --- a/man/spdepAutoCorr.Rd +++ b/man/spdepAutoCorr.Rd @@ -19,7 +19,7 @@ spdepAutoCorr( \item{gobject}{Input a Giotto object.} \item{method}{Specify a method name to compute auto correlation. -Available methods include +Available methods include \code{"geary.test", "lee.test", "lm.morantest","moran.test"}.} \item{spat_unit}{spatial unit} @@ -28,7 +28,7 @@ Available methods include \item{expression_values}{expression values to use, default = normalized} -\item{spatial_network_to_use}{spatial network to use, +\item{spatial_network_to_use}{spatial network to use, default = spatial_network} \item{return_gobject}{if FALSE, results are returned as data.table. diff --git a/man/specificCellCellcommunicationScores.Rd b/man/specificCellCellcommunicationScores.Rd index a118f6cbc..73b90639d 100644 --- a/man/specificCellCellcommunicationScores.Rd +++ b/man/specificCellCellcommunicationScores.Rd @@ -88,29 +88,29 @@ distribution of feature expression values in cells that are spatially in proximity to each other. \itemize{ * LR_comb: Pair of ligand and receptor - * lig_cell_type: cell type to assess expression level of ligand - * lig_expr: average expression of ligand in lig_cell_type - * ligand: ligand name - * rec_cell_type: cell type to assess expression level of receptor + * lig_cell_type: cell type to assess expression level of ligand + * lig_expr: average expression of ligand in lig_cell_type + * ligand: ligand name + * rec_cell_type: cell type to assess expression level of receptor * rec_expr: average expression of receptor in rec_cell_type - * receptor: receptor name - * LR_expr: combined average ligand and receptor expression - * lig_nr: total number of cells from lig_cell_type that spatially interact - with cells from rec_cell_type - * rec_nr: total number of cells from rec_cell_type that spatially interact - with cells from lig_cell_type - * rand_expr: average combined ligand and receptor expression from random - spatial permutations - * av_diff: average difference between LR_expr and rand_expr over all - random spatial permutations - * sd_diff: (optional) standard deviation of the difference between LR_expr - and rand_expr over all random spatial permutations - * z_score: (optional) z-score - * log2fc: log2 fold-change (LR_expr/rand_expr) - * pvalue: p-value - * LR_cell_comb: cell type pair combination - * p.adj: adjusted p-value - * PI: significanec score: log2fc \* -log10(p.adj) + * receptor: receptor name + * LR_expr: combined average ligand and receptor expression + * lig_nr: total number of cells from lig_cell_type that spatially interact + with cells from rec_cell_type + * rec_nr: total number of cells from rec_cell_type that spatially interact + with cells from lig_cell_type + * rand_expr: average combined ligand and receptor expression from random + spatial permutations + * av_diff: average difference between LR_expr and rand_expr over all + random spatial permutations + * sd_diff: (optional) standard deviation of the difference between LR_expr + and rand_expr over all random spatial permutations + * z_score: (optional) z-score + * log2fc: log2 fold-change (LR_expr/rand_expr) + * pvalue: p-value + * LR_cell_comb: cell type pair combination + * p.adj: adjusted p-value + * PI: significanec score: log2fc \* -log10(p.adj) } } \examples{ diff --git a/man/subClusterCells.Rd b/man/subClusterCells.Rd index 7d81a1613..df848ad1f 100644 --- a/man/subClusterCells.Rd +++ b/man/subClusterCells.Rd @@ -201,8 +201,10 @@ g <- GiottoData::loadGiottoMini("visium") subClusterCells(g, cluster_column = "leiden_clus") # use louvain instead -subClusterCells(g, cluster_column = "leiden_clus", - cluster_method = "louvain_community") +subClusterCells(g, + cluster_column = "leiden_clus", + cluster_method = "louvain_community" +) # directly call the more specific functions doLeidenSubCluster(g, cluster_column = "leiden_clus") diff --git a/man/visium_micron_scalefactor.Rd b/man/visium_micron_scalefactor.Rd index 9c9f93949..96eb9e3ea 100644 --- a/man/visium_micron_scalefactor.Rd +++ b/man/visium_micron_scalefactor.Rd @@ -8,7 +8,7 @@ .visium_micron_scale(json_scalefactors) } \arguments{ -\item{json_scalefactors}{list of scalefactors from +\item{json_scalefactors}{list of scalefactors from .visium_read_scalefactors()} } \value{ diff --git a/man/write_giotto_viewer_annotation.Rd b/man/write_giotto_viewer_annotation.Rd index ef5c40f51..9e2c2334b 100644 --- a/man/write_giotto_viewer_annotation.Rd +++ b/man/write_giotto_viewer_annotation.Rd @@ -21,7 +21,7 @@ write_giotto_viewer_annotation( write a .txt and .annot file for the selection annotation } \description{ -write out factor-like annotation data from a giotto object for +write out factor-like annotation data from a giotto object for the Viewer } \keyword{internal} diff --git a/vignettes/intro_to_giotto.Rmd b/vignettes/intro_to_giotto.Rmd index a446de3aa..3f5ce35ec 100644 --- a/vignettes/intro_to_giotto.Rmd +++ b/vignettes/intro_to_giotto.Rmd @@ -9,8 +9,8 @@ vignette: > ```{r, include = FALSE} knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" + collapse = TRUE, + comment = "#>" ) ``` @@ -63,8 +63,9 @@ g <- runPCA(g) Plot PCA ```{r} -plotPCA(g, - cell_color = "leiden_clus") +plotPCA(g, + cell_color = "leiden_clus" +) ``` Run UMAP @@ -77,7 +78,8 @@ Plot UMAP ```{r} plotUMAP(g, - cell_color = "leiden_clus") + cell_color = "leiden_clus" +) ``` Run tSNE @@ -90,7 +92,8 @@ Plot tSNE ```{r} plotTSNE(g, - cell_color = "leiden_clus") + cell_color = "leiden_clus" +) ``` Do clustering @@ -103,7 +106,8 @@ Spatial plot with clusters ```{r} spatPlot2D(g, - cell_color = "leiden_clus") + cell_color = "leiden_clus" +) ``` Session info From 6a9d88d00478febdce98edc46e9c04217c075a72 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Fri, 24 May 2024 17:43:20 -0400 Subject: [PATCH 064/150] fix example --- R/spatial_interaction_spot.R | 11 ++++++++--- man/findICFSpot.Rd | 9 +++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/R/spatial_interaction_spot.R b/R/spatial_interaction_spot.R index 5ea6327d3..7f0454e48 100644 --- a/R/spatial_interaction_spot.R +++ b/R/spatial_interaction_spot.R @@ -1203,11 +1203,16 @@ NULL #' colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) #' #' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) -#' g_expression <- getExpression(g, output = "matrix") -#' +#' ave_celltype_exp <- calculateMetaTable(g, metadata_cols = "leiden_clus") +#' ave_celltype_exp <- reshape2::dcast(ave_celltype_exp, variable~leiden_clus) +#' rownames(ave_celltype_exp) <- ave_celltype_exp$variable +#' ave_celltype_exp <- ave_celltype_exp[,-1] +#' colnames(ave_celltype_exp) <- colnames(sign_matrix) +#' #' findICFSpot(g, #' spat_unit = "cell", feat_type = "rna", -#' ave_celltype_exp = g_expression, spatial_network_name = "spatial_network" +#' ave_celltype_exp = ave_celltype_exp, +#' spatial_network_name = "spatial_network" #' ) #' @export findICFSpot <- function( diff --git a/man/findICFSpot.Rd b/man/findICFSpot.Rd index 04389711c..0497cd1fd 100644 --- a/man/findICFSpot.Rd +++ b/man/findICFSpot.Rd @@ -116,10 +116,15 @@ rownames(sign_matrix) <- sign_gene colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) -g_expression <- getExpression(g, output = "matrix") +ave_celltype_exp <- calculateMetaTable(g, metadata_cols = "leiden_clus") +ave_celltype_exp <- reshape2::dcast(ave_celltype_exp, variable~leiden_clus) +rownames(ave_celltype_exp) <- ave_celltype_exp$variable +ave_celltype_exp <- ave_celltype_exp[,-1] +colnames(ave_celltype_exp) <- colnames(sign_matrix) findICFSpot(g, spat_unit = "cell", feat_type = "rna", - ave_celltype_exp = g_expression, spatial_network_name = "spatial_network" + ave_celltype_exp = ave_celltype_exp, + spatial_network_name = "spatial_network" ) } From 497f810bb202de963adece78e741a30b71ff1e07 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Sun, 26 May 2024 08:25:50 -0400 Subject: [PATCH 065/150] enh: `show()` for `icfObject` --- R/spatial_interaction.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index b6075e35f..ec7ba6ee5 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -1268,6 +1268,26 @@ findInteractionChangedFeats <- function(gobject, } +#' @name print.icfObject +#' @title icfObject print method +#' @param x object to print +#' @param \dots additional params to pass (none implemented) +#' @keywords internal +#' @export +print.icfObject <- function(x, ...) { + cat("An object of class", class(x), "\n") + info <- list( + dimensions = sprintf("%d, %d (interactions, attributes)", + nrow(x$ICFscores), ncol(x$ICFscores)) + ) + print_list(info, pre = " -") + cat("\n") + print_list(x$Giotto_info, pre = " -") + cat("\n") + print_list(x$test_info, pre = " -") +} + + #' @title findInteractionChangedGenes #' @name findInteractionChangedGenes From 015bcb4372842a3cec28f3f0cdb4f24ac323ed88 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Sun, 26 May 2024 15:13:12 -0400 Subject: [PATCH 066/150] `findICF()` updates & code reorganization --- NAMESPACE | 2 + R/spatial_interaction.R | 859 +++++++++++--------------- man/combineCPG.Rd | 12 +- man/combineCellProximityGenes.Rd | 2 +- man/combineICF.Rd | 61 -- man/combineICG.Rd | 12 +- man/combineInteractionChangedFeats.Rd | 29 +- man/combineInteractionChangedGenes.Rd | 2 +- man/filterCPG.Rd | 4 +- man/filterCellProximityGenes.Rd | 2 +- man/filterICF.Rd | 57 -- man/filterICG.Rd | 4 +- man/filterInteractionChangedFeats.Rd | 37 +- man/filterInteractionChangedGenes.Rd | 2 +- man/findCPG.Rd | 2 +- man/findCellProximityGenes.Rd | 2 +- man/findICF.Rd | 105 ---- man/findICFSpot.Rd | 7 +- man/findICG.Rd | 2 +- man/findInteractionChangedFeats.Rd | 80 ++- man/findInteractionChangedGenes.Rd | 2 +- man/print.combIcfObject.Rd | 17 + man/print.icfObject.Rd | 17 + 23 files changed, 548 insertions(+), 771 deletions(-) delete mode 100644 man/combineICF.Rd delete mode 100644 man/filterICF.Rd delete mode 100644 man/findICF.Rd create mode 100644 man/print.combIcfObject.Rd create mode 100644 man/print.icfObject.Rd diff --git a/NAMESPACE b/NAMESPACE index bbe6fefac..818eb8c4d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(print,combIcfObject) +S3method(print,icfObject) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index ec7ba6ee5..7bccd8a0e 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -1052,6 +1052,7 @@ NULL #' @title findInteractionChangedFeats #' @name findInteractionChangedFeats +#' @aliases findICF #' @description Identifies cell-to-cell Interaction Changed Features (ICF), #' i.e. features that are differentially expressed due to proximity to other #' cell types. @@ -1073,33 +1074,48 @@ NULL #' @param do_parallel run calculations in parallel with mclapply #' @param set_seed set a seed for reproducibility #' @param seed_number seed number -#' @returns icfObject that contains the Interaction Changed differential +#' @returns `icfObject` that contains the Interaction Changed differential #' feature scores #' @details Function to calculate if features are differentially expressed in #' cell types when they interact (approximated by physical proximity) with -#' other cell types. The results data.table in the icfObject contains +#' other cell types. The results data.table in the `icfObject` contains #' - at least - the following columns: -#' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } -#' \item{log2fc:}{ log2 fold-change between sel and other} -#' \item{diff:}{ spatial expression difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{nr_other:}{ number of other cells of selected target cell type} -#' \item{int_nr_other:}{ number of other cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} -#' } +#' * **features:** All or selected list of tested features +#' * **sel:** average feature expression in the interacting cells from the +#' target cell type +#' * **other:** average feature expression in the NOT-interacting cells from +#' the target cell type +#' * **log2fc:** log2 fold-change between sel and other +#' * **diff:** spatial expression difference between sel and other +#' * **p.value:** associated p-value +#' * **p.adj:** adjusted p-value +#' * **cell_type:** target cell type +#' * **int_cell_type:** interacting cell type +#' * **nr_select:** number of cells for selected target cell type +#' * **int_nr_select:** number of cells for interacting cell type +#' * **nr_other:** number of other cells of selected target cell type +#' * **int_nr_other:** number of other cells for interacting cell type +#' * **unif_int:** cell-cell interaction +#' +#' @seealso [filterInteractionChangedFeats()] +#' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' icf1 <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 +#' ) +#' force(icf1) +#' force(icf1$ICFscores) +#' +#' # this is just an alias with a shorter name +#' icf2 <- findICF(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 +#' ) #' @export findInteractionChangedFeats <- function(gobject, feat_type = NULL, @@ -1165,8 +1181,9 @@ findInteractionChangedFeats <- function(gobject, mean_method <- match.arg(mean_method, choices = c("arithmic", "geometric")) ## metadata - cell_metadata <- pDataDT(gobject, feat_type = feat_type) - + cell_metadata <- pDataDT( + gobject, spat_unit = spat_unit, feat_type = feat_type + ) ## annotated spatial network @@ -1179,94 +1196,78 @@ findInteractionChangedFeats <- function(gobject, all_interactions <- unique(annot_spatnetwork$unified_int) - if (do_parallel == TRUE) { - fin_result <- lapply_flex( - X = all_interactions, future.seed = TRUE, FUN = function(x) { + ## prepare function + fcp_feats_per_i <- function(x) { + .findCellProximityFeats_per_interaction( + expr_values = expr_values, + cell_metadata = cell_metadata, + annot_spatnetwork = annot_spatnetwork, + minimum_unique_cells = minimum_unique_cells, + minimum_unique_int_cells = minimum_unique_int_cells, + sel_int = x, + cluster_column = cluster_column, + exclude_selected_cells_from_test = exclude_selected_cells_from_test, + diff_test = diff_test, + mean_method = mean_method, + offset = offset, + adjust_method = adjust_method, + nr_permutations = nr_permutations, + set_seed = set_seed, + seed_number = seed_number + ) + } - tempres <- .findCellProximityFeats_per_interaction( - expr_values = expr_values, - cell_metadata = cell_metadata, - annot_spatnetwork = annot_spatnetwork, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - sel_int = x, - cluster_column = cluster_column, - exclude_selected_cells_from_test = exclude_selected_cells_from_test, - diff_test = diff_test, - mean_method = mean_method, - offset = offset, - adjust_method = adjust_method, - nr_permutations = nr_permutations, - set_seed = set_seed, - seed_number = seed_number - ) - }) - } else { + if (isTRUE(do_parallel)) { # parallel + fin_result <- lapply_flex( + X = all_interactions, future.seed = TRUE, FUN = fcp_feats_per_i + ) + } else { # sequential fin_result <- list() for (i in seq_along(all_interactions)) { x <- all_interactions[i] - - - tempres <- .findCellProximityFeats_per_interaction( - expr_values = expr_values, - cell_metadata = cell_metadata, - annot_spatnetwork = annot_spatnetwork, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - sel_int = x, - cluster_column = cluster_column, - exclude_selected_cells_from_test = exclude_selected_cells_from_test, - diff_test = diff_test, - mean_method = mean_method, - offset = offset, - adjust_method = adjust_method, - nr_permutations = nr_permutations, - set_seed = set_seed, - seed_number = seed_number - ) - + tempres <- fcp_feats_per_i(x) fin_result[[i]] <- tempres } } final_result <- do.call("rbind", fin_result) - - - # data.table variables + # NSE variables spec_int <- cell_type <- int_cell_type <- type_int <- NULL final_result[, spec_int := paste0(cell_type, "--", int_cell_type)] final_result[, type_int := ifelse( cell_type == int_cell_type, "homo", "hetero")] - - # return(final_result) - permutation_test <- ifelse( diff_test == "permutation", nr_permutations, "no permutations") - icfObject <- list( - ICFscores = final_result, - Giotto_info = list( - "values" = values, - "cluster" = cluster_column, - "spatial network" = spatial_network_name + icfObject <- structure( + .Data = list( + ICFscores = final_result, + Giotto_info = list( + "values" = values, + "cluster" = cluster_column, + "spatial network" = spatial_network_name + ), + test_info = list( + "test" = diff_test, + "p.adj" = adjust_method, + "min cells" = minimum_unique_cells, + "min interacting cells" = minimum_unique_int_cells, + "exclude selected cells" = exclude_selected_cells_from_test, + "perm" = permutation_test + ) ), - test_info = list( - "test" = diff_test, - "p.adj" = adjust_method, - "min cells" = minimum_unique_cells, - "min interacting cells" = minimum_unique_int_cells, - "exclude selected cells" = exclude_selected_cells_from_test, - "perm" = permutation_test - ) + class = "icfObject" ) - class(icfObject) <- append("icfObject", class(icfObject)) return(icfObject) } +#' @rdname findInteractionChangedFeats +#' @export +findICF <- findInteractionChangedFeats #' @name print.icfObject #' @title icfObject print method @@ -1277,7 +1278,7 @@ findInteractionChangedFeats <- function(gobject, print.icfObject <- function(x, ...) { cat("An object of class", class(x), "\n") info <- list( - dimensions = sprintf("%d, %d (interactions, attributes)", + dimensions = sprintf("%d, %d (icfs, attributes)", nrow(x$ICFscores), ncol(x$ICFscores)) ) print_list(info, pre = " -") @@ -1289,177 +1290,14 @@ print.icfObject <- function(x, ...) { -#' @title findInteractionChangedGenes -#' @name findInteractionChangedGenes -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. genes that are differentially expressed due to interactions with other -#' cell types. -#' @param ... params to pass to \code{findInteractionChangedFeats} -#' @seealso \code{\link{findInteractionChangedFeats}} -#' @returns interaction changed genes -#' @export -findInteractionChangedGenes <- function(...) { - .Deprecated(new = "findInteractionChangedFeats") - findInteractionChangedFeats(...) -} - - - -#' @title findCellProximityGenes -#' @name findCellProximityGenes -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. genes that are differentially expressed due to proximity to other cell -#' types. -#' @inheritDotParams findInteractionChangedFeats -#' @seealso \code{\link{findInteractionChangedFeats}} -#' @returns cell-cell interaction changed genes -#' @export -findCellProximityGenes <- function(...) { - .Deprecated(new = "findInteractionChangedFeats") - - findInteractionChangedFeats(...) -} - - - - - -#' @title findICF -#' @name findICF -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. features that are differentially expressed due to proximity to other -#' cell types. -#' @param gobject giotto object -#' @param feat_type feature type -#' @param spat_unit spatial unit -#' @param expression_values expression values to use -#' @param selected_feats subset of selected features (optional) -#' @param cluster_column name of column to use for cell types -#' @param spatial_network_name name of spatial network to use -#' @param minimum_unique_cells minimum number of target cells required -#' @param minimum_unique_int_cells minimum number of interacting cells required -#' @param diff_test which differential expression test -#' @param mean_method method to use to calculate the mean -#' @param offset offset value to use when calculating log2 ratio -#' @param adjust_method which method to adjust p-values -#' @param nr_permutations number of permutations if diff_test = permutation -#' @param exclude_selected_cells_from_test exclude interacting cells other cells -#' @param do_parallel run calculations in parallel with mclapply -#' @param set_seed set a seed for reproducibility -#' @param seed_number seed number -#' @returns `icfObject` that contains the Interaction Changed differential gene -#' scores -#' @details Function to calculate if genes are differentially expressed in -#' cell types when they interact (approximated by physical proximity) with -#' other cell types. The results data.table in the `icfObject` contains -#' - at least - the following columns: -#' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } -#' \item{log2fc:}{ log2 fold-change between sel and other} -#' \item{diff:}{ spatial expression difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{nr_other:}{ number of other cells of selected target cell type} -#' \item{int_nr_other:}{ number of other cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} -#' } -#' @seealso \code{\link{findInteractionChangedFeats}} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' findICF(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' @export -findICF <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = "normalized", - selected_feats = NULL, - cluster_column, - spatial_network_name = "Delaunay_network", - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - adjust_method = c( - "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "fdr", "none" - ), - nr_permutations = 100, - exclude_selected_cells_from_test = TRUE, - do_parallel = TRUE, - set_seed = TRUE, - seed_number = 1234) { - findInteractionChangedFeats( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - expression_values = expression_values, - selected_feats = selected_feats, - cluster_column = cluster_column, - spatial_network_name = spatial_network_name, - minimum_unique_cells = minimum_unique_cells, - minimum_unique_int_cells = minimum_unique_int_cells, - diff_test = diff_test, - mean_method = mean_method, - offset = offset, - adjust_method = adjust_method, - nr_permutations = nr_permutations, - exclude_selected_cells_from_test = exclude_selected_cells_from_test, - do_parallel = do_parallel, - set_seed = set_seed, - seed_number = seed_number - ) -} - - - - -#' @title findICG -#' @name findICG -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. genes that are differentially expressed due to interaction with other -#' cell types. -#' @inheritDotParams findICF -#' @seealso \code{\link{findICF}} -#' @returns cell-cell interaction changed features -#' @export -findICG <- function(...) { - .Deprecated(new = "findICF") - - findICF(...) -} - - - -#' @title findCPG -#' @name findCPG -#' @description Identifies cell-to-cell Interaction Changed Features (ICF), -#' i.e. genes that are differentially expressed due to proximity to other cell -#' types. -#' @inheritDotParams findICF -#' @returns cell-to-cell Interaction Changed Genes -#' @seealso \code{\link{findICF}} -#' @export -findCPG <- function(...) { - .Deprecated(new = "findICF") - - findICF(...) -} #' @title filterInteractionChangedFeats #' @name filterInteractionChangedFeats +#' @aliases filterICF #' @description Filter Interaction Changed Feature scores. #' @param icfObject ICF (interaction changed feature) score object #' @param min_cells minimum number of source cell type @@ -1473,7 +1311,27 @@ findCPG <- function(...) { #' @param min_zscore minimum z-score change #' @param zscores_column calculate z-scores over cell types or genes #' @param direction differential expression directions to keep -#' @returns icfObject that contains the filtered differential feature scores +#' @returns `icfObject` that contains the filtered differential feature scores +#' @md +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' icf <- findInteractionChangedFeats(g, +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 +#' ) +#' force(icf) +#' force(icf$ICFscores) +#' +#' icf_filter1 <- filterInteractionChangedFeats(icf, min_cells = 4) +#' force(icf_filter1) +#' force(icf_filter1$ICFscores) +#' +#' # filterICF is a simple alias with a shortened name +#' icf_filter2 <- filterICF(icf, min_cells = 4) +#' force(icf_filter2) +#' #' @export filterInteractionChangedFeats <- function(icfObject, min_cells = 4, @@ -1486,7 +1344,7 @@ filterInteractionChangedFeats <- function(icfObject, min_zscore = 2, zscores_column = c("cell_type", "feats"), direction = c("both", "up", "down")) { - # data.table variables + # NSE vars nr_select <- int_nr_select <- zscores <- log2fc <- sel <- other <- p.adj <- NULL @@ -1542,117 +1400,13 @@ filterInteractionChangedFeats <- function(icfObject, return(newobj) } - -#' @title filterInteractionChangedGenes -#' @name filterInteractionChangedGenes -#' @description Filter Interaction Changed Feature scores. -#' @inheritDotParams filterInteractionChangedFeats -#' @seealso \code{\link{filterInteractionChangedFeats}} -#' @returns filtered interaction changed feature scores -#' @export -filterInteractionChangedGenes <- function(...) { - .Deprecated(new = "filterInteractionChangedFeats") - - filterInteractionChangedFeats(...) -} - - -#' @title filterCellProximityGenes -#' @name filterCellProximityGenes -#' @description Filter Interaction Changed Feature scores. -#' @inheritDotParams filterInteractionChangedFeats -#' @seealso \code{\link{filterInteractionChangedFeats}} -#' @returns proximity genes -#' @export -filterCellProximityGenes <- function(...) { - .Deprecated(new = "filterInteractionChangedFeats") - - filterInteractionChangedFeats(...) -} - - - - - -#' @title filterICF -#' @name filterICF -#' @description Filter Interaction Changed Feature scores. -#' @param icfObject ICF (interaction changed feature) score object -#' @param min_cells minimum number of source cell type -#' @param min_cells_expr minimum expression level for source cell type -#' @param min_int_cells minimum number of interacting neighbor cell type -#' @param min_int_cells_expr minimum expression level for interacting neighbor -#' cell type -#' @param min_fdr minimum adjusted p-value -#' @param min_spat_diff minimum absolute spatial expression difference -#' @param min_log2_fc minimum log2 fold-change -#' @param min_zscore minimum z-score change -#' @param zscores_column calculate z-scores over cell types or features -#' @param direction differential expression directions to keep -#' @returns icfObject that contains the filtered differential feature scores -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") -#' -#' filterICF(g_icf) -#' @export -filterICF <- function(icfObject, - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down")) { - filterInteractionChangedFeats( - icfObject = icfObject, - min_cells = min_cells, - min_cells_expr = min_cells_expr, - min_int_cells = min_int_cells, - min_int_cells_expr = min_int_cells_expr, - min_fdr = min_fdr, - min_spat_diff = min_spat_diff, - min_log2_fc = min_log2_fc, - min_zscore = min_zscore, - zscores_column = zscores_column, - direction = direction - ) -} - - - - -#' @title filterICG -#' @name filterICG -#' @description Filter Interaction Changed Gene scores. -#' @inheritDotParams filterICF -#' @seealso \code{\link{filterICF}} -#' @returns filtered interaction changed gene scores +#' @rdname filterInteractionChangedFeats #' @export -filterICG <- function(...) { - .Deprecated(new = "filterICF") +filterICF <- filterInteractionChangedFeats - filterICF(...) -} -#' @title filterCPG -#' @name filterCPG -#' @description Filter Interaction Changed Gene scores. -#' @inheritDotParams filterICF -#' @seealso \code{\link{filterICF}} -#' @returns filtered interaction changed gene scores -#' @export -filterCPG <- function(...) { - .Deprecated(new = "filterICF") - - filterICF(...) -} @@ -2002,6 +1756,7 @@ filterCPG <- function(...) { #' @title combineInteractionChangedFeats #' @name combineInteractionChangedFeats +#' @aliases combineICF #' @description Combine ICF scores in a pairwise manner. #' @param icfObject ICF (interaction changed feat) score object #' @param selected_ints subset of selected cell-cell interactions (optional) @@ -2017,14 +1772,19 @@ filterCPG <- function(...) { #' @param min_log2_fc minimum absolute log2 fold-change #' @param do_parallel run calculations in parallel with mclapply #' @param verbose verbose -#' @returns combIcfObject that contains the filtered differential feature scores +#' @returns `combIcfObject` that contains the filtered differential feature +#' scores #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' g_icf <- findInteractionChangedFeats(g, -#' cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 +#' ) #' -#' combineInteractionChangedFeats(g_icf) +#' cicf <- combineInteractionChangedFeats(g_icf) +#' force(cicf) +#' combineICF(g_icf) # this is a shortened alias #' @export combineInteractionChangedFeats <- function(icfObject, selected_ints = NULL, @@ -2038,7 +1798,7 @@ combineInteractionChangedFeats <- function(icfObject, min_log2_fc = 0.5, do_parallel = TRUE, verbose = TRUE) { - # data.table variables + # NSE vars unif_int <- feat1_feat2 <- feats_1 <- feats_2 <- comb_logfc <- log2fc_1 <- log2fc_2 <- direction <- NULL @@ -2093,157 +1853,74 @@ combineInteractionChangedFeats <- function(icfObject, min_log2_fc = min_log2_fc ) FTFresults[[i]] <- tempres - } - } - - final_results <- do.call("rbind", FTFresults) - - final_results[, feat1_feat2 := paste0(feats_1, "--", feats_2)] - - final_results <- dt_sort_combine_two_columns(final_results, - column1 = "feats_1", column2 = "feats_2", - myname = "unif_feat_feat" - ) - - final_results[, comb_logfc := abs(log2fc_1) + abs(log2fc_2)] - setorder(final_results, -comb_logfc) - final_results[, direction := ifelse(log2fc_1 > 0 & log2fc_2 > 0, "both_up", - ifelse(log2fc_1 < 0 & log2fc_2 < 0, "both_down", "mixed") - )] - - combIcfObject <- list( - combICFscores = final_results, - Giotto_info = list( - "values" = icfObject[["Giotto_info"]][["values"]], - "cluster" = icfObject[["Giotto_info"]][["cluster"]], - "spatial network" = icfObject[["Giotto_info"]][["spatial network"]] - ), - test_info = list( - "test" = icfObject[["test_info"]][["test"]], - "p.adj" = icfObject[["test_info"]][["p.adj"]], - "min cells" = icfObject[["test_info"]][["min cells"]], - "min interacting cells" = icfObject[["test_info"]][[ - "min interacting cells"]], - "exclude selected cells" = icfObject[["test_info"]][[ - "exclude selected cells"]], - "perm" = icfObject[["test_info"]][["perm"]] - ) - ) - class(combIcfObject) <- append(class(combIcfObject), "combIcfObject") - return(combIcfObject) -} - - -#' @title combineInteractionChangedGenes -#' @name combineInteractionChangedGenes -#' @description Combine ICF scores in a pairwise manner. -#' @inheritDotParams combineInteractionChangedFeats -#' @returns ICF scores -#' @seealso \code{\link{combineInteractionChangedFeats}} -#' @export -combineInteractionChangedGenes <- function(...) { - .Deprecated(new = "combineInteractionChangedFeats") + } + } - combineInteractionChangedFeats(...) -} + final_results <- do.call("rbind", FTFresults) + final_results[, feat1_feat2 := paste0(feats_1, "--", feats_2)] -#' @title combineCellProximityGenes -#' @name combineCellProximityGenes -#' @description Combine ICF scores in a pairwise manner. -#' @inheritDotParams combineInteractionChangedFeats -#' @returns ICF scores -#' @seealso \code{\link{combineInteractionChangedFeats}} -#' @export -combineCellProximityGenes <- function(...) { - .Deprecated(new = "combineInteractionChangedFeats") + final_results <- dt_sort_combine_two_columns(final_results, + column1 = "feats_1", column2 = "feats_2", + myname = "unif_feat_feat" + ) - combineInteractionChangedFeats(...) -} + final_results[, comb_logfc := abs(log2fc_1) + abs(log2fc_2)] + setorder(final_results, -comb_logfc) + final_results[, direction := ifelse(log2fc_1 > 0 & log2fc_2 > 0, "both_up", + ifelse(log2fc_1 < 0 & log2fc_2 < 0, "both_down", "mixed") + )] -#' @title combineICF -#' @name combineICF -#' @description Combine ICF scores in a pairwise manner. -#' @param icfObject ICF (interaction changed feat) score object -#' @param selected_ints subset of selected cell-cell interactions (optional) -#' @param selected_feats subset of selected Feats (optional) -#' @param specific_feats_1 specific Featset combo -#' (need to position match specific_genes_2) -#' @param specific_feats_2 specific Featset combo -#' (need to position match specific_genes_1) -#' @param min_cells minimum number of target cell type -#' @param min_int_cells minimum number of interacting cell type -#' @param min_fdr minimum adjusted p-value -#' @param min_spat_diff minimum absolute spatial expression difference -#' @param min_log2_fc minimum absolute log2 fold-change -#' @param do_parallel run calculations in parallel with mclapply -#' @param verbose verbose -#' @returns icfObject that contains the filtered differential feats scores -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -#' -#' combineICF(g_icf) -#' @export -combineICF <- function(icfObject, - selected_ints = NULL, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5, - do_parallel = TRUE, - verbose = TRUE) { - combineInteractionChangedFeats( - icfObject = icfObject, - selected_ints = selected_ints, - selected_feats = selected_feats, - specific_feats_1 = specific_feats_1, - specific_feats_2 = specific_feats_2, - min_cells = min_cells, - min_int_cells = min_int_cells, - min_fdr = min_fdr, - min_spat_diff = min_spat_diff, - min_log2_fc = min_log2_fc, - do_parallel = do_parallel, - verbose = verbose + combIcfObject <- structure( + .Data = list( + combICFscores = final_results, + Giotto_info = list( + "values" = icfObject[["Giotto_info"]][["values"]], + "cluster" = icfObject[["Giotto_info"]][["cluster"]], + "spatial network" = + icfObject[["Giotto_info"]][["spatial network"]] + ), + test_info = list( + "test" = icfObject[["test_info"]][["test"]], + "p.adj" = icfObject[["test_info"]][["p.adj"]], + "min cells" = icfObject[["test_info"]][["min cells"]], + "min interacting cells" = icfObject[["test_info"]][[ + "min interacting cells"]], + "exclude selected cells" = icfObject[["test_info"]][[ + "exclude selected cells"]], + "perm" = icfObject[["test_info"]][["perm"]] + ) + ), + class = "combIcfObject" ) + return(combIcfObject) } - -#' @title combineICG -#' @name combineICG -#' @description Combine ICF scores in a pairwise manner. -#' @inheritDotParams combineICF -#' @returns ICF scores -#' @seealso \code{\link{combineICF}} +#' @rdname combineInteractionChangedFeats #' @export -combineICG <- function(...) { - .Deprecated(new = "combineICF") - - combineICF(...) -} +combineICF <- combineInteractionChangedFeats -#' @title combineCPG -#' @name combineCPG -#' @description Combine ICF scores in a pairwise manner. -#' @inheritDotParams combineICF -#' @returns ICF scores -#' @seealso \code{\link{combineICF}} +#' @name print.combIcfObject +#' @title combIcfObject print method +#' @param x object to print +#' @param \dots additional params to pass (none implemented) +#' @keywords internal #' @export -combineCPG <- function(...) { - .Deprecated(new = "combineICF") - - combineICF(...) +print.combIcfObject <- function(x, ...) { + cat("An object of class", class(x), "\n") + info <- list( + dimensions = sprintf("%d, %d (icf pairs, attributes)", + nrow(x$combICFscores), ncol(x$combICFscores)) + ) + print_list(info, pre = " -") + cat("\n") + print_list(x$Giotto_info, pre = " -") + cat("\n") + print_list(x$test_info, pre = " -") } - # * #### # cell communication #### @@ -3239,3 +2916,187 @@ combCCcom <- function(spatialCC, return(merge_DT) } + + + + +# DEPRECATED #### + +#' @title deprecated +#' @name findInteractionChangedGenes +#' @description Identifies cell-to-cell Interaction Changed Features (ICF), +#' i.e. genes that are differentially expressed due to interactions with other +#' cell types. +#' @param ... params to pass to \code{findInteractionChangedFeats} +#' @seealso \code{\link{findInteractionChangedFeats}} +#' @returns interaction changed genes +#' @export +findInteractionChangedGenes <- function(...) { + .Deprecated(new = "findInteractionChangedFeats") + + findInteractionChangedFeats(...) +} + + + +#' @title deprecated +#' @name findCellProximityGenes +#' @description Identifies cell-to-cell Interaction Changed Features (ICF), +#' i.e. genes that are differentially expressed due to proximity to other cell +#' types. +#' @inheritDotParams findInteractionChangedFeats +#' @seealso \code{\link{findInteractionChangedFeats}} +#' @returns cell-cell interaction changed genes +#' @export +findCellProximityGenes <- function(...) { + .Deprecated(new = "findInteractionChangedFeats") + + findInteractionChangedFeats(...) +} + + + +#' @title deprecated +#' @name findICG +#' @description Identifies cell-to-cell Interaction Changed Features (ICF), +#' i.e. genes that are differentially expressed due to interaction with other +#' cell types. +#' @inheritDotParams findICF +#' @seealso \code{\link{findICF}} +#' @returns cell-cell interaction changed features +#' @export +findICG <- function(...) { + .Deprecated(new = "findICF") + + findICF(...) +} + + + +#' @title deprecated +#' @name findCPG +#' @description Identifies cell-to-cell Interaction Changed Features (ICF), +#' i.e. genes that are differentially expressed due to proximity to other cell +#' types. +#' @inheritDotParams findICF +#' @returns cell-to-cell Interaction Changed Genes +#' @seealso \code{\link{findICF}} +#' @export +findCPG <- function(...) { + .Deprecated(new = "findICF") + + findICF(...) +} + +#' @title deprecated +#' @name filterInteractionChangedGenes +#' @description Filter Interaction Changed Feature scores. +#' @inheritDotParams filterInteractionChangedFeats +#' @seealso \code{\link{filterInteractionChangedFeats}} +#' @returns filtered interaction changed feature scores +#' @export +filterInteractionChangedGenes <- function(...) { + .Deprecated(new = "filterInteractionChangedFeats") + + filterInteractionChangedFeats(...) +} + + +#' @title deprecated +#' @name filterCellProximityGenes +#' @description Filter Interaction Changed Feature scores. +#' @inheritDotParams filterInteractionChangedFeats +#' @seealso \code{\link{filterInteractionChangedFeats}} +#' @returns proximity genes +#' @export +filterCellProximityGenes <- function(...) { + .Deprecated(new = "filterInteractionChangedFeats") + + filterInteractionChangedFeats(...) +} + + + + + +#' @title deprecated +#' @name filterICG +#' @description Filter Interaction Changed Gene scores. +#' @inheritDotParams filterICF +#' @seealso \code{\link{filterICF}} +#' @returns filtered interaction changed gene scores +#' @export +filterICG <- function(...) { + .Deprecated(new = "filterICF") + + filterICF(...) +} + + + +#' @title deprecated +#' @name filterCPG +#' @description Filter Interaction Changed Gene scores. +#' @inheritDotParams filterICF +#' @seealso \code{\link{filterICF}} +#' @returns filtered interaction changed gene scores +#' @export +filterCPG <- function(...) { + .Deprecated(new = "filterICF") + + filterICF(...) +} + + +#' @title deprecated +#' @name combineInteractionChangedGenes +#' @description Combine ICF scores in a pairwise manner. +#' @inheritDotParams combineInteractionChangedFeats +#' @returns ICF scores +#' @seealso \code{\link{combineInteractionChangedFeats}} +#' @export +combineInteractionChangedGenes <- function(...) { + .Deprecated(new = "combineInteractionChangedFeats") + + combineInteractionChangedFeats(...) +} + + +#' @title deprecated +#' @name combineCellProximityGenes +#' @description Combine ICF scores in a pairwise manner. +#' @inheritDotParams combineInteractionChangedFeats +#' @returns ICF scores +#' @seealso \code{\link{combineInteractionChangedFeats}} +#' @export +combineCellProximityGenes <- function(...) { + .Deprecated(new = "combineInteractionChangedFeats") + + combineInteractionChangedFeats(...) +} + +#' @title deprecated +#' @name combineICG +#' @description Combine ICF scores in a pairwise manner. +#' @inheritDotParams combineICF +#' @returns ICF scores +#' @seealso \code{\link{combineICF}} +#' @export +combineICG <- function(...) { + .Deprecated(new = "combineICF") + + combineICF(...) +} + +#' @title deprecated +#' @name combineCPG +#' @description Combine ICF scores in a pairwise manner. +#' @inheritDotParams combineICF +#' @returns ICF scores +#' @seealso \code{\link{combineICF}} +#' @export +combineCPG <- function(...) { + .Deprecated(new = "combineICF") + + combineICF(...) +} diff --git a/man/combineCPG.Rd b/man/combineCPG.Rd index 0536ecbdc..f4aeb7a23 100644 --- a/man/combineCPG.Rd +++ b/man/combineCPG.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineCPG} \alias{combineCPG} -\title{combineCPG} +\title{deprecated} \usage{ combineCPG(...) } @@ -12,11 +12,11 @@ combineCPG(...) \describe{ \item{\code{icfObject}}{ICF (interaction changed feat) score object} \item{\code{selected_ints}}{subset of selected cell-cell interactions (optional)} - \item{\code{selected_feats}}{subset of selected Feats (optional)} - \item{\code{specific_feats_1}}{specific Featset combo -(need to position match specific_genes_2)} - \item{\code{specific_feats_2}}{specific Featset combo -(need to position match specific_genes_1)} + \item{\code{selected_feats}}{subset of selected Features (optional)} + \item{\code{specific_feats_1}}{specific Featureset combo +(need to position match specific_feats_2)} + \item{\code{specific_feats_2}}{specific Featureset combo +(need to position match specific_feats_1)} \item{\code{min_cells}}{minimum number of target cell type} \item{\code{min_int_cells}}{minimum number of interacting cell type} \item{\code{min_fdr}}{minimum adjusted p-value} diff --git a/man/combineCellProximityGenes.Rd b/man/combineCellProximityGenes.Rd index 1451edf7c..f1b2aeb6f 100644 --- a/man/combineCellProximityGenes.Rd +++ b/man/combineCellProximityGenes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineCellProximityGenes} \alias{combineCellProximityGenes} -\title{combineCellProximityGenes} +\title{deprecated} \usage{ combineCellProximityGenes(...) } diff --git a/man/combineICF.Rd b/man/combineICF.Rd deleted file mode 100644 index 6cf6d1413..000000000 --- a/man/combineICF.Rd +++ /dev/null @@ -1,61 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_interaction.R -\name{combineICF} -\alias{combineICF} -\title{combineICF} -\usage{ -combineICF( - icfObject, - selected_ints = NULL, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5, - do_parallel = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{icfObject}{ICF (interaction changed feat) score object} - -\item{selected_ints}{subset of selected cell-cell interactions (optional)} - -\item{selected_feats}{subset of selected Feats (optional)} - -\item{specific_feats_1}{specific Featset combo -(need to position match specific_genes_2)} - -\item{specific_feats_2}{specific Featset combo -(need to position match specific_genes_1)} - -\item{min_cells}{minimum number of target cell type} - -\item{min_int_cells}{minimum number of interacting cell type} - -\item{min_fdr}{minimum adjusted p-value} - -\item{min_spat_diff}{minimum absolute spatial expression difference} - -\item{min_log2_fc}{minimum absolute log2 fold-change} - -\item{do_parallel}{run calculations in parallel with mclapply} - -\item{verbose}{verbose} -} -\value{ -icfObject that contains the filtered differential feats scores -} -\description{ -Combine ICF scores in a pairwise manner. -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") -g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) - -combineICF(g_icf) -} diff --git a/man/combineICG.Rd b/man/combineICG.Rd index 07a793b94..c90ca8c00 100644 --- a/man/combineICG.Rd +++ b/man/combineICG.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineICG} \alias{combineICG} -\title{combineICG} +\title{deprecated} \usage{ combineICG(...) } @@ -12,11 +12,11 @@ combineICG(...) \describe{ \item{\code{icfObject}}{ICF (interaction changed feat) score object} \item{\code{selected_ints}}{subset of selected cell-cell interactions (optional)} - \item{\code{selected_feats}}{subset of selected Feats (optional)} - \item{\code{specific_feats_1}}{specific Featset combo -(need to position match specific_genes_2)} - \item{\code{specific_feats_2}}{specific Featset combo -(need to position match specific_genes_1)} + \item{\code{selected_feats}}{subset of selected Features (optional)} + \item{\code{specific_feats_1}}{specific Featureset combo +(need to position match specific_feats_2)} + \item{\code{specific_feats_2}}{specific Featureset combo +(need to position match specific_feats_1)} \item{\code{min_cells}}{minimum number of target cell type} \item{\code{min_int_cells}}{minimum number of interacting cell type} \item{\code{min_fdr}}{minimum adjusted p-value} diff --git a/man/combineInteractionChangedFeats.Rd b/man/combineInteractionChangedFeats.Rd index 506685ce2..4851333e3 100644 --- a/man/combineInteractionChangedFeats.Rd +++ b/man/combineInteractionChangedFeats.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineInteractionChangedFeats} \alias{combineInteractionChangedFeats} +\alias{combineICF} \title{combineInteractionChangedFeats} \usage{ combineInteractionChangedFeats( @@ -18,6 +19,21 @@ combineInteractionChangedFeats( do_parallel = TRUE, verbose = TRUE ) + +combineICF( + icfObject, + selected_ints = NULL, + selected_feats = NULL, + specific_feats_1 = NULL, + specific_feats_2 = NULL, + min_cells = 5, + min_int_cells = 3, + min_fdr = 0.05, + min_spat_diff = 0, + min_log2_fc = 0.5, + do_parallel = TRUE, + verbose = TRUE +) } \arguments{ \item{icfObject}{ICF (interaction changed feat) score object} @@ -47,7 +63,8 @@ combineInteractionChangedFeats( \item{verbose}{verbose} } \value{ -combIcfObject that contains the filtered differential feature scores +`combIcfObject` that contains the filtered differential feature +scores } \description{ Combine ICF scores in a pairwise manner. @@ -55,8 +72,12 @@ Combine ICF scores in a pairwise manner. \examples{ g <- GiottoData::loadGiottoMini("visium") g_icf <- findInteractionChangedFeats(g, -cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), + nr_permutations = 10 +) -combineInteractionChangedFeats(g_icf) +cicf <- combineInteractionChangedFeats(g_icf) +force(cicf) +combineICF(g_icf) # this is a shortened alias } diff --git a/man/combineInteractionChangedGenes.Rd b/man/combineInteractionChangedGenes.Rd index 49014ddbf..061a625c4 100644 --- a/man/combineInteractionChangedGenes.Rd +++ b/man/combineInteractionChangedGenes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combineInteractionChangedGenes} \alias{combineInteractionChangedGenes} -\title{combineInteractionChangedGenes} +\title{deprecated} \usage{ combineInteractionChangedGenes(...) } diff --git a/man/filterCPG.Rd b/man/filterCPG.Rd index db948b69d..5e1173204 100644 --- a/man/filterCPG.Rd +++ b/man/filterCPG.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{filterCPG} \alias{filterCPG} -\title{filterCPG} +\title{deprecated} \usage{ filterCPG(...) } @@ -20,7 +20,7 @@ cell type} \item{\code{min_spat_diff}}{minimum absolute spatial expression difference} \item{\code{min_log2_fc}}{minimum log2 fold-change} \item{\code{min_zscore}}{minimum z-score change} - \item{\code{zscores_column}}{calculate z-scores over cell types or features} + \item{\code{zscores_column}}{calculate z-scores over cell types or genes} \item{\code{direction}}{differential expression directions to keep} }} } diff --git a/man/filterCellProximityGenes.Rd b/man/filterCellProximityGenes.Rd index ca4d925ec..957ed386f 100644 --- a/man/filterCellProximityGenes.Rd +++ b/man/filterCellProximityGenes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{filterCellProximityGenes} \alias{filterCellProximityGenes} -\title{filterCellProximityGenes} +\title{deprecated} \usage{ filterCellProximityGenes(...) } diff --git a/man/filterICF.Rd b/man/filterICF.Rd deleted file mode 100644 index a8698c95e..000000000 --- a/man/filterICF.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_interaction.R -\name{filterICF} -\alias{filterICF} -\title{filterICF} -\usage{ -filterICF( - icfObject, - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down") -) -} -\arguments{ -\item{icfObject}{ICF (interaction changed feature) score object} - -\item{min_cells}{minimum number of source cell type} - -\item{min_cells_expr}{minimum expression level for source cell type} - -\item{min_int_cells}{minimum number of interacting neighbor cell type} - -\item{min_int_cells_expr}{minimum expression level for interacting neighbor -cell type} - -\item{min_fdr}{minimum adjusted p-value} - -\item{min_spat_diff}{minimum absolute spatial expression difference} - -\item{min_log2_fc}{minimum log2 fold-change} - -\item{min_zscore}{minimum z-score change} - -\item{zscores_column}{calculate z-scores over cell types or features} - -\item{direction}{differential expression directions to keep} -} -\value{ -icfObject that contains the filtered differential feature scores -} -\description{ -Filter Interaction Changed Feature scores. -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -g_icf <- findInteractionChangedFeats(g, cluster_column = "leiden_clus") - -filterICF(g_icf) -} diff --git a/man/filterICG.Rd b/man/filterICG.Rd index 09f9a3da7..d15ade247 100644 --- a/man/filterICG.Rd +++ b/man/filterICG.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{filterICG} \alias{filterICG} -\title{filterICG} +\title{deprecated} \usage{ filterICG(...) } @@ -20,7 +20,7 @@ cell type} \item{\code{min_spat_diff}}{minimum absolute spatial expression difference} \item{\code{min_log2_fc}}{minimum log2 fold-change} \item{\code{min_zscore}}{minimum z-score change} - \item{\code{zscores_column}}{calculate z-scores over cell types or features} + \item{\code{zscores_column}}{calculate z-scores over cell types or genes} \item{\code{direction}}{differential expression directions to keep} }} } diff --git a/man/filterInteractionChangedFeats.Rd b/man/filterInteractionChangedFeats.Rd index 0a0f3df16..06998f34d 100644 --- a/man/filterInteractionChangedFeats.Rd +++ b/man/filterInteractionChangedFeats.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{filterInteractionChangedFeats} \alias{filterInteractionChangedFeats} +\alias{filterICF} \title{filterInteractionChangedFeats} \usage{ filterInteractionChangedFeats( @@ -17,6 +18,20 @@ filterInteractionChangedFeats( zscores_column = c("cell_type", "feats"), direction = c("both", "up", "down") ) + +filterICF( + icfObject, + min_cells = 4, + min_cells_expr = 1, + min_int_cells = 4, + min_int_cells_expr = 1, + min_fdr = 0.1, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down") +) } \arguments{ \item{icfObject}{ICF (interaction changed feature) score object} @@ -43,8 +58,28 @@ cell type} \item{direction}{differential expression directions to keep} } \value{ -icfObject that contains the filtered differential feature scores +\code{icfObject} that contains the filtered differential feature scores } \description{ Filter Interaction Changed Feature scores. } +\examples{ +g <- GiottoData::loadGiottoMini("visium") + +icf <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), + nr_permutations = 10 +) +force(icf) +force(icf$ICFscores) + +icf_filter1 <- filterInteractionChangedFeats(icf, min_cells = 4) +force(icf_filter1) +force(icf_filter1$ICFscores) + +# filterICF is a simple alias with a shortened name +icf_filter2 <- filterICF(icf, min_cells = 4) +force(icf_filter2) + +} diff --git a/man/filterInteractionChangedGenes.Rd b/man/filterInteractionChangedGenes.Rd index 6de8591cc..236490145 100644 --- a/man/filterInteractionChangedGenes.Rd +++ b/man/filterInteractionChangedGenes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{filterInteractionChangedGenes} \alias{filterInteractionChangedGenes} -\title{filterInteractionChangedGenes} +\title{deprecated} \usage{ filterInteractionChangedGenes(...) } diff --git a/man/findCPG.Rd b/man/findCPG.Rd index 12a52c745..0e7975769 100644 --- a/man/findCPG.Rd +++ b/man/findCPG.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{findCPG} \alias{findCPG} -\title{findCPG} +\title{deprecated} \usage{ findCPG(...) } diff --git a/man/findCellProximityGenes.Rd b/man/findCellProximityGenes.Rd index 401ad4042..031ca177c 100644 --- a/man/findCellProximityGenes.Rd +++ b/man/findCellProximityGenes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{findCellProximityGenes} \alias{findCellProximityGenes} -\title{findCellProximityGenes} +\title{deprecated} \usage{ findCellProximityGenes(...) } diff --git a/man/findICF.Rd b/man/findICF.Rd deleted file mode 100644 index 86e830342..000000000 --- a/man/findICF.Rd +++ /dev/null @@ -1,105 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_interaction.R -\name{findICF} -\alias{findICF} -\title{findICF} -\usage{ -findICF( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = "normalized", - selected_feats = NULL, - cluster_column, - spatial_network_name = "Delaunay_network", - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - adjust_method = c("bonferroni", "BH", "holm", "hochberg", "hommel", "BY", "fdr", - "none"), - nr_permutations = 100, - exclude_selected_cells_from_test = TRUE, - do_parallel = TRUE, - set_seed = TRUE, - seed_number = 1234 -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{feat_type}{feature type} - -\item{spat_unit}{spatial unit} - -\item{expression_values}{expression values to use} - -\item{selected_feats}{subset of selected features (optional)} - -\item{cluster_column}{name of column to use for cell types} - -\item{spatial_network_name}{name of spatial network to use} - -\item{minimum_unique_cells}{minimum number of target cells required} - -\item{minimum_unique_int_cells}{minimum number of interacting cells required} - -\item{diff_test}{which differential expression test} - -\item{mean_method}{method to use to calculate the mean} - -\item{offset}{offset value to use when calculating log2 ratio} - -\item{adjust_method}{which method to adjust p-values} - -\item{nr_permutations}{number of permutations if diff_test = permutation} - -\item{exclude_selected_cells_from_test}{exclude interacting cells other cells} - -\item{do_parallel}{run calculations in parallel with mclapply} - -\item{set_seed}{set a seed for reproducibility} - -\item{seed_number}{seed number} -} -\value{ -`icfObject` that contains the Interaction Changed differential gene -scores -} -\description{ -Identifies cell-to-cell Interaction Changed Features (ICF), -i.e. features that are differentially expressed due to proximity to other -cell types. -} -\details{ -Function to calculate if genes are differentially expressed in -cell types when they interact (approximated by physical proximity) with -other cell types. The results data.table in the `icfObject` contains -- at least - the following columns: -\itemize{ - \item{features:}{ All or selected list of tested features} - \item{sel:}{ average feature expression in the interacting cells from the target cell type } - \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } - \item{log2fc:}{ log2 fold-change between sel and other} - \item{diff:}{ spatial expression difference between sel and other} - \item{p.value:}{ associated p-value} - \item{p.adj:}{ adjusted p-value} - \item{cell_type:}{ target cell type} - \item{int_cell_type:}{ interacting cell type} - \item{nr_select:}{ number of cells for selected target cell type} - \item{int_nr_select:}{ number of cells for interacting cell type} - \item{nr_other:}{ number of other cells of selected target cell type} - \item{int_nr_other:}{ number of other cells for interacting cell type} - \item{unif_int:}{ cell-cell interaction} -} -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -findICF(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) -} -\seealso{ -\code{\link{findInteractionChangedFeats}} -} diff --git a/man/findICFSpot.Rd b/man/findICFSpot.Rd index 5f13d7dc4..51fbef4e2 100644 --- a/man/findICFSpot.Rd +++ b/man/findICFSpot.Rd @@ -102,5 +102,10 @@ the following columns: g <- GiottoData::loadGiottoMini("visium") g_expression <- getExpression(g, output = "matrix") -findICFSpot(g, spat_unit = "cell", feat_type = "rna", ave_celltype_exp = g_expression, spatial_network_name = "spatial_network") +findICFSpot(g, + spat_unit = "cell", + feat_type = "rna", + ave_celltype_exp = g_expression, + spatial_network_name = "spatial_network" +) } diff --git a/man/findICG.Rd b/man/findICG.Rd index b6617ccec..170d83f2a 100644 --- a/man/findICG.Rd +++ b/man/findICG.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{findICG} \alias{findICG} -\title{findICG} +\title{deprecated} \usage{ findICG(...) } diff --git a/man/findInteractionChangedFeats.Rd b/man/findInteractionChangedFeats.Rd index 0252ce158..e674159b7 100644 --- a/man/findInteractionChangedFeats.Rd +++ b/man/findInteractionChangedFeats.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{findInteractionChangedFeats} \alias{findInteractionChangedFeats} +\alias{findICF} \title{findInteractionChangedFeats} \usage{ findInteractionChangedFeats( @@ -25,6 +26,28 @@ findInteractionChangedFeats( set_seed = TRUE, seed_number = 1234 ) + +findICF( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = "normalized", + selected_feats = NULL, + cluster_column, + spatial_network_name = "Delaunay_network", + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + diff_test = c("permutation", "limma", "t.test", "wilcox"), + mean_method = c("arithmic", "geometric"), + offset = 0.1, + adjust_method = c("bonferroni", "BH", "holm", "hochberg", "hommel", "BY", "fdr", + "none"), + nr_permutations = 1000, + exclude_selected_cells_from_test = TRUE, + do_parallel = TRUE, + set_seed = TRUE, + seed_number = 1234 +) } \arguments{ \item{gobject}{giotto object} @@ -64,7 +87,7 @@ findInteractionChangedFeats( \item{seed_number}{seed number} } \value{ -icfObject that contains the Interaction Changed differential +\code{icfObject} that contains the Interaction Changed differential feature scores } \description{ @@ -75,28 +98,47 @@ cell types. \details{ Function to calculate if features are differentially expressed in cell types when they interact (approximated by physical proximity) with -other cell types. The results data.table in the icfObject contains -- at least - the following columns: +other cell types. The results data.table in the \code{icfObject} contains +\itemize{ +\item at least - the following columns: \itemize{ - \item{features:}{ All or selected list of tested features} - \item{sel:}{ average feature expression in the interacting cells from the target cell type } - \item{other:}{ average feature expression in the NOT-interacting cells from the target cell type } - \item{log2fc:}{ log2 fold-change between sel and other} - \item{diff:}{ spatial expression difference between sel and other} - \item{p.value:}{ associated p-value} - \item{p.adj:}{ adjusted p-value} - \item{cell_type:}{ target cell type} - \item{int_cell_type:}{ interacting cell type} - \item{nr_select:}{ number of cells for selected target cell type} - \item{int_nr_select:}{ number of cells for interacting cell type} - \item{nr_other:}{ number of other cells of selected target cell type} - \item{int_nr_other:}{ number of other cells for interacting cell type} - \item{unif_int:}{ cell-cell interaction} +\item \strong{features:} All or selected list of tested features +\item \strong{sel:} average feature expression in the interacting cells from the +target cell type +\item \strong{other:} average feature expression in the NOT-interacting cells from +the target cell type +\item \strong{log2fc:} log2 fold-change between sel and other +\item \strong{diff:} spatial expression difference between sel and other +\item \strong{p.value:} associated p-value +\item \strong{p.adj:} adjusted p-value +\item \strong{cell_type:} target cell type +\item \strong{int_cell_type:} interacting cell type +\item \strong{nr_select:} number of cells for selected target cell type +\item \strong{int_nr_select:} number of cells for interacting cell type +\item \strong{nr_other:} number of other cells of selected target cell type +\item \strong{int_nr_other:} number of other cells for interacting cell type +\item \strong{unif_int:} cell-cell interaction +} } } \examples{ g <- GiottoData::loadGiottoMini("visium") -findInteractionChangedFeats(g, cluster_column = "leiden_clus", -selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10) +icf1 <- findInteractionChangedFeats(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), + nr_permutations = 10 +) +force(icf1) +force(icf1$ICFscores) + +# this is just an alias with a shorter name +icf2 <- findICF(g, + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), + nr_permutations = 10 + ) +} +\seealso{ +\code{\link[=filterInteractionChangedFeats]{filterInteractionChangedFeats()}} } diff --git a/man/findInteractionChangedGenes.Rd b/man/findInteractionChangedGenes.Rd index 72339c8ce..43351c763 100644 --- a/man/findInteractionChangedGenes.Rd +++ b/man/findInteractionChangedGenes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{findInteractionChangedGenes} \alias{findInteractionChangedGenes} -\title{findInteractionChangedGenes} +\title{deprecated} \usage{ findInteractionChangedGenes(...) } diff --git a/man/print.combIcfObject.Rd b/man/print.combIcfObject.Rd new file mode 100644 index 000000000..027c1f64e --- /dev/null +++ b/man/print.combIcfObject.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_interaction.R +\name{print.combIcfObject} +\alias{print.combIcfObject} +\title{combIcfObject print method} +\usage{ +\method{print}{combIcfObject}(x, ...) +} +\arguments{ +\item{x}{object to print} + +\item{\dots}{additional params to pass (none implemented)} +} +\description{ +combIcfObject print method +} +\keyword{internal} diff --git a/man/print.icfObject.Rd b/man/print.icfObject.Rd new file mode 100644 index 000000000..058a52704 --- /dev/null +++ b/man/print.icfObject.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_interaction.R +\name{print.icfObject} +\alias{print.icfObject} +\title{icfObject print method} +\usage{ +\method{print}{icfObject}(x, ...) +} +\arguments{ +\item{x}{object to print} + +\item{\dots}{additional params to pass (none implemented)} +} +\description{ +icfObject print method +} +\keyword{internal} From 5d9408e6e7da334fce24a40063ff0f868d927c5b Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 27 May 2024 22:32:15 -0400 Subject: [PATCH 067/150] enh: docs and examples updates - combine some docs - add progress bar for exprCellCellcom instead of verbose prints - remove some hardcoded param defaults and add catches --- NEWS.md | 3 + R/spatial_genes.R | 14 +- R/spatial_interaction.R | 657 +++++++++--------- R/spatial_interaction_spot.R | 7 +- man/combCCcom.Rd | 23 +- ...average_feat_feat_expression_in_groups.Rd} | 6 +- man/exprCellCellcom.Rd | 9 +- man/getBalancedSpatCoexpressionFeats.Rd | 12 +- man/spatCellCellcom.Rd | 48 +- man/specificCellCellcommunicationScores.Rd | 115 --- 10 files changed, 423 insertions(+), 471 deletions(-) rename man/{average_feat_feat_expression_in_groups.Rd => dot-average_feat_feat_expression_in_groups.Rd} (84%) delete mode 100644 man/specificCellCellcommunicationScores.Rd diff --git a/NEWS.md b/NEWS.md index fccc69684..94909d971 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,9 @@ * Add catch in `runPCAprojectionBatch()` for when ncp requested exceeds number of feats used * Make `spatCellCellcom()` respect `verbose` flag [#949](https://github.com/drieslab/Giotto/issues/949) by rbutleriii +## Enhancements +* `print()` methods for `icfObject` and `combIcfObject` + ## Changes * require GiottoUtils (>= 0.1.9) diff --git a/R/spatial_genes.R b/R/spatial_genes.R index bcfadc3c7..80e66ad4e 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -4053,7 +4053,7 @@ rankSpatialCorGroups <- function(gobject, -#' @title getBalancedSpatCoexpressionFeats +#' @title Get balanced spatial coexpression features #' @name getBalancedSpatCoexpressionFeats #' @description Extract features from spatial co-expression modules in a #' balanced manner @@ -4067,11 +4067,13 @@ rankSpatialCorGroups <- function(gobject, #' @returns balanced vector with features for each co-expression module #' @details There are 3 different ways of selecting features from the spatial #' co-expression modules -#' \itemize{ -#' \item{1. weighted: }{Features are ranked based on summarized pairwise co-expression scores} -#' \item{2. random: }{A random selection of features, set seed for reproducibility} -#' \item{3. informed: }{Features are selected based on prior information/ranking} -#' } +#' 1. **weighted:** Features are ranked based on summarized pairwise +#' co-expression scores +#' 2. **random:** A random selection of features, set seed for +#' reproducibility +#' 3. **informed:** Features are selected based on prior information/ranking +#' +#' @md #' @export getBalancedSpatCoexpressionFeats <- function(spatCorObject, maximum = 50, diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index 7bccd8a0e..9347b19aa 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -1926,7 +1926,7 @@ print.combIcfObject <- function(x, ...) { #' @title average_feat_feat_expression_in_groups -#' @name average_feat_feat_expression_in_groups +#' @name .average_feat_feat_expression_in_groups #' @description calculate average expression per cluster #' @param gobject giotto object to use #' @param spat_unit spatial unit @@ -1936,7 +1936,7 @@ print.combIcfObject <- function(x, ...) { #' @param feat_set_2 second specific feat set from feat pairs #' @returns data.table with average expression scores for each cluster #' @keywords internal -average_feat_feat_expression_in_groups <- function(gobject, +.average_feat_feat_expression_in_groups <- function(gobject, spat_unit = NULL, feat_type = NULL, cluster_column = "cell_types", @@ -2054,8 +2054,13 @@ average_feat_feat_expression_in_groups <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") +#' res <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik" +#' ) +#' +#' force(res) #' @export exprCellCellcom <- function(gobject, feat_type = NULL, @@ -2107,7 +2112,7 @@ exprCellCellcom <- function(gobject, names(nr_cells) <- nr_cell_types$cluster_column - comScore <- average_feat_feat_expression_in_groups( + comScore <- .average_feat_feat_expression_in_groups( gobject = gobject, feat_type = feat_type, spat_unit = spat_unit, @@ -2135,60 +2140,67 @@ exprCellCellcom <- function(gobject, # not yet available - for (sim in seq_len(random_iter)) { - if (verbose == TRUE) cat("simulation ", sim) + progressr::with_progress({ + pb <- progressr::progressor(steps = random_iter) + for (sim in seq_len(random_iter)) { - # create temporary giotto - tempGiotto <- subsetGiotto( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit - ) + # create temporary giotto + tempGiotto <- subsetGiotto( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit + ) - # randomize annoation - cell_types <- cell_metadata[[cluster_column]] - if (set_seed == TRUE) { - seed_number <- seed_number + sim - set.seed(seed = seed_number) - } - random_cell_types <- sample(x = cell_types, size = length(cell_types)) - tempGiotto <- addCellMetadata( - gobject = tempGiotto, - feat_type = feat_type, - spat_unit = spat_unit, - new_metadata = random_cell_types, - by_column = FALSE # on purpose since values are random - ) + # randomize annoation + cell_types <- cell_metadata[[cluster_column]] + if (set_seed == TRUE) { + seed_number <- seed_number + sim + set.seed(seed = seed_number) + } + random_cell_types <- sample( + x = cell_types, size = length(cell_types) + ) + tempGiotto <- addCellMetadata( + gobject = tempGiotto, + feat_type = feat_type, + spat_unit = spat_unit, + new_metadata = random_cell_types, + by_column = FALSE # on purpose since values are random + ) - # get random communication scores - randomScore <- average_feat_feat_expression_in_groups( - gobject = tempGiotto, - feat_type = feat_type, - spat_unit = spat_unit, - cluster_column = "random_cell_types", - feat_set_1 = feat_set_1, - feat_set_2 = feat_set_2 - ) + # get random communication scores + randomScore <- .average_feat_feat_expression_in_groups( + gobject = tempGiotto, + feat_type = feat_type, + spat_unit = spat_unit, + cluster_column = "random_cell_types", + feat_set_1 = feat_set_1, + feat_set_2 = feat_set_2 + ) - # average random score - total_av <- total_av + randomScore[["LR_expr"]] + # average random score + total_av <- total_av + randomScore[["LR_expr"]] - # difference between observed and random - difference <- comScore[["LR_expr"]] - randomScore[["LR_expr"]] + # difference between observed and random + difference <- comScore[["LR_expr"]] - randomScore[["LR_expr"]] - # calculate total difference - if (detailed == FALSE) { - total_sum <- total_sum + difference - } else { - total_sum[, sim] <- difference + # calculate total difference + if (detailed == FALSE) { + total_sum <- total_sum + difference + } else { + total_sum[, sim] <- difference + } + + # calculate p-values + difference[difference > 0] <- 1 + difference[difference < 0] <- -1 + total_bool <- total_bool + difference + + pb(sprintf("simulation %d", sim)) } + }) - # calculate p-values - difference[difference > 0] <- 1 - difference[difference < 0] <- -1 - total_bool <- total_bool + difference - } comScore[, rand_expr := total_av / random_iter] @@ -2290,9 +2302,9 @@ exprCellCellcom <- function(gobject, -#' @title specificCellCellcommunicationScores -#' @name specificCellCellcommunicationScores -#' @description Specific Cell-Cell communication scores based on spatial +#' @title Spatial cell cell communication scoring +#' @name spatCellCellcom +#' @description Spatial Cell-Cell communication scores based on spatial #' expression of interacting cells #' @param gobject giotto object to use #' @param feat_type feature type @@ -2301,10 +2313,8 @@ exprCellCellcom <- function(gobject, #' interacting cells #' @param cluster_column cluster column with cell type information #' @param random_iter number of iterations -#' @param cell_type_1 first cell type -#' @param cell_type_2 second cell type -#' @param feat_set_1 first specific gene set from gene pairs -#' @param feat_set_2 second specific gene set from gene pairs +#' @param feat_set_1 first specific feature set from feature pairs +#' @param feat_set_2 second specific feature set from feature pairs #' @param gene_set_1 deprecated, use feat_set_1 #' @param gene_set_2 deprecated, use feat_set_2 #' @param log2FC_addendum addendum to add when calculating log2FC @@ -2314,50 +2324,73 @@ exprCellCellcom <- function(gobject, #' (random variance and z-score) #' @param adjust_method which method to adjust p-values #' @param adjust_target adjust multiple hypotheses at the cell or feature level +#' @param do_parallel run calculations in parallel with mclapply +#' @param cores number of cores to use if do_parallel = TRUE #' @param set_seed set a seed for reproducibility #' @param seed_number seed number #' @param verbose verbose #' @returns Cell-Cell communication scores for feature pairs based on spatial #' interaction -#' @details Statistical framework to identify if pairs of features +#' @details Statistical framework to identify if pairs of genes #' (such as ligand-receptor combinations) #' are expressed at higher levels than expected based on a reshuffled null #' distribution of feature expression values in cells that are spatially in #' proximity to each other. -#' \itemize{ -#' \item{LR_comb:}{Pair of ligand and receptor} -#' \item{lig_cell_type:}{ cell type to assess expression level of ligand } -#' \item{lig_expr:}{ average expression of ligand in lig_cell_type } -#' \item{ligand:}{ ligand name } -#' \item{rec_cell_type:}{ cell type to assess expression level of receptor } -#' \item{rec_expr:}{ average expression of receptor in rec_cell_type} -#' \item{receptor:}{ receptor name } -#' \item{LR_expr:}{ combined average ligand and receptor expression } -#' \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } -#' \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } -#' \item{rand_expr:}{ average combined ligand and receptor expression from random spatial permutations } -#' \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } -#' \item{z_score:}{ (optinal) z-score } -#' \item{log2fc:}{ log2 fold-change (LR_expr/rand_expr) } -#' \item{pvalue:}{ p-value } -#' \item{LR_cell_comb:}{ cell type pair combination } -#' \item{p.adj:}{ adjusted p-value } -#' \item{PI:}{ significanc score: log2fc * -log10(p.adj) } -#' } +#' * **LR_comb:** Pair of ligand and receptor +#' * **lig_cell_type:** cell type to assess expression level of ligand +#' * **lig_expr:** average expression of ligand in lig_cell_type +#' * **ligand:** ligand name +#' * **rec_cell_type:** cell type to assess expression level of receptor +#' * **rec_expr:** average expression of receptor in rec_cell_type +#' * **receptor:** receptor name +#' * **LR_expr:** combined average ligand and receptor expression +#' * **lig_nr:** total number of cells from lig_cell_type that spatially +#' interact with cells from rec_cell_type +#' * **rec_nr:** total number of cells from rec_cell_type that spatially +#' interact with cells from lig_cell_type +#' * **rand_expr:** average combined ligand and receptor expression from +#' random spatial permutations +#' * **av_diff:** average difference between LR_expr and rand_expr over all +#' random spatial permutations +#' * **sd_diff:** (optional) standard deviation of the difference between +#' LR_expr and rand_expr over all random spatial permutations +#' * **z_score:** (optional) z-score +#' * **log2fc:** log2 fold-change (LR_expr/rand_expr) +#' * **pvalue:** p-value +#' * **LR_cell_comb:** cell type pair combination +#' * **p.adj:** adjusted p-value +#' * **PI:** significance score: \eqn{log2fc * -log10(p.adj)} +#' +#' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' specificCellCellcommunicationScores(g, cluster_column = "leiden_clus") +#' res1 <- spatCellCellcom( +#' gobject = g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik", +#' verbose = "a lot", +#' random_iter = 10 +#' ) +#' force(res1) +#' +#' res2 <- specificCellCellcommunicationScores(g, +#' cluster_column = "leiden_clus", +#' cell_type_1 = 1, +#' cell_type_2 = 2, +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik" +#' ) +#' +#' force(res2) #' @export -specificCellCellcommunicationScores <- function(gobject, +spatCellCellcom <- function(gobject, feat_type = NULL, spat_unit = NULL, spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", - random_iter = 100, - cell_type_1 = "astrocyte", - cell_type_2 = "endothelial", + cluster_column = NULL, + random_iter = 1000, feat_set_1, feat_set_2, gene_set_1 = NULL, @@ -2370,9 +2403,181 @@ specificCellCellcommunicationScores <- function(gobject, "BY", "none" ), adjust_target = c("feats", "cells"), - set_seed = FALSE, + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, seed_number = 1234, - verbose = TRUE) { + verbose = c("a little", "a lot", "none")) { + verbose <- match.arg(verbose, choices = c("a little", "a lot", "none")) + + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + ## check if spatial network exists ## + spat_networks <- list_spatial_networks_names(gobject, + spat_unit = spat_unit + ) + + if (!spatial_network_name %in% spat_networks) { + stop( + spatial_network_name, " is not an existing spatial network \n", + "use showNetworks() to see the available networks \n", + "or create a new spatial network with createSpatialNetwork()" + ) + } + + ## deprecated arguments + if (!is.null(gene_set_1)) { + feat_set_1 <- gene_set_1 + warning("gene_set_1 is deprecated, use feat_set_1 in the future") + } + if (!is.null(gene_set_2)) { + feat_set_2 <- gene_set_2 + warning("gene_set_2 is deprecated, use feat_set_2 in the future") + } + + if (is.null(cluster_column)) { + stop("Name of column in cell metadata with cell type info is needed") + } + + + cell_metadata <- pDataDT(gobject, + feat_type = feat_type, + spat_unit = spat_unit + ) + + ## get all combinations between cell types + all_uniq_values <- unique(cell_metadata[[cluster_column]]) + same_DT <- data.table::data.table( + V1 = all_uniq_values, V2 = all_uniq_values) + combn_DT <- data.table::as.data.table(t(combn(all_uniq_values, m = 2))) + combn_DT <- rbind(same_DT, combn_DT) + + ## parallel option ## + if (isTRUE(do_parallel)) { + savelist <- lapply_flex( + X = seq_len(nrow(combn_DT)), future.seed = TRUE, + cores = cores, fun = function(row) { + cell_type_1 <- combn_DT[row][["V1"]] + cell_type_2 <- combn_DT[row][["V2"]] + + specific_scores <- specificCellCellcommunicationScores( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + cluster_column = cluster_column, + random_iter = random_iter, + cell_type_1 = cell_type_1, + cell_type_2 = cell_type_2, + feat_set_1 = feat_set_1, + feat_set_2 = feat_set_2, + spatial_network_name = spatial_network_name, + log2FC_addendum = log2FC_addendum, + min_observations = min_observations, + detailed = detailed, + adjust_method = adjust_method, + adjust_target = adjust_target, + set_seed = set_seed, + seed_number = seed_number, + verbose = verbose %in% c("a lot") + ) + }) + } else { + ## for loop over all combinations ## + savelist <- list() + countdown <- nrow(combn_DT) + + for (row in seq_len(nrow(combn_DT))) { + cell_type_1 <- combn_DT[row][["V1"]] + cell_type_2 <- combn_DT[row][["V2"]] + + if (verbose == "a little" || verbose == "a lot") + cat(sprintf("[PROCESS nr %d : %d and %d] ", + countdown, cell_type_1, cell_type_2)) + + if (verbose %in% c("a little", "none")) { + specific_verbose <- FALSE + } else { + specific_verbose <- TRUE + } + + specific_scores <- specificCellCellcommunicationScores( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + cluster_column = cluster_column, + random_iter = random_iter, + cell_type_1 = cell_type_1, + cell_type_2 = cell_type_2, + feat_set_1 = feat_set_1, + feat_set_2 = feat_set_2, + spatial_network_name = spatial_network_name, + log2FC_addendum = log2FC_addendum, + min_observations = min_observations, + detailed = detailed, + adjust_method = adjust_method, + adjust_target = adjust_target, + set_seed = set_seed, + seed_number = seed_number, + verbose = specific_verbose + ) + savelist[[row]] <- specific_scores + countdown <- countdown - 1 + } + } + + finalDT <- do.call("rbind", savelist) + + # data.table variables + LR_comb <- LR_expr <- NULL + + data.table::setorder(finalDT, LR_comb, -LR_expr) + + return(finalDT) +} + + + + + +#' @rdname spatCellCellcom +#' @param cell_type_1 character. First cell type +#' @param cell_type_2 character. Second cell type +#' @export +specificCellCellcommunicationScores <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + cluster_column = NULL, + random_iter = 100, + cell_type_1 = NULL, + cell_type_2 = NULL, + feat_set_1, + feat_set_2, + gene_set_1 = NULL, + gene_set_2 = NULL, + log2FC_addendum = 0.1, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("feats", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = TRUE +) { + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2394,6 +2599,17 @@ specificCellCellcommunicationScores <- function(gobject, warning("gene_set_2 is deprecated, use feat_set_2 in the future") } + if (is.null(cluster_column)) { + stop("Name of column in cell metadata with cell type info is needed") + } + + if (is.null(cell_type_1) || is.null(cell_type_2)) { + stop(sprintf( + "`%s` and `%s` in `%s` must be given", + "cell_type_1", "cell_type_2", "cluster_column") + ) + } + # data.table variables from_to <- cell_ID <- lig_cell_type <- rec_cell_type <- lig_nr <- @@ -2446,8 +2662,8 @@ specificCellCellcommunicationScores <- function(gobject, # get information about number of cells temp_meta <- pDataDT(subsetGiotto, - feat_type = feat_type, - spat_unit = spat_unit + feat_type = feat_type, + spat_unit = spat_unit ) nr_cell_types <- temp_meta[cell_ID %in% subset_ids][ , .N, by = c(cluster_column)] @@ -2455,7 +2671,7 @@ specificCellCellcommunicationScores <- function(gobject, names(nr_cells) <- nr_cell_types$cell_types # get average communication scores - comScore <- average_feat_feat_expression_in_groups( + comScore <- .average_feat_feat_expression_in_groups( gobject = subsetGiotto, feat_type = feat_type, spat_unit = spat_unit, @@ -2464,8 +2680,8 @@ specificCellCellcommunicationScores <- function(gobject, feat_set_2 = feat_set_2 ) comScore <- comScore[(lig_cell_type == cell_type_1 & - rec_cell_type == cell_type_2) | - (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] + rec_cell_type == cell_type_2) | + (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] comScore[, lig_nr := nr_cells[lig_cell_type]] comScore[, rec_nr := nr_cells[rec_cell_type]] @@ -2515,7 +2731,7 @@ specificCellCellcommunicationScores <- function(gobject, ) # get random communication scores - randomScore <- average_feat_feat_expression_in_groups( + randomScore <- .average_feat_feat_expression_in_groups( gobject = tempGiotto, feat_type = feat_type, spat_unit = spat_unit, @@ -2525,7 +2741,7 @@ specificCellCellcommunicationScores <- function(gobject, ) randomScore <- randomScore[(lig_cell_type == cell_type_1 & rec_cell_type == cell_type_2) | - (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] + (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] @@ -2602,236 +2818,9 @@ specificCellCellcommunicationScores <- function(gobject, } -#' @title spatCellCellcom -#' @name spatCellCellcom -#' @description Spatial Cell-Cell communication scores based on spatial -#' expression of interacting cells -#' @param gobject giotto object to use -#' @param feat_type feature type -#' @param spat_unit spatial unit -#' @param spatial_network_name spatial network to use for identifying -#' interacting cells -#' @param cluster_column cluster column with cell type information -#' @param random_iter number of iterations -#' @param feat_set_1 first specific feature set from feature pairs -#' @param feat_set_2 second specific feature set from feature pairs -#' @param gene_set_1 deprecated, use feat_set_1 -#' @param gene_set_2 deprecated, use feat_set_2 -#' @param log2FC_addendum addendum to add when calculating log2FC -#' @param min_observations minimum number of interactions needed to be -#' considered -#' @param detailed provide more detailed information -#' (random variance and z-score) -#' @param adjust_method which method to adjust p-values -#' @param adjust_target adjust multiple hypotheses at the cell or feature level -#' @param do_parallel run calculations in parallel with mclapply -#' @param cores number of cores to use if do_parallel = TRUE -#' @param set_seed set a seed for reproducibility -#' @param seed_number seed number -#' @param verbose verbose -#' @returns Cell-Cell communication scores for feature pairs based on spatial -#' interaction -#' @details Statistical framework to identify if pairs of genes -#' (such as ligand-receptor combinations) -#' are expressed at higher levels than expected based on a reshuffled null -#' distribution of feature expression values in cells that are spatially in -#' proximity to each other. -#' * **LR_comb:** Pair of ligand and receptor -#' * **lig_cell_type:** cell type to assess expression level of ligand -#' * **lig_expr:** average expression of ligand in lig_cell_type -#' * **ligand:** ligand name -#' * **rec_cell_type:** cell type to assess expression level of receptor -#' * **rec_expr:** average expression of receptor in rec_cell_type -#' * **receptor:** receptor name -#' * **LR_expr:** combined average ligand and receptor expression -#' * **lig_nr:** total number of cells from lig_cell_type that spatially -#' interact with cells from rec_cell_type -#' * **rec_nr:** total number of cells from rec_cell_type that spatially -#' interact with cells from lig_cell_type -#' * **rand_expr:** average combined ligand and receptor expression from -#' random spatial permutations -#' * **av_diff:** average difference between LR_expr and rand_expr over all -#' random spatial permutations -#' * **sd_diff:** (optional) standard deviation of the difference between -#' LR_expr and rand_expr over all random spatial permutations -#' * **z_score:** (optional) z-score -#' * **log2fc:** log2 fold-change (LR_expr/rand_expr) -#' * **pvalue:** p-value -#' * **LR_cell_comb:** cell type pair combination -#' * **p.adj:** adjusted p-value -#' * **PI:** significance score: log2fc * -log10(p.adj) -#' @md -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' spatCellCellcom( -#' gobject = g, -#' cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", -#' feat_set_2 = "9630013A20Rik", -#' verbose = "a lot", -#' random_iter = 10 -#' ) -#' @export -spatCellCellcom <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", - random_iter = 1000, - feat_set_1, - feat_set_2, - gene_set_1 = NULL, - gene_set_2 = NULL, - log2FC_addendum = 0.1, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("feats", "cells"), - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = c("a little", "a lot", "none")) { - verbose <- match.arg(verbose, choices = c("a little", "a lot", "none")) - - # Set feat_type and spat_unit - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - - ## check if spatial network exists ## - spat_networks <- list_spatial_networks_names(gobject, - spat_unit = spat_unit - ) - - if (!spatial_network_name %in% spat_networks) { - stop( - spatial_network_name, " is not an existing spatial network \n", - "use showNetworks() to see the available networks \n", - "or create a new spatial network with createSpatialNetwork()" - ) - } - - ## deprecated arguments - if (!is.null(gene_set_1)) { - feat_set_1 <- gene_set_1 - warning("gene_set_1 is deprecated, use feat_set_1 in the future") - } - if (!is.null(gene_set_2)) { - feat_set_2 <- gene_set_2 - warning("gene_set_2 is deprecated, use feat_set_2 in the future") - } - - - cell_metadata <- pDataDT(gobject, - feat_type = feat_type, - spat_unit = spat_unit - ) - - ## get all combinations between cell types - all_uniq_values <- unique(cell_metadata[[cluster_column]]) - same_DT <- data.table::data.table( - V1 = all_uniq_values, V2 = all_uniq_values) - combn_DT <- data.table::as.data.table(t(combn(all_uniq_values, m = 2))) - combn_DT <- rbind(same_DT, combn_DT) - - ## parallel option ## - if (isTRUE(do_parallel)) { - savelist <- lapply_flex( - X = seq_len(nrow(combn_DT)), future.seed = TRUE, - cores = cores, fun = function(row) { - cell_type_1 <- combn_DT[row][["V1"]] - cell_type_2 <- combn_DT[row][["V2"]] - - specific_scores <- specificCellCellcommunicationScores( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - cluster_column = cluster_column, - random_iter = random_iter, - cell_type_1 = cell_type_1, - cell_type_2 = cell_type_2, - feat_set_1 = feat_set_1, - feat_set_2 = feat_set_2, - spatial_network_name = spatial_network_name, - log2FC_addendum = log2FC_addendum, - min_observations = min_observations, - detailed = detailed, - adjust_method = adjust_method, - adjust_target = adjust_target, - set_seed = set_seed, - seed_number = seed_number, - verbose = verbose %in% c("a lot") - ) - }) - } else { - ## for loop over all combinations ## - savelist <- list() - countdown <- nrow(combn_DT) - - for (row in seq_len(nrow(combn_DT))) { - cell_type_1 <- combn_DT[row][["V1"]] - cell_type_2 <- combn_DT[row][["V2"]] - - if (verbose == "a little" || verbose == "a lot") - cat(sprintf("[PROCESS nr %d : %d and %d] ", - countdown, cell_type_1, cell_type_2)) - - if (verbose %in% c("a little", "none")) { - specific_verbose <- FALSE - } else { - specific_verbose <- TRUE - } - - specific_scores <- specificCellCellcommunicationScores( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - cluster_column = cluster_column, - random_iter = random_iter, - cell_type_1 = cell_type_1, - cell_type_2 = cell_type_2, - feat_set_1 = feat_set_1, - feat_set_2 = feat_set_2, - spatial_network_name = spatial_network_name, - log2FC_addendum = log2FC_addendum, - min_observations = min_observations, - detailed = detailed, - adjust_method = adjust_method, - adjust_target = adjust_target, - set_seed = set_seed, - seed_number = seed_number, - verbose = specific_verbose - ) - savelist[[row]] <- specific_scores - countdown <- countdown - 1 - } - } - - finalDT <- do.call("rbind", savelist) - - # data.table variables - LR_comb <- LR_expr <- NULL - - data.table::setorder(finalDT, LR_comb, -LR_expr) - - return(finalDT) -} - - -#' @title combCCcom +#' @title Combine cell cell communication tables #' @name combCCcom #' @description Combine spatial and expression based cell-cell communication #' data.tables @@ -2848,13 +2837,22 @@ spatCellCellcom <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -#' spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -#' feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -#' random_iter = 10) +#' exprCC <- exprCellCellcom(g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik" +#' ) #' -#' combCCcom(spatialCC = spatialCC, exprCC = exprCC) +#' spatialCC <- spatCellCellcom(gobject = g, +#' cluster_column = "leiden_clus", +#' feat_set_1 = "Gm19935", +#' feat_set_2 = "9630013A20Rik", +#' verbose = "a lot", +#' random_iter = 10 +#' ) +#' +#' combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) +#' force(combCC) #' @export combCCcom <- function(spatialCC, exprCC, @@ -2920,6 +2918,9 @@ combCCcom <- function(spatialCC, + + + # DEPRECATED #### #' @title deprecated diff --git a/R/spatial_interaction_spot.R b/R/spatial_interaction_spot.R index 16cdc59aa..eed3910d6 100644 --- a/R/spatial_interaction_spot.R +++ b/R/spatial_interaction_spot.R @@ -1118,7 +1118,12 @@ NULL #' g <- GiottoData::loadGiottoMini("visium") #' g_expression <- getExpression(g, output = "matrix") #' -#' findICFSpot(g, spat_unit = "cell", feat_type = "rna", ave_celltype_exp = g_expression, spatial_network_name = "spatial_network") +#' findICFSpot(g, +#' spat_unit = "cell", +#' feat_type = "rna", +#' ave_celltype_exp = g_expression, +#' spatial_network_name = "spatial_network" +#' ) #' @export findICFSpot <- function(gobject, spat_unit = NULL, diff --git a/man/combCCcom.Rd b/man/combCCcom.Rd index 61d253f98..f31d5799b 100644 --- a/man/combCCcom.Rd +++ b/man/combCCcom.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_interaction.R \name{combCCcom} \alias{combCCcom} -\title{combCCcom} +\title{Combine cell cell communication tables} \usage{ combCCcom( spatialCC, @@ -43,11 +43,20 @@ data.tables \examples{ g <- GiottoData::loadGiottoMini("visium") -exprCC <- exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") -spatialCC <- spatCellCellcom(gobject = g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", verbose = "a lot", -random_iter = 10) +exprCC <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", + feat_set_2 = "9630013A20Rik" +) + +spatialCC <- spatCellCellcom(gobject = g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", + feat_set_2 = "9630013A20Rik", + verbose = "a lot", + random_iter = 10 +) -combCCcom(spatialCC = spatialCC, exprCC = exprCC) +combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) +force(combCC) } diff --git a/man/average_feat_feat_expression_in_groups.Rd b/man/dot-average_feat_feat_expression_in_groups.Rd similarity index 84% rename from man/average_feat_feat_expression_in_groups.Rd rename to man/dot-average_feat_feat_expression_in_groups.Rd index 191aed4dd..f1590ec8d 100644 --- a/man/average_feat_feat_expression_in_groups.Rd +++ b/man/dot-average_feat_feat_expression_in_groups.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/spatial_interaction.R -\name{average_feat_feat_expression_in_groups} -\alias{average_feat_feat_expression_in_groups} +\name{.average_feat_feat_expression_in_groups} +\alias{.average_feat_feat_expression_in_groups} \title{average_feat_feat_expression_in_groups} \usage{ -average_feat_feat_expression_in_groups( +.average_feat_feat_expression_in_groups( gobject, spat_unit = NULL, feat_type = NULL, diff --git a/man/exprCellCellcom.Rd b/man/exprCellCellcom.Rd index b571651d3..e3d89cef8 100644 --- a/man/exprCellCellcom.Rd +++ b/man/exprCellCellcom.Rd @@ -70,6 +70,11 @@ More details will follow soon. \examples{ g <- GiottoData::loadGiottoMini("visium") -exprCellCellcom(g, cluster_column = "leiden_clus", -feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik") +res <- exprCellCellcom(g, + cluster_column = "leiden_clus", + feat_set_1 = "Gm19935", + feat_set_2 = "9630013A20Rik" +) + +force(res) } diff --git a/man/getBalancedSpatCoexpressionFeats.Rd b/man/getBalancedSpatCoexpressionFeats.Rd index 674807070..1bac56357 100644 --- a/man/getBalancedSpatCoexpressionFeats.Rd +++ b/man/getBalancedSpatCoexpressionFeats.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spatial_genes.R \name{getBalancedSpatCoexpressionFeats} \alias{getBalancedSpatCoexpressionFeats} -\title{getBalancedSpatCoexpressionFeats} +\title{Get balanced spatial coexpression features} \usage{ getBalancedSpatCoexpressionFeats( spatCorObject, @@ -37,9 +37,11 @@ balanced manner \details{ There are 3 different ways of selecting features from the spatial co-expression modules -\itemize{ - \item{1. weighted: }{Features are ranked based on summarized pairwise co-expression scores} - \item{2. random: }{A random selection of features, set seed for reproducibility} - \item{3. informed: }{Features are selected based on prior information/ranking} +\enumerate{ +\item \strong{weighted:} Features are ranked based on summarized pairwise +co-expression scores +\item \strong{random:} A random selection of features, set seed for +reproducibility +\item \strong{informed:} Features are selected based on prior information/ranking } } diff --git a/man/spatCellCellcom.Rd b/man/spatCellCellcom.Rd index e73bf8ebc..5c04c1a1d 100644 --- a/man/spatCellCellcom.Rd +++ b/man/spatCellCellcom.Rd @@ -2,14 +2,15 @@ % Please edit documentation in R/spatial_interaction.R \name{spatCellCellcom} \alias{spatCellCellcom} -\title{spatCellCellcom} +\alias{specificCellCellcommunicationScores} +\title{Spatial cell cell communication scoring} \usage{ spatCellCellcom( gobject, feat_type = NULL, spat_unit = NULL, spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", + cluster_column = NULL, random_iter = 1000, feat_set_1, feat_set_2, @@ -27,6 +28,30 @@ spatCellCellcom( seed_number = 1234, verbose = c("a little", "a lot", "none") ) + +specificCellCellcommunicationScores( + gobject, + feat_type = NULL, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + cluster_column = NULL, + random_iter = 100, + cell_type_1 = NULL, + cell_type_2 = NULL, + feat_set_1, + feat_set_2, + gene_set_1 = NULL, + gene_set_2 = NULL, + log2FC_addendum = 0.1, + min_observations = 2, + detailed = FALSE, + adjust_method = c("fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", "BY", + "none"), + adjust_target = c("feats", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = TRUE +) } \arguments{ \item{gobject}{giotto object to use} @@ -71,6 +96,10 @@ considered} \item{seed_number}{seed number} \item{verbose}{verbose} + +\item{cell_type_1}{character. First cell type} + +\item{cell_type_2}{character. Second cell type} } \value{ Cell-Cell communication scores for feature pairs based on spatial @@ -110,13 +139,13 @@ LR_expr and rand_expr over all random spatial permutations \item \strong{pvalue:} p-value \item \strong{LR_cell_comb:} cell type pair combination \item \strong{p.adj:} adjusted p-value -\item \strong{PI:} significance score: log2fc * -log10(p.adj) +\item \strong{PI:} significance score: \eqn{log2fc * -log10(p.adj)} } } \examples{ g <- GiottoData::loadGiottoMini("visium") -spatCellCellcom( +res1 <- spatCellCellcom( gobject = g, cluster_column = "leiden_clus", feat_set_1 = "Gm19935", @@ -124,4 +153,15 @@ spatCellCellcom( verbose = "a lot", random_iter = 10 ) +force(res1) + +res2 <- specificCellCellcommunicationScores(g, + cluster_column = "leiden_clus", + cell_type_1 = 1, + cell_type_2 = 2, + feat_set_1 = "Gm19935", + feat_set_2 = "9630013A20Rik" +) + +force(res2) } diff --git a/man/specificCellCellcommunicationScores.Rd b/man/specificCellCellcommunicationScores.Rd deleted file mode 100644 index 917da1a3f..000000000 --- a/man/specificCellCellcommunicationScores.Rd +++ /dev/null @@ -1,115 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_interaction.R -\name{specificCellCellcommunicationScores} -\alias{specificCellCellcommunicationScores} -\title{specificCellCellcommunicationScores} -\usage{ -specificCellCellcommunicationScores( - gobject, - feat_type = NULL, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_types", - random_iter = 100, - cell_type_1 = "astrocyte", - cell_type_2 = "endothelial", - feat_set_1, - feat_set_2, - gene_set_1 = NULL, - gene_set_2 = NULL, - log2FC_addendum = 0.1, - min_observations = 2, - detailed = FALSE, - adjust_method = c("fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", "BY", - "none"), - adjust_target = c("feats", "cells"), - set_seed = FALSE, - seed_number = 1234, - verbose = TRUE -) -} -\arguments{ -\item{gobject}{giotto object to use} - -\item{feat_type}{feature type} - -\item{spat_unit}{spatial unit} - -\item{spatial_network_name}{spatial network to use for identifying -interacting cells} - -\item{cluster_column}{cluster column with cell type information} - -\item{random_iter}{number of iterations} - -\item{cell_type_1}{first cell type} - -\item{cell_type_2}{second cell type} - -\item{feat_set_1}{first specific gene set from gene pairs} - -\item{feat_set_2}{second specific gene set from gene pairs} - -\item{gene_set_1}{deprecated, use feat_set_1} - -\item{gene_set_2}{deprecated, use feat_set_2} - -\item{log2FC_addendum}{addendum to add when calculating log2FC} - -\item{min_observations}{minimum number of interactions needed to be -considered} - -\item{detailed}{provide more detailed information -(random variance and z-score)} - -\item{adjust_method}{which method to adjust p-values} - -\item{adjust_target}{adjust multiple hypotheses at the cell or feature level} - -\item{set_seed}{set a seed for reproducibility} - -\item{seed_number}{seed number} - -\item{verbose}{verbose} -} -\value{ -Cell-Cell communication scores for feature pairs based on spatial -interaction -} -\description{ -Specific Cell-Cell communication scores based on spatial -expression of interacting cells -} -\details{ -Statistical framework to identify if pairs of features -(such as ligand-receptor combinations) -are expressed at higher levels than expected based on a reshuffled null -distribution of feature expression values in cells that are spatially in -proximity to each other. -\itemize{ - \item{LR_comb:}{Pair of ligand and receptor} - \item{lig_cell_type:}{ cell type to assess expression level of ligand } - \item{lig_expr:}{ average expression of ligand in lig_cell_type } - \item{ligand:}{ ligand name } - \item{rec_cell_type:}{ cell type to assess expression level of receptor } - \item{rec_expr:}{ average expression of receptor in rec_cell_type} - \item{receptor:}{ receptor name } - \item{LR_expr:}{ combined average ligand and receptor expression } - \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type } - \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type } - \item{rand_expr:}{ average combined ligand and receptor expression from random spatial permutations } - \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations } - \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations } - \item{z_score:}{ (optinal) z-score } - \item{log2fc:}{ log2 fold-change (LR_expr/rand_expr) } - \item{pvalue:}{ p-value } - \item{LR_cell_comb:}{ cell type pair combination } - \item{p.adj:}{ adjusted p-value } - \item{PI:}{ significanc score: log2fc * -log10(p.adj) } -} -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -specificCellCellcommunicationScores(g, cluster_column = "leiden_clus") -} From a2614e0325619af3501462cb0d07972c1bd8c990 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Tue, 28 May 2024 19:45:15 -0400 Subject: [PATCH 068/150] update example --- R/spatial_interaction_spot.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/spatial_interaction_spot.R b/R/spatial_interaction_spot.R index 971488176..8e7623546 100644 --- a/R/spatial_interaction_spot.R +++ b/R/spatial_interaction_spot.R @@ -1212,7 +1212,7 @@ NULL #' findICFSpot(g, #' spat_unit = "cell", #' feat_type = "rna", -#' ave_celltype_exp = g_expression, +#' ave_celltype_exp = ave_celltype_exp, #' spatial_network_name = "spatial_network" #' ) #' @export From 2315d4308bcdaf349d2d7b78cc99e5985eed97c4 Mon Sep 17 00:00:00 2001 From: Ruben Dries Date: Fri, 31 May 2024 16:17:37 -0400 Subject: [PATCH 069/150] modified detect function to allow recursive searching & created first modular reader for visium HD --- R/convenience_general.R | 4 +- R/convenience_visiumHD.R | 662 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 664 insertions(+), 2 deletions(-) create mode 100644 R/convenience_visiumHD.R diff --git a/R/convenience_general.R b/R/convenience_general.R index 8ae4b661b..ad36b2b58 100644 --- a/R/convenience_general.R +++ b/R/convenience_general.R @@ -214,9 +214,9 @@ abbrev_path <- function(path, head = 15, tail = 35L) { # warn - whether to warn when a pattern does not find any files # first - whether to only return the first match .detect_in_dir <- function( - path, pattern, platform, warn = TRUE, first = TRUE + path, pattern, recursive = FALSE, platform, warn = TRUE, first = TRUE ) { - f <- list.files(path, pattern = pattern, full.names = TRUE) + f <- list.files(path, pattern = pattern, recursive = recursive, full.names = TRUE) lenf <- length(f) if (lenf == 1L) return(f) # one match else if (lenf == 0L) { # no matches diff --git a/R/convenience_visiumHD.R b/R/convenience_visiumHD.R new file mode 100644 index 000000000..d8343881f --- /dev/null +++ b/R/convenience_visiumHD.R @@ -0,0 +1,662 @@ +## CLASS #### +# ------- ### + + +setClass( + "VisiumHDReader", + slots = list( + visiumHD_dir = "character", + expression_source = "character", + gene_column_index = "numeric", + barcodes = "character", + array_subset_row = "numeric", + array_subset_col = "numeric", + pxl_subset_row = "numeric", + pxl_subset_col = "numeric", + calls = "list" + ), + prototype = list( + expression_source = 'raw', + gene_column_index = 2, + barcodes = NULL, + array_subset_row = NULL, + array_subset_col = NULL, + pxl_subset_row = NULL, + pxl_subset_col = NULL, + calls = list() + ) +) + + + +# * show #### +setMethod("show", signature("VisiumHDReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "VisiumHDReader")) + print_slots <- c("dir", "expression_source", "gene_column_index", + "barcodes", "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col", + "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@visiumHD_dir + if (length(d) > 0L) { + nch <- nchar(d) + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # expression_source + expression_source <- object@expression_source + cat(pre["expression_source"], expression_source, "\n") + + # gene_column_index + gene_column_index <- object@gene_column_index + cat(pre["gene_column_index"], gene_column_index, "\n") + + # barcodes + barcodes <- ifelse(!is.null(object@barcodes), "found", "none") + cat(pre["barcodes"], barcodes, "\n") + + # array_subset_row + array_subset_row <- ifelse(!is.null(object@array_subset_row), "found", "none") + cat(pre["array_subset_row"], array_subset_row, "\n") + + # array_subset_col + array_subset_col <- ifelse(!is.null(object@array_subset_col), "found", "none") + cat(pre["array_subset_col"], array_subset_col, "\n") + + # pxl_subset_row + pxl_subset_row <- ifelse(!is.null(object@pxl_subset_row), "found", "none") + cat(pre["pxl_subset_row"], pxl_subset_row, "\n") + + # pxl_subset_col + pxl_subset_col <- ifelse(!is.null(object@pxl_subset_col), "found", "none") + cat(pre["pxl_subset_col"], pxl_subset_col, "\n") + + # funs + .reader_fun_prints(x = object, pre = pre["funs"]) +}) + +# * print #### +setMethod("print", signature("VisiumHDReader"), function(x, ...) show(x)) + + + +#' @title Import a Visium HD assay +#' @name importVisiumHD +#' @description +#' Giotto import functionalities for Visium HD datasets. This function generates +#' a `VisiumHDReader` instance that has convenient reader functions for converting +#' individual pieces of Visium HD data into Giotto-compatible representations when +#' the param `visiumHD_dir` is provided. +#' A function that creates the full `giotto` object is also available. +#' These functions should have all param values provided as defaults, but +#' can be flexibly modified to do things such as look in alternative +#' directories or paths. +#' @param visiumHD_dir Visium HD output directory (e.g. square_016um) +#' @param expression_source character. Raw or filter expression data. Defaults to 'raw' +#' @param gene_column_index numeric. Expression column to use for gene names +#' 1 = Ensembl and 2 = gene symbols +#' @param barcodes character vector. (optional) Use if you only want to load +#' a subset of the pixel barcodes +#' @param array_subset_row numeric vector. (optional) Vector with min and max values +#' to subset based on array rows +#' @param array_subset_col numeric vector. (optional) Vector with min and max values +#' to subset based on array columns +#' @param pxl_subset_row numeric vector. (optional) Vector with min and max values +#' to subset based on row pixels +#' @param pxl_subset_col numeric vector. (optional) Vector with min and max values +#' to subset based on column pixels +#' @details +#' Loading functions are generated after the `visiumHD_dir` is added. +#' @returns VisiumHDReader object +#' @examples +#' # Create a `VisiumHDReader` object +#' reader <- importVisiumHD() +#' +#' \dontrun{ +#' # Set the visiumHD_dir +#' reader$visiumHD_dir <- "path to visium HD dir" +#' readerHD$visiumHD_dir <- visiumHD_dir +#' +#' # Load tissue positions or create cell metadata +#' tissue_pos = readerHD$load_tissue_position() +#' metadata <- readerHD$load_metadata() +#' +#' Load matrix or create expression object +#' matrix <- readerHD$load_matrix() +#' expression_obj = readerHD$load_expression() +#' +#' Load transcript data (cell metadata, expression object, and transcripts per pixel) +#' my_transcripts = readerHD$load_transcripts(array_subset_row = c(500, 1000), +#' array_subset_col = c(500, 1000)) +#' +#' # Create a `giotto` object and add the loaded data +#' TODO +#' } +#' @export +importVisiumHD <- function( + visiumHD_dir = NULL, + expression_source = 'raw', + gene_column_index = 2, + barcodes = NULL, + array_subset_row = NULL, + array_subset_col = NULL, + pxl_subset_row = NULL, + pxl_subset_col = NULL) { + + # get params + a <- list(Class = "VisiumHDReader") + + if (!is.null(visiumHD_dir)) { + a$visiumHD_dir <- visiumHD_dir + } + + a$expression_source <- expression_source + a$gene_column_index <- gene_column_index + + if (!is.null(barcodes)) { + a$barcodes <- barcodes + } + + if (!is.null(array_subset_row)) { + a$array_subset_row <- array_subset_row + } + + if (!is.null(array_subset_col)) { + a$array_subset_col <- array_subset_col + } + + if (!is.null(pxl_subset_row)) { + a$pxl_subset_row <- pxl_subset_row + } + + if (!is.null(pxl_subset_col)) { + a$pxl_subset_col <- pxl_subset_col + } + + do.call(new, args = a) +} + + +# * init #### +setMethod("initialize", signature("VisiumHDReader"), function( + .Object, visiumHD_dir, + expression_source, + gene_column_index, + barcodes, + array_subset_row, + array_subset_col, + pxl_subset_row, + pxl_subset_col +) { + + # provided params (if any) + if (!missing(visiumHD_dir)) { + checkmate::assert_directory_exists(visiumHD_dir) + .Object@visiumHD_dir <- visiumHD_dir + } + + if (!missing(expression_source)) { + .Object@expression_source <- expression_source + } + + if (!missing(gene_column_index)) { + .Object@gene_column_index <- gene_column_index + } + + if (!missing(barcodes)) { + .Object@barcodes <- barcodes + } + + if (!missing(array_subset_row)) { + .Object@array_subset_row <- array_subset_row + } + + if (!missing(array_subset_col)) { + .Object@array_subset_col <- array_subset_col + } + + if (!missing(pxl_subset_row)) { + .Object@pxl_subset_row <- pxl_subset_row + } + + if (!missing(pxl_subset_col)) { + .Object@pxl_subset_col <- pxl_subset_col + } + + # NULL case + if (length(.Object@visiumHD_dir) == 0) { + return(.Object) # return early if no path given + } + + + # detect paths and subdirs + p <- .Object@visiumHD_dir + + + .visiumHD_detect <- function(pattern, path = p, recursive = FALSE) { + .detect_in_dir(pattern = pattern, path = path, recursive = recursive, platform = "visiumHD") + } + + + filter_expr_dir <- .visiumHD_detect(pattern = "filtered_feature_bc_matrix", path = p) + raw_expr_dir <- .visiumHD_detect(pattern = "raw_feature_bc_matrix", path = p) + + s <- .Object@expression_source + if(s == 'raw') { + expr_dir = raw_expr_dir + } else if(s == 'filter') { + expr_dir = filter_expr_dir + } else { + stop('expression source for visiumHD can only be raw or filter') + } + + spatial_dir <- .visiumHD_detect(pattern = "spatial", path = p) + + + c_index <- .Object@gene_column_index + if(!c_index %in% c(1, 2)) { + stop('gene column index can only be 1 (Ensembl) or 2 (gene symbols)') + } + + + ## matrix load call + matrix_fun <- function( + path = expr_dir, + gene_column_index = c_index, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + .visiumHD_matrix( + path = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_matrix <- matrix_fun + + + + ## expression load call + expression_fun <- function( + path = expr_dir, + gene_column_index = c_index, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + + .visiumHD_expression( + path = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_expression <- expression_fun + + + + ## tissue position load call + tissue_position_fun <- function( + path = spatial_dir, + verbose = NULL + ) { + .visiumHD_tissue_positions( + path = path, + verbose = verbose + ) + } + .Object@calls$load_tissue_position <- tissue_position_fun + + + + ## metadata load call + meta_fun <- function( + path = spatial_dir, + verbose = NULL) { + + .visiumHD_meta( + path = path, + verbose = verbose + ) + } + .Object@calls$load_metadata <- meta_fun + + + + ## transcript load call + transcript_fun <- function(expr_path = expr_dir, + tissue_positions_path = spatial_dir, + barcodes = .Object@barcodes, + array_subset_row = .Object@array_subset_row, + array_subset_col = .Object@array_subset_col, + pxl_subset_row = .Object@pxl_subset_row, + pxl_subset_col = .Object@pxl_subset_col) { + + .visiumHD_transcript(expr_path = expr_path, + tissue_positions_path = tissue_positions_path, + barcodes = barcodes, + array_subset_row = array_subset_row, + array_subset_col = array_subset_col, + pxl_subset_row = pxl_subset_row, + pxl_subset_col = pxl_subset_col, + verbose = TRUE) + + } + .Object@calls$load_transcripts <- transcript_fun + + return(.Object) +}) + + +# * access #### + +#' @export +setMethod("$", signature("VisiumHDReader"), function(x, name) { + basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { + basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.VisiumHDReader` <- function(x, pattern) { + dn <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) +} + + + + + +.visiumHD_matrix = function(path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to matrix file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading expression matrix ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # load expression results with the 10X default matrix function + matrix_results <- get10Xmatrix(path_to_data = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type) + + return(matrix_results) + +} + + + + + +.visiumHD_expression = function(path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to matrix file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading expression matrix ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # load expression results with the 10X default matrix function + matrix_results <- get10Xmatrix(path_to_data = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type) + + + exprObj = createExprObj(expression_data = matrix_results, + spat_unit = "pixel", + feat_type = 'rna', + name = "raw", + provenance = "pixel") + + return(list('rna' = exprObj)) + +} + + + + +.visiumHD_tissue_positions = function(path, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to tissue positions file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading tissue positions file ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # check existence and access rights of files + tissue_positions_path = file.path(path, 'tissue_positions.parquet') + checkmate::assert_file_exists(tissue_positions_path) + + # read with parquet and data.table + tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) + + return(tissue_positions) + +} + + + + +.visiumHD_meta = function( + path, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to tissue positions file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading tissue positions file ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # check existence and access rights of files + tissue_positions_path = file.path(path, 'tissue_positions.parquet') + checkmate::assert_file_exists(tissue_positions_path) + + # read with parquet and data.table + tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) + + vmsg(.v = verbose, "creating metadata ...") + + data.table::setnames(tissue_positions, 'barcode', 'cell_ID') + + cx <- createCellMetaObj( + metadata = tissue_positions, + spat_unit = "pixel", + feat_type = "rna", + provenance = "pixel", + verbose = verbose + ) + return(cx) + +} + + + +.visiumHD_transcript = function(expr_path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + tissue_positions_path, + barcodes = NULL, + array_subset_row = NULL, + array_subset_col = NULL, + pxl_subset_row = NULL, + pxl_subset_col = NULL, + verbose = TRUE) { + + + # function to create expression matrix + matrix = .visiumHD_matrix( + path = expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + + + # function to create tissue position data.table + tissue_positions = .visiumHD_tissue_positions( + path = tissue_positions_path, + verbose = verbose + ) + + + + vmsg(.v = verbose, "creating visiumHD tissue position x expression data file ...") + + # subset data + if(!is.null(barcodes)) { + vmsg(.v = verbose, "subsetting visiumHD on barcodes") + tissue_positions = tissue_positions[barcode %in% barcodes] + } + + if(!is.null(array_subset_row)) { + if(is.vector(array_subset_row) & length(array_subset_row) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on array rows") + tissue_positions = tissue_positions[array_row > array_subset_row[1] & array_row < array_subset_row[2]] + } else { + stop('array_subset_row was provided but is not a vector with length 2') + } + } + + if(!is.null(array_subset_col)) { + if(is.vector(array_subset_col) & length(array_subset_col) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on array columns") + tissue_positions = tissue_positions[array_col > array_subset_col[1] & array_col < array_subset_col[2]] + } else { + stop('array_subset_col was provided but is not a vector with length 2') + } + } + + if(!is.null(pxl_subset_row)) { + if(is.vector(pxl_subset_row) & length(pxl_subset_row) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on row pixels") + tissue_positions = tissue_positions[pxl_row_in_fullres > pxl_subset_row[1] & pxl_row_in_fullres < pxl_subset_row[2]] + } else { + cat('pxl_subset_row is ', pxl_subset_row) + stop('pxl_subset_row was provided but is not a vector with length 2') + } + } + + if(!is.null(pxl_subset_col)) { + if(is.vector(pxl_subset_col) & length(pxl_subset_col) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on column pixels") + tissue_positions = tissue_positions[pxl_col_in_fullres > pxl_subset_col[1] & pxl_col_in_fullres < pxl_subset_col[2]] + } else { + cat(pxl_subset_col) + stop('pxl_subset_col was provided but is not a vector with length 2') + } + } + + # also subset matrix if needed + if(any(!is.null(c(barcodes, + array_subset_row, array_subset_col, + pxl_subset_row, pxl_subset_col)))) { + vmsg(.v = verbose, "subsetting visiumHD on expression matrix") + matrix = matrix[, colnames(matrix) %in% tissue_positions$barcode] + } + + + + + + + # convert expression matrix to minimal data.table object + matrix_tile_dt = data.table::as.data.table(Matrix::summary(matrix)) + genes = matrix@Dimnames[[1]] + samples = matrix@Dimnames[[2]] + matrix_tile_dt[, gene := genes[i]] + matrix_tile_dt[, pixel := samples[j]] + + + # merge data.table matrix and spatial coordinates to create input for Giotto Polygons + gpoints = data.table::merge.data.table(matrix_tile_dt, tissue_positions, by.x = 'pixel', by.y = 'barcode') + gpoints = gpoints[,.(pixel, pxl_row_in_fullres, pxl_col_in_fullres, gene, x)] + colnames(gpoints) = c('pixel', 'x', 'y', 'gene', 'counts') + + gpoints = createGiottoPoints(x = gpoints[,.(x, y, gene, pixel, counts)]) + + # ensure output is always a list + if (!is.list(gpoints)) { + gpoints <- list(gpoints) + names(gpoints) <- objName(gpoints[[1L]]) + } + + return(list('matrix' = matrix, 'tissue_positions' = tissue_positions, 'gpoints' = gpoints)) + +} + From 9402002a09756fe508ae0f7ed3a7994d6a778f42 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 3 Jun 2024 07:49:55 -0400 Subject: [PATCH 070/150] enh: expose param to select deconv data for `findICFSpot()` --- R/spatial_interaction.R | 7 ++- R/spatial_interaction_spot.R | 78 +++++++++++++++++------------- man/findICFSpot.Rd | 45 ++++++++++------- man/findInteractionChangedFeats.Rd | 7 ++- 4 files changed, 82 insertions(+), 55 deletions(-) diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index 9347b19aa..a643c66d0 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -1055,7 +1055,10 @@ NULL #' @aliases findICF #' @description Identifies cell-to-cell Interaction Changed Features (ICF), #' i.e. features that are differentially expressed due to proximity to other -#' cell types. +#' cell types. This function is appropriate for single-cell level data. For +#' data from spot-based spatial assays or spatially binned data, see +#' [findICFSpot()], which runs on top of DWLS results or similar spot-level +#' cell-type enrichment outputs #' @param gobject giotto object #' @param feat_type feature type #' @param spat_unit spatial unit @@ -1097,7 +1100,7 @@ NULL #' * **int_nr_other:** number of other cells for interacting cell type #' * **unif_int:** cell-cell interaction #' -#' @seealso [filterInteractionChangedFeats()] +#' @seealso [filterInteractionChangedFeats()] [findICFSpot()] #' @md #' @examples #' g <- GiottoData::loadGiottoMini("visium") diff --git a/R/spatial_interaction_spot.R b/R/spatial_interaction_spot.R index eed3910d6..058c38816 100644 --- a/R/spatial_interaction_spot.R +++ b/R/spatial_interaction_spot.R @@ -1068,7 +1068,8 @@ NULL #' @name findICFSpot #' @description Identifies cell-to-cell Interaction Changed Features (ICF) for #' spots, i.e. features expression residual that are different due to proximity -#' to other cell types. +#' to other cell types. Works using results from celltype deconvolution methods +#' such as those from [runDWLSDeconv()]. #' #' @param gobject A giotto object #' @param spat_unit spatial unit (e.g. 'cell') @@ -1077,6 +1078,7 @@ NULL #' @param ave_celltype_exp average feature expression in each cell type #' @param selected_features subset of selected features (optional) #' @param spatial_network_name name of spatial network to use +#' @param deconv_name name of deconvolution/spatial enrichment values to use #' @param minimum_unique_cells minimum number of target cells required #' @param minimum_unique_int_cells minimum number of interacting cells required #' @param CCI_cell_score cell proximity score to filter no interacted cell @@ -1090,40 +1092,45 @@ NULL #' @param seed_number seed number #' @param verbose be verbose #' -#' @returns icfObject that contains the differential feat scores +#' @returns `icfObject` that contains the differential feat scores #' @details Function to calculate if features expression residual are #' differentially expressed in cell types when they interact #' (approximated by physical proximity) with other cell types. #' Feature expression residual calculated as: #' (observed expression in spot - cell_type_proportion * #' average_expressed_in_cell_type) -#' The results data.table in the icfObject contains - at least - +#' The results data.table in the `icfObject` contains - at least - #' the following columns: -#' \itemize{ -#' \item{features:}{ All or selected list of tested features} -#' \item{sel:}{ average feature expression residual in the interacting cells from the target cell type } -#' \item{other:}{ average feature expression residual in the NOT-interacting cells from the target cell type } -#' \item{pcc_sel:}{ correlation between cell proximity score and expression residual in the interacting cells from the target cell type} -#' \item{pcc_other:}{ correlation between cell proximity score and expression residual in the NOT-interacting cells from the target cell type } -#' \item{pcc_diff:}{ correlation difference between sel and other} -#' \item{p.value:}{ associated p-value} -#' \item{p.adj:}{ adjusted p-value} -#' \item{cell_type:}{ target cell type} -#' \item{int_cell_type:}{ interacting cell type} -#' \item{nr_select:}{ number of cells for selected target cell type} -#' \item{int_nr_select:}{ number of cells for interacting cell type} -#' \item{unif_int:}{ cell-cell interaction} -#' } +#' * **features:** All or selected list of tested features +#' * **sel:** average feature expression residual in the interacting cells +#' from the target cell type +#' * **other:** average feature expression residual in the NOT-interacting +#' cells from the target cell type +#' * **pcc_sel:** correlation between cell proximity score and expression +#' residual in the interacting cells from the target cell type +#' * **pcc_other:** correlation between cell proximity score and expression +#' residual in the NOT-interacting cells from the target cell type +#' * **pcc_diff:** correlation difference between sel and other +#' * **p.value:** associated p-value +#' * **p.adj:** adjusted p-value +#' * **cell_type:** target cell type +#' * **int_cell_type:** interacting cell type +#' * **nr_select:** number of cells for selected target cell type +#' * **int_nr_select:** number of cells for interacting cell type +#' * **unif_int:** cell-cell interaction +#' #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' g_expression <- getExpression(g, output = "matrix") #' -#' findICFSpot(g, +#' res <- findICFSpot(g, #' spat_unit = "cell", #' feat_type = "rna", #' ave_celltype_exp = g_expression, #' spatial_network_name = "spatial_network" #' ) +#' @seealso [findInteractionChangedFeats()] +#' @md #' @export findICFSpot <- function(gobject, spat_unit = NULL, @@ -1132,6 +1139,7 @@ findICFSpot <- function(gobject, ave_celltype_exp, selected_features = NULL, spatial_network_name = "Delaunay_network", + deconv_name = "DWLS", minimum_unique_cells = 5, minimum_unique_int_cells = 5, CCI_cell_score = 0.1, @@ -1185,7 +1193,7 @@ findICFSpot <- function(gobject, gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - name = "DWLS", + name = deconv_name, output = "data.table" ) data.table::setDF(dwls_values) @@ -1256,22 +1264,24 @@ findICFSpot <- function(gobject, permutation_test <- ifelse( diff_test == "permutation", nr_permutations, "no permutations") - icfObject <- list( - ICFscores = final_result, - Giotto_info = list( - "values" = values, - "cluster" = "cell_ID", - "spatial network" = spatial_network_name + icfObject <- structure( + .Data = list( + ICFscores = final_result, + Giotto_info = list( + "values" = values, + "cluster" = "cell_ID", + "spatial network" = spatial_network_name + ), + test_info = list( + "test" = diff_test, + "p.adj" = adjust_method, + "min cells" = minimum_unique_cells, + "min interacting cells" = minimum_unique_int_cells, + "perm" = permutation_test + ) ), - test_info = list( - "test" = diff_test, - "p.adj" = adjust_method, - "min cells" = minimum_unique_cells, - "min interacting cells" = minimum_unique_int_cells, - "perm" = permutation_test - ) + class = "icfObject" ) - class(icfObject) <- append(class(icfObject), "icfObject") return(icfObject) } diff --git a/man/findICFSpot.Rd b/man/findICFSpot.Rd index 51fbef4e2..05b75e9bb 100644 --- a/man/findICFSpot.Rd +++ b/man/findICFSpot.Rd @@ -12,6 +12,7 @@ findICFSpot( ave_celltype_exp, selected_features = NULL, spatial_network_name = "Delaunay_network", + deconv_name = "DWLS", minimum_unique_cells = 5, minimum_unique_int_cells = 5, CCI_cell_score = 0.1, @@ -41,6 +42,8 @@ findICFSpot( \item{spatial_network_name}{name of spatial network to use} +\item{deconv_name}{name of deconvolution/spatial enrichment values to use} + \item{minimum_unique_cells}{minimum number of target cells required} \item{minimum_unique_int_cells}{minimum number of interacting cells required} @@ -66,12 +69,13 @@ findICFSpot( \item{verbose}{be verbose} } \value{ -icfObject that contains the differential feat scores +\code{icfObject} that contains the differential feat scores } \description{ Identifies cell-to-cell Interaction Changed Features (ICF) for spots, i.e. features expression residual that are different due to proximity -to other cell types. +to other cell types. Works using results from celltype deconvolution methods +such as those from \code{\link[=runDWLSDeconv]{runDWLSDeconv()}}. } \details{ Function to calculate if features expression residual are @@ -80,32 +84,39 @@ differentially expressed in cell types when they interact Feature expression residual calculated as: (observed expression in spot - cell_type_proportion * average_expressed_in_cell_type) -The results data.table in the icfObject contains - at least - +The results data.table in the \code{icfObject} contains - at least - the following columns: \itemize{ - \item{features:}{ All or selected list of tested features} - \item{sel:}{ average feature expression residual in the interacting cells from the target cell type } - \item{other:}{ average feature expression residual in the NOT-interacting cells from the target cell type } - \item{pcc_sel:}{ correlation between cell proximity score and expression residual in the interacting cells from the target cell type} - \item{pcc_other:}{ correlation between cell proximity score and expression residual in the NOT-interacting cells from the target cell type } - \item{pcc_diff:}{ correlation difference between sel and other} - \item{p.value:}{ associated p-value} - \item{p.adj:}{ adjusted p-value} - \item{cell_type:}{ target cell type} - \item{int_cell_type:}{ interacting cell type} - \item{nr_select:}{ number of cells for selected target cell type} - \item{int_nr_select:}{ number of cells for interacting cell type} - \item{unif_int:}{ cell-cell interaction} +\item \strong{features:} All or selected list of tested features +\item \strong{sel:} average feature expression residual in the interacting cells +from the target cell type +\item \strong{other:} average feature expression residual in the NOT-interacting +cells from the target cell type +\item \strong{pcc_sel:} correlation between cell proximity score and expression +residual in the interacting cells from the target cell type +\item \strong{pcc_other:} correlation between cell proximity score and expression +residual in the NOT-interacting cells from the target cell type +\item \strong{pcc_diff:} correlation difference between sel and other +\item \strong{p.value:} associated p-value +\item \strong{p.adj:} adjusted p-value +\item \strong{cell_type:} target cell type +\item \strong{int_cell_type:} interacting cell type +\item \strong{nr_select:} number of cells for selected target cell type +\item \strong{int_nr_select:} number of cells for interacting cell type +\item \strong{unif_int:} cell-cell interaction } } \examples{ g <- GiottoData::loadGiottoMini("visium") g_expression <- getExpression(g, output = "matrix") -findICFSpot(g, +res <- findICFSpot(g, spat_unit = "cell", feat_type = "rna", ave_celltype_exp = g_expression, spatial_network_name = "spatial_network" ) } +\seealso{ +\code{\link[=findInteractionChangedFeats]{findInteractionChangedFeats()}} +} diff --git a/man/findInteractionChangedFeats.Rd b/man/findInteractionChangedFeats.Rd index e674159b7..e47ebf356 100644 --- a/man/findInteractionChangedFeats.Rd +++ b/man/findInteractionChangedFeats.Rd @@ -93,7 +93,10 @@ feature scores \description{ Identifies cell-to-cell Interaction Changed Features (ICF), i.e. features that are differentially expressed due to proximity to other -cell types. +cell types. This function is appropriate for single-cell level data. For +data from spot-based spatial assays or spatially binned data, see +\code{\link[=findICFSpot]{findICFSpot()}}, which runs on top of DWLS results or similar spot-level +cell-type enrichment outputs } \details{ Function to calculate if features are differentially expressed in @@ -140,5 +143,5 @@ icf2 <- findICF(g, ) } \seealso{ -\code{\link[=filterInteractionChangedFeats]{filterInteractionChangedFeats()}} +\code{\link[=filterInteractionChangedFeats]{filterInteractionChangedFeats()}} \code{\link[=findICFSpot]{findICFSpot()}} } From 9937cc204095ce761e8e7907c4a40d46efb83128 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 3 Jun 2024 07:50:26 -0400 Subject: [PATCH 071/150] chore: change `doHMRF()` requirements --- R/python_hmrf.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/R/python_hmrf.R b/R/python_hmrf.R index 2f7504179..44b257763 100644 --- a/R/python_hmrf.R +++ b/R/python_hmrf.R @@ -56,15 +56,8 @@ doHMRF <- function(gobject, python_path = NULL, output_folder = NULL, overwrite_output = TRUE) { - if (!requireNamespace("smfishHmrf", quietly = TRUE)) { - stop("package ", "smfishHmrf", " is not yet installed \n", - "To install: \n", - "remotes::install_bitbucket(repo = 'qzhudfci/smfishhmrf-r', ref='master')", - "see http://spatial.rc.fas.harvard.edu/install.html for more information", - call. = FALSE - ) - } + package_check("smfishhmrf", repository = "pip") # data.table set global variable to <- from <- NULL From 1d43d5250cc5377c7e1cd4aa91156fc954d4a6bd Mon Sep 17 00:00:00 2001 From: josschavezf Date: Mon, 3 Jun 2024 09:22:58 -0400 Subject: [PATCH 072/150] update missing param --- R/clustering.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/clustering.R b/R/clustering.R index e39ab3e06..950debf62 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -2066,6 +2066,7 @@ subClusterCells <- function(gobject, #' @describeIn subClusterCells Further subcluster cells using a NN-network and #' the Leiden algorithm #' @param toplevel do not use +#' @param feat_type feature type #' @export doLeidenSubCluster <- function( gobject, From 76b606202e63718cb6b764c04aece69ccfaae17d Mon Sep 17 00:00:00 2001 From: josschavezf Date: Mon, 3 Jun 2024 09:23:10 -0400 Subject: [PATCH 073/150] update missing param --- R/cross_section.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/cross_section.R b/R/cross_section.R index fbf51fd86..ff93ccaf2 100644 --- a/R/cross_section.R +++ b/R/cross_section.R @@ -945,6 +945,7 @@ crossSectionPlot <- function( #' @param crossSection_obj cross section object as alternative input. default = NULL. #' @param name name of virtual cross section to use #' @param spatial_network_name name of spatial network to use +#' @param show_other_cells logical. Default = TRUE #' @param other_cell_color color of cells outside the cross section. #' default = transparent. #' @param default_save_name default save name for saving, don't change, change From 06df89967a94d8a83e9504cf70e28c756ce69a71 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Mon, 3 Jun 2024 09:23:49 -0400 Subject: [PATCH 074/150] update params --- R/spatial_genes.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/spatial_genes.R b/R/spatial_genes.R index dfe21a7ee..53bc927c5 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -670,7 +670,6 @@ NULL #' @param bin_method method to binarize gene expression #' @param expression_values expression values to use #' @param subset_feats only select a subset of features to test -#' @param subset_genes deprecated, use subset_feats #' @param spatial_network_name name of spatial network to use #' (default = 'spatial_network') #' @param spatial_network_k different k's for a spatial kNN to evaluate @@ -3193,10 +3192,9 @@ selectPatternGenes <- function( #' @title do_spatial_knn_smoothing #' @name do_spatial_knn_smoothing #' @description smooth gene expression over a kNN spatial network -#' @param gobject giotto object -#' @param expression_values gene expression values to use +#' @param expression_matrix gene expression values to use #' @param subset_feats subset of features to use -#' @param spatial_network_name name of spatial network to use +#' @param spatial_network spatial network to use #' @param b smoothing factor beteen 0 and 1 (default: automatic) #' @returns matrix with smoothened gene expression values based on kNN #' spatial network From f83cef18c38af84e83e11173e45b93e4986fd444 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Mon, 3 Jun 2024 09:24:16 -0400 Subject: [PATCH 075/150] remove unused params --- R/kriging.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/kriging.R b/R/kriging.R index 53ef9d159..ff3da4505 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -133,9 +133,6 @@ setMethod( #' @rdname interpolateFeature #' @param rastersize numeric. Length of major axis in px of interpolation #' raster to create. -#' @param name name of interpolation `giottoLargeImage` to generate -#' @param filename character. Output filename. Default is \[`name`\].tif within -#' the working directory. #' @param overwrite logical. Whether raster outputs should be overwritten if #' the same `filename` is provided. #' @details From 63c1edcd89a65037abba0e1bb7c205fd35e9dadc Mon Sep 17 00:00:00 2001 From: josschavezf Date: Mon, 3 Jun 2024 09:24:27 -0400 Subject: [PATCH 076/150] update documentation --- man/binSpect.Rd | 2 -- man/crossSectionFeatPlot3D.Rd | 2 ++ man/do_spatial_knn_smoothing.Rd | 10 ++++------ man/findICFSpot.Rd | 2 +- man/interpolateFeature.Rd | 5 ----- man/subClusterCells.Rd | 2 ++ 6 files changed, 9 insertions(+), 14 deletions(-) diff --git a/man/binSpect.Rd b/man/binSpect.Rd index 060f88f7b..996fb5436 100644 --- a/man/binSpect.Rd +++ b/man/binSpect.Rd @@ -208,8 +208,6 @@ separately (default)} \item{expression_matrix}{expression matrix} \item{spatial_network}{spatial network in data.table format} - -\item{subset_genes}{deprecated, use subset_feats} } \value{ data.table with results (see details) diff --git a/man/crossSectionFeatPlot3D.Rd b/man/crossSectionFeatPlot3D.Rd index e45e87666..8d6339194 100644 --- a/man/crossSectionFeatPlot3D.Rd +++ b/man/crossSectionFeatPlot3D.Rd @@ -30,6 +30,8 @@ crossSectionFeatPlot3D( \item{spatial_network_name}{name of spatial network to use} +\item{show_other_cells}{logical. Default = TRUE} + \item{other_cell_color}{color of cells outside the cross section. default = transparent.} diff --git a/man/do_spatial_knn_smoothing.Rd b/man/do_spatial_knn_smoothing.Rd index abeac38db..5e36e7d3f 100644 --- a/man/do_spatial_knn_smoothing.Rd +++ b/man/do_spatial_knn_smoothing.Rd @@ -12,15 +12,13 @@ do_spatial_knn_smoothing( ) } \arguments{ -\item{subset_feats}{subset of features to use} - -\item{b}{smoothing factor beteen 0 and 1 (default: automatic)} +\item{expression_matrix}{gene expression values to use} -\item{gobject}{giotto object} +\item{spatial_network}{spatial network to use} -\item{expression_values}{gene expression values to use} +\item{subset_feats}{subset of features to use} -\item{spatial_network_name}{name of spatial network to use} +\item{b}{smoothing factor beteen 0 and 1 (default: automatic)} } \value{ matrix with smoothened gene expression values based on kNN diff --git a/man/findICFSpot.Rd b/man/findICFSpot.Rd index ae18a35d8..16418e8ab 100644 --- a/man/findICFSpot.Rd +++ b/man/findICFSpot.Rd @@ -125,7 +125,7 @@ colnames(ave_celltype_exp) <- colnames(sign_matrix) findICFSpot(g, spat_unit = "cell", feat_type = "rna", - ave_celltype_exp = g_expression, + ave_celltype_exp = ave_celltype_exp, spatial_network_name = "spatial_network" ) } diff --git a/man/interpolateFeature.Rd b/man/interpolateFeature.Rd index 325ceb5fd..52b3eb239 100644 --- a/man/interpolateFeature.Rd +++ b/man/interpolateFeature.Rd @@ -76,11 +76,6 @@ the same `filename` is provided.} \item{rastersize}{numeric. Length of major axis in px of interpolation raster to create.} - -\item{name}{name of interpolation `giottoLargeImage` to generate} - -\item{filename}{character. Output filename. Default is \[`name`\].tif within -the working directory.} } \value{ `giotto` method returns a `giotto` object with newly made appended diff --git a/man/subClusterCells.Rd b/man/subClusterCells.Rd index df848ad1f..a595efc8a 100644 --- a/man/subClusterCells.Rd +++ b/man/subClusterCells.Rd @@ -156,6 +156,8 @@ input for PCA} \item{verbose}{verbose} +\item{feat_type}{feature type} + \item{toplevel}{do not use} \item{version}{version of Louvain algorithm to use. One of "community" or From ba060fc04d46a389dc807628883fd7adea1c6c70 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Mon, 3 Jun 2024 10:21:56 -0400 Subject: [PATCH 077/150] fix error in interactive polygons --- R/interactivity.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/interactivity.R b/R/interactivity.R index c9815439f..16e61af35 100644 --- a/R/interactivity.R +++ b/R/interactivity.R @@ -95,8 +95,10 @@ plotInteractivePolygons <- function( x + geom_polygon( data = clicklist(), - aes(x, y, color = name, fill = name), - alpha = 0, ... + aes(x, y, color = name), + alpha = 0, + show.legend = FALSE, + ... ) + coord_fixed( xlim = c(input$xrange[1], input$xrange[2]), From ee89c65f33e76f4014baf6d8ee6ff9a2c08ccf74 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Mon, 3 Jun 2024 10:23:38 -0400 Subject: [PATCH 078/150] add update description --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 94909d971..3b03d36be 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# Giotto 4.0.10 TBD + +## Bug fixes +* Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale # Giotto 4.0.9 From 0d94fc87a1e4cfdee732f2f2dfa22f8a0796cc22 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:52:40 -0400 Subject: [PATCH 079/150] fix: `find_dampening_constant()` needs `drop = FALSE` --- NEWS.md | 3 ++- R/spatial_enrichment.R | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3b03d36be..5fe9c1560 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,8 @@ # Giotto 4.0.10 TBD ## Bug fixes -* Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale +* Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale [#964](https://github.com/drieslab/Giotto/issues/964) by jweis3 +* Fix error in DWLS `find_dampening_constant()` when `S[subset, ]` produces only 1 gene. # Giotto 4.0.9 diff --git a/R/spatial_enrichment.R b/R/spatial_enrichment.R index ffcb3b801..1f95d9a53 100644 --- a/R/spatial_enrichment.R +++ b/R/spatial_enrichment.R @@ -2911,7 +2911,7 @@ find_dampening_constant <- function( # solve dampened weighted least squares for subset fit <- stats::lm( - B[subset] ~ -1 + S[subset, ], + B[subset] ~ -1 + S[subset, , drop = FALSE], weights = wsDampened[subset] ) sol <- fit$coef * sum(goldStandard) / sum(fit$coef) From 5367603aae08e208ad676f7b192f808c4ec68be3 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 3 Jun 2024 13:56:01 -0400 Subject: [PATCH 080/150] chore: docs and formatting --- R/convenience_cosmx.R | 76 ++++++++++++++++++++--------------------- R/convenience_general.R | 2 +- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/R/convenience_cosmx.R b/R/convenience_cosmx.R index a1037d188..4a107002f 100644 --- a/R/convenience_cosmx.R +++ b/R/convenience_cosmx.R @@ -1191,15 +1191,17 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' these image objects more responsive when accessing them from a server. #' \code{\link{showGiottoImageNames}} can be used to see the available images. #' @export -createGiottoCosMxObject <- function(cosmx_dir = NULL, - data_to_use = c("all", "subcellular", "aggregate"), - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - FOVs = NULL, - instructions = NULL, - cores = determine_cores(), - verbose = TRUE) { +createGiottoCosMxObject <- function( + cosmx_dir = NULL, + data_to_use = c("all", "subcellular", "aggregate"), + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + FOVs = NULL, + instructions = NULL, + cores = determine_cores(), + verbose = TRUE +) { # 0. setup cosmx_dir <- path.expand(cosmx_dir) @@ -1226,32 +1228,32 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, # 2. load and create giotto object cosmx_gobject <- switch(data_to_use, - "subcellular" = .createGiottoCosMxObject_subcellular( - dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ), - "aggregate" = .createGiottoCosMxObject_aggregate( - dir_items, - cores = cores, - verbose = verbose, - instructions = instructions - ), - "all" = .createGiottoCosMxObject_all( - dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ) + "subcellular" = .createGiottoCosMxObject_subcellular( + dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ), + "aggregate" = .createGiottoCosMxObject_aggregate( + dir_items, + cores = cores, + verbose = verbose, + instructions = instructions + ), + "all" = .createGiottoCosMxObject_all( + dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ) ) @@ -1263,9 +1265,7 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, } - - - message("done") + vmsg(.v = verbose, "done") return(cosmx_gobject) } diff --git a/R/convenience_general.R b/R/convenience_general.R index ad36b2b58..1d626c5bf 100644 --- a/R/convenience_general.R +++ b/R/convenience_general.R @@ -279,7 +279,7 @@ abbrev_path <- function(path, head = 15, tail = 35L) { #' @param h5_file optional path to create an on-disk h5 file #' @param verbose be verbose #' -#' @return giotto object +#' @returns giotto object #' @details #' If starting from a Visium 10X directory: #' \itemize{ From 56af98c4c00688dac9df743c5b385d490a059f57 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:12:15 -0400 Subject: [PATCH 081/150] fix: package checking --- R/python_hmrf.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/python_hmrf.R b/R/python_hmrf.R index 005067a49..76bb4a503 100644 --- a/R/python_hmrf.R +++ b/R/python_hmrf.R @@ -58,8 +58,8 @@ doHMRF <- function(gobject, python_path = NULL, output_folder = NULL, overwrite_output = TRUE) { - - package_check("smfishhmrf", repository = "pip") + + package_check("smfishHmrf", repository = "pip") # data.table set global variable to <- from <- NULL From 34a8313fbfff588e7325ac391866fe1fb6d62668 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 10 Jun 2024 12:31:29 -0400 Subject: [PATCH 082/150] !feat: remove `parse_affine()` - `parse_affine()` is now `decomp_affine()` in _GiottoClass 0.3.2_ --- R/image_registration.R | 26 +------------------------- man/parse_affine.Rd | 21 --------------------- 2 files changed, 1 insertion(+), 46 deletions(-) delete mode 100644 man/parse_affine.Rd diff --git a/R/image_registration.R b/R/image_registration.R index e671ae779..ec973892a 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -1021,31 +1021,7 @@ registerImagesFIJI <- function( -#' @name parse_affine -#' @title Read affine matrix for linear transforms -#' @description Affine transforms are linear transformations that cover scaling, -#' rotation, shearing, and translations. They can be represented as matrices of -#' 2x3 or 3x3 values. This function reads the matrix and extracts the values -#' needed to perform them. -#' @param x object coercible to matrix with a 2x3 or 3x3 affine matrix -#' @returns a list of transforms information. -#' @keywords internal -parse_affine <- function(x) { - x <- as.matrix(x) - scale_x <- x[[1, 1]] - shear_x <- x[[1, 2]] - translate_x <- x[[1, 3]] - scale_y <- x[[2, 2]] - shear_y <- x[[2, 1]] - translate_y <- x[[2, 3]] - - list( - scale = c(x = scale_x, y = scale_y), - rotate = atan(shear_x / scale_x) + atan(shear_y / scale_y), - shear = c(x = shear_x, y = shear_y), - translate = c(x = translate_x, y = translate_y) - ) -} + # TODO - merge jython function into normal register FIJI diff --git a/man/parse_affine.Rd b/man/parse_affine.Rd deleted file mode 100644 index c999783c9..000000000 --- a/man/parse_affine.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/image_registration.R -\name{parse_affine} -\alias{parse_affine} -\title{Read affine matrix for linear transforms} -\usage{ -parse_affine(x) -} -\arguments{ -\item{x}{object coercible to matrix with a 2x3 or 3x3 affine matrix} -} -\value{ -a list of transforms information. -} -\description{ -Affine transforms are linear transformations that cover scaling, -rotation, shearing, and translations. They can be represented as matrices of -2x3 or 3x3 values. This function reads the matrix and extracts the values -needed to perform them. -} -\keyword{internal} From 071f6e3bd062e6eab87425434fa5d2fb8b32ac2d Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:23:02 -0400 Subject: [PATCH 083/150] chore: formatting --- R/convenience_xenium.R | 203 ++++++++++++++++++++++------------------- 1 file changed, 108 insertions(+), 95 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 6b736b839..e68a897b7 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -444,7 +444,7 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { e <- file_extension(path) %>% head(1L) %>% tolower() vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) - # read in + # read in as data.table a <- list( path = path, dropcols = dropcols, @@ -453,10 +453,10 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { ) vmsg("Loading transcript level info...", .v = verbose) tx <- switch(e, - "csv" = do.call(.xenium_transcript_csv, - args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_transcript_parquet, args = a), - "zarr" = stop('zarr not yet supported') + "csv" = do.call(.xenium_transcript_csv, + args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_transcript_parquet, args = a), + "zarr" = stop('zarr not yet supported') ) # create gpoints @@ -518,11 +518,15 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { qv_threshold = 20, verbose = NULL ) { - package_check( - pkg_name = c("arrow", "dplyr"), - repository = c("CRAN:arrow", "CRAN:dplyr") - ) - + package_check("dplyr") + package_check("arrow", custom_msg = sprintf( + "package 'arrow' is not yet installed\n\n To install:\n%s\n%s%s", + "Sys.setenv(ARROW_WITH_ZSTD = \"ON\") ", + "install.packages(\"arrow\", ", + "repos = c(\"https://apache.r-universe.dev\"))" + )) + + # setup tx parquet query tx_arrow <- arrow::read_parquet(file = path, as_data_frame = FALSE) %>% dplyr::mutate(transcript_id = cast(transcript_id, arrow::string())) %>% dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% @@ -538,18 +542,13 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) n_after <- .nr(tx_arrow) - vmsg( - .v = verbose, - sprintf( - "QV cutoff: %d\n Feature points removed: %d, out of %d", - qv_threshold, - n_before - n_after, - n_before - ) - ) + vmsg(.v = verbose, sprintf( + "QV cutoff: %f\n Feature points removed: %d, out of %d", + qv_threshold, n_before - n_after, n_before + )) } - # convert to data.table + # pull into memory as data.table tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() data.table::setnames( x = tx_dt, @@ -574,9 +573,9 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { a <- list(path = path) vmsg("Loading boundary info...", .v = verbose) polys <- switch(e, - "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_poly_parquet, args = a), - "zarr" = stop("zarr not yet supported") + "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_poly_parquet, args = a), + "zarr" = stop("zarr not yet supported") ) # create gpolys @@ -727,8 +726,8 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE ex <- switch(e, - "mtx" = do.call(.xenium_cellmeta_csv, args = a), - "h5" = do.call(.xenium_cellmeta_parquet, args = a) + "mtx" = do.call(.xenium_cellmeta_csv, args = a), + "h5" = do.call(.xenium_cellmeta_parquet, args = a) ) eo <- createExprObj( @@ -814,34 +813,36 @@ NULL #' @rdname load_xenium_folder #' @keywords internal -.load_xenium_folder <- function(path_list, - load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", - gene_column_index = 1, - cores, - verbose = TRUE) { +.load_xenium_folder <- function( + path_list, + load_format = "csv", + data_to_use = "subcellular", + h5_expression = "FALSE", + h5_gene_ids = "symbols", + gene_column_index = 1, + cores, + verbose = TRUE +) { data_list <- switch(load_format, - "csv" = .load_xenium_folder_csv( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "parquet" = .load_xenium_folder_parquet( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) + "csv" = .load_xenium_folder_csv( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "parquet" = .load_xenium_folder_parquet( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) ) return(data_list) @@ -850,13 +851,15 @@ NULL #' @describeIn load_xenium_folder Load from csv files #' @keywords internal -.load_xenium_folder_csv <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { +.load_xenium_folder_csv <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE +) { # initialize return vars feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL @@ -955,13 +958,15 @@ NULL #' @describeIn load_xenium_folder Load from parquet files #' @keywords internal -.load_xenium_folder_parquet <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { +.load_xenium_folder_parquet <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE +) { # initialize return vars feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL # dplyr variable @@ -1173,18 +1178,20 @@ NULL #' map to any of the keys. #' #' @export -createGiottoXeniumObject <- function(xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), - qv_threshold = 20, - key_list = NULL, - instructions = NULL, - cores = NA, - verbose = TRUE) { +createGiottoXeniumObject <- function( + xenium_dir, + data_to_use = c("subcellular", "aggregate"), + load_format = "csv", + h5_expression = TRUE, + h5_gene_ids = c("symbols", "ensembl"), + gene_column_index = 1, + bounds_to_load = c("cell"), + qv_threshold = 20, + key_list = NULL, + instructions = NULL, + cores = NA, + verbose = TRUE +) { # 0. setup xenium_dir <- path.expand(xenium_dir) @@ -1304,12 +1311,14 @@ createGiottoXeniumObject <- function(xenium_dir, #' @returns giotto object #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate #' @keywords internal -.createGiottoXeniumObject_subcellular <- function(data_list, - key_list = NULL, - qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE) { +.createGiottoXeniumObject_subcellular <- function( + data_list, + key_list = NULL, + qv_threshold = 20, + instructions = NULL, + cores = NA, + verbose = TRUE +) { # data.table vars qv <- NULL @@ -1397,11 +1406,13 @@ createGiottoXeniumObject <- function(xenium_dir, #' @returns giotto object #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular #' @keywords internal -.createGiottoXeniumObject_aggregate <- function(data_list, - # include_analysis = FALSE, - instructions = NULL, - cores = NA, - verbose = TRUE) { +.createGiottoXeniumObject_aggregate <- function( + data_list, + # include_analysis = FALSE, + instructions = NULL, + cores = NA, + verbose = TRUE +) { # Unpack data_list info feat_meta <- data_list$feat_meta cell_meta <- data_list$cell_meta @@ -1460,12 +1471,14 @@ createGiottoXeniumObject <- function(xenium_dir, #' @keywords internal #' @returns path_list a list of xenium files discovered and their filepaths. NULL #' values denote missing items -.read_xenium_folder <- function(xenium_dir, - data_to_use = "subcellular", - bounds_to_load = c("cell"), - load_format = "csv", - h5_expression = FALSE, - verbose = TRUE) { +.read_xenium_folder <- function( + xenium_dir, + data_to_use = "subcellular", + bounds_to_load = c("cell"), + load_format = "csv", + h5_expression = FALSE, + verbose = TRUE +) { # Check needed packages if (load_format == "parquet") { package_check(pkg_name = "arrow", repository = "CRAN") From 06446eb635b862221f9b217c9a1b96875bad5cba Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 11 Jun 2024 12:50:03 -0400 Subject: [PATCH 084/150] chore: cleanup --- R/image_registration.R | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/R/image_registration.R b/R/image_registration.R index ec973892a..cdd120cdc 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -1018,32 +1018,3 @@ registerImagesFIJI <- function( return(0 == system(cmd)) } - - - - - - -# TODO - merge jython function into normal register FIJI -# TODO - add in manual rigid registration when given a transforms table - -### Under Construction #### - -# resizeImagesFIJI = function(fiji = fiji()) {} - -# TODO - install FIJI jython registration and resize scripts -# install_FIJI_scripts = function(fiji = fiji()) {} - -# TODO These things require a correct set of boundary values -# - Subset images in Giotto using Magick and followup reassignment as the -# default 'image' -# - Follow this up with potential registration -# - Need a way to determine the pixel distances between spots to get an idea of -# which regions of image 'belong' to a spot -# - Would be nice to be able to put together an image mask even in magick and -# apply it to the image to aid with img_reg and take care of jagged lines after -# image subsetting -# - A shiny app to subset tissue regions would be nice -# The shiny app should be able to select spots in a 2d plane by default -# If given the ability, it should also select spots of a single plane or within -# a certain range of z values and plot them as a 2D for selection purposes From 69c4d90c7dda54560b898de27adf80bc707af96a Mon Sep 17 00:00:00 2001 From: Ruben Dries Date: Wed, 3 Jul 2024 09:45:04 -0400 Subject: [PATCH 085/150] add writeChatGPTqueryDEG --- R/general_help.R | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/R/general_help.R b/R/general_help.R index 67e6e7c13..853bfc91d 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -272,6 +272,50 @@ rank_binarize_wrapper <- function( } +## chatgpt queries #### + +#' @title writeChatGPTqueryDEG +#' @name writeChatGPTqueryDEG +#' @description This function writes a query as a .txt file that can be used with +#' ChatGPT or a similar LLM service to find the most likely cell types based on the +#' top differential expressed genes (DEGs) between identified clusters. +#' @param DEG_output the output format from the differenetial expression functions +#' @param top_n_genes number of genes for each cluster +#' @param tissue_type tissue type +#' @param folder_name path to the folder where you want to save the .txt file +#' @param file_name name of .txt file +#' @returns writes a .txt file to the desired location +#' @details This function does not run any LLM service. It simply creates the .txt +#' file that can then be used any LLM service (e.g. OpenAI, Gemini, ...) +#' @export +writeChatGPTquery = function(DEG_output, + top_n_genes = 10, + tissue_type = 'human breast cancer', + folder_name = getwd(), + file_name = 'chatgpt_query.txt') { + + chatgpt_query = paste0("Identify cell types of ", tissue_type, " tissue using the following markers. Identify one cell type for each row. Only provide the cell type name and the marker genes used for cell type identification.") + + selected_DEG_output = DEG_output[, head(.SD, top_n_genes), by="cluster"] + + finallist = list() + finallist[[1]] = chatgpt_query + + for(clus in unique(selected_DEG_output$cluster)) { + x = selected_DEG_output[cluster == clus][['feats']] + x = c(clus, x) + finallist[[as.numeric(clus)+1]] = x + } + + outputdt = data.table::data.table(finallist) + + cat('\n start writing \n') + data.table::fwrite(x = outputdt, + file = paste0(folder_name,'/', file_name), + sep2 = c(""," ",""), col.names = F) + +} + # IDs #### From a360eb01631816c373968ca8d470dfaace2a39d8 Mon Sep 17 00:00:00 2001 From: Ruben Dries Date: Wed, 3 Jul 2024 09:46:21 -0400 Subject: [PATCH 086/150] add writeChatGPTqueryDEG --- R/general_help.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/general_help.R b/R/general_help.R index 853bfc91d..d1d87323a 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -288,11 +288,11 @@ rank_binarize_wrapper <- function( #' @details This function does not run any LLM service. It simply creates the .txt #' file that can then be used any LLM service (e.g. OpenAI, Gemini, ...) #' @export -writeChatGPTquery = function(DEG_output, - top_n_genes = 10, - tissue_type = 'human breast cancer', - folder_name = getwd(), - file_name = 'chatgpt_query.txt') { +writeChatGPTqueryDEG = function(DEG_output, + top_n_genes = 10, + tissue_type = 'human breast cancer', + folder_name = getwd(), + file_name = 'chatgpt_query.txt') { chatgpt_query = paste0("Identify cell types of ", tissue_type, " tissue using the following markers. Identify one cell type for each row. Only provide the cell type name and the marker genes used for cell type identification.") From cf8232c0bd7619d04bbe24e3cda8a16aa5772614 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:41:05 -0400 Subject: [PATCH 087/150] chore: remove ggrepel dep - rely on re-exported function from GiottoVisuals --- DESCRIPTION | 3 +-- R/auxiliary_giotto.R | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a8426d509..fd2f54709 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,9 +40,8 @@ Imports: data.table (>= 1.12.2), dbscan (>= 1.1-3), ggplot2 (>= 3.1.1), - ggrepel, GiottoUtils (>= 0.1.9), - GiottoVisuals (>= 0.2.2), + GiottoVisuals (>= 0.2.4), igraph (>= 1.2.4.1), jsonlite, limma, diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 11dafbe7a..82a65fd15 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -437,7 +437,7 @@ filterCombinations <- function( pl <- pl + scale_color_discrete( guide = guide_legend(title = "threshold(s)") ) - pl <- pl + ggrepel::geom_text_repel(data = result_DT, aes( + pl <- pl + geom_text_repel(data = result_DT, aes( x = removed_cells + x_axis_offset, y = removed_feats + y_axis_offset, label = combination From 09a52204c6380eb9e98c53e7d1683763866035c2 Mon Sep 17 00:00:00 2001 From: Ruben Dries Date: Mon, 8 Jul 2024 09:59:54 -0400 Subject: [PATCH 088/150] clustering changes --- R/clustering.R | 108 +++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 58 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index 950debf62..65c36846c 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -1204,11 +1204,11 @@ doSNNCluster <- function( #' @param gobject giotto object #' @param feat_type feature type (e.g. "cell") #' @param spat_unit spatial unit (e.g. "rna", "dna", "protein") -#' @param expression_values expression values to use +#' @param expression_values expression values from list_expression() #' (e.g. "normalized", "scaled", "custom") -#' @param feats_to_use subset of features to use -#' @param dim_reduction_to_use dimension reduction to use -#' (e.g. "cells", "pca", "umap", "tsne") +#' @param feats_to_use (optional) subset of features to use +#' @param dim_reduction_to_use dimension reduction from list_dim_reductions() +#' (e.g. "pca", "umap", "tsne") #' @param dim_reduction_name dimensions reduction name, default to "pca" #' @param dimensions_to_use dimensions to use, default = 1:10 #' @param distance_method distance method (e.g. "original", "pearson", @@ -1222,8 +1222,10 @@ doSNNCluster <- function( #' @param return_gobject boolean: return giotto object (default = TRUE) #' @param set_seed set seed (default = TRUE) #' @param seed_number number for seed -#' @returns giotto object with new clusters appended to cell metadata -#' @details Description on how to use Kmeans clustering method. +#' @returns if return_gobject = TRUE: giotto object with new clusters appended to cell metadata +#' @details The default settings will use dimension reduction results as input. +#' Set dim_reduction_to_use = NULL if you want to directly use expression values as input. +#' By providing a feature vector to feats_to_use you can subset the expression matrix. #' @seealso \code{\link[stats]{kmeans}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") @@ -1252,6 +1254,7 @@ doKmeans <- function( return_gobject = TRUE, set_seed = TRUE, seed_number = 1234) { + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1264,10 +1267,6 @@ doKmeans <- function( ) - dim_reduction_to_use <- match.arg( - dim_reduction_to_use, - choices = c("cells", "pca", "umap", "tsne") - ) distance_method <- match.arg(distance_method, choices = c( "original", "pearson", "spearman", "euclidean", "maximum", "manhattan", @@ -1276,52 +1275,49 @@ doKmeans <- function( ## using dimension reduction ## - if (dim_reduction_to_use != "cells" & !is.null(dim_reduction_to_use)) { - ## TODO: check if reduction exists - - # use only available dimensions if dimensions < dimensions_to_use - dim_coord <- get_dimReduction( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - reduction = "cells", - reduction_method = dim_reduction_to_use, - name = dim_reduction_name, - output = "dimObj" - ) - - dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% seq_len(ncol(dim_coord[])) - ] - matrix_to_use <- dim_coord[][, dimensions_to_use] + if(!is.null(dim_reduction_to_use)) { + + # use only available dimensions if dimensions < dimensions_to_use + dim_coord <- get_dimReduction( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + reduction = "cells", + reduction_method = dim_reduction_to_use, + name = dim_reduction_name, + output = "dimObj" + ) + + dimensions_to_use <- dimensions_to_use[ + dimensions_to_use %in% seq_len(ncol(dim_coord[])) + ] + matrix_to_use <- dim_coord[][, dimensions_to_use] + } else { - values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values)) - ) - - ## using original matrix ## - expr_values <- getExpression( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - values = values, - output = "exprObj" - ) - - # subset expression matrix - if (!is.null(feats_to_use)) { - expr_values[] <- expr_values[][ - rownames(expr_values[]) %in% feats_to_use, - ] - } - - # features as columns - # cells as rows - matrix_to_use <- t_flex(expr_values[]) + + ## using original matrix ## + expr_values <- getExpression( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + values = expression_values, + output = "exprObj" + ) + + # subset expression matrix + if (!is.null(feats_to_use)) { + expr_values[] <- expr_values[][ + rownames(expr_values[]) %in% feats_to_use, + ] + } + + # features as columns + # cells as rows + matrix_to_use <- t_flex(expr_values[]) + } - - + + ## distance if (distance_method == "original") { celldist <- matrix_to_use @@ -1480,10 +1476,6 @@ doHclust <- function( ) - dim_reduction_to_use <- match.arg( - dim_reduction_to_use, - choices = c("cells", "pca", "umap", "tsne") - ) distance_method <- match.arg( distance_method, choices = c( From f62ac559030ff803c81f7678a05d2349e5ba27a8 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:59:22 -0400 Subject: [PATCH 089/150] chore: update suite reqs - needs GiottoClass 0.3.2 & GiottoVisuals 0.2.4 for the affine image improvements --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 592ad0f7f..26d84d72f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,7 @@ Depends: utils (>= 4.1.0), R (>= 4.1.0), methods, - GiottoClass (>= 0.3.1) + GiottoClass (>= 0.3.2) Imports: BiocParallel, BiocSingular, @@ -42,7 +42,7 @@ Imports: ggplot2 (>= 3.1.1), ggrepel, GiottoUtils (>= 0.1.9), - GiottoVisuals (>= 0.2.2), + GiottoVisuals (>= 0.2.4), igraph (>= 1.2.4.1), jsonlite, limma, From 8946e0b203ab65175dfa4c80cccea1170e4a6a26 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:40:22 -0400 Subject: [PATCH 090/150] new: `read10xAffineImage()` - import function for 10x images supplied with an affine transform matrix --- R/general_help.R | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/R/general_help.R b/R/general_help.R index 67e6e7c13..4533e5f97 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -678,6 +678,43 @@ get10Xmatrix_h5 <- function( } +#' @name read10xAffineImage +#' @description Read a 10x image that is provided with an affine matrix +#' transform. Loads the image in with an orientation that matches the dataset +#' points and polygons vector information +#' @param file filepath to image +#' @param micron micron scaling. Directly used if a numeric is supplied. +#' Also prefers a filepath to the `experiment.xenium` file which contains this +#' info. A default of 0.2125 is provided. +#' @param affine filepath to `...imagealignment.csv` which contains an affine +#' transformation matrix +#' @keywords internal +read10xAffineImage <- function( + file, imagealignment_path, micron = 0.2125 +) { + checkmate::assert_file_exists(file) + checkmate::assert_file_exists(imagealignment_path) + if (!is.numeric(micron)) { + checkmate::assert_file_exists(micron) + micron <- jsonlite::read_json(micron)$pixel_size + } + + aff <- data.table::fread(imagealignment_path) %>% + as.matrix() + + img <- createGiottoLargeImage(file) + + aff_img <- .tenx_img_affine(x = img, affine = aff, micron = micron) + + return(aff_img) +} + +.tenx_img_affine <- function(x, affine, micron) { + x %>% + affine(affine[seq(2), seq(2)]) %>% + rescale(micron, x0 = 0, y0 = 0) %>% + spatShift(dx = affine[1,3] * micron, dy = -affine[2,3] * micron) +} From bc37666c519e62d03137d4a6f8811c1308b985e8 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:45:14 -0400 Subject: [PATCH 091/150] fix: actually export the function --- R/general_help.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/general_help.R b/R/general_help.R index 4533e5f97..647235fc6 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -688,7 +688,7 @@ get10Xmatrix_h5 <- function( #' info. A default of 0.2125 is provided. #' @param affine filepath to `...imagealignment.csv` which contains an affine #' transformation matrix -#' @keywords internal +#' @export read10xAffineImage <- function( file, imagealignment_path, micron = 0.2125 ) { From 50aface91c1b232daad698446fc990d4796af923 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:46:32 -0400 Subject: [PATCH 092/150] chore: document --- NAMESPACE | 1 + NEWS.md | 3 +++ 2 files changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 5e5be9585..6203d7e48 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -327,6 +327,7 @@ export(processGiotto) export(prov) export(rankEnrich) export(rankSpatialCorGroups) +export(read10xAffineImage) export(readCellMetadata) export(readDimReducData) export(readExprData) diff --git a/NEWS.md b/NEWS.md index 5fe9c1560..6b5faaf80 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ * Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale [#964](https://github.com/drieslab/Giotto/issues/964) by jweis3 * Fix error in DWLS `find_dampening_constant()` when `S[subset, ]` produces only 1 gene. +## New +* `read10xAffineImage()` for reading 10x affine tranformed images + # Giotto 4.0.9 ## Breaking changes From 348974bbc5da4413f11603f4c7d73699cda81937 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 10 Jul 2024 11:07:27 -0400 Subject: [PATCH 093/150] fix: incorrect gstat formula generation --- R/kriging.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/kriging.R b/R/kriging.R index ff3da4505..70ab6f585 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -194,7 +194,7 @@ setMethod( # model to use model <- gstat::gstat( id = feat, - formula = as.formula(paste(feat, "~ 1")), + formula = as.formula(sprintf("`%s` ~ 1"), feat), locations = ~ sdimx + sdimy, data = annotatedlocs, nmax = 7, From 81cd6b4e9d7e46c7b4645481ef78ac2e7ecb9b95 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 10 Jul 2024 11:12:46 -0400 Subject: [PATCH 094/150] fix: typo --- R/kriging.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/kriging.R b/R/kriging.R index 70ab6f585..1b48ecc29 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -194,7 +194,7 @@ setMethod( # model to use model <- gstat::gstat( id = feat, - formula = as.formula(sprintf("`%s` ~ 1"), feat), + formula = as.formula(sprintf("`%s` ~ 1", feat)), locations = ~ sdimx + sdimy, data = annotatedlocs, nmax = 7, From b63f144717e9689376dd6fb4a1ea96ad235f7d39 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Thu, 11 Jul 2024 14:06:51 -0400 Subject: [PATCH 095/150] fix missing title in read10xAffineImage documentation --- R/general_help.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/general_help.R b/R/general_help.R index 6f89c7c74..faef358d8 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -721,7 +721,7 @@ get10Xmatrix_h5 <- function( } } - +#' read10xAffineImage #' @name read10xAffineImage #' @description Read a 10x image that is provided with an affine matrix #' transform. Loads the image in with an orientation that matches the dataset From 9b3de1ac628e928cf106c0a612d7de07950eab41 Mon Sep 17 00:00:00 2001 From: josschavezf Date: Thu, 11 Jul 2024 14:07:07 -0400 Subject: [PATCH 096/150] run devtools::document --- DESCRIPTION | 2 +- NAMESPACE | 1 + man/doKmeans.Rd | 14 ++++++++------ man/read10xAffineImage.Rd | 23 +++++++++++++++++++++++ man/writeChatGPTqueryDEG.Rd | 37 +++++++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 7 deletions(-) create mode 100644 man/read10xAffineImage.Rd create mode 100644 man/writeChatGPTqueryDEG.Rd diff --git a/DESCRIPTION b/DESCRIPTION index b269be49d..ab1607bd7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,7 @@ Encoding: UTF-8 LazyData: true URL: https://drieslab.github.io/Giotto/, https://github.com/drieslab/Giotto BugReports: https://github.com/drieslab/Giotto/issues -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Depends: base (>= 4.1.0), utils (>= 4.1.0), diff --git a/NAMESPACE b/NAMESPACE index 6203d7e48..a18aa8ef6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -494,6 +494,7 @@ export(viewHMRFresults3D) export(viewHMRFresults_V2) export(violinPlot) export(wrap) +export(writeChatGPTqueryDEG) export(writeGiottoLargeImage) export(writeHMRFresults) exportMethods("$") diff --git a/man/doKmeans.Rd b/man/doKmeans.Rd index cb53d3005..3bfc8552d 100644 --- a/man/doKmeans.Rd +++ b/man/doKmeans.Rd @@ -32,13 +32,13 @@ doKmeans( \item{spat_unit}{spatial unit (e.g. "rna", "dna", "protein")} -\item{expression_values}{expression values to use +\item{expression_values}{expression values from list_expression() (e.g. "normalized", "scaled", "custom")} -\item{feats_to_use}{subset of features to use} +\item{feats_to_use}{(optional) subset of features to use} -\item{dim_reduction_to_use}{dimension reduction to use -(e.g. "cells", "pca", "umap", "tsne")} +\item{dim_reduction_to_use}{dimension reduction from list_dim_reductions() +(e.g. "pca", "umap", "tsne")} \item{dim_reduction_name}{dimensions reduction name, default to "pca"} @@ -65,13 +65,15 @@ doKmeans( \item{seed_number}{number for seed} } \value{ -giotto object with new clusters appended to cell metadata +if return_gobject = TRUE: giotto object with new clusters appended to cell metadata } \description{ cluster cells using kmeans algorithm } \details{ -Description on how to use Kmeans clustering method. +The default settings will use dimension reduction results as input. +Set dim_reduction_to_use = NULL if you want to directly use expression values as input. +By providing a feature vector to feats_to_use you can subset the expression matrix. } \examples{ g <- GiottoData::loadGiottoMini("visium") diff --git a/man/read10xAffineImage.Rd b/man/read10xAffineImage.Rd new file mode 100644 index 000000000..b50226a50 --- /dev/null +++ b/man/read10xAffineImage.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/general_help.R +\name{read10xAffineImage} +\alias{read10xAffineImage} +\title{read10xAffineImage} +\usage{ +read10xAffineImage(file, imagealignment_path, micron = 0.2125) +} +\arguments{ +\item{file}{filepath to image} + +\item{micron}{micron scaling. Directly used if a numeric is supplied. +Also prefers a filepath to the `experiment.xenium` file which contains this +info. A default of 0.2125 is provided.} + +\item{affine}{filepath to `...imagealignment.csv` which contains an affine +transformation matrix} +} +\description{ +Read a 10x image that is provided with an affine matrix +transform. Loads the image in with an orientation that matches the dataset +points and polygons vector information +} diff --git a/man/writeChatGPTqueryDEG.Rd b/man/writeChatGPTqueryDEG.Rd new file mode 100644 index 000000000..034f77fa4 --- /dev/null +++ b/man/writeChatGPTqueryDEG.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/general_help.R +\name{writeChatGPTqueryDEG} +\alias{writeChatGPTqueryDEG} +\title{writeChatGPTqueryDEG} +\usage{ +writeChatGPTqueryDEG( + DEG_output, + top_n_genes = 10, + tissue_type = "human breast cancer", + folder_name = getwd(), + file_name = "chatgpt_query.txt" +) +} +\arguments{ +\item{DEG_output}{the output format from the differenetial expression functions} + +\item{top_n_genes}{number of genes for each cluster} + +\item{tissue_type}{tissue type} + +\item{folder_name}{path to the folder where you want to save the .txt file} + +\item{file_name}{name of .txt file} +} +\value{ +writes a .txt file to the desired location +} +\description{ +This function writes a query as a .txt file that can be used with +ChatGPT or a similar LLM service to find the most likely cell types based on the +top differential expressed genes (DEGs) between identified clusters. +} +\details{ +This function does not run any LLM service. It simply creates the .txt +file that can then be used any LLM service (e.g. OpenAI, Gemini, ...) +} From d5d9bbdb8d82e89f635ce41563627cf2b6b7803a Mon Sep 17 00:00:00 2001 From: iqraAmin Date: Thu, 11 Jul 2024 17:10:23 -0400 Subject: [PATCH 097/150] Added VisiumHD Convenience Function --- NAMESPACE | 281 ------- R/convenience_visiumHD.R | 1538 +++++++++++++++++++++++++------------- 2 files changed, 1015 insertions(+), 804 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5e5be9585..ac9ff328f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -506,287 +506,6 @@ import(methods) import(stats, except = density) import(utils) importClassesFrom(data.table,data.table) -importFrom(GiottoClass,"activeFeatType<-") -importFrom(GiottoClass,"activeSpatUnit<-") -importFrom(GiottoClass,"ext<-") -importFrom(GiottoClass,"featType<-") -importFrom(GiottoClass,"instructions<-") -importFrom(GiottoClass,"objName<-") -importFrom(GiottoClass,"prov<-") -importFrom(GiottoClass,"spatUnit<-") -importFrom(GiottoClass,activeFeatType) -importFrom(GiottoClass,activeSpatUnit) -importFrom(GiottoClass,addCellMetadata) -importFrom(GiottoClass,addFeatMetadata) -importFrom(GiottoClass,addGiottoImage) -importFrom(GiottoClass,addGiottoImageMG) -importFrom(GiottoClass,addGiottoLargeImage) -importFrom(GiottoClass,addGiottoPoints) -importFrom(GiottoClass,addGiottoPoints3D) -importFrom(GiottoClass,addGiottoPolygons) -importFrom(GiottoClass,addNetworkLayout) -importFrom(GiottoClass,addSpatialCentroidLocations) -importFrom(GiottoClass,addSpatialCentroidLocationsLayer) -importFrom(GiottoClass,aggregateStacks) -importFrom(GiottoClass,aggregateStacksExpression) -importFrom(GiottoClass,aggregateStacksLocations) -importFrom(GiottoClass,aggregateStacksPolygonOverlaps) -importFrom(GiottoClass,aggregateStacksPolygons) -importFrom(GiottoClass,anndataToGiotto) -importFrom(GiottoClass,annotateGiotto) -importFrom(GiottoClass,annotateSpatialGrid) -importFrom(GiottoClass,annotateSpatialNetwork) -importFrom(GiottoClass,as.points) -importFrom(GiottoClass,as.polygons) -importFrom(GiottoClass,as.sf) -importFrom(GiottoClass,as.sp) -importFrom(GiottoClass,as.stars) -importFrom(GiottoClass,as.terra) -importFrom(GiottoClass,calculateMetaTable) -importFrom(GiottoClass,calculateMetaTableCells) -importFrom(GiottoClass,calculateOverlap) -importFrom(GiottoClass,calculateOverlapParallel) -importFrom(GiottoClass,calculateOverlapPolygonImages) -importFrom(GiottoClass,calculateOverlapRaster) -importFrom(GiottoClass,calculateOverlapSerial) -importFrom(GiottoClass,calculateSpatCellMetadataProportions) -importFrom(GiottoClass,centroids) -importFrom(GiottoClass,changeGiottoInstructions) -importFrom(GiottoClass,changeImageBg) -importFrom(GiottoClass,checkGiottoEnvironment) -importFrom(GiottoClass,circleVertices) -importFrom(GiottoClass,combineCellData) -importFrom(GiottoClass,combineFeatureData) -importFrom(GiottoClass,combineFeatureOverlapData) -importFrom(GiottoClass,combineMetadata) -importFrom(GiottoClass,combineSpatialCellFeatureInfo) -importFrom(GiottoClass,combineSpatialCellMetadataInfo) -importFrom(GiottoClass,combineToMultiPolygon) -importFrom(GiottoClass,convertGiottoLargeImageToMG) -importFrom(GiottoClass,copy) -importFrom(GiottoClass,createBentoAdata) -importFrom(GiottoClass,createCellMetaObj) -importFrom(GiottoClass,createDimObj) -importFrom(GiottoClass,createExprObj) -importFrom(GiottoClass,createFeatMetaObj) -importFrom(GiottoClass,createGiottoImage) -importFrom(GiottoClass,createGiottoInstructions) -importFrom(GiottoClass,createGiottoLargeImage) -importFrom(GiottoClass,createGiottoLargeImageList) -importFrom(GiottoClass,createGiottoObject) -importFrom(GiottoClass,createGiottoObjectSubcellular) -importFrom(GiottoClass,createGiottoPoints) -importFrom(GiottoClass,createGiottoPolygon) -importFrom(GiottoClass,createGiottoPolygonsFromDfr) -importFrom(GiottoClass,createGiottoPolygonsFromGeoJSON) -importFrom(GiottoClass,createGiottoPolygonsFromMask) -importFrom(GiottoClass,createMetafeats) -importFrom(GiottoClass,createNearestNetObj) -importFrom(GiottoClass,createNearestNetwork) -importFrom(GiottoClass,createSpatEnrObj) -importFrom(GiottoClass,createSpatLocsObj) -importFrom(GiottoClass,createSpatNetObj) -importFrom(GiottoClass,createSpatialDefaultGrid) -importFrom(GiottoClass,createSpatialDelaunayNetwork) -importFrom(GiottoClass,createSpatialFeaturesKNNnetwork) -importFrom(GiottoClass,createSpatialGrid) -importFrom(GiottoClass,createSpatialKNNnetwork) -importFrom(GiottoClass,createSpatialNetwork) -importFrom(GiottoClass,createSpatialWeightMatrix) -importFrom(GiottoClass,crop) -importFrom(GiottoClass,cropGiottoLargeImage) -importFrom(GiottoClass,density) -importFrom(GiottoClass,distGiottoImage) -importFrom(GiottoClass,estimateImageBg) -importFrom(GiottoClass,ext) -importFrom(GiottoClass,fDataDT) -importFrom(GiottoClass,featIDs) -importFrom(GiottoClass,featType) -importFrom(GiottoClass,featureNetwork) -importFrom(GiottoClass,flip) -importFrom(GiottoClass,gefToGiotto) -importFrom(GiottoClass,getCellMetadata) -importFrom(GiottoClass,getDimReduction) -importFrom(GiottoClass,getExpression) -importFrom(GiottoClass,getFeatureInfo) -importFrom(GiottoClass,getFeatureMetadata) -importFrom(GiottoClass,getGiottoImage) -importFrom(GiottoClass,getMultiomics) -importFrom(GiottoClass,getNearestNetwork) -importFrom(GiottoClass,getPolygonInfo) -importFrom(GiottoClass,getSpatialEnrichment) -importFrom(GiottoClass,getSpatialGrid) -importFrom(GiottoClass,getSpatialLocations) -importFrom(GiottoClass,getSpatialNetwork) -importFrom(GiottoClass,giotto) -importFrom(GiottoClass,giottoImage) -importFrom(GiottoClass,giottoLargeImage) -importFrom(GiottoClass,giottoMasterToSuite) -importFrom(GiottoClass,giottoPoints) -importFrom(GiottoClass,giottoPolygon) -importFrom(GiottoClass,giottoToAnnData) -importFrom(GiottoClass,giottoToSeurat) -importFrom(GiottoClass,giottoToSeuratV4) -importFrom(GiottoClass,giottoToSeuratV5) -importFrom(GiottoClass,giottoToSpatialExperiment) -importFrom(GiottoClass,hexVertices) -importFrom(GiottoClass,hist) -importFrom(GiottoClass,installGiottoEnvironment) -importFrom(GiottoClass,instructions) -importFrom(GiottoClass,joinGiottoObjects) -importFrom(GiottoClass,loadGiotto) -importFrom(GiottoClass,makePseudoVisium) -importFrom(GiottoClass,objHistory) -importFrom(GiottoClass,objName) -importFrom(GiottoClass,orthoGrid) -importFrom(GiottoClass,overlapImagesToMatrix) -importFrom(GiottoClass,overlapToMatrix) -importFrom(GiottoClass,overlapToMatrixMultiPoly) -importFrom(GiottoClass,overlaps) -importFrom(GiottoClass,pDataDT) -importFrom(GiottoClass,plotGiottoImage) -importFrom(GiottoClass,polyStamp) -importFrom(GiottoClass,prov) -importFrom(GiottoClass,readCellMetadata) -importFrom(GiottoClass,readDimReducData) -importFrom(GiottoClass,readExprData) -importFrom(GiottoClass,readExprMatrix) -importFrom(GiottoClass,readFeatData) -importFrom(GiottoClass,readFeatMetadata) -importFrom(GiottoClass,readGiottoInstructions) -importFrom(GiottoClass,readNearestNetData) -importFrom(GiottoClass,readPolygonData) -importFrom(GiottoClass,readSpatEnrichData) -importFrom(GiottoClass,readSpatLocsData) -importFrom(GiottoClass,readSpatNetData) -importFrom(GiottoClass,reconnectGiottoImage) -importFrom(GiottoClass,rectVertices) -importFrom(GiottoClass,removeCellAnnotation) -importFrom(GiottoClass,removeFeatAnnotation) -importFrom(GiottoClass,removeGiottoEnvironment) -importFrom(GiottoClass,replaceGiottoInstructions) -importFrom(GiottoClass,rescale) -importFrom(GiottoClass,rescalePolygons) -importFrom(GiottoClass,saveGiotto) -importFrom(GiottoClass,setCellMetadata) -importFrom(GiottoClass,setDimReduction) -importFrom(GiottoClass,setExpression) -importFrom(GiottoClass,setFeatureInfo) -importFrom(GiottoClass,setFeatureMetadata) -importFrom(GiottoClass,setGiotto) -importFrom(GiottoClass,setGiottoImage) -importFrom(GiottoClass,setMultiomics) -importFrom(GiottoClass,setNearestNetwork) -importFrom(GiottoClass,setPolygonInfo) -importFrom(GiottoClass,setSpatialEnrichment) -importFrom(GiottoClass,setSpatialGrid) -importFrom(GiottoClass,setSpatialLocations) -importFrom(GiottoClass,setSpatialNetwork) -importFrom(GiottoClass,seuratToGiotto) -importFrom(GiottoClass,seuratToGiottoV4) -importFrom(GiottoClass,seuratToGiottoV5) -importFrom(GiottoClass,showGiottoCellMetadata) -importFrom(GiottoClass,showGiottoDimRed) -importFrom(GiottoClass,showGiottoExpression) -importFrom(GiottoClass,showGiottoFeatInfo) -importFrom(GiottoClass,showGiottoFeatMetadata) -importFrom(GiottoClass,showGiottoImageNames) -importFrom(GiottoClass,showGiottoInstructions) -importFrom(GiottoClass,showGiottoNearestNetworks) -importFrom(GiottoClass,showGiottoSpatEnrichments) -importFrom(GiottoClass,showGiottoSpatGrids) -importFrom(GiottoClass,showGiottoSpatLocs) -importFrom(GiottoClass,showGiottoSpatNetworks) -importFrom(GiottoClass,showGiottoSpatialInfo) -importFrom(GiottoClass,showProcessingSteps) -importFrom(GiottoClass,smoothGiottoPolygons) -importFrom(GiottoClass,spatIDs) -importFrom(GiottoClass,spatQueryGiottoPolygons) -importFrom(GiottoClass,spatShift) -importFrom(GiottoClass,spatUnit) -importFrom(GiottoClass,spatialExperimentToGiotto) -importFrom(GiottoClass,spin) -importFrom(GiottoClass,stitchFieldCoordinates) -importFrom(GiottoClass,stitchGiottoLargeImage) -importFrom(GiottoClass,subsetGiotto) -importFrom(GiottoClass,subsetGiottoLocs) -importFrom(GiottoClass,subsetGiottoLocsMulti) -importFrom(GiottoClass,subsetGiottoLocsSubcellular) -importFrom(GiottoClass,tessellate) -importFrom(GiottoClass,triGrid) -importFrom(GiottoClass,updateGiottoImage) -importFrom(GiottoClass,updateGiottoImageMG) -importFrom(GiottoClass,updateGiottoLargeImage) -importFrom(GiottoClass,updateGiottoObject) -importFrom(GiottoClass,updateGiottoPointsObject) -importFrom(GiottoClass,updateGiottoPolygonObject) -importFrom(GiottoClass,vect) -importFrom(GiottoClass,wrap) -importFrom(GiottoClass,writeGiottoLargeImage) -importFrom(GiottoUtils,"%>%") -importFrom(GiottoUtils,getDistinctColors) -importFrom(GiottoUtils,getRainbowColors) -importFrom(GiottoVisuals,"sankeyLabel<-") -importFrom(GiottoVisuals,"sankeyRelate<-") -importFrom(GiottoVisuals,addGiottoImageToSpatPlot) -importFrom(GiottoVisuals,dimCellPlot) -importFrom(GiottoVisuals,dimCellPlot2D) -importFrom(GiottoVisuals,dimFeatPlot2D) -importFrom(GiottoVisuals,dimFeatPlot3D) -importFrom(GiottoVisuals,dimGenePlot3D) -importFrom(GiottoVisuals,dimPlot) -importFrom(GiottoVisuals,dimPlot2D) -importFrom(GiottoVisuals,dimPlot3D) -importFrom(GiottoVisuals,getColors) -importFrom(GiottoVisuals,giottoSankeyPlan) -importFrom(GiottoVisuals,plotHeatmap) -importFrom(GiottoVisuals,plotMetaDataCellsHeatmap) -importFrom(GiottoVisuals,plotMetaDataHeatmap) -importFrom(GiottoVisuals,plotPCA) -importFrom(GiottoVisuals,plotPCA_2D) -importFrom(GiottoVisuals,plotPCA_3D) -importFrom(GiottoVisuals,plotStatDelaunayNetwork) -importFrom(GiottoVisuals,plotTSNE) -importFrom(GiottoVisuals,plotTSNE_2D) -importFrom(GiottoVisuals,plotTSNE_3D) -importFrom(GiottoVisuals,plotUMAP) -importFrom(GiottoVisuals,plotUMAP_2D) -importFrom(GiottoVisuals,plotUMAP_3D) -importFrom(GiottoVisuals,sankeyLabel) -importFrom(GiottoVisuals,sankeyPlot) -importFrom(GiottoVisuals,sankeyRelate) -importFrom(GiottoVisuals,sankeySet) -importFrom(GiottoVisuals,sankeySetAddresses) -importFrom(GiottoVisuals,showClusterDendrogram) -importFrom(GiottoVisuals,showClusterHeatmap) -importFrom(GiottoVisuals,showColorInstructions) -importFrom(GiottoVisuals,showSaveParameters) -importFrom(GiottoVisuals,spatCellPlot) -importFrom(GiottoVisuals,spatCellPlot2D) -importFrom(GiottoVisuals,spatDeconvPlot) -importFrom(GiottoVisuals,spatDimCellPlot) -importFrom(GiottoVisuals,spatDimCellPlot2D) -importFrom(GiottoVisuals,spatDimFeatPlot2D) -importFrom(GiottoVisuals,spatDimFeatPlot3D) -importFrom(GiottoVisuals,spatDimGenePlot3D) -importFrom(GiottoVisuals,spatDimPlot) -importFrom(GiottoVisuals,spatDimPlot2D) -importFrom(GiottoVisuals,spatDimPlot3D) -importFrom(GiottoVisuals,spatFeatPlot2D) -importFrom(GiottoVisuals,spatFeatPlot2D_single) -importFrom(GiottoVisuals,spatFeatPlot3D) -importFrom(GiottoVisuals,spatGenePlot3D) -importFrom(GiottoVisuals,spatInSituPlotDensity) -importFrom(GiottoVisuals,spatInSituPlotHex) -importFrom(GiottoVisuals,spatInSituPlotPoints) -importFrom(GiottoVisuals,spatNetwDistributions) -importFrom(GiottoVisuals,spatNetwDistributionsDistance) -importFrom(GiottoVisuals,spatNetwDistributionsKneighbors) -importFrom(GiottoVisuals,spatPlot) -importFrom(GiottoVisuals,spatPlot2D) -importFrom(GiottoVisuals,spatPlot3D) -importFrom(GiottoVisuals,subsetSankeySet) -importFrom(GiottoVisuals,violinPlot) importFrom(data.table,data.table) importFrom(data.table,frank) importFrom(data.table,fread) diff --git a/R/convenience_visiumHD.R b/R/convenience_visiumHD.R index d8343881f..1f8178665 100644 --- a/R/convenience_visiumHD.R +++ b/R/convenience_visiumHD.R @@ -3,84 +3,84 @@ setClass( - "VisiumHDReader", - slots = list( - visiumHD_dir = "character", - expression_source = "character", - gene_column_index = "numeric", - barcodes = "character", - array_subset_row = "numeric", - array_subset_col = "numeric", - pxl_subset_row = "numeric", - pxl_subset_col = "numeric", - calls = "list" - ), - prototype = list( - expression_source = 'raw', - gene_column_index = 2, - barcodes = NULL, - array_subset_row = NULL, - array_subset_col = NULL, - pxl_subset_row = NULL, - pxl_subset_col = NULL, - calls = list() - ) + "VisiumHDReader", + slots = list( + visiumHD_dir = "character", + expression_source = "character", + gene_column_index = "numeric", + barcodes = "character", + array_subset_row = "numeric", + array_subset_col = "numeric", + pxl_subset_row = "numeric", + pxl_subset_col = "numeric", + calls = "list" + ), + prototype = list( + expression_source = 'raw', + gene_column_index = 2, + barcodes = NULL, + array_subset_row = NULL, + array_subset_col = NULL, + pxl_subset_row = NULL, + pxl_subset_col = NULL, + calls = list() + ) ) # * show #### setMethod("show", signature("VisiumHDReader"), function(object) { - cat(sprintf("Giotto <%s>\n", "VisiumHDReader")) - print_slots <- c("dir", "expression_source", "gene_column_index", - "barcodes", "array_subset_row", "array_subset_col", - "pxl_subset_row", "pxl_subset_col", - "funs") - pre <- sprintf( - "%s :", format(print_slots) - ) - names(pre) <- print_slots - - # dir - d <- object@visiumHD_dir - if (length(d) > 0L) { - nch <- nchar(d) - d <- abbrev_path(d) - cat(pre["dir"], d, "\n") - } else { - cat(pre["dir"], "\n") - } - - # expression_source - expression_source <- object@expression_source - cat(pre["expression_source"], expression_source, "\n") - - # gene_column_index - gene_column_index <- object@gene_column_index - cat(pre["gene_column_index"], gene_column_index, "\n") - - # barcodes - barcodes <- ifelse(!is.null(object@barcodes), "found", "none") - cat(pre["barcodes"], barcodes, "\n") - - # array_subset_row - array_subset_row <- ifelse(!is.null(object@array_subset_row), "found", "none") - cat(pre["array_subset_row"], array_subset_row, "\n") - - # array_subset_col - array_subset_col <- ifelse(!is.null(object@array_subset_col), "found", "none") - cat(pre["array_subset_col"], array_subset_col, "\n") - - # pxl_subset_row - pxl_subset_row <- ifelse(!is.null(object@pxl_subset_row), "found", "none") - cat(pre["pxl_subset_row"], pxl_subset_row, "\n") - - # pxl_subset_col - pxl_subset_col <- ifelse(!is.null(object@pxl_subset_col), "found", "none") - cat(pre["pxl_subset_col"], pxl_subset_col, "\n") - - # funs - .reader_fun_prints(x = object, pre = pre["funs"]) + cat(sprintf("Giotto <%s>\n", "VisiumHDReader")) + print_slots <- c("dir", "expression_source", "gene_column_index", + "barcodes", "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col", + "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@visiumHD_dir + if (length(d) > 0L) { + nch <- nchar(d) + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # expression_source + expression_source <- object@expression_source + cat(pre["expression_source"], expression_source, "\n") + + # gene_column_index + gene_column_index <- object@gene_column_index + cat(pre["gene_column_index"], gene_column_index, "\n") + + # barcodes + barcodes <- ifelse(!is.null(object@barcodes), "found", "none") + cat(pre["barcodes"], barcodes, "\n") + + # array_subset_row + array_subset_row <- ifelse(!is.null(object@array_subset_row), "found", "none") + cat(pre["array_subset_row"], array_subset_row, "\n") + + # array_subset_col + array_subset_col <- ifelse(!is.null(object@array_subset_col), "found", "none") + cat(pre["array_subset_col"], array_subset_col, "\n") + + # pxl_subset_row + pxl_subset_row <- ifelse(!is.null(object@pxl_subset_row), "found", "none") + cat(pre["pxl_subset_row"], pxl_subset_row, "\n") + + # pxl_subset_col + pxl_subset_col <- ifelse(!is.null(object@pxl_subset_col), "found", "none") + cat(pre["pxl_subset_col"], pxl_subset_col, "\n") + + # funs + .reader_fun_prints(x = object, pre = pre["funs"]) }) # * print #### @@ -101,7 +101,7 @@ setMethod("print", signature("VisiumHDReader"), function(x, ...) show(x)) #' directories or paths. #' @param visiumHD_dir Visium HD output directory (e.g. square_016um) #' @param expression_source character. Raw or filter expression data. Defaults to 'raw' -#' @param gene_column_index numeric. Expression column to use for gene names +#' @param gene_column_index numeric. Expression column to use for gene names #' 1 = Ensembl and 2 = gene symbols #' @param barcodes character vector. (optional) Use if you only want to load #' a subset of the pixel barcodes @@ -124,240 +124,396 @@ setMethod("print", signature("VisiumHDReader"), function(x, ...) show(x)) #' # Set the visiumHD_dir #' reader$visiumHD_dir <- "path to visium HD dir" #' readerHD$visiumHD_dir <- visiumHD_dir -#' +#' #' # Load tissue positions or create cell metadata #' tissue_pos = readerHD$load_tissue_position() #' metadata <- readerHD$load_metadata() -#' +#' #' Load matrix or create expression object #' matrix <- readerHD$load_matrix() #' expression_obj = readerHD$load_expression() -#' +#' #' Load transcript data (cell metadata, expression object, and transcripts per pixel) #' my_transcripts = readerHD$load_transcripts(array_subset_row = c(500, 1000), #' array_subset_col = c(500, 1000)) -#' +#' #' # Create a `giotto` object and add the loaded data #' TODO #' } #' @export importVisiumHD <- function( - visiumHD_dir = NULL, - expression_source = 'raw', - gene_column_index = 2, - barcodes = NULL, - array_subset_row = NULL, - array_subset_col = NULL, - pxl_subset_row = NULL, - pxl_subset_col = NULL) { - - # get params - a <- list(Class = "VisiumHDReader") - - if (!is.null(visiumHD_dir)) { - a$visiumHD_dir <- visiumHD_dir - } - - a$expression_source <- expression_source - a$gene_column_index <- gene_column_index - - if (!is.null(barcodes)) { - a$barcodes <- barcodes - } - - if (!is.null(array_subset_row)) { - a$array_subset_row <- array_subset_row - } - - if (!is.null(array_subset_col)) { - a$array_subset_col <- array_subset_col - } - - if (!is.null(pxl_subset_row)) { - a$pxl_subset_row <- pxl_subset_row - } - - if (!is.null(pxl_subset_col)) { - a$pxl_subset_col <- pxl_subset_col - } - - do.call(new, args = a) + visiumHD_dir = NULL, + expression_source = 'raw', + gene_column_index = 2, + barcodes = NULL, + array_subset_row = NULL, + array_subset_col = NULL, + pxl_subset_row = NULL, + pxl_subset_col = NULL) { + + # get params + a <- list(Class = "VisiumHDReader") + + if (!is.null(visiumHD_dir)) { + a$visiumHD_dir <- visiumHD_dir + } + + a$expression_source <- expression_source + a$gene_column_index <- gene_column_index + + if (!is.null(barcodes)) { + a$barcodes <- barcodes + } + + if (!is.null(array_subset_row)) { + a$array_subset_row <- array_subset_row + } + + if (!is.null(array_subset_col)) { + a$array_subset_col <- array_subset_col + } + + if (!is.null(pxl_subset_row)) { + a$pxl_subset_row <- pxl_subset_row + } + + if (!is.null(pxl_subset_col)) { + a$pxl_subset_col <- pxl_subset_col + } + + do.call(new, args = a) } # * init #### setMethod("initialize", signature("VisiumHDReader"), function( - .Object, visiumHD_dir, - expression_source, - gene_column_index, - barcodes, - array_subset_row, - array_subset_col, - pxl_subset_row, - pxl_subset_col + .Object, visiumHD_dir, + expression_source, + gene_column_index, + barcodes, + array_subset_row, + array_subset_col, + pxl_subset_row, + pxl_subset_col ) { - - # provided params (if any) - if (!missing(visiumHD_dir)) { - checkmate::assert_directory_exists(visiumHD_dir) - .Object@visiumHD_dir <- visiumHD_dir - } - - if (!missing(expression_source)) { - .Object@expression_source <- expression_source - } - - if (!missing(gene_column_index)) { - .Object@gene_column_index <- gene_column_index - } - - if (!missing(barcodes)) { - .Object@barcodes <- barcodes - } - - if (!missing(array_subset_row)) { - .Object@array_subset_row <- array_subset_row - } - - if (!missing(array_subset_col)) { - .Object@array_subset_col <- array_subset_col - } - - if (!missing(pxl_subset_row)) { - .Object@pxl_subset_row <- pxl_subset_row - } - - if (!missing(pxl_subset_col)) { - .Object@pxl_subset_col <- pxl_subset_col - } - - # NULL case - if (length(.Object@visiumHD_dir) == 0) { - return(.Object) # return early if no path given - } - - - # detect paths and subdirs - p <- .Object@visiumHD_dir - - - .visiumHD_detect <- function(pattern, path = p, recursive = FALSE) { - .detect_in_dir(pattern = pattern, path = path, recursive = recursive, platform = "visiumHD") - } - - - filter_expr_dir <- .visiumHD_detect(pattern = "filtered_feature_bc_matrix", path = p) - raw_expr_dir <- .visiumHD_detect(pattern = "raw_feature_bc_matrix", path = p) - - s <- .Object@expression_source - if(s == 'raw') { - expr_dir = raw_expr_dir - } else if(s == 'filter') { - expr_dir = filter_expr_dir - } else { - stop('expression source for visiumHD can only be raw or filter') - } - - spatial_dir <- .visiumHD_detect(pattern = "spatial", path = p) - - - c_index <- .Object@gene_column_index - if(!c_index %in% c(1, 2)) { - stop('gene column index can only be 1 (Ensembl) or 2 (gene symbols)') - } - - - ## matrix load call - matrix_fun <- function( - path = expr_dir, - gene_column_index = c_index, - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL - ) { - .visiumHD_matrix( - path = path, - gene_column_index = gene_column_index, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type, - verbose = verbose - ) - } - .Object@calls$load_matrix <- matrix_fun - - - - ## expression load call - expression_fun <- function( - path = expr_dir, - gene_column_index = c_index, - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL - ) { - - .visiumHD_expression( - path = path, - gene_column_index = gene_column_index, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type, - verbose = verbose - ) - } - .Object@calls$load_expression <- expression_fun - - - - ## tissue position load call - tissue_position_fun <- function( - path = spatial_dir, - verbose = NULL - ) { - .visiumHD_tissue_positions( - path = path, - verbose = verbose - ) - } - .Object@calls$load_tissue_position <- tissue_position_fun - - - - ## metadata load call - meta_fun <- function( - path = spatial_dir, - verbose = NULL) { - - .visiumHD_meta( - path = path, - verbose = verbose + + # provided params (if any) + if (!missing(visiumHD_dir)) { + checkmate::assert_directory_exists(visiumHD_dir) + .Object@visiumHD_dir <- visiumHD_dir + } + + if (!missing(expression_source)) { + .Object@expression_source <- expression_source + } + + if (!missing(gene_column_index)) { + .Object@gene_column_index <- gene_column_index + } + + if (!missing(barcodes)) { + .Object@barcodes <- barcodes + } + + if (!missing(array_subset_row)) { + .Object@array_subset_row <- array_subset_row + } + + if (!missing(array_subset_col)) { + .Object@array_subset_col <- array_subset_col + } + + if (!missing(pxl_subset_row)) { + .Object@pxl_subset_row <- pxl_subset_row + } + + if (!missing(pxl_subset_col)) { + .Object@pxl_subset_col <- pxl_subset_col + } + + # NULL case + if (length(.Object@visiumHD_dir) == 0) { + return(.Object) # return early if no path given + } + + + # detect paths and subdirs + p <- .Object@visiumHD_dir + + + .visiumHD_detect <- function(pattern, path = p, recursive = FALSE) { + .detect_in_dir(pattern = pattern, path = path, recursive = recursive, platform = "visiumHD") + } + + + filter_expr_dir <- .visiumHD_detect(pattern = "filtered_feature_bc_matrix", path = p) + raw_expr_dir <- .visiumHD_detect(pattern = "raw_feature_bc_matrix", path = p) + + s <- .Object@expression_source + if(s == 'raw') { + expr_dir = raw_expr_dir + } else if(s == 'filter') { + expr_dir = filter_expr_dir + } else { + stop('expression source for visiumHD can only be raw or filter') + } + + spatial_dir <- .visiumHD_detect(pattern = "spatial", path = p) + + + c_index <- .Object@gene_column_index + if(!c_index %in% c(1, 2)) { + stop('gene column index can only be 1 (Ensembl) or 2 (gene symbols)') + } + + read_folder_fun <- function( + path = spatial_dir, + gene_column_index = c_index, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL ) - } - .Object@calls$load_metadata <- meta_fun - - - - ## transcript load call - transcript_fun <- function(expr_path = expr_dir, - tissue_positions_path = spatial_dir, - barcodes = .Object@barcodes, - array_subset_row = .Object@array_subset_row, - array_subset_col = .Object@array_subset_col, - pxl_subset_row = .Object@pxl_subset_row, - pxl_subset_col = .Object@pxl_subset_col) { - - .visiumHD_transcript(expr_path = expr_path, - tissue_positions_path = tissue_positions_path, - barcodes = barcodes, - array_subset_row = array_subset_row, - array_subset_col = array_subset_col, - pxl_subset_row = pxl_subset_row, - pxl_subset_col = pxl_subset_col, - verbose = TRUE) - - } - .Object@calls$load_transcripts <- transcript_fun - - return(.Object) + { + .visiumHD_read_folder( + path = path, + expr_data = c("raw", "filter"), + gene_column_index = 1, + png_name = NULL, + verbose = verbose) + } + .Object@calls$read_folder <- read_folder_fun + + ## matrix load call + matrix_fun <- function( + path = expr_dir, + gene_column_index = c_index, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + .visiumHD_matrix( + path = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_matrix <- matrix_fun + + + + ## expression load call + expression_fun <- function( + path = expr_dir, + gene_column_index = c_index, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + + .visiumHD_expression( + path = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_expression <- expression_fun + + + + ## tissue position load call + tissue_position_fun <- function( + path = spatial_dir, + verbose = NULL + ) { + .visiumHD_tissue_positions( + path = path, + verbose = verbose + ) + } + .Object@calls$load_tissue_position <- tissue_position_fun + + ## scale factor load call + read_scalefactors <- function( + path = spatial_dir, + verbose = NULL + ) { + .visiumHD_read_scalefactors( + path = path, + verbose = verbose + ) + } + .Object@calls$load_scalefactor <- read_scalefactors + + load_image_fun <- function( + path = spatial_dir, + image_name = c("hires", "lowres"), + scale_factor_name = c("tissue_hires_scalef", "tissue_lowres_scalef"), + verbose = NULL + ) { + .visiumHD_image( + image_path = path, + json_info = json_info, + micron_scale = micron_scale, + verbose = verbose) + } + .Object@calls$load_image <- load_image_fun + + load_poly_fun <- function( + path = expr_dir, + gpoints, + tissue_positions_path = spatial_dir, + shape = 'hexagon', + shape_size = 400, + name = 'hex400', + verbose = NULL + ) { + .visiumHD_poly( + path = path, + gpoints = gpoints, + tissue_positions_path = tissue_positions_path, + shape = shape, + shape_size = shape_size, + name = name, + verbose = verbose) + } + .Object@calls$load_polygon <- load_poly_fun + + ## metadata load call + meta_fun <- function( + path = spatial_dir, + verbose = NULL) { + + .visiumHD_meta( + path = path, + verbose = verbose + ) + } + .Object@calls$load_metadata <- meta_fun + + + + ## transcript load call + transcript_fun <- function(expr_path = expr_dir, + tissue_positions_path = spatial_dir, + barcodes = .Object@barcodes, + array_subset_row = .Object@array_subset_row, + array_subset_col = .Object@array_subset_col, + pxl_subset_row = .Object@pxl_subset_row, + pxl_subset_col = .Object@pxl_subset_col) { + + .visiumHD_transcript(expr_path = expr_path, + tissue_positions_path = tissue_positions_path, + barcodes = barcodes, + array_subset_row = array_subset_row, + array_subset_col = array_subset_col, + pxl_subset_row = pxl_subset_row, + pxl_subset_col = pxl_subset_col, + verbose = TRUE) + + } + .Object@calls$load_transcripts <- transcript_fun + + giotto_object_fun <- function( + visiumHD_dir = visiumHD_dir, + expr_data = c("raw", "filter"), + gene_column_index = 2, + tissue_positions_path = spatial_dir, + expression_path = expr_path, + metadata_path = spatial_dir, + load_expression = TRUE, + load_metadata = TRUE, + instructions = NULL, + png_name = NULL, + verbose = NULL + ) { + load_expression <- as.logical(load_expression) + load_metadata <- as.logical(load_metadata) + + funs <- .Object@calls + + # init gobject + g <- giotto() + if (!is.null(instructions)) { + instructions(g) <- instructions + } + + # transcripts + tx_list <- funs$load_transcripts( + expr_path = expr_dir, + tissue_positions_path = spatial_dir, + barcodes = .Object@barcodes, + array_subset_row = .Object@array_subset_row, + array_subset_col = .Object@array_subset_col, + pxl_subset_row = .Object@pxl_subset_row, + pxl_subset_col = .Object@pxl_subset_col + ) + + g <- setGiotto(g,tx_list$gpoints[["rna"]]) + + polys <- funs$load_polygon( + path = expr_dir, + gpoints, + tissue_positions_path = spatial_dir, + shape = 'hexagon', + shape_size = 400, + name = 'hex400', + verbose = NULL + ) + g <- setGiotto(g, polys) + g <- addSpatialCentroidLocations(gobject = g, + poly_info = "hex400") + # images + + images <- funs$load_image( + path = spatial_dir, + image_name = c("hires", "lowres"), + scale_factor_name = c("tissue_hires_scalef", "tissue_lowres_scalef"), + verbose = NULL + ) + + g <- setGiotto(g, images) + + # expression & meta + # Need to check that names agree for poly/expr/meta + allowed_ids <- spatIDs(polys) + + if (load_expression) { + exlist <- funs$load_expression( + path = expr_dir, + gene_column_index = c_index, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + #have to run this for loop its one expression matrice rn + + # only keep allowed cells and set into gobject + # for (ex in exlist) { + # bool <- colnames(ex[]) %in% allowed_ids + # ex[] <- ex[][, bool] + # g <- setGiotto(g, ex) + #} + g <- setGiotto(g, exlist[[1]]) + } + + if (load_metadata) { + cx <- funs$load_metadata( + path = metadata_path + ) + #check this later causing to appear empty cellmetadata + #cx[] <- cx[][cell_ID %in% allowed_ids,] + g <- setGiotto(g, cx) + } + + return(g) + + } + .Object@calls$create_gobject <- giotto_object_fun + + return(.Object) }) @@ -365,37 +521,37 @@ setMethod("initialize", signature("VisiumHDReader"), function( #' @export setMethod("$", signature("VisiumHDReader"), function(x, name) { - basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", - "array_subset_row", "array_subset_col", - "pxl_subset_row", "pxl_subset_col") - if (name %in% basic_info) return(methods::slot(x, name)) - - return(x@calls[[name]]) + basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) }) #' @export setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { - basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", - "array_subset_row", "array_subset_col", - "pxl_subset_row", "pxl_subset_col") - if (name %in% basic_info) { - methods::slot(x, name) <- value - return(initialize(x)) - } - - stop(sprintf("Only items in '%s' can be set", - paste0(basic_info, collapse = "', '"))) + basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) }) #' @export `.DollarNames.VisiumHDReader` <- function(x, pattern) { - dn <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", - "array_subset_row", "array_subset_col", - "pxl_subset_row", "pxl_subset_col") - if (length(methods::slot(x, "calls")) > 0) { - dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) - } - return(dn) + dn <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) } @@ -407,28 +563,28 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { remove_zero_rows = TRUE, split_by_type = TRUE, verbose = TRUE) { - - # check if path is provided - if (missing(path)) { - stop(wrap_txt( - "No path to matrix file provided or auto-detected" - ), call. = FALSE) - } - - # check existence and access rights of files - checkmate::assert_directory_exists(path) - - vmsg(.v = verbose, "loading expression matrix ...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - # load expression results with the 10X default matrix function - matrix_results <- get10Xmatrix(path_to_data = path, - gene_column_index = gene_column_index, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type) - - return(matrix_results) - + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to matrix file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading expression matrix ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # load expression results with the 10X default matrix function + matrix_results <- get10Xmatrix(path_to_data = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type) + + return(matrix_results) + } @@ -440,107 +596,340 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { remove_zero_rows = TRUE, split_by_type = TRUE, verbose = TRUE) { - - # check if path is provided - if (missing(path)) { - stop(wrap_txt( - "No path to matrix file provided or auto-detected" - ), call. = FALSE) - } - - # check existence and access rights of files - checkmate::assert_directory_exists(path) - - vmsg(.v = verbose, "loading expression matrix ...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - # load expression results with the 10X default matrix function - matrix_results <- get10Xmatrix(path_to_data = path, - gene_column_index = gene_column_index, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type) - - - exprObj = createExprObj(expression_data = matrix_results, - spat_unit = "pixel", - feat_type = 'rna', - name = "raw", - provenance = "pixel") - - return(list('rna' = exprObj)) - + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to matrix file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading expression matrix ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # load expression results with the 10X default matrix function + matrix_results <- get10Xmatrix(path_to_data = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type) + + + exprObj = createExprObj(expression_data = matrix_results, + spat_unit = "pixel", + feat_type = 'rna', + name = "raw", + provenance = "pixel") + + return(list('rna' = exprObj)) + + } +.visiumHD_read_folder <- function( + path, + expr_data = c("raw", "filter"), + gene_column_index = 1, + png_name = NULL, + verbose = NULL) { + vmsg(.v = verbose, "A structured visium directory will be used") + if (is.null(path)) + .gstop("path needs to be a path to a visium directory") + path <- path.expand(path) + path <- dirname(path) + if (!dir.exists(path)) .gstop(path, " does not exist!") + expr_data <- match.arg(expr_data, choices = c("raw", "filter")) + ## 1. check expression + expr_counts_path <- switch( + expr_data, + "raw" = paste0(path, '/', 'raw_feature_bc_matrix/'), + "filter" = paste0(path, '/', 'filtered_feature_bc_matrix/') + ) + if (!file.exists(expr_counts_path)) .gstop(expr_counts_path, "does not exist!") + + ## 2. check spatial locations + spatial_dir <- paste0(path, "/", "spatial/") + tissue_positions_path = Sys.glob(paths = file.path(spatial_dir, 'tissue_positions*')) + + ## 3. check spatial image + if(is.null(png_name)) { + png_list = list.files(spatial_dir, pattern = "*.png") + png_name = png_list[1] + } + png_path = paste0(spatial_dir,'/',png_name) + if(!file.exists(png_path)) .gstop(png_path, ' does not exist!') + ## 4. check scalefactors + scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") + if (!file.exists(scalefactors_path)) + .gstop(scalefactors_path, "does not exist!") + + list( + expr_counts_path = expr_counts_path, + gene_column_index = gene_column_index, + tissue_positions_path = tissue_positions_path, + image_path = png_path, + scale_json_path = scalefactors_path + ) +} .visiumHD_tissue_positions = function(path, verbose = TRUE) { - - # check if path is provided - if (missing(path)) { - stop(wrap_txt( - "No path to tissue positions file provided or auto-detected" - ), call. = FALSE) - } - - # check existence and access rights of files - checkmate::assert_directory_exists(path) - - vmsg(.v = verbose, "loading tissue positions file ...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - # check existence and access rights of files - tissue_positions_path = file.path(path, 'tissue_positions.parquet') - checkmate::assert_file_exists(tissue_positions_path) - - # read with parquet and data.table - tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) - - return(tissue_positions) - + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to tissue positions file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading tissue positions file ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # check existence and access rights of files + tissue_positions_path = file.path(path, 'tissue_positions.parquet') + checkmate::assert_file_exists(tissue_positions_path) + + # read with parquet and data.table + tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) + + return(tissue_positions) + +} + +.check_new_format <- function(json_scalefactors) { + return(checkmate::test_list( + x = json_scalefactors, + types = "numeric", + len = 5L + )) +} + +.adjust_expected_names <- function(new_format_2023, expected_json_names) { + if (!new_format_2023) { + expected_json_names <- expected_json_names[2:5] + } + return(expected_json_names) +} + +.validate_json_names <- function(json_scalefactors, expected_names) { + if (!setequal(names(json_scalefactors)[1:5], expected_names)) { + warning(GiottoUtils::wrap_txt( + "h5 scalefactors json names differ from expected. + [Expected]:", expected_names, "\n", + "[Actual]:", names(json_scalefactors) + )) + } +} + +.visiumHD_read_scalefactors <- function(path, verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to scale factors file provided or auto-detected" + ), call. = FALSE) + } + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading scale factors file ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # check existence and access rights of files + json_path = file.path(path, 'scalefactors_json.json') + checkmate::assert_file_exists(json_path) + + json_scalefactors <- jsonlite::read_json(json_path) + + expected_json_names <- c( + "regist_target_img_scalef", # NEW as of 2023 + "spot_diameter_fullres", + "tissue_hires_scalef", + "fiducial_diameter_fullres", + "tissue_lowres_scalef" + ) + new_format_2023 <- .check_new_format(json_scalefactors) + expected_json_names <- .adjust_expected_names(new_format_2023, expected_json_names) + .validate_json_names(json_scalefactors, expected_json_names) + + return(json_scalefactors) +} + +.visiumHD_micron_scale <- function(json_scalefactors) { + # Check if json_scalefactors is a list and contains the required field + if (!is.list(json_scalefactors)) { + stop("json_scalefactors must be a list") + } + + if (!"microns_per_pixel" %in% names(json_scalefactors)) { + stop("microns_per_pixel field is missing from json_scalefactors") + } + + # Extract the microns_per_pixel value + px_to_micron <- json_scalefactors$microns_per_pixel + + return(px_to_micron) +} +.get_image_type <- function(png_name) { + possible_types <- c("lowres", "hires") + for (img_type in possible_types) { + if (grepl(img_type, png_name)) { + return(img_type) + } + } + stop("image_path filename did not match either 'lowres' or 'hires'. Ensure the image is named accordingly.") +} + +.get_scale_factor <- function(visiumHD_img_type, json_info) { + if (is.null(json_info)) { + warning("No scalefactors json info provided. VisiumHD image scale_factor defaulting to 1.") + return(1) + } + + checkmate::assert_list(json_info) + + scale_factor <- switch(visiumHD_img_type, + "lowres" = json_info[["tissue_lowres_scalef"]], + "hires" = json_info[["tissue_hires_scalef"]], + stop("Unexpected image type: ", visiumHD_img_type)) + + if (is.null(scale_factor)) { + stop("Scale factor for ", visiumHD_img_type, " image not found in json_info.") + } + + return(scale_factor) +} + +.apply_micron_scale <- function(scale_factor, json_info, px_to_micron) { + if (isTRUE(px_to_micron)) { + scale_factor <- scale_factor * .visiumHD_micron_scale(json_info) + } + return(scale_factor) +} + +.visiumHD_image <- function(image_path, + json_info = NULL, + micron_scale = FALSE, + verbose = NULL) { + # Assume image already checked + vmsg(.v = verbose, .initial = " - ", "found image") + + # Determine image scalefactor to use + if (missing(image_path)) { + stop(wrap_txt( + "No path to image file provided or auto-detected" + ), call. = FALSE) + } + + # 1. determine image scalefactor to use ---------------------------------- # + image_path <- .visiumHD_read_folder(path = image_path) + image_path <- image_path[[4]] + json_info <- .visiumHD_read_scalefactors(dirname(image_path)) + if (!is.null(json_info)) checkmate::assert_list(json_info) + png_name <- basename(image_path) # used for name pattern matching only + + if (is.null(json_info)) { # if none provided + warning(wrap_txt( + "No scalefactors json info provided. + VisiumHD image scale_factor defaulting to 1" + )) + scale_factor <- 1 + } else { # if provided + + scale_factor <- NULL # initial value + } + visiumHD_img_type <- .get_image_type(png_name) + scale_factor <- .get_scale_factor(visiumHD_img_type, json_info) + #scale_factor <- .apply_micron_scale(scale_factor, json_info, px_to_micron) + + # 2. create image -------------------------------------------------------- # + visiumHD_img <- createGiottoLargeImage( + raster_object = image_path, + name = "image", + negative_y = TRUE, + scale_factor = (1 / scale_factor) + ) + + visiumHD_img_list <- list(visiumHD_img) + names(visiumHD_img_list) <- c("image") + + return(visiumHD_img_list) } +.visiumHD_poly = function(path, + gpoints, + tissue_positions_path, + shape = 'hexagon', + shape_size = 400, + name = 'hex400', + verbose = TRUE){ + + transcripts <- .visiumHD_transcript(expr_path = path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + tissue_positions_path = tissue_positions_path, + barcodes = NULL, + array_subset_row =c(500, 1000), + array_subset_col = c(500, 1000), + pxl_subset_row = NULL, + pxl_subset_col = NULL, + verbose = TRUE) + gpoints = transcripts[[3]] + original_feat_ext = ext(gpoints$rna@spatVector) + polygons = tessellate(extent = original_feat_ext, + shape = shape, + shape_size = shape_size, + name = name) + return(polygons) +} .visiumHD_meta = function( - path, - verbose = TRUE) { - - # check if path is provided - if (missing(path)) { - stop(wrap_txt( - "No path to tissue positions file provided or auto-detected" - ), call. = FALSE) - } - - # check existence and access rights of files - checkmate::assert_directory_exists(path) - - vmsg(.v = verbose, "loading tissue positions file ...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - # check existence and access rights of files - tissue_positions_path = file.path(path, 'tissue_positions.parquet') - checkmate::assert_file_exists(tissue_positions_path) - - # read with parquet and data.table - tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) - - vmsg(.v = verbose, "creating metadata ...") - - data.table::setnames(tissue_positions, 'barcode', 'cell_ID') - - cx <- createCellMetaObj( - metadata = tissue_positions, - spat_unit = "pixel", - feat_type = "rna", - provenance = "pixel", - verbose = verbose - ) - return(cx) - + path, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to tissue positions file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading tissue positions file ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # check existence and access rights of files + tissue_positions_path = file.path(path, 'tissue_positions.parquet') + checkmate::assert_file_exists(tissue_positions_path) + + # read with parquet and data.table + tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) + + vmsg(.v = verbose, "creating metadata ...") + + data.table::setnames(tissue_positions, 'barcode', 'cell_ID') + + cx <- createCellMetaObj( + metadata = tissue_positions, + spat_unit = "pixel", + feat_type = "rna", + provenance = "pixel", + verbose = verbose + ) + return(cx) + } @@ -551,112 +940,215 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { split_by_type = TRUE, tissue_positions_path, barcodes = NULL, - array_subset_row = NULL, - array_subset_col = NULL, + array_subset_row = c(500, 1000), + array_subset_col = c(500, 1000), pxl_subset_row = NULL, pxl_subset_col = NULL, verbose = TRUE) { - - - # function to create expression matrix - matrix = .visiumHD_matrix( - path = expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type, - verbose = verbose - ) - - - # function to create tissue position data.table - tissue_positions = .visiumHD_tissue_positions( - path = tissue_positions_path, - verbose = verbose - ) - - - - vmsg(.v = verbose, "creating visiumHD tissue position x expression data file ...") - - # subset data - if(!is.null(barcodes)) { - vmsg(.v = verbose, "subsetting visiumHD on barcodes") - tissue_positions = tissue_positions[barcode %in% barcodes] - } - - if(!is.null(array_subset_row)) { - if(is.vector(array_subset_row) & length(array_subset_row) == 2) { - vmsg(.v = verbose, "subsetting visiumHD on array rows") - tissue_positions = tissue_positions[array_row > array_subset_row[1] & array_row < array_subset_row[2]] - } else { - stop('array_subset_row was provided but is not a vector with length 2') - } - } - - if(!is.null(array_subset_col)) { - if(is.vector(array_subset_col) & length(array_subset_col) == 2) { - vmsg(.v = verbose, "subsetting visiumHD on array columns") - tissue_positions = tissue_positions[array_col > array_subset_col[1] & array_col < array_subset_col[2]] - } else { - stop('array_subset_col was provided but is not a vector with length 2') - } - } - - if(!is.null(pxl_subset_row)) { - if(is.vector(pxl_subset_row) & length(pxl_subset_row) == 2) { - vmsg(.v = verbose, "subsetting visiumHD on row pixels") - tissue_positions = tissue_positions[pxl_row_in_fullres > pxl_subset_row[1] & pxl_row_in_fullres < pxl_subset_row[2]] - } else { - cat('pxl_subset_row is ', pxl_subset_row) - stop('pxl_subset_row was provided but is not a vector with length 2') - } - } - - if(!is.null(pxl_subset_col)) { - if(is.vector(pxl_subset_col) & length(pxl_subset_col) == 2) { - vmsg(.v = verbose, "subsetting visiumHD on column pixels") - tissue_positions = tissue_positions[pxl_col_in_fullres > pxl_subset_col[1] & pxl_col_in_fullres < pxl_subset_col[2]] - } else { - cat(pxl_subset_col) - stop('pxl_subset_col was provided but is not a vector with length 2') - } - } - - # also subset matrix if needed - if(any(!is.null(c(barcodes, - array_subset_row, array_subset_col, - pxl_subset_row, pxl_subset_col)))) { - vmsg(.v = verbose, "subsetting visiumHD on expression matrix") - matrix = matrix[, colnames(matrix) %in% tissue_positions$barcode] - } - - - - - - - # convert expression matrix to minimal data.table object - matrix_tile_dt = data.table::as.data.table(Matrix::summary(matrix)) - genes = matrix@Dimnames[[1]] - samples = matrix@Dimnames[[2]] - matrix_tile_dt[, gene := genes[i]] - matrix_tile_dt[, pixel := samples[j]] - - - # merge data.table matrix and spatial coordinates to create input for Giotto Polygons - gpoints = data.table::merge.data.table(matrix_tile_dt, tissue_positions, by.x = 'pixel', by.y = 'barcode') - gpoints = gpoints[,.(pixel, pxl_row_in_fullres, pxl_col_in_fullres, gene, x)] - colnames(gpoints) = c('pixel', 'x', 'y', 'gene', 'counts') - - gpoints = createGiottoPoints(x = gpoints[,.(x, y, gene, pixel, counts)]) - - # ensure output is always a list - if (!is.list(gpoints)) { - gpoints <- list(gpoints) - names(gpoints) <- objName(gpoints[[1L]]) - } - - return(list('matrix' = matrix, 'tissue_positions' = tissue_positions, 'gpoints' = gpoints)) - + + + # function to create expression matrix + matrix = .visiumHD_matrix( + path = expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + + + # function to create tissue position data.table + tissue_positions = .visiumHD_tissue_positions( + path = tissue_positions_path, + verbose = verbose + ) + + + + vmsg(.v = verbose, "creating visiumHD tissue position x expression data file ...") + + # subset data + if(!is.null(barcodes)) { + vmsg(.v = verbose, "subsetting visiumHD on barcodes") + tissue_positions = tissue_positions[barcode %in% barcodes] + } + + if(!is.null(array_subset_row)) { + if(is.vector(array_subset_row) & length(array_subset_row) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on array rows") + tissue_positions = tissue_positions[array_row > array_subset_row[1] & array_row < array_subset_row[2]] + } else { + stop('array_subset_row was provided but is not a vector with length 2') + } + } + + if(!is.null(array_subset_col)) { + if(is.vector(array_subset_col) & length(array_subset_col) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on array columns") + tissue_positions = tissue_positions[array_col > array_subset_col[1] & array_col < array_subset_col[2]] + } else { + stop('array_subset_col was provided but is not a vector with length 2') + } + } + + if(!is.null(pxl_subset_row)) { + if(is.vector(pxl_subset_row) & length(pxl_subset_row) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on row pixels") + tissue_positions = tissue_positions[pxl_row_in_fullres > pxl_subset_row[1] & pxl_row_in_fullres < pxl_subset_row[2]] + } else { + cat('pxl_subset_row is ', pxl_subset_row) + stop('pxl_subset_row was provided but is not a vector with length 2') + } + } + + if(!is.null(pxl_subset_col)) { + if(is.vector(pxl_subset_col) & length(pxl_subset_col) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on column pixels") + tissue_positions = tissue_positions[pxl_col_in_fullres > pxl_subset_col[1] & pxl_col_in_fullres < pxl_subset_col[2]] + } else { + cat(pxl_subset_col) + stop('pxl_subset_col was provided but is not a vector with length 2') + } + } + + # also subset matrix if needed + if(any(!is.null(c(barcodes, + array_subset_row, array_subset_col, + pxl_subset_row, pxl_subset_col)))) { + vmsg(.v = verbose, "subsetting visiumHD on expression matrix") + matrix = matrix[, colnames(matrix) %in% tissue_positions$barcode] + } + + # convert expression matrix to minimal data.table object + matrix_tile_dt = data.table::as.data.table(Matrix::summary(matrix)) + genes = matrix@Dimnames[[1]] + samples = matrix@Dimnames[[2]] + matrix_tile_dt[, gene := genes[i]] + matrix_tile_dt[, pixel := samples[j]] + + + # merge data.table matrix and spatial coordinates to create input for Giotto Polygons + gpoints = data.table::merge.data.table(matrix_tile_dt, tissue_positions, by.x = 'pixel', by.y = 'barcode') + gpoints = gpoints[,.(pixel, pxl_row_in_fullres, pxl_col_in_fullres, gene, x)] + colnames(gpoints) = c('pixel', 'x', 'y', 'gene', 'counts') + + gpoints = createGiottoPoints(x = gpoints[,.(x, y, gene, pixel, counts)]) + + # ensure output is always a list + if (!is.list(gpoints)) { + gpoints <- list(gpoints) + names(gpoints) <- objName(gpoints[[1L]]) + } + + return(list('matrix' = matrix, 'tissue_positions' = tissue_positions, 'gpoints' = gpoints)) + } + +createGiottoVisiumHDObject = function(visiumHD_dir = NULL, + expr_data = c('raw', 'filter'), + gene_column_index = 1, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + cores = NA, + verbose = NULL){ + + # NSE vars + barcode = row_pxl = col_pxl = in_tissue = array_row = array_col = NULL + + # set number of cores automatically, but with limit of 10 + cores = determine_cores(cores) + data.table::setDTthreads(threads = cores) + + readerHD <- importVisiumHD() + readerHD$visiumHD_dir <- visiumHD_dir + + argslist <- readerHD$read_folder() + argslist$verbose <- verbose + argslist$expression_matrix_class <- expression_matrix_class + argslist$instructions <- instructions + + giotto_object <- do.call(.visiumHD_create, c(argslist, readerHD = readerHD)) + + return(giotto_object) +} + +.visiumHD_create <- function( + expr_counts_path, + gene_column_index = NULL, # folder + tissue_positions_path, + image_path = NULL, + scale_json_path = NULL, + png_name = NULL, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + readerHD = readerHD, + verbose = NULL +) { + # NSE vars + barcode <- cell_ID <- row_pxl <- col_pxl <- in_tissue <- array_row <- + array_col <- NULL + + if (is.null(readerHD)) { + stop("readerHD is not provided") + } + + expr_counts_path = readerHD$read_folder()[[1]] + # 1. expression + expr_results <- get10Xmatrix(path_to_data = expr_counts_path, + gene_column_index = gene_column_index) + + # if expr_results is not a list, make it a list compatible with downstream + if (!is.list(expr_results)) expr_results = list("Gene Expression" = expr_results) + + # format expected data into list to be used with readExprData() + raw_matrix_list <- list("cell" = list("rna" = list("raw" = expr_results[["Gene Expression"]]))) + + # add protein expression data to list if it exists + if ('Antibody Capture' %in% names(expr_results)) { + raw_matrix_list$cell$protein$raw <- expr_results[["Antibody Capture"]] + } + + # 2. spatial locations + tissue_positions_path = readerHD$read_folder()[[2]] + spatial_results <- readerHD$load_tissue_position() + data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") + spatial_locs <- spatial_results[,.(cell_ID, pxl_row_in_fullres,-pxl_col_in_fullres)] # flip x and y + colnames(spatial_locs) <- c("cell_ID", 'sdimx', 'sdimy') + + # 3. scalefactors (optional) + json_info <- readerHD$load_scalefactor() + + # 4. image (optional) + visium_png_list <- readerHD$load_image() + + # 5. metadata + meta_results <- spatial_results[,.(cell_ID, in_tissue, array_row, array_col)] + expr_types <- names(raw_matrix_list$cell) + meta_list <- list() + for (etype in expr_types) { + meta_list[[etype]] <- meta_results + } + + # 6. giotto object + giotto_object <- createGiottoObject( + expression = raw_matrix_list, + spatial_locs = spatial_locs, + instructions = instructions, + cell_metadata = meta_list, + largeImages = visium_png_list + ) + + # 7. polygon information + visium_polygons = readerHD$load_polygon() + giotto_object = setPolygonInfo( + gobject = giotto_object, + x = visium_polygons, + centroids_to_spatlocs = FALSE, + verbose = FALSE, + initialize = TRUE + ) + +return(giotto_object) + +} From f35fc6f2ff2dd3dd38062b98bafa6686892d7a75 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 12 Jul 2024 11:06:18 -0400 Subject: [PATCH 098/150] chore: update news --- NEWS.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6b5faaf80..e5b100900 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,12 @@ -# Giotto 4.0.10 TBD +# Giotto 4.1.0 TBD ## Bug fixes * Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale [#964](https://github.com/drieslab/Giotto/issues/964) by jweis3 * Fix error in DWLS `find_dampening_constant()` when `S[subset, ]` produces only 1 gene. ## New -* `read10xAffineImage()` for reading 10x affine tranformed images +* `read10xAffineImage()` for reading 10x affine transformed images +* Several modular importer functions # Giotto 4.0.9 From 6afe044cd59417717a66e6c405a9b276bbe03d7c Mon Sep 17 00:00:00 2001 From: Ruben Dries Date: Fri, 12 Jul 2024 16:30:01 -0400 Subject: [PATCH 099/150] function to split a graph and identify cores --- R/spatial_clusters.R | 175 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 160 insertions(+), 15 deletions(-) diff --git a/R/spatial_clusters.R b/R/spatial_clusters.R index ca7fbd6cb..71bac9c94 100644 --- a/R/spatial_clusters.R +++ b/R/spatial_clusters.R @@ -42,13 +42,30 @@ #' Get which weakly connected set of vertices each vertex is part of #' @param g igraph #' @param clus_name character. name to assign column of clustering info +#' @param all_ids (optional) character vector with all ids +#' @param missing_id_name character and name for vertices that were missing from g #' @returns data.table #' @keywords internal -.igraph_vertex_membership <- function(g, clus_name) { - membership <- igraph::components(g)$membership %>% - data.table::as.data.table(keep.rownames = TRUE) - data.table::setnames(membership, c("cell_ID", clus_name)) - membership +.igraph_vertex_membership <- function(g, + clus_name, + all_ids = NULL, + missing_id_name) { + + # get membership + membership <- igraph::components(g)$membership %>% + data.table::as.data.table(keep.rownames = TRUE) + data.table::setnames(membership, c("cell_ID", clus_name)) + + # add vertices that were missing from g back + if(!is.null(all_ids)) { + missing_ids = all_ids[!all_ids %in% V(g)$name] + missing_membership = data.table::data.table('cell_ID' = missing_ids, 'cluster_name' = missing_id_name) + data.table::setnames(missing_membership, c("cell_ID", clus_name)) + membership = data.table::rbindlist(list(membership, missing_membership)) + } + + return(membership) + } @@ -62,8 +79,12 @@ #' @param cluster_col character. Column in metadata containing original #' clustering #' @param split_clus_name character. Name to assign the split cluster results -#' information to split -#' @returns cluster annotations +#' @param include_all_ids Boolean. Include all ids, including vertex ids not found +#' in the spatial network +#' @param missing_id_name Character. Name for vertices that were missing from +#' spatial network +#' @param return_gobject Boolean. Return giotto object +#' @returns giotto object with cluster annotations #' @examples #' library(Giotto) #' g <- GiottoData::loadGiottoMini("vizgen") @@ -83,7 +104,11 @@ spatialSplitCluster <- function( feat_type = NULL, spatial_network_name = "Delaunay_network", cluster_col, - split_clus_name = paste0(cluster_col, "_split")) { + split_clus_name = paste0(cluster_col, "_split"), + include_all_ids = TRUE, + missing_id_name = 'not_connected', + return_gobject = TRUE) { + # NSE vars cell_ID <- NULL @@ -132,18 +157,138 @@ spatialSplitCluster <- function( ) # get new clusterings - new_clus_dt <- .igraph_vertex_membership( + if(isTRUE(include_all_ids)) { + # include all cell IDs + all_ids = unique(cell_meta$cell_ID) + new_clus_dt <- .igraph_vertex_membership( g = g, - clus_name = split_clus_name - ) - - gobject <- addCellMetadata( + clus_name = split_clus_name, + all_ids = all_ids, + missing_id_name = missing_id_name + ) + } else { + # only IDs present in graph + new_clus_dt <- .igraph_vertex_membership( + g = g, + clus_name = split_clus_name, + all_ids = NULL + ) + + } + + if(isTRUE(return_gobject)) { + gobject <- addCellMetadata( gobject, spat_unit = spat_unit, new_metadata = new_clus_dt, by_column = TRUE, column_cell_ID = "cell_ID" - ) + ) + return(gobject) + } else { + new_clus_dt + } + +} + + - gobject + + +#' @title Split cluster annotations based on a spatial network +#' @name identifyTMAcores +#' @inheritParams data_access_params +#' @param spatial_network_name character. Name of spatial network to use +#' @param core_id_name metadata column name for the core information +#' @param include_all_ids Boolean. Include all ids, including vertex ids not found +#' in the spatial network +#' @param missing_id_name Character. Name for vertices that were missing from +#' spatial network +#' @param return_gobject Boolean. Return giotto object +#' @returns cluster annotations +#' @export +identifyTMAcores <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + core_id_name = 'core_id', + include_all_ids = TRUE, + missing_id_name = 'not_connected', + return_gobject = TRUE) { + + + # NSE vars + cell_ID <- NULL + + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + cell_meta <- getCellMetadata( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table", + copy_obj = FALSE + ) + + sn <- getSpatialNetwork( + gobject = gobject, + spat_unit = spat_unit, + name = spatial_network_name, + output = "spatialNetworkObj", + copy_obj = FALSE, + verbose = FALSE, + ) + + + g <- GiottoClass::spat_net_to_igraph(sn) + # convert spatialNetworkObject to igraph + + + # get new clusterings + if(isTRUE(include_all_ids)) { + # include all cell IDs + all_ids = unique(cell_meta$cell_ID) + new_clus_dt <- .igraph_vertex_membership( + g = g, + clus_name = core_id_name, + all_ids = all_ids, + missing_id_name = missing_id_name + ) + } else { + # only IDs present in graph + new_clus_dt <- .igraph_vertex_membership( + g = g, + clus_name = core_id_name, + all_ids = NULL + ) + + } + + if(isTRUE(return_gobject)) { + gobject <- addCellMetadata( + gobject, + spat_unit = spat_unit, + new_metadata = new_clus_dt, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + return(gobject) + } else { + new_clus_dt + } + + } + + + + + From 47649b09bd63def2d961f9902f815d614aa11877 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:51:53 -0400 Subject: [PATCH 100/150] fix: change how data is input to kriging model --- R/kriging.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/kriging.R b/R/kriging.R index 1b48ecc29..7b132dea5 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -191,10 +191,15 @@ setMethod( name <- sprintf(name_fmt, feat) filename <- file.path(savedir, paste0(name, ".tif")) + # create subset table with only relevant data + data <- + annotatedlocs[, c("cell_ID", feat, "sdimx", "sdimy")] + data.table::setnames(data, old = feat, new = "count") + # model to use model <- gstat::gstat( id = feat, - formula = as.formula(sprintf("`%s` ~ 1", feat)), + formula = count ~ 1, locations = ~ sdimx + sdimy, data = annotatedlocs, nmax = 7, From 0693c73ae9886c5ef7bece31d235bc7a44710fd9 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:08:11 -0400 Subject: [PATCH 101/150] insert temp browser --- R/kriging.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/kriging.R b/R/kriging.R index 7b132dea5..d1706b570 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -190,7 +190,7 @@ setMethod( function(feat) { name <- sprintf(name_fmt, feat) filename <- file.path(savedir, paste0(name, ".tif")) - +browser() # create subset table with only relevant data data <- annotatedlocs[, c("cell_ID", feat, "sdimx", "sdimy")] From 2034053426107484e43f0005743bb39397375020 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:11:55 -0400 Subject: [PATCH 102/150] try fix --- R/kriging.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/kriging.R b/R/kriging.R index d1706b570..af6a862c0 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -190,7 +190,7 @@ setMethod( function(feat) { name <- sprintf(name_fmt, feat) filename <- file.path(savedir, paste0(name, ".tif")) -browser() + # create subset table with only relevant data data <- annotatedlocs[, c("cell_ID", feat, "sdimx", "sdimy")] @@ -201,7 +201,7 @@ browser() id = feat, formula = count ~ 1, locations = ~ sdimx + sdimy, - data = annotatedlocs, + data = data, nmax = 7, set = list( idp = 0.5 From dec9214dde6f11f3febb8d99b9ec96491f6da631 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:24:04 -0400 Subject: [PATCH 103/150] please fix --- R/kriging.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/kriging.R b/R/kriging.R index af6a862c0..2c0c8b204 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -192,8 +192,9 @@ setMethod( filename <- file.path(savedir, paste0(name, ".tif")) # create subset table with only relevant data - data <- - annotatedlocs[, c("cell_ID", feat, "sdimx", "sdimy")] + data <- annotatedlocs[, + c("cell_ID", feat, "sdimx", "sdimy"), with = FALSE + ] data.table::setnames(data, old = feat, new = "count") # model to use From f24ddb84fe39570c21779d570da6e2725ec4394f Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:36:48 -0400 Subject: [PATCH 104/150] chore: update news --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index e5b100900..bf585e6a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ ## Bug fixes * Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale [#964](https://github.com/drieslab/Giotto/issues/964) by jweis3 * Fix error in DWLS `find_dampening_constant()` when `S[subset, ]` produces only 1 gene. +* Fix error in `interpolateFeatures()` where feature names with `-` or starting with numbers did not work ## New * `read10xAffineImage()` for reading 10x affine transformed images From c8f1fd6d59f4a20b9e05e3402e81ea97592609f5 Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Thu, 18 Jul 2024 12:21:54 -0400 Subject: [PATCH 105/150] chore: update dbMatrix constructor name --- tests/testthat/test-dbMatrix_filterGiotto.R | 2 +- tests/testthat/test-dbMatrix_libNorm.R | 2 +- tests/testthat/test-dbMatrix_logNorm.R | 2 +- tests/testthat/test-dbMatrix_scale.R | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-dbMatrix_filterGiotto.R b/tests/testthat/test-dbMatrix_filterGiotto.R index 2d484d9c7..29979f555 100644 --- a/tests/testthat/test-dbMatrix_filterGiotto.R +++ b/tests/testthat/test-dbMatrix_filterGiotto.R @@ -8,7 +8,7 @@ dgc = getExpression(visium, output = "matrix") con = DBI::dbConnect(duckdb::duckdb(), ":memory:") -dbsm = dbMatrix::createDBMatrix(value = dgc, +dbsm = dbMatrix::dbMatrix(value = dgc, con = con, name = 'dgc', class = "dbSparseMatrix", diff --git a/tests/testthat/test-dbMatrix_libNorm.R b/tests/testthat/test-dbMatrix_libNorm.R index 9d9af3201..be575f17b 100644 --- a/tests/testthat/test-dbMatrix_libNorm.R +++ b/tests/testthat/test-dbMatrix_libNorm.R @@ -8,7 +8,7 @@ dgc = getExpression(visium, output = "matrix") con = DBI::dbConnect(duckdb::duckdb(), ":memory:") -dbsm = dbMatrix::createDBMatrix(value = dgc, +dbsm = dbMatrix::dbMatrix(value = dgc, con = con, name = 'dgc', class = "dbSparseMatrix", diff --git a/tests/testthat/test-dbMatrix_logNorm.R b/tests/testthat/test-dbMatrix_logNorm.R index c02ba8cc7..1731e634f 100644 --- a/tests/testthat/test-dbMatrix_logNorm.R +++ b/tests/testthat/test-dbMatrix_logNorm.R @@ -8,7 +8,7 @@ dgc = getExpression(visium, output = "matrix") con = DBI::dbConnect(duckdb::duckdb(), ":memory:") -dbsm = dbMatrix::createDBMatrix(value = dgc, +dbsm = dbMatrix::dbMatrix(value = dgc, con = con, name = 'dgc', class = "dbSparseMatrix", diff --git a/tests/testthat/test-dbMatrix_scale.R b/tests/testthat/test-dbMatrix_scale.R index 554816a79..b28504d35 100644 --- a/tests/testthat/test-dbMatrix_scale.R +++ b/tests/testthat/test-dbMatrix_scale.R @@ -8,7 +8,7 @@ dgc = getExpression(visium, output = "matrix") con = DBI::dbConnect(duckdb::duckdb(), ":memory:") -dbsm = dbMatrix::createDBMatrix(value = dgc, +dbsm = dbMatrix::dbMatrix(value = dgc, con = con, name = 'dgc', class = "dbSparseMatrix", From 9994770514c21e7e759991db8ae30ac328b24f20 Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Thu, 18 Jul 2024 12:27:18 -0400 Subject: [PATCH 106/150] chore: update vignette with new dbMatrix constructor --- vignettes/dbMatrix.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vignettes/dbMatrix.Rmd b/vignettes/dbMatrix.Rmd index 3b42a2d46..588737d97 100644 --- a/vignettes/dbMatrix.Rmd +++ b/vignettes/dbMatrix.Rmd @@ -54,11 +54,11 @@ dgc = getExpression(visium, output = "matrix") con = DBI::dbConnect(duckb::duckdb(), ":memory:") # Create a dbSparseMatrix using the dbMatrix constructor function -dbsm = dbMatrix::createDBMatrix(value = dgc, - con = con, - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) +dbsm = dbMatrix::dbMatrix(value = dgc, + con = con, + name = 'dgc', + class = "dbSparseMatrix", + overwrite = TRUE) # Create Giotto exprObj with the dbMatrix expObj_db = createExprObj(expression_data = dbsm, From 802b98f13685c77546ae65d9d18ffae3663248c7 Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Tue, 23 Jul 2024 18:22:08 -0400 Subject: [PATCH 107/150] fix: `dbmatrix_compute` global option --- R/auxiliary_giotto.R | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 2bf784239..70f90b493 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -785,12 +785,6 @@ filterGiotto <- function(gobject, feat_names <- rownames(raw_expr[]) col_names <- colnames(raw_expr[]) - # set global option options(giotto.dbmatrix_compute = FALSE) if not desired - # see ?dplyr::compute() for more details - if(inherits(raw_expr[], "dbMatrix")){ - compute_mat <- getOption("giotto.dbmatrix_compute", FALSE) - } - ## 1. library size normalize if (library_size_norm == TRUE) { norm_expr <- .lib_norm_giotto( @@ -860,8 +854,9 @@ filterGiotto <- function(gobject, } ## 5. create and set exprObj - # Save dbMatrix to db if global option is set - if(compute_mat & !is.null(norm_expr)){ + # Save dbMatrix to db + compute_mat <- getOption("giotto.dbmatrix_compute", default = FALSE) + if(compute_mat && !is.null(norm_expr)){ norm_expr <- .compute_dbMatrix( dbMatrix = norm_expr, name = 'normalized', @@ -878,7 +873,8 @@ filterGiotto <- function(gobject, misc = NULL ) - if(compute_mat & !is.null(norm_scaled_expr)){ + # Save dbMatrix to db + if(compute_mat && !is.null(norm_scaled_expr)){ norm_scaled_expr = .compute_dbMatrix( dbMatrix = norm_scaled_expr, name = 'scaled', From 02fce69f4eb4a60b50455f0b61a55e0f1d877280 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 26 Jul 2024 22:31:13 -0400 Subject: [PATCH 108/150] chore: document --- NAMESPACE | 282 ++++++++++++++++++++++++++++ man/dot-igraph_vertex_membership.Rd | 6 +- man/identifyTMAcores.Rd | 42 +++++ man/importVisiumHD.Rd | 4 +- man/spatialSplitCluster.Rd | 18 +- 5 files changed, 345 insertions(+), 7 deletions(-) create mode 100644 man/identifyTMAcores.Rd diff --git a/NAMESPACE b/NAMESPACE index 1e78b2e0b..f770bc45f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -261,6 +261,7 @@ export(heatmSpatialCorGenes) export(hexVertices) export(hist) export(hyperGeometricEnrich) +export(identifyTMAcores) export(importCosMx) export(importVisiumHD) export(initHMRF_V2) @@ -508,6 +509,287 @@ import(methods) import(stats, except = density) import(utils) importClassesFrom(data.table,data.table) +importFrom(GiottoClass,"activeFeatType<-") +importFrom(GiottoClass,"activeSpatUnit<-") +importFrom(GiottoClass,"ext<-") +importFrom(GiottoClass,"featType<-") +importFrom(GiottoClass,"instructions<-") +importFrom(GiottoClass,"objName<-") +importFrom(GiottoClass,"prov<-") +importFrom(GiottoClass,"spatUnit<-") +importFrom(GiottoClass,activeFeatType) +importFrom(GiottoClass,activeSpatUnit) +importFrom(GiottoClass,addCellMetadata) +importFrom(GiottoClass,addFeatMetadata) +importFrom(GiottoClass,addGiottoImage) +importFrom(GiottoClass,addGiottoImageMG) +importFrom(GiottoClass,addGiottoLargeImage) +importFrom(GiottoClass,addGiottoPoints) +importFrom(GiottoClass,addGiottoPoints3D) +importFrom(GiottoClass,addGiottoPolygons) +importFrom(GiottoClass,addNetworkLayout) +importFrom(GiottoClass,addSpatialCentroidLocations) +importFrom(GiottoClass,addSpatialCentroidLocationsLayer) +importFrom(GiottoClass,aggregateStacks) +importFrom(GiottoClass,aggregateStacksExpression) +importFrom(GiottoClass,aggregateStacksLocations) +importFrom(GiottoClass,aggregateStacksPolygonOverlaps) +importFrom(GiottoClass,aggregateStacksPolygons) +importFrom(GiottoClass,anndataToGiotto) +importFrom(GiottoClass,annotateGiotto) +importFrom(GiottoClass,annotateSpatialGrid) +importFrom(GiottoClass,annotateSpatialNetwork) +importFrom(GiottoClass,as.points) +importFrom(GiottoClass,as.polygons) +importFrom(GiottoClass,as.sf) +importFrom(GiottoClass,as.sp) +importFrom(GiottoClass,as.stars) +importFrom(GiottoClass,as.terra) +importFrom(GiottoClass,calculateMetaTable) +importFrom(GiottoClass,calculateMetaTableCells) +importFrom(GiottoClass,calculateOverlap) +importFrom(GiottoClass,calculateOverlapParallel) +importFrom(GiottoClass,calculateOverlapPolygonImages) +importFrom(GiottoClass,calculateOverlapRaster) +importFrom(GiottoClass,calculateOverlapSerial) +importFrom(GiottoClass,calculateSpatCellMetadataProportions) +importFrom(GiottoClass,centroids) +importFrom(GiottoClass,changeGiottoInstructions) +importFrom(GiottoClass,changeImageBg) +importFrom(GiottoClass,checkGiottoEnvironment) +importFrom(GiottoClass,circleVertices) +importFrom(GiottoClass,combineCellData) +importFrom(GiottoClass,combineFeatureData) +importFrom(GiottoClass,combineFeatureOverlapData) +importFrom(GiottoClass,combineMetadata) +importFrom(GiottoClass,combineSpatialCellFeatureInfo) +importFrom(GiottoClass,combineSpatialCellMetadataInfo) +importFrom(GiottoClass,combineToMultiPolygon) +importFrom(GiottoClass,convertGiottoLargeImageToMG) +importFrom(GiottoClass,copy) +importFrom(GiottoClass,createBentoAdata) +importFrom(GiottoClass,createCellMetaObj) +importFrom(GiottoClass,createDimObj) +importFrom(GiottoClass,createExprObj) +importFrom(GiottoClass,createFeatMetaObj) +importFrom(GiottoClass,createGiottoImage) +importFrom(GiottoClass,createGiottoInstructions) +importFrom(GiottoClass,createGiottoLargeImage) +importFrom(GiottoClass,createGiottoLargeImageList) +importFrom(GiottoClass,createGiottoObject) +importFrom(GiottoClass,createGiottoObjectSubcellular) +importFrom(GiottoClass,createGiottoPoints) +importFrom(GiottoClass,createGiottoPolygon) +importFrom(GiottoClass,createGiottoPolygonsFromDfr) +importFrom(GiottoClass,createGiottoPolygonsFromGeoJSON) +importFrom(GiottoClass,createGiottoPolygonsFromMask) +importFrom(GiottoClass,createMetafeats) +importFrom(GiottoClass,createNearestNetObj) +importFrom(GiottoClass,createNearestNetwork) +importFrom(GiottoClass,createSpatEnrObj) +importFrom(GiottoClass,createSpatLocsObj) +importFrom(GiottoClass,createSpatNetObj) +importFrom(GiottoClass,createSpatialDefaultGrid) +importFrom(GiottoClass,createSpatialDelaunayNetwork) +importFrom(GiottoClass,createSpatialFeaturesKNNnetwork) +importFrom(GiottoClass,createSpatialGrid) +importFrom(GiottoClass,createSpatialKNNnetwork) +importFrom(GiottoClass,createSpatialNetwork) +importFrom(GiottoClass,createSpatialWeightMatrix) +importFrom(GiottoClass,crop) +importFrom(GiottoClass,cropGiottoLargeImage) +importFrom(GiottoClass,density) +importFrom(GiottoClass,distGiottoImage) +importFrom(GiottoClass,estimateImageBg) +importFrom(GiottoClass,ext) +importFrom(GiottoClass,fDataDT) +importFrom(GiottoClass,featIDs) +importFrom(GiottoClass,featType) +importFrom(GiottoClass,featureNetwork) +importFrom(GiottoClass,flip) +importFrom(GiottoClass,gefToGiotto) +importFrom(GiottoClass,getCellMetadata) +importFrom(GiottoClass,getDimReduction) +importFrom(GiottoClass,getExpression) +importFrom(GiottoClass,getFeatureInfo) +importFrom(GiottoClass,getFeatureMetadata) +importFrom(GiottoClass,getGiottoImage) +importFrom(GiottoClass,getMultiomics) +importFrom(GiottoClass,getNearestNetwork) +importFrom(GiottoClass,getPolygonInfo) +importFrom(GiottoClass,getSpatialEnrichment) +importFrom(GiottoClass,getSpatialGrid) +importFrom(GiottoClass,getSpatialLocations) +importFrom(GiottoClass,getSpatialNetwork) +importFrom(GiottoClass,giotto) +importFrom(GiottoClass,giottoImage) +importFrom(GiottoClass,giottoLargeImage) +importFrom(GiottoClass,giottoMasterToSuite) +importFrom(GiottoClass,giottoPoints) +importFrom(GiottoClass,giottoPolygon) +importFrom(GiottoClass,giottoToAnnData) +importFrom(GiottoClass,giottoToSeurat) +importFrom(GiottoClass,giottoToSeuratV4) +importFrom(GiottoClass,giottoToSeuratV5) +importFrom(GiottoClass,giottoToSpatialExperiment) +importFrom(GiottoClass,hexVertices) +importFrom(GiottoClass,hist) +importFrom(GiottoClass,installGiottoEnvironment) +importFrom(GiottoClass,instructions) +importFrom(GiottoClass,joinGiottoObjects) +importFrom(GiottoClass,loadGiotto) +importFrom(GiottoClass,makePseudoVisium) +importFrom(GiottoClass,objHistory) +importFrom(GiottoClass,objName) +importFrom(GiottoClass,orthoGrid) +importFrom(GiottoClass,overlapImagesToMatrix) +importFrom(GiottoClass,overlapToMatrix) +importFrom(GiottoClass,overlapToMatrixMultiPoly) +importFrom(GiottoClass,overlaps) +importFrom(GiottoClass,pDataDT) +importFrom(GiottoClass,plotGiottoImage) +importFrom(GiottoClass,polyStamp) +importFrom(GiottoClass,prov) +importFrom(GiottoClass,readCellMetadata) +importFrom(GiottoClass,readDimReducData) +importFrom(GiottoClass,readExprData) +importFrom(GiottoClass,readExprMatrix) +importFrom(GiottoClass,readFeatData) +importFrom(GiottoClass,readFeatMetadata) +importFrom(GiottoClass,readGiottoInstructions) +importFrom(GiottoClass,readNearestNetData) +importFrom(GiottoClass,readPolygonData) +importFrom(GiottoClass,readSpatEnrichData) +importFrom(GiottoClass,readSpatLocsData) +importFrom(GiottoClass,readSpatNetData) +importFrom(GiottoClass,reconnectGiottoImage) +importFrom(GiottoClass,rectVertices) +importFrom(GiottoClass,removeCellAnnotation) +importFrom(GiottoClass,removeFeatAnnotation) +importFrom(GiottoClass,removeGiottoEnvironment) +importFrom(GiottoClass,replaceGiottoInstructions) +importFrom(GiottoClass,rescale) +importFrom(GiottoClass,rescalePolygons) +importFrom(GiottoClass,saveGiotto) +importFrom(GiottoClass,setCellMetadata) +importFrom(GiottoClass,setDimReduction) +importFrom(GiottoClass,setExpression) +importFrom(GiottoClass,setFeatureInfo) +importFrom(GiottoClass,setFeatureMetadata) +importFrom(GiottoClass,setGiotto) +importFrom(GiottoClass,setGiottoImage) +importFrom(GiottoClass,setMultiomics) +importFrom(GiottoClass,setNearestNetwork) +importFrom(GiottoClass,setPolygonInfo) +importFrom(GiottoClass,setSpatialEnrichment) +importFrom(GiottoClass,setSpatialGrid) +importFrom(GiottoClass,setSpatialLocations) +importFrom(GiottoClass,setSpatialNetwork) +importFrom(GiottoClass,seuratToGiotto) +importFrom(GiottoClass,seuratToGiottoV4) +importFrom(GiottoClass,seuratToGiottoV5) +importFrom(GiottoClass,showGiottoCellMetadata) +importFrom(GiottoClass,showGiottoDimRed) +importFrom(GiottoClass,showGiottoExpression) +importFrom(GiottoClass,showGiottoFeatInfo) +importFrom(GiottoClass,showGiottoFeatMetadata) +importFrom(GiottoClass,showGiottoImageNames) +importFrom(GiottoClass,showGiottoInstructions) +importFrom(GiottoClass,showGiottoNearestNetworks) +importFrom(GiottoClass,showGiottoSpatEnrichments) +importFrom(GiottoClass,showGiottoSpatGrids) +importFrom(GiottoClass,showGiottoSpatLocs) +importFrom(GiottoClass,showGiottoSpatNetworks) +importFrom(GiottoClass,showGiottoSpatialInfo) +importFrom(GiottoClass,showProcessingSteps) +importFrom(GiottoClass,smoothGiottoPolygons) +importFrom(GiottoClass,spatIDs) +importFrom(GiottoClass,spatQueryGiottoPolygons) +importFrom(GiottoClass,spatShift) +importFrom(GiottoClass,spatUnit) +importFrom(GiottoClass,spatialExperimentToGiotto) +importFrom(GiottoClass,spin) +importFrom(GiottoClass,stitchFieldCoordinates) +importFrom(GiottoClass,stitchGiottoLargeImage) +importFrom(GiottoClass,subsetGiotto) +importFrom(GiottoClass,subsetGiottoLocs) +importFrom(GiottoClass,subsetGiottoLocsMulti) +importFrom(GiottoClass,subsetGiottoLocsSubcellular) +importFrom(GiottoClass,tessellate) +importFrom(GiottoClass,triGrid) +importFrom(GiottoClass,updateGiottoImage) +importFrom(GiottoClass,updateGiottoImageMG) +importFrom(GiottoClass,updateGiottoLargeImage) +importFrom(GiottoClass,updateGiottoObject) +importFrom(GiottoClass,updateGiottoPointsObject) +importFrom(GiottoClass,updateGiottoPolygonObject) +importFrom(GiottoClass,vect) +importFrom(GiottoClass,wrap) +importFrom(GiottoClass,writeGiottoLargeImage) +importFrom(GiottoUtils,"%>%") +importFrom(GiottoUtils,getDistinctColors) +importFrom(GiottoUtils,getRainbowColors) +importFrom(GiottoVisuals,"sankeyLabel<-") +importFrom(GiottoVisuals,"sankeyRelate<-") +importFrom(GiottoVisuals,addGiottoImageToSpatPlot) +importFrom(GiottoVisuals,dimCellPlot) +importFrom(GiottoVisuals,dimCellPlot2D) +importFrom(GiottoVisuals,dimFeatPlot2D) +importFrom(GiottoVisuals,dimFeatPlot3D) +importFrom(GiottoVisuals,dimGenePlot3D) +importFrom(GiottoVisuals,dimPlot) +importFrom(GiottoVisuals,dimPlot2D) +importFrom(GiottoVisuals,dimPlot3D) +importFrom(GiottoVisuals,getColors) +importFrom(GiottoVisuals,giottoSankeyPlan) +importFrom(GiottoVisuals,plotHeatmap) +importFrom(GiottoVisuals,plotMetaDataCellsHeatmap) +importFrom(GiottoVisuals,plotMetaDataHeatmap) +importFrom(GiottoVisuals,plotPCA) +importFrom(GiottoVisuals,plotPCA_2D) +importFrom(GiottoVisuals,plotPCA_3D) +importFrom(GiottoVisuals,plotStatDelaunayNetwork) +importFrom(GiottoVisuals,plotTSNE) +importFrom(GiottoVisuals,plotTSNE_2D) +importFrom(GiottoVisuals,plotTSNE_3D) +importFrom(GiottoVisuals,plotUMAP) +importFrom(GiottoVisuals,plotUMAP_2D) +importFrom(GiottoVisuals,plotUMAP_3D) +importFrom(GiottoVisuals,sankeyLabel) +importFrom(GiottoVisuals,sankeyPlot) +importFrom(GiottoVisuals,sankeyRelate) +importFrom(GiottoVisuals,sankeySet) +importFrom(GiottoVisuals,sankeySetAddresses) +importFrom(GiottoVisuals,showClusterDendrogram) +importFrom(GiottoVisuals,showClusterHeatmap) +importFrom(GiottoVisuals,showColorInstructions) +importFrom(GiottoVisuals,showSaveParameters) +importFrom(GiottoVisuals,spatCellPlot) +importFrom(GiottoVisuals,spatCellPlot2D) +importFrom(GiottoVisuals,spatDeconvPlot) +importFrom(GiottoVisuals,spatDimCellPlot) +importFrom(GiottoVisuals,spatDimCellPlot2D) +importFrom(GiottoVisuals,spatDimFeatPlot2D) +importFrom(GiottoVisuals,spatDimFeatPlot3D) +importFrom(GiottoVisuals,spatDimGenePlot3D) +importFrom(GiottoVisuals,spatDimPlot) +importFrom(GiottoVisuals,spatDimPlot2D) +importFrom(GiottoVisuals,spatDimPlot3D) +importFrom(GiottoVisuals,spatFeatPlot2D) +importFrom(GiottoVisuals,spatFeatPlot2D_single) +importFrom(GiottoVisuals,spatFeatPlot3D) +importFrom(GiottoVisuals,spatGenePlot3D) +importFrom(GiottoVisuals,spatInSituPlotDensity) +importFrom(GiottoVisuals,spatInSituPlotHex) +importFrom(GiottoVisuals,spatInSituPlotPoints) +importFrom(GiottoVisuals,spatNetwDistributions) +importFrom(GiottoVisuals,spatNetwDistributionsDistance) +importFrom(GiottoVisuals,spatNetwDistributionsKneighbors) +importFrom(GiottoVisuals,spatPlot) +importFrom(GiottoVisuals,spatPlot2D) +importFrom(GiottoVisuals,spatPlot3D) +importFrom(GiottoVisuals,subsetSankeySet) +importFrom(GiottoVisuals,violinPlot) importFrom(data.table,data.table) importFrom(data.table,frank) importFrom(data.table,fread) diff --git a/man/dot-igraph_vertex_membership.Rd b/man/dot-igraph_vertex_membership.Rd index 937905ad9..f4362ce83 100644 --- a/man/dot-igraph_vertex_membership.Rd +++ b/man/dot-igraph_vertex_membership.Rd @@ -4,12 +4,16 @@ \alias{.igraph_vertex_membership} \title{igraph vertex membership} \usage{ -.igraph_vertex_membership(g, clus_name) +.igraph_vertex_membership(g, clus_name, all_ids = NULL, missing_id_name) } \arguments{ \item{g}{igraph} \item{clus_name}{character. name to assign column of clustering info} + +\item{all_ids}{(optional) character vector with all ids} + +\item{missing_id_name}{character and name for vertices that were missing from g} } \value{ data.table diff --git a/man/identifyTMAcores.Rd b/man/identifyTMAcores.Rd new file mode 100644 index 000000000..010f05d50 --- /dev/null +++ b/man/identifyTMAcores.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_clusters.R +\name{identifyTMAcores} +\alias{identifyTMAcores} +\title{Split cluster annotations based on a spatial network} +\usage{ +identifyTMAcores( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + core_id_name = "core_id", + include_all_ids = TRUE, + missing_id_name = "not_connected", + return_gobject = TRUE +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{spatial_network_name}{character. Name of spatial network to use} + +\item{core_id_name}{metadata column name for the core information} + +\item{include_all_ids}{Boolean. Include all ids, including vertex ids not found +in the spatial network} + +\item{missing_id_name}{Character. Name for vertices that were missing from +spatial network} + +\item{return_gobject}{Boolean. Return giotto object} +} +\value{ +cluster annotations +} +\description{ +Split cluster annotations based on a spatial network +} diff --git a/man/importVisiumHD.Rd b/man/importVisiumHD.Rd index 1b584aad3..08fb3801b 100644 --- a/man/importVisiumHD.Rd +++ b/man/importVisiumHD.Rd @@ -20,7 +20,7 @@ importVisiumHD( \item{expression_source}{character. Raw or filter expression data. Defaults to 'raw'} -\item{gene_column_index}{numeric. Expression column to use for gene names +\item{gene_column_index}{numeric. Expression column to use for gene names 1 = Ensembl and 2 = gene symbols} \item{barcodes}{character vector. (optional) Use if you only want to load @@ -74,7 +74,7 @@ expression_obj = readerHD$load_expression() Load transcript data (cell metadata, expression object, and transcripts per pixel) my_transcripts = readerHD$load_transcripts(array_subset_row = c(500, 1000), array_subset_col = c(500, 1000)) - + # Create a `giotto` object and add the loaded data TODO } diff --git a/man/spatialSplitCluster.Rd b/man/spatialSplitCluster.Rd index b0c03729b..f6d503a68 100644 --- a/man/spatialSplitCluster.Rd +++ b/man/spatialSplitCluster.Rd @@ -10,7 +10,10 @@ spatialSplitCluster( feat_type = NULL, spatial_network_name = "Delaunay_network", cluster_col, - split_clus_name = paste0(cluster_col, "_split") + split_clus_name = paste0(cluster_col, "_split"), + include_all_ids = TRUE, + missing_id_name = "not_connected", + return_gobject = TRUE ) } \arguments{ @@ -25,11 +28,18 @@ spatialSplitCluster( \item{cluster_col}{character. Column in metadata containing original clustering} -\item{split_clus_name}{character. Name to assign the split cluster results -information to split} +\item{split_clus_name}{character. Name to assign the split cluster results} + +\item{include_all_ids}{Boolean. Include all ids, including vertex ids not found +in the spatial network} + +\item{missing_id_name}{Character. Name for vertices that were missing from +spatial network} + +\item{return_gobject}{Boolean. Return giotto object} } \value{ -cluster annotations +giotto object with cluster annotations } \description{ Split cluster annotations based on a spatial network From 7ff0bf47b4fa30337326183127578e1545b68f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wen=20Wang=20=28=E7=8E=8B=E6=96=87=29?= Date: Mon, 29 Jul 2024 09:03:45 -0400 Subject: [PATCH 109/150] Add: integrate with ONTraC --- DESCRIPTION | 1 + NAMESPACE | 287 ++++++++++++++ R/ONTraC_wrapper.R | 518 +++++++++++++++++++++++++ man/dot-igraph_vertex_membership.Rd | 6 +- man/getONTraCv1Input.Rd | 45 +++ man/identifyTMAcores.Rd | 42 ++ man/importVisiumHD.Rd | 4 +- man/loadOntraCResults.Rd | 22 ++ man/load_cell_NT_score.Rd | 22 ++ man/load_cell_bin_niche_cluster.Rd | 23 ++ man/load_cell_niche_cluster_prob.Rd | 35 ++ man/load_nc_connectivity.Rd | 35 ++ man/plotCTCompositionInNicheCluster.Rd | 47 +++ man/plotCellTypeNTScore.Rd | 42 ++ man/plotNicheClusterConnectivity.Rd | 44 +++ man/reexports.Rd | 2 +- man/spatialSplitCluster.Rd | 18 +- 17 files changed, 1185 insertions(+), 8 deletions(-) create mode 100644 R/ONTraC_wrapper.R create mode 100644 man/getONTraCv1Input.Rd create mode 100644 man/identifyTMAcores.Rd create mode 100644 man/loadOntraCResults.Rd create mode 100644 man/load_cell_NT_score.Rd create mode 100644 man/load_cell_bin_niche_cluster.Rd create mode 100644 man/load_cell_niche_cluster_prob.Rd create mode 100644 man/load_nc_connectivity.Rd create mode 100644 man/plotCTCompositionInNicheCluster.Rd create mode 100644 man/plotCellTypeNTScore.Rd create mode 100644 man/plotNicheClusterConnectivity.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ab1607bd7..340b4e716 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -130,6 +130,7 @@ Remotes: drieslab/GiottoClass, drieslab/GiottoVisuals Collate: + 'ONTraC_wrapper.R' 'auxiliary_giotto.R' 'cell_segmentation.R' 'clustering.R' diff --git a/NAMESPACE b/NAMESPACE index 1e78b2e0b..98ae43773 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -238,6 +238,7 @@ export(getGEFtxCoords) export(getGiottoImage) export(getMultiomics) export(getNearestNetwork) +export(getONTraCv1Input) export(getPolygonInfo) export(getRainbowColors) export(getSpatialEnrichment) @@ -261,6 +262,7 @@ export(heatmSpatialCorGenes) export(hexVertices) export(hist) export(hyperGeometricEnrich) +export(identifyTMAcores) export(importCosMx) export(importVisiumHD) export(initHMRF_V2) @@ -272,6 +274,7 @@ export(jackstrawPlot) export(joinGiottoObjects) export(loadGiotto) export(loadHMRF) +export(loadOntraCResults) export(makePseudoVisium) export(makeSignMatrixDWLS) export(makeSignMatrixDWLSfromMatrix) @@ -291,8 +294,10 @@ export(pieCellTypesFromEnrichment) export(plotCCcomDotplot) export(plotCCcomHeatmap) export(plotCPF) +export(plotCTCompositionInNicheCluster) export(plotCellProximityFeatSpot) export(plotCellProximityFeats) +export(plotCellTypeNTScore) export(plotCellTypesFromEnrichment) export(plotCombineCCcom) export(plotCombineCellCellCommunication) @@ -307,6 +312,7 @@ export(plotInteractive3D) export(plotInteractivePolygons) export(plotMetaDataCellsHeatmap) export(plotMetaDataHeatmap) +export(plotNicheClusterConnectivity) export(plotPCA) export(plotPCA_2D) export(plotPCA_3D) @@ -508,6 +514,287 @@ import(methods) import(stats, except = density) import(utils) importClassesFrom(data.table,data.table) +importFrom(GiottoClass,"activeFeatType<-") +importFrom(GiottoClass,"activeSpatUnit<-") +importFrom(GiottoClass,"ext<-") +importFrom(GiottoClass,"featType<-") +importFrom(GiottoClass,"instructions<-") +importFrom(GiottoClass,"objName<-") +importFrom(GiottoClass,"prov<-") +importFrom(GiottoClass,"spatUnit<-") +importFrom(GiottoClass,activeFeatType) +importFrom(GiottoClass,activeSpatUnit) +importFrom(GiottoClass,addCellMetadata) +importFrom(GiottoClass,addFeatMetadata) +importFrom(GiottoClass,addGiottoImage) +importFrom(GiottoClass,addGiottoImageMG) +importFrom(GiottoClass,addGiottoLargeImage) +importFrom(GiottoClass,addGiottoPoints) +importFrom(GiottoClass,addGiottoPoints3D) +importFrom(GiottoClass,addGiottoPolygons) +importFrom(GiottoClass,addNetworkLayout) +importFrom(GiottoClass,addSpatialCentroidLocations) +importFrom(GiottoClass,addSpatialCentroidLocationsLayer) +importFrom(GiottoClass,aggregateStacks) +importFrom(GiottoClass,aggregateStacksExpression) +importFrom(GiottoClass,aggregateStacksLocations) +importFrom(GiottoClass,aggregateStacksPolygonOverlaps) +importFrom(GiottoClass,aggregateStacksPolygons) +importFrom(GiottoClass,anndataToGiotto) +importFrom(GiottoClass,annotateGiotto) +importFrom(GiottoClass,annotateSpatialGrid) +importFrom(GiottoClass,annotateSpatialNetwork) +importFrom(GiottoClass,as.points) +importFrom(GiottoClass,as.polygons) +importFrom(GiottoClass,as.sf) +importFrom(GiottoClass,as.sp) +importFrom(GiottoClass,as.stars) +importFrom(GiottoClass,as.terra) +importFrom(GiottoClass,calculateMetaTable) +importFrom(GiottoClass,calculateMetaTableCells) +importFrom(GiottoClass,calculateOverlap) +importFrom(GiottoClass,calculateOverlapParallel) +importFrom(GiottoClass,calculateOverlapPolygonImages) +importFrom(GiottoClass,calculateOverlapRaster) +importFrom(GiottoClass,calculateOverlapSerial) +importFrom(GiottoClass,calculateSpatCellMetadataProportions) +importFrom(GiottoClass,centroids) +importFrom(GiottoClass,changeGiottoInstructions) +importFrom(GiottoClass,changeImageBg) +importFrom(GiottoClass,checkGiottoEnvironment) +importFrom(GiottoClass,circleVertices) +importFrom(GiottoClass,combineCellData) +importFrom(GiottoClass,combineFeatureData) +importFrom(GiottoClass,combineFeatureOverlapData) +importFrom(GiottoClass,combineMetadata) +importFrom(GiottoClass,combineSpatialCellFeatureInfo) +importFrom(GiottoClass,combineSpatialCellMetadataInfo) +importFrom(GiottoClass,combineToMultiPolygon) +importFrom(GiottoClass,convertGiottoLargeImageToMG) +importFrom(GiottoClass,copy) +importFrom(GiottoClass,createBentoAdata) +importFrom(GiottoClass,createCellMetaObj) +importFrom(GiottoClass,createDimObj) +importFrom(GiottoClass,createExprObj) +importFrom(GiottoClass,createFeatMetaObj) +importFrom(GiottoClass,createGiottoImage) +importFrom(GiottoClass,createGiottoInstructions) +importFrom(GiottoClass,createGiottoLargeImage) +importFrom(GiottoClass,createGiottoLargeImageList) +importFrom(GiottoClass,createGiottoObject) +importFrom(GiottoClass,createGiottoObjectSubcellular) +importFrom(GiottoClass,createGiottoPoints) +importFrom(GiottoClass,createGiottoPolygon) +importFrom(GiottoClass,createGiottoPolygonsFromDfr) +importFrom(GiottoClass,createGiottoPolygonsFromGeoJSON) +importFrom(GiottoClass,createGiottoPolygonsFromMask) +importFrom(GiottoClass,createMetafeats) +importFrom(GiottoClass,createNearestNetObj) +importFrom(GiottoClass,createNearestNetwork) +importFrom(GiottoClass,createSpatEnrObj) +importFrom(GiottoClass,createSpatLocsObj) +importFrom(GiottoClass,createSpatNetObj) +importFrom(GiottoClass,createSpatialDefaultGrid) +importFrom(GiottoClass,createSpatialDelaunayNetwork) +importFrom(GiottoClass,createSpatialFeaturesKNNnetwork) +importFrom(GiottoClass,createSpatialGrid) +importFrom(GiottoClass,createSpatialKNNnetwork) +importFrom(GiottoClass,createSpatialNetwork) +importFrom(GiottoClass,createSpatialWeightMatrix) +importFrom(GiottoClass,crop) +importFrom(GiottoClass,cropGiottoLargeImage) +importFrom(GiottoClass,density) +importFrom(GiottoClass,distGiottoImage) +importFrom(GiottoClass,estimateImageBg) +importFrom(GiottoClass,ext) +importFrom(GiottoClass,fDataDT) +importFrom(GiottoClass,featIDs) +importFrom(GiottoClass,featType) +importFrom(GiottoClass,featureNetwork) +importFrom(GiottoClass,flip) +importFrom(GiottoClass,gefToGiotto) +importFrom(GiottoClass,getCellMetadata) +importFrom(GiottoClass,getDimReduction) +importFrom(GiottoClass,getExpression) +importFrom(GiottoClass,getFeatureInfo) +importFrom(GiottoClass,getFeatureMetadata) +importFrom(GiottoClass,getGiottoImage) +importFrom(GiottoClass,getMultiomics) +importFrom(GiottoClass,getNearestNetwork) +importFrom(GiottoClass,getPolygonInfo) +importFrom(GiottoClass,getSpatialEnrichment) +importFrom(GiottoClass,getSpatialGrid) +importFrom(GiottoClass,getSpatialLocations) +importFrom(GiottoClass,getSpatialNetwork) +importFrom(GiottoClass,giotto) +importFrom(GiottoClass,giottoImage) +importFrom(GiottoClass,giottoLargeImage) +importFrom(GiottoClass,giottoMasterToSuite) +importFrom(GiottoClass,giottoPoints) +importFrom(GiottoClass,giottoPolygon) +importFrom(GiottoClass,giottoToAnnData) +importFrom(GiottoClass,giottoToSeurat) +importFrom(GiottoClass,giottoToSeuratV4) +importFrom(GiottoClass,giottoToSeuratV5) +importFrom(GiottoClass,giottoToSpatialExperiment) +importFrom(GiottoClass,hexVertices) +importFrom(GiottoClass,hist) +importFrom(GiottoClass,installGiottoEnvironment) +importFrom(GiottoClass,instructions) +importFrom(GiottoClass,joinGiottoObjects) +importFrom(GiottoClass,loadGiotto) +importFrom(GiottoClass,makePseudoVisium) +importFrom(GiottoClass,objHistory) +importFrom(GiottoClass,objName) +importFrom(GiottoClass,orthoGrid) +importFrom(GiottoClass,overlapImagesToMatrix) +importFrom(GiottoClass,overlapToMatrix) +importFrom(GiottoClass,overlapToMatrixMultiPoly) +importFrom(GiottoClass,overlaps) +importFrom(GiottoClass,pDataDT) +importFrom(GiottoClass,plotGiottoImage) +importFrom(GiottoClass,polyStamp) +importFrom(GiottoClass,prov) +importFrom(GiottoClass,readCellMetadata) +importFrom(GiottoClass,readDimReducData) +importFrom(GiottoClass,readExprData) +importFrom(GiottoClass,readExprMatrix) +importFrom(GiottoClass,readFeatData) +importFrom(GiottoClass,readFeatMetadata) +importFrom(GiottoClass,readGiottoInstructions) +importFrom(GiottoClass,readNearestNetData) +importFrom(GiottoClass,readPolygonData) +importFrom(GiottoClass,readSpatEnrichData) +importFrom(GiottoClass,readSpatLocsData) +importFrom(GiottoClass,readSpatNetData) +importFrom(GiottoClass,reconnectGiottoImage) +importFrom(GiottoClass,rectVertices) +importFrom(GiottoClass,removeCellAnnotation) +importFrom(GiottoClass,removeFeatAnnotation) +importFrom(GiottoClass,removeGiottoEnvironment) +importFrom(GiottoClass,replaceGiottoInstructions) +importFrom(GiottoClass,rescale) +importFrom(GiottoClass,rescalePolygons) +importFrom(GiottoClass,saveGiotto) +importFrom(GiottoClass,setCellMetadata) +importFrom(GiottoClass,setDimReduction) +importFrom(GiottoClass,setExpression) +importFrom(GiottoClass,setFeatureInfo) +importFrom(GiottoClass,setFeatureMetadata) +importFrom(GiottoClass,setGiotto) +importFrom(GiottoClass,setGiottoImage) +importFrom(GiottoClass,setMultiomics) +importFrom(GiottoClass,setNearestNetwork) +importFrom(GiottoClass,setPolygonInfo) +importFrom(GiottoClass,setSpatialEnrichment) +importFrom(GiottoClass,setSpatialGrid) +importFrom(GiottoClass,setSpatialLocations) +importFrom(GiottoClass,setSpatialNetwork) +importFrom(GiottoClass,seuratToGiotto) +importFrom(GiottoClass,seuratToGiottoV4) +importFrom(GiottoClass,seuratToGiottoV5) +importFrom(GiottoClass,showGiottoCellMetadata) +importFrom(GiottoClass,showGiottoDimRed) +importFrom(GiottoClass,showGiottoExpression) +importFrom(GiottoClass,showGiottoFeatInfo) +importFrom(GiottoClass,showGiottoFeatMetadata) +importFrom(GiottoClass,showGiottoImageNames) +importFrom(GiottoClass,showGiottoInstructions) +importFrom(GiottoClass,showGiottoNearestNetworks) +importFrom(GiottoClass,showGiottoSpatEnrichments) +importFrom(GiottoClass,showGiottoSpatGrids) +importFrom(GiottoClass,showGiottoSpatLocs) +importFrom(GiottoClass,showGiottoSpatNetworks) +importFrom(GiottoClass,showGiottoSpatialInfo) +importFrom(GiottoClass,showProcessingSteps) +importFrom(GiottoClass,smoothGiottoPolygons) +importFrom(GiottoClass,spatIDs) +importFrom(GiottoClass,spatQueryGiottoPolygons) +importFrom(GiottoClass,spatShift) +importFrom(GiottoClass,spatUnit) +importFrom(GiottoClass,spatialExperimentToGiotto) +importFrom(GiottoClass,spin) +importFrom(GiottoClass,stitchFieldCoordinates) +importFrom(GiottoClass,stitchGiottoLargeImage) +importFrom(GiottoClass,subsetGiotto) +importFrom(GiottoClass,subsetGiottoLocs) +importFrom(GiottoClass,subsetGiottoLocsMulti) +importFrom(GiottoClass,subsetGiottoLocsSubcellular) +importFrom(GiottoClass,tessellate) +importFrom(GiottoClass,triGrid) +importFrom(GiottoClass,updateGiottoImage) +importFrom(GiottoClass,updateGiottoImageMG) +importFrom(GiottoClass,updateGiottoLargeImage) +importFrom(GiottoClass,updateGiottoObject) +importFrom(GiottoClass,updateGiottoPointsObject) +importFrom(GiottoClass,updateGiottoPolygonObject) +importFrom(GiottoClass,vect) +importFrom(GiottoClass,wrap) +importFrom(GiottoClass,writeGiottoLargeImage) +importFrom(GiottoUtils,"%>%") +importFrom(GiottoUtils,getDistinctColors) +importFrom(GiottoUtils,getRainbowColors) +importFrom(GiottoVisuals,"sankeyLabel<-") +importFrom(GiottoVisuals,"sankeyRelate<-") +importFrom(GiottoVisuals,addGiottoImageToSpatPlot) +importFrom(GiottoVisuals,dimCellPlot) +importFrom(GiottoVisuals,dimCellPlot2D) +importFrom(GiottoVisuals,dimFeatPlot2D) +importFrom(GiottoVisuals,dimFeatPlot3D) +importFrom(GiottoVisuals,dimGenePlot3D) +importFrom(GiottoVisuals,dimPlot) +importFrom(GiottoVisuals,dimPlot2D) +importFrom(GiottoVisuals,dimPlot3D) +importFrom(GiottoVisuals,getColors) +importFrom(GiottoVisuals,giottoSankeyPlan) +importFrom(GiottoVisuals,plotHeatmap) +importFrom(GiottoVisuals,plotMetaDataCellsHeatmap) +importFrom(GiottoVisuals,plotMetaDataHeatmap) +importFrom(GiottoVisuals,plotPCA) +importFrom(GiottoVisuals,plotPCA_2D) +importFrom(GiottoVisuals,plotPCA_3D) +importFrom(GiottoVisuals,plotStatDelaunayNetwork) +importFrom(GiottoVisuals,plotTSNE) +importFrom(GiottoVisuals,plotTSNE_2D) +importFrom(GiottoVisuals,plotTSNE_3D) +importFrom(GiottoVisuals,plotUMAP) +importFrom(GiottoVisuals,plotUMAP_2D) +importFrom(GiottoVisuals,plotUMAP_3D) +importFrom(GiottoVisuals,sankeyLabel) +importFrom(GiottoVisuals,sankeyPlot) +importFrom(GiottoVisuals,sankeyRelate) +importFrom(GiottoVisuals,sankeySet) +importFrom(GiottoVisuals,sankeySetAddresses) +importFrom(GiottoVisuals,showClusterDendrogram) +importFrom(GiottoVisuals,showClusterHeatmap) +importFrom(GiottoVisuals,showColorInstructions) +importFrom(GiottoVisuals,showSaveParameters) +importFrom(GiottoVisuals,spatCellPlot) +importFrom(GiottoVisuals,spatCellPlot2D) +importFrom(GiottoVisuals,spatDeconvPlot) +importFrom(GiottoVisuals,spatDimCellPlot) +importFrom(GiottoVisuals,spatDimCellPlot2D) +importFrom(GiottoVisuals,spatDimFeatPlot2D) +importFrom(GiottoVisuals,spatDimFeatPlot3D) +importFrom(GiottoVisuals,spatDimGenePlot3D) +importFrom(GiottoVisuals,spatDimPlot) +importFrom(GiottoVisuals,spatDimPlot2D) +importFrom(GiottoVisuals,spatDimPlot3D) +importFrom(GiottoVisuals,spatFeatPlot2D) +importFrom(GiottoVisuals,spatFeatPlot2D_single) +importFrom(GiottoVisuals,spatFeatPlot3D) +importFrom(GiottoVisuals,spatGenePlot3D) +importFrom(GiottoVisuals,spatInSituPlotDensity) +importFrom(GiottoVisuals,spatInSituPlotHex) +importFrom(GiottoVisuals,spatInSituPlotPoints) +importFrom(GiottoVisuals,spatNetwDistributions) +importFrom(GiottoVisuals,spatNetwDistributionsDistance) +importFrom(GiottoVisuals,spatNetwDistributionsKneighbors) +importFrom(GiottoVisuals,spatPlot) +importFrom(GiottoVisuals,spatPlot2D) +importFrom(GiottoVisuals,spatPlot3D) +importFrom(GiottoVisuals,subsetSankeySet) +importFrom(GiottoVisuals,violinPlot) importFrom(data.table,data.table) importFrom(data.table,frank) importFrom(data.table,fread) diff --git a/R/ONTraC_wrapper.R b/R/ONTraC_wrapper.R new file mode 100644 index 000000000..9f760b67f --- /dev/null +++ b/R/ONTraC_wrapper.R @@ -0,0 +1,518 @@ +#' @title getONTraCv1Input +#' @name getONTraCv1Input +#' @description generate the input data for ONTraC v1 +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @param output_path the path to save the output file +#' @param cell_type the cell type column name in the metadata +#' @returns data.table with columns: Cell_ID, Sample, x, y, Cell_Type +#' @details This function generate the input data for ONTraC v1 +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' getONTraCv1Input( +#' gobject = g, +#' cell_type = "custom_leiden" +#' ) +#' @export +getONTraCv1Input <- function(gobject, # nolint: object_name_linter. + cell_type, + output_path = getwd(), + spat_unit = NULL, + feat_type = NULL, + verbose = TRUE) { + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + pos_df <- getSpatialLocations( + gobject = gobject, + spat_unit = spat_unit, + output = "data.table" + ) + meta_df <- pDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + output_df <- merge(x = pos_df, y = meta_df, by = "cell_ID") + + # check if the cell_type column exits + if (!cell_type %in% colnames(output_df)) { + vmsg(.v = verbose, paste( + "Given", + cell_type, + "do not exist in giotto object's metadata!" + )) + return(NULL) + } + + # add default sample name for one sample obj + if (!"list_ID" %in% colnames(output_df)) { + output_df$list_ID <- "ONTraC" + } + + output_df <- output_df[, .SD, .SDcols = c( + "cell_ID", + "list_ID", + "sdimx", + "sdimy", + cell_type + )] + colnames(output_df) <- c("Cell_ID", "Sample", "x", "y", "Cell_Type") + file_path <- file.path(output_path, "ONTraC_dataset_input.csv") + write.csv(output_df, file = file_path, quote = FALSE, row.names = FALSE) + vmsg(.v = verbose, paste("ONTraC input file was saved as", file_path)) + + return(output_df) +} + + +#' @title load_cell_bin_niche_cluster +#' @name load_cell_bin_niche_cluster +#' @description load cell-level binarized niche cluster +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @returns gobject with cell-level binarized niche cluster +#' @details This function loads the ONTraC outputed cell-level binarized niche +#' cluster into the giotto object. +load_cell_bin_niche_cluster <- function(gobject, + ontrac_results_dir = getwd()) { + bin_niche_cluster_df <- read.csv(file = file.path( + ontrac_results_dir, + "GNN_dir", "cell_level_max_niche_cluster.csv.gz" + )) + colnames(bin_niche_cluster_df) <- c("cell_ID", "NicheCluster") + gobject <- GiottoClass::addCellMetadata(gobject, + new_metadata = bin_niche_cluster_df, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + return(gobject) +} + + +#' @title load_cell_NT_score +#' @name load_cell_NT_score +#' @description load cell-level NT score +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @returns gobject with cell-level NT score +#' @details This function loads the ONTraC outputed cell-level NT score +load_cell_NT_score <- function(gobject, # nolint: object_name_linter. + ontrac_results_dir = getwd()) { + NT_score_df <- read.csv(file = file.path( # nolint: object_name_linter. + ontrac_results_dir, + "NTScore_dir", "NTScore.csv.gz" + ))[c("Cell_ID", "Cell_NTScore")] + colnames(NT_score_df) <- c("cell_ID", "NTScore") # nolint: object_name_linter. + gobject <- addCellMetadata(gobject, + new_metadata = NT_score_df, # nolint: object_name_linter. + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + + return(gobject) +} + + +#' @title load_cell_niche_cluster_prob +#' @name load_cell_niche_cluster_prob +#' @description load cell-niche cluster probability +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @param name name for the probability matrix +#' @returns gobject with cell-niche cluster probability matrix +#' @details This function loads the ONTraC outputed cell-niche cluster +#' probability as an exprObj into the giotto object. +load_cell_niche_cluster_prob <- function(gobject, + ontrac_results_dir = getwd(), + spat_unit = "cell", + feat_type = "niche cluster", + name = "prob") { + niche_cluster_prob_df <- read.csv(file = file.path( + ontrac_results_dir, + "GNN_dir", "cell_level_niche_cluster.csv.gz" + )) + rownames(niche_cluster_prob_df) <- niche_cluster_prob_df$Cell_ID + niche_cluster_prob_df$Cell_ID <- NULL + expobj <- createExprObj(t(niche_cluster_prob_df), + spat_unit = spat_unit, + feat_type = feat_type + ) + gobject <- GiottoClass::setExpression( + gobject = gobject, + x = expobj, + spat_unit = spat_unit, + feat_type = feat_type, + name = name + ) + + return(gobject) +} + + +#' @title load_nc_connectivity +#' @name load_nc_connectivity +#' @description load niche cluster connectivity +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @param name name for the connectivity matrix +#' @returns gobject with niche cluster connectivity matrix +#' @details This function loads the ONTraC outputed niche cluster connectivity +#' matrix as an exprObj into the giotto object. +load_nc_connectivity <- function(gobject, + ontrac_results_dir = getwd(), + spat_unit = "niche cluster", + feat_type = "connectivity", + name = "normalized") { + connectivity_df <- read.csv(file = file.path( + ontrac_results_dir, + "GNN_dir", "consolidate_out_adj.csv.gz" + ), header = FALSE) + rownames(connectivity_df) <- paste0( + "NicheCluster_", + seq_len(dim(connectivity_df)[1]) - 1 + ) + colnames(connectivity_df) <- paste0( + "NicheCluster_", + seq_len(dim(connectivity_df)[2]) - 1 + ) + expobj <- createExprObj(t(connectivity_df), + spat_unit = spat_unit, + feat_type = feat_type + ) + gobject <- GiottoClass::setExpression( + gobject = gobject, + x = expobj, + spat_unit = spat_unit, + feat_type = feat_type, + name = name + ) + + return(gobject) +} + + +#' @title loadOntraCResults +#' @name loadOntraCResults +#' @description load ONTraC results +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @returns gobject with ONTraC results +#' @details This function loads the ONTraC results into the giotto object. +#' @export +loadOntraCResults <- function(gobject, # nolint: object_name_linter. + ontrac_results_dir = getwd()) { + gobject <- load_cell_bin_niche_cluster(gobject, ontrac_results_dir) + gobject <- load_cell_NT_score(gobject, ontrac_results_dir) + gobject <- load_cell_niche_cluster_prob(gobject, ontrac_results_dir) + gobject <- GiottoClass::addCellMetadata( + gobject = gobject, + spat_unit = "cell", + feat_type = "niche cluster", + new_metadata = pDataDT(gobject), + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + + gobject <- load_nc_connectivity(gobject, ontrac_results_dir) + + return(gobject) +} + +#' @title plotNicheClusterConnectivity +#' @name plotNicheClusterConnectivity +#' @description plot niche cluster connectivity +#' @inheritParams data_access_params +#' @inheritParams plot_output_params +#' @param spat_unit name of spatial unit niche stored cluster features +#' @param feat_type name of the feature type stored niche cluster connectivities +#' @param values name of the expression matrix stored connectivity values +#' @details This function plots the niche cluster connectivity matrix +#' @export +plotNicheClusterConnectivity <- function( # nolint: object_name_linter. + gobject, + spat_unit = "niche cluster", + feat_type = "connectivity", + values = "normalized", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "NicheClusterConnectivity") { + # load `guide_edge_colourbar` function in ggraph, + # otherwise it will raise an error when using `scale_edge_colour_gradientn` + library(ggraph) + + # get the niche cluster connectivity matrix + niche_cluster_connectivites <- getExpression( + gobject = gobject, + values = "normalized", + spat_unit = "niche cluster", + feat_type = "connectivity", + output = "matrix" + ) + + # transform the matrix to data.frame for constructing igraph object + niche_cluster_connectivites <- cbind( + expand.grid(dimnames(niche_cluster_connectivites)), + value = as.vector(as.matrix( + niche_cluster_connectivites + )) + ) + colnames(niche_cluster_connectivites) <- c("from", "to", "connectivites") + + # construct igraph object + igd <- igraph::graph_from_data_frame( + d = niche_cluster_connectivites[, c("from", "to", "connectivites")], + directed = FALSE + ) + igd <- igraph::simplify( + graph = igd, + remove.loops = TRUE, + remove.multiple = FALSE + ) + edges_sizes <- igraph::edge_attr(igd, "connectivites") + edges_colors <- edges_sizes + igd <- igraph::set.edge.attribute( + graph = igd, + index = igraph::E(igd), + name = "color", + value = edges_colors + ) + igd <- igraph::set.edge.attribute( + graph = igd, + index = igraph::E(igd), + name = "size", + value = edges_sizes + ) + + # plot + ## layout + coords <- igraph::layout_with_drl( + graph = igd, + weights = edges_sizes, + use.seed = TRUE + ) + gpl <- ggraph::ggraph(graph = igd, layout = coords) + + ## edges + gpl <- gpl + ggraph::geom_edge_link( + ggplot2::aes( + colour = edges_sizes, + edge_width = 5, + edge_alpha = size # nolint: object_usage_linter. + ), + show.legend = FALSE + ) + gpl <- gpl + ggraph::scale_edge_alpha(range = c(0.1, 1)) + gpl <- gpl + ggraph::scale_edge_colour_gradientn( + colours = RColorBrewer::brewer.pal(9, "Reds"), + name = "Value" + ) + + ## node + gpl <- gpl + ggraph::geom_node_point( + ggplot2::aes(colour = name), # nolint: object_usage_linter. + size = 10 + ) + gpl <- gpl + ggplot2::scale_fill_gradientn(colours = viridis::turbo(100)) + gpl <- gpl + ggraph::geom_node_text( + ggplot2::aes(label = name), # nolint: object_usage_linter. + repel = TRUE + ) + + ## theme + gpl <- gpl + ggplot2::theme_bw() + ggplot2::theme( + panel.grid = ggplot2::element_blank(), + panel.border = ggplot2::element_blank(), + axis.title = ggplot2::element_blank(), + axis.text = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank() + ) + gpl + + # return or save + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = gpl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL + )) +} + +#' @title plotCTCompositionInNicheCluster +#' @name plotCTCompositionInNicheCluster +#' @description plot cell type composition within each niche cluster +#' @param cell_type the cell type column name in the metadata +#' @inheritParams data_access_params +#' @inheritParams plot_output_params +#' @param spat_unit name of spatial unit niche stored cluster features +#' @param feat_type name of the feature type stored niche cluster connectivities +#' @param values name of the expression matrix stored connectivity values +#' @details This function plots the niche cluster connectivity matrix +#' @export +plotCTCompositionInNicheCluster <- function( # nolint: object_name_linter. + gobject, + cell_type, + values = "prob", + spat_unit = "cell", + feat_type = "niche cluster", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "CellTypeCompositionInNicheCluster") { + # Get the cell type composition within each niche cluster + ## extract the cell-level niche cluster probability matrix + exp <- getExpression( + gobject = gobject, + values = values, + spat_unit = spat_unit, + feat_type = feat_type, + output = "exprObj" + ) + prob_df <- as.data.frame(t(as.matrix(exp@exprMat))) + prob_df$cell_ID <- rownames(prob_df) + ## combine the cell type and niche cluster probability matrix + combined_df <- merge( + pDataDT(gobject, feat_type = feat_type)[, c("cell_ID", cell_type)], + prob_df, + by = "cell_ID" + ) + + # Calculate the normalized cell type composition within each niche cluster + cell_type_counts_df <- combined_df %>% + tidyr::pivot_longer( + cols = dplyr::starts_with("NicheCluster_"), + names_to = "Cluster", + values_to = "Probability" + ) %>% + dplyr::group_by(Cell_Type, Cluster) %>% # nolint: object_usage_linter. + dplyr::summarise(Sum = sum(Probability, # nolint: object_usage_linter. + na.rm = TRUE + )) %>% + tidyr::spread(key = "Cluster", value = "Sum", fill = 0) + cell_type_counts_df <- as.data.frame(cell_type_counts_df) + rownames(cell_type_counts_df) <- cell_type_counts_df$Cell_Type + cell_type_counts_df$Cell_Type <- NULL + normalized_df <- as.data.frame(t( + t(cell_type_counts_df) / colSums(cell_type_counts_df) + )) + + + # Reshape the data frame into long format + normalized_df$Cell_Type <- rownames(normalized_df) + df_long <- normalized_df %>% + tidyr::pivot_longer( + cols = -Cell_Type, # nolint: object_usage_linter. + names_to = "Cluster", + values_to = "Composition" + ) + + # Create the heatmap using ggplot2 + pl <- ggplot(df_long, aes( + x = Cell_Type, # nolint: object_usage_linter. + y = Cluster, # nolint: object_usage_linter. + fill = Composition # nolint: object_usage_linter. + )) + + geom_tile() + + viridis::scale_fill_viridis(option = "inferno", limits = c(0, 1)) + + theme_minimal() + + labs( + title = "Normalized cell type compositions within each niche cluster", + x = "Cell_Type", + y = "Cluster" + ) + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + # return or save + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = pl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL + )) +} + + +#' @title plotCellTypeNTScore +#' @name plotCellTypeNTScore +#' @description plot NTScore by cell type +#' @param cell_type the cell type column name in the metadata +#' @inheritParams data_access_params +#' @inheritParams plot_output_params +#' @export +plotCellTypeNTScore <- function(gobject, # nolint: object_name_linter. + cell_type, + values = "NTScore", + spat_unit = "cell", + feat_type = "rna", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "CellTypeNTScore") { + # Get the cell type composition within each niche cluster + data_df <- pDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + avg_scores <- data_df %>% + dplyr::group_by(Cell_Type) %>% # nolint: object_usage_linter. + dplyr::summarise(Avg_NTScore = mean(NTScore)) # nolint: object_usage_linter. + data_df$Cell_Type <- factor(data_df$Cell_Type, + levels = avg_scores$Cell_Type[order(avg_scores$Avg_NTScore)] + ) + + pl <- ggplot(data_df, aes( + x = NTScore, # nolint: object_usage_linter. + y = Cell_Type, # nolint: object_usage_linter. + fill = Cell_Type + )) + + geom_violin() + + theme_minimal() + + labs( + title = "Violin Plot of NTScore by Cell Type", + x = "NTScore", + y = "Cell Type" + ) + + ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + # return or save + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = pl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL + )) +} diff --git a/man/dot-igraph_vertex_membership.Rd b/man/dot-igraph_vertex_membership.Rd index 937905ad9..f4362ce83 100644 --- a/man/dot-igraph_vertex_membership.Rd +++ b/man/dot-igraph_vertex_membership.Rd @@ -4,12 +4,16 @@ \alias{.igraph_vertex_membership} \title{igraph vertex membership} \usage{ -.igraph_vertex_membership(g, clus_name) +.igraph_vertex_membership(g, clus_name, all_ids = NULL, missing_id_name) } \arguments{ \item{g}{igraph} \item{clus_name}{character. name to assign column of clustering info} + +\item{all_ids}{(optional) character vector with all ids} + +\item{missing_id_name}{character and name for vertices that were missing from g} } \value{ data.table diff --git a/man/getONTraCv1Input.Rd b/man/getONTraCv1Input.Rd new file mode 100644 index 000000000..784a77678 --- /dev/null +++ b/man/getONTraCv1Input.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{getONTraCv1Input} +\alias{getONTraCv1Input} +\title{getONTraCv1Input} +\usage{ +getONTraCv1Input( + gobject, + cell_type, + output_path = getwd(), + spat_unit = NULL, + feat_type = NULL, + verbose = TRUE +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{cell_type}{the cell type column name in the metadata} + +\item{output_path}{the path to save the output file} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{verbose}{be verbose} +} +\value{ +data.table with columns: Cell_ID, Sample, x, y, Cell_Type +} +\description{ +generate the input data for ONTraC v1 +} +\details{ +This function generate the input data for ONTraC v1 +} +\examples{ +g <- GiottoData::loadGiottoMini("visium") + +getONTraCv1Input( + gobject = g, + cell_type = "custom_leiden" +) +} diff --git a/man/identifyTMAcores.Rd b/man/identifyTMAcores.Rd new file mode 100644 index 000000000..010f05d50 --- /dev/null +++ b/man/identifyTMAcores.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_clusters.R +\name{identifyTMAcores} +\alias{identifyTMAcores} +\title{Split cluster annotations based on a spatial network} +\usage{ +identifyTMAcores( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + core_id_name = "core_id", + include_all_ids = TRUE, + missing_id_name = "not_connected", + return_gobject = TRUE +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{spatial_network_name}{character. Name of spatial network to use} + +\item{core_id_name}{metadata column name for the core information} + +\item{include_all_ids}{Boolean. Include all ids, including vertex ids not found +in the spatial network} + +\item{missing_id_name}{Character. Name for vertices that were missing from +spatial network} + +\item{return_gobject}{Boolean. Return giotto object} +} +\value{ +cluster annotations +} +\description{ +Split cluster annotations based on a spatial network +} diff --git a/man/importVisiumHD.Rd b/man/importVisiumHD.Rd index 1b584aad3..08fb3801b 100644 --- a/man/importVisiumHD.Rd +++ b/man/importVisiumHD.Rd @@ -20,7 +20,7 @@ importVisiumHD( \item{expression_source}{character. Raw or filter expression data. Defaults to 'raw'} -\item{gene_column_index}{numeric. Expression column to use for gene names +\item{gene_column_index}{numeric. Expression column to use for gene names 1 = Ensembl and 2 = gene symbols} \item{barcodes}{character vector. (optional) Use if you only want to load @@ -74,7 +74,7 @@ expression_obj = readerHD$load_expression() Load transcript data (cell metadata, expression object, and transcripts per pixel) my_transcripts = readerHD$load_transcripts(array_subset_row = c(500, 1000), array_subset_col = c(500, 1000)) - + # Create a `giotto` object and add the loaded data TODO } diff --git a/man/loadOntraCResults.Rd b/man/loadOntraCResults.Rd new file mode 100644 index 000000000..2eda6902f --- /dev/null +++ b/man/loadOntraCResults.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{loadOntraCResults} +\alias{loadOntraCResults} +\title{loadOntraCResults} +\usage{ +loadOntraCResults(gobject, ontrac_results_dir = getwd()) +} +\arguments{ +\item{gobject}{giotto object} + +\item{ontrac_results_dir}{the directory where the ONTraC results are saved} +} +\value{ +gobject with ONTraC results +} +\description{ +load ONTraC results +} +\details{ +This function loads the ONTraC results into the giotto object. +} diff --git a/man/load_cell_NT_score.Rd b/man/load_cell_NT_score.Rd new file mode 100644 index 000000000..4a8b5d87b --- /dev/null +++ b/man/load_cell_NT_score.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{load_cell_NT_score} +\alias{load_cell_NT_score} +\title{load_cell_NT_score} +\usage{ +load_cell_NT_score(gobject, ontrac_results_dir = getwd()) +} +\arguments{ +\item{gobject}{giotto object} + +\item{ontrac_results_dir}{the directory where the ONTraC results are saved} +} +\value{ +gobject with cell-level NT score +} +\description{ +load cell-level NT score +} +\details{ +This function loads the ONTraC outputed cell-level NT score +} diff --git a/man/load_cell_bin_niche_cluster.Rd b/man/load_cell_bin_niche_cluster.Rd new file mode 100644 index 000000000..4aafb4cb5 --- /dev/null +++ b/man/load_cell_bin_niche_cluster.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{load_cell_bin_niche_cluster} +\alias{load_cell_bin_niche_cluster} +\title{load_cell_bin_niche_cluster} +\usage{ +load_cell_bin_niche_cluster(gobject, ontrac_results_dir = getwd()) +} +\arguments{ +\item{gobject}{giotto object} + +\item{ontrac_results_dir}{the directory where the ONTraC results are saved} +} +\value{ +gobject with cell-level binarized niche cluster +} +\description{ +load cell-level binarized niche cluster +} +\details{ +This function loads the ONTraC outputed cell-level binarized niche +cluster into the giotto object. +} diff --git a/man/load_cell_niche_cluster_prob.Rd b/man/load_cell_niche_cluster_prob.Rd new file mode 100644 index 000000000..2bcb59346 --- /dev/null +++ b/man/load_cell_niche_cluster_prob.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{load_cell_niche_cluster_prob} +\alias{load_cell_niche_cluster_prob} +\title{load_cell_niche_cluster_prob} +\usage{ +load_cell_niche_cluster_prob( + gobject, + ontrac_results_dir = getwd(), + spat_unit = "cell", + feat_type = "niche cluster", + name = "prob" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{ontrac_results_dir}{the directory where the ONTraC results are saved} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{name}{name for the probability matrix} +} +\value{ +gobject with cell-niche cluster probability matrix +} +\description{ +load cell-niche cluster probability +} +\details{ +This function loads the ONTraC outputed cell-niche cluster +probability as an exprObj into the giotto object. +} diff --git a/man/load_nc_connectivity.Rd b/man/load_nc_connectivity.Rd new file mode 100644 index 000000000..ae96d3677 --- /dev/null +++ b/man/load_nc_connectivity.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{load_nc_connectivity} +\alias{load_nc_connectivity} +\title{load_nc_connectivity} +\usage{ +load_nc_connectivity( + gobject, + ontrac_results_dir = getwd(), + spat_unit = "niche cluster", + feat_type = "connectivity", + name = "normalized" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{ontrac_results_dir}{the directory where the ONTraC results are saved} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{name}{name for the connectivity matrix} +} +\value{ +gobject with niche cluster connectivity matrix +} +\description{ +load niche cluster connectivity +} +\details{ +This function loads the ONTraC outputed niche cluster connectivity +matrix as an exprObj into the giotto object. +} diff --git a/man/plotCTCompositionInNicheCluster.Rd b/man/plotCTCompositionInNicheCluster.Rd new file mode 100644 index 000000000..24c9a7109 --- /dev/null +++ b/man/plotCTCompositionInNicheCluster.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{plotCTCompositionInNicheCluster} +\alias{plotCTCompositionInNicheCluster} +\title{plotCTCompositionInNicheCluster} +\usage{ +plotCTCompositionInNicheCluster( + gobject, + cell_type, + values = "prob", + spat_unit = "cell", + feat_type = "niche cluster", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "CellTypeCompositionInNicheCluster" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{cell_type}{the cell type column name in the metadata} + +\item{values}{name of the expression matrix stored connectivity values} + +\item{spat_unit}{name of spatial unit niche stored cluster features} + +\item{feat_type}{name of the feature type stored niche cluster connectivities} + +\item{show_plot}{logical. show plot} + +\item{return_plot}{logical. return ggplot object} + +\item{save_plot}{logical. save the plot} + +\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} + +\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +} +\description{ +plot cell type composition within each niche cluster +} +\details{ +This function plots the niche cluster connectivity matrix +} diff --git a/man/plotCellTypeNTScore.Rd b/man/plotCellTypeNTScore.Rd new file mode 100644 index 000000000..c24a73bd9 --- /dev/null +++ b/man/plotCellTypeNTScore.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{plotCellTypeNTScore} +\alias{plotCellTypeNTScore} +\title{plotCellTypeNTScore} +\usage{ +plotCellTypeNTScore( + gobject, + cell_type, + values = "NTScore", + spat_unit = "cell", + feat_type = "rna", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "CellTypeNTScore" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{cell_type}{the cell type column name in the metadata} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{show_plot}{logical. show plot} + +\item{return_plot}{logical. return ggplot object} + +\item{save_plot}{logical. save the plot} + +\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} + +\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +} +\description{ +plot NTScore by cell type +} diff --git a/man/plotNicheClusterConnectivity.Rd b/man/plotNicheClusterConnectivity.Rd new file mode 100644 index 000000000..b6f236496 --- /dev/null +++ b/man/plotNicheClusterConnectivity.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{plotNicheClusterConnectivity} +\alias{plotNicheClusterConnectivity} +\title{plotNicheClusterConnectivity} +\usage{ +plotNicheClusterConnectivity( + gobject, + spat_unit = "niche cluster", + feat_type = "connectivity", + values = "normalized", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "NicheClusterConnectivity" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{name of spatial unit niche stored cluster features} + +\item{feat_type}{name of the feature type stored niche cluster connectivities} + +\item{values}{name of the expression matrix stored connectivity values} + +\item{show_plot}{logical. show plot} + +\item{return_plot}{logical. return ggplot object} + +\item{save_plot}{logical. save the plot} + +\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} + +\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +} +\description{ +plot niche cluster connectivity +} +\details{ +This function plots the niche cluster connectivity matrix +} diff --git a/man/reexports.Rd b/man/reexports.Rd index f5533929f..c4a2e34b5 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -298,7 +298,7 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{density}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{hist}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} + \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{density}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{hist}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} \item{GiottoUtils}{\code{\link[GiottoUtils:pipe]{\%>\%}}, \code{\link[GiottoUtils]{getDistinctColors}}, \code{\link[GiottoUtils]{getRainbowColors}}} diff --git a/man/spatialSplitCluster.Rd b/man/spatialSplitCluster.Rd index b0c03729b..f6d503a68 100644 --- a/man/spatialSplitCluster.Rd +++ b/man/spatialSplitCluster.Rd @@ -10,7 +10,10 @@ spatialSplitCluster( feat_type = NULL, spatial_network_name = "Delaunay_network", cluster_col, - split_clus_name = paste0(cluster_col, "_split") + split_clus_name = paste0(cluster_col, "_split"), + include_all_ids = TRUE, + missing_id_name = "not_connected", + return_gobject = TRUE ) } \arguments{ @@ -25,11 +28,18 @@ spatialSplitCluster( \item{cluster_col}{character. Column in metadata containing original clustering} -\item{split_clus_name}{character. Name to assign the split cluster results -information to split} +\item{split_clus_name}{character. Name to assign the split cluster results} + +\item{include_all_ids}{Boolean. Include all ids, including vertex ids not found +in the spatial network} + +\item{missing_id_name}{Character. Name for vertices that were missing from +spatial network} + +\item{return_gobject}{Boolean. Return giotto object} } \value{ -cluster annotations +giotto object with cluster annotations } \description{ Split cluster annotations based on a spatial network From 186fb2d1440880ec2533472368ce89714c0e8288 Mon Sep 17 00:00:00 2001 From: pacificma Date: Mon, 29 Jul 2024 22:30:15 -0400 Subject: [PATCH 110/150] Update python_hmrf.R fix addHMRF_V2; add feat/unit to clustering functions(doHMRF_V2) --- R/python_hmrf.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/python_hmrf.R b/R/python_hmrf.R index 76bb4a503..157aa73fe 100644 --- a/R/python_hmrf.R +++ b/R/python_hmrf.R @@ -1778,7 +1778,7 @@ initHMRF_V2 <- gobject@dimension_reduction$cells$spatial$spatial_feat$coordinates <- y gobject <- createNearestNetwork( - gobject = gobject, + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, dim_reduction_to_use = "spatial", dim_reduction_name = "spatial_feat", dimensions_to_use = seq_len(ncol(y)), @@ -1788,7 +1788,7 @@ initHMRF_V2 <- if (cl.method == "leiden") { message("Leiden clustering initialization...") leiden.cl <- doLeidenCluster( - gobject = gobject, + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, nn_network_to_use = "sNN", network_name = "sNN.initHMRF", set_seed = hmrf_seed, @@ -1803,7 +1803,7 @@ initHMRF_V2 <- } else if (cl.method == "louvain") { message("Louvain clustering initialization...") louvain.cl <- doLouvainCluster( - gobject = gobject, + gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, nn_network_to_use = "sNN", network_name = "sNN.initHMRF", set_seed = hmrf_seed, @@ -2033,19 +2033,19 @@ addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - column_cell_ID = "cell_ID", - # new_metadata = HMRFoutput[[i]]$class[match( - # ordered_cell_IDs, names(HMRFoutput[[i]]$class))], - new_metadata = HMRFoutput[[i]]$prob[ordered_cell_IDs, ], + new_metadata = HMRFoutput[[i]]$class[match( + ordered_cell_IDs, + rownames(HMRFoutput[[i]]$prob))], vector_name = paste(name, names(HMRFoutput)[i]) + # ,column_cell_ID = 'cell_ID', # by_column = TRUE ) } return(gobject) } - - + + #' @title viewHMRFresults_V2 #' @name viewHMRFresults_V2 #' @description function to view HMRF results with multiple betas From 9d83e32a69effff4d89995a41a74ffabcdc62c47 Mon Sep 17 00:00:00 2001 From: pacificma Date: Mon, 29 Jul 2024 23:03:46 -0400 Subject: [PATCH 111/150] Update python_hmrf.R add line break to messages --- R/python_hmrf.R | 134 ++++++++++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/R/python_hmrf.R b/R/python_hmrf.R index 157aa73fe..6b0b597d0 100644 --- a/R/python_hmrf.R +++ b/R/python_hmrf.R @@ -135,8 +135,8 @@ doHMRF <- function(gobject, } if (!"matrix" %in% class(expr_values)) { - warning("this matrix will be converted to a dense and memory intensive - base matrix ...") + warning("\n this matrix will be converted to a dense and memory intensive + base matrix ...\n") expr_values <- as.matrix(expr_values) } @@ -145,7 +145,7 @@ doHMRF <- function(gobject, # overwrite if exists if (file.exists(expression_file) & overwrite_output == TRUE) { - message("expression_matrix.txt already exists at this location, will be + message("\n expression_matrix.txt already exists at this location, will be overwritten") data.table::fwrite( data.table::as.data.table(expr_values, keep.rownames = "gene"), @@ -153,7 +153,7 @@ doHMRF <- function(gobject, row.names = FALSE, sep = " " ) } else if (file.exists(expression_file) & overwrite_output == FALSE) { - message("expression_matrix.txt already exists at this location, will be + message("\n expression_matrix.txt already exists at this location, will be used again") } else { data.table::fwrite( @@ -177,7 +177,7 @@ doHMRF <- function(gobject, ] } else { if (is.null(spatial_genes)) { - stop("you need to provide a vector of spatial genes (~500)") + stop("\n you need to provide a vector of spatial genes (~500)") } spatial_genes_detected <- spatial_genes[ spatial_genes %in% rownames(expr_values) @@ -187,14 +187,14 @@ doHMRF <- function(gobject, # overwrite if exists if (file.exists(spatial_genes_file) & overwrite_output == TRUE) { - message("spatial_genes.txt already exists at this location, will be + message("\n spatial_genes.txt already exists at this location, will be overwritten") write.table(spatial_genes_detected, file = spatial_genes_file, quote = FALSE, col.names = FALSE, row.names = FALSE ) } else if (file.exists(spatial_genes_file) & overwrite_output == FALSE) { - message("spatial_genes.txt already exists at this location, will be + message("\n spatial_genes.txt already exists at this location, will be used again") } else { write.table(spatial_genes_detected, @@ -217,14 +217,14 @@ doHMRF <- function(gobject, spatial_network_file <- paste0(output_folder, "/", "spatial_network.txt") if (file.exists(spatial_network_file) & overwrite_output == TRUE) { - message("spatial_network.txt already exists at this location, will be + message("\n spatial_network.txt already exists at this location, will be overwritten") write.table(spatial_network, file = spatial_network_file, row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "\t" ) } else if (file.exists(spatial_network_file) & overwrite_output == FALSE) { - message("spatial_network.txt already exists at this location, will be + message("\n spatial_network.txt already exists at this location, will be used again") } else { write.table(spatial_network, @@ -258,14 +258,14 @@ doHMRF <- function(gobject, ) if (file.exists(spatial_location_file) & overwrite_output == TRUE) { - message("spatial_cell_locations.txt already exists at this location, + message("\n spatial_cell_locations.txt already exists at this location, will be overwritten") write.table(spatial_location, file = spatial_location_file, row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "\t" ) } else if (file.exists(spatial_location_file)) { - message("spatial_cell_locations.txt already exists at this location, + message("\n spatial_cell_locations.txt already exists at this location, will be used again") } else { write.table(spatial_location, @@ -370,7 +370,7 @@ loadHMRF <- function( python_path_used) { output_data <- paste0(output_folder_used, "/", "result.spatial.zscore") if (!file.exists(output_data)) { - stop("doHMRF was not run in this output directory") + stop("\n doHMRF was not run in this output directory") } # check if it indeed exists @@ -411,7 +411,7 @@ viewHMRFresults <- function( third_dim = FALSE, ...) { if (!"HMRFoutput" %in% class(HMRFoutput)) { - stop("HMRFoutput needs to be output from doHMRFextend") + stop("\n HMRFoutput needs to be output from doHMRFextend") } ## reader.py and get_result.py paths @@ -497,7 +497,7 @@ writeHMRFresults <- function( betas_to_view = NULL, print_command = FALSE) { if (!"HMRFoutput" %in% class(HMRFoutput)) { - stop("HMRFoutput needs to be output from doHMRFextend") + stop("\n HMRFoutput needs to be output from doHMRFextend") } ## reader.py and get_result.py paths @@ -514,7 +514,7 @@ writeHMRFresults <- function( # k-values if (is.null(k)) { - stop("you need to select a k that was used with doHMRFextend") + stop("\n you need to select a k that was used with doHMRFextend") } k <- HMRFoutput$k @@ -609,7 +609,7 @@ addHMRF <- function( betas_to_add = NULL, hmrf_name = NULL) { if (!"HMRFoutput" %in% class(HMRFoutput)) { - stop("HMRFoutput needs to be output from doHMRFextend") + stop("\n HMRFoutput needs to be output from doHMRFextend") } # Set feat_type and spat_unit @@ -640,7 +640,7 @@ addHMRF <- function( # k-values if (is.null(k)) { - stop("you need to select a k that was used with doHMRFextend") + stop("\n you need to select a k that was used with doHMRFextend") } k <- HMRFoutput$k @@ -743,7 +743,7 @@ viewHMRFresults2D <- function( ) if (!"HMRFoutput" %in% class(HMRFoutput)) { - stop("HMRFoutput needs to be output from doHMRFextend") + stop("\n HMRFoutput needs to be output from doHMRFextend") } ## reader.py and get_result.py paths @@ -760,7 +760,7 @@ viewHMRFresults2D <- function( # k-values if (is.null(k)) { - stop("you need to select a k that was used with doHMRFextend") + stop("\n please select a valid k used with doHMRFextend") } k <- HMRFoutput$k @@ -853,7 +853,7 @@ viewHMRFresults3D <- function( betas_to_view = NULL, ...) { if (!"HMRFoutput" %in% class(HMRFoutput)) { - stop("HMRFoutput needs to be output from doHMRFextend") + stop("\n HMRFoutput needs to be output from doHMRFextend") } spat_unit <- set_default_spat_unit( @@ -877,7 +877,7 @@ viewHMRFresults3D <- function( # k-values if (is.null(k)) { - stop("you need to select a k that was used with doHMRFextend") + stop("\n please select a valid k used with doHMRFextend") } k <- HMRFoutput$k @@ -1118,9 +1118,9 @@ filterSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL, spat xPt <- length(y0s) - xPt y_cutoff <- y0[xPt] # The y-value at this x point. This is our y_cutoff. gx_sorted <- head(gx_sorted, n = xPt) - message("Elbow method chosen to determine number of spatial genes.") + message("\n Elbow method chosen to determine number of spatial genes.") cat(paste0( - "Elbow point determined to be at x=", xPt, " genes", + "\n Elbow point determined to be at x=", xPt, " genes", " y=", y_cutoff )) } @@ -1164,7 +1164,7 @@ chooseAvailableSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = N } else if (eval3 == TRUE) { return("silhouetteRank") } else { - stop(paste0("No available spatial genes. Please run binSpect or + stop(paste0("\n No available spatial genes. Please run binSpect or silhouetteRank\n"), call. = FALSE) } } @@ -1223,14 +1223,14 @@ checkAndFixSpatialGenes <- function( } else if (use_spatial_genes == "binSpect") { eval1 <- "binSpect.pval" %in% names(gx) if (eval1 == FALSE) { - stop(paste0("use_spatial_genes is set to binSpect, but it has + stop(paste0("\n use_spatial_genes is set to binSpect, but it has not been run yet. Run binSpect first."), call. = FALSE ) } return(use_spatial_genes) } else { - stop(paste0("use_spatial_genes is set to one that is not supported."), + stop(paste0("\n use_spatial_genes is set to one that is not supported."), call. = FALSE ) } @@ -1358,7 +1358,7 @@ initHMRF_V2 <- factor_step = 1.05, python_path = NULL) { wrap_msg( - "If used in published research, please cite: + "\n If used in published research, please cite: Q Zhu, S Shah, R Dries, L Cai, GC Yuan. 'Identification of spatially associated subpopulations by combining scRNAseq and sequential fluorescence in situ hybridization data' @@ -1406,24 +1406,24 @@ initHMRF_V2 <- if (use_neighborhood_composition) { if (is.null(spatial_network_name_for_neighborhood)) { - stop("spatial network is required to define neighborhood, + stop("\n spatial network is required to define neighborhood, set with \'spatial_network_name_for_neighborhood\' \n", call. = FALSE ) } else if (is.null(metadata_to_use)) { - stop("please specify the cluster in meta data, set with + stop("\n please specify the cluster in meta data, set with \'metadata_to_use\' \n", call. = FALSE ) } else if (is.null(cx[[metadata_to_use]])) { - stop("please provide a valid index in meta data, set with + stop("\n please provide a valid index in meta data, set with \'metadata_to_use\'", call. = FALSE ) } cat(paste0( - "use spatial network composition of \'", + "\n use spatial network composition of \'", metadata_to_use, "\' for domain clustering" )) @@ -1471,12 +1471,12 @@ initHMRF_V2 <- rownames(y0) <- cell_ID_enrich cat(paste0( - "Spatial enrichment result: \'", + "\n Spatial enrichment result: \'", existing_spatial_enrichm_to_use, "\' is used." )) if (sum(!rownames(y0) %in% cx$cell_ID) > 0) { - stop("Rownames of selected spatial enrichment result do not + stop("\n Rownames of selected spatial enrichment result do not match to (a subset of) Cell IDs, please fix them.", call. = FALSE ) @@ -1526,7 +1526,7 @@ initHMRF_V2 <- if (!"binSpect.pval" %in% names(gx) && !"silhouetteRank.score" %in% names(gx) && !"silhouetteRankTest.pval" %in% names(gx)) { - stop(paste0("Giotto spatial gene detection has not been run. + stop(paste0("\n Giotto spatial gene detection has not been run. Please run spatial gene detection first: binSpect, silhouetteRank."), call. = FALSE @@ -1534,8 +1534,8 @@ initHMRF_V2 <- } if (!is.null(user_gene_list)) { - message("User supplied gene list detected.") - message("Checking user gene list is spatial...") + message("\n User supplied gene list detected.") + message("\n Checking user gene list is spatial...") use_spatial_genes <- chooseAvailableSpatialGenes(gobject) filtered <- filterSpatialGenes( @@ -1549,26 +1549,26 @@ initHMRF_V2 <- ) if (filtered$num_genes_removed > 0) { cat(paste0( - "Removed ", filtered$num_genes_removed, + "\n Removed ", filtered$num_genes_removed, " from user's input gene list due to being absent or non-spatial genes." )) cat(paste0( - "Kept ", length(filtered$genes), + "\n Kept ", length(filtered$genes), " spatial genes for next step" )) } spatial_genes <- filtered$genes if (length(spatial_genes) == 0) { - stop("No genes are remaining to do HMRF. Please give a + stop("\n No genes are remaining to do HMRF. Please give a larger gene list.", call. = FALSE ) } } else { cat(paste0( - "Choosing spatial genes from the results of ", + "\n Choosing spatial genes from the results of ", use_spatial_genes )) use_spatial_genes <- checkAndFixSpatialGenes( @@ -1589,7 +1589,7 @@ initHMRF_V2 <- method = filter_method ) cat(paste0( - "Kept ", length(filtered$genes), + "\n Kept ", length(filtered$genes), " top spatial genes for next step" )) spatial_genes <- filtered$genes @@ -1603,7 +1603,7 @@ initHMRF_V2 <- ] y0 <- (pc.expr[, use_pca_dim]) } else { - message("Computing spatial coexpression modules...") + message("\n Computing spatial coexpression modules...") spat_cor_netw_DT <- detectSpatialCorFeats( gobject = gobject, feat_type = feat_type, @@ -1623,7 +1623,7 @@ initHMRF_V2 <- name = "spat_netw_clus", k = 20 ) - message("Sampling spatial genes from coexpression + message("\n Sampling spatial genes from coexpression modules...") sample_genes <- sampling_sp_genes( spat_cor_netw_DT$cor_clusters$spat_netw_clus, @@ -1633,15 +1633,15 @@ initHMRF_V2 <- ) spatial_genes_selected <- sample_genes$union_genes cat(paste0( - "Sampled ", length(spatial_genes_selected), + "\n Sampled ", length(spatial_genes_selected), " genes." )) } else { spatial_genes_selected <- spatial_genes } cat(paste0( - "Will use ", length(spatial_genes_selected), - "spatial genes for initialization of HMRF." + "\n Will use ", length(spatial_genes_selected), + " spatial genes for initialization of HMRF." )) expr_values <- expr_values[spatial_genes_selected, ] } else { @@ -1650,7 +1650,7 @@ initHMRF_V2 <- ) if (k.sp < cluster_metagene) { cat(paste0( - "construct ", k.sp, + "\n construct ", k.sp, " coexpression modules due to limited gene size..." )) } @@ -1663,7 +1663,7 @@ initHMRF_V2 <- show_top_feats = 1 ) - cat(paste0("Collecting top spatial genes and calculating + cat(paste0("\n Collecting top spatial genes and calculating metagenes from ", k.sp, " coexpression modules...")) top_per_module <- cluster_genes_DT[ @@ -1742,7 +1742,7 @@ initHMRF_V2 <- edge_ind <- edge_ind + 1 } } - message("Parsing neighborhood graph...") + message("\n Parsing neighborhood graph...") pp <- tidygraph::tbl_graph( edges = as.data.frame(edgelist), directed = FALSE ) @@ -1758,11 +1758,11 @@ initHMRF_V2 <- cl.method <- tolower(cl.method) if (!cl.method %in% c("km", "leiden", "louvain")) { cl.method <- "km" - message("clustering method not specified, use kmeans as default...") + message("\n clustering method not specified, use kmeans as default...") } if (cl.method == "km") { - message("Kmeans initialization...") + message("\n Kmeans initialization...") kk <- smfishHmrf::smfishHmrf.generate.centroid( y = y, par_k = k, par_seed = hmrf_seed, nstart = nstart @@ -1786,7 +1786,7 @@ initHMRF_V2 <- ) if (cl.method == "leiden") { - message("Leiden clustering initialization...") + message("\n Leiden clustering initialization...") leiden.cl <- doLeidenCluster( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, nn_network_to_use = "sNN", @@ -1801,7 +1801,7 @@ initHMRF_V2 <- ] mu <- aggregate(y, by = list(cl.match), FUN = mean) } else if (cl.method == "louvain") { - message("Louvain clustering initialization...") + message("\n Louvain clustering initialization...") louvain.cl <- doLouvainCluster( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, nn_network_to_use = "sNN", @@ -1836,7 +1836,7 @@ initHMRF_V2 <- ) damp[i] <- ifelse(is.null(di), 0, di) } - message("Done") + message("\n Done") list( y = y, nei = nei, numnei = numnei, blocks = blocks, damp = damp, mu = mu, sigma = sigma, k = k, genes = colnames(y), @@ -1885,39 +1885,39 @@ initHMRF_V2 <- #' @export doHMRF_V2 <- function(HMRF_init_obj, betas = NULL) { message( - "If used in published research, please cite: + "\n If used in published research, please cite: Q Zhu, S Shah, R Dries, L Cai, GC Yuan. 'Identification of spatially associated subpopulations by combining scRNAseq and sequential fluorescence in situ hybridization data' Nature biotechnology 36 (12), 1183-1190. 2018" ) - message("Please find more explanation and instruction of the HMRF function + message("\n Please find more explanation and instruction of the HMRF function on \n https://bitbucket.org/qzhudfci/smfishhmrf-r/src/master/TRANSITION.md") if (!"y" %in% names(HMRF_init_obj)) { - stop("expression matrix 'y' not in the intialization object") + stop("\n expression matrix 'y' not in the intialization object") } if (!"nei" %in% names(HMRF_init_obj)) { - stop("neighbor matrix 'nei' not in the intialization object") + stop("\n neighbor matrix 'nei' not in the intialization object") } if (!"numnei" %in% names(HMRF_init_obj)) { - stop("number of neighbors 'numnei' not in the intialization object") + stop("\n number of neighbors 'numnei' not in the intialization object") } if (!"blocks" %in% names(HMRF_init_obj)) { - stop("iteration groups 'blocks' not in the intialization object") + stop("\n iteration groups 'blocks' not in the intialization object") } if (!"damp" %in% names(HMRF_init_obj)) { - stop("dampen factors 'damp' not in the intialization object") + stop("\n dampen factors 'damp' not in the intialization object") } if (!"mu" %in% names(HMRF_init_obj)) { - stop("initial mean vector 'mu' not in the intialization object") + stop("\n initial mean vector 'mu' not in the intialization object") } if (!"sigma" %in% names(HMRF_init_obj)) { - stop("initial covariance matrix 'sigma' not in the intialization + stop("\n initial covariance matrix 'sigma' not in the intialization object") } if (!"k" %in% names(HMRF_init_obj)) { - stop("cluster number 'k' not in the intialization object") + stop("\n cluster number 'k' not in the intialization object") } if (!"spat_unit" %in% names(HMRF_init_obj)) { HMRF_init_obj[["spat_unit"]] <- NULL @@ -1939,10 +1939,10 @@ doHMRF_V2 <- function(HMRF_init_obj, betas = NULL) { if (is.null(betas)) { beta_seq <- max(ceiling(ncol(y) / 10), 2) - cat(paste0("Default value beta = ", beta_seq, " is used...")) + cat(paste0("\n Default value beta = ", beta_seq, " is used...")) } else if (length(betas) != 3 || (sum(betas[seq_len(3)] < 0) > 0)) { stop(wrap_txt( - "please provide betas as a vector of 3 non-negative numbers + "\n please provide betas as a vector of 3 non-negative numbers (initial value, increment, total iteration number)", errWidth = TRUE )) @@ -2008,7 +2008,7 @@ doHMRF_V2 <- function(HMRF_init_obj, betas = NULL) { #' @export addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { if (!"HMRFoutput" %in% class(HMRFoutput)) { - stop("HMRFoutput needs to be output from doHMRF_V2()") + stop("\n HMRFoutput needs to be output from doHMRF_V2()") } if (!"spat_unit" %in% names(HMRFoutput)) { HMRFoutput[["spat_unit"]] <- NULL From d7e83b43407519c0163f890a5d3a3e6e81594f23 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 06:50:34 -0400 Subject: [PATCH 112/150] wip --- R/convenience_xenium.R | 1436 ++++++++++++++++++++++------------------ 1 file changed, 797 insertions(+), 639 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index e68a897b7..cc61d87e4 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -9,6 +9,7 @@ setClass( xenium_dir = "character", filetype = "list", qv = "ANY", + micron = "numeric", calls = "list" ), prototype = list( @@ -62,31 +63,35 @@ setMethod( .Object, xenium_dir, filetype, - qv_cutoff + qv_cutoff, + micron ) { - .Object <- callNextMethod(.Object) + obj <- callNextMethod(.Object) # provided params (if any) if (!missing(xenium_dir)) { checkmate::assert_directory_exists(xenium_dir) - .Object@xenium_dir <- xenium_dir + obj@xenium_dir <- xenium_dir } if (!missing(filetype)) { - .Object@filetype <- filetype + obj@filetype <- filetype } if (!missing(qv_cutoff)) { - .Object@qv <- qv_cutoff + obj@qv <- qv_cutoff + } + if (!missing(micron)) { + obj@micron <- micron } # check filetype ftype_data <- c("transcripts", "boundaries", "expression", "cell_meta") - if (!all(ftype_data %in% names(.Object@filetype))) { + if (!all(ftype_data %in% names(obj@filetype))) { stop(wrap_txt("`$filetype` must have entries for each of:\n", paste(ftype_data, collapse = ", "))) } - ftype <- .Object@filetype + ftype <- obj@filetype ft_tab <- c("csv", "parquet") ft_exp <- c("h5", "mtx", "zarr") if (!ftype$transcripts %in% ft_tab) { @@ -112,7 +117,7 @@ setMethod( # detect paths and subdirs - p <- .Object@xenium_dir + p <- obj@xenium_dir .xenium_detect <- function(pattern, ...) { .detect_in_dir( pattern = pattern, ..., @@ -152,6 +157,17 @@ setMethod( expr_path <- .xenium_ftype(expr_path, ftype$expression) cell_meta_path <- .xenium_ftype(cell_meta_path, ftype$cell_meta) + # decide micron scaling + if (length(obj@micron) == 0) { # if no value already set + if (!is.null(experiment_info_path)) { + obj@micron <- jsonlite::fromJSON( + manifest$experiment.xenium)$pixel_size + } else { + warning(wrap_txt("No .xenium file found. + Guessing 0.2125 as micron scaling")) + obj@micron <- 0.2125 # default + } + } # transcripts load call tx_fun <- function( @@ -168,7 +184,7 @@ setMethod( "NegControlCodeword" ), dropcols = c(), - qv_threshold = .Object@qv, + qv_threshold = obj@qv, cores = determine_cores(), verbose = NULL ) { @@ -182,15 +198,15 @@ setMethod( verbose = verbose ) } - .Object@calls$load_transcripts <- tx_fun + obj@calls$load_transcripts <- tx_fun # load polys call poly_fun <- function( - path = cell_bound_path, - name = "cell", - calc_centroids = TRUE, - cores = determine_cores(), - verbose = NULL + path = cell_bound_path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL ) { .xenium_poly( path = path, @@ -200,14 +216,14 @@ setMethod( verbose = verbose ) } - .Object@calls$load_polys <- poly_fun + obj@calls$load_polys <- poly_fun # load cellmeta cmeta_fun <- function( - path = cell_meta_path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL + path = cell_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL ) { .xenium_cellmeta( path = path, @@ -216,14 +232,14 @@ setMethod( verbose = verbose ) } - .Object@calls$load_cellmeta <- cmeta_fun + obj@calls$load_cellmeta <- cmeta_fun # load featmeta fmeta_fun <- function( - path = panel_meta_path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL + path = panel_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL ) { .xenium_featmeta( path = path, @@ -232,15 +248,15 @@ setMethod( verbose = verbose ) } - .Object@calls$load_featmeta <- fmeta_fun + obj@calls$load_featmeta <- fmeta_fun # load expression call expr_fun <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL + path = expr_path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL ) { .xenium_expression( path = path, @@ -250,11 +266,43 @@ setMethod( verbose = verbose ) } - .Object@calls$load_expression <- expr_fun + obj@calls$load_expression <- expr_fun # load image call + img_fun <- function( + path, + name = "image", + micron = obj@micron, + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL + ) { + .xenium_image( + path = path, + name = name, + micron = micron, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + verbose = verbose + ) + } + obj@calls$load_image <- img_fun - + # load aligned image call + img_aff_fun <- function( + path = path, + micron = obj@micron, + imagealignment_path + ) { + read10xAffineImage( + file = path, + imagealignment_path = imagealignment_path, + micron = micron + ) + } + obj@calls$load_aligned_image <- img_aff_fun # create giotto object call @@ -301,7 +349,7 @@ setMethod( - funs <- .Object@calls + funs <- obj@calls # init gobject g <- giotto() @@ -367,10 +415,10 @@ setMethod( } - .Object@calls$create_gobject <- gobject_fun + obj@calls$create_gobject <- gobject_fun - return(.Object) + return(obj) } ) @@ -414,7 +462,7 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { # MODULAR #### - +## transcript #### .xenium_transcript <- function( path, @@ -452,6 +500,7 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { verbose = verbose ) vmsg("Loading transcript level info...", .v = verbose) + # pass to specific reader fun based on filetype tx <- switch(e, "csv" = do.call(.xenium_transcript_csv, args = c(a, list(cores = cores))), @@ -558,6 +607,9 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { return(tx_dt) } + +## polygon #### + .xenium_poly <- function( path, name = "cell", @@ -572,6 +624,7 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { a <- list(path = path) vmsg("Loading boundary info...", .v = verbose) + # pass to specific load function based on file extension polys <- switch(e, "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), "parquet" = do.call(.xenium_poly_parquet, args = a), @@ -608,6 +661,9 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { data.table::setDT() } + +## cellmeta #### + .xenium_cellmeta <- function( path, dropcols = c(), @@ -651,10 +707,12 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { arrow::read_parquet(file = path, as_data_frame = FALSE) %>% dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% dplyr::select(-dplyr::any_of(dropcols)) %>% - as.data.frame() %>% - data.table::setDT() + data.table::as.data.table() } + +## featmeta #### + .xenium_featmeta <- function( path, gene_ids = "symbols", @@ -694,6 +752,42 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { return(fx) } + +.load_xenium_panel_json <- function(path, gene_ids = "symbols") { + gene_ids <- match.arg(gene_ids, c("symbols", "ensembl")) + + # tested on v1.6 + j <- jsonlite::fromJSON(path) + # j$metadata # dataset meta + # j$payload # main content + # j$payload$chemistry # panel chemistry used + # j$payload$customer # panel customer + # j$payload$designer # panel designer + # j$payload$spec_version # versioning + # j$payload$panel # dataset panel stats + + panel_info <- j$payload$targets$type %>% + data.table::as.data.table() + + switch(gene_ids, + "symbols" = data.table::setnames( + panel_info, + old = c("data.id", "data.name", "descriptor"), + new = c("ensembl", "feat_ID", "type") + ), + "ensembl" = data.table::setnames( + panel_info, + old = c("data.id", "data.name", "descriptor"), + new = c("feat_ID", "symbol", "type") + ) + ) + return(panel_info) +} + + + +## expression #### + .xenium_expression <- function( path, gene_ids = "symbols", @@ -772,22 +866,101 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { ) } + + +## image #### + .xenium_image <- function( path, - name = "image", + name, + output_dir, + micron, negative_y = TRUE, flip_vertical = FALSE, flip_horizontal = FALSE, - affine = NULL, - verbose = NULL + verbose = NULL, + ... ) { if (missing(path)) { stop(wrap_txt( - "No path to image file to load provided or auto-detected" + "No path to image file or dir to load provided or auto-detected" ), call. = FALSE) } - checkmate::assert_file_exists(path) + # [directory input] -> load as individual .ome paths with defined names + # intended for usage with single channel stain focus images + if (checkmate::check_directory_exists(path)) { + if (missing(output_dir)) output_dir <- file.path(path, "tif_exports") + # find actual image paths in directory + ome_paths <- list.files(path, full.names = TRUE, pattern = ".ome") + # parse ome metadata for images names + ome_xml <- ometif_metadata( + ome_paths[[1]], node = "Channel", output = "data.frame" + ) + # update names with the channel names + name <- ome_xml$Name + + # do conversion if file does not already exist in output_dir + vmsg(.v = verbose, "> ometif to tif conversion") + lapply(ome_paths, function(ome) { + try(silent = TRUE, { # ignore fail when already written + ometif_to_tif( + # can pass overwrite = TRUE via ... if needed + ome, output_dir = output_dir, ... + ) + }) + }) + # update path param + path <- list.files(output_dir, pattern = ".tif", full.names = TRUE) + } + + # set default if still missing + if (missing(name)) name <- "image" + + # [paths] + # check files exist + vapply(path, checkmate::assert_file_exists, FUN.VALUE = character(1L)) + # names + if (length(name) != length(path) && + length(name) != 1) { + stop("length of `name` should be same as length of `path`") + } + if (length(name) == 1 && + length(path) > 1) { + name <- sprintf("%s_%d", name, seq_along(path)) + } + # micron + checkmate::assert_numeric(micron) + + progressr::with_progress({ + p <- progressr::progressor(along = path) + + gimg_list <- lapply(seq_along(path), function(img_i) { + gimg <- .xenium_image_single( + path = path[[img_i]], + name = name[[img_i]], + micron = micron, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + verbose = verbose + ) + p() + return(gimg) + }) + }) + return(gimg_list) +} + +.xenium_image_single <- function( + path, + name = "image", + micron, + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL +) { vmsg(.v = verbose, sprintf("loading image as '%s'", name)) vmsg(.v = verbose, .is_debug = TRUE, path) vmsg( @@ -797,670 +970,379 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { .prefix = "" ) - verbose <- verbose %null% TRUE + # warning to for single channel .ome.tif images that terra::rast() and + # gdal still have difficulties with. May be related to JP2OpenJPEG driver + # but even loading this does not seem to fix it. + if (file_extension(path) %in% "ome") { + warning(wrap_txt( + ".ome.tif images not fully supported. + If reading fails, try converting to a basic tif `ometif_to_tif()`") + ) + } - # TODO + img <- createGiottoLargeImage(path, + name = name, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + negative_y = negative_y, + verbose = verbose + ) + img <- rescale(img, micron, x0 = 0, y0 = 0) + return(img) } +# for affine, see the init method -#' @title Load xenium data from folder -#' @name load_xenium_folder -#' @param path_list list of full filepaths from .read_xenium_folder -#' @inheritParams createGiottoXeniumObject -#' @returns list of loaded in xenium data -NULL -#' @rdname load_xenium_folder -#' @keywords internal -.load_xenium_folder <- function( - path_list, + + + +# OLD #### + + + + +#' @title Create 10x Xenium Giotto Object +#' @name createGiottoXeniumObject +#' @description Given the path to a Xenium experiment output folder, creates a +#' Giotto object +#' @param xenium_dir full path to the exported xenium directory +#' @param data_to_use which type(s) of expression data to build the gobject with +#' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') +#' @param load_format files formats from which to load the data. Either `csv` or +#' `parquet` currently supported. +#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 +#' file. Default is \code{TRUE} +#' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene +#' expression matrix +#' @param bounds_to_load vector of boundary information to load +#' (e.g. \code{'cell'} +#' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both +#' at the same time.) +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' as a subcellular transcript detection (default = 20) +#' @param key_list (advanced) list of grep-based keywords to split the +#' subcellular feature detections by feature type. See details +#' @inheritParams get10Xmatrix +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns giotto object +#' @details +#' +#' [\strong{QC feature types}] +#' Xenium provides info on feature detections that include more than only the +#' Gene Expression specific probes. Additional probes for QC are included: +#' \emph{blank codeword}, \emph{negative control codeword}, and +#' \emph{negative control probe}. These additional QC probes each occupy and +#' are treated as their own feature types so that they can largely remain +#' independent of the gene expression information. +#' +#' [\strong{key_list}] +#' Related to \code{data_to_use = 'subcellular'} workflow only: +#' Additional QC probe information is in the subcellular feature detections +#' information and must be separated from the gene expression information +#' during processing. +#' The QC probes have prefixes that allow them to be selected from the rest of +#' the feature IDs. +#' Giotto uses a named list of keywords (\code{key_list}) to select these QC +#' probes, with the list names being the names that will be assigned as the +#' feature type of these feature detections. The default list is used when +#' \code{key_list} = NULL. +#' +#' Default list: +#' \preformatted{ +#' list(blank_code = 'BLANK_', +#' neg_code = 'NegControlCodeword_', +#' neg_probe = c('NegControlProbe_|antisense_')) +#' } +#' +#' The Gene expression subset is accepted as the subset of feat_IDs that do not +#' map to any of the keys. +#' +#' @export +createGiottoXeniumObject <- function( + xenium_dir, + data_to_use = c("subcellular", "aggregate"), load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", + h5_expression = TRUE, + h5_gene_ids = c("symbols", "ensembl"), gene_column_index = 1, - cores, + bounds_to_load = c("cell"), + qv_threshold = 20, + key_list = NULL, + instructions = NULL, + cores = NA, verbose = TRUE ) { - data_list <- switch(load_format, - "csv" = .load_xenium_folder_csv( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "parquet" = .load_xenium_folder_parquet( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) - ) + # 0. setup + xenium_dir <- path.expand(xenium_dir) - return(data_list) -} + # Determine data to load + data_to_use <- match.arg( + arg = data_to_use, choices = c("subcellular", "aggregate")) + # Determine load formats + load_format <- "csv" # TODO Remove this and add as param once other options + # are available + load_format <- match.arg( + arg = load_format, choices = c("csv", "parquet", "zarr")) -#' @describeIn load_xenium_folder Load from csv files -#' @keywords internal -.load_xenium_folder_csv <- function( - path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE -) { - # initialize return vars - feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL + # set number of cores automatically, but with limit of 10 + cores <- determine_cores(cores) + data.table::setDTthreads(threads = cores) - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_path <- path_list$panel_meta_path[[1]] - fdata_ext <- GiottoUtils::file_extension(fdata_path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json(path = fdata_path, - gene_ids = h5_gene_ids) - } else { - feat_meta <- data.table::fread(fdata_path, nThread = cores) - colnames(feat_meta)[[1]] <- "feat_ID" - } + # 1. detect xenium folder and find filepaths to load - # **** subcellular info **** - if (data_to_use == "subcellular") { - # append missing QC probe info to feat_meta - if (isTRUE(h5_expression)) { - h5 <- hdf5r::H5File$new(path_list$agg_expr_path) - tryCatch({ - root <- names(h5) - feature_id <- h5[[paste0(root, "/features/id")]][] - feature_info <- h5[[paste0(root, "/features/feature_type")]][] - feature_names <- h5[[paste0(root, "/features/name")]][] - features_dt <- data.table::data.table( - "id" = feature_id, - "name" = feature_names, - "feature_type" = feature_info - ) - }, finally = { - h5$close_all() - }) - } else { - features_dt <- data.table::fread( - paste0(path_list$agg_expr_path, "/features.tsv.gz"), - header = FALSE - ) - } - colnames(features_dt) <- c("id", "feat_ID", "feat_class") - feat_meta <- merge( - features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + # path_list contents: + # tx_path + # bound_paths + # cell_meta_path + # agg_expr_path + # panel_meta_path + path_list <- .read_xenium_folder( + xenium_dir = xenium_dir, + data_to_use = data_to_use, + bounds_to_load = bounds_to_load, + load_format = load_format, + h5_expression = h5_expression, + verbose = verbose + ) - GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) - tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) - data.table::setnames( - x = tx_dt, - old = c("feature_name", "x_location", "y_location"), - new = c("feat_ID", "x", "y") - ) - GiottoUtils::vmsg("Loading boundary info...", .v = verbose) - bound_dt_list <- lapply( - path_list$bound_paths, - function(x) data.table::fread(x[[1]], nThread = cores) - ) - } + # 2. load in data - # **** aggregate info **** - GiottoUtils::vmsg("loading cell metadata...", .v = verbose) - cell_meta <- data.table::fread( - path_list$cell_meta_path[[1]], nThread = cores) + # data_list contents: + # feat_meta + # tx_dt + # bound_dt_list + # cell_meta + # agg_expr + data_list <- .load_xenium_folder( + path_list = path_list, + load_format = load_format, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ) - if (data_to_use == "aggregate") { - GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) - if (isTRUE(h5_expression)) { - agg_expr <- get10Xmatrix_h5( - path_to_data = path_list$agg_expr_path, - gene_ids = h5_gene_ids, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } else { - agg_expr <- get10Xmatrix( - path_to_data = path_list$agg_expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = TRUE, - split_by_type = TRUE + + # TODO load images + + + # 3. Create giotto objects + + if (data_to_use == "subcellular") { + # ** feat type search keys ** + if (is.null(key_list)) { + key_list <- list( + blank_code = "BLANK_", + neg_code = "NegControlCodeword_", + neg_probe = c("NegControlProbe_|antisense_") ) } + + # needed: + # feat_meta + # tx_dt + # bound_dt_list + xenium_gobject <- .createGiottoXeniumObject_subcellular( + data_list = data_list, + qv_threshold = qv_threshold, + key_list = key_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) } - data_list <- list( - "feat_meta" = feat_meta, - "tx_dt" = tx_dt, - "bound_dt_list" = bound_dt_list, - "cell_meta" = cell_meta, - "agg_expr" = agg_expr - ) + if (data_to_use == "aggregate") { + # needed: + # feat_meta + # cell_meta + # agg_expr + # optional? + # tx_dt + # bound_dt_list + xenium_gobject <- .createGiottoXeniumObject_aggregate( + data_list = data_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) + } - return(data_list) + return(xenium_gobject) } -#' @describeIn load_xenium_folder Load from parquet files +#' @title Create a Xenium Giotto object from subcellular info +#' @name .createGiottoXeniumObject_subcellular +#' @description Subcellular workflow for createGiottoXeniumObject +#' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} +#' @param key_list regex-based search keys for feature IDs to allow separation +#' into separate giottoPoints objects by feat_type +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' as a subcellular transcript detection (default = 20) +#' @inheritParams get10Xmatrix +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns giotto object +#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate #' @keywords internal -.load_xenium_folder_parquet <- function( - path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, +.createGiottoXeniumObject_subcellular <- function( + data_list, + key_list = NULL, + qv_threshold = 20, + instructions = NULL, + cores = NA, verbose = TRUE ) { - # initialize return vars - feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL - # dplyr variable - cell_id <- NULL + # data.table vars + qv <- NULL - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_path <- path_list$panel_meta_path[[1]] - fdata_ext <- GiottoUtils::file_extension(fdata_path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json( - path = fdata_path, gene_ids = h5_gene_ids) - } else { - feat_meta <- data.table::fread(fdata_path, nThread = cores) - colnames(feat_meta)[[1]] <- "feat_ID" - } + # Unpack data_list info + feat_meta <- data_list$feat_meta + tx_dt <- data_list$tx_dt + bound_dt_list <- data_list$bound_dt_list - # **** subcellular info **** - if (data_to_use == "subcellular") { - # define for data.table - transcript_id <- feature_name <- NULL + # define for data.table + cell_id <- feat_ID <- feature_name <- NULL - # append missing QC probe info to feat_meta - if (isTRUE(h5_expression)) { - h5 <- hdf5r::H5File$new(path_list$agg_expr_path) - tryCatch({ - root <- names(h5) - feature_id <- h5[[paste0(root, "/features/id")]][] - feature_info <- h5[[paste0(root, "/features/feature_type")]][] - feature_names <- h5[[paste0(root, "/features/name")]][] - features_dt <- data.table::data.table( - "id" = feature_id, - "name" = feature_names, - "feature_type" = feature_info - ) - }, finally = { - h5$close_all() - }) - } else { - features_dt <- arrow::read_tsv_arrow(paste0( - path_list$agg_expr_path, "/features.tsv.gz"), - col_names = FALSE - ) %>% - data.table::setDT() - } - colnames(features_dt) <- c("id", "feat_ID", "feat_class") - feat_meta <- merge(features_dt[ - , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + vmsg("Building subcellular giotto object...", .v = verbose) + # Giotto points object + vmsg("> points data prep...", .v = verbose) - vmsg("Loading transcript level info...", .v = verbose) - tx_dt <- arrow::read_parquet( - file = path_list$tx_path[[1]], - as_data_frame = FALSE - ) %>% - dplyr::mutate( - transcript_id = cast(transcript_id, arrow::string())) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - dplyr::mutate( - feature_name = cast(feature_name, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - data.table::setnames( - x = tx_dt, - old = c("feature_name", "x_location", "y_location"), - new = c("feat_ID", "x", "y") + # filter by qv_threshold + vmsg("> filtering feature detections for Phred score >= ", + qv_threshold, .v = verbose) + n_before <- tx_dt[, .N] + tx_dt_filtered <- tx_dt[qv >= qv_threshold] + n_after <- tx_dt_filtered[, .N] + + if (verbose) { + cat( + "Number of feature points removed: ", + n_before - n_after, + " out of ", n_before, "\n" ) - vmsg("Loading boundary info...", .v = verbose) - bound_dt_list <- lapply(path_list$bound_paths, function(x) { - arrow::read_parquet(file = x[[1]], as_data_frame = FALSE) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - }) } - # **** aggregate info **** - if (data_to_use == "aggregate") { - vmsg("Loading cell metadata...", .v = verbose) - cell_meta <- arrow::read_parquet( - file = path_list$cell_meta_path[[1]], - as_data_frame = FALSE - ) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - # NOTE: no parquet for agg_expr. - vmsg("Loading aggregated expression...", .v = verbose) - if (isTRUE(h5_expression)) { - agg_expr <- get10Xmatrix_h5( - path_to_data = path_list$agg_expr_path, - gene_ids = h5_gene_ids, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } else { - agg_expr <- get10Xmatrix( - path_to_data = path_list$agg_expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } - } + vmsg("> splitting detections by feat_type", .v = verbose) + # discover feat_IDs for each feat_type + all_IDs <- tx_dt_filtered[, unique(feat_ID)] + feat_types_IDs <- lapply( + key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) + rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) + feat_types_IDs <- append(rna, feat_types_IDs) - data_list <- list( - "feat_meta" = feat_meta, - "tx_dt" = tx_dt, - "bound_dt_list" = bound_dt_list, - "cell_meta" = cell_meta, - "agg_expr" = agg_expr + # separate detections by feature type + points_list <- lapply( + feat_types_IDs, + function(types) { + tx_dt_filtered[feat_ID %in% types] + } ) - return(data_list) -} - - - -.load_xenium_panel_json <- function(path, gene_ids = "symbols") { - gene_ids <- match.arg(gene_ids, c("symbols", "ensembl")) - - # tested on v1.6 - j <- jsonlite::fromJSON(path) - # j$metadata # dataset meta - # j$payload # main content - # j$payload$chemistry # panel chemistry used - # j$payload$customer # panel customer - # j$payload$designer # panel designer - # j$payload$spec_version # versioning - # j$payload$panel # dataset panel stats + # Giotto polygons object + vmsg("> polygons data prep...", .v = verbose) + polys_list <- lapply( + bound_dt_list, + function(bound_type) { + bound_type[, cell_id := as.character(cell_id)] + } + ) - panel_info <- j$payload$targets$type %>% - data.table::as.data.table() + xenium_gobject <- createGiottoObjectSubcellular( + gpoints = points_list, + gpolygons = polys_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) - switch(gene_ids, - "symbols" = data.table::setnames( - panel_info, - old = c("data.id", "data.name", "descriptor"), - new = c("ensembl", "feat_ID", "type") - ), - "ensembl" = data.table::setnames( - panel_info, - old = c("data.id", "data.name", "descriptor"), - new = c("feat_ID", "symbol", "type") - ) + # generate centroids + vmsg("Calculating polygon centroids...", .v = verbose) + xenium_gobject <- addSpatialCentroidLocations( + xenium_gobject, + poly_info = c(names(bound_dt_list)), + provenance = as.list(names(bound_dt_list)) ) - return(panel_info) -} + return(xenium_gobject) +} -# OLD #### -#' @title Create 10x Xenium Giotto Object -#' @name createGiottoXeniumObject -#' @description Given the path to a Xenium experiment output folder, creates a -#' Giotto object -#' @param xenium_dir full path to the exported xenium directory -#' @param data_to_use which type(s) of expression data to build the gobject with -#' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') -#' @param load_format files formats from which to load the data. Either `csv` or -#' `parquet` currently supported. -#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 -#' file. Default is \code{TRUE} -#' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene -#' expression matrix -#' @param bounds_to_load vector of boundary information to load -#' (e.g. \code{'cell'} -#' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both -#' at the same time.) -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included -#' as a subcellular transcript detection (default = 20) -#' @param key_list (advanced) list of grep-based keywords to split the -#' subcellular feature detections by feature type. See details +#' @title Create a Xenium Giotto object from aggregate info +#' @name .createGiottoXeniumObject_aggregate +#' @description Aggregate workflow for createGiottoXeniumObject +#' @param data_list list of data loaded by \code{.load_xenium_folder} #' @inheritParams get10Xmatrix #' @inheritParams GiottoClass::createGiottoObjectSubcellular #' @returns giotto object -#' @details -#' -#' [\strong{QC feature types}] -#' Xenium provides info on feature detections that include more than only the -#' Gene Expression specific probes. Additional probes for QC are included: -#' \emph{blank codeword}, \emph{negative control codeword}, and -#' \emph{negative control probe}. These additional QC probes each occupy and -#' are treated as their own feature types so that they can largely remain -#' independent of the gene expression information. -#' -#' [\strong{key_list}] -#' Related to \code{data_to_use = 'subcellular'} workflow only: -#' Additional QC probe information is in the subcellular feature detections -#' information and must be separated from the gene expression information -#' during processing. -#' The QC probes have prefixes that allow them to be selected from the rest of -#' the feature IDs. -#' Giotto uses a named list of keywords (\code{key_list}) to select these QC -#' probes, with the list names being the names that will be assigned as the -#' feature type of these feature detections. The default list is used when -#' \code{key_list} = NULL. -#' -#' Default list: -#' \preformatted{ -#' list(blank_code = 'BLANK_', -#' neg_code = 'NegControlCodeword_', -#' neg_probe = c('NegControlProbe_|antisense_')) -#' } -#' -#' The Gene expression subset is accepted as the subset of feat_IDs that do not -#' map to any of the keys. -#' -#' @export -createGiottoXeniumObject <- function( - xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), - qv_threshold = 20, - key_list = NULL, +#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular +#' @keywords internal +.createGiottoXeniumObject_aggregate <- function( + data_list, + # include_analysis = FALSE, instructions = NULL, cores = NA, verbose = TRUE ) { - # 0. setup - xenium_dir <- path.expand(xenium_dir) - - # Determine data to load - data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) - - # Determine load formats - load_format <- "csv" # TODO Remove this and add as param once other options - # are available - load_format <- match.arg( - arg = load_format, choices = c("csv", "parquet", "zarr")) + # Unpack data_list info + feat_meta <- data_list$feat_meta + cell_meta <- data_list$cell_meta + agg_expr <- data_list$agg_expr - # set number of cores automatically, but with limit of 10 - cores <- determine_cores(cores) - data.table::setDTthreads(threads = cores) + # define for data.table + cell_ID <- x_centroid <- y_centroid <- NULL - # 1. detect xenium folder and find filepaths to load + # clean up names for aggregate matrices + names(agg_expr) <- gsub(pattern = " ", replacement = "_", names(agg_expr)) + geneExpMat <- which(names(agg_expr) == "Gene_Expression") + names(agg_expr)[[geneExpMat]] <- "raw" - # path_list contents: - # tx_path - # bound_paths - # cell_meta_path - # agg_expr_path - # panel_meta_path - path_list <- .read_xenium_folder( - xenium_dir = xenium_dir, - data_to_use = data_to_use, - bounds_to_load = bounds_to_load, - load_format = load_format, - h5_expression = h5_expression, - verbose = verbose - ) + # set cell_id as character + cell_meta <- cell_meta[, data.table::setnames(.SD, "cell_id", "cell_ID")] + cell_meta <- cell_meta[, cell_ID := as.character(cell_ID)] + # set up spatial locations + agg_spatlocs <- cell_meta[, .(x_centroid, y_centroid, cell_ID)] - # 2. load in data + # set up metadata + agg_meta <- cell_meta[, !c("x_centroid", "y_centroid")] - # data_list contents: - # feat_meta - # tx_dt - # bound_dt_list - # cell_meta - # agg_expr - data_list <- .load_xenium_folder( - path_list = path_list, - load_format = load_format, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, + vmsg("Building aggregate giotto object...", .v = verbose) + xenium_gobject <- createGiottoObject( + expression = agg_expr, + spatial_locs = agg_spatlocs, + instructions = instructions, cores = cores, verbose = verbose ) + # append aggregate metadata + xenium_gobject <- addCellMetadata( + gobject = xenium_gobject, + new_metadata = agg_meta, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + xenium_gobject <- addFeatMetadata( + gobject = xenium_gobject, + new_metadata = feat_meta, + by_column = TRUE, + column_feat_ID = "feat_ID" + ) - # TODO load images - - - # 3. Create giotto objects - - if (data_to_use == "subcellular") { - # ** feat type search keys ** - if (is.null(key_list)) { - key_list <- list( - blank_code = "BLANK_", - neg_code = "NegControlCodeword_", - neg_probe = c("NegControlProbe_|antisense_") - ) - } - - # needed: - # feat_meta - # tx_dt - # bound_dt_list - xenium_gobject <- .createGiottoXeniumObject_subcellular( - data_list = data_list, - qv_threshold = qv_threshold, - key_list = key_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - } - - if (data_to_use == "aggregate") { - # needed: - # feat_meta - # cell_meta - # agg_expr - # optional? - # tx_dt - # bound_dt_list - xenium_gobject <- .createGiottoXeniumObject_aggregate( - data_list = data_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - } - - return(xenium_gobject) -} - - - - -#' @title Create a Xenium Giotto object from subcellular info -#' @name .createGiottoXeniumObject_subcellular -#' @description Subcellular workflow for createGiottoXeniumObject -#' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} -#' @param key_list regex-based search keys for feature IDs to allow separation -#' into separate giottoPoints objects by feat_type -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included -#' as a subcellular transcript detection (default = 20) -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate -#' @keywords internal -.createGiottoXeniumObject_subcellular <- function( - data_list, - key_list = NULL, - qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE -) { - # data.table vars - qv <- NULL - - # Unpack data_list info - feat_meta <- data_list$feat_meta - tx_dt <- data_list$tx_dt - bound_dt_list <- data_list$bound_dt_list - - # define for data.table - cell_id <- feat_ID <- feature_name <- NULL - - vmsg("Building subcellular giotto object...", .v = verbose) - # Giotto points object - vmsg("> points data prep...", .v = verbose) - - # filter by qv_threshold - vmsg("> filtering feature detections for Phred score >= ", - qv_threshold, .v = verbose) - n_before <- tx_dt[, .N] - tx_dt_filtered <- tx_dt[qv >= qv_threshold] - n_after <- tx_dt_filtered[, .N] - - if (verbose) { - cat( - "Number of feature points removed: ", - n_before - n_after, - " out of ", n_before, "\n" - ) - } - - vmsg("> splitting detections by feat_type", .v = verbose) - # discover feat_IDs for each feat_type - all_IDs <- tx_dt_filtered[, unique(feat_ID)] - feat_types_IDs <- lapply( - key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) - rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) - feat_types_IDs <- append(rna, feat_types_IDs) - - # separate detections by feature type - points_list <- lapply( - feat_types_IDs, - function(types) { - tx_dt_filtered[feat_ID %in% types] - } - ) - - # Giotto polygons object - vmsg("> polygons data prep...", .v = verbose) - polys_list <- lapply( - bound_dt_list, - function(bound_type) { - bound_type[, cell_id := as.character(cell_id)] - } - ) - - xenium_gobject <- createGiottoObjectSubcellular( - gpoints = points_list, - gpolygons = polys_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - - # generate centroids - vmsg("Calculating polygon centroids...", .v = verbose) - xenium_gobject <- addSpatialCentroidLocations( - xenium_gobject, - poly_info = c(names(bound_dt_list)), - provenance = as.list(names(bound_dt_list)) - ) - - return(xenium_gobject) -} - - - - - -#' @title Create a Xenium Giotto object from aggregate info -#' @name .createGiottoXeniumObject_aggregate -#' @description Aggregate workflow for createGiottoXeniumObject -#' @param data_list list of data loaded by \code{.load_xenium_folder} -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular -#' @keywords internal -.createGiottoXeniumObject_aggregate <- function( - data_list, - # include_analysis = FALSE, - instructions = NULL, - cores = NA, - verbose = TRUE -) { - # Unpack data_list info - feat_meta <- data_list$feat_meta - cell_meta <- data_list$cell_meta - agg_expr <- data_list$agg_expr - - # define for data.table - cell_ID <- x_centroid <- y_centroid <- NULL - - # clean up names for aggregate matrices - names(agg_expr) <- gsub(pattern = " ", replacement = "_", names(agg_expr)) - geneExpMat <- which(names(agg_expr) == "Gene_Expression") - names(agg_expr)[[geneExpMat]] <- "raw" - - # set cell_id as character - cell_meta <- cell_meta[, data.table::setnames(.SD, "cell_id", "cell_ID")] - cell_meta <- cell_meta[, cell_ID := as.character(cell_ID)] - - # set up spatial locations - agg_spatlocs <- cell_meta[, .(x_centroid, y_centroid, cell_ID)] - - # set up metadata - agg_meta <- cell_meta[, !c("x_centroid", "y_centroid")] - - vmsg("Building aggregate giotto object...", .v = verbose) - xenium_gobject <- createGiottoObject( - expression = agg_expr, - spatial_locs = agg_spatlocs, - instructions = instructions, - cores = cores, - verbose = verbose - ) - - # append aggregate metadata - xenium_gobject <- addCellMetadata( - gobject = xenium_gobject, - new_metadata = agg_meta, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - xenium_gobject <- addFeatMetadata( - gobject = xenium_gobject, - new_metadata = feat_meta, - by_column = TRUE, - column_feat_ID = "feat_ID" - ) - - return(xenium_gobject) -} + return(xenium_gobject) +} @@ -1636,4 +1518,280 @@ createGiottoXeniumObject <- function( return(path_list) } +#' @title Load xenium data from folder +#' @name load_xenium_folder +#' @param path_list list of full filepaths from .read_xenium_folder +#' @inheritParams createGiottoXeniumObject +#' @returns list of loaded in xenium data +NULL + +#' @rdname load_xenium_folder +#' @keywords internal +.load_xenium_folder <- function( + path_list, + load_format = "csv", + data_to_use = "subcellular", + h5_expression = "FALSE", + h5_gene_ids = "symbols", + gene_column_index = 1, + cores, + verbose = TRUE +) { + data_list <- switch(load_format, + "csv" = .load_xenium_folder_csv( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "parquet" = .load_xenium_folder_parquet( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) + ) + + return(data_list) +} + +#' @describeIn load_xenium_folder Load from csv files +#' @keywords internal +.load_xenium_folder_csv <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE +) { + # initialize return vars + feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL + + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_path <- path_list$panel_meta_path[[1]] + fdata_ext <- GiottoUtils::file_extension(fdata_path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json(path = fdata_path, + gene_ids = h5_gene_ids) + } else { + feat_meta <- data.table::fread(fdata_path, nThread = cores) + colnames(feat_meta)[[1]] <- "feat_ID" + } + + # **** subcellular info **** + if (data_to_use == "subcellular") { + # append missing QC probe info to feat_meta + if (isTRUE(h5_expression)) { + h5 <- hdf5r::H5File$new(path_list$agg_expr_path) + tryCatch({ + root <- names(h5) + feature_id <- h5[[paste0(root, "/features/id")]][] + feature_info <- h5[[paste0(root, "/features/feature_type")]][] + feature_names <- h5[[paste0(root, "/features/name")]][] + features_dt <- data.table::data.table( + "id" = feature_id, + "name" = feature_names, + "feature_type" = feature_info + ) + }, finally = { + h5$close_all() + }) + } else { + features_dt <- data.table::fread( + paste0(path_list$agg_expr_path, "/features.tsv.gz"), + header = FALSE + ) + } + colnames(features_dt) <- c("id", "feat_ID", "feat_class") + feat_meta <- merge( + features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + + GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) + tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) + data.table::setnames( + x = tx_dt, + old = c("feature_name", "x_location", "y_location"), + new = c("feat_ID", "x", "y") + ) + + GiottoUtils::vmsg("Loading boundary info...", .v = verbose) + bound_dt_list <- lapply( + path_list$bound_paths, + function(x) data.table::fread(x[[1]], nThread = cores) + ) + } + + # **** aggregate info **** + GiottoUtils::vmsg("loading cell metadata...", .v = verbose) + cell_meta <- data.table::fread( + path_list$cell_meta_path[[1]], nThread = cores) + + if (data_to_use == "aggregate") { + GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) + if (isTRUE(h5_expression)) { + agg_expr <- get10Xmatrix_h5( + path_to_data = path_list$agg_expr_path, + gene_ids = h5_gene_ids, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } else { + agg_expr <- get10Xmatrix( + path_to_data = path_list$agg_expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } + } + + data_list <- list( + "feat_meta" = feat_meta, + "tx_dt" = tx_dt, + "bound_dt_list" = bound_dt_list, + "cell_meta" = cell_meta, + "agg_expr" = agg_expr + ) + + return(data_list) +} + + + + +#' @describeIn load_xenium_folder Load from parquet files +#' @keywords internal +.load_xenium_folder_parquet <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE +) { + # initialize return vars + feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL + # dplyr variable + cell_id <- NULL + + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_path <- path_list$panel_meta_path[[1]] + fdata_ext <- GiottoUtils::file_extension(fdata_path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json( + path = fdata_path, gene_ids = h5_gene_ids) + } else { + feat_meta <- data.table::fread(fdata_path, nThread = cores) + colnames(feat_meta)[[1]] <- "feat_ID" + } + + # **** subcellular info **** + if (data_to_use == "subcellular") { + # define for data.table + transcript_id <- feature_name <- NULL + + # append missing QC probe info to feat_meta + if (isTRUE(h5_expression)) { + h5 <- hdf5r::H5File$new(path_list$agg_expr_path) + tryCatch({ + root <- names(h5) + feature_id <- h5[[paste0(root, "/features/id")]][] + feature_info <- h5[[paste0(root, "/features/feature_type")]][] + feature_names <- h5[[paste0(root, "/features/name")]][] + features_dt <- data.table::data.table( + "id" = feature_id, + "name" = feature_names, + "feature_type" = feature_info + ) + }, finally = { + h5$close_all() + }) + } else { + features_dt <- arrow::read_tsv_arrow(paste0( + path_list$agg_expr_path, "/features.tsv.gz"), + col_names = FALSE + ) %>% + data.table::setDT() + } + colnames(features_dt) <- c("id", "feat_ID", "feat_class") + feat_meta <- merge(features_dt[ + , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + + vmsg("Loading transcript level info...", .v = verbose) + tx_dt <- arrow::read_parquet( + file = path_list$tx_path[[1]], + as_data_frame = FALSE + ) %>% + dplyr::mutate( + transcript_id = cast(transcript_id, arrow::string())) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::mutate( + feature_name = cast(feature_name, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + data.table::setnames( + x = tx_dt, + old = c("feature_name", "x_location", "y_location"), + new = c("feat_ID", "x", "y") + ) + vmsg("Loading boundary info...", .v = verbose) + bound_dt_list <- lapply(path_list$bound_paths, function(x) { + arrow::read_parquet(file = x[[1]], as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + }) + } + # **** aggregate info **** + if (data_to_use == "aggregate") { + vmsg("Loading cell metadata...", .v = verbose) + cell_meta <- arrow::read_parquet( + file = path_list$cell_meta_path[[1]], + as_data_frame = FALSE + ) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + + # NOTE: no parquet for agg_expr. + vmsg("Loading aggregated expression...", .v = verbose) + if (isTRUE(h5_expression)) { + agg_expr <- get10Xmatrix_h5( + path_to_data = path_list$agg_expr_path, + gene_ids = h5_gene_ids, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } else { + agg_expr <- get10Xmatrix( + path_to_data = path_list$agg_expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } + } + + data_list <- list( + "feat_meta" = feat_meta, + "tx_dt" = tx_dt, + "bound_dt_list" = bound_dt_list, + "cell_meta" = cell_meta, + "agg_expr" = agg_expr + ) + + return(data_list) +} From 856264374fe298dda49799f6cb65c09f4ba4d9c0 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:30:37 -0400 Subject: [PATCH 113/150] feat: add minimal `importXenium()` and fix bug --- R/convenience_xenium.R | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index cc61d87e4..6a2953fee 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -459,6 +459,36 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { +# CREATE READER #### + +#' @title Import a 10X Xenium Assay +#' @name importXenium +#' @description +#' Giotto import functionalities for Xenium datasets. This function creates a +#' `XeniumReader` instance that has convenient reader functions for converting +#' individual pieces of Xenium data into Giotto-compatible representations. +#' +#' These functions should have all param values provided as defaults, but +#' can be flexibly modified to do things such as look in alternative +#' directories or paths +#' @param xenium_dir Xenium output directory +#' @returns `XeniumReader` object +#' @export +importXenium <- function( + xenium_dir = NULL, qv_threshold = 20, +) { + a <- list(Class = "XeniumReader") + if (!is.null(xenium_dir)) { + a$xenium_dir <- xenium_dir + } + + do.call(new, args = a) +} + + + + + # MODULAR #### @@ -683,9 +713,13 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE cx <- switch(e, - "csv" = do.call(.xenium_cellmeta_csv, args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_cellmeta_parquet, args = a) + "csv" = do.call( + .xenium_cellmeta_csv, + args = c(a, list(cores = cores)) + ), + "parquet" = do.call(.xenium_cellmeta_parquet, args = a) ) + data.table::setnames(cx, "cell_id", "cell_ID") cx <- createCellMetaObj( metadata = cx, From a26a7541e6818962558f91cf731b3e0fdc433fb7 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:38:00 -0400 Subject: [PATCH 114/150] fix bug in import xenium --- R/convenience_xenium.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 6a2953fee..cff626bff 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -475,12 +475,13 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { #' @returns `XeniumReader` object #' @export importXenium <- function( - xenium_dir = NULL, qv_threshold = 20, + xenium_dir = NULL, qv_threshold = 20 ) { a <- list(Class = "XeniumReader") if (!is.null(xenium_dir)) { a$xenium_dir <- xenium_dir } + a$qv_threshold <- qv_threshold do.call(new, args = a) } From 718fc013b34d2bb80bcf1f97246a32a63445c327 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:38:24 -0400 Subject: [PATCH 115/150] chore: document --- NAMESPACE | 1 + man/importXenium.Rd | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 man/importXenium.Rd diff --git a/NAMESPACE b/NAMESPACE index f770bc45f..b9e278ca0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -264,6 +264,7 @@ export(hyperGeometricEnrich) export(identifyTMAcores) export(importCosMx) export(importVisiumHD) +export(importXenium) export(initHMRF_V2) export(insertCrossSectionFeatPlot3D) export(insertCrossSectionSpatPlot3D) diff --git a/man/importXenium.Rd b/man/importXenium.Rd new file mode 100644 index 000000000..190c48a35 --- /dev/null +++ b/man/importXenium.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_xenium.R +\name{importXenium} +\alias{importXenium} +\title{Import a 10X Xenium Assay} +\usage{ +importXenium(xenium_dir = NULL, qv_threshold = 20) +} +\arguments{ +\item{xenium_dir}{Xenium output directory} +} +\value{ +`XeniumReader` object +} +\description{ +Giotto import functionalities for Xenium datasets. This function creates a +`XeniumReader` instance that has convenient reader functions for converting +individual pieces of Xenium data into Giotto-compatible representations. + +These functions should have all param values provided as defaults, but +can be flexibly modified to do things such as look in alternative +directories or paths +} From 3d74951d3e3f1d6e93356da636bdd44819efcaa4 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:42:35 -0400 Subject: [PATCH 116/150] Update convenience_xenium.R --- R/convenience_xenium.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index cff626bff..3c03961a9 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -481,7 +481,7 @@ importXenium <- function( if (!is.null(xenium_dir)) { a$xenium_dir <- xenium_dir } - a$qv_threshold <- qv_threshold + a$qv <- qv_threshold do.call(new, args = a) } From 9aaf2b77f12d70ca2d4c07788537f463620f29b2 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:54:16 -0400 Subject: [PATCH 117/150] fix .xenium path --- R/convenience_xenium.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 3c03961a9..97a86ae96 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -161,7 +161,7 @@ setMethod( if (length(obj@micron) == 0) { # if no value already set if (!is.null(experiment_info_path)) { obj@micron <- jsonlite::fromJSON( - manifest$experiment.xenium)$pixel_size + experiment_info_path)$pixel_size } else { warning(wrap_txt("No .xenium file found. Guessing 0.2125 as micron scaling")) @@ -472,6 +472,8 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { #' can be flexibly modified to do things such as look in alternative #' directories or paths #' @param xenium_dir Xenium output directory +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' as a subcellular transcript detection (default = 20) #' @returns `XeniumReader` object #' @export importXenium <- function( From 4023e8b1a870d6a7b90573a594d27daa5cfe635d Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:54:32 -0400 Subject: [PATCH 118/150] document --- man/importXenium.Rd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/man/importXenium.Rd b/man/importXenium.Rd index 190c48a35..144db176d 100644 --- a/man/importXenium.Rd +++ b/man/importXenium.Rd @@ -8,6 +8,9 @@ importXenium(xenium_dir = NULL, qv_threshold = 20) } \arguments{ \item{xenium_dir}{Xenium output directory} + +\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included +as a subcellular transcript detection (default = 20)} } \value{ `XeniumReader` object From 63c95920fc98ddc65ef73b6d6a35bb52b75ff399 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 08:03:57 -0400 Subject: [PATCH 119/150] fix feat reading --- R/convenience_xenium.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 97a86ae96..a305fbb95 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -237,13 +237,14 @@ setMethod( # load featmeta fmeta_fun <- function( path = panel_meta_path, + gene_ids = "symbols", dropcols = c(), cores = determine_cores(), verbose = NULL ) { .xenium_featmeta( path = path, - gene_ids, + gene_ids = gene_ids, dropcols = dropcols, verbose = verbose ) From 72f3e5d6612e2cd7d0ccf482f42c88eaed6c8d70 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 08:15:07 -0400 Subject: [PATCH 120/150] add debug messages --- R/convenience_xenium.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index a305fbb95..791d004f6 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -246,6 +246,7 @@ setMethod( path = path, gene_ids = gene_ids, dropcols = dropcols, + cores = cores, verbose = verbose ) } @@ -525,6 +526,7 @@ importXenium <- function( checkmate::assert_file_exists(path) e <- file_extension(path) %>% head(1L) %>% tolower() vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) + vmsg(.v = verbose, .is_debug = TRUE, path) # read in as data.table a <- list( @@ -658,6 +660,8 @@ importXenium <- function( a <- list(path = path) vmsg("Loading boundary info...", .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, "[POLY_READ] FMT =", e) + vmsg(.v = verbose, .is_debug = TRUE, path) # pass to specific load function based on file extension polys <- switch(e, "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), @@ -714,6 +718,7 @@ importXenium <- function( e <- file_extension(path) %>% head(1L) %>% tolower() a <- list(path = path, dropcols = dropcols) vmsg('Loading cell metadata...', .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, "[CMETA_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE cx <- switch(e, @@ -765,6 +770,7 @@ importXenium <- function( } checkmate::assert_file_exists(path) vmsg("Loading feature metadata...", .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) # updated for pipeline v1.6 json format fdata_ext <- GiottoUtils::file_extension(path) if ("json" %in% fdata_ext) { @@ -855,6 +861,7 @@ importXenium <- function( } vmsg("Loading 10x pre-aggregated expression...", .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, "[EXPR_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE ex <- switch(e, From 6ec475e894b54778f3d675917b4c42fd94ca928e Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 08:24:39 -0400 Subject: [PATCH 121/150] fix: xenium fmeta dropcols --- R/convenience_xenium.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 791d004f6..7e8935698 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -783,7 +783,8 @@ importXenium <- function( } dropcols <- dropcols[dropcols %in% colnames(feat_meta)] - feat_meta[, (dropcols) := NULL] # remove dropcols + # remove dropcols + if (length(dropcols) > 0L) feat_meta[, (dropcols) := NULL] fx <- createFeatMetaObj( metadata = feat_meta, From a0e3ee2af2ed9a51737ff5563811e1783896f12a Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 08:52:30 -0400 Subject: [PATCH 122/150] add y flipping to xenium vector data --- R/convenience_xenium.R | 49 +++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 7e8935698..b17304e39 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -171,27 +171,29 @@ setMethod( # transcripts load call tx_fun <- function( - path = tx_path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - dropcols = c(), - qv_threshold = obj@qv, - cores = determine_cores(), - verbose = NULL + path = tx_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + flip_vertical = TRUE, + dropcols = c(), + qv_threshold = obj@qv, + cores = determine_cores(), + verbose = NULL ) { .xenium_transcript( path = path, feat_type = feat_type, split_keyword = split_keyword, + flip_vertical = flip_vertical, dropcols = dropcols, qv_threshold = qv_threshold, cores = cores, @@ -204,6 +206,7 @@ setMethod( poly_fun <- function( path = cell_bound_path, name = "cell", + flip_vertical = TRUE, calc_centroids = TRUE, cores = determine_cores(), verbose = NULL @@ -211,6 +214,7 @@ setMethod( .xenium_poly( path = path, name = name, + flip_vertical = flip_vertical, calc_centroids = calc_centroids, cores = cores, verbose = verbose @@ -512,6 +516,7 @@ importXenium <- function( "UnassignedCodeword", "NegControlCodeword" ), + flip_vertical = TRUE, dropcols = c(), qv_threshold = 20, cores = determine_cores(), @@ -537,6 +542,7 @@ importXenium <- function( ) vmsg("Loading transcript level info...", .v = verbose) # pass to specific reader fun based on filetype + # return as data.table with colnames `feat_ID`, `x`, `y` tx <- switch(e, "csv" = do.call(.xenium_transcript_csv, args = c(a, list(cores = cores))), @@ -544,6 +550,10 @@ importXenium <- function( "zarr" = stop('zarr not yet supported') ) + # flip values vertically + y <- NULL # NSE var + if (flip_vertical) tx[, y := -y] + # create gpoints gpointslist <- createGiottoPoints( x = tx, @@ -649,6 +659,7 @@ importXenium <- function( .xenium_poly <- function( path, name = "cell", + flip_vertical = TRUE, calc_centroids = TRUE, cores = determine_cores(), verbose = NULL @@ -663,12 +674,16 @@ importXenium <- function( vmsg(.v = verbose, .is_debug = TRUE, "[POLY_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) # pass to specific load function based on file extension + # returns as data.table with colnames `cell_id`, `vertex_x`, `vertex_y` polys <- switch(e, "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), "parquet" = do.call(.xenium_poly_parquet, args = a), "zarr" = stop("zarr not yet supported") ) + vertex_y <- NULL # NSE var + if (flip_vertical) polys[, vertex_y := -vertex_y] + # create gpolys verbose <- verbose %null% FALSE gpolys <- createGiottoPolygon( @@ -935,7 +950,7 @@ importXenium <- function( # [directory input] -> load as individual .ome paths with defined names # intended for usage with single channel stain focus images - if (checkmate::check_directory_exists(path)) { + if (checkmate::test_directory_exists(path)) { if (missing(output_dir)) output_dir <- file.path(path, "tif_exports") # find actual image paths in directory ome_paths <- list.files(path, full.names = TRUE, pattern = ".ome") From 6e2b01f14a672bbca2470066d27c9ddddc794d47 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:05:42 -0400 Subject: [PATCH 123/150] fix xenium expression param passing --- R/convenience_xenium.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index b17304e39..e76f13a20 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -881,8 +881,8 @@ importXenium <- function( vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE ex <- switch(e, - "mtx" = do.call(.xenium_cellmeta_csv, args = a), - "h5" = do.call(.xenium_cellmeta_parquet, args = a) + "mtx" = do.call(.xenium_expression_mtx, args = a), + "h5" = do.call(.xenium_expression_h5, args = a) ) eo <- createExprObj( From 2017d644bbf4ff22515db45cec257437f1f5c905 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:17:18 -0400 Subject: [PATCH 124/150] fix exprobj generation for xenium --- R/convenience_xenium.R | 25 ++++++++++++++++--------- R/general_help.R | 34 +++++++++++++++++----------------- man/writeChatGPTqueryDEG.Rd | 6 +++--- 3 files changed, 36 insertions(+), 29 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index e76f13a20..5d8119fc6 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -880,19 +880,26 @@ importXenium <- function( vmsg(.v = verbose, .is_debug = TRUE, "[EXPR_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE - ex <- switch(e, + ex_list <- switch(e, "mtx" = do.call(.xenium_expression_mtx, args = a), "h5" = do.call(.xenium_expression_h5, args = a) ) - eo <- createExprObj( - expression_data = ex, - name = "raw", - spat_unit = "cell", - feat_type = "rna", - provenance = "cell" - ) - return(eo) + # ensure list + if (!inherits(ex, "list")) ex_list <- list(ex_list) + + # lapply to process more than one if present + eo_list <- lapply(ex_list, function(ex) { + createExprObj( + expression_data = ex, + name = "raw", + spat_unit = "cell", + feat_type = "rna", + provenance = "cell" + ) + }) + + return(eo_list) } .xenium_expression_h5 <- function( diff --git a/R/general_help.R b/R/general_help.R index faef358d8..f6fe38ea8 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -276,8 +276,8 @@ rank_binarize_wrapper <- function( #' @title writeChatGPTqueryDEG #' @name writeChatGPTqueryDEG -#' @description This function writes a query as a .txt file that can be used with -#' ChatGPT or a similar LLM service to find the most likely cell types based on the +#' @description This function writes a query as a .txt file that can be used with +#' ChatGPT or a similar LLM service to find the most likely cell types based on the #' top differential expressed genes (DEGs) between identified clusters. #' @param DEG_output the output format from the differenetial expression functions #' @param top_n_genes number of genes for each cluster @@ -285,36 +285,36 @@ rank_binarize_wrapper <- function( #' @param folder_name path to the folder where you want to save the .txt file #' @param file_name name of .txt file #' @returns writes a .txt file to the desired location -#' @details This function does not run any LLM service. It simply creates the .txt +#' @details This function does not run any LLM service. It simply creates the .txt #' file that can then be used any LLM service (e.g. OpenAI, Gemini, ...) #' @export -writeChatGPTqueryDEG = function(DEG_output, - top_n_genes = 10, - tissue_type = 'human breast cancer', - folder_name = getwd(), +writeChatGPTqueryDEG = function(DEG_output, + top_n_genes = 10, + tissue_type = 'human breast cancer', + folder_name = getwd(), file_name = 'chatgpt_query.txt') { - + chatgpt_query = paste0("Identify cell types of ", tissue_type, " tissue using the following markers. Identify one cell type for each row. Only provide the cell type name and the marker genes used for cell type identification.") - + selected_DEG_output = DEG_output[, head(.SD, top_n_genes), by="cluster"] - + finallist = list() finallist[[1]] = chatgpt_query - + for(clus in unique(selected_DEG_output$cluster)) { x = selected_DEG_output[cluster == clus][['feats']] x = c(clus, x) finallist[[as.numeric(clus)+1]] = x } - + outputdt = data.table::data.table(finallist) - + cat('\n start writing \n') - data.table::fwrite(x = outputdt, + data.table::fwrite(x = outputdt, file = paste0(folder_name,'/', file_name), sep2 = c(""," ",""), col.names = F) - -} + +} @@ -691,7 +691,7 @@ get10Xmatrix_h5 <- function( ] # change names to gene symbols if it's expression - if (fclass == "Gene Expression" & gene_ids == "symbols") { + if (fclass == "Gene Expression" && gene_ids == "symbols") { conv_vector <- features_dt$uniq_name names(conv_vector) <- features_dt$id diff --git a/man/writeChatGPTqueryDEG.Rd b/man/writeChatGPTqueryDEG.Rd index 034f77fa4..8e1920852 100644 --- a/man/writeChatGPTqueryDEG.Rd +++ b/man/writeChatGPTqueryDEG.Rd @@ -27,11 +27,11 @@ writeChatGPTqueryDEG( writes a .txt file to the desired location } \description{ -This function writes a query as a .txt file that can be used with -ChatGPT or a similar LLM service to find the most likely cell types based on the +This function writes a query as a .txt file that can be used with +ChatGPT or a similar LLM service to find the most likely cell types based on the top differential expressed genes (DEGs) between identified clusters. } \details{ -This function does not run any LLM service. It simply creates the .txt +This function does not run any LLM service. It simply creates the .txt file that can then be used any LLM service (e.g. OpenAI, Gemini, ...) } From 60c2268cd6dc035826d54e6ee9f843bc2d8535f3 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:19:22 -0400 Subject: [PATCH 125/150] fix typo --- R/convenience_xenium.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 5d8119fc6..ac205f22b 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -886,7 +886,7 @@ importXenium <- function( ) # ensure list - if (!inherits(ex, "list")) ex_list <- list(ex_list) + if (!inherits(ex_list, "list")) ex_list <- list(ex_list) # lapply to process more than one if present eo_list <- lapply(ex_list, function(ex) { From 92f4691ba014d1c7e90255c0f69be8fa84c8239d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wen=20Wang=20=28=E7=8E=8B=E6=96=87=29?= Date: Tue, 30 Jul 2024 13:50:44 -0400 Subject: [PATCH 126/150] Fix: data.frame column referring issue --- R/ONTraC_wrapper.R | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/R/ONTraC_wrapper.R b/R/ONTraC_wrapper.R index 9f760b67f..1b3b9cbb6 100644 --- a/R/ONTraC_wrapper.R +++ b/R/ONTraC_wrapper.R @@ -243,7 +243,8 @@ loadOntraCResults <- function(gobject, # nolint: object_name_linter. #' @param values name of the expression matrix stored connectivity values #' @details This function plots the niche cluster connectivity matrix #' @export -plotNicheClusterConnectivity <- function( # nolint: object_name_linter. +plotNicheClusterConnectivity <- function( + # nolint: object_name_linter. gobject, spat_unit = "niche cluster", feat_type = "connectivity", @@ -370,7 +371,8 @@ plotNicheClusterConnectivity <- function( # nolint: object_name_linter. #' @param values name of the expression matrix stored connectivity values #' @details This function plots the niche cluster connectivity matrix #' @export -plotCTCompositionInNicheCluster <- function( # nolint: object_name_linter. +plotCTCompositionInNicheCluster <- function( + # nolint: object_name_linter. gobject, cell_type, values = "prob", @@ -395,7 +397,10 @@ plotCTCompositionInNicheCluster <- function( # nolint: object_name_linter. prob_df$cell_ID <- rownames(prob_df) ## combine the cell type and niche cluster probability matrix combined_df <- merge( - pDataDT(gobject, feat_type = feat_type)[, c("cell_ID", cell_type)], + as.data.frame(pDataDT(gobject, feat_type = feat_type))[, c( + "cell_ID", + cell_type + )], prob_df, by = "cell_ID" ) @@ -407,31 +412,34 @@ plotCTCompositionInNicheCluster <- function( # nolint: object_name_linter. names_to = "Cluster", values_to = "Probability" ) %>% - dplyr::group_by(Cell_Type, Cluster) %>% # nolint: object_usage_linter. + dplyr::group_by( + !!rlang::sym(cell_type), + Cluster # nolint: object_usage_linter. + ) %>% dplyr::summarise(Sum = sum(Probability, # nolint: object_usage_linter. na.rm = TRUE )) %>% tidyr::spread(key = "Cluster", value = "Sum", fill = 0) cell_type_counts_df <- as.data.frame(cell_type_counts_df) - rownames(cell_type_counts_df) <- cell_type_counts_df$Cell_Type - cell_type_counts_df$Cell_Type <- NULL + rownames(cell_type_counts_df) <- cell_type_counts_df[[cell_type]] + cell_type_counts_df[[cell_type]] <- NULL normalized_df <- as.data.frame(t( t(cell_type_counts_df) / colSums(cell_type_counts_df) )) # Reshape the data frame into long format - normalized_df$Cell_Type <- rownames(normalized_df) + normalized_df[[cell_type]] <- rownames(normalized_df) df_long <- normalized_df %>% tidyr::pivot_longer( - cols = -Cell_Type, # nolint: object_usage_linter. + cols = -!!rlang::sym(cell_type), # nolint: object_usage_linter. names_to = "Cluster", values_to = "Composition" ) # Create the heatmap using ggplot2 pl <- ggplot(df_long, aes( - x = Cell_Type, # nolint: object_usage_linter. + x = !!rlang::sym(cell_type), # nolint: object_usage_linter. y = Cluster, # nolint: object_usage_linter. fill = Composition # nolint: object_usage_linter. )) + @@ -470,7 +478,7 @@ plotCellTypeNTScore <- function(gobject, # nolint: object_name_linter. cell_type, values = "NTScore", spat_unit = "cell", - feat_type = "rna", + feat_type = "niche cluster", show_plot = NULL, return_plot = NULL, save_plot = NULL, @@ -484,16 +492,16 @@ plotCellTypeNTScore <- function(gobject, # nolint: object_name_linter. feat_type = feat_type ) avg_scores <- data_df %>% - dplyr::group_by(Cell_Type) %>% # nolint: object_usage_linter. + dplyr::group_by(!!rlang::sym(cell_type)) %>% # nolint: object_usage_linter. dplyr::summarise(Avg_NTScore = mean(NTScore)) # nolint: object_usage_linter. - data_df$Cell_Type <- factor(data_df$Cell_Type, - levels = avg_scores$Cell_Type[order(avg_scores$Avg_NTScore)] + data_df[[cell_type]] <- factor(data_df[[cell_type]], + levels = avg_scores[[cell_type]][order(avg_scores$Avg_NTScore)] ) pl <- ggplot(data_df, aes( x = NTScore, # nolint: object_usage_linter. - y = Cell_Type, # nolint: object_usage_linter. - fill = Cell_Type + y = !!rlang::sym(cell_type), + fill = !!rlang::sym(cell_type) )) + geom_violin() + theme_minimal() + From 8de8bc8322ffbefb5cb6f71e5928ac74c605db23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wen=20Wang=20=28=E7=8E=8B=E6=96=87=29?= Date: Tue, 30 Jul 2024 13:52:42 -0400 Subject: [PATCH 127/150] Change: format --- R/ONTraC_wrapper.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/ONTraC_wrapper.R b/R/ONTraC_wrapper.R index 1b3b9cbb6..0cdc02ff4 100644 --- a/R/ONTraC_wrapper.R +++ b/R/ONTraC_wrapper.R @@ -243,8 +243,7 @@ loadOntraCResults <- function(gobject, # nolint: object_name_linter. #' @param values name of the expression matrix stored connectivity values #' @details This function plots the niche cluster connectivity matrix #' @export -plotNicheClusterConnectivity <- function( - # nolint: object_name_linter. +plotNicheClusterConnectivity <- function( # nolint: object_name_linter. gobject, spat_unit = "niche cluster", feat_type = "connectivity", @@ -371,8 +370,7 @@ plotNicheClusterConnectivity <- function( #' @param values name of the expression matrix stored connectivity values #' @details This function plots the niche cluster connectivity matrix #' @export -plotCTCompositionInNicheCluster <- function( - # nolint: object_name_linter. +plotCTCompositionInNicheCluster <- function( # nolint: object_name_linter. gobject, cell_type, values = "prob", From f20d692e5701f17a712dfae403617e9eef680ced Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 06:02:44 -0400 Subject: [PATCH 128/150] update xenium --- DESCRIPTION | 12 +-- R/convenience_xenium.R | 172 ++++++++++++++++++++++++++++++----------- 2 files changed, 128 insertions(+), 56 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ab1607bd7..2568a14c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -60,7 +60,6 @@ Imports: Suggests: ArchR, arrow, - Biobase, biomaRt, ClusterR, clustree, @@ -69,14 +68,12 @@ Suggests: DelayedMatrixStats, dendextend (>= 1.13.0), dplyr, - exactextractr, FactoMineR, factoextra, fitdistrplus, FNN, future, future.apply, - geometry, GiottoData, ggalluvial, ggdendro, @@ -86,7 +83,6 @@ Suggests: graphcoloring, HDF5Array (>= 1.18.1), hdf5r, - htmlwidgets, jackstraw, kableExtra, knitr, @@ -95,7 +91,6 @@ Suggests: multinet (>= 3.0.2), networkD3, pheatmap, - png, quadprog, harmony, R.utils, @@ -105,10 +100,7 @@ Suggests: rhdf5, RTriangle (>= 1.6-0.10), Rvision, - S4Vectors, scater, - scatterpie, - scattermore, scran (>= 1.10.1), Seurat, sf, @@ -121,10 +113,8 @@ Suggests: STexampleData, SummarizedExperiment, tidygraph, - tiff, trendsceek, - testthat (>= 3.0.0), - qs + testthat (>= 3.0.0) Remotes: drieslab/GiottoUtils, drieslab/GiottoClass, diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index ac205f22b..584ef629d 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -1,4 +1,28 @@ +# modular reader functions layout # +# # +# - initialize method for reader object +# - filepath detection based on directory path +# - register modular load functions as tx_fun(), poly_fun, etc. in the object. +# include expected defaults that update based on filepath info +# - `create_gobject()` single function that utilizes other functions +# registered to the reader object in previous step. Returns gobject with +# desired data contents +# For params that `create_gobject()` is in charge of, the registered funs +# should use the params passed to `create_gobject()` instead of the baked +# in defaults +# +# # +# - exported function to create a `XYZReader` class object +# +# # +# - from `path` and other minimal args, create a giotto subobject with access +# to specific ways to load and manipulate data + + + + + # CLASS #### @@ -276,7 +300,7 @@ setMethod( # load image call img_fun <- function( - path, + path = img_focus_path, name = "image", micron = obj@micron, negative_y = TRUE, @@ -298,9 +322,9 @@ setMethod( # load aligned image call img_aff_fun <- function( - path = path, - micron = obj@micron, - imagealignment_path + path, + imagealignment_path, + micron = obj@micron ) { read10xAffineImage( file = path, @@ -313,29 +337,32 @@ setMethod( # create giotto object call gobject_fun <- function( - transcript_path = tx_path, - load_bounds = list( - cell = "cell", - nucleus = "nucleus" - ), - expression_path = expr_path, - metadata_path = meta_path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - load_images = list( - morphology = "focus", - ), - load_expression = FALSE, - load_cellmeta = FALSE + transcript_path = tx_path, + load_bounds = list( + cell = "cell", + nucleus = "nucleus" + ), + gene_panel_json_path = panel_meta_path, + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + load_images = list( + morphology = "focus" + ), + load_aligned_images = NULL, + load_expression = FALSE, + load_cellmeta = FALSE, + verbose = NULL ) { load_expression <- as.logical(load_expression) load_cellmeta <- as.logical(load_cellmeta) @@ -343,18 +370,35 @@ setMethod( if (!is.null(load_images)) { checkmate::assert_list(load_images) if (is.null(names(load_images))) { - stop("Images paths provided to 'load_images' must be named") + stop("'load_images' must be a named list of filepaths\n") + } + } + if (!is.null(load_aligned_images)) { + checkmate::assert_list(load_aligned_images) + if (is.null(names(load_aligned_images))) { + stop(wrap_txt( + "'load_aligned_images' must be a named list" + )) + } + if (any(lengths(load_aligned_images) != 2L) || + any(!vapply(load_aligned_images, is.character, + FUN.VALUE = logical(1L)))) { + stop(wrap_txt( + "'load_aligned_images' must be character with length 2: + 1. image path + 2. alignment matrix path" + )) } } if (!is.null(load_bounds)) { checkmate::assert_list(load_bounds) if (is.null(names(load_bounds))) { - stop("bounds paths provided to 'load_bounds' must be named") + stop("'load_bounds' must be named list of filepaths\n") } } - + # place calls in new variable for easier access funs <- obj@calls # init gobject @@ -365,12 +409,10 @@ setMethod( tx_list <- funs$load_transcripts( path = transcript_path, feat_type = feat_type, - split_keyword = split_keyword + split_keyword = split_keyword, + verbose = verbose ) - for (tx in tx_list) { - g <- setGiotto(g, tx) - } - + g <- setGiotto(g, tx, verbose = FALSE) # lists are fine # polys if (!is.null(load_bounds)) { @@ -383,31 +425,46 @@ setMethod( for (b_i in seq_along(load_bounds)) { b <- funs$load_polys( path = load_bounds[[b_i]], - name = bnames[[b_i]] + name = bnames[[b_i]], + verbose = verbose ) blist <- c(blist, b) } - for (gpoly_i in seq_along(blist)) { - g <- setGiotto(g, blist[[gpoly_i]]) - } + g <- setGiotto(g, blist, verbose = FALSE) } # feat metadata fx <- funs$load_featmeta( - path = + path = gene_panel_json_path, + # ID = symbols makes sense with the subcellular feat_IDs + gene_ids = "symbols", + # no dropcols + verbose = verbose ) + g <- setGiotto(g, fx) # expression if (load_expression) { - + ex <- funs$load_expression( + path = expression_path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = verbose + ) + g <- setGiotto(g, ex) } # cell metadata if (load_cellmeta) { - + cx <- funs$load_cellmeta( + path = metadata_path, + verbose = verbose + ) + g <- setGiotto(g, cx) } @@ -415,11 +472,36 @@ setMethod( if (!is.null(load_images)) { # replace convenient shortnames load_images[load_images == "focus"] <- img_focus_path - } + imglist <- list() + imnames <- names(load_images) + for (impath_i in seq_along(load_images)) { + im <- load_image( + path = load_images[[impath_i]], + name = imnames[[impath_i]] + ) + imglist <- c(imglist, im) + } + g <- setGiotto(g, imglist) + } + # aligned images can be placed in random places and do not have + # a standardized naming scheme. + if (!is.null(load_aligned_images)) { + aimglist <- list() + aimnames <- names(load_aligned_images) + for (aim_i in seq_along(load_aligned_images)) { + aim <- load_aligned_image( + path = load_aligned_images[[aim_i]][1], + imagealignment_path = load_aligned_images[[aim_i]][2] + ) + aimglist <- c(aimglist, aim) + } + g <- setGiotto(g, aimglist) + } + return(g) } obj@calls$create_gobject <- gobject_fun @@ -1023,6 +1105,7 @@ importXenium <- function( .xenium_image_single <- function( path, name = "image", + output_dir, micron, negative_y = TRUE, flip_vertical = FALSE, @@ -1059,8 +1142,7 @@ importXenium <- function( return(img) } -# for affine, see the init method - +# for aligned_image (affine), see the `XeniumReader` init method From 4aa44d76dd0fbb3dff3159d7d62e42a93e78a9e0 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 09:02:25 -0400 Subject: [PATCH 129/150] remove img dir reading --- R/convenience_xenium.R | 57 +++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 584ef629d..7db5c37e1 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -1023,7 +1023,7 @@ importXenium <- function( .xenium_image <- function( path, name, - output_dir, + # output_dir, micron, negative_y = TRUE, flip_vertical = FALSE, @@ -1033,36 +1033,36 @@ importXenium <- function( ) { if (missing(path)) { stop(wrap_txt( - "No path to image file or dir to load provided or auto-detected" + "No path to image file provided or auto-detected" ), call. = FALSE) } - # [directory input] -> load as individual .ome paths with defined names - # intended for usage with single channel stain focus images - if (checkmate::test_directory_exists(path)) { - if (missing(output_dir)) output_dir <- file.path(path, "tif_exports") - # find actual image paths in directory - ome_paths <- list.files(path, full.names = TRUE, pattern = ".ome") - # parse ome metadata for images names - ome_xml <- ometif_metadata( - ome_paths[[1]], node = "Channel", output = "data.frame" - ) - # update names with the channel names - name <- ome_xml$Name - - # do conversion if file does not already exist in output_dir - vmsg(.v = verbose, "> ometif to tif conversion") - lapply(ome_paths, function(ome) { - try(silent = TRUE, { # ignore fail when already written - ometif_to_tif( - # can pass overwrite = TRUE via ... if needed - ome, output_dir = output_dir, ... - ) - }) - }) - # update path param - path <- list.files(output_dir, pattern = ".tif", full.names = TRUE) - } + # # [directory input] -> load as individual .ome paths with defined names + # # intended for usage with single channel stain focus images + # if (checkmate::test_directory_exists(path)) { + # if (missing(output_dir)) output_dir <- file.path(path, "tif_exports") + # # find actual image paths in directory + # ome_paths <- list.files(path, full.names = TRUE, pattern = ".ome") + # # parse ome metadata for images names + # ome_xml <- ometif_metadata( + # ome_paths[[1]], node = "Channel", output = "data.frame" + # ) + # # update names with the channel names + # name <- ome_xml$Name + # + # # do conversion if file does not already exist in output_dir + # vmsg(.v = verbose, "> ometif to tif conversion") + # lapply(ome_paths, function(ome) { + # try(silent = TRUE, { # ignore fail when already written + # ometif_to_tif( + # # can pass overwrite = TRUE via ... if needed + # ome, output_dir = output_dir, ... + # ) + # }) + # }) + # # update path param + # path <- list.files(output_dir, pattern = ".tif", full.names = TRUE) + # } # set default if still missing if (missing(name)) name <- "image" @@ -1105,7 +1105,6 @@ importXenium <- function( .xenium_image_single <- function( path, name = "image", - output_dir, micron, negative_y = TRUE, flip_vertical = FALSE, From ce64c3c09e99a63b5c77ca88b4006a55850d2d43 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:09:27 -0400 Subject: [PATCH 130/150] add xenium wrapper --- R/convenience_xenium.R | 1668 +++++++++-------- man/createGiottoXeniumObject.Rd | 128 +- man/dot-createGiottoXeniumObject_aggregate.Rd | 34 - ...ot-createGiottoXeniumObject_subcellular.Rd | 42 - man/dot-read_xenium_folder.Rd | 42 - man/load_xenium_folder.Rd | 77 - 6 files changed, 979 insertions(+), 1012 deletions(-) delete mode 100644 man/dot-createGiottoXeniumObject_aggregate.Rd delete mode 100644 man/dot-createGiottoXeniumObject_subcellular.Rd delete mode 100644 man/dot-read_xenium_folder.Rd delete mode 100644 man/load_xenium_folder.Rd diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 7db5c37e1..25bf811c0 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -300,7 +300,7 @@ setMethod( # load image call img_fun <- function( - path = img_focus_path, + path, name = "image", micron = obj@micron, negative_y = TRUE, @@ -356,9 +356,7 @@ setMethod( "UnassignedCodeword", "NegControlCodeword" ), - load_images = list( - morphology = "focus" - ), + load_images = NULL, load_aligned_images = NULL, load_expression = FALSE, load_cellmeta = FALSE, @@ -1146,38 +1144,73 @@ importXenium <- function( - -# OLD #### - - +# wrapper #### #' @title Create 10x Xenium Giotto Object #' @name createGiottoXeniumObject -#' @description Given the path to a Xenium experiment output folder, creates a -#' Giotto object -#' @param xenium_dir full path to the exported xenium directory -#' @param data_to_use which type(s) of expression data to build the gobject with -#' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') -#' @param load_format files formats from which to load the data. Either `csv` or -#' `parquet` currently supported. -#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 -#' file. Default is \code{TRUE} -#' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene -#' expression matrix -#' @param bounds_to_load vector of boundary information to load -#' (e.g. \code{'cell'} -#' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both -#' at the same time.) +#' @description Create a Giotto object from a Xenium experiment output folder. +#' Only the `xenium_dir`, `load_images`, and `load_aligned_images` params +#' need to be supplied when defaults are sufficient. All other params have +#' defaults set and are there in case of non-standard directory layouts or +#' alternative preference in file format to load from.\cr +#' When possible, `.parquet` files are loaded. This requires the additional +#' installation of \pkg{arrow} with zstd support. See details. `h5` is also +#' used by default if the 10x provided expression matrix is loaded.\cr +#' The 10X provided aggregated expression matrix and cell metdata are not +#' loaded by default since the results may be slightly different from those +#' that Giotto spatially aggregates. +#' @param xenium_dir Full path to the exported xenium directory +#' @param transcript_path Optional. Filepath to desired transcripts file to +#' load. Either the `.parquet` or `.csv` files can be used. +#' @param bounds_path Optional. Named list of filepaths to desired Xenium +#' bounds/polygon files to load. Either the `.parquet` or `.csv` files can be +#' used. The default is to load the `.parquets` of both cell and nucleus. +#' @param gene_panel_json_path Optional. Filepath to panel json. This json +#' contains feature metadata information and ENSG names. +#' @param expression_path Optional. Filepath to cell feature matrix. Accepts +#' either the `.h5` or the unzipped directory containing `.mtx` files. +#' @param metadata_path Optional. Filepath to `cells.csv.gz` or `cells.parquet` +#' which contain cell metadata information. +#' @param feat_type character. feature type. Provide more than one value if +#' using the `split_keyword` param. For each set of keywords to split by, an +#' additional feat_type should be provided in the same order. Affects how +#' the transcripts information is loaded. Helpful for separating out the +#' QC probes. See details. +#' @param split_keyword list of character vectors of keywords to split the +#' transcripts based on their feat_ID. Keywords will be `grepl()` +#' matched against the feature IDs information. See details. #' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included #' as a subcellular transcript detection (default = 20) -#' @param key_list (advanced) list of grep-based keywords to split the -#' subcellular feature detections by feature type. See details -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object +#' @param load_images Named list of filepaths to `.tif` images, usually the +#' ones in the `morphology_focus` directory. These `ome.tif` images are not +#' compatible and must be converted to `tif` using +#' `[GiottoClass::ometif_to_tif()]`. +#' @param load_aligned_images Named list of filepaths. The list names are used +#' as the image names when loaded. Two filepaths are expected per entry. The +#' first one should be to the `.tif` image. The second path is to the `.csv` +#' alignment matrix file. `ome.tif` images will work, but they are currently +#' slower in our imaging pipeline. +#' @param load_expression logical. Default = FALSE. Whether to load in 10X +#' provided expression matrix. +#' @param load_cellmeta logical. Default = FALSE. Whether to laod in 10X +#' provided cell metadata information +#' @param verbose logical or NULL. NULL uses the `giotto.verbose` option +#' setting and defaults to TRUE. +#' @returns `giotto` object #' @details #' +#' [\strong{arrow zstd support}] +#' Xenium parquets have zstd compression. \pkg{arrow} is used to access +#' parquets, however it may not install on all systems with zstd by default. +#' You can check whether zstd support is installed by running: +#' `arrow::arrow_info()$capabilities[["zstd"]]`. If `FALSE`, it needs to be +#' reinstalled with the following: +#' \preformatted{ +#' Sys.setenv(ARROW_WITH_ZSTD = "ON") +#' install.packages("arrow", repos = c("https://apache.r-universe.dev")) +#' } +#' #' [\strong{QC feature types}] #' Xenium provides info on feature detections that include more than only the #' Gene Expression specific probes. Additional probes for QC are included: @@ -1186,761 +1219,862 @@ importXenium <- function( #' are treated as their own feature types so that they can largely remain #' independent of the gene expression information. #' -#' [\strong{key_list}] -#' Related to \code{data_to_use = 'subcellular'} workflow only: +#' [\strong{feat_type and split_keyword}] #' Additional QC probe information is in the subcellular feature detections #' information and must be separated from the gene expression information #' during processing. #' The QC probes have prefixes that allow them to be selected from the rest of #' the feature IDs. -#' Giotto uses a named list of keywords (\code{key_list}) to select these QC -#' probes, with the list names being the names that will be assigned as the -#' feature type of these feature detections. The default list is used when -#' \code{key_list} = NULL. -#' -#' Default list: -#' \preformatted{ -#' list(blank_code = 'BLANK_', -#' neg_code = 'NegControlCodeword_', -#' neg_probe = c('NegControlProbe_|antisense_')) -#' } +#' Giotto uses `feat_type` and `split_keyword` params to select these QC +#' probes out as separate feature types. See examples in +#' `[GiottoClass::createGiottoPoints]` for how this works. #' -#' The Gene expression subset is accepted as the subset of feat_IDs that do not -#' map to any of the keys. +#' The Gene expression subset labeled as `rna` is accepted as the subset of +#' feat_IDs that do not get matched to any of the `split_keywords`. #' #' @export createGiottoXeniumObject <- function( xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), - qv_threshold = 20, - key_list = NULL, - instructions = NULL, - cores = NA, - verbose = TRUE -) { - # 0. setup - xenium_dir <- path.expand(xenium_dir) - - # Determine data to load - data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) - - # Determine load formats - load_format <- "csv" # TODO Remove this and add as param once other options - # are available - load_format <- match.arg( - arg = load_format, choices = c("csv", "parquet", "zarr")) - - # set number of cores automatically, but with limit of 10 - cores <- determine_cores(cores) - data.table::setDTthreads(threads = cores) - - # 1. detect xenium folder and find filepaths to load - - # path_list contents: - # tx_path - # bound_paths - # cell_meta_path - # agg_expr_path - # panel_meta_path - path_list <- .read_xenium_folder( - xenium_dir = xenium_dir, - data_to_use = data_to_use, - bounds_to_load = bounds_to_load, - load_format = load_format, - h5_expression = h5_expression, - verbose = verbose - ) - - - # 2. load in data - - # data_list contents: - # feat_meta - # tx_dt - # bound_dt_list - # cell_meta - # agg_expr - data_list <- .load_xenium_folder( - path_list = path_list, - load_format = load_format, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ) - - - # TODO load images - - - # 3. Create giotto objects - - if (data_to_use == "subcellular") { - # ** feat type search keys ** - if (is.null(key_list)) { - key_list <- list( - blank_code = "BLANK_", - neg_code = "NegControlCodeword_", - neg_probe = c("NegControlProbe_|antisense_") - ) - } - - # needed: - # feat_meta - # tx_dt - # bound_dt_list - xenium_gobject <- .createGiottoXeniumObject_subcellular( - data_list = data_list, - qv_threshold = qv_threshold, - key_list = key_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - } - - if (data_to_use == "aggregate") { - # needed: - # feat_meta - # cell_meta - # agg_expr - # optional? - # tx_dt - # bound_dt_list - xenium_gobject <- .createGiottoXeniumObject_aggregate( - data_list = data_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - } - - return(xenium_gobject) -} - - - - -#' @title Create a Xenium Giotto object from subcellular info -#' @name .createGiottoXeniumObject_subcellular -#' @description Subcellular workflow for createGiottoXeniumObject -#' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} -#' @param key_list regex-based search keys for feature IDs to allow separation -#' into separate giottoPoints objects by feat_type -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included -#' as a subcellular transcript detection (default = 20) -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate -#' @keywords internal -.createGiottoXeniumObject_subcellular <- function( - data_list, - key_list = NULL, + transcript_path = NULL, # optional + bounds_path = list( # looks for parquets by default + cell = "cell", + nucleus = "nucleus" + ), + gene_panel_json_path = NULL, + expression_path = NULL, # optional + metadata_path = NULL, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE -) { - # data.table vars - qv <- NULL - - # Unpack data_list info - feat_meta <- data_list$feat_meta - tx_dt <- data_list$tx_dt - bound_dt_list <- data_list$bound_dt_list - - # define for data.table - cell_id <- feat_ID <- feature_name <- NULL - - vmsg("Building subcellular giotto object...", .v = verbose) - # Giotto points object - vmsg("> points data prep...", .v = verbose) - - # filter by qv_threshold - vmsg("> filtering feature detections for Phred score >= ", - qv_threshold, .v = verbose) - n_before <- tx_dt[, .N] - tx_dt_filtered <- tx_dt[qv >= qv_threshold] - n_after <- tx_dt_filtered[, .N] - - if (verbose) { - cat( - "Number of feature points removed: ", - n_before - n_after, - " out of ", n_before, "\n" - ) - } - - vmsg("> splitting detections by feat_type", .v = verbose) - # discover feat_IDs for each feat_type - all_IDs <- tx_dt_filtered[, unique(feat_ID)] - feat_types_IDs <- lapply( - key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) - rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) - feat_types_IDs <- append(rna, feat_types_IDs) - - # separate detections by feature type - points_list <- lapply( - feat_types_IDs, - function(types) { - tx_dt_filtered[feat_ID %in% types] - } - ) - - # Giotto polygons object - vmsg("> polygons data prep...", .v = verbose) - polys_list <- lapply( - bound_dt_list, - function(bound_type) { - bound_type[, cell_id := as.character(cell_id)] - } - ) - - xenium_gobject <- createGiottoObjectSubcellular( - gpoints = points_list, - gpolygons = polys_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - - # generate centroids - vmsg("Calculating polygon centroids...", .v = verbose) - xenium_gobject <- addSpatialCentroidLocations( - xenium_gobject, - poly_info = c(names(bound_dt_list)), - provenance = as.list(names(bound_dt_list)) - ) - - return(xenium_gobject) -} - - - - - -#' @title Create a Xenium Giotto object from aggregate info -#' @name .createGiottoXeniumObject_aggregate -#' @description Aggregate workflow for createGiottoXeniumObject -#' @param data_list list of data loaded by \code{.load_xenium_folder} -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular -#' @keywords internal -.createGiottoXeniumObject_aggregate <- function( - data_list, - # include_analysis = FALSE, - instructions = NULL, - cores = NA, - verbose = TRUE + load_images = NULL, + load_aligned_images = NULL, + load_expression = FALSE, + load_cellmeta = FALSE, + verbose = NULL ) { - # Unpack data_list info - feat_meta <- data_list$feat_meta - cell_meta <- data_list$cell_meta - agg_expr <- data_list$agg_expr - - # define for data.table - cell_ID <- x_centroid <- y_centroid <- NULL - - # clean up names for aggregate matrices - names(agg_expr) <- gsub(pattern = " ", replacement = "_", names(agg_expr)) - geneExpMat <- which(names(agg_expr) == "Gene_Expression") - names(agg_expr)[[geneExpMat]] <- "raw" - - # set cell_id as character - cell_meta <- cell_meta[, data.table::setnames(.SD, "cell_id", "cell_ID")] - cell_meta <- cell_meta[, cell_ID := as.character(cell_ID)] - - # set up spatial locations - agg_spatlocs <- cell_meta[, .(x_centroid, y_centroid, cell_ID)] - - # set up metadata - agg_meta <- cell_meta[, !c("x_centroid", "y_centroid")] - - vmsg("Building aggregate giotto object...", .v = verbose) - xenium_gobject <- createGiottoObject( - expression = agg_expr, - spatial_locs = agg_spatlocs, - instructions = instructions, - cores = cores, + x <- importXenium(xenium_dir) + # apply reader params + x$qv <- qv_threshold + + g <- x$create_gobject( + transcript_path = transcript_path, + load_bounds = bounds_path, + gene_panel_json_path = gene_panel_json_path, + expression_path = expression_path, + metadata_path = metadata_path, + feat_type = split_keyword, + split_keyword = split_keyword, + load_images = load_images, + load_aligned_images = load_aligned_images, + load_expression = load_expression, + load_cellmeta = load_cellmeta, verbose = verbose ) - # append aggregate metadata - xenium_gobject <- addCellMetadata( - gobject = xenium_gobject, - new_metadata = agg_meta, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - xenium_gobject <- addFeatMetadata( - gobject = xenium_gobject, - new_metadata = feat_meta, - by_column = TRUE, - column_feat_ID = "feat_ID" - ) - - return(xenium_gobject) -} - - - - -#' @title Read a structured xenium folder -#' @name .read_xenium_folder -#' @inheritParams createGiottoXeniumObject -#' @keywords internal -#' @returns path_list a list of xenium files discovered and their filepaths. NULL -#' values denote missing items -.read_xenium_folder <- function( - xenium_dir, - data_to_use = "subcellular", - bounds_to_load = c("cell"), - load_format = "csv", - h5_expression = FALSE, - verbose = TRUE -) { - # Check needed packages - if (load_format == "parquet") { - package_check(pkg_name = "arrow", repository = "CRAN") - package_check(pkg_name = "dplyr", repository = "CRAN") - } - if (isTRUE(h5_expression)) { - package_check(pkg_name = "hdf5r", repository = "CRAN") - } - - ch <- box_chars() - - - # 0. test if folder structure exists and is as expected - - - if (is.null(xenium_dir) | !dir.exists(xenium_dir)) - stop("The full path to a xenium directory must be given.") - vmsg("A structured Xenium directory will be used\n", .v = verbose) - - # find items (length = 1 if present, length = 0 if missing) - dir_items <- list( - `analysis info` = "*analysis*", - `boundary info` = "*bound*", - `cell feature matrix` = "*cell_feature_matrix*", - `cell metadata` = "*cells*", - `image info` = "*tif", - `panel metadata` = "*panel*", - `raw transcript info` = "*transcripts*", - `experiment info (.xenium)` = "*.xenium" - ) - - dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x))) - dir_items_lengths <- lengths(dir_items) - - if (isTRUE(verbose)) { - message("Checking directory contents...") - for (item in names(dir_items)) { - # IF ITEM FOUND - - if (dir_items_lengths[[item]] > 0) { - message(ch$s, "> ", item, " found") - for (item_i in seq_along(dir_items[[item]])) { - # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) - message(ch$s, ch$s, ch$l, ch$h, ch$h, subItem) - } - } else { - # IF ITEM MISSING - # Based on workflow, determine if: - # necessary (error) - # optional (warning) - - if (data_to_use == "subcellular") { - # necessary items - if (item %in% c("boundary info", "raw transcript info")) - stop(item, " is missing") - # optional items - if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata")) - warning(item, " is missing (optional)") - # items to ignore: analysis info, cell feature matrix, - # cell metadata - } else if (data_to_use == "aggregate") { - # necessary items - if (item %in% c("cell feature matrix", "cell metadata")) - stop(item, " is missing") - # optional items - if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata", "analysis info")) - warning(item, " is missing (optional)") - # items to ignore: boundary info, raw transcript info - } - } - } - } - - - # 1. Select data to load - - - # **** transcript info **** - tx_path <- NULL - tx_path <- dir_items$`raw transcript info`[grepl( - pattern = load_format, dir_items$`raw transcript info`)] - # **** cell metadata **** - cell_meta_path <- NULL - cell_meta_path <- dir_items$`cell metadata`[grepl( - pattern = load_format, dir_items$`cell metadata`)] - - # **** boundary info **** - # Select bound load format - if (load_format != "zarr") { # No zarr available for boundary info - dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = load_format, dir_items$`boundary info`)] - } else { - dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = "csv", dir_items$`boundary info`)] - } - - # Organize bound paths by type of bound (bounds_to_load param) - bound_paths <- NULL - bound_names <- bounds_to_load - bounds_to_load <- as.list(bounds_to_load) - bound_paths <- lapply(bounds_to_load, function(x) dir_items$`boundary info`[ - grepl(pattern = x, dir_items$`boundary info`)]) - names(bound_paths) <- bound_names - - # **** aggregated expression info **** - agg_expr_path <- NULL - if (isTRUE(h5_expression)) { # h5 expression matrix loading is default - agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "h5", dir_items$`cell feature matrix`)] - } else if (load_format == "zarr") { - agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "zarr", dir_items$`cell feature matrix`)] - } else { # No parquet for aggregated expression - default to normal 10x loading - agg_expr_path <- dir_items$`cell feature matrix`[sapply( - dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x))] - if (length(agg_expr_path) == 0) { - stop(wrap_txt( - "Expression matrix cannot be loaded.\n - Has cell_feature_matrix(.tar.gz) been unpacked into a - directory?" - )) - } - } - if (data_to_use == "aggregate") { - if (length(path_list$agg_expr_path) == 0) { - stop(wrap_txt( - "Aggregated expression not found.\n - Please confirm h5_expression and load_format params are correct" - )) - } - } - - # **** panel info **** - panel_meta_path <- NULL - panel_meta_path <- dir_items$`panel metadata` - - - vmsg("Directory check done", .v = verbose) - - path_list <- list( - "tx_path" = tx_path, - "bound_paths" = bound_paths, - "cell_meta_path" = cell_meta_path, - "agg_expr_path" = agg_expr_path, - "panel_meta_path" = panel_meta_path - ) - - return(path_list) -} - -#' @title Load xenium data from folder -#' @name load_xenium_folder -#' @param path_list list of full filepaths from .read_xenium_folder -#' @inheritParams createGiottoXeniumObject -#' @returns list of loaded in xenium data -NULL - -#' @rdname load_xenium_folder -#' @keywords internal -.load_xenium_folder <- function( - path_list, - load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", - gene_column_index = 1, - cores, - verbose = TRUE -) { - data_list <- switch(load_format, - "csv" = .load_xenium_folder_csv( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "parquet" = .load_xenium_folder_parquet( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) - ) - - return(data_list) -} - - -#' @describeIn load_xenium_folder Load from csv files -#' @keywords internal -.load_xenium_folder_csv <- function( - path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE -) { - # initialize return vars - feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL - - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_path <- path_list$panel_meta_path[[1]] - fdata_ext <- GiottoUtils::file_extension(fdata_path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json(path = fdata_path, - gene_ids = h5_gene_ids) - } else { - feat_meta <- data.table::fread(fdata_path, nThread = cores) - colnames(feat_meta)[[1]] <- "feat_ID" - } - - # **** subcellular info **** - if (data_to_use == "subcellular") { - # append missing QC probe info to feat_meta - if (isTRUE(h5_expression)) { - h5 <- hdf5r::H5File$new(path_list$agg_expr_path) - tryCatch({ - root <- names(h5) - feature_id <- h5[[paste0(root, "/features/id")]][] - feature_info <- h5[[paste0(root, "/features/feature_type")]][] - feature_names <- h5[[paste0(root, "/features/name")]][] - features_dt <- data.table::data.table( - "id" = feature_id, - "name" = feature_names, - "feature_type" = feature_info - ) - }, finally = { - h5$close_all() - }) - } else { - features_dt <- data.table::fread( - paste0(path_list$agg_expr_path, "/features.tsv.gz"), - header = FALSE - ) - } - colnames(features_dt) <- c("id", "feat_ID", "feat_class") - feat_meta <- merge( - features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") - - GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) - tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) - data.table::setnames( - x = tx_dt, - old = c("feature_name", "x_location", "y_location"), - new = c("feat_ID", "x", "y") - ) - - GiottoUtils::vmsg("Loading boundary info...", .v = verbose) - bound_dt_list <- lapply( - path_list$bound_paths, - function(x) data.table::fread(x[[1]], nThread = cores) - ) - } - - # **** aggregate info **** - GiottoUtils::vmsg("loading cell metadata...", .v = verbose) - cell_meta <- data.table::fread( - path_list$cell_meta_path[[1]], nThread = cores) - - if (data_to_use == "aggregate") { - GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) - if (isTRUE(h5_expression)) { - agg_expr <- get10Xmatrix_h5( - path_to_data = path_list$agg_expr_path, - gene_ids = h5_gene_ids, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } else { - agg_expr <- get10Xmatrix( - path_to_data = path_list$agg_expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } - } - - data_list <- list( - "feat_meta" = feat_meta, - "tx_dt" = tx_dt, - "bound_dt_list" = bound_dt_list, - "cell_meta" = cell_meta, - "agg_expr" = agg_expr - ) - - return(data_list) + return(g) } - -#' @describeIn load_xenium_folder Load from parquet files -#' @keywords internal -.load_xenium_folder_parquet <- function( - path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE -) { - # initialize return vars - feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL - # dplyr variable - cell_id <- NULL - - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_path <- path_list$panel_meta_path[[1]] - fdata_ext <- GiottoUtils::file_extension(fdata_path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json( - path = fdata_path, gene_ids = h5_gene_ids) - } else { - feat_meta <- data.table::fread(fdata_path, nThread = cores) - colnames(feat_meta)[[1]] <- "feat_ID" - } - - # **** subcellular info **** - if (data_to_use == "subcellular") { - # define for data.table - transcript_id <- feature_name <- NULL - - # append missing QC probe info to feat_meta - if (isTRUE(h5_expression)) { - h5 <- hdf5r::H5File$new(path_list$agg_expr_path) - tryCatch({ - root <- names(h5) - feature_id <- h5[[paste0(root, "/features/id")]][] - feature_info <- h5[[paste0(root, "/features/feature_type")]][] - feature_names <- h5[[paste0(root, "/features/name")]][] - features_dt <- data.table::data.table( - "id" = feature_id, - "name" = feature_names, - "feature_type" = feature_info - ) - }, finally = { - h5$close_all() - }) - } else { - features_dt <- arrow::read_tsv_arrow(paste0( - path_list$agg_expr_path, "/features.tsv.gz"), - col_names = FALSE - ) %>% - data.table::setDT() - } - colnames(features_dt) <- c("id", "feat_ID", "feat_class") - feat_meta <- merge(features_dt[ - , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") - - vmsg("Loading transcript level info...", .v = verbose) - tx_dt <- arrow::read_parquet( - file = path_list$tx_path[[1]], - as_data_frame = FALSE - ) %>% - dplyr::mutate( - transcript_id = cast(transcript_id, arrow::string())) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - dplyr::mutate( - feature_name = cast(feature_name, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - data.table::setnames( - x = tx_dt, - old = c("feature_name", "x_location", "y_location"), - new = c("feat_ID", "x", "y") - ) - vmsg("Loading boundary info...", .v = verbose) - bound_dt_list <- lapply(path_list$bound_paths, function(x) { - arrow::read_parquet(file = x[[1]], as_data_frame = FALSE) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - }) - } - # **** aggregate info **** - if (data_to_use == "aggregate") { - vmsg("Loading cell metadata...", .v = verbose) - cell_meta <- arrow::read_parquet( - file = path_list$cell_meta_path[[1]], - as_data_frame = FALSE - ) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - - # NOTE: no parquet for agg_expr. - vmsg("Loading aggregated expression...", .v = verbose) - if (isTRUE(h5_expression)) { - agg_expr <- get10Xmatrix_h5( - path_to_data = path_list$agg_expr_path, - gene_ids = h5_gene_ids, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } else { - agg_expr <- get10Xmatrix( - path_to_data = path_list$agg_expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } - } - - data_list <- list( - "feat_meta" = feat_meta, - "tx_dt" = tx_dt, - "bound_dt_list" = bound_dt_list, - "cell_meta" = cell_meta, - "agg_expr" = agg_expr - ) - - return(data_list) -} +#' +#' #' @title Create 10x Xenium Giotto Object +#' #' @name createGiottoXeniumObject +#' #' @description Given the path to a Xenium experiment output folder, creates a +#' #' Giotto object +#' #' @param xenium_dir full path to the exported xenium directory +#' #' @param data_to_use which type(s) of expression data to build the gobject with +#' #' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') +#' #' @param load_format files formats from which to load the data. Either `csv` or +#' #' `parquet` currently supported. +#' #' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 +#' #' file. Default is \code{TRUE} +#' #' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene +#' #' expression matrix +#' #' @param bounds_to_load vector of boundary information to load +#' #' (e.g. \code{'cell'} +#' #' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both +#' #' at the same time.) +#' #' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' #' as a subcellular transcript detection (default = 20) +#' #' @param key_list (advanced) list of grep-based keywords to split the +#' #' subcellular feature detections by feature type. See details +#' #' @inheritParams get10Xmatrix +#' #' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' #' @returns giotto object +#' #' @details +#' #' +#' #' [\strong{QC feature types}] +#' #' Xenium provides info on feature detections that include more than only the +#' #' Gene Expression specific probes. Additional probes for QC are included: +#' #' \emph{blank codeword}, \emph{negative control codeword}, and +#' #' \emph{negative control probe}. These additional QC probes each occupy and +#' #' are treated as their own feature types so that they can largely remain +#' #' independent of the gene expression information. +#' #' +#' #' [\strong{key_list}] +#' #' Related to \code{data_to_use = 'subcellular'} workflow only: +#' #' Additional QC probe information is in the subcellular feature detections +#' #' information and must be separated from the gene expression information +#' #' during processing. +#' #' The QC probes have prefixes that allow them to be selected from the rest of +#' #' the feature IDs. +#' #' Giotto uses a named list of keywords (\code{key_list}) to select these QC +#' #' probes, with the list names being the names that will be assigned as the +#' #' feature type of these feature detections. The default list is used when +#' #' \code{key_list} = NULL. +#' #' +#' #' Default list: +#' #' \preformatted{ +#' #' list(blank_code = 'BLANK_', +#' #' neg_code = 'NegControlCodeword_', +#' #' neg_probe = c('NegControlProbe_|antisense_')) +#' #' } +#' #' +#' #' The Gene expression subset is accepted as the subset of feat_IDs that do not +#' #' map to any of the keys. +#' #' +#' #' @export +#' createGiottoXeniumObject <- function( +#' xenium_dir, +#' data_to_use = c("subcellular", "aggregate"), +#' load_format = "csv", +#' h5_expression = TRUE, +#' h5_gene_ids = c("symbols", "ensembl"), +#' gene_column_index = 1, +#' bounds_to_load = c("cell"), +#' qv_threshold = 20, +#' key_list = NULL, +#' instructions = NULL, +#' cores = NA, +#' verbose = TRUE +#' ) { +#' # 0. setup +#' xenium_dir <- path.expand(xenium_dir) +#' +#' # Determine data to load +#' data_to_use <- match.arg( +#' arg = data_to_use, choices = c("subcellular", "aggregate")) +#' +#' # Determine load formats +#' load_format <- "csv" # TODO Remove this and add as param once other options +#' # are available +#' load_format <- match.arg( +#' arg = load_format, choices = c("csv", "parquet", "zarr")) +#' +#' # set number of cores automatically, but with limit of 10 +#' cores <- determine_cores(cores) +#' data.table::setDTthreads(threads = cores) +#' +#' # 1. detect xenium folder and find filepaths to load +#' +#' # path_list contents: +#' # tx_path +#' # bound_paths +#' # cell_meta_path +#' # agg_expr_path +#' # panel_meta_path +#' path_list <- .read_xenium_folder( +#' xenium_dir = xenium_dir, +#' data_to_use = data_to_use, +#' bounds_to_load = bounds_to_load, +#' load_format = load_format, +#' h5_expression = h5_expression, +#' verbose = verbose +#' ) +#' +#' +#' # 2. load in data +#' +#' # data_list contents: +#' # feat_meta +#' # tx_dt +#' # bound_dt_list +#' # cell_meta +#' # agg_expr +#' data_list <- .load_xenium_folder( +#' path_list = path_list, +#' load_format = load_format, +#' data_to_use = data_to_use, +#' h5_expression = h5_expression, +#' h5_gene_ids = h5_gene_ids, +#' gene_column_index = gene_column_index, +#' cores = cores, +#' verbose = verbose +#' ) +#' +#' +#' # TODO load images +#' +#' +#' # 3. Create giotto objects +#' +#' if (data_to_use == "subcellular") { +#' # ** feat type search keys ** +#' if (is.null(key_list)) { +#' key_list <- list( +#' blank_code = "BLANK_", +#' neg_code = "NegControlCodeword_", +#' neg_probe = c("NegControlProbe_|antisense_") +#' ) +#' } +#' +#' # needed: +#' # feat_meta +#' # tx_dt +#' # bound_dt_list +#' xenium_gobject <- .createGiottoXeniumObject_subcellular( +#' data_list = data_list, +#' qv_threshold = qv_threshold, +#' key_list = key_list, +#' instructions = instructions, +#' cores = cores, +#' verbose = verbose +#' ) +#' } +#' +#' if (data_to_use == "aggregate") { +#' # needed: +#' # feat_meta +#' # cell_meta +#' # agg_expr +#' # optional? +#' # tx_dt +#' # bound_dt_list +#' xenium_gobject <- .createGiottoXeniumObject_aggregate( +#' data_list = data_list, +#' instructions = instructions, +#' cores = cores, +#' verbose = verbose +#' ) +#' } +#' +#' return(xenium_gobject) +#' } +#' +#' +#' +#' +#' #' @title Create a Xenium Giotto object from subcellular info +#' #' @name .createGiottoXeniumObject_subcellular +#' #' @description Subcellular workflow for createGiottoXeniumObject +#' #' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} +#' #' @param key_list regex-based search keys for feature IDs to allow separation +#' #' into separate giottoPoints objects by feat_type +#' #' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' #' as a subcellular transcript detection (default = 20) +#' #' @inheritParams get10Xmatrix +#' #' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' #' @returns giotto object +#' #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate +#' #' @keywords internal +#' .createGiottoXeniumObject_subcellular <- function( +#' data_list, +#' key_list = NULL, +#' qv_threshold = 20, +#' instructions = NULL, +#' cores = NA, +#' verbose = TRUE +#' ) { +#' # data.table vars +#' qv <- NULL +#' +#' # Unpack data_list info +#' feat_meta <- data_list$feat_meta +#' tx_dt <- data_list$tx_dt +#' bound_dt_list <- data_list$bound_dt_list +#' +#' # define for data.table +#' cell_id <- feat_ID <- feature_name <- NULL +#' +#' vmsg("Building subcellular giotto object...", .v = verbose) +#' # Giotto points object +#' vmsg("> points data prep...", .v = verbose) +#' +#' # filter by qv_threshold +#' vmsg("> filtering feature detections for Phred score >= ", +#' qv_threshold, .v = verbose) +#' n_before <- tx_dt[, .N] +#' tx_dt_filtered <- tx_dt[qv >= qv_threshold] +#' n_after <- tx_dt_filtered[, .N] +#' +#' if (verbose) { +#' cat( +#' "Number of feature points removed: ", +#' n_before - n_after, +#' " out of ", n_before, "\n" +#' ) +#' } +#' +#' vmsg("> splitting detections by feat_type", .v = verbose) +#' # discover feat_IDs for each feat_type +#' all_IDs <- tx_dt_filtered[, unique(feat_ID)] +#' feat_types_IDs <- lapply( +#' key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) +#' rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) +#' feat_types_IDs <- append(rna, feat_types_IDs) +#' +#' # separate detections by feature type +#' points_list <- lapply( +#' feat_types_IDs, +#' function(types) { +#' tx_dt_filtered[feat_ID %in% types] +#' } +#' ) +#' +#' # Giotto polygons object +#' vmsg("> polygons data prep...", .v = verbose) +#' polys_list <- lapply( +#' bound_dt_list, +#' function(bound_type) { +#' bound_type[, cell_id := as.character(cell_id)] +#' } +#' ) +#' +#' xenium_gobject <- createGiottoObjectSubcellular( +#' gpoints = points_list, +#' gpolygons = polys_list, +#' instructions = instructions, +#' cores = cores, +#' verbose = verbose +#' ) +#' +#' # generate centroids +#' vmsg("Calculating polygon centroids...", .v = verbose) +#' xenium_gobject <- addSpatialCentroidLocations( +#' xenium_gobject, +#' poly_info = c(names(bound_dt_list)), +#' provenance = as.list(names(bound_dt_list)) +#' ) +#' +#' return(xenium_gobject) +#' } +#' +#' +#' +#' +#' +#' #' @title Create a Xenium Giotto object from aggregate info +#' #' @name .createGiottoXeniumObject_aggregate +#' #' @description Aggregate workflow for createGiottoXeniumObject +#' #' @param data_list list of data loaded by \code{.load_xenium_folder} +#' #' @inheritParams get10Xmatrix +#' #' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' #' @returns giotto object +#' #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular +#' #' @keywords internal +#' .createGiottoXeniumObject_aggregate <- function( +#' data_list, +#' # include_analysis = FALSE, +#' instructions = NULL, +#' cores = NA, +#' verbose = TRUE +#' ) { +#' # Unpack data_list info +#' feat_meta <- data_list$feat_meta +#' cell_meta <- data_list$cell_meta +#' agg_expr <- data_list$agg_expr +#' +#' # define for data.table +#' cell_ID <- x_centroid <- y_centroid <- NULL +#' +#' # clean up names for aggregate matrices +#' names(agg_expr) <- gsub(pattern = " ", replacement = "_", names(agg_expr)) +#' geneExpMat <- which(names(agg_expr) == "Gene_Expression") +#' names(agg_expr)[[geneExpMat]] <- "raw" +#' +#' # set cell_id as character +#' cell_meta <- cell_meta[, data.table::setnames(.SD, "cell_id", "cell_ID")] +#' cell_meta <- cell_meta[, cell_ID := as.character(cell_ID)] +#' +#' # set up spatial locations +#' agg_spatlocs <- cell_meta[, .(x_centroid, y_centroid, cell_ID)] +#' +#' # set up metadata +#' agg_meta <- cell_meta[, !c("x_centroid", "y_centroid")] +#' +#' vmsg("Building aggregate giotto object...", .v = verbose) +#' xenium_gobject <- createGiottoObject( +#' expression = agg_expr, +#' spatial_locs = agg_spatlocs, +#' instructions = instructions, +#' cores = cores, +#' verbose = verbose +#' ) +#' +#' # append aggregate metadata +#' xenium_gobject <- addCellMetadata( +#' gobject = xenium_gobject, +#' new_metadata = agg_meta, +#' by_column = TRUE, +#' column_cell_ID = "cell_ID" +#' ) +#' xenium_gobject <- addFeatMetadata( +#' gobject = xenium_gobject, +#' new_metadata = feat_meta, +#' by_column = TRUE, +#' column_feat_ID = "feat_ID" +#' ) +#' +#' return(xenium_gobject) +#' } +#' +#' +#' +#' +#' #' @title Read a structured xenium folder +#' #' @name .read_xenium_folder +#' #' @inheritParams createGiottoXeniumObject +#' #' @keywords internal +#' #' @returns path_list a list of xenium files discovered and their filepaths. NULL +#' #' values denote missing items +#' .read_xenium_folder <- function( +#' xenium_dir, +#' data_to_use = "subcellular", +#' bounds_to_load = c("cell"), +#' load_format = "csv", +#' h5_expression = FALSE, +#' verbose = TRUE +#' ) { +#' # Check needed packages +#' if (load_format == "parquet") { +#' package_check(pkg_name = "arrow", repository = "CRAN") +#' package_check(pkg_name = "dplyr", repository = "CRAN") +#' } +#' if (isTRUE(h5_expression)) { +#' package_check(pkg_name = "hdf5r", repository = "CRAN") +#' } +#' +#' ch <- box_chars() +#' +#' +#' # 0. test if folder structure exists and is as expected +#' +#' +#' if (is.null(xenium_dir) | !dir.exists(xenium_dir)) +#' stop("The full path to a xenium directory must be given.") +#' vmsg("A structured Xenium directory will be used\n", .v = verbose) +#' +#' # find items (length = 1 if present, length = 0 if missing) +#' dir_items <- list( +#' `analysis info` = "*analysis*", +#' `boundary info` = "*bound*", +#' `cell feature matrix` = "*cell_feature_matrix*", +#' `cell metadata` = "*cells*", +#' `image info` = "*tif", +#' `panel metadata` = "*panel*", +#' `raw transcript info` = "*transcripts*", +#' `experiment info (.xenium)` = "*.xenium" +#' ) +#' +#' dir_items <- lapply( +#' dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x))) +#' dir_items_lengths <- lengths(dir_items) +#' +#' if (isTRUE(verbose)) { +#' message("Checking directory contents...") +#' for (item in names(dir_items)) { +#' # IF ITEM FOUND +#' +#' if (dir_items_lengths[[item]] > 0) { +#' message(ch$s, "> ", item, " found") +#' for (item_i in seq_along(dir_items[[item]])) { +#' # print found item names +#' subItem <- gsub(pattern = ".*/", replacement = "", +#' x = dir_items[[item]][[item_i]]) +#' message(ch$s, ch$s, ch$l, ch$h, ch$h, subItem) +#' } +#' } else { +#' # IF ITEM MISSING +#' # Based on workflow, determine if: +#' # necessary (error) +#' # optional (warning) +#' +#' if (data_to_use == "subcellular") { +#' # necessary items +#' if (item %in% c("boundary info", "raw transcript info")) +#' stop(item, " is missing") +#' # optional items +#' if (item %in% c( +#' "image info", "experiment info (.xenium)", +#' "panel metadata")) +#' warning(item, " is missing (optional)") +#' # items to ignore: analysis info, cell feature matrix, +#' # cell metadata +#' } else if (data_to_use == "aggregate") { +#' # necessary items +#' if (item %in% c("cell feature matrix", "cell metadata")) +#' stop(item, " is missing") +#' # optional items +#' if (item %in% c( +#' "image info", "experiment info (.xenium)", +#' "panel metadata", "analysis info")) +#' warning(item, " is missing (optional)") +#' # items to ignore: boundary info, raw transcript info +#' } +#' } +#' } +#' } +#' +#' +#' # 1. Select data to load +#' +#' +#' # **** transcript info **** +#' tx_path <- NULL +#' tx_path <- dir_items$`raw transcript info`[grepl( +#' pattern = load_format, dir_items$`raw transcript info`)] +#' # **** cell metadata **** +#' cell_meta_path <- NULL +#' cell_meta_path <- dir_items$`cell metadata`[grepl( +#' pattern = load_format, dir_items$`cell metadata`)] +#' +#' # **** boundary info **** +#' # Select bound load format +#' if (load_format != "zarr") { # No zarr available for boundary info +#' dir_items$`boundary info` <- dir_items$`boundary info`[grepl( +#' pattern = load_format, dir_items$`boundary info`)] +#' } else { +#' dir_items$`boundary info` <- dir_items$`boundary info`[grepl( +#' pattern = "csv", dir_items$`boundary info`)] +#' } +#' +#' # Organize bound paths by type of bound (bounds_to_load param) +#' bound_paths <- NULL +#' bound_names <- bounds_to_load +#' bounds_to_load <- as.list(bounds_to_load) +#' bound_paths <- lapply(bounds_to_load, function(x) dir_items$`boundary info`[ +#' grepl(pattern = x, dir_items$`boundary info`)]) +#' names(bound_paths) <- bound_names +#' +#' # **** aggregated expression info **** +#' agg_expr_path <- NULL +#' if (isTRUE(h5_expression)) { # h5 expression matrix loading is default +#' agg_expr_path <- dir_items$`cell feature matrix`[grepl( +#' pattern = "h5", dir_items$`cell feature matrix`)] +#' } else if (load_format == "zarr") { +#' agg_expr_path <- dir_items$`cell feature matrix`[grepl( +#' pattern = "zarr", dir_items$`cell feature matrix`)] +#' } else { # No parquet for aggregated expression - default to normal 10x loading +#' agg_expr_path <- dir_items$`cell feature matrix`[sapply( +#' dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x))] +#' if (length(agg_expr_path) == 0) { +#' stop(wrap_txt( +#' "Expression matrix cannot be loaded.\n +#' Has cell_feature_matrix(.tar.gz) been unpacked into a +#' directory?" +#' )) +#' } +#' } +#' if (data_to_use == "aggregate") { +#' if (length(path_list$agg_expr_path) == 0) { +#' stop(wrap_txt( +#' "Aggregated expression not found.\n +#' Please confirm h5_expression and load_format params are correct" +#' )) +#' } +#' } +#' +#' # **** panel info **** +#' panel_meta_path <- NULL +#' panel_meta_path <- dir_items$`panel metadata` +#' +#' +#' vmsg("Directory check done", .v = verbose) +#' +#' path_list <- list( +#' "tx_path" = tx_path, +#' "bound_paths" = bound_paths, +#' "cell_meta_path" = cell_meta_path, +#' "agg_expr_path" = agg_expr_path, +#' "panel_meta_path" = panel_meta_path +#' ) +#' +#' return(path_list) +#' } +#' +#' #' @title Load xenium data from folder +#' #' @name load_xenium_folder +#' #' @param path_list list of full filepaths from .read_xenium_folder +#' #' @inheritParams createGiottoXeniumObject +#' #' @returns list of loaded in xenium data +#' NULL +#' +#' #' @rdname load_xenium_folder +#' #' @keywords internal +#' .load_xenium_folder <- function( +#' path_list, +#' load_format = "csv", +#' data_to_use = "subcellular", +#' h5_expression = "FALSE", +#' h5_gene_ids = "symbols", +#' gene_column_index = 1, +#' cores, +#' verbose = TRUE +#' ) { +#' data_list <- switch(load_format, +#' "csv" = .load_xenium_folder_csv( +#' path_list = path_list, +#' data_to_use = data_to_use, +#' h5_expression = h5_expression, +#' h5_gene_ids = h5_gene_ids, +#' gene_column_index = gene_column_index, +#' cores = cores, +#' verbose = verbose +#' ), +#' "parquet" = .load_xenium_folder_parquet( +#' path_list = path_list, +#' data_to_use = data_to_use, +#' h5_expression = h5_expression, +#' h5_gene_ids = h5_gene_ids, +#' gene_column_index = gene_column_index, +#' cores = cores, +#' verbose = verbose +#' ), +#' "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) +#' ) +#' +#' return(data_list) +#' } +#' +#' +#' #' @describeIn load_xenium_folder Load from csv files +#' #' @keywords internal +#' .load_xenium_folder_csv <- function( +#' path_list, +#' cores, +#' data_to_use = "subcellular", +#' h5_expression = FALSE, +#' h5_gene_ids = "symbols", +#' gene_column_index = 1, +#' verbose = TRUE +#' ) { +#' # initialize return vars +#' feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL +#' +#' vmsg("Loading feature metadata...", .v = verbose) +#' # updated for pipeline v1.6 json format +#' fdata_path <- path_list$panel_meta_path[[1]] +#' fdata_ext <- GiottoUtils::file_extension(fdata_path) +#' if ("json" %in% fdata_ext) { +#' feat_meta <- .load_xenium_panel_json(path = fdata_path, +#' gene_ids = h5_gene_ids) +#' } else { +#' feat_meta <- data.table::fread(fdata_path, nThread = cores) +#' colnames(feat_meta)[[1]] <- "feat_ID" +#' } +#' +#' # **** subcellular info **** +#' if (data_to_use == "subcellular") { +#' # append missing QC probe info to feat_meta +#' if (isTRUE(h5_expression)) { +#' h5 <- hdf5r::H5File$new(path_list$agg_expr_path) +#' tryCatch({ +#' root <- names(h5) +#' feature_id <- h5[[paste0(root, "/features/id")]][] +#' feature_info <- h5[[paste0(root, "/features/feature_type")]][] +#' feature_names <- h5[[paste0(root, "/features/name")]][] +#' features_dt <- data.table::data.table( +#' "id" = feature_id, +#' "name" = feature_names, +#' "feature_type" = feature_info +#' ) +#' }, finally = { +#' h5$close_all() +#' }) +#' } else { +#' features_dt <- data.table::fread( +#' paste0(path_list$agg_expr_path, "/features.tsv.gz"), +#' header = FALSE +#' ) +#' } +#' colnames(features_dt) <- c("id", "feat_ID", "feat_class") +#' feat_meta <- merge( +#' features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") +#' +#' GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) +#' tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) +#' data.table::setnames( +#' x = tx_dt, +#' old = c("feature_name", "x_location", "y_location"), +#' new = c("feat_ID", "x", "y") +#' ) +#' +#' GiottoUtils::vmsg("Loading boundary info...", .v = verbose) +#' bound_dt_list <- lapply( +#' path_list$bound_paths, +#' function(x) data.table::fread(x[[1]], nThread = cores) +#' ) +#' } +#' +#' # **** aggregate info **** +#' GiottoUtils::vmsg("loading cell metadata...", .v = verbose) +#' cell_meta <- data.table::fread( +#' path_list$cell_meta_path[[1]], nThread = cores) +#' +#' if (data_to_use == "aggregate") { +#' GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) +#' if (isTRUE(h5_expression)) { +#' agg_expr <- get10Xmatrix_h5( +#' path_to_data = path_list$agg_expr_path, +#' gene_ids = h5_gene_ids, +#' remove_zero_rows = TRUE, +#' split_by_type = TRUE +#' ) +#' } else { +#' agg_expr <- get10Xmatrix( +#' path_to_data = path_list$agg_expr_path, +#' gene_column_index = gene_column_index, +#' remove_zero_rows = TRUE, +#' split_by_type = TRUE +#' ) +#' } +#' } +#' +#' data_list <- list( +#' "feat_meta" = feat_meta, +#' "tx_dt" = tx_dt, +#' "bound_dt_list" = bound_dt_list, +#' "cell_meta" = cell_meta, +#' "agg_expr" = agg_expr +#' ) +#' +#' return(data_list) +#' } +#' +#' +#' +#' +#' #' @describeIn load_xenium_folder Load from parquet files +#' #' @keywords internal +#' .load_xenium_folder_parquet <- function( +#' path_list, +#' cores, +#' data_to_use = "subcellular", +#' h5_expression = FALSE, +#' h5_gene_ids = "symbols", +#' gene_column_index = 1, +#' verbose = TRUE +#' ) { +#' # initialize return vars +#' feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL +#' # dplyr variable +#' cell_id <- NULL +#' +#' vmsg("Loading feature metadata...", .v = verbose) +#' # updated for pipeline v1.6 json format +#' fdata_path <- path_list$panel_meta_path[[1]] +#' fdata_ext <- GiottoUtils::file_extension(fdata_path) +#' if ("json" %in% fdata_ext) { +#' feat_meta <- .load_xenium_panel_json( +#' path = fdata_path, gene_ids = h5_gene_ids) +#' } else { +#' feat_meta <- data.table::fread(fdata_path, nThread = cores) +#' colnames(feat_meta)[[1]] <- "feat_ID" +#' } +#' +#' # **** subcellular info **** +#' if (data_to_use == "subcellular") { +#' # define for data.table +#' transcript_id <- feature_name <- NULL +#' +#' # append missing QC probe info to feat_meta +#' if (isTRUE(h5_expression)) { +#' h5 <- hdf5r::H5File$new(path_list$agg_expr_path) +#' tryCatch({ +#' root <- names(h5) +#' feature_id <- h5[[paste0(root, "/features/id")]][] +#' feature_info <- h5[[paste0(root, "/features/feature_type")]][] +#' feature_names <- h5[[paste0(root, "/features/name")]][] +#' features_dt <- data.table::data.table( +#' "id" = feature_id, +#' "name" = feature_names, +#' "feature_type" = feature_info +#' ) +#' }, finally = { +#' h5$close_all() +#' }) +#' } else { +#' features_dt <- arrow::read_tsv_arrow(paste0( +#' path_list$agg_expr_path, "/features.tsv.gz"), +#' col_names = FALSE +#' ) %>% +#' data.table::setDT() +#' } +#' colnames(features_dt) <- c("id", "feat_ID", "feat_class") +#' feat_meta <- merge(features_dt[ +#' , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") +#' +#' vmsg("Loading transcript level info...", .v = verbose) +#' tx_dt <- arrow::read_parquet( +#' file = path_list$tx_path[[1]], +#' as_data_frame = FALSE +#' ) %>% +#' dplyr::mutate( +#' transcript_id = cast(transcript_id, arrow::string())) %>% +#' dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% +#' dplyr::mutate( +#' feature_name = cast(feature_name, arrow::string())) %>% +#' as.data.frame() %>% +#' data.table::setDT() +#' data.table::setnames( +#' x = tx_dt, +#' old = c("feature_name", "x_location", "y_location"), +#' new = c("feat_ID", "x", "y") +#' ) +#' vmsg("Loading boundary info...", .v = verbose) +#' bound_dt_list <- lapply(path_list$bound_paths, function(x) { +#' arrow::read_parquet(file = x[[1]], as_data_frame = FALSE) %>% +#' dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% +#' as.data.frame() %>% +#' data.table::setDT() +#' }) +#' } +#' # **** aggregate info **** +#' if (data_to_use == "aggregate") { +#' vmsg("Loading cell metadata...", .v = verbose) +#' cell_meta <- arrow::read_parquet( +#' file = path_list$cell_meta_path[[1]], +#' as_data_frame = FALSE +#' ) %>% +#' dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% +#' as.data.frame() %>% +#' data.table::setDT() +#' +#' # NOTE: no parquet for agg_expr. +#' vmsg("Loading aggregated expression...", .v = verbose) +#' if (isTRUE(h5_expression)) { +#' agg_expr <- get10Xmatrix_h5( +#' path_to_data = path_list$agg_expr_path, +#' gene_ids = h5_gene_ids, +#' remove_zero_rows = TRUE, +#' split_by_type = TRUE +#' ) +#' } else { +#' agg_expr <- get10Xmatrix( +#' path_to_data = path_list$agg_expr_path, +#' gene_column_index = gene_column_index, +#' remove_zero_rows = TRUE, +#' split_by_type = TRUE +#' ) +#' } +#' } +#' +#' data_list <- list( +#' "feat_meta" = feat_meta, +#' "tx_dt" = tx_dt, +#' "bound_dt_list" = bound_dt_list, +#' "cell_meta" = cell_meta, +#' "agg_expr" = agg_expr +#' ) +#' +#' return(data_list) +#' } diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 41be6c19e..edcc625f3 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -6,64 +6,101 @@ \usage{ createGiottoXeniumObject( xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), + transcript_path = NULL, + bounds_path = list(cell = "cell", nucleus = "nucleus"), + gene_panel_json_path = NULL, + expression_path = NULL, + metadata_path = NULL, + feat_type = c("rna", "NegControlProbe", "UnassignedCodeword", "NegControlCodeword"), + split_keyword = list("NegControlProbe", "UnassignedCodeword", "NegControlCodeword"), qv_threshold = 20, - key_list = NULL, - instructions = NULL, - cores = NA, - verbose = TRUE + load_images = NULL, + load_aligned_images = NULL, + load_expression = FALSE, + load_cellmeta = FALSE, + verbose = NULL ) } \arguments{ -\item{xenium_dir}{full path to the exported xenium directory} +\item{xenium_dir}{Full path to the exported xenium directory} -\item{data_to_use}{which type(s) of expression data to build the gobject with -(e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} +\item{transcript_path}{Optional. Filepath to desired transcripts file to +load. Either the `.parquet` or `.csv` files can be used.} -\item{load_format}{files formats from which to load the data. Either `csv` or -`parquet` currently supported.} +\item{bounds_path}{Optional. Named list of filepaths to desired Xenium +bounds/polygon files to load. Either the `.parquet` or `.csv` files can be +used. The default is to load the `.parquets` of both cell and nucleus.} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 -file. Default is \code{TRUE}} +\item{gene_panel_json_path}{Optional. Filepath to panel json. This json +contains feature metadata information and ENSG names.} -\item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene -expression matrix} +\item{expression_path}{Optional. Filepath to cell feature matrix. Accepts +either the `.h5` or the unzipped directory containing `.mtx` files.} -\item{gene_column_index}{which column from the features or genes .tsv file -to use for row ids} +\item{metadata_path}{Optional. Filepath to `cells.csv.gz` or `cells.parquet` +which contain cell metadata information.} -\item{bounds_to_load}{vector of boundary information to load -(e.g. \code{'cell'} -or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both -at the same time.)} +\item{feat_type}{character. feature type. Provide more than one value if +using the `split_keyword` param. For each set of keywords to split by, an +additional feat_type should be provided in the same order. Affects how +the transcripts information is loaded. Helpful for separating out the +QC probes. See details.} + +\item{split_keyword}{list of character vectors of keywords to split the +transcripts based on their feat_ID. Keywords will be `grepl()` +matched against the feature IDs information. See details.} \item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} -\item{key_list}{(advanced) list of grep-based keywords to split the -subcellular feature detections by feature type. See details} +\item{load_images}{Named list of filepaths to `.tif` images, usually the +ones in the `morphology_focus` directory. These `ome.tif` images are not +compatible and must be converted to `tif` using +`[GiottoClass::ometif_to_tif()]`.} + +\item{load_aligned_images}{Named list of filepaths. The list names are used +as the image names when loaded. Two filepaths are expected per entry. The +first one should be to the `.tif` image. The second path is to the `.csv` +alignment matrix file. `ome.tif` images will work, but they are currently +slower in our imaging pipeline.} -\item{instructions}{list of instructions or output result -from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{load_expression}{logical. Default = FALSE. Whether to load in 10X +provided expression matrix.} -\item{cores}{how many cores or threads to use to read data if paths are -provided} +\item{load_cellmeta}{logical. Default = FALSE. Whether to laod in 10X +provided cell metadata information} -\item{verbose}{be verbose when building Giotto object} +\item{verbose}{logical or NULL. NULL uses the `giotto.verbose` option +setting and defaults to TRUE.} } \value{ -giotto object +`giotto` object } \description{ -Given the path to a Xenium experiment output folder, creates a -Giotto object +Create a Giotto object from a Xenium experiment output folder. +Only the `xenium_dir`, `load_images`, and `load_aligned_images` params +need to be supplied when defaults are sufficient. All other params have +defaults set and are there in case of non-standard directory layouts or +alternative preference in file format to load from.\cr +When possible, `.parquet` files are loaded. This requires the additional +installation of \pkg{arrow} with zstd support. See details. `h5` is also +used by default if the 10x provided expression matrix is loaded.\cr +The 10X provided aggregated expression matrix and cell metdata are not +loaded by default since the results may be slightly different from those +that Giotto spatially aggregates. } \details{ +[\strong{arrow zstd support}] +Xenium parquets have zstd compression. \pkg{arrow} is used to access +parquets, however it may not install on all systems with zstd by default. +You can check whether zstd support is installed by running: +`arrow::arrow_info()$capabilities[["zstd"]]`. If `FALSE`, it needs to be +reinstalled with the following: +\preformatted{ + Sys.setenv(ARROW_WITH_ZSTD = "ON") + install.packages("arrow", repos = c("https://apache.r-universe.dev")) +} + [\strong{QC feature types}] Xenium provides info on feature detections that include more than only the Gene Expression specific probes. Additional probes for QC are included: @@ -72,25 +109,16 @@ Gene Expression specific probes. Additional probes for QC are included: are treated as their own feature types so that they can largely remain independent of the gene expression information. -[\strong{key_list}] -Related to \code{data_to_use = 'subcellular'} workflow only: +[\strong{feat_type and split_keyword}] Additional QC probe information is in the subcellular feature detections information and must be separated from the gene expression information during processing. The QC probes have prefixes that allow them to be selected from the rest of the feature IDs. -Giotto uses a named list of keywords (\code{key_list}) to select these QC -probes, with the list names being the names that will be assigned as the -feature type of these feature detections. The default list is used when -\code{key_list} = NULL. - -Default list: -\preformatted{ - list(blank_code = 'BLANK_', - neg_code = 'NegControlCodeword_', - neg_probe = c('NegControlProbe_|antisense_')) -} +Giotto uses `feat_type` and `split_keyword` params to select these QC +probes out as separate feature types. See examples in +`[GiottoClass::createGiottoPoints]` for how this works. -The Gene expression subset is accepted as the subset of feat_IDs that do not -map to any of the keys. +The Gene expression subset labeled as `rna` is accepted as the subset of +feat_IDs that do not get matched to any of the `split_keywords`. } diff --git a/man/dot-createGiottoXeniumObject_aggregate.Rd b/man/dot-createGiottoXeniumObject_aggregate.Rd deleted file mode 100644 index 5baa80496..000000000 --- a/man/dot-createGiottoXeniumObject_aggregate.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience_xenium.R -\name{.createGiottoXeniumObject_aggregate} -\alias{.createGiottoXeniumObject_aggregate} -\title{Create a Xenium Giotto object from aggregate info} -\usage{ -.createGiottoXeniumObject_aggregate( - data_list, - instructions = NULL, - cores = NA, - verbose = TRUE -) -} -\arguments{ -\item{data_list}{list of data loaded by \code{.load_xenium_folder}} - -\item{instructions}{list of instructions or output result -from \code{\link[GiottoClass]{createGiottoInstructions}}} - -\item{cores}{how many cores or threads to use to read data if paths are -provided} - -\item{verbose}{be verbose when building Giotto object} -} -\value{ -giotto object -} -\description{ -Aggregate workflow for createGiottoXeniumObject -} -\seealso{ -createGiottoXeniumObject .createGiottoXeniumObject_subcellular -} -\keyword{internal} diff --git a/man/dot-createGiottoXeniumObject_subcellular.Rd b/man/dot-createGiottoXeniumObject_subcellular.Rd deleted file mode 100644 index b7e564a92..000000000 --- a/man/dot-createGiottoXeniumObject_subcellular.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience_xenium.R -\name{.createGiottoXeniumObject_subcellular} -\alias{.createGiottoXeniumObject_subcellular} -\title{Create a Xenium Giotto object from subcellular info} -\usage{ -.createGiottoXeniumObject_subcellular( - data_list, - key_list = NULL, - qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE -) -} -\arguments{ -\item{data_list}{list of data loaded by \code{\link{.load_xenium_folder}}} - -\item{key_list}{regex-based search keys for feature IDs to allow separation -into separate giottoPoints objects by feat_type} - -\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included -as a subcellular transcript detection (default = 20)} - -\item{instructions}{list of instructions or output result -from \code{\link[GiottoClass]{createGiottoInstructions}}} - -\item{cores}{how many cores or threads to use to read data if paths are -provided} - -\item{verbose}{be verbose when building Giotto object} -} -\value{ -giotto object -} -\description{ -Subcellular workflow for createGiottoXeniumObject -} -\seealso{ -createGiottoXeniumObject .createGiottoXeniumObject_aggregate -} -\keyword{internal} diff --git a/man/dot-read_xenium_folder.Rd b/man/dot-read_xenium_folder.Rd deleted file mode 100644 index f0e5dfda3..000000000 --- a/man/dot-read_xenium_folder.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience_xenium.R -\name{.read_xenium_folder} -\alias{.read_xenium_folder} -\title{Read a structured xenium folder} -\usage{ -.read_xenium_folder( - xenium_dir, - data_to_use = "subcellular", - bounds_to_load = c("cell"), - load_format = "csv", - h5_expression = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{xenium_dir}{full path to the exported xenium directory} - -\item{data_to_use}{which type(s) of expression data to build the gobject with -(e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} - -\item{bounds_to_load}{vector of boundary information to load -(e.g. \code{'cell'} -or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both -at the same time.)} - -\item{load_format}{files formats from which to load the data. Either `csv` or -`parquet` currently supported.} - -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 -file. Default is \code{TRUE}} - -\item{verbose}{be verbose when building Giotto object} -} -\value{ -path_list a list of xenium files discovered and their filepaths. NULL -values denote missing items -} -\description{ -Read a structured xenium folder -} -\keyword{internal} diff --git a/man/load_xenium_folder.Rd b/man/load_xenium_folder.Rd deleted file mode 100644 index fb2cd8951..000000000 --- a/man/load_xenium_folder.Rd +++ /dev/null @@ -1,77 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience_xenium.R -\name{load_xenium_folder} -\alias{load_xenium_folder} -\alias{.load_xenium_folder} -\alias{.load_xenium_folder_csv} -\alias{.load_xenium_folder_parquet} -\title{Load xenium data from folder} -\usage{ -.load_xenium_folder( - path_list, - load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", - gene_column_index = 1, - cores, - verbose = TRUE -) - -.load_xenium_folder_csv( - path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE -) - -.load_xenium_folder_parquet( - path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE -) -} -\arguments{ -\item{path_list}{list of full filepaths from .read_xenium_folder} - -\item{load_format}{files formats from which to load the data. Either `csv` or -`parquet` currently supported.} - -\item{data_to_use}{which type(s) of expression data to build the gobject with -(e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} - -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 -file. Default is \code{TRUE}} - -\item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene -expression matrix} - -\item{gene_column_index}{which column from the features or genes .tsv file -to use for row ids} - -\item{cores}{how many cores or threads to use to read data if paths are -provided} - -\item{verbose}{be verbose when building Giotto object} -} -\value{ -list of loaded in xenium data -} -\description{ -Load xenium data from folder -} -\section{Functions}{ -\itemize{ -\item \code{.load_xenium_folder_csv()}: Load from csv files - -\item \code{.load_xenium_folder_parquet()}: Load from parquet files - -}} -\keyword{internal} From 5bf13dad05c3d1ace4eeebb94ca54c02009d72e4 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:11:44 -0400 Subject: [PATCH 131/150] chore: use @md tag --- R/convenience_xenium.R | 1 + man/createGiottoXeniumObject.Rd | 52 ++++++++++++++++----------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 25bf811c0..ebee56c0d 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -1232,6 +1232,7 @@ importXenium <- function( #' The Gene expression subset labeled as `rna` is accepted as the subset of #' feat_IDs that do not get matched to any of the `split_keywords`. #' +#' @md #' @export createGiottoXeniumObject <- function( xenium_dir, diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index edcc625f3..14a2fb4d4 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -25,43 +25,43 @@ createGiottoXeniumObject( \item{xenium_dir}{Full path to the exported xenium directory} \item{transcript_path}{Optional. Filepath to desired transcripts file to -load. Either the `.parquet` or `.csv` files can be used.} +load. Either the \code{.parquet} or \code{.csv} files can be used.} \item{bounds_path}{Optional. Named list of filepaths to desired Xenium -bounds/polygon files to load. Either the `.parquet` or `.csv` files can be -used. The default is to load the `.parquets` of both cell and nucleus.} +bounds/polygon files to load. Either the \code{.parquet} or \code{.csv} files can be +used. The default is to load the \code{.parquets} of both cell and nucleus.} \item{gene_panel_json_path}{Optional. Filepath to panel json. This json contains feature metadata information and ENSG names.} \item{expression_path}{Optional. Filepath to cell feature matrix. Accepts -either the `.h5` or the unzipped directory containing `.mtx` files.} +either the \code{.h5} or the unzipped directory containing \code{.mtx} files.} -\item{metadata_path}{Optional. Filepath to `cells.csv.gz` or `cells.parquet` +\item{metadata_path}{Optional. Filepath to \code{cells.csv.gz} or \code{cells.parquet} which contain cell metadata information.} \item{feat_type}{character. feature type. Provide more than one value if -using the `split_keyword` param. For each set of keywords to split by, an +using the \code{split_keyword} param. For each set of keywords to split by, an additional feat_type should be provided in the same order. Affects how the transcripts information is loaded. Helpful for separating out the QC probes. See details.} \item{split_keyword}{list of character vectors of keywords to split the -transcripts based on their feat_ID. Keywords will be `grepl()` +transcripts based on their feat_ID. Keywords will be \code{grepl()} matched against the feature IDs information. See details.} \item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} -\item{load_images}{Named list of filepaths to `.tif` images, usually the -ones in the `morphology_focus` directory. These `ome.tif` images are not -compatible and must be converted to `tif` using -`[GiottoClass::ometif_to_tif()]`.} +\item{load_images}{Named list of filepaths to \code{.tif} images, usually the +ones in the \code{morphology_focus} directory. These \code{ome.tif} images are not +compatible and must be converted to \code{tif} using +\verb{[GiottoClass::ometif_to_tif()]}.} \item{load_aligned_images}{Named list of filepaths. The list names are used as the image names when loaded. Two filepaths are expected per entry. The -first one should be to the `.tif` image. The second path is to the `.csv` -alignment matrix file. `ome.tif` images will work, but they are currently +first one should be to the \code{.tif} image. The second path is to the \code{.csv} +alignment matrix file. \code{ome.tif} images will work, but they are currently slower in our imaging pipeline.} \item{load_expression}{logical. Default = FALSE. Whether to load in 10X @@ -70,38 +70,38 @@ provided expression matrix.} \item{load_cellmeta}{logical. Default = FALSE. Whether to laod in 10X provided cell metadata information} -\item{verbose}{logical or NULL. NULL uses the `giotto.verbose` option +\item{verbose}{logical or NULL. NULL uses the \code{giotto.verbose} option setting and defaults to TRUE.} } \value{ -`giotto` object +\code{giotto} object } \description{ Create a Giotto object from a Xenium experiment output folder. -Only the `xenium_dir`, `load_images`, and `load_aligned_images` params +Only the \code{xenium_dir}, \code{load_images}, and \code{load_aligned_images} params need to be supplied when defaults are sufficient. All other params have defaults set and are there in case of non-standard directory layouts or alternative preference in file format to load from.\cr -When possible, `.parquet` files are loaded. This requires the additional -installation of \pkg{arrow} with zstd support. See details. `h5` is also +When possible, \code{.parquet} files are loaded. This requires the additional +installation of \pkg{arrow} with zstd support. See details. \code{h5} is also used by default if the 10x provided expression matrix is loaded.\cr The 10X provided aggregated expression matrix and cell metdata are not loaded by default since the results may be slightly different from those that Giotto spatially aggregates. } \details{ -[\strong{arrow zstd support}] +\link[=\\strong{arrow zstd support}]{\strong{arrow zstd support}} Xenium parquets have zstd compression. \pkg{arrow} is used to access parquets, however it may not install on all systems with zstd by default. You can check whether zstd support is installed by running: -`arrow::arrow_info()$capabilities[["zstd"]]`. If `FALSE`, it needs to be +\code{arrow::arrow_info()$capabilities[["zstd"]]}. If \code{FALSE}, it needs to be reinstalled with the following: \preformatted{ Sys.setenv(ARROW_WITH_ZSTD = "ON") install.packages("arrow", repos = c("https://apache.r-universe.dev")) } -[\strong{QC feature types}] +\link[=\\strong{QC feature types}]{\strong{QC feature types}} Xenium provides info on feature detections that include more than only the Gene Expression specific probes. Additional probes for QC are included: \emph{blank codeword}, \emph{negative control codeword}, and @@ -109,16 +109,16 @@ Gene Expression specific probes. Additional probes for QC are included: are treated as their own feature types so that they can largely remain independent of the gene expression information. -[\strong{feat_type and split_keyword}] +\link[=\\strong{feat_type and split_keyword}]{\strong{feat_type and split_keyword}} Additional QC probe information is in the subcellular feature detections information and must be separated from the gene expression information during processing. The QC probes have prefixes that allow them to be selected from the rest of the feature IDs. -Giotto uses `feat_type` and `split_keyword` params to select these QC +Giotto uses \code{feat_type} and \code{split_keyword} params to select these QC probes out as separate feature types. See examples in -`[GiottoClass::createGiottoPoints]` for how this works. +\verb{[GiottoClass::createGiottoPoints]} for how this works. -The Gene expression subset labeled as `rna` is accepted as the subset of -feat_IDs that do not get matched to any of the `split_keywords`. +The Gene expression subset labeled as \code{rna} is accepted as the subset of +feat_IDs that do not get matched to any of the \code{split_keywords}. } From 8239661befba1894c624098f164fc8de6e093ed8 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:14:49 -0400 Subject: [PATCH 132/150] chore: fix formatting --- R/convenience_xenium.R | 10 +++++----- man/createGiottoXeniumObject.Rd | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index ebee56c0d..32ebb99bc 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -1200,7 +1200,7 @@ importXenium <- function( #' @returns `giotto` object #' @details #' -#' [\strong{arrow zstd support}] +#' \[\strong{arrow zstd support}\] #' Xenium parquets have zstd compression. \pkg{arrow} is used to access #' parquets, however it may not install on all systems with zstd by default. #' You can check whether zstd support is installed by running: @@ -1211,7 +1211,7 @@ importXenium <- function( #' install.packages("arrow", repos = c("https://apache.r-universe.dev")) #' } #' -#' [\strong{QC feature types}] +#' \[\strong{QC feature types}\] #' Xenium provides info on feature detections that include more than only the #' Gene Expression specific probes. Additional probes for QC are included: #' \emph{blank codeword}, \emph{negative control codeword}, and @@ -1219,7 +1219,7 @@ importXenium <- function( #' are treated as their own feature types so that they can largely remain #' independent of the gene expression information. #' -#' [\strong{feat_type and split_keyword}] +#' \[\strong{feat_type and split_keyword}\] #' Additional QC probe information is in the subcellular feature detections #' information and must be separated from the gene expression information #' during processing. @@ -1313,7 +1313,7 @@ createGiottoXeniumObject <- function( #' #' @returns giotto object #' #' @details #' #' -#' #' [\strong{QC feature types}] +#' #' \[\strong{QC feature types}\] #' #' Xenium provides info on feature detections that include more than only the #' #' Gene Expression specific probes. Additional probes for QC are included: #' #' \emph{blank codeword}, \emph{negative control codeword}, and @@ -1321,7 +1321,7 @@ createGiottoXeniumObject <- function( #' #' are treated as their own feature types so that they can largely remain #' #' independent of the gene expression information. #' #' -#' #' [\strong{key_list}] +#' #' \[\strong{key_list}\] #' #' Related to \code{data_to_use = 'subcellular'} workflow only: #' #' Additional QC probe information is in the subcellular feature detections #' #' information and must be separated from the gene expression information diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 14a2fb4d4..51aadbab7 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -90,7 +90,7 @@ loaded by default since the results may be slightly different from those that Giotto spatially aggregates. } \details{ -\link[=\\strong{arrow zstd support}]{\strong{arrow zstd support}} +[\strong{arrow zstd support}] Xenium parquets have zstd compression. \pkg{arrow} is used to access parquets, however it may not install on all systems with zstd by default. You can check whether zstd support is installed by running: @@ -101,7 +101,7 @@ reinstalled with the following: install.packages("arrow", repos = c("https://apache.r-universe.dev")) } -\link[=\\strong{QC feature types}]{\strong{QC feature types}} +[\strong{QC feature types}] Xenium provides info on feature detections that include more than only the Gene Expression specific probes. Additional probes for QC are included: \emph{blank codeword}, \emph{negative control codeword}, and @@ -109,7 +109,7 @@ Gene Expression specific probes. Additional probes for QC are included: are treated as their own feature types so that they can largely remain independent of the gene expression information. -\link[=\\strong{feat_type and split_keyword}]{\strong{feat_type and split_keyword}} +[\strong{feat_type and split_keyword}] Additional QC probe information is in the subcellular feature detections information and must be separated from the gene expression information during processing. From 33a0eaf4f0bca4edc0357f3b2beb7d471f27bd9f Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:19:41 -0400 Subject: [PATCH 133/150] fix typo --- R/convenience_xenium.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 32ebb99bc..e8bd38d85 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -410,7 +410,7 @@ setMethod( split_keyword = split_keyword, verbose = verbose ) - g <- setGiotto(g, tx, verbose = FALSE) # lists are fine + g <- setGiotto(g, tx_list, verbose = FALSE) # lists are fine # polys if (!is.null(load_bounds)) { From f491e3893564a3b1866dcf9039d5439abfb4d0da Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:27:42 -0400 Subject: [PATCH 134/150] fix typo --- R/convenience_xenium.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index e8bd38d85..2caf9ea81 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -344,7 +344,7 @@ setMethod( ), gene_panel_json_path = panel_meta_path, expression_path = expr_path, - metadata_path = meta_path, + metadata_path = cell_meta_path, feat_type = c( "rna", "NegControlProbe", From 4a32833f3e316347205319758bc4e04533b982bd Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:55:07 -0400 Subject: [PATCH 135/150] fixes - add dropcols for centroids in metadata - fix feature type naming in expression matrices --- R/convenience_xenium.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 2caf9ea81..2a40d7ff1 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -249,7 +249,7 @@ setMethod( # load cellmeta cmeta_fun <- function( path = cell_meta_path, - dropcols = c(), + dropcols = c("x_centroid", "y_centroid"), cores = determine_cores(), verbose = NULL ) { @@ -967,14 +967,18 @@ importXenium <- function( # ensure list if (!inherits(ex_list, "list")) ex_list <- list(ex_list) + # set correct feature name + fname <- "rna" + if (length(fname) > 1L) fname <- names(fname) + fname[fname == "Gene Expression"] <- "rna" # lapply to process more than one if present - eo_list <- lapply(ex_list, function(ex) { + eo_list <- lapply(seq_along(ex_list), function(ex_i) { createExprObj( - expression_data = ex, + expression_data = ex_list[[ex_i]], name = "raw", spat_unit = "cell", - feat_type = "rna", + feat_type = fname[[ex_i]], provenance = "cell" ) }) From 9182a8a050865c48bb51e50edbcc81d033650806 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:56:11 -0400 Subject: [PATCH 136/150] fix refs to image loading funs --- R/convenience_xenium.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 2a40d7ff1..0218007c0 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -474,7 +474,7 @@ setMethod( imglist <- list() imnames <- names(load_images) for (impath_i in seq_along(load_images)) { - im <- load_image( + im <- funs$load_image( path = load_images[[impath_i]], name = imnames[[impath_i]] ) @@ -490,7 +490,7 @@ setMethod( aimglist <- list() aimnames <- names(load_aligned_images) for (aim_i in seq_along(load_aligned_images)) { - aim <- load_aligned_image( + aim <- funs$load_aligned_image( path = load_aligned_images[[aim_i]][1], imagealignment_path = load_aligned_images[[aim_i]][2] ) From 8d3a9e62837f9002ed2d3cc0c6f84c1df8b339ac Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:04:08 -0400 Subject: [PATCH 137/150] fix typo --- R/convenience_xenium.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 0218007c0..6d4972340 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -969,7 +969,7 @@ importXenium <- function( if (!inherits(ex_list, "list")) ex_list <- list(ex_list) # set correct feature name fname <- "rna" - if (length(fname) > 1L) fname <- names(fname) + if (length(names(ex_list)) > 1L) fname <- names(ex_list) fname[fname == "Gene Expression"] <- "rna" # lapply to process more than one if present From b9a50b848d628b77836b2de0af7774421c8984c1 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:21:30 -0400 Subject: [PATCH 138/150] fix and add aligned image naming --- R/convenience_xenium.R | 5 ++++- R/general_help.R | 12 ++++++++---- man/read10xAffineImage.Rd | 19 +++++++++++++++---- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 6d4972340..7d7fdb5ed 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -324,11 +324,13 @@ setMethod( img_aff_fun <- function( path, imagealignment_path, + name = "aligned_image", micron = obj@micron ) { read10xAffineImage( file = path, imagealignment_path = imagealignment_path, + name = name, micron = micron ) } @@ -492,7 +494,8 @@ setMethod( for (aim_i in seq_along(load_aligned_images)) { aim <- funs$load_aligned_image( path = load_aligned_images[[aim_i]][1], - imagealignment_path = load_aligned_images[[aim_i]][2] + imagealignment_path = load_aligned_images[[aim_i]][2], + name = aimnames[[aim_i]] ) aimglist <- c(aimglist, aim) } diff --git a/R/general_help.R b/R/general_help.R index f6fe38ea8..c10ae4ffb 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -727,14 +727,18 @@ get10Xmatrix_h5 <- function( #' transform. Loads the image in with an orientation that matches the dataset #' points and polygons vector information #' @param file filepath to image +#' @param imagealignment_path filepath to alignment file which contains +#' an affine transformation matrix. Usually a `.csv` file +#' @param name character. Name to assign the image. Default is 'image'. #' @param micron micron scaling. Directly used if a numeric is supplied. #' Also prefers a filepath to the `experiment.xenium` file which contains this #' info. A default of 0.2125 is provided. -#' @param affine filepath to `...imagealignment.csv` which contains an affine -#' transformation matrix +#' @param \dots additional params to pass to +#' `[GiottoClass::createGiottoLargeImage]` +#' @md #' @export read10xAffineImage <- function( - file, imagealignment_path, micron = 0.2125 + file, imagealignment_path, name = "aligned_image", micron = 0.2125, ... ) { checkmate::assert_file_exists(file) checkmate::assert_file_exists(imagealignment_path) @@ -746,7 +750,7 @@ read10xAffineImage <- function( aff <- data.table::fread(imagealignment_path) %>% as.matrix() - img <- createGiottoLargeImage(file) + img <- createGiottoLargeImage(file, name = name, ...) aff_img <- .tenx_img_affine(x = img, affine = aff, micron = micron) diff --git a/man/read10xAffineImage.Rd b/man/read10xAffineImage.Rd index b50226a50..b4681f097 100644 --- a/man/read10xAffineImage.Rd +++ b/man/read10xAffineImage.Rd @@ -4,17 +4,28 @@ \alias{read10xAffineImage} \title{read10xAffineImage} \usage{ -read10xAffineImage(file, imagealignment_path, micron = 0.2125) +read10xAffineImage( + file, + imagealignment_path, + name = "aligned_image", + micron = 0.2125, + ... +) } \arguments{ \item{file}{filepath to image} +\item{imagealignment_path}{filepath to alignment file which contains +an affine transformation matrix. Usually a \code{.csv} file} + +\item{name}{character. Name to assign the image. Default is 'image'.} + \item{micron}{micron scaling. Directly used if a numeric is supplied. -Also prefers a filepath to the `experiment.xenium` file which contains this +Also prefers a filepath to the \code{experiment.xenium} file which contains this info. A default of 0.2125 is provided.} -\item{affine}{filepath to `...imagealignment.csv` which contains an affine -transformation matrix} +\item{\dots}{additional params to pass to +\verb{[GiottoClass::createGiottoLargeImage]}} } \description{ Read a 10x image that is provided with an affine matrix From 2c1d95b98f25cd7ca5ad14bccab39e985239d4f7 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:29:22 -0400 Subject: [PATCH 139/150] messaging changes --- R/convenience_xenium.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 7d7fdb5ed..62bf61376 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -325,13 +325,15 @@ setMethod( path, imagealignment_path, name = "aligned_image", - micron = obj@micron + micron = obj@micron, + verbose = NULL ) { read10xAffineImage( file = path, imagealignment_path = imagealignment_path, name = name, - micron = micron + micron = micron, + verbose = verbose ) } obj@calls$load_aligned_image <- img_aff_fun @@ -492,6 +494,8 @@ setMethod( aimglist <- list() aimnames <- names(load_aligned_images) for (aim_i in seq_along(load_aligned_images)) { + vmsg(.v = verbose, "loading aligned image as '%s'", + aimnames[[aim_i]]) aim <- funs$load_aligned_image( path = load_aligned_images[[aim_i]][1], imagealignment_path = load_aligned_images[[aim_i]][2], @@ -815,7 +819,7 @@ importXenium <- function( e <- file_extension(path) %>% head(1L) %>% tolower() a <- list(path = path, dropcols = dropcols) - vmsg('Loading cell metadata...', .v = verbose) + vmsg('Loading 10X cell metadata...', .v = verbose) vmsg(.v = verbose, .is_debug = TRUE, "[CMETA_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE From e5a6710757eed15dcf86b884dee0f667d4cd81f7 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:34:53 -0400 Subject: [PATCH 140/150] fix typo --- R/convenience_xenium.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 62bf61376..b2106f3f4 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -494,8 +494,8 @@ setMethod( aimglist <- list() aimnames <- names(load_aligned_images) for (aim_i in seq_along(load_aligned_images)) { - vmsg(.v = verbose, "loading aligned image as '%s'", - aimnames[[aim_i]]) + vmsg(.v = verbose, sprintf("loading aligned image as '%s'", + aimnames[[aim_i]])) aim <- funs$load_aligned_image( path = load_aligned_images[[aim_i]][1], imagealignment_path = load_aligned_images[[aim_i]][2], From 658fc72600e642910be3fdcec580264596acc957 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:42:38 -0400 Subject: [PATCH 141/150] update messages --- R/convenience_xenium.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index b2106f3f4..9e3ca4e4d 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -757,7 +757,7 @@ importXenium <- function( e <- file_extension(path) %>% head(1L) %>% tolower() a <- list(path = path) - vmsg("Loading boundary info...", .v = verbose) + vmsg(sprintf("Loading boundary info '%s'", name), .v = verbose) vmsg(.v = verbose, .is_debug = TRUE, "[POLY_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) # pass to specific load function based on file extension From 4edc4578bdd4950308c750c4ae7534267236c386 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:56:53 -0400 Subject: [PATCH 142/150] fix optional params passing for xen conv. --- R/convenience_xenium.R | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 9e3ca4e4d..624110d92 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -1181,7 +1181,8 @@ importXenium <- function( #' contains feature metadata information and ENSG names. #' @param expression_path Optional. Filepath to cell feature matrix. Accepts #' either the `.h5` or the unzipped directory containing `.mtx` files. -#' @param metadata_path Optional. Filepath to `cells.csv.gz` or `cells.parquet` +#' @param cell_metadata_path Optional. Filepath to `cells.csv.gz` or +#' `cells.parquet` #' which contain cell metadata information. #' @param feat_type character. feature type. Provide more than one value if #' using the `split_keyword` param. For each set of keywords to split by, an @@ -1252,9 +1253,9 @@ createGiottoXeniumObject <- function( cell = "cell", nucleus = "nucleus" ), - gene_panel_json_path = NULL, + gene_panel_json_path = NULL, # optional expression_path = NULL, # optional - metadata_path = NULL, + cell_metadata_path = NULL, # optional feat_type = c( "rna", "NegControlProbe", @@ -1277,21 +1278,25 @@ createGiottoXeniumObject <- function( # apply reader params x$qv <- qv_threshold - g <- x$create_gobject( - transcript_path = transcript_path, - load_bounds = bounds_path, - gene_panel_json_path = gene_panel_json_path, - expression_path = expression_path, - metadata_path = metadata_path, - feat_type = split_keyword, - split_keyword = split_keyword, - load_images = load_images, - load_aligned_images = load_aligned_images, - load_expression = load_expression, - load_cellmeta = load_cellmeta, - verbose = verbose - ) + # directly passed + a <- list(load_bounds = bounds_path, + feat_type = feat_type, + split_keyword = split_keyword, + load_images = load_images, + load_aligned_images = load_aligned_images, + load_expression = load_expression, + load_cellmeta = load_cellmeta, + verbose = verbose) + + # only passed if not null + if (!is.null(transcript_path)) a$transcript_path <- transcript_path + if (!is.null(gene_panel_json_path)) { + a$gene_panel_json_path <- gene_panel_json_path + } + if (!is.null(expression_path)) a$expression_path <- expression_path + if (!is.null(cell_metadata_path)) a$metadata_path <- cell_metadata_path + g <- do.call(x$create_gobject, args = a) return(g) } From 45417241c3c3849c92d0fb4d0599b375defe794a Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:56:58 -0400 Subject: [PATCH 143/150] docs --- man/createGiottoXeniumObject.Rd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 51aadbab7..0d310af52 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -10,7 +10,7 @@ createGiottoXeniumObject( bounds_path = list(cell = "cell", nucleus = "nucleus"), gene_panel_json_path = NULL, expression_path = NULL, - metadata_path = NULL, + cell_metadata_path = NULL, feat_type = c("rna", "NegControlProbe", "UnassignedCodeword", "NegControlCodeword"), split_keyword = list("NegControlProbe", "UnassignedCodeword", "NegControlCodeword"), qv_threshold = 20, @@ -37,7 +37,8 @@ contains feature metadata information and ENSG names.} \item{expression_path}{Optional. Filepath to cell feature matrix. Accepts either the \code{.h5} or the unzipped directory containing \code{.mtx} files.} -\item{metadata_path}{Optional. Filepath to \code{cells.csv.gz} or \code{cells.parquet} +\item{cell_metadata_path}{Optional. Filepath to \code{cells.csv.gz} or +\code{cells.parquet} which contain cell metadata information.} \item{feat_type}{character. feature type. Provide more than one value if From 699ee757e8906193d04d92fab2ce44b2d6d68b13 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:39:06 -0400 Subject: [PATCH 144/150] fix mtx dir detection --- R/convenience_xenium.R | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 624110d92..0caaabbbe 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -135,7 +135,7 @@ setMethod( } if (!ftype$expression %in% ft_exp) { stop(wrap_txt("`$filetype$expression` must be one of", - paste(ft_tab, collapse = ", ")), + paste(tf_exp, collapse = ", ")), call. = FALSE) } @@ -178,9 +178,18 @@ setMethod( tx_path <- .xenium_ftype(tx_path, ftype$transcripts) cell_bound_path <- .xenium_ftype(cell_bound_path, ftype$boundaries) nuc_bound_path <- .xenium_ftype(nuc_bound_path, ftype$boundaries) - expr_path <- .xenium_ftype(expr_path, ftype$expression) cell_meta_path <- .xenium_ftype(cell_meta_path, ftype$cell_meta) + # for mtx, check if directory instead + if (ftype$expression == "mtx") { + is_dir <- vapply( + expr_path, checkmate::test_directory, FUN.VALUE = logical(1L) + ) + expr_path <- expr_path[is_dir] + } else { + expr_path <- .xenium_ftype(expr_path, ftype$expression) + } + # decide micron scaling if (length(obj@micron) == 0) { # if no value already set if (!is.null(experiment_info_path)) { @@ -444,7 +453,7 @@ setMethod( # no dropcols verbose = verbose ) - g <- setGiotto(g, fx) + g <- setGiotto(g, fx, verbose = FALSE) # expression From 2b8c5f788835065a3bb30ab2642685b0b7c703ea Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:52:26 -0400 Subject: [PATCH 145/150] fix checking for filepaths --- R/convenience_xenium.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 0caaabbbe..ef7f9d836 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -117,7 +117,7 @@ setMethod( ftype <- obj@filetype ft_tab <- c("csv", "parquet") - ft_exp <- c("h5", "mtx", "zarr") + ft_exp <- c("h5", "mtx") # zarr not yet supported if (!ftype$transcripts %in% ft_tab) { stop(wrap_txt("`$filetype$transcripts` must be one of", paste(ft_tab, collapse = ", ")), @@ -956,7 +956,7 @@ importXenium <- function( "No path to expression dir (mtx) or file (h5) provided or auto-detected" ), call. = FALSE) } - checkmate::assert_file_exists(path) + if (!file.exists(path)) stop("filepath or directory does not exist.\n") a <- list( path = path, gene_ids = gene_ids, @@ -978,7 +978,8 @@ importXenium <- function( verbose <- verbose %null% TRUE ex_list <- switch(e, "mtx" = do.call(.xenium_expression_mtx, args = a), - "h5" = do.call(.xenium_expression_h5, args = a) + "h5" = do.call(.xenium_expression_h5, args = a), + "zarr" = stop("zarr reading not yet implemented") ) # ensure list From e126f24d2190eacb753204fb704aab5e276ba9fc Mon Sep 17 00:00:00 2001 From: Junxiang Xu Date: Wed, 31 Jul 2024 16:49:17 -0400 Subject: [PATCH 146/150] image_register --- NAMESPACE | 5 + R/cell_segmentation.R | 134 +++++ R/image_registration.R | 518 ++++++++++++++++++ inst/python/configuration/genv_cellpose.yml | 19 + man/InteractiveLandmarkSelection.Rd | 19 + man/calculateAffineMatrixFromLandmarks.Rd | 19 + ...imate_transform_from_matched_descriptor.Rd | 26 + man/dot-match_descriptor.Rd | 39 ++ man/dot-plot_matched_descriptors.Rd | 23 + man/dot-sift_detect.Rd | 19 + man/dot-warp_transformed_image.Rd | 21 + ...imateAutomatedImageRegistrationWithSIFT.Rd | 36 ++ man/performCellposeSegmentation.Rd | 107 ++++ man/preprocessImageToMatrix.Rd | 41 ++ man/reexports.Rd | 2 +- 15 files changed, 1027 insertions(+), 1 deletion(-) create mode 100644 inst/python/configuration/genv_cellpose.yml create mode 100644 man/InteractiveLandmarkSelection.Rd create mode 100644 man/calculateAffineMatrixFromLandmarks.Rd create mode 100644 man/dot-estimate_transform_from_matched_descriptor.Rd create mode 100644 man/dot-match_descriptor.Rd create mode 100644 man/dot-plot_matched_descriptors.Rd create mode 100644 man/dot-sift_detect.Rd create mode 100644 man/dot-warp_transformed_image.Rd create mode 100644 man/estimateAutomatedImageRegistrationWithSIFT.Rd create mode 100644 man/performCellposeSegmentation.Rd create mode 100644 man/preprocessImageToMatrix.Rd diff --git a/NAMESPACE b/NAMESPACE index 98ae43773..f5ea0ce63 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export("prov<-") export("sankeyLabel<-") export("sankeyRelate<-") export("spatUnit<-") +export(InteractiveLandmarkSelection) export(PAGEEnrich) export(activeFeatType) export(activeSpatUnit) @@ -58,6 +59,7 @@ export(binSpect) export(binSpectMulti) export(binSpectSingle) export(binSpectSingleMatrix) +export(calculateAffineMatrixFromLandmarks) export(calculateHVF) export(calculateMetaTable) export(calculateMetaTableCells) @@ -181,6 +183,7 @@ export(doLouvainSubCluster) export(doRandomWalkCluster) export(doSNNCluster) export(doScrubletDetect) +export(estimateAutomatedImageRegistrationWithSIFT) export(estimateImageBg) export(exportGiottoViewer) export(exprCellCellcom) @@ -290,6 +293,7 @@ export(overlapToMatrix) export(overlapToMatrixMultiPoly) export(overlaps) export(pDataDT) +export(performCellposeSegmentation) export(pieCellTypesFromEnrichment) export(plotCCcomDotplot) export(plotCCcomHeatmap) @@ -327,6 +331,7 @@ export(plotUMAP) export(plotUMAP_2D) export(plotUMAP_3D) export(polyStamp) +export(preprocessImageToMatrix) export(print.combIcfObject) export(print.icfObject) export(processGiotto) diff --git a/R/cell_segmentation.R b/R/cell_segmentation.R index 05ff56040..4eaa5f4f6 100644 --- a/R/cell_segmentation.R +++ b/R/cell_segmentation.R @@ -84,3 +84,137 @@ doCellSegmentation <- function( print(segmentation_result) } + + + + + +#' +#' @title perform cellpose segmentation +#' @description +#' +#' perform the Giotto Wrapper of cellpose segmentation. This is for a model inference to generate segmentation mask file from input image. +#' main parameters needed +#' @name performCellposeSegmentation +#' @param image_dir character, required. Provide a path to a gray scale or a three channel image. +#' @param python_path python environment with cellpose installed. default = "giotto_cellpose". +#' @param mask_output required. Provide a path to the output mask file. +#' @param channel_1 channel number for cytoplasm, default to 0(gray scale) +#' @param channel_2 channel number for Nuclei, default to 0(gray scale) +#' @param model_name Name of the model to run inference. Default to 'cyto3' +#' @param batch_size Cellpose Parameter, Number of 224x224 patches to run simultaneously on the GPU. Can make smaller or bigger depending on GPU memory usage. Defaults to 8. +#' @param resample Cellpose Parameter +#' @param channel_axis Cellpose Parameter +#' @param z_axis Cellpose Parameter +#' @param normalize Cellpose Parameter +#' @param invert Cellpose Parameter +#' @param rescale Cellpose Parameter +#' @param diameter Cellpose Parameter +#' @param flow_threshold Cellpose Parameter +#' @param cellprob_threshold Cellpose Parameter +#' @param do_3D Cellpose Parameter +#' @param anisotropy Cellpose Parameter +#' @param stitch_threshold Cellpose Parameter +#' @param min_size Cellpose Parameter +#' @param niter Cellpose Parameter +#' @param augment Cellpose Parameter +#' @param tile Cellpose Parameter +#' @param tile_overlap Cellpose Parameter +#' @param bsize Cellpose Parameter +#' @param interp Cellpose Parameter +#' @param compute_masks Cellpose Parameter +#' @param progress Cellpose Parameter +#' @returns No return variable, as this will write directly to output path provided. +#' @examples +#' # example code +#' performCellposeSegmentation(image_dir = input_image, mask_output = output, channel_1 = 2, channel_2 = 1, model_name = 'cyto3',batch_size=4) +#' @export +performCellposeSegmentation <- function(python_env = 'giotto_cellpose', + image_dir, + mask_output, + channel_1 = 0, + channel_2 = 0, + model_name = 'cyto3', + batch_size=8, + resample=TRUE, + channel_axis=NULL, + z_axis=NULL, + normalize=TRUE, + invert=FALSE, + rescale=NULL, + diameter=NULL, + flow_threshold=0.4, + cellprob_threshold=0.0, + do_3D=FALSE, + anisotropy=NULL, + stitch_threshold=0.0, + min_size=15, + niter=NULL, + augment=FALSE, + tile=TRUE, + tile_overlap=0.1, + bsize=224, + interp=TRUE, + compute_masks=TRUE, + progress=NULL, + verbose = TRUE,...){ + + + #Check Input arguments + model_name <- match.arg(model_name, unique(c('cyto3', 'cyto2', 'cyto','nuclei', model_name))) + ## Load required python libraries + GiottoClass::set_giotto_python_path(python_env) + GiottoUtils::package_check('cellpose',repository = 'pip') + + cellpose <- reticulate::import("cellpose") + np <- reticulate::import("numpy") + cv2 <- reticulate::import("cv2") + torch <- reticulate::import("torch") + message('successfully loaded giotto environment with cellpose.') + + if (!(torch$cuda$is_available())){ + warning('GPU is not available for this session, inference may be slow.\n ') + } + + GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Loading Image from ',image_dir) + + img <- cellpose$io$imread(image_dir) + GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Loading Model...') + + model_to_seg <- cellpose$models$Cellpose(model_type=model_name,gpu = torch$cuda$is_available()) + channel_to_seg <- as.integer(c(channel_1,channel_2)) + + GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Segmenting Image...') + segmentation <- model_to_seg$eval + + result <- segmentation(img, + diameter=diameter, + channels=channel_to_seg, + batch_size = batch_size, + resample=resample, + channel_axis=channel_axis, + z_axis=z_axis, + normalize=normalize, + invert=invert, + rescale=rescale, + flow_threshold=flow_threshold, + cellprob_threshold=cellprob_threshold, + do_3D=do_3D, + anisotropy=anisotropy, + stitch_threshold=stitch_threshold, + min_size=min_size, + niter=niter, + augment=augment, + tile=tile, + tile_overlap=tile_overlap, + bsize=bsize, + interp=interp, + compute_masks=compute_masks, + progress=progress) + masks <- result[[1]] + GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Segmentation finished... Saving mask file...') + GiottoUtils::package_check('terra') + rast = terra::rast(masks) + terra::writeRaster(rast, mask_output,overwrite=TRUE) +} + diff --git a/R/image_registration.R b/R/image_registration.R index cdd120cdc..f9ca0e552 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -1018,3 +1018,521 @@ registerImagesFIJI <- function( return(0 == system(cmd)) } + + +#' Record landmarks by interactive selection +#' +#' @description Record landmarks by interactive selection +#' @param source_image the image to be plotted on the left, and landmarks will output in the first of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image +#' @param target_image the image to be plotted on the right, and landmarks will output in the second of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image +#' +#' @returns a list of landmarks +#' +#' @export + +InteractiveLandmarkSelection <- function(source, target) { + GiottoUtils::package_check("shiny") + GiottoUtils::package_check("ggplot2") + GiottoUtils::package_check("miniUI") + + .create_image_to_plot <- function(x){ + if (inherits(x, "gg")){ + return(x) + } + else if (is.character(x)){ + gimg = Giotto::createGiottoLargeImage(x) + gg <- ggplot2::ggplot() + gg_raster = GiottoVisuals::gg_annotation_raster(gg,gimg) + return(gg_raster) + } + else{ + gg <- ggplot2::ggplot() + gg_raster = GiottoVisuals::gg_annotation_raster(gg,x) + return(gg_raster) + } + } + source_image <- .create_image_to_plot(source) + target_image <- .create_image_to_plot(target) + + # Function to extract the range of x and y values from a ggplot object + .extract_plot_ranges <- function(plot) { + data <- ggplot2::ggplot_build(plot)$data[[1]] + x_range <- range(data$x, na.rm = TRUE) + y_range <- range(data$y, na.rm = TRUE) + list(x_range = x_range, y_range = y_range) + } + + # Extract ranges for the input plots + source_ranges <- .extract_plot_ranges(source_image) + target_ranges <- .extract_plot_ranges(target_image) + + ui <- miniUI::miniPage( + miniUI::gadgetTitleBar("Select Extents and Points"), + miniUI::miniContentPanel( + shiny::fluidRow( + shiny::column(6, shiny::plotOutput("plot1", click = "plot1_click")), + shiny::column(6, shiny::plotOutput("plot2", click = "plot2_click")) + ), + shiny::fluidRow( + shiny::column(6, + shiny::sliderInput("xrange1", "X Range for Plot 1", min = source_ranges$x_range[1], max = source_ranges$x_range[2], value = source_ranges$x_range), + shiny::sliderInput("yrange1", "Y Range for Plot 1", min = source_ranges$y_range[1], max = source_ranges$y_range[2], value = source_ranges$y_range) + ), + shiny::column(6, + shiny::sliderInput("xrange2", "X Range for Plot 2", min = target_ranges$x_range[1], max = target_ranges$x_range[2], value = target_ranges$x_range), + shiny::sliderInput("yrange2", "Y Range for Plot 2", min = target_ranges$y_range[1], max = target_ranges$y_range[2], value = target_ranges$y_range) + ) + ), + shiny::fluidRow( + shiny::column(6, shiny::verbatimTextOutput("click_info1")), + shiny::column(6, shiny::verbatimTextOutput("click_info2")) + ), + shiny::fluidRow( + shiny::column(6, shiny::actionButton("undo1", "Undo Click on Source Image")), + shiny::column(6, shiny::actionButton("undo2", "Undo Click on Target Image")) + ) + ) + ) + + server <- function(input, output, session) { + click_history1 <- shiny::reactiveVal(data.frame(x = numeric(), y = numeric())) + click_history2 <- shiny::reactiveVal(data.frame(x = numeric(), y = numeric())) + + output$plot1 <- shiny::renderPlot({ + source_image + + ggplot2::coord_cartesian(xlim = input$xrange1, ylim = input$yrange1) + + ggplot2::geom_point(data = click_history1(), ggplot2::aes(x = x, y = y), color = "red", size = 4.5) + }) + + output$plot2 <- shiny::renderPlot({ + target_image + + ggplot2::coord_cartesian(xlim = input$xrange2, ylim = input$yrange2) + + ggplot2::geom_point(data = click_history2(), ggplot2::aes(x = x, y = y), color = "blue",size = 4.5) + }) + + shiny::observeEvent(input$plot1_click, { + click <- input$plot1_click + new_coords <- rbind(click_history1(), data.frame(x = click$x, y = click$y)) + click_history1(new_coords) + }) + + shiny::observeEvent(input$plot2_click, { + click <- input$plot2_click + new_coords <- rbind(click_history2(), data.frame(x = click$x, y = click$y)) + click_history2(new_coords) + }) + + shiny::observeEvent(input$undo1, { + if (nrow(click_history1()) > 0) { + new_coords <- click_history1()[-nrow(click_history1()), , drop = FALSE] + click_history1(new_coords) + } + }) + + shiny::observeEvent(input$undo2, { + if (nrow(click_history2()) > 0) { + new_coords <- click_history2()[-nrow(click_history2()), , drop = FALSE] + click_history2(new_coords) + } + }) + + output$click_info1 <- shiny::renderPrint({ + click_history1() + }) + + output$click_info2 <- shiny::renderPrint({ + click_history2() + }) + + shiny::observeEvent(input$done, { + returnValue <- list(click_history1(),click_history2()) + shiny::stopApp(returnValue) + }) + } + + shiny::runGadget(ui, server) +} + + + + + + +#' Calculate a affine transformation matrix from two set of landmarks +#' +#' @description calculate a affine transformation matrix from two set of landmarks +#' @param source_df source landmarks, two columns, first column represent x coordinate and second column represent y coordinate. +#' @param target_df target landmarks, two columns, first column represent x coordinate and second column represent y coordinate. +#' +#' @returns a 3 by 3 matrix with the third row close to (0,0,1) +#' +#' @export + +calculateAffineMatrixFromLandmarks <- function(source_df,target_df){ + source_landmarks_matrix = as.matrix(source_df) + source_landmarks_matrix = cbind(source_landmarks_matrix,rep(1,nrow(source_landmarks_matrix))) + ## Create landmark matrix for the target image + target_landmarks_matrix <- as.matrix(target_df) + target_landmarks_matrix = cbind(target_landmarks_matrix,rep(1,nrow(target_landmarks_matrix))) + ## Compute the affine matrix + source_dp = t(source_landmarks_matrix) %*% source_landmarks_matrix + source_target_dp = t(source_landmarks_matrix) %*% target_landmarks_matrix + source_dp_inv <- solve(source_dp) + Affine_matrix = t(source_dp_inv %*% source_target_dp) + return(Affine_matrix) +} + + + + +#' @name .sift_detect +#' @title Run SIFT feature detector and descriptor extractor +#' @description +#' Perform feature detector and descriptor extractor on a matrix object or preprocessed image object +#' @param x input matrix or preprocessed image to extract feature and descriptor from +#' @param ... additional params to pass to `skimage.feature.SIFT()` +#' @returns list of keypoints and descriptors +#' +.sift_detect <- function(x, ..., pkg_ptr) { + + if (missing(pkg_ptr)) { + GiottoUtils::package_check("skimage", repository = "pip:scikit-image") + SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) + } else { + SKI <- pkg_ptr + } + + # sift object + SIFT <- SKI$feature$SIFT() + + SIFT$detect_and_extract(x) + + out <- list( + keypoints = SIFT$keypoints, + descriptors = SIFT$descriptors + ) + + return(out) +} + +#' @name .match_descriptor +#' @title Match image descriptors +#' @description +#' Brute force matching of descriptors using \pkg{scikit-image}. Find matching +#' image descriptors between moving images and a target image. +#' @param descriptor_list list of descriptor matrices +#' @param target_idx which item in the list is the target image. Default is 1 +#' @param cross_check whether to check that only the best match is returned +#' @param max_ratio Maximum ratio of distances between first and second closest +#' descriptor in the second set of descriptors. This threshold is useful to +#' filter ambiguous matches between the two descriptor sets. The choice of this +#' value depends on the statistics of the chosen descriptor, e.g., for SIFT +#' descriptors a value of 0.8 is usually chosen, see D.G. Lowe, "Distinctive +#' Image Features from Scale-Invariant Keypoints", International Journal of +#' Computer Vision, 2004. +#' @param ... additional params to pass to `skimage.feature.match_descriptors()` +#' @returns list +#' +.match_descriptor <- function( + descriptor_list, + target_idx = 1L, + cross_check = TRUE, + max_ratio = 0.8, + ..., + pkg_ptr +) { + + checkmate::assert_list(descriptor_list, min.len = 2L) + target_idx <- as.integer(target_idx) + + if (missing(pkg_ptr)) { + package_check("skimage", repository = "pip:scikit-image") + SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) + } else { + SKI <- pkg_ptr + } + + target <- descriptor_list[[target_idx]] + + out <- lapply( + seq_along(descriptor_list), + function(moving_idx) { + if (moving_idx == target_idx) { + return(matrix( + rep(seq_len(nrow(target)), 2L), + ncol = 2L, + byrow = FALSE + )) + # directly return all as matches + } + + moving <- descriptor_list[[moving_idx]] + + m <- .match_descriptor_single( + x = target, + y = moving, + ..., + pkg_ptr = pkg_ptr + ) + m + 1 # since it is 0 indexed + } + ) + + return(out) +} + + +# wrapper for sklearn-image match_descriptors +# returns a 2 col matrix of x to y index matches +.match_descriptor_single <- function(x, y,max_ratio, ..., pkg_ptr) { + + checkmate::assert_class(x, "matrix") + checkmate::assert_class(y, "matrix") + + if (missing(pkg_ptr)) { + GiottoUtils::package_check("skimage", repository = "pip:scikit-image") + SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) + } else { + SKI <- pkg_ptr + } + + match_descriptors <- SKI$feature$match_descriptors + m <- match_descriptors( + descriptors1 = x, + descriptors2 = y, + max_ratio = max_ratio, + ... # max_ratio of 0.6 - 0.8 recommended for sift, cross_check = TRUE + ) + + return(m) +} + + +#' @name preprocessImageToMatrix +#' @title Preprocess from image directory to the required matrix format for Image registration pipeline built on scikit-image +#' @description +#' Preprocess a image path to the required matrix format for Image registration pipeline built on scikit-image +#' @param x input file path, required +#' @param invert whether or not to invert intensity to make calculation of descriptors more accurate, default FALSE +#' @param equalize_histogram whether or not to calculate equalized histogram of the image,default TRUE +#' @param flip_vertical whether or not to flip vertical, default FALSE +#' @param flip_horizontal whether or not to flip horizontal, default FALSE +#' @param rotate_90 whether or not to rotates the image 90 degrees counter-clockwise, default FALSE +#' @param use_single_channel If input is a multichannel image, whether or not to extract single channel, default FALSE +#' @param single_channel_number Channel number in the multichannel image, required if use_single_channel = TRUE +#' @returns a matrix array to input to .sift_detect +#' +#' @export +preprocessImageToMatrix <- function(x, + invert = F, + equalize_histogram = T, + flip_vertical = F, + flip_horizontal = F, + rotate_90 = F, + use_single_channel = F, + single_channel_number = NULL, + pkg_ptr) { + + if (missing(pkg_ptr)) { + GiottoUtils::package_check("skimage", repository = "pip:scikit-image") + SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) + } else { + SKI <- pkg_ptr + } + GiottoUtils::package_check("numpy", repository = "pip:scikit-image") + np <- reticulate::import("numpy", convert = TRUE, delay_load = TRUE) + + image = SKI$io$imread(x) + + if (length(dim(image)) >2 & use_single_channel == FALSE){ + image = SKI$color$rgb2gray(image) + } + if (use_single_channel == TRUE) { + if (is.null(single_channel_number)) {stop("Set use single channel == TRUE, please provide a channel number to continue")} + image <- image[,,single_channel_number] + } + + + if (flip_vertical == T){ + image = np$flipud(image) + } + if (flip_horizontal == T){ + image = np$fliplr(image) + } + if (rotate_90 == T){ + image = np$rot90(image) + } + if (invert == T){ + image = SKI$util$invert(image) + } + if (equalize_histogram == T){ + image = SKI$exposure$equalize_hist(image) + } + return(image) +} + + +#' @name .estimate_transform_from_matched_descriptor +#' @title Estimate affine transformation from matched descriptor +#' @description +#' Estimate affine transformation from matched descriptor +#' @param keypoints1 keypoints extracted from source image via .sift_detect +#' @param keypoints1 keypoints extracted from target image via .sift_detect +#' @param match a 2 col matrix of x to y index matched descriptors via .match_descriptor_single +#' @returns a list of model and inliners +#' + +.estimate_transform_from_matched_descriptor <- function(keypoints1, + keypoints2, + match, + estimate_fun, + ..., + pkg_ptr){ + if (missing(pkg_ptr)) { + GiottoUtils::package_check("skimage", repository = "pip:scikit-image") + SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) + } else { + SKI <- pkg_ptr + } + + # Extract matched keypoints + src_pts <- keypoints1[match[, 1] + 1, , drop = FALSE] + dst_pts <- keypoints2[match[, 2] + 1, , drop = FALSE] + + estimate_fun <- match.arg(estimate_fun, unique(c('euclidean', 'similarity', 'affine', 'piecewise-affine', 'projective', 'polynomial', estimate_fun))) + + # Estimate homography matrix + ransac_result <- SKI$transform$estimate_transform( + ttype = estimate_fun, + src = src_pts, + dst = dst_pts, + ) + + return(ransac_result) +} + + +#' @name .warp_transformed_image +#' @title Warp transformed images from estimated transformation +#' @description +#' Warp transformed images from estimated transformation +#' @param x source image from .sift_preprocess +#' @param y target image from .sift_preprocess +#' @param model estimated transformation object from .estimate_transform_from_matched_descriptor +#' @returns None, it will write to a output path +#' + +.warp_transformed_image <- function(x, + y, + model, + outpath = NULL, + pkg_ptr){ + if (missing(pkg_ptr)) { + GiottoUtils::package_check("skimage", repository = "pip:scikit-image") + SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) + } else { + SKI <- pkg_ptr + } + + # Ensure the source image array is writable by making a copy + x_copy <- reticulate::r_to_py(x)$copy() + + # Warp the source image to align with the destination image + warped_image <- SKI$transform$warp(x_copy, model, output_shape = dim(y)) + SKI$io$imsave(outpath,warped_image) +} + + + +#' @name .plot_matched_descriptors +#' @title plot matched descriptors +#' @description +#' A wrapper function for the plot_matches for the SIFT feature extractor and descriptor pipeline +#' @param x source image from .sift_preprocess +#' @param y target image from .sift_preprocess +#' @param keypoints1 keypoints extracted from source image via .sift_detect +#' @param keypoints1 keypoints extracted from target image via .sift_detect +#' @param match a 2 col matrix of x to y index matched descriptors via .match_descriptor_single +#' @returns None +#' + +.plot_matched_descriptors <- function(x, y, keypoints1, keypoints2, match, pkg_ptr){ + if (missing(pkg_ptr)) { + GiottoUtils::package_check("skimage", repository = "pip:scikit-image") + SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) + } else { + SKI <- pkg_ptr + } + + matplotlib <-reticulate::import("matplotlib", convert = TRUE, delay_load = TRUE) + np <- reticulate::import("numpy",convert = T, delay_load = T) + plt <- matplotlib$pyplot + + match_py <- reticulate::r_to_py(match) + match_py <- np$array(match_py, dtype = np$int32) + + # Create a subplot + fig_ax <- plt$subplots(nrows = 1L, ncols = 1L, figsize = c(11, 8)) + fig <- fig_ax[[1]] + ax <- fig_ax[[2]] + + # Plot the matches + SKI$feature$plot_matches(ax, x, y, keypoints1, keypoints2, match_py, only_matches = TRUE) + + ax$axis('off') + plt$show() + plt$close() +} + +#' +#' @title Estimate Automated ImageRegistration With SIFT +#' @name estimateAutomatedImageRegistrationWithSIFT +#' @description +#' Automatically estimate a transform with SIFT feature detection, descriptor match and returns a transformation object to use +#' @param x required. Source matrix input, could be generated from preprocessImageToMatrix +#' @param y required. Source matrix input, could be generated from preprocessImageToMatrix +#' @param max_ratio max_ratio parameter for matching descriptors, default 0.6 +#' @param save_warp default NULL, if not NULL, please provide an output image path to save the warpped image. +#' @param estimate_fun default Affine. The transformation model to use estimation +#' @param plot_match whether or not to plot the matching descriptors.Default False +#' @returns a list of the estimated transformation object +#' example estimation <- estimateAutomatedImageRegistrationWithSIFT(x = image_mtx1,y = image_mtx2) +#' @export +estimateAutomatedImageRegistrationWithSIFT <- function(x, + y, + plot_match = F, + max_ratio = 0.6, + estimate_fun = 'affine', + save_warp = NULL, + verbose = T){ + + GiottoUtils::vmsg(.v = verbose, .is_debug = T,'Detecting features via SIFT... ') + x_sift <- .sift_detect(x) + y_sift <- .sift_detect(y) + + GiottoUtils::vmsg(.v = verbose, .is_debug = T,'Matching Descriptors via SIFT... ') + matched <- .match_descriptor_single(x_sift$descriptor, y_sift$descriptor,max_ratio = max_ratio) + + if (plot_match == TRUE){ + .plot_matched_descriptors(x, y, x_sift$keypoints, y_sift$keypoints, matched) + } + + + GiottoUtils::vmsg(.v = verbose, .is_debug = T,'Estimating transformation matrix from matched descriptor... ') + estimation <- .estimate_transform_from_matched_descriptor(x_sift$keypoints, + y_sift$keypoints, + matched, + estimate_fun = estimate_fun) + + if (!is.null(save_warp)){ + .warp_transformed_image(x = x, + y = y, + model = estimation$inverse, outpath = save_warp) + } + + return(estimation) +} + + + + diff --git a/inst/python/configuration/genv_cellpose.yml b/inst/python/configuration/genv_cellpose.yml new file mode 100644 index 000000000..572d53d48 --- /dev/null +++ b/inst/python/configuration/genv_cellpose.yml @@ -0,0 +1,19 @@ +name: giotto_cellpose +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - python=3.8 + - pip + - pandas + - networkx + - python-igraph + - leidenalg + - scikit-learn + - tifffile + - pip: + - cellpose + - python-louvain + - smfishHmrf + - git+https://github.com/wwang-chcn/bento-tools.git@giotto_install diff --git a/man/InteractiveLandmarkSelection.Rd b/man/InteractiveLandmarkSelection.Rd new file mode 100644 index 000000000..51a120bfe --- /dev/null +++ b/man/InteractiveLandmarkSelection.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{InteractiveLandmarkSelection} +\alias{InteractiveLandmarkSelection} +\title{Record landmarks by interactive selection} +\usage{ +InteractiveLandmarkSelection(source, target) +} +\arguments{ +\item{source_image}{the image to be plotted on the left, and landmarks will output in the first of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image} + +\item{target_image}{the image to be plotted on the right, and landmarks will output in the second of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image} +} +\value{ +a list of landmarks +} +\description{ +Record landmarks by interactive selection +} diff --git a/man/calculateAffineMatrixFromLandmarks.Rd b/man/calculateAffineMatrixFromLandmarks.Rd new file mode 100644 index 000000000..eb1fcaf21 --- /dev/null +++ b/man/calculateAffineMatrixFromLandmarks.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{calculateAffineMatrixFromLandmarks} +\alias{calculateAffineMatrixFromLandmarks} +\title{Calculate a affine transformation matrix from two set of landmarks} +\usage{ +calculateAffineMatrixFromLandmarks(source_df, target_df) +} +\arguments{ +\item{source_df}{source landmarks, two columns, first column represent x coordinate and second column represent y coordinate.} + +\item{target_df}{target landmarks, two columns, first column represent x coordinate and second column represent y coordinate.} +} +\value{ +a 3 by 3 matrix with the third row close to (0,0,1) +} +\description{ +calculate a affine transformation matrix from two set of landmarks +} diff --git a/man/dot-estimate_transform_from_matched_descriptor.Rd b/man/dot-estimate_transform_from_matched_descriptor.Rd new file mode 100644 index 000000000..45c31a330 --- /dev/null +++ b/man/dot-estimate_transform_from_matched_descriptor.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{.estimate_transform_from_matched_descriptor} +\alias{.estimate_transform_from_matched_descriptor} +\title{Estimate affine transformation from matched descriptor} +\usage{ +.estimate_transform_from_matched_descriptor( + keypoints1, + keypoints2, + match, + estimate_fun, + ..., + pkg_ptr +) +} +\arguments{ +\item{keypoints1}{keypoints extracted from target image via .sift_detect} + +\item{match}{a 2 col matrix of x to y index matched descriptors via .match_descriptor_single} +} +\value{ +a list of model and inliners +} +\description{ +Estimate affine transformation from matched descriptor +} diff --git a/man/dot-match_descriptor.Rd b/man/dot-match_descriptor.Rd new file mode 100644 index 000000000..cb902e2f9 --- /dev/null +++ b/man/dot-match_descriptor.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{.match_descriptor} +\alias{.match_descriptor} +\title{Match image descriptors} +\usage{ +.match_descriptor( + descriptor_list, + target_idx = 1L, + cross_check = TRUE, + max_ratio = 0.8, + ..., + pkg_ptr +) +} +\arguments{ +\item{descriptor_list}{list of descriptor matrices} + +\item{target_idx}{which item in the list is the target image. Default is 1} + +\item{cross_check}{whether to check that only the best match is returned} + +\item{max_ratio}{Maximum ratio of distances between first and second closest +descriptor in the second set of descriptors. This threshold is useful to +filter ambiguous matches between the two descriptor sets. The choice of this +value depends on the statistics of the chosen descriptor, e.g., for SIFT +descriptors a value of 0.8 is usually chosen, see D.G. Lowe, "Distinctive +Image Features from Scale-Invariant Keypoints", International Journal of +Computer Vision, 2004.} + +\item{...}{additional params to pass to `skimage.feature.match_descriptors()`} +} +\value{ +list +} +\description{ +Brute force matching of descriptors using \pkg{scikit-image}. Find matching +image descriptors between moving images and a target image. +} diff --git a/man/dot-plot_matched_descriptors.Rd b/man/dot-plot_matched_descriptors.Rd new file mode 100644 index 000000000..4b64af54a --- /dev/null +++ b/man/dot-plot_matched_descriptors.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{.plot_matched_descriptors} +\alias{.plot_matched_descriptors} +\title{plot matched descriptors} +\usage{ +.plot_matched_descriptors(x, y, keypoints1, keypoints2, match, pkg_ptr) +} +\arguments{ +\item{x}{source image from .sift_preprocess} + +\item{y}{target image from .sift_preprocess} + +\item{keypoints1}{keypoints extracted from target image via .sift_detect} + +\item{match}{a 2 col matrix of x to y index matched descriptors via .match_descriptor_single} +} +\value{ +None +} +\description{ +A wrapper function for the plot_matches for the SIFT feature extractor and descriptor pipeline +} diff --git a/man/dot-sift_detect.Rd b/man/dot-sift_detect.Rd new file mode 100644 index 000000000..a28954503 --- /dev/null +++ b/man/dot-sift_detect.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{.sift_detect} +\alias{.sift_detect} +\title{Run SIFT feature detector and descriptor extractor} +\usage{ +.sift_detect(x, ..., pkg_ptr) +} +\arguments{ +\item{x}{input matrix or preprocessed image to extract feature and descriptor from} + +\item{...}{additional params to pass to `skimage.feature.SIFT()`} +} +\value{ +list of keypoints and descriptors +} +\description{ +Perform feature detector and descriptor extractor on a matrix object or preprocessed image object +} diff --git a/man/dot-warp_transformed_image.Rd b/man/dot-warp_transformed_image.Rd new file mode 100644 index 000000000..662dad1c4 --- /dev/null +++ b/man/dot-warp_transformed_image.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{.warp_transformed_image} +\alias{.warp_transformed_image} +\title{Warp transformed images from estimated transformation} +\usage{ +.warp_transformed_image(x, y, model, outpath = NULL, pkg_ptr) +} +\arguments{ +\item{x}{source image from .sift_preprocess} + +\item{y}{target image from .sift_preprocess} + +\item{model}{estimated transformation object from .estimate_transform_from_matched_descriptor} +} +\value{ +None, it will write to a output path +} +\description{ +Warp transformed images from estimated transformation +} diff --git a/man/estimateAutomatedImageRegistrationWithSIFT.Rd b/man/estimateAutomatedImageRegistrationWithSIFT.Rd new file mode 100644 index 000000000..a8601a0f2 --- /dev/null +++ b/man/estimateAutomatedImageRegistrationWithSIFT.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{estimateAutomatedImageRegistrationWithSIFT} +\alias{estimateAutomatedImageRegistrationWithSIFT} +\title{Estimate Automated ImageRegistration With SIFT} +\usage{ +estimateAutomatedImageRegistrationWithSIFT( + x, + y, + plot_match = F, + max_ratio = 0.6, + estimate_fun = "affine", + save_warp = NULL, + verbose = T +) +} +\arguments{ +\item{x}{required. Source matrix input, could be generated from preprocessImageToMatrix} + +\item{y}{required. Source matrix input, could be generated from preprocessImageToMatrix} + +\item{plot_match}{whether or not to plot the matching descriptors.Default False} + +\item{max_ratio}{max_ratio parameter for matching descriptors, default 0.6} + +\item{estimate_fun}{default Affine. The transformation model to use estimation} + +\item{save_warp}{default NULL, if not NULL, please provide an output image path to save the warpped image.} +} +\value{ +a list of the estimated transformation object +example estimation <- estimateAutomatedImageRegistrationWithSIFT(x = image_mtx1,y = image_mtx2) +} +\description{ +Automatically estimate a transform with SIFT feature detection, descriptor match and returns a transformation object to use +} diff --git a/man/performCellposeSegmentation.Rd b/man/performCellposeSegmentation.Rd new file mode 100644 index 000000000..fc94d04a0 --- /dev/null +++ b/man/performCellposeSegmentation.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cell_segmentation.R +\name{performCellposeSegmentation} +\alias{performCellposeSegmentation} +\title{perform cellpose segmentation} +\usage{ +performCellposeSegmentation( + python_env = "giotto_cellpose", + image_dir, + mask_output, + channel_1 = 0, + channel_2 = 0, + model_name = "cyto3", + batch_size = 8, + resample = TRUE, + channel_axis = NULL, + z_axis = NULL, + normalize = TRUE, + invert = FALSE, + rescale = NULL, + diameter = NULL, + flow_threshold = 0.4, + cellprob_threshold = 0, + do_3D = FALSE, + anisotropy = NULL, + stitch_threshold = 0, + min_size = 15, + niter = NULL, + augment = FALSE, + tile = TRUE, + tile_overlap = 0.1, + bsize = 224, + interp = TRUE, + compute_masks = TRUE, + progress = NULL, + verbose = TRUE, + ... +) +} +\arguments{ +\item{image_dir}{character, required. Provide a path to a gray scale or a three channel image.} + +\item{mask_output}{required. Provide a path to the output mask file.} + +\item{channel_1}{channel number for cytoplasm, default to 0(gray scale)} + +\item{channel_2}{channel number for Nuclei, default to 0(gray scale)} + +\item{model_name}{Name of the model to run inference. Default to 'cyto3'} + +\item{batch_size}{Cellpose Parameter, Number of 224x224 patches to run simultaneously on the GPU. Can make smaller or bigger depending on GPU memory usage. Defaults to 8.} + +\item{resample}{Cellpose Parameter} + +\item{channel_axis}{Cellpose Parameter} + +\item{z_axis}{Cellpose Parameter} + +\item{normalize}{Cellpose Parameter} + +\item{invert}{Cellpose Parameter} + +\item{rescale}{Cellpose Parameter} + +\item{diameter}{Cellpose Parameter} + +\item{flow_threshold}{Cellpose Parameter} + +\item{cellprob_threshold}{Cellpose Parameter} + +\item{do_3D}{Cellpose Parameter} + +\item{anisotropy}{Cellpose Parameter} + +\item{stitch_threshold}{Cellpose Parameter} + +\item{min_size}{Cellpose Parameter} + +\item{niter}{Cellpose Parameter} + +\item{augment}{Cellpose Parameter} + +\item{tile}{Cellpose Parameter} + +\item{tile_overlap}{Cellpose Parameter} + +\item{bsize}{Cellpose Parameter} + +\item{interp}{Cellpose Parameter} + +\item{compute_masks}{Cellpose Parameter} + +\item{progress}{Cellpose Parameter} + +\item{python_path}{python environment with cellpose installed. default = "giotto_cellpose".} +} +\value{ +No return variable, as this will write directly to output path provided. +} +\description{ +perform the Giotto Wrapper of cellpose segmentation. This is for a model inference to generate segmentation mask file from input image. +main parameters needed +} +\examples{ +# example code +performCellposeSegmentation(image_dir = input_image, mask_output = output, channel_1 = 2, channel_2 = 1, model_name = 'cyto3',batch_size=4) +} diff --git a/man/preprocessImageToMatrix.Rd b/man/preprocessImageToMatrix.Rd new file mode 100644 index 000000000..842cbefcb --- /dev/null +++ b/man/preprocessImageToMatrix.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/image_registration.R +\name{preprocessImageToMatrix} +\alias{preprocessImageToMatrix} +\title{Preprocess from image directory to the required matrix format for Image registration pipeline built on scikit-image} +\usage{ +preprocessImageToMatrix( + x, + invert = F, + equalize_histogram = T, + flip_vertical = F, + flip_horizontal = F, + rotate_90 = F, + use_single_channel = F, + single_channel_number = NULL, + pkg_ptr +) +} +\arguments{ +\item{x}{input file path, required} + +\item{invert}{whether or not to invert intensity to make calculation of descriptors more accurate, default FALSE} + +\item{equalize_histogram}{whether or not to calculate equalized histogram of the image,default TRUE} + +\item{flip_vertical}{whether or not to flip vertical, default FALSE} + +\item{flip_horizontal}{whether or not to flip horizontal, default FALSE} + +\item{rotate_90}{whether or not to rotates the image 90 degrees counter-clockwise, default FALSE} + +\item{use_single_channel}{If input is a multichannel image, whether or not to extract single channel, default FALSE} + +\item{single_channel_number}{Channel number in the multichannel image, required if use_single_channel = TRUE} +} +\value{ +a matrix array to input to .sift_detect +} +\description{ +Preprocess a image path to the required matrix format for Image registration pipeline built on scikit-image +} diff --git a/man/reexports.Rd b/man/reexports.Rd index c4a2e34b5..f5533929f 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -298,7 +298,7 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{density}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{hist}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} + \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{density}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{hist}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} \item{GiottoUtils}{\code{\link[GiottoUtils:pipe]{\%>\%}}, \code{\link[GiottoUtils]{getDistinctColors}}, \code{\link[GiottoUtils]{getRainbowColors}}} From d9e4740f1e8fdfa92900ca8d61d6eaadd35e4151 Mon Sep 17 00:00:00 2001 From: Junxiang Xu Date: Wed, 31 Jul 2024 16:57:44 -0400 Subject: [PATCH 147/150] image_register --- NAMESPACE | 2 +- R/cell_segmentation.R | 2 +- R/image_registration.R | 18 +++++------------- ...tion.Rd => interactiveLandmarkSelection.Rd} | 8 ++++---- man/performCellposeSegmentation.Rd | 2 +- 5 files changed, 12 insertions(+), 20 deletions(-) rename man/{InteractiveLandmarkSelection.Rd => interactiveLandmarkSelection.Rd} (77%) diff --git a/NAMESPACE b/NAMESPACE index f5ea0ce63..b70048a8d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,6 @@ export("prov<-") export("sankeyLabel<-") export("sankeyRelate<-") export("spatUnit<-") -export(InteractiveLandmarkSelection) export(PAGEEnrich) export(activeFeatType) export(activeSpatUnit) @@ -273,6 +272,7 @@ export(insertCrossSectionFeatPlot3D) export(insertCrossSectionSpatPlot3D) export(installGiottoEnvironment) export(instructions) +export(interactiveLandmarkSelection) export(jackstrawPlot) export(joinGiottoObjects) export(loadGiotto) diff --git a/R/cell_segmentation.R b/R/cell_segmentation.R index 4eaa5f4f6..a411e320a 100644 --- a/R/cell_segmentation.R +++ b/R/cell_segmentation.R @@ -101,7 +101,7 @@ doCellSegmentation <- function( #' @param mask_output required. Provide a path to the output mask file. #' @param channel_1 channel number for cytoplasm, default to 0(gray scale) #' @param channel_2 channel number for Nuclei, default to 0(gray scale) -#' @param model_name Name of the model to run inference. Default to 'cyto3' +#' @param model_name Name of the model to run inference. Default to 'cyto3', if you want to run cutomized trained model, place your model file in ~/.cellpose/models and specify your model name. #' @param batch_size Cellpose Parameter, Number of 224x224 patches to run simultaneously on the GPU. Can make smaller or bigger depending on GPU memory usage. Defaults to 8. #' @param resample Cellpose Parameter #' @param channel_axis Cellpose Parameter diff --git a/R/image_registration.R b/R/image_registration.R index f9ca0e552..a98e9a280 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -1020,8 +1020,8 @@ registerImagesFIJI <- function( -#' Record landmarks by interactive selection -#' +#' @title title Record landmarks by interactive selection +#' @name interactiveLandmarkSelection #' @description Record landmarks by interactive selection #' @param source_image the image to be plotted on the left, and landmarks will output in the first of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image #' @param target_image the image to be plotted on the right, and landmarks will output in the second of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image @@ -1029,8 +1029,7 @@ registerImagesFIJI <- function( #' @returns a list of landmarks #' #' @export - -InteractiveLandmarkSelection <- function(source, target) { +interactiveLandmarkSelection <- function(source, target) { GiottoUtils::package_check("shiny") GiottoUtils::package_check("ggplot2") GiottoUtils::package_check("miniUI") @@ -1158,8 +1157,8 @@ InteractiveLandmarkSelection <- function(source, target) { -#' Calculate a affine transformation matrix from two set of landmarks -#' +#' @title Calculate a affine transformation matrix from two set of landmarks +#' @name calculateAffineMatrixFromLandmarks #' @description calculate a affine transformation matrix from two set of landmarks #' @param source_df source landmarks, two columns, first column represent x coordinate and second column represent y coordinate. #' @param target_df target landmarks, two columns, first column represent x coordinate and second column represent y coordinate. @@ -1167,7 +1166,6 @@ InteractiveLandmarkSelection <- function(source, target) { #' @returns a 3 by 3 matrix with the third row close to (0,0,1) #' #' @export - calculateAffineMatrixFromLandmarks <- function(source_df,target_df){ source_landmarks_matrix = as.matrix(source_df) source_landmarks_matrix = cbind(source_landmarks_matrix,rep(1,nrow(source_landmarks_matrix))) @@ -1380,8 +1378,6 @@ preprocessImageToMatrix <- function(x, #' @param keypoints1 keypoints extracted from target image via .sift_detect #' @param match a 2 col matrix of x to y index matched descriptors via .match_descriptor_single #' @returns a list of model and inliners -#' - .estimate_transform_from_matched_descriptor <- function(keypoints1, keypoints2, match, @@ -1420,8 +1416,6 @@ preprocessImageToMatrix <- function(x, #' @param y target image from .sift_preprocess #' @param model estimated transformation object from .estimate_transform_from_matched_descriptor #' @returns None, it will write to a output path -#' - .warp_transformed_image <- function(x, y, model, @@ -1454,8 +1448,6 @@ preprocessImageToMatrix <- function(x, #' @param keypoints1 keypoints extracted from target image via .sift_detect #' @param match a 2 col matrix of x to y index matched descriptors via .match_descriptor_single #' @returns None -#' - .plot_matched_descriptors <- function(x, y, keypoints1, keypoints2, match, pkg_ptr){ if (missing(pkg_ptr)) { GiottoUtils::package_check("skimage", repository = "pip:scikit-image") diff --git a/man/InteractiveLandmarkSelection.Rd b/man/interactiveLandmarkSelection.Rd similarity index 77% rename from man/InteractiveLandmarkSelection.Rd rename to man/interactiveLandmarkSelection.Rd index 51a120bfe..c0b1ecef4 100644 --- a/man/InteractiveLandmarkSelection.Rd +++ b/man/interactiveLandmarkSelection.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/image_registration.R -\name{InteractiveLandmarkSelection} -\alias{InteractiveLandmarkSelection} -\title{Record landmarks by interactive selection} +\name{interactiveLandmarkSelection} +\alias{interactiveLandmarkSelection} +\title{title Record landmarks by interactive selection} \usage{ -InteractiveLandmarkSelection(source, target) +interactiveLandmarkSelection(source, target) } \arguments{ \item{source_image}{the image to be plotted on the left, and landmarks will output in the first of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image} diff --git a/man/performCellposeSegmentation.Rd b/man/performCellposeSegmentation.Rd index fc94d04a0..92e8e3b11 100644 --- a/man/performCellposeSegmentation.Rd +++ b/man/performCellposeSegmentation.Rd @@ -46,7 +46,7 @@ performCellposeSegmentation( \item{channel_2}{channel number for Nuclei, default to 0(gray scale)} -\item{model_name}{Name of the model to run inference. Default to 'cyto3'} +\item{model_name}{Name of the model to run inference. Default to 'cyto3', if you want to run cutomized trained model, place your model file in ~/.cellpose/models and specify your model name.} \item{batch_size}{Cellpose Parameter, Number of 224x224 patches to run simultaneously on the GPU. Can make smaller or bigger depending on GPU memory usage. Defaults to 8.} From bc81691e29ab475c0b0b40526b39f6a5d34bdfaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wen=20Wang=20=28=E7=8E=8B=E6=96=87=29?= Date: Wed, 31 Jul 2024 17:26:10 -0400 Subject: [PATCH 148/150] Add: new functions for ONTraC integration --- R/ONTraC_wrapper.R | 249 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 246 insertions(+), 3 deletions(-) diff --git a/R/ONTraC_wrapper.R b/R/ONTraC_wrapper.R index 0cdc02ff4..e33abeac7 100644 --- a/R/ONTraC_wrapper.R +++ b/R/ONTraC_wrapper.R @@ -75,6 +75,83 @@ getONTraCv1Input <- function(gobject, # nolint: object_name_linter. } +#' @title getONTraCv2Input +#' @name getONTraCv2Input +#' @description generate the input data for ONTraC v2 +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @param output_path the path to save the output file +#' @param cell_type the cell type column name in the metadata +#' @returns data.table with columns: Cell_ID, Sample, x, y, Cell_Type +#' @details This function generate the input data for ONTraC v2 +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' getONTraCv2Input( +#' gobject = g, +#' cell_type = "custom_leiden" +#' ) +#' @export +getONTraCv2Input <- function(gobject, # nolint: object_name_linter. + cell_type, + output_path = getwd(), + spat_unit = NULL, + feat_type = NULL, + verbose = TRUE) { + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + pos_df <- getSpatialLocations( + gobject = gobject, + spat_unit = spat_unit, + output = "data.table" + ) + meta_df <- pDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + output_df <- merge(x = pos_df, y = meta_df, by = "cell_ID") + + # check if the cell_type column exits + if (!cell_type %in% colnames(output_df)) { + vmsg(.v = verbose, paste( + "Given", + cell_type, + "do not exist in giotto object's metadata!" + )) + return(NULL) + } + + # add default sample name for one sample obj + if (!"list_ID" %in% colnames(output_df)) { + output_df$list_ID <- "ONTraC" + } + + output_df <- output_df[, .SD, .SDcols = c( + "cell_ID", + "list_ID", + "sdimx", + "sdimy", + cell_type + )] + colnames(output_df) <- c("Cell_ID", "Sample", "x", "y", "Cell_Type") + file_path <- file.path(output_path, "ONTraC_meta_data_input.csv") + write.csv(output_df, file = file_path, quote = FALSE, row.names = FALSE) + vmsg(.v = verbose, paste("ONTraC input file was saved as", file_path)) + + return(output_df) +} + + #' @title load_cell_bin_niche_cluster #' @name load_cell_bin_niche_cluster #' @description load cell-level binarized niche cluster @@ -366,9 +443,9 @@ plotNicheClusterConnectivity <- function( # nolint: object_name_linter. #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @param spat_unit name of spatial unit niche stored cluster features -#' @param feat_type name of the feature type stored niche cluster connectivities -#' @param values name of the expression matrix stored connectivity values -#' @details This function plots the niche cluster connectivity matrix +#' @param feat_type name of the feature type stored probability matrix +#' @param values name of the expression matrix stored probability of each cell assigned to each niche cluster +#' @details This function plots the cell type composition within each niche cluster #' @export plotCTCompositionInNicheCluster <- function( # nolint: object_name_linter. gobject, @@ -465,6 +542,112 @@ plotCTCompositionInNicheCluster <- function( # nolint: object_name_linter. } +#' @title plotCTCompositionInProbCluster +#' @name plotCTCompositionInProbCluster +#' @description plot cell type composition within each probabilistic cluster +#' @param cell_type the cell type column name in the metadata +#' @inheritParams data_access_params +#' @inheritParams plot_output_params +#' @param spat_unit name of spatial unit niche stored cluster features +#' @param feat_type name of the feature type stored niche cluster connectivities +#' @param values name of the expression matrix stored probability of each cell assigned to each probabilistic cluster +#' @details This function plots the cell type composition within each probabilistic cluster +#' @export +plotCTCompositionInProbCluster <- function( # nolint: object_name_linter. + gobject, + cell_type, + values = "prob", + spat_unit = "cell", + feat_type = "niche cluster", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "plotCTCompositionInProbCluster") { + # Get the cell type composition within each niche cluster + ## extract the cell-level niche cluster probability matrix + exp <- getExpression( + gobject = gobject, + values = values, + spat_unit = spat_unit, + feat_type = feat_type, + output = "exprObj" + ) + prob_df <- as.data.frame(t(as.matrix(exp@exprMat))) + prob_df$cell_ID <- rownames(prob_df) + ## combine the cell type and niche cluster probability matrix + combined_df <- merge( + as.data.frame(pDataDT(gobject, feat_type = feat_type))[, c( + "cell_ID", + cell_type + )], + prob_df, + by = "cell_ID" + ) + + # Calculate the normalized cell type composition within each niche cluster + cell_type_counts_df <- combined_df %>% + tidyr::pivot_longer( + cols = dplyr::starts_with("NicheCluster_"), + names_to = "Cluster", + values_to = "Probability" + ) %>% + dplyr::group_by( + !!rlang::sym(cell_type), + Cluster # nolint: object_usage_linter. + ) %>% + dplyr::summarise(Sum = sum(Probability, # nolint: object_usage_linter. + na.rm = TRUE + )) %>% + tidyr::spread(key = "Cluster", value = "Sum", fill = 0) + cell_type_counts_df <- as.data.frame(cell_type_counts_df) + rownames(cell_type_counts_df) <- cell_type_counts_df[[cell_type]] + cell_type_counts_df[[cell_type]] <- NULL + normalized_df <- as.data.frame(t( + t(cell_type_counts_df) / colSums(cell_type_counts_df) + )) + + + # Reshape the data frame into long format + normalized_df[[cell_type]] <- rownames(normalized_df) + df_long <- normalized_df %>% + tidyr::pivot_longer( + cols = -!!rlang::sym(cell_type), # nolint: object_usage_linter. + names_to = "Cluster", + values_to = "Composition" + ) + + # Create the heatmap using ggplot2 + pl <- ggplot(df_long, aes( + x = !!rlang::sym(cell_type), # nolint: object_usage_linter. + y = Cluster, # nolint: object_usage_linter. + fill = Composition # nolint: object_usage_linter. + )) + + geom_tile() + + viridis::scale_fill_viridis(option = "inferno", limits = c(0, 1)) + + theme_minimal() + + labs( + title = "Normalized cell type compositions within each niche cluster", + x = "Cell_Type", + y = "Cluster" + ) + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + # return or save + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = pl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL + )) +} + + #' @title plotCellTypeNTScore #' @name plotCellTypeNTScore #' @description plot NTScore by cell type @@ -522,3 +705,63 @@ plotCellTypeNTScore <- function(gobject, # nolint: object_name_linter. else_return = NULL )) } + + +#' @title plotDiscreteAlongContinuous +#' @name plotDiscreteAlongContinuous +#' @description plot density of a discrete annotation along a continuou values +#' @param cell_type the column name of discrete annotation in cell metadata +#' @param values the column name of continuous values in cell metadata +#' @inheritParams data_access_params +#' @inheritParams plot_output_params +#' @export +plotCellTypeNTScore <- function(gobject, # nolint: object_name_linter. + cell_type, + values = "NTScore", + spat_unit = "cell", + feat_type = "niche cluster", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "discreteAlongContinuous") { + # Get the cell type composition within each niche cluster + data_df <- pDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + avg_scores <- data_df %>% + dplyr::group_by(!!rlang::sym(cell_type)) %>% # nolint: object_usage_linter. + dplyr::summarise(Avg_NTScore = mean(NTScore)) # nolint: object_usage_linter. + data_df[[cell_type]] <- factor(data_df[[cell_type]], + levels = avg_scores[[cell_type]][order(avg_scores$Avg_NTScore)] + ) + + pl <- ggplot(data_df, aes( + x = NTScore, # nolint: object_usage_linter. + y = !!rlang::sym(cell_type), + fill = !!rlang::sym(cell_type) + )) + + geom_violin() + + theme_minimal() + + labs( + title = "Violin Plot of NTScore by Cell Type", + x = "NTScore", + y = "Cell Type" + ) + + ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + # return or save + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = pl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL + )) +} \ No newline at end of file From a57a4e3acbd67042aa2fe1ab47f7b6414e413913 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:35:51 -0400 Subject: [PATCH 149/150] chore: naming change and document - `performCellposeSegmentation()` -> `doCellposeSegmentation()` --- NAMESPACE | 4 +- R/cell_segmentation.R | 92 +++++++++---------- ...mentation.Rd => doCellposeSegmentation.Rd} | 8 +- man/dot-compute_dbMatrix.Rd | 16 ++++ man/getONTraCv2Input.Rd | 45 +++++++++ man/plotCTCompositionInNicheCluster.Rd | 6 +- man/plotCTCompositionInProbCluster.Rd | 47 ++++++++++ man/plotCellTypeNTScore.Rd | 4 +- man/plotDiscreteAlongContinuous.Rd | 45 +++++++++ 9 files changed, 211 insertions(+), 56 deletions(-) rename man/{performCellposeSegmentation.Rd => doCellposeSegmentation.Rd} (91%) create mode 100644 man/dot-compute_dbMatrix.Rd create mode 100644 man/getONTraCv2Input.Rd create mode 100644 man/plotCTCompositionInProbCluster.Rd create mode 100644 man/plotDiscreteAlongContinuous.Rd diff --git a/NAMESPACE b/NAMESPACE index 6fa2ade63..f9eddf16a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -167,6 +167,7 @@ export(dimPlot2D) export(dimPlot3D) export(distGiottoImage) export(doCellSegmentation) +export(doCellposeSegmentation) export(doClusterProjection) export(doFeatureSetEnrichment) export(doGiottoClustree) @@ -241,6 +242,7 @@ export(getGiottoImage) export(getMultiomics) export(getNearestNetwork) export(getONTraCv1Input) +export(getONTraCv2Input) export(getPolygonInfo) export(getRainbowColors) export(getSpatialEnrichment) @@ -294,12 +296,12 @@ export(overlapToMatrix) export(overlapToMatrixMultiPoly) export(overlaps) export(pDataDT) -export(performCellposeSegmentation) export(pieCellTypesFromEnrichment) export(plotCCcomDotplot) export(plotCCcomHeatmap) export(plotCPF) export(plotCTCompositionInNicheCluster) +export(plotCTCompositionInProbCluster) export(plotCellProximityFeatSpot) export(plotCellProximityFeats) export(plotCellTypeNTScore) diff --git a/R/cell_segmentation.R b/R/cell_segmentation.R index a411e320a..8090cd603 100644 --- a/R/cell_segmentation.R +++ b/R/cell_segmentation.R @@ -92,10 +92,10 @@ doCellSegmentation <- function( #' #' @title perform cellpose segmentation #' @description -#' +#' #' perform the Giotto Wrapper of cellpose segmentation. This is for a model inference to generate segmentation mask file from input image. #' main parameters needed -#' @name performCellposeSegmentation +#' @name doCellposeSegmentation #' @param image_dir character, required. Provide a path to a gray scale or a three channel image. #' @param python_path python environment with cellpose installed. default = "giotto_cellpose". #' @param mask_output required. Provide a path to the output mask file. @@ -127,88 +127,88 @@ doCellSegmentation <- function( #' @returns No return variable, as this will write directly to output path provided. #' @examples #' # example code -#' performCellposeSegmentation(image_dir = input_image, mask_output = output, channel_1 = 2, channel_2 = 1, model_name = 'cyto3',batch_size=4) +#' doCellposeSegmentation(image_dir = input_image, mask_output = output, channel_1 = 2, channel_2 = 1, model_name = 'cyto3',batch_size=4) #' @export -performCellposeSegmentation <- function(python_env = 'giotto_cellpose', +doCellposeSegmentation <- function(python_env = 'giotto_cellpose', image_dir, mask_output, channel_1 = 0, channel_2 = 0, model_name = 'cyto3', - batch_size=8, - resample=TRUE, + batch_size=8, + resample=TRUE, channel_axis=NULL, - z_axis=NULL, - normalize=TRUE, - invert=FALSE, - rescale=NULL, + z_axis=NULL, + normalize=TRUE, + invert=FALSE, + rescale=NULL, diameter=NULL, - flow_threshold=0.4, - cellprob_threshold=0.0, - do_3D=FALSE, + flow_threshold=0.4, + cellprob_threshold=0.0, + do_3D=FALSE, anisotropy=NULL, - stitch_threshold=0.0, - min_size=15, - niter=NULL, - augment=FALSE, + stitch_threshold=0.0, + min_size=15, + niter=NULL, + augment=FALSE, tile=TRUE, - tile_overlap=0.1, - bsize=224, - interp=TRUE, + tile_overlap=0.1, + bsize=224, + interp=TRUE, compute_masks=TRUE, progress=NULL, verbose = TRUE,...){ - - + + #Check Input arguments model_name <- match.arg(model_name, unique(c('cyto3', 'cyto2', 'cyto','nuclei', model_name))) ## Load required python libraries GiottoClass::set_giotto_python_path(python_env) GiottoUtils::package_check('cellpose',repository = 'pip') - + cellpose <- reticulate::import("cellpose") np <- reticulate::import("numpy") cv2 <- reticulate::import("cv2") torch <- reticulate::import("torch") message('successfully loaded giotto environment with cellpose.') - + if (!(torch$cuda$is_available())){ warning('GPU is not available for this session, inference may be slow.\n ') } - + GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Loading Image from ',image_dir) - + img <- cellpose$io$imread(image_dir) GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Loading Model...') - + model_to_seg <- cellpose$models$Cellpose(model_type=model_name,gpu = torch$cuda$is_available()) channel_to_seg <- as.integer(c(channel_1,channel_2)) - + GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Segmenting Image...') segmentation <- model_to_seg$eval - - result <- segmentation(img, - diameter=diameter, + + result <- segmentation(img, + diameter=diameter, channels=channel_to_seg, batch_size = batch_size, - resample=resample, + resample=resample, channel_axis=channel_axis, - z_axis=z_axis, - normalize=normalize, - invert=invert, - rescale=rescale, - flow_threshold=flow_threshold, - cellprob_threshold=cellprob_threshold, - do_3D=do_3D, + z_axis=z_axis, + normalize=normalize, + invert=invert, + rescale=rescale, + flow_threshold=flow_threshold, + cellprob_threshold=cellprob_threshold, + do_3D=do_3D, anisotropy=anisotropy, - stitch_threshold=stitch_threshold, - min_size=min_size, - niter=niter, - augment=augment, + stitch_threshold=stitch_threshold, + min_size=min_size, + niter=niter, + augment=augment, tile=tile, - tile_overlap=tile_overlap, - bsize=bsize, - interp=interp, + tile_overlap=tile_overlap, + bsize=bsize, + interp=interp, compute_masks=compute_masks, progress=progress) masks <- result[[1]] diff --git a/man/performCellposeSegmentation.Rd b/man/doCellposeSegmentation.Rd similarity index 91% rename from man/performCellposeSegmentation.Rd rename to man/doCellposeSegmentation.Rd index 92e8e3b11..084d3148e 100644 --- a/man/performCellposeSegmentation.Rd +++ b/man/doCellposeSegmentation.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cell_segmentation.R -\name{performCellposeSegmentation} -\alias{performCellposeSegmentation} +\name{doCellposeSegmentation} +\alias{doCellposeSegmentation} \title{perform cellpose segmentation} \usage{ -performCellposeSegmentation( +doCellposeSegmentation( python_env = "giotto_cellpose", image_dir, mask_output, @@ -103,5 +103,5 @@ main parameters needed } \examples{ # example code -performCellposeSegmentation(image_dir = input_image, mask_output = output, channel_1 = 2, channel_2 = 1, model_name = 'cyto3',batch_size=4) +doCellposeSegmentation(image_dir = input_image, mask_output = output, channel_1 = 2, channel_2 = 1, model_name = 'cyto3',batch_size=4) } diff --git a/man/dot-compute_dbMatrix.Rd b/man/dot-compute_dbMatrix.Rd new file mode 100644 index 000000000..d630ed00b --- /dev/null +++ b/man/dot-compute_dbMatrix.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/auxiliary_giotto.R +\name{.compute_dbMatrix} +\alias{.compute_dbMatrix} +\title{compute_dbMatrix} +\usage{ +.compute_dbMatrix(dbMatrix, name, verbose = TRUE) +} +\description{ +saves dbMatrix to db if global option is set +} +\details{ +Set \code{options(giotto.dbmatrix_compute = FALSE)} if saving dbMatrix +after each step of normalization workflow is not desired. +} +\keyword{internal} diff --git a/man/getONTraCv2Input.Rd b/man/getONTraCv2Input.Rd new file mode 100644 index 000000000..fb4336bb8 --- /dev/null +++ b/man/getONTraCv2Input.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{getONTraCv2Input} +\alias{getONTraCv2Input} +\title{getONTraCv2Input} +\usage{ +getONTraCv2Input( + gobject, + cell_type, + output_path = getwd(), + spat_unit = NULL, + feat_type = NULL, + verbose = TRUE +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{cell_type}{the cell type column name in the metadata} + +\item{output_path}{the path to save the output file} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{verbose}{be verbose} +} +\value{ +data.table with columns: Cell_ID, Sample, x, y, Cell_Type +} +\description{ +generate the input data for ONTraC v2 +} +\details{ +This function generate the input data for ONTraC v2 +} +\examples{ +g <- GiottoData::loadGiottoMini("visium") + +getONTraCv2Input( + gobject = g, + cell_type = "custom_leiden" +) +} diff --git a/man/plotCTCompositionInNicheCluster.Rd b/man/plotCTCompositionInNicheCluster.Rd index 24c9a7109..e121ae78f 100644 --- a/man/plotCTCompositionInNicheCluster.Rd +++ b/man/plotCTCompositionInNicheCluster.Rd @@ -23,11 +23,11 @@ plotCTCompositionInNicheCluster( \item{cell_type}{the cell type column name in the metadata} -\item{values}{name of the expression matrix stored connectivity values} +\item{values}{name of the expression matrix stored probability of each cell assigned to each niche cluster} \item{spat_unit}{name of spatial unit niche stored cluster features} -\item{feat_type}{name of the feature type stored niche cluster connectivities} +\item{feat_type}{name of the feature type stored probability matrix} \item{show_plot}{logical. show plot} @@ -43,5 +43,5 @@ plotCTCompositionInNicheCluster( plot cell type composition within each niche cluster } \details{ -This function plots the niche cluster connectivity matrix +This function plots the cell type composition within each niche cluster } diff --git a/man/plotCTCompositionInProbCluster.Rd b/man/plotCTCompositionInProbCluster.Rd new file mode 100644 index 000000000..7116b6bd4 --- /dev/null +++ b/man/plotCTCompositionInProbCluster.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{plotCTCompositionInProbCluster} +\alias{plotCTCompositionInProbCluster} +\title{plotCTCompositionInProbCluster} +\usage{ +plotCTCompositionInProbCluster( + gobject, + cell_type, + values = "prob", + spat_unit = "cell", + feat_type = "niche cluster", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "plotCTCompositionInProbCluster" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{cell_type}{the cell type column name in the metadata} + +\item{values}{name of the expression matrix stored probability of each cell assigned to each probabilistic cluster} + +\item{spat_unit}{name of spatial unit niche stored cluster features} + +\item{feat_type}{name of the feature type stored niche cluster connectivities} + +\item{show_plot}{logical. show plot} + +\item{return_plot}{logical. return ggplot object} + +\item{save_plot}{logical. save the plot} + +\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} + +\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +} +\description{ +plot cell type composition within each probabilistic cluster +} +\details{ +This function plots the cell type composition within each probabilistic cluster +} diff --git a/man/plotCellTypeNTScore.Rd b/man/plotCellTypeNTScore.Rd index c24a73bd9..5938c5c37 100644 --- a/man/plotCellTypeNTScore.Rd +++ b/man/plotCellTypeNTScore.Rd @@ -9,13 +9,13 @@ plotCellTypeNTScore( cell_type, values = "NTScore", spat_unit = "cell", - feat_type = "rna", + feat_type = "niche cluster", show_plot = NULL, return_plot = NULL, save_plot = NULL, save_param = list(), theme_param = list(), - default_save_name = "CellTypeNTScore" + default_save_name = "discreteAlongContinuous" ) } \arguments{ diff --git a/man/plotDiscreteAlongContinuous.Rd b/man/plotDiscreteAlongContinuous.Rd new file mode 100644 index 000000000..e5fe7e131 --- /dev/null +++ b/man/plotDiscreteAlongContinuous.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{plotDiscreteAlongContinuous} +\alias{plotDiscreteAlongContinuous} +\alias{plotCellTypeNTScore} +\title{plotDiscreteAlongContinuous} +\usage{ +plotCellTypeNTScore( + gobject, + cell_type, + values = "NTScore", + spat_unit = "cell", + feat_type = "niche cluster", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + theme_param = list(), + default_save_name = "discreteAlongContinuous" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{cell_type}{the column name of discrete annotation in cell metadata} + +\item{values}{the column name of continuous values in cell metadata} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{show_plot}{logical. show plot} + +\item{return_plot}{logical. return ggplot object} + +\item{save_plot}{logical. save the plot} + +\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} + +\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +} +\description{ +plot density of a discrete annotation along a continuou values +} From 14e465a29cb94f223f7b087299e377cd7b761c55 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:50:43 -0400 Subject: [PATCH 150/150] chore: update news --- DESCRIPTION | 2 +- NEWS.md | 27 ++++++++++++++------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 58d304e4f..945224e9a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,7 @@ Depends: utils (>= 4.1.0), R (>= 4.1.0), methods, - GiottoClass (>= 0.3.2) + GiottoClass (>= 0.3.3) Imports: BiocParallel, BiocSingular, diff --git a/NEWS.md b/NEWS.md index bf585e6a7..9d8438301 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,30 +1,31 @@ -# Giotto 4.1.0 TBD - -## Bug fixes -* Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale [#964](https://github.com/drieslab/Giotto/issues/964) by jweis3 -* Fix error in DWLS `find_dampening_constant()` when `S[subset, ]` produces only 1 gene. -* Fix error in `interpolateFeatures()` where feature names with `-` or starting with numbers did not work - -## New -* `read10xAffineImage()` for reading 10x affine transformed images -* Several modular importer functions - -# Giotto 4.0.9 +# Giotto 4.1.0 (2024/07/31) ## Breaking changes * Deprecated `detectSpatialCorGenes()` removed. Use `detectSpatialCorFeats()` instead * Deprecated `findInteractionChangedGenes()` removed. Use `findInteractionChangedFeats()` instead * Deprecated `findCellProximityGenes()` removed. Use `findInteractionChangedFeats()` instead +* `createGiottoXeniumObject()` has been overhauled and parameters have changed. ## Bug fixes +* Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale [#964](https://github.com/drieslab/Giotto/issues/964) by jweis3 +* Fix error in DWLS `find_dampening_constant()` when `S[subset, ]` produces only 1 gene. +* Fix error in `interpolateFeatures()` where feature names with `-` or starting with numbers did not work * Add catch in `runPCAprojectionBatch()` for when ncp requested exceeds number of feats used * Make `spatCellCellcom()` respect `verbose` flag [#949](https://github.com/drieslab/Giotto/issues/949) by rbutleriii +## New +* Dataset affine registration via interactive shiny app and automated SIFT detection +* Cell segmentation via Cellpose +* `read10xAffineImage()` for reading 10x affine transformed images +* Several modular importer and convenience functions +* ONTraC implementation + ## Enhancements * `print()` methods for `icfObject` and `combIcfObject` ## Changes -* require GiottoUtils (>= 0.1.9) +* require GiottoUtils (>= 0.1.10) +* require GiottoClass (>= 0.3.3) # Giotto 4.0.8 (2024/05/22)