From ad6d004c613a3df076eefafe6fe091ca3691bb3a Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 16 Oct 2024 09:45:13 -0400 Subject: [PATCH 01/10] chore: change libsize 0 detection --- R/normalize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/normalize.R b/R/normalize.R index 4e4303fe4..2a1a2a1a3 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -199,7 +199,7 @@ normalizeGiotto <- function( .lib_norm_giotto <- function(mymatrix, scalefactor) { libsizes <- colSums_flex(mymatrix) - if (any(libsizes == 0)) { + if (0 %in% libsizes) { warning(wrap_txt("Total library size or counts for individual spat units are 0. This will likely result in normalization problems. From 074b3d8d4dd4fe1fa37d11ed15916f24fc4bb0b9 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Sun, 20 Oct 2024 20:29:32 -0400 Subject: [PATCH 02/10] change: rework spatial splitting to return numeric --- R/spatial_clusters.R | 92 ++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 47 deletions(-) diff --git a/R/spatial_clusters.R b/R/spatial_clusters.R index 3e78fbcb7..4c441fd7b 100644 --- a/R/spatial_clusters.R +++ b/R/spatial_clusters.R @@ -43,14 +43,13 @@ #' @param g igraph #' @param clus_name character. name to assign column of clustering info #' @param all_ids (optional) character vector with all ids -#' @param missing_id_name character and name for vertices that were missing -#' from g -#' @returns data.table +#' @returns `data.table` with two columns. 
1st is "cell_ID", second is named with +#' `clus_name` and is of type `numeric` #' @keywords internal +#' @noRd .igraph_vertex_membership <- function(g, clus_name, - all_ids = NULL, - missing_id_name) { + all_ids = NULL) { # get membership membership <- igraph::components(g)$membership %>% data.table::as.data.table(keep.rownames = TRUE) @@ -60,7 +59,9 @@ if (!is.null(all_ids)) { missing_ids <- all_ids[!all_ids %in% igraph::V(g)$name] missing_membership <- data.table::data.table( - "cell_ID" = missing_ids, "cluster_name" = missing_id_name) + "cell_ID" = missing_ids, + "cluster_name" = 0 + ) data.table::setnames(missing_membership, c("cell_ID", clus_name)) membership <- data.table::rbindlist( list(membership, missing_membership)) @@ -80,14 +81,11 @@ #' @param cluster_col character. Column in metadata containing original #' clustering #' @param split_clus_name character. Name to assign the split cluster results -#' @param include_all_ids Boolean. Include all ids, including vertex ids not +#' @param include_all_ids logical. Include all ids, including vertex ids not #' found in the spatial network -#' @param missing_id_name Character. Name for vertices that were missing from -#' spatial network -#' @param return_gobject Boolean. Return giotto object +#' @param return_gobject logical. 
Return giotto object #' @returns giotto object with cluster annotations #' @examples -#' library(Giotto) #' g <- GiottoData::loadGiottoMini("vizgen") #' activeSpatUnit(g) <- "aggregate" #' spatPlot2D(g, cell_color = "leiden_clus") @@ -105,7 +103,6 @@ spatialSplitCluster <- function(gobject, spatial_network_name = "Delaunay_network", cluster_col, split_clus_name = paste0(cluster_col, "_split"), - include_all_ids = TRUE, missing_id_name = "not_connected", return_gobject = TRUE) { # NSE vars @@ -156,23 +153,13 @@ spatialSplitCluster <- function(gobject, ) # get new clusterings - if (isTRUE(include_all_ids)) { - # include all cell IDs - all_ids <- unique(cell_meta$cell_ID) - new_clus_dt <- .igraph_vertex_membership( - g = g, - clus_name = split_clus_name, - all_ids = all_ids, - missing_id_name = missing_id_name - ) - } else { - # only IDs present in graph - new_clus_dt <- .igraph_vertex_membership( - g = g, - clus_name = split_clus_name, - all_ids = NULL - ) - } + # spatially unconnected nodes (if any) will always be returned as 0 + all_ids <- unique(cell_meta$cell_ID) + new_clus_dt <- .igraph_vertex_membership( + g = g, + clus_name = split_clus_name, + all_ids = all_ids + ) if (isTRUE(return_gobject)) { gobject <- addCellMetadata( @@ -197,11 +184,12 @@ spatialSplitCluster <- function(gobject, #' @inheritParams data_access_params #' @param spatial_network_name character. Name of spatial network to use #' @param core_id_name metadata column name for the core information -#' @param include_all_ids Boolean. Include all ids, including vertex ids not +#' @param id_fmt character. [sprintf] formatting to use for core ids +#' @param include_all_ids logical. Include all ids, including vertex ids not #' found in the spatial network -#' @param missing_id_name Character. Name for vertices that were missing from +#' @param missing_id_name character. Name for vertices that were missing from #' spatial network -#' @param return_gobject Boolean. 
Return giotto object +#' @param return_gobject logical. Return giotto object #' @returns cluster annotations #' @export identifyTMAcores <- function(gobject, @@ -209,6 +197,7 @@ identifyTMAcores <- function(gobject, feat_type = NULL, spatial_network_name = "Delaunay_network", core_id_name = "core_id", + id_fmt = "%d", include_all_ids = TRUE, missing_id_name = "not_connected", return_gobject = TRUE) { @@ -225,6 +214,7 @@ identifyTMAcores <- function(gobject, feat_type = feat_type ) + # get data cell_meta <- getCellMetadata( gobject = gobject, spat_unit = spat_unit, @@ -242,35 +232,43 @@ identifyTMAcores <- function(gobject, verbose = FALSE, ) - g <- GiottoClass::spat_net_to_igraph(sn) # convert spatialNetworkObject to igraph - - # get new clusterings + # get new clusterings as initial indices + # these indices may need repairs and updates to be finalized + ivm_params <- list( + g = g, clus_name = "init_idx" + ) if (isTRUE(include_all_ids)) { # include all cell IDs all_ids <- unique(cell_meta$cell_ID) - new_clus_dt <- .igraph_vertex_membership( - g = g, - clus_name = core_id_name, - all_ids = all_ids, - missing_id_name = missing_id_name - ) + ivm_params$all_ids <- all_ids } else { # only IDs present in graph - new_clus_dt <- .igraph_vertex_membership( - g = g, - clus_name = core_id_name, - all_ids = NULL - ) + ivm_params$all_ids <- NULL } + new_clus_dt <- do.call(.igraph_vertex_membership, args = ivm_params) + # connected nodes + con <- new_clus_dt[init_idx > 0] + # spatially disconnected observations (not connected to a group of nodes) + dcon <- new_clus_dt[init_idx == 0] + + # apply core_id_name + con[, (core_id_name) := sprintf(id_fmt, init_idx)] + dcon[, (core_id_name) := missing_id_name] + + res <- rbind( + con[, .SD, .SDcols = c("cell_ID", core_id_name)], + dcon[, .SD, .SDcols = c("cell_ID", core_id_name)] + ) + if (isTRUE(return_gobject)) { gobject <- addCellMetadata( gobject, spat_unit = spat_unit, - new_metadata = new_clus_dt, + new_metadata = res, by_column 
= TRUE, column_cell_ID = "cell_ID" ) From 9037649902c1206ab1d477a44339be8d32ee45b6 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 23:23:25 -0400 Subject: [PATCH 03/10] enh: identifyTMAcores - has minimal nodes to not be considerd disconnected - now repairs split cores --- R/spatial_clusters.R | 220 +++++++++++++++++--------- man/dot-igraph_remove_hetero_edges.Rd | 23 --- man/dot-igraph_vertex_membership.Rd | 25 --- man/identifyTMAcores.Rd | 19 ++- man/spatialSplitCluster.Rd | 11 +- 5 files changed, 160 insertions(+), 138 deletions(-) delete mode 100644 man/dot-igraph_remove_hetero_edges.Rd delete mode 100644 man/dot-igraph_vertex_membership.Rd diff --git a/R/spatial_clusters.R b/R/spatial_clusters.R index 4c441fd7b..660f9b1cc 100644 --- a/R/spatial_clusters.R +++ b/R/spatial_clusters.R @@ -1,79 +1,3 @@ -#' @title Remove hetero edges from igraph -#' @name .igraph_remove_hetero_edges -#' @description -#' Given an igraph `g` and set of node attributes `clus_att` that encode -#' different spatial clusters, remove edges that connect non-similar nodes. -#' This can be used when data is already clustered, but these clusters should -#' be further broken up based on whether they are spatially touching. -#' @param g igraph object -#' @param clus_attr character. 
A categorical node attribute -#' @md -#' @returns igraph -#' @keywords internal -.igraph_remove_hetero_edges <- function(g, clus_attr) { - clus_attr_values <- igraph::vertex_attr(g, name = clus_attr) - - for (n in unique(clus_attr_values)) { - # find all vertices of the attribute - nv <- igraph::V(g)$name[clus_attr_values == n] - - # find edges that include these vertices - n_all_edges <- igraph::E(g)[.inc(igraph::V(g)[nv])] %>% - igraph::as_ids() - - # find edges associated with only these vertices - n_internal_edges <- igraph::E(g)[nv %--% nv] %>% - igraph::as_ids() - - het_edges <- n_all_edges[!n_all_edges %in% n_internal_edges] - - g <- igraph::delete_edges(g, edges = het_edges) - } - - g -} - - - - -#' @title igraph vertex membership -#' @name .igraph_vertex_membership -#' @description -#' Get which weakly connected set of vertices each vertex is part of -#' @param g igraph -#' @param clus_name character. name to assign column of clustering info -#' @param all_ids (optional) character vector with all ids -#' @returns `data.table` with two columns. 
1st is "cell_ID", second is named with -#' `clus_name` and is of type `numeric` -#' @keywords internal -#' @noRd -.igraph_vertex_membership <- function(g, - clus_name, - all_ids = NULL) { - # get membership - membership <- igraph::components(g)$membership %>% - data.table::as.data.table(keep.rownames = TRUE) - data.table::setnames(membership, c("cell_ID", clus_name)) - - # add vertices that were missing from g back - if (!is.null(all_ids)) { - missing_ids <- all_ids[!all_ids %in% igraph::V(g)$name] - missing_membership <- data.table::data.table( - "cell_ID" = missing_ids, - "cluster_name" = 0 - ) - data.table::setnames(missing_membership, c("cell_ID", clus_name)) - membership <- data.table::rbindlist( - list(membership, missing_membership)) - } - - return(membership) -} - - - - - #' @title Split cluster annotations based on a spatial network #' @name spatialSplitCluster #' @inheritParams data_access_params @@ -187,8 +111,12 @@ spatialSplitCluster <- function(gobject, #' @param id_fmt character. [sprintf] formatting to use for core ids #' @param include_all_ids logical. Include all ids, including vertex ids not #' found in the spatial network -#' @param missing_id_name character. Name for vertices that were missing from -#' spatial network +#' @param missing_id_name character. Name for nodes that are not connected to +#' a core. +#' @param min_nodes numeric. Minimal number of nodes to not be considered +#' an unconnected group. +#' @param repair_split_cores logical. Attempt to repair core IDs when a core +#' is split down the middle and detected as two different cores. #' @param return_gobject logical. 
Return giotto object #' @returns cluster annotations #' @export @@ -200,6 +128,8 @@ identifyTMAcores <- function(gobject, id_fmt = "%d", include_all_ids = TRUE, missing_id_name = "not_connected", + min_nodes = 5, + repair_split_cores = TRUE, return_gobject = TRUE) { # NSE vars cell_ID <- NULL @@ -255,6 +185,51 @@ identifyTMAcores <- function(gobject, # spatially disconnected observations (not connected to a group of nodes) dcon <- new_clus_dt[init_idx == 0] + # min nodes filter + con_nodes <- con[, .N, by = init_idx] + small_con_idx <- con_nodes[N < min_nodes, init_idx] + # shift filtered values to dcon (disconnected) + con[init_idx %in% small_con_idx, init_idx := 0] + dcon <- rbind(dcon, con[init_idx == 0]) + con <- con[init_idx != 0] + + # fix split cores + if (repair_split_cores) { + sl <- getSpatialLocations(gobject, spat_unit = spat_unit) + + # find ext of cores + # iterate through angles to catch cases where extents do not + # bridge across split. + ovlp_reps <- lapply(c(0, 45, 90), function(rangle) { + sl_rot <- spin(sl, rangle) + + # get ext poly of rotated cores + epoly_list <- lapply(unique(con$init_idx), function(core_id) { + sl_rot[con[init_idx == core_id, cell_ID]] |> + ext() |> + as.polygons() + }) + poly <- do.call(rbind, epoly_list) + + # test for overlaps + ovlps <- relate(poly, relation = "overlaps", pairs = TRUE) |> + # determine sorted pairs of overlaps + apply(MARGIN = 2, sort) |> + t() + }) + # combine test reps + ovlps <- do.call(rbind, ovlp_reps) |> + unique() + + # update ids based on test + for (pair_i in nrow(ovlps)) { + idx_1 <- ovlps[pair_i, 1L] + idx_2 <- ovlps[pair_i, 2L] + con[init_idx == idx_2, init_idx := idx_1] + } + + } + # apply core_id_name con[, (core_id_name) := sprintf(id_fmt, init_idx)] dcon[, (core_id_name) := missing_id_name] @@ -277,3 +252,92 @@ identifyTMAcores <- function(gobject, new_clus_dt } } + + + + + + + + + + + + +# internals #### + +#' @title Remove hetero edges from igraph +#' @name .igraph_remove_hetero_edges 
+#' @description +#' Given an igraph `g` and set of node attributes `clus_attr` that encode +#' different spatial clusters, remove edges that connect non-similar nodes. +#' This can be used when data is already clustered, but these clusters should +#' be further broken up based on whether they are spatially touching. +#' @param g igraph object +#' @param clus_attr character. A categorical node attribute +#' @returns igraph +#' @noRd +#' @keywords internal +.igraph_remove_hetero_edges <- function(g, clus_attr) { + clus_attr_values <- igraph::vertex_attr(g, name = clus_attr) + + for (n in unique(clus_attr_values)) { + # find all vertices of the attribute + nv <- igraph::V(g)$name[clus_attr_values == n] + + # find edges that include these vertices + n_all_edges <- igraph::E(g)[.inc(igraph::V(g)[nv])] %>% + igraph::as_ids() + + # find edges associated with only these vertices + n_internal_edges <- igraph::E(g)[nv %--% nv] %>% + igraph::as_ids() + + het_edges <- n_all_edges[!n_all_edges %in% n_internal_edges] + + g <- igraph::delete_edges(g, edges = het_edges) + } + + g +} + + + + +#' @title igraph vertex membership +#' @name .igraph_vertex_membership +#' @description +#' Get which weakly connected set of vertices each vertex is part of +#' @param g igraph +#' @param clus_name character. name to assign column of clustering info +#' @param all_ids (optional) character vector with all ids +#' @returns `data.table` with two columns. 
1st is "cell_ID", second is named with +#' `clus_name` and is of type `numeric` +#' @keywords internal +#' @noRd +.igraph_vertex_membership <- function(g, + clus_name, + all_ids = NULL) { + # get membership + membership <- igraph::components(g)$membership %>% + data.table::as.data.table(keep.rownames = TRUE) + data.table::setnames(membership, c("cell_ID", clus_name)) + + # add vertices that were missing from g back + if (!is.null(all_ids)) { + missing_ids <- all_ids[!all_ids %in% igraph::V(g)$name] + missing_membership <- data.table::data.table( + "cell_ID" = missing_ids, + "cluster_name" = 0 + ) + data.table::setnames(missing_membership, c("cell_ID", clus_name)) + membership <- data.table::rbindlist( + list(membership, missing_membership)) + } + + return(membership) +} + + + + diff --git a/man/dot-igraph_remove_hetero_edges.Rd b/man/dot-igraph_remove_hetero_edges.Rd deleted file mode 100644 index 155d1e958..000000000 --- a/man/dot-igraph_remove_hetero_edges.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_clusters.R -\name{.igraph_remove_hetero_edges} -\alias{.igraph_remove_hetero_edges} -\title{Remove hetero edges from igraph} -\usage{ -.igraph_remove_hetero_edges(g, clus_attr) -} -\arguments{ -\item{g}{igraph object} - -\item{clus_attr}{character. A categorical node attribute} -} -\value{ -igraph -} -\description{ -Given an igraph \code{g} and set of node attributes \code{clus_att} that encode -different spatial clusters, remove edges that connect non-similar nodes. -This can be used when data is already clustered, but these clusters should -be further broken up based on whether they are spatially touching. 
-} -\keyword{internal} diff --git a/man/dot-igraph_vertex_membership.Rd b/man/dot-igraph_vertex_membership.Rd deleted file mode 100644 index a3c9ba1be..000000000 --- a/man/dot-igraph_vertex_membership.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_clusters.R -\name{.igraph_vertex_membership} -\alias{.igraph_vertex_membership} -\title{igraph vertex membership} -\usage{ -.igraph_vertex_membership(g, clus_name, all_ids = NULL, missing_id_name) -} -\arguments{ -\item{g}{igraph} - -\item{clus_name}{character. name to assign column of clustering info} - -\item{all_ids}{(optional) character vector with all ids} - -\item{missing_id_name}{character and name for vertices that were missing -from g} -} -\value{ -data.table -} -\description{ -Get which weakly connected set of vertices each vertex is part of -} -\keyword{internal} diff --git a/man/identifyTMAcores.Rd b/man/identifyTMAcores.Rd index f83bb5d37..0f5e42a26 100644 --- a/man/identifyTMAcores.Rd +++ b/man/identifyTMAcores.Rd @@ -10,8 +10,11 @@ identifyTMAcores( feat_type = NULL, spatial_network_name = "Delaunay_network", core_id_name = "core_id", + id_fmt = "\%d", include_all_ids = TRUE, missing_id_name = "not_connected", + min_nodes = 5, + repair_split_cores = TRUE, return_gobject = TRUE ) } @@ -26,13 +29,21 @@ identifyTMAcores( \item{core_id_name}{metadata column name for the core information} -\item{include_all_ids}{Boolean. Include all ids, including vertex ids not +\item{id_fmt}{character. [sprintf] formatting to use for core ids} + +\item{include_all_ids}{logical. Include all ids, including vertex ids not found in the spatial network} -\item{missing_id_name}{Character. Name for vertices that were missing from -spatial network} +\item{missing_id_name}{character. Name for nodes that are not connected to +a core.} + +\item{min_nodes}{numeric. 
Minimal number of nodes to not be considered +an unconnected group.} + +\item{repair_split_cores}{logical. Attempt to repair core IDs when a core +is split down the middle and detected as two different cores.} -\item{return_gobject}{Boolean. Return giotto object} +\item{return_gobject}{logical. Return giotto object} } \value{ cluster annotations diff --git a/man/spatialSplitCluster.Rd b/man/spatialSplitCluster.Rd index a723c808b..52582a0ea 100644 --- a/man/spatialSplitCluster.Rd +++ b/man/spatialSplitCluster.Rd @@ -11,7 +11,6 @@ spatialSplitCluster( spatial_network_name = "Delaunay_network", cluster_col, split_clus_name = paste0(cluster_col, "_split"), - include_all_ids = TRUE, missing_id_name = "not_connected", return_gobject = TRUE ) @@ -30,13 +29,10 @@ clustering} \item{split_clus_name}{character. Name to assign the split cluster results} -\item{include_all_ids}{Boolean. Include all ids, including vertex ids not -found in the spatial network} - -\item{missing_id_name}{Character. Name for vertices that were missing from -spatial network} +\item{return_gobject}{logical. Return giotto object} -\item{return_gobject}{Boolean. Return giotto object} +\item{include_all_ids}{logical. 
Include all ids, including vertex ids not +found in the spatial network} } \value{ giotto object with cluster annotations @@ -45,7 +41,6 @@ giotto object with cluster annotations Split cluster annotations based on a spatial network } \examples{ -library(Giotto) g <- GiottoData::loadGiottoMini("vizgen") activeSpatUnit(g) <- "aggregate" spatPlot2D(g, cell_color = "leiden_clus") From 40e30600368835d90129e74a1c3149b854042a7a Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 23:29:15 -0400 Subject: [PATCH 04/10] enh: expose angles to test param --- R/spatial_clusters.R | 5 ++++- man/identifyTMAcores.Rd | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/R/spatial_clusters.R b/R/spatial_clusters.R index 660f9b1cc..beece2ee0 100644 --- a/R/spatial_clusters.R +++ b/R/spatial_clusters.R @@ -117,6 +117,8 @@ spatialSplitCluster <- function(gobject, #' an unconnected group. #' @param repair_split_cores logical. Attempt to repair core IDs when a core #' is split down the middle and detected as two different cores. +#' @param repair_split_test_angles angles to rotate through when testing for +#' split cores. If cores are very close together, 0 and 90 are safest. #' @param return_gobject logical. Return giotto object #' @returns cluster annotations #' @export @@ -130,6 +132,7 @@ identifyTMAcores <- function(gobject, missing_id_name = "not_connected", min_nodes = 5, repair_split_cores = TRUE, + repair_split_test_angles = c(0, 90), return_gobject = TRUE) { # NSE vars cell_ID <- NULL @@ -200,7 +203,7 @@ identifyTMAcores <- function(gobject, # find ext of cores # iterate through angles to catch cases where extents do not # bridge across split. 
- ovlp_reps <- lapply(c(0, 45, 90), function(rangle) { + ovlp_reps <- lapply(repair_split_test_angles, function(rangle) { sl_rot <- spin(sl, rangle) # get ext poly of rotated cores diff --git a/man/identifyTMAcores.Rd b/man/identifyTMAcores.Rd index 0f5e42a26..e8e0fef6d 100644 --- a/man/identifyTMAcores.Rd +++ b/man/identifyTMAcores.Rd @@ -15,6 +15,7 @@ identifyTMAcores( missing_id_name = "not_connected", min_nodes = 5, repair_split_cores = TRUE, + repair_split_test_angles = c(0, 90), return_gobject = TRUE ) } @@ -43,6 +44,9 @@ an unconnected group.} \item{repair_split_cores}{logical. Attempt to repair core IDs when a core is split down the middle and detected as two different cores.} +\item{repair_split_test_angles}{angles to rotate through when testing for +split cores. If cores are very close together, 0 and 90 are safest.} + \item{return_gobject}{logical. Return giotto object} } \value{ From 07df22ffea0d4ded6718ffcf37340b51946032b4 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Sat, 26 Oct 2024 19:40:31 -0400 Subject: [PATCH 05/10] enh: update TMA core detection - change arg repair_split_cores to join_split_cores - add join_tolerance param for area allowed after joining to prevent joins of full cores pairs when the array has very small gutter space. --- R/spatial_clusters.R | 37 ++++++++++++++++++++++++++++--------- man/identifyTMAcores.Rd | 10 +++++----- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/R/spatial_clusters.R b/R/spatial_clusters.R index beece2ee0..81fbf9e98 100644 --- a/R/spatial_clusters.R +++ b/R/spatial_clusters.R @@ -115,10 +115,10 @@ spatialSplitCluster <- function(gobject, #' a core. #' @param min_nodes numeric. Minimal number of nodes to not be considered #' an unconnected group. -#' @param repair_split_cores logical. Attempt to repair core IDs when a core +#' @param join_split_cores logical. 
Attempt to repair core IDs when a core #' is split down the middle and detected as two different cores. -#' @param repair_split_test_angles angles to rotate through when testing for -#' split cores. If cores are very close together, 0 and 90 are safest. +#' @param join_tolerance numeric. Max ratio allowed relative to previous max +#' core convex hull area when determining if a pair of cores should be joined. #' @param return_gobject logical. Return giotto object #' @returns cluster annotations #' @export @@ -131,8 +131,8 @@ identifyTMAcores <- function(gobject, include_all_ids = TRUE, missing_id_name = "not_connected", min_nodes = 5, - repair_split_cores = TRUE, - repair_split_test_angles = c(0, 90), + join_split_cores = TRUE, + join_tolerance = 1.2, return_gobject = TRUE) { # NSE vars cell_ID <- NULL @@ -197,17 +197,27 @@ identifyTMAcores <- function(gobject, con <- con[init_idx != 0] # fix split cores - if (repair_split_cores) { + if (join_split_cores) { sl <- getSpatialLocations(gobject, spat_unit = spat_unit) + con_init_idx_uniq <- sort(unique(con$init_idx)) + + areas <- vapply( + FUN.VALUE = numeric(1L), con_init_idx_uniq, function(core_id) { + sl[con[init_idx == core_id, cell_ID]] |> + convHull() |> + area() + } + ) + max_area <- max(areas) # find ext of cores # iterate through angles to catch cases where extents do not # bridge across split. 
- ovlp_reps <- lapply(repair_split_test_angles, function(rangle) { + ovlp_reps <- lapply(c(0, 22.5, 45), function(rangle) { sl_rot <- spin(sl, rangle) # get ext poly of rotated cores - epoly_list <- lapply(unique(con$init_idx), function(core_id) { + epoly_list <- lapply(con_init_idx_uniq, function(core_id) { sl_rot[con[init_idx == core_id, cell_ID]] |> ext() |> as.polygons() @@ -215,10 +225,11 @@ identifyTMAcores <- function(gobject, poly <- do.call(rbind, epoly_list) # test for overlaps - ovlps <- relate(poly, relation = "overlaps", pairs = TRUE) |> + ovlp <- relate(poly, relation = "overlaps", pairs = TRUE) |> # determine sorted pairs of overlaps apply(MARGIN = 2, sort) |> t() + return(ovlp) }) # combine test reps ovlps <- do.call(rbind, ovlp_reps) |> @@ -228,6 +239,13 @@ identifyTMAcores <- function(gobject, - for (pair_i in nrow(ovlps)) { + for (pair_i in seq_len(nrow(ovlps))) { idx_1 <- ovlps[pair_i, 1L] idx_2 <- ovlps[pair_i, 2L] + # ignore hits from two full cores + # combined area of IDs to join cannot be greater than join_tolerance of max_area + if ((areas[[idx_1]] + areas[[idx_2]]) > + (join_tolerance * max_area)) { + next + } + con[init_idx == idx_2, init_idx := idx_1] } @@ -344,3 +362,4 @@ identifyTMAcores <- function(gobject, + diff --git a/man/identifyTMAcores.Rd b/man/identifyTMAcores.Rd index e8e0fef6d..e0de7a25d 100644 --- a/man/identifyTMAcores.Rd +++ b/man/identifyTMAcores.Rd @@ -14,8 +14,8 @@ identifyTMAcores( include_all_ids = TRUE, missing_id_name = "not_connected", min_nodes = 5, - repair_split_cores = TRUE, - repair_split_test_angles = c(0, 90), + join_split_cores = TRUE, + join_tolerance = 1.2, return_gobject = TRUE ) } @@ -41,11 +41,11 @@ a core.} \item{min_nodes}{numeric. Minimal number of nodes to not be considered an unconnected group.} -\item{repair_split_cores}{logical. 
Attempt to repair core IDs when a core is split down the middle and detected as two different cores.} -\item{repair_split_test_angles}{angles to rotate through when testing for -split cores. If cores are very close together, 0 and 90 are safest.} +\item{join_tolerance}{numeric. Max ratio allowed relative to previous max +core convex hull area when determining if a pair of cores should be joined.} \item{return_gobject}{logical. Return giotto object} } From 24c5ec953a57921aa23df2b95a10270cc0696dd5 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Sat, 26 Oct 2024 21:25:25 -0400 Subject: [PATCH 06/10] chore: use new gwith_seed() --- R/dimension_reduction.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/dimension_reduction.R b/R/dimension_reduction.R index 8fbe088b0..8b658a634 100644 --- a/R/dimension_reduction.R +++ b/R/dimension_reduction.R @@ -230,11 +230,11 @@ reduceDims <- function( ) if (set_seed) { - withSeed( + gwith_seed( + seed = seed_number, { pca_res <- do.call(BiocSingular::runPCA, pca_param) }, - seed = seed_number ) } else { pca_res <- do.call(BiocSingular::runPCA, pca_param) @@ -1757,7 +1757,7 @@ jackstrawPlot <- function(gobject, # create a random subset if random_subset is not NULL if (!is.null(random_subset)) { if (set_seed) { - withSeed(seed = seed_number, { + gwith_seed(seed = seed_number, { random_selection <- sort(sample( seq_len(ncol(expr_values)), random_subset )) @@ -1795,7 +1795,7 @@ jackstrawPlot <- function(gobject, } if (set_seed) { - withSeed(seed = seed_number, { + gwith_seed(seed = seed_number, { jtest <- .perm_pa( dat = expr_values, iter = iter, From e58f1618641c49a202f61fbdfcccc368eb796426 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Sun, 27 Oct 2024 17:56:22 -0400 Subject: [PATCH 07/10] chore: deps cleanups --- DESCRIPTION | 2 -- R/clustering.R | 4 ++-- R/globals.R | 2 +- R/image_registration.R | 4 ++-- R/zzz.R | 2 +- 
tests/testthat/test_visium.R | 4 ++-- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c57b4af02..9b0ed6931 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,8 +26,6 @@ URL: https://drieslab.github.io/Giotto/, https://github.com/drieslab/Giotto BugReports: https://github.com/drieslab/Giotto/issues RoxygenNote: 7.3.2 Depends: - base (>= 4.4.1), - utils (>= 4.4.1), R (>= 4.4.1), methods, GiottoClass (>= 0.3.3) diff --git a/R/clustering.R b/R/clustering.R index f57dd4f84..fe858c0f4 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -3228,11 +3228,11 @@ getDendrogramSplits <- function(gobject, if (show_dend == TRUE) { # plot dendrogram - graphics::plot(cordend) + plot(cordend) # add horizontal lines if (!is.null(h)) { - graphics::abline(h = h, col = h_color) + abline(h = h, col = h_color) } } diff --git a/R/globals.R b/R/globals.R index 53c08c870..c6d015e16 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,4 +1,4 @@ -utils::globalVariables(names = c( +globalVariables(names = c( ":=", ".N", ".SD", ".", "cast", "%--%", ".inc", # igraph "python_leiden", "python_louvain", "python_spatial_genes", diff --git a/R/image_registration.R b/R/image_registration.R index b307ebbf3..0cc485233 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -705,10 +705,10 @@ registerGiottoObjectListRvision <- function(gobject_list = gobject_list, ## 4. 
Compute transformations # Choose reference image - refImage <- unreg_images[[base::floor(length(unreg_images) / 2)]] + refImage <- unreg_images[[floor(length(unreg_images) / 2)]] # Compute ECC transforms - transfs <- base::vector(mode = "list", length = length(unreg_images)) + transfs <- vector(mode = "list", length = length(unreg_images)) for (i in seq_along(unreg_images)) { transfs[[i]] <- Rvision::findTransformECC( refImage, unreg_images[[i]], diff --git a/R/zzz.R b/R/zzz.R index 9cfa6b5af..3c024dc1c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -3,7 +3,7 @@ .onAttach <- function(libname, pkgname) { ## print version number ## - packageStartupMessage("Giotto Suite ", utils::packageVersion("Giotto")) + packageStartupMessage("Giotto Suite ", packageVersion("Giotto")) check_ver <- getOption("giotto.check_version", TRUE) if (isTRUE(check_ver)) { diff --git a/tests/testthat/test_visium.R b/tests/testthat/test_visium.R index 1e3fd6f14..c36811741 100644 --- a/tests/testthat/test_visium.R +++ b/tests/testthat/test_visium.R @@ -19,7 +19,7 @@ lapply( function(url) { myfilename <- basename(url) mydestfile <- file.path(datadir, myfilename) - utils::download.file(url = url, destfile = mydestfile, quiet = TRUE) + download.file(url = url, destfile = mydestfile, quiet = TRUE) } ) @@ -32,7 +32,7 @@ lapply( "raw_feature_bc_matrix.tar.gz", "spatial.tar.gz" )], - utils::untar, + untar, exdir = datadir ) From d0d1f22e9caa0048c0e09b006996aac38501e1f0 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Sun, 27 Oct 2024 18:01:04 -0400 Subject: [PATCH 08/10] Update .Rbuildignore --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 5c0eec310..daf3fbc13 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,6 +2,7 @@ ^renv\.lock$ ^Giotto\.Rproj$ ^\.Rproj\.user$ +^\.lintr$ ^LICENSE\.md$ ^README\.Rmd$ From 67e9088a56a42fa2042af88ef890e02f54c38708 Mon Sep 17 00:00:00 2001 From: George Chen 
<72078254+jiajic@users.noreply.github.com> Date: Sun, 27 Oct 2024 21:34:11 -0400 Subject: [PATCH 09/10] Update NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 3dbe44bcd..b97db3e42 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# Giotto 4.1.3 +# Giotto 4.1.3 (2024/10/27) ## New * Add `giottoToAnndataZarr()` to create a local anndata zarr folder and interact with the vitessceR package. From daabd7073fad00f31b2d254ca8b22f3ff294776d Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Sun, 27 Oct 2024 21:45:20 -0400 Subject: [PATCH 10/10] Update NEWS.md --- NEWS.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NEWS.md b/NEWS.md index b97db3e42..ecb519613 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,15 @@ ## New * Add `giottoToAnndataZarr()` to create a local anndata zarr folder and interact with the vitessceR package. +* `reduceDims()` API function for dimension reductions +* `runNMF()` implementation that works via RcppML ## Changes +* `screePlot()` `name` param changed to `dim_reduction_name` * `runWNN()` and `runIntegratedUMAP()` arguments were updated to make the function flexible to handle any number of modalities. * update `jackstrawPlot()` to make more flexible and efficient. Changed default params for `scaling`, `centering`, and `feats_to_use` to match `runPCA()` +* change warning when reduction "feats" is selected in `runtSNE()` to error to avoid accidentally wiping the `giotto` object. + # Giotto 4.1.2 @@ -26,7 +31,11 @@ * move `progressr` and `jsonlite` dependencies to GiottoUtils v0.1.12 * remove `reshape2` dependency. +## Bug fixes +* `processGiotto()` can now skip adjust step by default + ## New +* `identifyTMAcores()` for assigning IDs to tissue microarray spatial data. * `labelTransfer()` for transferring labels between giotto objects or subsets thereof. Supercedes `doClusterProjection()` # Giotto 4.1.1