From 4f9398134032a475ee60e56f20f167899d3adf44 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 12 Nov 2024 10:50:18 -0500 Subject: [PATCH] add tma - split_join should be made shorter now that the specific TMA dataset is already covered --- _pkgdown.yml | 3 + vignettes/tma.Rmd | 321 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 324 insertions(+) create mode 100644 vignettes/tma.Rmd diff --git a/_pkgdown.yml b/_pkgdown.yml index c165219ff..98571a84c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -120,6 +120,8 @@ navbar: href: articles/visium_prostate_integration.html - text: Object splitting and joining href: articles/split_join.html + - text: Tissue Microarrays + href: articles/tma.html - text: ------- - text: Spatial manipulation menu: @@ -373,6 +375,7 @@ articles: contents: - visium_prostate_integration - split_join + - tma - title: Try Giotto in the Cloud navbar: ~ contents: diff --git a/vignettes/tma.Rmd b/vignettes/tma.Rmd new file mode 100644 index 000000000..fbfb9e520 --- /dev/null +++ b/vignettes/tma.Rmd @@ -0,0 +1,321 @@ +--- +title: "Tissue Microarrays" +output: + html_document: + number_sections: true + toc: true +pkgdown: + as_is: true +vignette: > + %\VignetteIndexEntry{Tissue Microarrays} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +Tissue microarrays (TMAs) are a good way of sampling several tissues in the same +spatial assay. + +This example demonstrates loading in the data, isolating individual TMA cores, and annotating them, using a 10x Visium +[mouse TMA](https://www.10xgenomics.com/datasets/mouse-tissue-microarray-in-3x3-layout-with-2-mm-edge-to-edge-spacing-ffpe-2-standard) +dataset. + + +# Check Giotto installation + +```{r, eval=FALSE} +# Ensure Giotto Suite is installed. +if(!"Giotto" %in% installed.packages()) { + pak::pkg_install("drieslab/Giotto") +} + +# Ensure the Python environment for Giotto has been installed. 
+genv_exists <- Giotto::checkGiottoEnvironment() +if(!genv_exists){ + # The following command need only be run once to install the Giotto environment. + Giotto::installGiottoEnvironment() +} +``` + +# Setup example TMA dataset + +## Download the Data + +```{r, eval=FALSE} +## provide path to a folder to save the example data to +data_path <- "/path/to/data/" + +mat_link <- "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Mouse_TMA_3x3_2mm/CytAssist_11mm_FFPE_Mouse_TMA_3x3_2mm_filtered_feature_bc_matrix.h5" +spat_link <- "https://cf.10xgenomics.com/samples/spatial-exp/2.0.1/CytAssist_11mm_FFPE_Mouse_TMA_3x3_2mm/CytAssist_11mm_FFPE_Mouse_TMA_3x3_2mm_spatial.tar.gz" + +mat_path <- file.path(data_path, "filtered_matrix.h5") +spat_path <- file.path(data_path, "spatial.tar.gz") + +download.file(mat_link, destfile = mat_path) +download.file(spat_link, destfile = spat_path) + +# untar the spatial folder +untar(spat_path, exdir = file.path(data_path)) +spat_dir <- file.path(data_path, "spatial") +``` + + +## Create Giotto Object + +```{r, eval=FALSE} +library(Giotto) + +# 1. set working directory +results_folder <- "path/to/results" + +# Optional: Specify a path to a Python executable within a conda or miniconda +# environment. If set to NULL (default), the Python executable within the previously +# installed Giotto environment will be used. +python_path <- NULL # alternatively, "/local/python/path/python" if desired. 
+ +instrs <- instructions(python_path = python_path, save_dir = results_folder) + +tma <- createGiottoVisiumObject( + h5_visium_path = mat_path, + h5_tissue_positions_path = file.path(spat_dir, "tissue_positions.csv"), + h5_image_png_path = file.path(spat_dir, "tissue_hires_image.png"), + h5_json_scalefactors_path = file.path(spat_dir, "scalefactors_json.json"), + instructions = instrs +) +``` + +```{r, eval=FALSE} +# plot the data +spatPlot2D(tma, show_image = TRUE, point_size = 1.5, point_alpha = 0.7) +``` + +```{r, echo=FALSE, out.width="100%", fig.align="center"} +knitr::include_graphics("images/split_join/1_spatplot.png") +``` + +## Detect individual TMA cores + +By building a spatial network, we can find out which data points are spatially +contiguous by checking spatial distances, breaking connections that are too +far away, and then checking for graph membership. + +This functionality is implemented as `identifyTMAcores()`, along with some +additional steps that filter for a minimum number of nodes to be considered a core +and identify and join together cores that show up as two or more separate pieces +of tissue. + +```{r, eval=FALSE} +# create a default delaunay spatial network +tma <- createSpatialNetwork(tma) +tma <- identifyTMAcores(tma) +spatPlot2D(tma, + show_image = TRUE, + cell_color = "core_id", + point_size = 1.5, + point_alpha = 0.7 +) +``` + +```{r, echo=FALSE, out.width="100%", fig.align="center"} +knitr::include_graphics("images/split_join/2_identify_tma.png") +``` + +The cores have been assigned numerical IDs. There is also a `"not_connected"` +group made up of clusters that are too small to be considered a spatial region and are not connected to +a larger group of data points. 
+ +We can also now add the core annotations from the 10x dataset description. + +```{r, eval=FALSE} +# also add the core annotations from the 10x dataset description +tma <- annotateGiotto(tma, + name = "sample", + cluster_column = "core_id", + annotation_vector = c( + "1" = "lung1", + "2" = "brain2", + "3" = "brain3", + "4" = "lung5", + "5" = "brain1", + "6" = "brain4", + "7" = "lung4", + "8" = "lung3", + "9" = "lung2", + "not_connected" = "not_connected" + ) +) + +spatPlot2D(tma, + show_image = TRUE, + cell_color = "sample", + point_size = 1.5, + point_alpha = 1 +) +``` + +```{r, echo=FALSE, out.width="100%", fig.align="center"} +knitr::include_graphics("images/split_join/3_annotated.png") +``` + + +# Reorganize Object by Tissue Type (optional) + +This particular dataset contains two types of tissues (lung and brain). It can be helpful to analyze +these in separate expression spaces. + +## Split Giotto Object + +A split operation will split a single `giotto` object into a list of several objects based on a cell metadata column defined by the `by` parameter. +Here we split by the newly added `"sample"` annotation that tells us what tissue the core is from. + +```{r, eval=FALSE} +object_list <- splitGiotto(tma, by = "sample") +length(object_list) +``` + +``` +[1] 10 +``` + +```{r, eval=FALSE} +names(object_list) +``` + +``` + [1] "lung1" "brain2" "brain3" "lung5" "brain1" + [6] "brain4" "lung4" "lung3" "not_connected" "lung2" +``` + +## Join Similar Tissue Types + +Next we join together the `giotto` objects containing cores with similar tissue +types, finishing the reorganization of the analysis objects. 
+ +```{r, eval=FALSE} +lung_reps <- object_list[c("lung1", "lung2", "lung3", "lung4", "lung5")] +brain_reps <- object_list[c("brain1", "brain2", "brain3", "brain4")] + +lung <- joinGiottoObjects( + gobject_list = lung_reps, + gobject_names = names(lung_reps), + join_method = "no_change" +) +lung <- addStatistics(lung, expression_values = "raw") +spatPlot2D(lung, + cell_color = "total_expr", + color_as_factor = FALSE, + gradient_style = "sequential", + group_by = "sample", + point_size = 3, + point_border_stroke = 0, + point_alpha = 0.7, + show_image = TRUE +) +``` + + +```{r, echo=FALSE, out.width="100%", fig.align="center"} +knitr::include_graphics("images/split_join/4_lung_joined.png") +``` + +```{r, eval=FALSE} +brain <- joinGiottoObjects( + gobject_list = brain_reps, + gobject_names = names(brain_reps), + join_method = "no_change" +) +brain <- addStatistics(brain, expression_values = "raw") +spatPlot2D(brain, + cell_color = "total_expr", + color_as_factor = FALSE, + gradient_style = "sequential", + group_by = "sample", + point_size = 3, + point_border_stroke = 0, + point_alpha = 0.7, + show_image = TRUE +) +``` + +```{r, echo=FALSE, out.width="80%", fig.align="center"} +knitr::include_graphics("images/split_join/5_brain_joined.png") +``` + + +The datasets are now organized into the two multi-sample `giotto` objects `lung` and `brain`. +Downstream analyses can now continue on these two sets of data independently. + +Note that these objects were joined without integration, so a good first step +would be to cluster the datasets and check for inter-core batch effects, and +then proceed with [integration](visium_prostate_integration) if necessary. 
+ + +# Session Info +```{r, eval=FALSE} +sessionInfo() +``` + +``` +R version 4.4.1 (2024-06-14) +Platform: aarch64-apple-darwin20 +Running under: macOS 15.0.1 + +Matrix products: default +BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib +LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0 + +locale: +[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 + +time zone: America/New_York +tzcode source: internal + +attached base packages: +[1] stats graphics grDevices utils datasets methods base + +other attached packages: +[1] Giotto_4.1.3 GiottoClass_0.4.1 + +loaded via a namespace (and not attached): + [1] colorRamp2_0.1.0 deldir_2.0-4 gridExtra_2.3 + [4] rlang_1.1.4 magrittr_2.0.3 GiottoUtils_0.2.0 + [7] matrixStats_1.4.1 compiler_4.4.1 png_0.1-8 + [10] vctrs_0.6.5 hdf5r_1.3.10 pkgconfig_2.0.3 + [13] SpatialExperiment_1.14.0 crayon_1.5.3 fastmap_1.2.0 + [16] backports_1.5.0 magick_2.8.4 XVector_0.44.0 + [19] labeling_0.4.3 ggraph_2.2.1 utf8_1.2.4 + [22] rmarkdown_2.28 UCSC.utils_1.0.0 bit_4.5.0 + [25] purrr_1.0.2 xfun_0.47 zlibbioc_1.50.0 + [28] cachem_1.1.0 beachmat_2.20.0 GenomeInfoDb_1.40.0 + [31] jsonlite_1.8.9 DelayedArray_0.30.0 tweenr_2.0.3 + [34] BiocParallel_1.38.0 terra_1.7-78 irlba_2.3.5.1 + [37] parallel_4.4.1 R6_2.5.1 RColorBrewer_1.1-3 + [40] reticulate_1.39.0 pkgload_1.3.4 GenomicRanges_1.56.0 + [43] scattermore_1.2 Rcpp_1.0.13 SummarizedExperiment_1.34.0 + [46] knitr_1.48 IRanges_2.38.0 Matrix_1.7-0 + [49] igraph_2.0.3 tidyselect_1.2.1 viridis_0.6.5 + [52] rstudioapi_0.16.0 abind_1.4-8 yaml_2.3.10 + [55] codetools_0.2-20 pkgbuild_1.4.4 lattice_0.22-6 + [58] tibble_3.2.1 Biobase_2.64.0 withr_3.0.1 + [61] evaluate_1.0.0 desc_1.4.3 polyclip_1.10-7 + [64] xml2_1.3.6 pillar_1.9.0 MatrixGenerics_1.16.0 + [67] whisker_0.4.1 checkmate_2.3.2 stats4_4.4.1 + [70] plotly_4.10.4 generics_0.1.3 dbscan_1.2-0 + [73] 
rprojroot_2.0.4 S4Vectors_0.42.0 ggplot2_3.5.1 + [76] sparseMatrixStats_1.16.0 munsell_0.5.1 scales_1.3.0 + [79] GiottoData_0.2.15 gtools_3.9.5 glue_1.8.0 + [82] lazyeval_0.2.2 tools_4.4.1 GiottoVisuals_0.2.6 + [85] data.table_1.16.0 ScaledMatrix_1.12.0 graphlayouts_1.1.1 + [88] fs_1.6.4 tidygraph_1.3.1 cowplot_1.1.3 + [91] grid_4.4.1 tidyr_1.3.1 colorspace_2.1-1 + [94] SingleCellExperiment_1.26.0 GenomeInfoDbData_1.2.12 ggforce_0.4.2 + [97] BiocSingular_1.20.0 cli_3.6.3 rsvd_1.0.5 +[100] fansi_1.0.6 S4Arrays_1.4.0 viridisLite_0.4.2 +[103] dplyr_1.1.4 uwot_0.2.2 downlit_0.4.4 +[106] gtable_0.3.5 digest_0.6.37 BiocGenerics_0.50.0 +[109] SparseArray_1.4.1 ggrepel_0.9.6 farver_2.1.2 +[112] rjson_0.2.21 htmlwidgets_1.6.4 memoise_2.0.1 +[115] htmltools_0.5.8.1 pkgdown_2.1.0 lifecycle_1.0.4 +[118] httr_1.4.7 bit64_4.5.2 MASS_7.3-60.2 +``` + +