From 2f84eef70c6b9b5f613f8f2a8210eb30b9211542 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:36:23 -0400 Subject: [PATCH 1/8] add to networks tutorial --- vignettes/networks.Rmd | 76 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/vignettes/networks.Rmd b/vignettes/networks.Rmd index 0589cfb99..418001d55 100644 --- a/vignettes/networks.Rmd +++ b/vignettes/networks.Rmd @@ -178,6 +178,82 @@ dimPlot2D(mer, show_NN_network = TRUE, nn_network_to_use = "kNN", network_name = knitr::include_graphics("images/networks/5_kNN_dim.png") ``` + +# API function + +`createNetwork()` exported from *GiottoClass* allows creation of any of the above network types, starting from a `matrix` of nodes. These nodes can be n-dimensional expression information, dimension reduction, or spatial coordinates. The output is either `igraph` if `as.igraph = TRUE` and `data.table` otherwise. + +```{r, eval=FALSE} +topo <- expand.grid(x = 1:nrow(volcano), + y = 1:ncol(volcano)) +topo$z <- c(volcano) +topo <- as.matrix(topo) + +createNetwork(topo, type = "kNN", k = 8) # returns as igraph +``` + +``` +IGRAPH 64720af DNW- 5307 42456 -- ++ attr: name (v/c), weight (e/n), distance (e/n) ++ edges from 64720af (vertex names): + [1] 1 ->88 2 ->89 3 ->90 4 ->91 5 ->92 6 ->7 7 ->6 8 ->94 9 ->95 10->97 11->97 +[12] 12->99 13->12 14->13 15->102 16->102 17->103 18->19 19->18 20->21 21->22 22->21 +[23] 23->24 24->25 25->26 26->25 27->115 28->29 29->28 30->31 31->30 32->31 33->120 +[34] 34->121 35->36 36->124 37->38 38->37 39->40 40->41 41->40 42->43 43->42 44->45 +[45] 45->46 46->45 47->134 48->49 49->48 50->51 51->52 52->51 53->52 54->141 55->56 +[56] 56->57 57->56 58->59 59->60 60->59 61->62 62->63 63->62 64->65 65->64 66->67 +[67] 67->66 68->69 69->68 70->69 71->158 72->159 73->160 74->161 75->162 76->163 77->165 +[78] 78->79 79->78 80->81 81->80 82->83 83->170 84->85 85->172 86->87 87->86 88->1 ++ ... 
omitted several edges +``` + +```{r, eval=FALSE} +createNetwork(topo, type = "kNN", k = 8, as.igraph = FALSE) # return as data.table +``` + +``` + from to weight distance + + 1: 1 88 0.5000000 1.000000 + 2: 2 89 0.5000000 1.000000 + 3: 3 90 0.5000000 1.000000 + 4: 4 91 0.5000000 1.000000 + 5: 5 92 0.5000000 1.000000 + --- +42452: 5303 5130 0.3090170 2.236068 +42453: 5304 5306 0.3333333 2.000000 +42454: 5305 5303 0.3333333 2.000000 +42455: 5306 5131 0.3090170 2.236068 +42456: 5307 5131 0.2612039 2.828427 +``` + +```{r, eval=FALSE} +# default weight for kNN was 1 / (d + 1) +# use a custom weight function +createNetwork(topo, + type = "kNN", + k = 8, + as.igraph = FALSE, + weight_fun = function(d) 1 / (d)^2 +) +``` + +``` + from to weight distance + + 1: 1 88 1.000 1.000000 + 2: 2 89 1.000 1.000000 + 3: 3 90 1.000 1.000000 + 4: 4 91 1.000 1.000000 + 5: 5 92 1.000 1.000000 + --- +42452: 5303 5130 0.200 2.236068 +42453: 5304 5306 0.250 2.000000 +42454: 5305 5303 0.250 2.000000 +42455: 5306 5131 0.200 2.236068 +42456: 5307 5131 0.125 2.828427 +``` + # Session Info ```{r, eval=FALSE} sessionInfo() From 5b3cdcb69601d02352ee78b062222ccfd0a07b0d Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 00:21:01 -0400 Subject: [PATCH 2/8] chore: code formatting --- vignettes/spatial_genes.Rmd | 79 +++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/vignettes/spatial_genes.Rmd b/vignettes/spatial_genes.Rmd index abfe03754..35192b9e3 100644 --- a/vignettes/spatial_genes.Rmd +++ b/vignettes/spatial_genes.Rmd @@ -46,16 +46,18 @@ g <- GiottoData::loadGiottoMini("visium") # Create a spatial network ```{r, eval=FALSE} -g <- createSpatialNetwork(gobject = g, - method = "kNN", - k = 6, - maximum_distance_knn = 400, - name = "spatial_network") - -spatPlot2D(gobject = g, - show_network= TRUE, - network_color = "blue", - spatial_network_name = "spatial_network") +g <- 
createSpatialNetwork(g, + method = "kNN", + k = 6, + maximum_distance_knn = 400, + name = "spatial_network" +) + +spatPlot2D(g, + show_network = TRUE, + network_color = "blue", + spatial_network_name = "spatial_network" +) ``` ```{r, echo=FALSE, out.width="60%", fig.align='center'} @@ -70,10 +72,11 @@ This step may take a few minutes to run. ```{r, eval=FALSE} ranktest <- binSpect(g, - bin_method = "rank", - calc_hub = TRUE, - hub_min_int = 5, - spatial_network_name = "spatial_network") + bin_method = "rank", + calc_hub = TRUE, + hub_min_int = 5, + spatial_network_name = "spatial_network" +) ``` ## Visualize top results @@ -82,10 +85,11 @@ Plot the scaled expression of genes with the highest probability of being spatia ```{r, eval=FALSE} spatFeatPlot2D(g, - expression_values = "scaled", - feats = ranktest$feats[1:6], - cow_n_col = 2, - point_size = 2) + expression_values = "scaled", + feats = ranktest$feats[1:6], + cow_n_col = 2, + point_size = 2 +) ``` ```{r, echo=FALSE, out.width="80%", fig.align='center'} @@ -103,8 +107,7 @@ ext_spatial_genes <- ranktest[1:100,]$feats ## Calculate pairwise distances between genes. ```{r, eval=FALSE} -spat_cor_netw_DT <- detectSpatialCorFeats( - g, +spat_cor_netw_DT <- detectSpatialCorFeats(g, method = "network", spatial_network_name = "spatial_network", subset_feats = ext_spatial_genes) @@ -114,17 +117,19 @@ spat_cor_netw_DT <- detectSpatialCorFeats( ```{r, eval=FALSE} spat_cor_netw_DT <- clusterSpatialCorFeats(spat_cor_netw_DT, - name = "spat_netw_clus", - k = 5) + name = "spat_netw_clus", + k = 5 +) ``` ## Plot the correlation of the top spatial genes with their assigned cluster. 
```{r, eval=FALSE} heatmSpatialCorFeats(g, - spatCorObject = spat_cor_netw_DT, - use_clus_name = "spat_netw_clus", - heatmap_legend_param = list(title = NULL)) + spatCorObject = spat_cor_netw_DT, + use_clus_name = "spat_netw_clus", + heatmap_legend_param = list(title = NULL) +) ``` ```{r, echo=FALSE, out.width="80%", fig.align='center'} @@ -135,8 +140,9 @@ knitr::include_graphics("images/spatial_genes/3-heatmSpatialCorFeats.png") ```{r, eval=FALSE} netw_ranks <- rankSpatialCorGroups(g, - spatCorObject = spat_cor_netw_DT, - use_clus_name = "spat_netw_clus") + spatCorObject = spat_cor_netw_DT, + use_clus_name = "spat_netw_clus" +) ``` ```{r, echo=FALSE, out.width="70%", fig.align='center'} @@ -147,25 +153,28 @@ knitr::include_graphics("images/spatial_genes/4-rankSpatialCorGroups.png") ```{r, eval=FALSE} cluster_genes_DT <- showSpatialCorFeats(spat_cor_netw_DT, - use_clus_name = "spat_netw_clus", - show_top_feats = 1) + use_clus_name = "spat_netw_clus", + show_top_feats = 1 +) cluster_genes <- cluster_genes_DT$clus names(cluster_genes) <- cluster_genes_DT$feat_ID g <- createMetafeats(g, - feat_clusters = cluster_genes, - name = "cluster_metagene") + feat_clusters = cluster_genes, + name = "cluster_metagene" +) ``` Plot the spatial distribution of the metagene enrichment scores of each spatial co-expression cluster. 
```{r, eval=FALSE} spatCellPlot(g, - spat_enr_names = "cluster_metagene", - cell_annotation_values = netw_ranks$clusters, - point_size = 2, - cow_n_col = 2) + spat_enr_names = "cluster_metagene", + cell_annotation_values = netw_ranks$clusters, + point_size = 2, + cow_n_col = 2 +) ``` ```{r, echo=FALSE, out.width="100%", fig.align='center'} From 8680a636e1032e366acc2320e7491cc26489381a Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 00:22:02 -0400 Subject: [PATCH 3/8] add cell meta annotation examples --- vignettes/core_functions.Rmd | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/vignettes/core_functions.Rmd b/vignettes/core_functions.Rmd index b8d2f1bc3..31c47d4fd 100644 --- a/vignettes/core_functions.Rmd +++ b/vignettes/core_functions.Rmd @@ -143,10 +143,44 @@ dim(g) # feature metadata fDataDT(g) +# cell metadata +pDataDT(g) + # get specific values from cell metadata g$leiden_clus -# cell metadata +# set values directly into cell metadata +# this is safest when directly using values from another metadata column +# since cell_IDs and their ordering may not be as expected. 
+g$leiden_clus2 <- sprintf("clus_%s", g$leiden_clus) +force(g$leiden_clus2) + +# annotate based on another column +g <- annotateGiotto(g, + name = "region", + cluster_column = "leiden_clus", + annotation_vector = c( + "1" = "a", + "2" = "b", + "3" = "c", + "4" = "a", + "5" = "d", + "6" = "b" + ) +) +spatPlot2D(g, cell_color = "region") + +# merge in table of values to metadata +ann <- data.frame( + cell_ID = spatIDs(g), + random = rnorm(ncol(g)) +) +force(ann) +g <- addCellMetadata(g, + new_metadata = ann, + by_column = TRUE, + column_cell_ID = "cell_ID" +) pDataDT(g) ``` From 8e35ffb40dafd3307c2ae08711d988357a1c156d Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 00:44:30 -0400 Subject: [PATCH 4/8] Update CONTRIBUTING.md --- CONTRIBUTING.md | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 920dbde07..c39c610e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,19 +56,22 @@ Style Guide. These guidelines are preferences and strongly encouraged! - We follow the BioConductor styling. You can set this up easily by installing *biocthis* and *styler.* - ```{r, eval=FALSE} - # package installations - BiocManager::install("biocthis") - install.packages("styler") - # styling a file - b_style <- biocthis::bioc_style() - styler::style_file(path = "[???]", transformers = b_style) +```{r, eval=FALSE} +# package installations +BiocManager::install("biocthis") +install.packages("styler") + +# styling a file +b_style <- biocthis::bioc_style() +styler::style_file(path = "[???]", transformers = b_style) + +# styling the active package (may lead to lots of conflicts) +# !! This should only be done by core devs with a lot of caution and forewarning !! +styler::style_pkg(transformers = b_style) +``` + - # styling the active package (may lead to lots of conflicts) - # !! 
This should only be done be core devs with a lot of caution and forewarning !! - styler::style_pkg(transformers = b_style) - ``` - setting your default indent size to be 4 spaces instead of 2 is also recommended. @@ -333,7 +336,7 @@ vignette: > --- ``` -- 2. Absolutely no eval=TRUE for example code. +- 2. Absolutely no `eval=TRUE` for example code. To save time when rendering the website, all chunks should not evaluate the code. Image results should be included via linking or a `knitr` chunk of this style: From 05cd4d870abef7794a80b9a360cb18923c3e4563 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 01:08:51 -0400 Subject: [PATCH 5/8] add some links --- vignettes/core_functions.Rmd | 6 +++++- vignettes/dimension_reduction.Rmd | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/vignettes/core_functions.Rmd b/vignettes/core_functions.Rmd index 31c47d4fd..d109cf586 100644 --- a/vignettes/core_functions.Rmd +++ b/vignettes/core_functions.Rmd @@ -39,7 +39,8 @@ g <- setGiotto(g, data_list) # Spatial Aggregation -For use with raw subcellular data. Find overlapped feature information and convert to a matrix +For use with raw subcellular data. Find overlapped feature information and convert to a matrix. +Other examples, starting with different data types can be found in the [object creation](articles/object_creation.html) tutorial. 
```{r, eval=FALSE} # overlap of feature points with polygons g <- calculateOverlap(g) @@ -296,6 +297,9 @@ joinGiottoObjects(list(g, g), gobject_names = c("a", "b"), dry_run = TRUE) j <- joinGiottoObjects(list(g, g), gobject_names = c("a", "b")) ``` +For a more information, see the object [splitting and joining](articles/split_join.html) tutorial + + ## Splitting objects ```{r, eval=FALSE} diff --git a/vignettes/dimension_reduction.Rmd b/vignettes/dimension_reduction.Rmd index e0f1423af..56f20693b 100644 --- a/vignettes/dimension_reduction.Rmd +++ b/vignettes/dimension_reduction.Rmd @@ -59,7 +59,7 @@ These methods are typically used early in the analysis pipeline after filtering, **Features to use** -Which features to include when calculating these dimension reductions has a large effect on the information extracted. Highly variable features will focus on variation from features with the largest expression variation. Spatially variable features will focus on features with spatially organized expression. However, when there are not many features (only hundreds of features), it is a better idea to include all features than to use a subset. +Which features to include when calculating these dimension reductions has a large effect on the information extracted. [Highly variable features](articles/hvf.html) will focus on variation from features with the largest expression variation. [Spatially variable features](articles/spatial_genes.html) will focus on features with spatially organized expression. However, when there are not many features (only hundreds of features), it is a better idea to include all features than to use a subset. 
**Centering and Scaling** From 7faf4fd0df17842bac03fcebc59f51592bef480b Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 07:22:09 -0400 Subject: [PATCH 6/8] fix links --- vignettes/core_functions.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/core_functions.Rmd b/vignettes/core_functions.Rmd index d109cf586..a77f1ab25 100644 --- a/vignettes/core_functions.Rmd +++ b/vignettes/core_functions.Rmd @@ -40,7 +40,7 @@ g <- setGiotto(g, data_list) # Spatial Aggregation For use with raw subcellular data. Find overlapped feature information and convert to a matrix. -Other examples, starting with different data types can be found in the [object creation](articles/object_creation.html) tutorial. +Other examples, starting with different data types can be found in the [object creation](object_creation.html) tutorial. ```{r, eval=FALSE} # overlap of feature points with polygons g <- calculateOverlap(g) @@ -95,7 +95,7 @@ g <- addSpatialCentroidLocations(g, poly_info = "aggregate") # Standard workflow -For more detail, see the standard workflow vignette +For more detail, see the [standard workflow](general_workflow.html) vignette ```{r, eval=FALSE} g <- filterGiotto(g, expression_threshold = 1, @@ -297,7 +297,7 @@ joinGiottoObjects(list(g, g), gobject_names = c("a", "b"), dry_run = TRUE) j <- joinGiottoObjects(list(g, g), gobject_names = c("a", "b")) ``` -For a more information, see the object [splitting and joining](articles/split_join.html) tutorial +For more information, see the object [splitting and joining](split_join.html) tutorial ## Splitting objects From 19f70bb8352da23d4240fc39b813a679d3a974dc Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 07:29:06 -0400 Subject: [PATCH 7/8] fix link and update text --- vignettes/dimension_reduction.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/vignettes/dimension_reduction.Rmd b/vignettes/dimension_reduction.Rmd index 56f20693b..1498287f2 100644 --- a/vignettes/dimension_reduction.Rmd +++ b/vignettes/dimension_reduction.Rmd @@ -59,7 +59,7 @@ These methods are typically used early in the analysis pipeline after filtering, **Features to use** -Which features to include when calculating these dimension reductions has a large effect on the information extracted. [Highly variable features](articles/hvf.html) will focus on variation from features with the largest expression variation. [Spatially variable features](articles/spatial_genes.html) will focus on features with spatially organized expression. However, when there are not many features (only hundreds of features), it is a better idea to include all features than to use a subset. +Which features to include when calculating these dimension reductions has a large effect on the information extracted. Only using [highly variable features](hvf.html) will focus on variation from features with the largest expression variation, maximizing expression-based separation of clusters. [Spatially variable features](spatial_genes.html) will focus on features with spatially organized expression, resulting in clusters that better map to spatial organization. However, when there are not many features (only hundreds of features), it is a better idea to include all features than to use a subset. 
**Centering and Scaling** From d552edae751c68ffdc292ea5ae724e4ddbdc7e1a Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 25 Oct 2024 07:33:29 -0400 Subject: [PATCH 8/8] Update core_functions.Rmd --- vignettes/core_functions.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vignettes/core_functions.Rmd b/vignettes/core_functions.Rmd index a77f1ab25..58e12fe10 100644 --- a/vignettes/core_functions.Rmd +++ b/vignettes/core_functions.Rmd @@ -38,9 +38,9 @@ g <- setGiotto(g, data_list) ``` -# Spatial Aggregation -For use with raw subcellular data. Find overlapped feature information and convert to a matrix. -Other examples, starting with different data types can be found in the [object creation](object_creation.html) tutorial. +# Spatial Aggregation (subcellular data only) +For use with raw subcellular data that has spatial boundaries/polygon annotations and spatial features. Find overlapped feature information and convert to a matrix. +Other examples can be found in the [object creation](object_creation.html) tutorial. ```{r, eval=FALSE} # overlap of feature points with polygons g <- calculateOverlap(g) @@ -87,7 +87,7 @@ activeSpatUnit(g) <- "aggregate" activeFeatType(g) <- "rna" ``` -# Spatial Centroid Calculation +# Spatial Centroid Calculation (polygon annotations only) ```{r, eval=FALSE} g <- addSpatialCentroidLocations(g, poly_info = "aggregate")