1 Dataset explanation

Codeluppi et al. created a cyclic single-molecule fluorescence in situ hybridization (osmFISH) technology and define the cellular organization of the somatosensory cortex with the expression of 33 genes in 5,328 cells.

2 Set up Giotto environment

# Ensure Giotto Suite and GiottoData, a small, helper module for tutorials, are installed.
pkgs <- c("Giotto", "GiottoData")
need_install <- pkgs[sapply(pkgs, function(pkg) !requireNamespace(pkg, quietly = TRUE))]

if (length(need_install) > 0L) {
  if (!requireNamespace("pak", quietly = TRUE)) install.packages("pak")
  pak::pak(sprintf("drieslab/%s", need_install))
}

# Ensure the Python environment for Giotto has been installed.
genv_exists <- Giotto::checkGiottoEnvironment()
if(!genv_exists){
  # The following command need only be run once to install the Giotto environment.
  Giotto::installGiottoEnvironment()
}
library(Giotto)
library(GiottoData)

# 1. set working directory
results_folder <- "/path/to/results/"

# Optional: Specify a path to a Python executable within a conda or miniconda 
# environment. If set to NULL (default), the Python executable within the previously
# installed Giotto environment will be used.
python_path <- NULL # alternatively, "/local/python/path/python" if desired.

3 Dataset download

The osmFISH data to run this tutorial can be found here. Alternatively you can use the getSpatialDataset to automatically download this dataset like we do in this example; to download the data used to create the Giotto Object below, please ensure that wget is installed locally.

# download data to working directory ####
# if wget is installed, set method = "wget"
# if you run into authentication issues with wget, then add " extra = "--no-check-certificate" "
getSpatialDataset(dataset = "osmfish_SS_cortex", 
                  directory = results_folder, 
                  method = "wget")

4 Giotto global instructions and preparations

## instructions allow us to automatically save all plots into a chosen results folder
instructions <- createGiottoInstructions(save_plot = TRUE, 
                                         show_plot = FALSE,
                                         return_plot = FALSE,
                                         save_dir = results_folder,
                                         python_path = python_path)

expr_path <- file.path(results_folder, "osmFISH_prep_expression.txt")
loc_path <- file.path(results_folder, "osmFISH_prep_cell_coordinates.txt")
meta_path <- file.path(results_folder, "osmFISH_prep_cell_metadata.txt")

5 Create Giotto object & process data

## create
osm_test <- createGiottoObject(expression = expr_path,
                               spatial_locs = loc_path,
                               instructions = instructions)

## add field annotation
metadata <- data.table::fread(file = meta_path)

osm_test <- addCellMetadata(osm_test, 
                            new_metadata = metadata,
                            by_column = TRUE, 
                            column_cell_ID = "CellID")

## filter
osm_test <- filterGiotto(gobject = osm_test,
                         expression_threshold = 1,
                         feat_det_in_min_cells = 10,
                         min_det_feats_per_cell = 10,
                         expression_values = "raw",
                         verbose = TRUE)

## normalize Giotto
## there are two ways for osmFISH object

# 1. standard z-score way
osm_test <- normalizeGiotto(gobject = osm_test)

# 2. osmFISH way
raw_expr_matrix <- getExpression(osm_test, 
                                 values = "raw", 
                                 output = "matrix")

norm_genes <- (raw_expr_matrix/Matrix::rowSums(raw_expr_matrix)) * nrow(raw_expr_matrix)

norm_genes_cells <- Matrix::t((Matrix::t(norm_genes)/Matrix::colSums(norm_genes)) * ncol(raw_expr_matrix))

norm_genes_cells <- Giotto::createExprObj(norm_genes_cells, 
                                          name = "custom")

osm_test <- setGiotto(osm_test, 
                      x = norm_genes_cells)

## add a python path
x <- createGiottoInstructions()
osm_test <- changeGiottoInstructions(osm_test,
                                     params = "python_path",
                                     new_values = x$python_path)

## add gene & cell statistics
osm_test <- addStatistics(gobject = osm_test)

# save according to giotto instructions
spatPlot2D(gobject = osm_test,
           cell_color = "ClusterName",
           point_size = 1.3,
           save_param = list(save_name = "2_a_original_clusters"))

spatPlot2D(gobject = osm_test,
           cell_color = "Region",
           cell_color_code = c(
             "Excluded" = "lightsteelblue4",
             "Hippocampus" = "gold",
             "White matter" = "cyan",
             "Internal Capsule Caudoputamen" = "#9730F2",
             "Ventricle" = "green",
             "Layer 2-3 lateral" = "blue",
             "Layer 2-3 medial" = "yellowgreen",
             "Layer 3-4" = "brown",
             "Layer 4" = "orange",
             "Layer 5" = "red",
             "Layer 6" = "magenta",
             "Pia Layer 1" = "mediumturquoise"
           ),
           point_size = 1.5,
           save_param = list(save_name = "2_b_original_regions"))

spatPlot2D(gobject = osm_test,
           cell_color = "ClusterID",
           point_size = 1.5,
           save_param = list(save_name = "2_c_clusterID"))

spatPlot2D(gobject = osm_test, 
           cell_color = "total_expr", 
           color_as_factor = FALSE,
           point_size = 1.5,
           gradient_style = "sequential",
           gradient_midpoint = 160,
           gradient_limits = c(120, 220),
           background_color = "black",
           save_param = list(save_name = "2_d_total_expr_limits"))

6 Dimension reduction

## highly variable genes (HVG)
# only 33 genes so use all genes

## run PCA on expression values (default)
osm_test <- runPCA(gobject = osm_test,
                   expression_values = "custom", 
                   scale_unit = FALSE, 
                   center = FALSE)

screePlot(osm_test, 
          ncp = 30,
          save_param = list(save_name = "3_a_screeplot"))

plotPCA(osm_test,
        save_param = list(save_name = "3_b_PCA_reduction"))

## run UMAP and tSNE on PCA space (default)
osm_test <- runUMAP(osm_test, 
                    dimensions_to_use = 1:31, 
                    n_threads = 4)

plotUMAP(gobject = osm_test,
         save_param = list(save_name = "3_c_UMAP_reduction"))

plotUMAP(gobject = osm_test,
         cell_color = "total_expr", 
         color_as_factor = FALSE,
         point_size = 2,
         gradient_style = "sequential",
         gradient_midpoint = 180,
         gradient_limits = c(120, 220),
         background_color = "black",
         save_param = list(save_name = "3_d_UMAP_reduction_expression"))

osm_test <- runtSNE(osm_test, 
                    dimensions_to_use = 1:31, 
                    perplexity = 70, 
                    check_duplicates = FALSE)

plotTSNE(gobject = osm_test,
         save_param = list(save_name = "3_e_tSNE_reduction"))

7 Cluster

## hierarchical clustering
osm_test <- doHclust(gobject = osm_test, 
                     expression_values = "custom", 
                     k = 36)

plotUMAP(gobject = osm_test, 
         cell_color = "hclust", 
         point_size = 2.5,
         show_NN_network = FALSE, 
         edge_alpha = 0.05,
         save_param = list(save_name = "4_a_UMAP_hclust"))

## kmeans clustering
osm_test <- doKmeans(gobject = osm_test, 
                     expression_values = "normalized", 
                     dim_reduction_to_use = "pca", 
                     dimensions_to_use = 1:20, 
                     centers = 36, 
                     nstart = 2000)

plotUMAP(gobject = osm_test, 
         cell_color = "kmeans",
         point_size = 2.5, 
         show_NN_network = FALSE, 
         edge_alpha = 0.05, 
         save_param =  list(save_name = "4_b_UMAP_kmeans"))

## Leiden clustering strategy:
# 1. overcluster
# 2. merge small clusters that are highly similar

# sNN network (default)
osm_test <- createNearestNetwork(gobject = osm_test, 
                                 dimensions_to_use = 1:31, 
                                 k = 12)

osm_test <- doLeidenCluster(gobject = osm_test, 
                            resolution = 0.09, 
                            n_iterations = 1000)

plotUMAP(gobject = osm_test, 
         cell_color = "leiden_clus", 
         point_size = 2.5,
         show_NN_network = FALSE, 
         edge_alpha = 0.05,
         save_param = list(save_name = "4_c_UMAP_leiden"))

# merge small groups based on similarity
leiden_similarities <- getClusterSimilarity(osm_test,
                                            expression_values = "custom",
                                            cluster_column = "leiden_clus")

osm_test <- mergeClusters(osm_test,
                          expression_values = "custom",
                          cluster_column = "leiden_clus",
                          new_cluster_name = "leiden_clus_m",
                          max_group_size = 30,
                          force_min_group_size = 25,
                          max_sim_clusters = 10,
                          min_cor_score = 0.7)

plotUMAP(gobject = osm_test, 
         cell_color = "leiden_clus_m", 
         point_size = 2.5,
         show_NN_network = FALSE, 
         edge_alpha = 0.05,
         save_param = list(save_name = "4_d_UMAP_leiden_merged"))

## show cluster relationships
showClusterHeatmap(gobject = osm_test, 
                   expression_values = "custom", 
                   cluster_column = "leiden_clus_m",
                   save_param = list(save_name = "4_e_heatmap", units = "cm"),
                   row_names_gp = grid::gpar(fontsize = 6), 
                   column_names_gp = grid::gpar(fontsize = 6))

showClusterDendrogram(osm_test, 
                      cluster_column = "leiden_clus_m", 
                      h = 1, 
                      rotate = TRUE,
                      save_param = list(save_name = "4_f_dendro", units = "cm"))

8 Co-visualize

# expression and spatial
spatDimPlot2D(gobject = osm_test, 
              cell_color = "leiden_clus", 
              spat_point_size = 2,
              plot_alignment = "horizontal",
              save_param = list(save_name = "5_a_covis_leiden"))

spatDimPlot2D(gobject = osm_test, 
              cell_color = "leiden_clus_m", 
              spat_point_size = 2,  
              plot_alignment = "horizontal", 
              save_param = list(save_name = "5_b_covis_leiden_m"))

spatDimPlot2D(gobject = osm_test, 
              cell_color = "leiden_clus_m", 
              dim_point_size = 2, 
              spat_point_size = 2, 
              select_cell_groups = "29",  
              plot_alignment = "horizontal",
              save_param = list(save_name = "5_c_covis_leiden_merged_selected"))

spatDimPlot2D(gobject = osm_test, 
              cell_color = "total_expr", 
              color_as_factor = FALSE,
              dim_point_shape = "no_border",
              spat_point_shape = "no_border",
              dim_point_size = 0.4,
              spat_point_size = 0.4,
              dim_background_color = "black",
              spat_background_color = "black",
              gradient_style = "sequential",
              gradient_midpoint = 160, 
              gradient_limits = c(120,220),  
              plot_alignment = "horizontal",
              save_param = list(save_name = "5_d_total_expr"))

9 Differential expression

## split dendrogram nodes ##
dendsplits <- getDendrogramSplits(gobject = osm_test,
                                  expression_values = "custom",
                                  cluster_column = "leiden_clus_m")
split_3_markers <- findMarkers(gobject = osm_test,
                               method = "gini",
                               expression_values = "custom",
                               cluster_column = "leiden_clus_m",
                               group_1 = unlist(dendsplits[3]$tree_1), 
                               group_2 = unlist(dendsplits[3]$tree_2))

## Individual populations ##
markers_scran <- findMarkers_one_vs_all(gobject = osm_test,
                                        method = "scran",
                                        expression_values = "custom",
                                        cluster_column = "leiden_clus_m",
                                        min_feats = 2, 
                                        rank_score = 2)

## violinplot
topgenes_scran <- markers_scran[, head(.SD, 1), by = "cluster"]$feats

violinPlot(osm_test, 
           feats = unique(topgenes_scran), 
           cluster_column = "leiden_clus_m", 
           expression_values = "custom",
           strip_text = 5, 
           strip_position = "right",
           save_param = list(save_name = "6_a_violinplot"))

plotMetaDataHeatmap(osm_test, 
                    expression_values = "custom",
                    metadata_cols = "leiden_clus_m", 
                    save_param = list(save_name = "6_b_metaheatmap"))

plotMetaDataHeatmap(osm_test, 
                    expression_values = "custom",
                    metadata_cols = "leiden_clus_m", 
                    save_param = list(save_name = "6_e_metaheatmap_all_genes"))

plotMetaDataHeatmap(osm_test, 
                    expression_values = "custom",
                    metadata_cols = "ClusterName", 
                    save_param = list(save_name = "6_f_metaheatmap_all_genes_names"))

10 Cell type annotation

Use annotateGiotto() to annotate the clusters. For this dataset, we have ClusterName in the metadata.

11 Spatial grid

osm_test <- createSpatialGrid(gobject = osm_test,
                              sdimx_stepsize = 2000,
                              sdimy_stepsize = 2000,
                              minimum_padding = 0)

spatPlot2D(osm_test, 
           cell_color = "ClusterName", 
           show_grid = TRUE,
           grid_color = "lightblue", 
           spatial_grid_name = "spatial_grid",
           point_size = 1.5,
           save_param = list(save_name = "8_grid_det_cell_types"))

12 Spatial network

osm_test <- createSpatialNetwork(gobject = osm_test)

spatPlot2D(gobject = osm_test, 
           show_network = TRUE,
           network_color = "blue",
           point_size = 1.5, 
           cell_color = "ClusterName", 
           legend_symbol_size = 2,
           save_param = list(save_name = "9_spatial_network_k10"))

13 Spatial genes

# km binarization
km_spatialfeats <- binSpect(osm_test, 
                            calc_hub = TRUE, 
                            hub_min_int = 5,
                            bin_method = "kmeans")

spatDimFeatPlot2D(osm_test, 
                  expression_values = "scaled",
                  feats = km_spatialgenes$feats[1:3], 
                  plot_alignment = "horizontal",
                  spat_point_shape = "no_border",
                  dim_point_shape = "no_border",
                  spat_point_size = 0.1,
                  dim_point_size = 0.1,
                  cow_n_col = 1,
                  save_param = list(save_name = "10_a_spatial_genes_km"))

14 Cell-cell preferential proximity

## calculate frequently seen proximities
cell_proximities <- cellProximityEnrichment(gobject = osm_test,
                                            cluster_column = "ClusterName",
                                            number_of_simulations = 1000)
## barplot
cellProximityBarplot(gobject = osm_test, 
                     CPscore = cell_proximities, 
                     min_orig_ints = 25, 
                     min_sim_ints = 25,
                     save_param = list(save_name = "12_a_barplot_cell_cell_enrichment"))

## heatmap
cellProximityHeatmap(gobject = osm_test, 
                     CPscore = cell_proximities, 
                     order_cell_types = TRUE, 
                     scale = TRUE,
                     color_breaks = c(-1.5, 0, 1.5), 
                     color_names = c("blue", "white", "red"),
                     save_param = list(save_name = "12_b_heatmap_cell_cell_enrichment", 
                      unit = "in"))

## network
cellProximityNetwork(gobject = osm_test, 
                     CPscore = cell_proximities, 
                     remove_self_edges = FALSE, 
                     only_show_enrichment_edges = TRUE,
                     save_param = list(save_name = "12_c_network_cell_cell_enrichment"))

## visualization
spec_interaction <- "Astrocyte_Mfge8--Oligodendrocyte_Precursor_cells"

cellProximitySpatPlot(gobject = osm_test,
                      interaction_name = spec_interaction,
                      cluster_column = "ClusterName", 
                      cell_color = "ClusterName", 
                      cell_color_code = c("Astrocyte_Mfge8" = "blue", 
                                          "Oligodendrocyte_Precursor_cells" = "red"),
                      coord_fix_ratio = 0.5,  
                      point_size_select = 3, 
                      point_size_other = 1.5,
                      save_param = list(save_name = "12_d_cell_cell_enrichment_selected"))

15 Session Info

R version 4.3.2 (2023-10-31)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.3.1

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/New_York
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] GiottoData_0.2.7.0 GiottoUtils_0.1.5  Giotto_4.0.2       GiottoClass_0.1.3 

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3          ggdendro_0.1.23             rstudioapi_0.15.0          
  [4] jsonlite_1.8.8              shape_1.4.6                 magrittr_2.0.3             
  [7] magick_2.8.2                farver_2.1.1                rmarkdown_2.25             
 [10] GlobalOptions_0.1.2         zlibbioc_1.48.0             ragg_1.2.7                 
 [13] vctrs_0.6.5                 Cairo_1.6-2                 DelayedMatrixStats_1.24.0  
 [16] RCurl_1.98-1.14             terra_1.7-71                htmltools_0.5.7            
 [19] S4Arrays_1.2.0              BiocNeighbors_1.20.2        SparseArray_1.2.4          
 [22] plyr_1.8.9                  igraph_2.0.1.1              lifecycle_1.0.4            
 [25] iterators_1.0.14            pkgconfig_2.0.3             rsvd_1.0.5                 
 [28] Matrix_1.6-5                R6_2.5.1                    fastmap_1.1.1              
 [31] GenomeInfoDbData_1.2.11     MatrixGenerics_1.14.0       clue_0.3-65                
 [34] digest_0.6.34               colorspace_2.1-0            S4Vectors_0.40.2           
 [37] dqrng_0.3.2                 irlba_2.3.5.1               textshaping_0.3.7          
 [40] GenomicRanges_1.54.1        beachmat_2.18.0             labeling_0.4.3             
 [43] progressr_0.14.0            fansi_1.0.6                 polyclip_1.10-6            
 [46] abind_1.4-5                 compiler_4.3.2              withr_3.0.0                
 [49] doParallel_1.0.17           backports_1.4.1             BiocParallel_1.36.0        
 [52] viridis_0.6.5               dendextend_1.17.1           ggforce_0.4.1              
 [55] MASS_7.3-60.0.1             DelayedArray_0.28.0         rjson_0.2.21               
 [58] bluster_1.12.0              gtools_3.9.5                GiottoVisuals_0.1.4        
 [61] tools_4.3.2                 glue_1.7.0                  dbscan_1.1-12              
 [64] grid_4.3.2                  checkmate_2.3.1             Rtsne_0.17                 
 [67] cluster_2.1.6               reshape2_1.4.4              generics_0.1.3             
 [70] gtable_0.3.4                tidyr_1.3.1                 data.table_1.15.0          
 [73] BiocSingular_1.18.0         tidygraph_1.3.1             ScaledMatrix_1.10.0        
 [76] metapod_1.10.1              utf8_1.2.4                  XVector_0.42.0             
 [79] BiocGenerics_0.48.1         RcppAnnoy_0.0.22            ggrepel_0.9.5              
 [82] foreach_1.5.2               pillar_1.9.0                stringr_1.5.1              
 [85] limma_3.58.1                circlize_0.4.15             tweenr_2.0.2               
 [88] dplyr_1.1.4                 lattice_0.22-5              deldir_2.0-2               
 [91] tidyselect_1.2.0            ComplexHeatmap_2.18.0       SingleCellExperiment_1.24.0
 [94] locfit_1.5-9.8              scuttle_1.12.0              knitr_1.45                 
 [97] gridExtra_2.3               IRanges_2.36.0              edgeR_4.0.15               
[100] SummarizedExperiment_1.32.0 stats4_4.3.2                xfun_0.42                  
[103] graphlayouts_1.1.0          Biobase_2.62.0              statmod_1.5.0              
[106] matrixStats_1.2.0           stringi_1.8.3               yaml_2.3.8                 
[109] evaluate_0.23               codetools_0.2-19            ggraph_2.1.0               
[112] tibble_3.2.1                colorRamp2_0.1.0            cli_3.6.2                  
[115] uwot_0.1.16                 reticulate_1.35.0           systemfonts_1.0.5          
[118] munsell_0.5.0               Rcpp_1.0.12                 GenomeInfoDb_1.38.6        
[121] png_0.1-8                   parallel_4.3.2              ggplot2_3.4.4              
[124] scran_1.30.2                sparseMatrixStats_1.14.0    bitops_1.0-7               
[127] SpatialExperiment_1.12.0    viridisLite_0.4.2           scales_1.3.0               
[130] purrr_1.0.2                 crayon_1.5.2                GetoptLong_1.0.5           
[133] rlang_1.1.3                 cowplot_1.1.3