Dataset Explanation

Moffitt et al. created a 3D spatial expression dataset consisting of 155 genes from ~1 million single cells acquired from mouse hypothalamic preoptic regions. Please ensure that wget is installed locally to streamline the download.

Clustering, 3D visualization, and cell type identification of clusters using marker genes will be explored in this tutorial.

Start Giotto

# Ensure Giotto Suite is installed.
if(!"Giotto" %in% installed.packages()) {
  pak::pkg_install("drieslab/Giotto")
}

# Ensure GiottoData, a small, helper module for tutorials, is installed.
if(!"GiottoData" %in% installed.packages()) {
  pak::pkg_install("drieslab/GiottoData")
}

# Ensure the Python environment for Giotto has been installed.
genv_exists = Giotto::checkGiottoEnvironment()
if(!genv_exists){
  # The following command need only be run once to install the Giotto environment.
  Giotto::installGiottoEnvironment()
}

Download Dataset

library(Giotto)
library(GiottoData)

# Specify path from which data may be retrieved/stored
data_directory = 'data'
# alternatively, "/path/to/where/the/data/lives/"

# Specify path to which results may be saved
results_directory = 'results/'
# alternatively, "/path/to/store/the/results/"

# Optional: Specify a path to a Python executable within a conda or miniconda 
# environment. If set to NULL (default), the Python executable within the previously
# installed Giotto environment will be used.
my_python_path = NULL # alternatively, "/local/python/path/python" if desired.
# In the event of authentication issues with wget, 
# add ", extra = '--no-check-certificate' " after the method argument.
# Get the dataset:
getSpatialDataset(dataset = 'merfish_preoptic', 
                  directory = data_directory, 
                  method = 'wget')

1. Create Giotto Instructions & Prepare Data

# Optional, but encouraged: Set Giotto instructions
instrs = createGiottoInstructions(save_plot = TRUE,
                                  show_plot = TRUE,
                                  save_dir = results_directory, 
                                  python_path = my_python_path)

# Create file paths to feed data into Giotto Object
expr_path = paste0(data_directory, "merFISH_3D_data_expression.txt.gz")
loc_path = paste0(data_directory, "merFISH_3D_data_cell_locations.txt")
meta_path = paste0(data_directory, "merFISH_3D_metadata.txt")
# Create Giotto object
merFISH_gobject <- createGiottoObject(expression = expr_path,
                                      spatial_locs = loc_path,
                                      instructions = instrs)

# Add additional metadata
metadata = data.table::fread(meta_path)

merFISH_gobject <- addCellMetadata(merFISH_gobject, 
                                   new_metadata = metadata$layer_ID, 
                                   vector_name = 'layer_ID')
merFISH_gobject <- addCellMetadata(merFISH_gobject,
                                   new_metadata = metadata$orig_cell_types,
                                   vector_name = 'orig_cell_types')

First pre-test filter parameters for both features and cells.

filterDistributions(merFISH_gobject, 
                    detection = 'feats')

filterDistributions(merFISH_gobject, 
                    detection = 'cells')

filterCombinations(merFISH_gobject,
                  expression_thresholds = c(0,1e-6,1e-5),
                  feat_det_in_min_cells = c(500, 1000, 1500),
                  min_det_feats_per_cell = c(1, 5, 10))

2. Data Processing

Use the previously generated plots to inform filter decisions.

# Filter data
merFISH_gobject <- filterGiotto(gobject = merFISH_gobject,
                                feat_det_in_min_cells = 0,
                                min_det_feats_per_cell = 0)

# Normalize data
merFISH_gobject <- normalizeGiotto(gobject = merFISH_gobject, 
                                   scalefactor = 10000, 
                                   verbose = T)

# Add statistics to Giotto Object
merFISH_gobject <- addStatistics(gobject = merFISH_gobject, 
                                 expression_values = 'normalized')

# Adjust for covariates
merFISH_gobject <- adjustGiottoMatrix(gobject = merFISH_gobject, 
                                      expression_values = c('normalized'),
                                      batch_columns = NULL, 
                                      covariate_columns = c('layer_ID'),
                                      return_gobject = TRUE,
                                      update_slot = c('custom'))

Now, take a glance at the data in both 2D and 3D.

# 2D
spatPlot(gobject = merFISH_gobject, 
         point_size = 1.5)

# 3D
spatPlot3D(gobject = merFISH_gobject, 
           point_size = 1.25, 
           axis_scale = 'real')

3. Dimension Reduction

There are only 155 genes within this dataset. Use them all (default) within the dimension reduction.

merFISH_gobject <- runPCA(gobject = merFISH_gobject, 
                          feats_to_use = NULL, 
                          scale_unit = FALSE, 
                          center = TRUE)

# View details about the principal components
screePlot(merFISH_gobject)

After the PCA, a UMAP may be run. Run the UMAP so clusters may be visualized upon it.

merFISH_gobject <- runUMAP(merFISH_gobject, 
                           dimensions_to_use = 1:8, 
                           n_components = 3, 
                           n_threads = 4)

plotUMAP_3D(gobject = merFISH_gobject, 
            point_size = 1.5) 

4. Clustering

Create a nearest network, then perform Leiden clustering. The clusters may be visualized on a UMAP.

# Create a sNN network (default)
merFISH_gobject <- createNearestNetwork(gobject = merFISH_gobject, 
                                        dimensions_to_use = 1:8, 
                                        k = 15)

# Leiden cluster
merFISH_gobject <- doLeidenCluster(gobject = merFISH_gobject, 
                                   resolution = 0.2, 
                                   n_iterations = 200,
                                   name = 'leiden_0.2_200')

# Plot the clusters upon the UMAP
plotUMAP_3D(gobject = merFISH_gobject, 
            cell_color = 'leiden_0.2_200', 
            point_size = 1.5, 
            show_center_label = F)

5. Co-Visualize

View the clusters in-tissue on each layer.

spatPlot2D(gobject = merFISH_gobject, 
           point_size = 1.5, 
           cell_color = 'leiden_0.2_200', 
           group_by = 'layer_ID', 
           cow_n_col = 2, 
           group_by_subset = c(260, 160, 60, -40, -140, -240))

6. Cell Type Marker Gene Detection

Call findMarkers_one_vs_all to identify marker features. Click the function to see alternate methods, or look findGiniMarkers section for details on the gini method. Once marker features have been determined, observe the differential expression across clusters within the violin plot.

markers = findMarkers_one_vs_all(gobject = merFISH_gobject,
                                 method = 'gini',
                                 expression_values = 'normalized',
                                 cluster_column = 'leiden_0.2_200',
                                 min_feats = 1, rank_score = 2)

# Display details about the marker genes in-console
markers[, head(.SD, 2), by = 'cluster']

# violinplot
topgini_genes = unique(markers[, head(.SD, 2), by = 'cluster']$feats)

violinPlot(merFISH_gobject, 
           feats = topgini_genes, 
           cluster_column = 'leiden_0.2_200', 
           strip_position = 'right')

topgini_genes = unique(markers[, head(.SD, 6), by = 'cluster']$feats)

plotMetaDataHeatmap(merFISH_gobject, 
                    expression_values = 'scaled',
                    metadata_cols = c('leiden_0.2_200'),
                    selected_feats = topgini_genes)

7. Cell-Type Annotation

Use known marker and differentially expressed genes (DEGs) to identify cell type for each cluster.

# Known markers and DEGs
selected_genes = c('Myh11', 'Klf4', 'Fn1', 'Cd24a', 'Cyr61', 'Nnat', 'Trh', 
                   'Selplg', 'Pou3f2', 'Aqp4', 'Traf4', 'Pdgfra', 'Opalin', 
                   'Mbp', 'Ttyh2', 'Fezf1', 'Cbln1', 'Slc17a6', 'Scg2', 'Isl1', 'Gad1')

gobject_cell_metadata = pDataDT(merFISH_gobject)
cluster_order = unique(gobject_cell_metadata$leiden_0.2_200)

plotMetaDataHeatmap(merFISH_gobject, 
                    expression_values = 'scaled',
                    metadata_cols = c('leiden_0.2_200'),
                    selected_feats = selected_genes,
                    custom_feat_order = rev(selected_genes),
                    custom_cluster_order = cluster_order)

Since there are more Leiden clusters than there are types of cells in this dataset, the same cell type may be assigned to different cluster numbers. This may be done only after verifying that particular clusters highly express marker genes corresponding to the same cell type. The above heatmap is used to streamline this process. Call annotateGiotto to map cell types to Leiden clusters; these will appear in cell_metadata within the giottoObject.

# Name clusters
clusters_cell_types_hypo = c('Inhibitory', 'Inhibitory', 'Excitatory', 'Astrocyte','OD Mature', 'Endothelial',
                             'OD Mature', 'OD Immature',  'Ambiguous','Ependymal', 'Endothelial', 'Microglia', 'OD Mature')
names(clusters_cell_types_hypo) = as.character(sort(cluster_order))

merFISH_gobject <- annotateGiotto(gobject = merFISH_gobject, 
                                  annotation_vector = clusters_cell_types_hypo,
                                  cluster_column = 'leiden_0.2_200', 
                                  name = 'cell_types')

## show heatmap
plotMetaDataHeatmap(merFISH_gobject, 
                    expression_values = 'scaled',
                    metadata_cols = c('cell_types'),
                    selected_feats = selected_genes,
                    custom_feat_order = rev(selected_genes),
                    custom_cluster_order = clusters_cell_types_hypo)

8. Visualize

# Assign colors to each cell type 
mycolorcode = c('red', 'lightblue', 'yellowgreen','purple', 'darkred',
                'magenta', 'mediumblue', 'yellow', 'gray')
names(mycolorcode) = c('Inhibitory', 'Excitatory','OD Mature', 'OD Immature', 
                       'Astrocyte', 'Microglia', 'Ependymal','Endothelial', 'Ambiguous')

plotUMAP_3D(merFISH_gobject, 
            cell_color = 'cell_types', 
            point_size = 1.5, 
            cell_color_code = mycolorcode)

spatPlot3D(merFISH_gobject,
           cell_color = 'cell_types', 
           axis_scale = 'real',
           sdimx = 'sdimx', 
           sdimy = 'sdimy', 
           sdimz = 'sdimz',
           show_grid = F, 
           cell_color_code = mycolorcode)

spatPlot2D(gobject = merFISH_gobject, 
           point_size = 1.0,
           cell_color = 'cell_types', 
           cell_color_code = mycolorcode,
           group_by = 'layer_ID', 
           cow_n_col = 2, 
           group_by_subset = c(seq(260, -290, -100)))

8.1 Excitatory Cells Only

spatPlot3D(merFISH_gobject,
           cell_color = 'cell_types', 
           axis_scale = 'real',
           sdimx = 'sdimx', 
           sdimy = 'sdimy', 
           sdimz = 'sdimz',
           show_grid = F, 
           cell_color_code = mycolorcode,
           select_cell_groups = 'Excitatory',
           show_other_cells = F)

spatPlot2D(gobject = merFISH_gobject, 
           point_size = 1.0, 
           cell_color = 'cell_types', 
           cell_color_code = mycolorcode,
           select_cell_groups = 'Excitatory', 
           show_other_cells = F,
           group_by = 'layer_ID', 
           cow_n_col = 2, 
           group_by_subset = c(seq(260, -290, -100)))

8.2 Inhibitory Cells Only

spatPlot3D(merFISH_gobject,
           cell_color = 'cell_types', 
           axis_scale = 'real',
           sdimx = 'sdimx', 
           sdimy = 'sdimy', 
           sdimz = 'sdimz',
           show_grid = F, 
           cell_color_code = mycolorcode,
           select_cell_groups = 'Inhibitory', 
           show_other_cells = F)

spatPlot2D(gobject = merFISH_gobject, 
           point_size = 1.0, 
           cell_color = 'cell_types', 
           cell_color_code = mycolorcode,
           select_cell_groups = 'Inhibitory',
           show_other_cells = F,
           group_by = 'layer_ID', 
           cow_n_col = 2, 
           group_by_subset = c(seq(260, -290, -100)))

8.3 OD and Astrocytes Only

spatPlot3D(merFISH_gobject,
           cell_color = 'cell_types', 
           axis_scale = 'real',
           sdimx = 'sdimx', 
           sdimy = 'sdimy', 
           sdimz = 'sdimz',
           show_grid = F, 
           cell_color_code = mycolorcode,
           select_cell_groups = c('Astrocyte', 'OD Mature', 'OD Immature'), 
           show_other_cells = F)

spatPlot2D(gobject = merFISH_gobject, 
           point_size = 1.0, 
           cell_color = 'cell_types', 
           cell_color_code = mycolorcode,
           select_cell_groups = c('Astrocyte', 'OD Mature', 'OD Immature'), 
           show_other_cells = F,
           group_by = 'layer_ID', 
           cow_n_col = 2, 
           group_by_subset = c(seq(260, -290, -100)))

8.4 Other Cells Only

spatPlot3D(merFISH_gobject,
           cell_color = 'cell_types', 
           axis_scale = 'real',
           sdimx = 'sdimx', 
           sdimy = 'sdimy', 
           sdimz = 'sdimz',
           show_grid = F, 
           cell_color_code = mycolorcode,
           select_cell_groups = c('Microglia', 'Ependymal', 'Endothelial'), 
           show_other_cells = F)

spatPlot2D(gobject = merFISH_gobject, 
           point_size = 1.0, 
           cell_color = 'cell_types', 
           cell_color_code = mycolorcode,
           select_cell_groups = c('Microglia', 'Ependymal', 'Endothelial'), 
           show_other_cells = F,
           group_by = 'layer_ID', 
           cow_n_col = 2, 
           group_by_subset = c(seq(260, -290, -100)))

9. Session Info

R version 4.3.2 (2023-10-31)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.3.1

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/New_York
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] GiottoData_0.2.7.0 GiottoUtils_0.1.5  Giotto_4.0.2       GiottoClass_0.1.3 

loaded via a namespace (and not attached):
  [1] colorRamp2_0.1.0            bitops_1.0-7                rlang_1.1.3                
  [4] magrittr_2.0.3              RcppAnnoy_0.0.22            matrixStats_1.2.0          
  [7] compiler_4.3.2              png_0.1-8                   systemfonts_1.0.5          
 [10] vctrs_0.6.5                 pkgconfig_2.0.3             SpatialExperiment_1.12.0   
 [13] crayon_1.5.2                fastmap_1.1.1               backports_1.4.1            
 [16] magick_2.8.2                XVector_0.42.0              ellipsis_0.3.2             
 [19] labeling_0.4.3              utf8_1.2.4                  rmarkdown_2.25             
 [22] ragg_1.2.7                  purrr_1.0.2                 xfun_0.42                  
 [25] zlibbioc_1.48.0             beachmat_2.18.0             GenomeInfoDb_1.38.6        
 [28] jsonlite_1.8.8              DelayedArray_0.28.0         BiocParallel_1.36.0        
 [31] terra_1.7-71                irlba_2.3.5.1               parallel_4.3.2             
 [34] R6_2.5.1                    RColorBrewer_1.1-3          limma_3.58.1               
 [37] reticulate_1.35.0           GenomicRanges_1.54.1        Rcpp_1.0.12                
 [40] SummarizedExperiment_1.32.0 knitr_1.45                  R.utils_2.12.3             
 [43] IRanges_2.36.0              igraph_2.0.1.1              Matrix_1.6-5               
 [46] tidyselect_1.2.0            rstudioapi_0.15.0           abind_1.4-5                
 [49] yaml_2.3.8                  codetools_0.2-19            lattice_0.22-5             
 [52] tibble_3.2.1                Biobase_2.62.0              withr_3.0.0                
 [55] evaluate_0.23               pillar_1.9.0                MatrixGenerics_1.14.0      
 [58] checkmate_2.3.1             stats4_4.3.2                dbscan_1.1-12              
 [61] plotly_4.10.4               generics_0.1.3              RCurl_1.98-1.14            
 [64] S4Vectors_0.40.2            ggplot2_3.4.4               munsell_0.5.0              
 [67] scales_1.3.0                gtools_3.9.5                glue_1.7.0                 
 [70] lazyeval_0.2.2              tools_4.3.2                 GiottoVisuals_0.1.4        
 [73] data.table_1.15.0           ScaledMatrix_1.10.0         cowplot_1.1.3              
 [76] grid_4.3.2                  tidyr_1.3.1                 crosstalk_1.2.1            
 [79] colorspace_2.1-0            SingleCellExperiment_1.24.0 GenomeInfoDbData_1.2.11    
 [82] BiocSingular_1.18.0         cli_3.6.2                   rsvd_1.0.5                 
 [85] textshaping_0.3.7           fansi_1.0.6                 S4Arrays_1.2.0             
 [88] viridisLite_0.4.2           dplyr_1.1.4                 uwot_0.1.16                
 [91] gtable_0.3.4                R.methodsS3_1.8.2           digest_0.6.34              
 [94] progressr_0.14.0            BiocGenerics_0.48.1         SparseArray_1.2.4          
 [97] ggrepel_0.9.5               rjson_0.2.21                htmlwidgets_1.6.4          
[100] farver_2.1.1                htmltools_0.5.7             R.oo_1.26.0                
[103] lifecycle_1.0.4             httr_1.4.7                  statmod_1.5.0