vignettes/merfish_mouse_hypothalamic.Rmd
merfish_mouse_hypothalamic.Rmd
Moffitt et al. created a 3D spatial expression dataset consisting of 155 genes from ~1 million single cells acquired from mouse hypothalamic preoptic regions. Please ensure that wget is installed locally to streamline the download.
Clustering, 3D visualization, and cell type identification of clusters using marker genes will be explored in this tutorial.
# Ensure Giotto Suite is installed.
if(!"Giotto" %in% installed.packages()) {
pak::pkg_install("drieslab/Giotto")
}
# Ensure GiottoData, a small, helper module for tutorials, is installed.
if(!"GiottoData" %in% installed.packages()) {
pak::pkg_install("drieslab/GiottoData")
}
# Ensure the Python environment for Giotto has been installed.
genv_exists <- Giotto::checkGiottoEnvironment()
if(!genv_exists){
# The following command need only be run once to install the Giotto environment.
Giotto::installGiottoEnvironment()
}
library(Giotto)
library(GiottoData)
# Specify path from which data may be retrieved/stored
data_path <- "/path/to/data/"
# Specify path to which results may be saved
results_folder <- "/path/to/results/"
# Optional: Specify a path to a Python executable within a conda or miniconda
# environment. If set to NULL (default), the Python executable within the previously
# installed Giotto environment will be used.
python_path <- NULL # alternatively, "/local/python/path/python" if desired.
# In the event of authentication issues with wget,
# add ", extra = "--no-check-certificate" " after the method argument.
# Get the dataset:
getSpatialDataset(dataset = "merfish_preoptic",
directory = data_path,
method = "wget")
# Optional, but encouraged: Set Giotto instructions
instructions <- createGiottoInstructions(save_plot = TRUE,
show_plot = FALSE,
return_plot = FALSE,
save_dir = results_folder,
python_path = python_path)
# Create file paths to feed data into Giotto Object
expr_path <- paste0(data_path, "merFISH_3D_data_expression.txt.gz")
loc_path <- paste0(data_path, "merFISH_3D_data_cell_locations.txt")
meta_path <- paste0(data_path, "merFISH_3D_metadata.txt")
# Create Giotto object
merFISH_gobject <- createGiottoObject(expression = expr_path,
spatial_locs = loc_path,
instructions = instructions)
# Add additional metadata
metadata <- data.table::fread(meta_path)
merFISH_gobject <- addCellMetadata(merFISH_gobject,
new_metadata = metadata$layer_ID,
vector_name = "layer_ID")
merFISH_gobject <- addCellMetadata(merFISH_gobject,
new_metadata = metadata$orig_cell_types,
vector_name = "orig_cell_types")
First pre-test filter parameters for both features and cells.
filterDistributions(merFISH_gobject,
detection = "feats")
filterDistributions(merFISH_gobject,
detection = "cells")
filterCombinations(merFISH_gobject,
expression_thresholds = c(0,1e-6,1e-5),
feat_det_in_min_cells = c(500, 1000, 1500),
min_det_feats_per_cell = c(1, 5, 10))
Use the previously generated plots to inform filter decisions.
# Filter data
merFISH_gobject <- filterGiotto(gobject = merFISH_gobject,
feat_det_in_min_cells = 0,
min_det_feats_per_cell = 0)
# Normalize data
merFISH_gobject <- normalizeGiotto(gobject = merFISH_gobject,
scalefactor = 10000,
verbose = TRUE)
# Add statistics to Giotto Object
merFISH_gobject <- addStatistics(gobject = merFISH_gobject,
expression_values = "normalized")
# Adjust for covariates
merFISH_gobject <- adjustGiottoMatrix(gobject = merFISH_gobject,
expression_values = "normalized",
batch_columns = NULL,
covariate_columns = "layer_ID",
return_gobject = TRUE,
update_slot = "custom")
Now, take a glance at the data in both 2D and 3D.
# 2D
spatPlot(gobject = merFISH_gobject,
point_size = 1.5)
# 3D
spatPlot3D(gobject = merFISH_gobject,
point_size = 1.25,
axis_scale = "real")
There are only 155 genes within this dataset. Use them all (default) within the dimension reduction.
merFISH_gobject <- runPCA(gobject = merFISH_gobject,
feats_to_use = NULL,
scale_unit = FALSE,
center = TRUE)
# View details about the principal components
screePlot(merFISH_gobject)
After the PCA, a UMAP may be run. Run the UMAP so clusters may be visualized upon it.
merFISH_gobject <- runUMAP(merFISH_gobject,
dimensions_to_use = 1:8,
n_components = 3,
n_threads = 4)
plotUMAP_3D(gobject = merFISH_gobject,
point_size = 1.5)
Create a nearest network, then perform Leiden clustering. The clusters may be visualized on a UMAP.
# Create a sNN network (default)
merFISH_gobject <- createNearestNetwork(gobject = merFISH_gobject,
dimensions_to_use = 1:8,
k = 15)
# Leiden cluster
merFISH_gobject <- doLeidenCluster(gobject = merFISH_gobject,
resolution = 0.2,
n_iterations = 200,
name = "leiden_0.2_200")
# Plot the clusters upon the UMAP
plotUMAP_3D(gobject = merFISH_gobject,
cell_color = "leiden_0.2_200",
point_size = 1.5,
show_center_label = FALSE)
View the clusters in-tissue on each layer.
spatPlot2D(gobject = merFISH_gobject,
point_size = 1.5,
cell_color = "leiden_0.2_200",
group_by = "layer_ID",
cow_n_col = 2,
group_by_subset = c(260, 160, 60, -40, -140, -240))
Call findMarkers_one_vs_all to identify marker features. Click the function to see alternate methods, or look findGiniMarkers section for details on the gini method. Once marker features have been determined, observe the differential expression across clusters within the violin plot.
markers_gini <- findMarkers_one_vs_all(gobject = merFISH_gobject,
method = "gini",
expression_values = "normalized",
cluster_column = "leiden_0.2_200",
min_feats = 1,
rank_score = 2)
# Display details about the marker genes in-console
markers_gini[, head(.SD, 2), by = "cluster"]
# violinplot
topgenes_gini <- unique(markers_gini[, head(.SD, 2), by = "cluster"]$feats)
violinPlot(merFISH_gobject,
feats = topgenes_gini,
cluster_column = "leiden_0.2_200",
strip_position = "right")
topgenes_gini <- unique(markers_gini[, head(.SD, 6), by = "cluster"]$feats)
plotMetaDataHeatmap(merFISH_gobject,
expression_values = "scaled",
metadata_cols = "leiden_0.2_200",
selected_feats = topgenes_gini)
Use known marker and differentially expressed genes (DEGs) to identify cell type for each cluster.
# Known markers and DEGs
selected_genes <- c("Myh11", "Klf4", "Fn1", "Cd24a", "Cyr61", "Nnat", "Trh",
"Selplg", "Pou3f2", "Aqp4", "Traf4", "Pdgfra", "Opalin",
"Mbp", "Ttyh2", "Fezf1", "Cbln1", "Slc17a6", "Scg2", "Isl1", "Gad1")
cell_metadata <- pDataDT(merFISH_gobject)
cluster_order <- unique(cell_metadata$leiden_0.2_200)
plotMetaDataHeatmap(merFISH_gobject,
expression_values = "scaled",
metadata_cols = "leiden_0.2_200",
selected_feats = selected_genes,
custom_feat_order = rev(selected_genes),
custom_cluster_order = cluster_order)
Since there are more Leiden clusters than there are types of cells in this dataset, the same cell type may be assigned to different cluster numbers. This may be done only after verifying that particular clusters highly express marker genes corresponding to the same cell type. The above heatmap is used to streamline this process. Call annotateGiotto to map cell types to Leiden clusters; these will appear in cell_metadata within the giottoObject.
# Name clusters
clusters_cell_types <- c("Inhibitory", "Inhibitory", "Excitatory",
"Astrocyte", "OD Mature", "Endothelial",
"OD Mature", "OD Immature", "Ambiguous",
"Ependymal", "Endothelial", "Microglia",
"OD Mature")
names(clusters_cell_types) <- as.character(sort(cluster_order))
merFISH_gobject <- annotateGiotto(gobject = merFISH_gobject,
annotation_vector = clusters_cell_types,
cluster_column = "leiden_0.2_200",
name = "cell_types")
## show heatmap
plotMetaDataHeatmap(merFISH_gobject,
expression_values = "scaled",
metadata_cols = "cell_types",
selected_feats = selected_genes,
custom_feat_order = rev(selected_genes),
custom_cluster_order = clusters_cell_types)
# Assign colors to each cell type
mycolorcode <- c("red", "lightblue", "yellowgreen","purple", "darkred",
"magenta", "mediumblue", "yellow", "gray")
names(mycolorcode) <- c("Inhibitory", "Excitatory", "OD Mature", "OD Immature",
"Astrocyte", "Microglia", "Ependymal", "Endothelial",
"Ambiguous")
plotUMAP_3D(merFISH_gobject,
cell_color = "cell_types",
point_size = 1.5,
cell_color_code = mycolorcode)
spatPlot3D(merFISH_gobject,
cell_color = "cell_types",
axis_scale = "real",
sdimx = "sdimx",
sdimy = "sdimy",
sdimz = "sdimz",
show_grid = FALSE,
cell_color_code = mycolorcode)
spatPlot2D(gobject = merFISH_gobject,
point_size = 1.0,
cell_color = "cell_types",
cell_color_code = mycolorcode,
group_by = "layer_ID",
cow_n_col = 2,
group_by_subset = c(seq(260, -290, -100)))
spatPlot3D(merFISH_gobject,
cell_color = "cell_types",
axis_scale = "real",
sdimx = "sdimx",
sdimy = "sdimy",
sdimz = "sdimz",
show_grid = FALSE,
cell_color_code = mycolorcode,
select_cell_groups = "Excitatory",
show_other_cells = FALSE)
spatPlot2D(gobject = merFISH_gobject,
point_size = 1.0,
cell_color = "cell_types",
cell_color_code = mycolorcode,
select_cell_groups = "Excitatory",
show_other_cells = FALSE,
group_by = "layer_ID",
cow_n_col = 2,
group_by_subset = c(seq(260, -290, -100)))
spatPlot3D(merFISH_gobject,
cell_color = "cell_types",
axis_scale = "real",
sdimx = "sdimx",
sdimy = "sdimy",
sdimz = "sdimz",
show_grid = FALSE,
cell_color_code = mycolorcode,
select_cell_groups = "Inhibitory",
show_other_cells = FALSE)
spatPlot2D(gobject = merFISH_gobject,
point_size = 1.0,
cell_color = "cell_types",
cell_color_code = mycolorcode,
select_cell_groups = "Inhibitory",
show_other_cells = FALSE,
group_by = "layer_ID",
cow_n_col = 2,
group_by_subset = c(seq(260, -290, -100)))
spatPlot3D(merFISH_gobject,
cell_color = "cell_types",
axis_scale = "real",
sdimx = "sdimx",
sdimy = "sdimy",
sdimz = "sdimz",
show_grid = FALSE,
cell_color_code = mycolorcode,
select_cell_groups = c("Astrocyte", "OD Mature", "OD Immature"),
show_other_cells = FALSE)
spatPlot2D(gobject = merFISH_gobject,
point_size = 1.0,
cell_color = "cell_types",
cell_color_code = mycolorcode,
select_cell_groups = c("Astrocyte", "OD Mature", "OD Immature"),
show_other_cells = FALSE,
group_by = "layer_ID",
cow_n_col = 2,
group_by_subset = c(seq(260, -290, -100)))
spatPlot3D(merFISH_gobject,
cell_color = "cell_types",
axis_scale = "real",
sdimx = "sdimx",
sdimy = "sdimy",
sdimz = "sdimz",
show_grid = FALSE,
cell_color_code = mycolorcode,
select_cell_groups = c("Microglia", "Ependymal", "Endothelial"),
show_other_cells = FALSE)
spatPlot2D(gobject = merFISH_gobject,
point_size = 1.0,
cell_color = "cell_types",
cell_color_code = mycolorcode,
select_cell_groups = c("Microglia", "Ependymal", "Endothelial"),
show_other_cells = FALSE,
group_by = "layer_ID",
cow_n_col = 2,
group_by_subset = c(seq(260, -290, -100)))
R version 4.3.2 (2023-10-31)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.3.1
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] GiottoData_0.2.7.0 GiottoUtils_0.1.5 Giotto_4.0.2 GiottoClass_0.1.3
loaded via a namespace (and not attached):
[1] colorRamp2_0.1.0 bitops_1.0-7 rlang_1.1.3
[4] magrittr_2.0.3 RcppAnnoy_0.0.22 matrixStats_1.2.0
[7] compiler_4.3.2 png_0.1-8 systemfonts_1.0.5
[10] vctrs_0.6.5 pkgconfig_2.0.3 SpatialExperiment_1.12.0
[13] crayon_1.5.2 fastmap_1.1.1 backports_1.4.1
[16] magick_2.8.2 XVector_0.42.0 ellipsis_0.3.2
[19] labeling_0.4.3 utf8_1.2.4 rmarkdown_2.25
[22] ragg_1.2.7 purrr_1.0.2 xfun_0.42
[25] zlibbioc_1.48.0 beachmat_2.18.0 GenomeInfoDb_1.38.6
[28] jsonlite_1.8.8 DelayedArray_0.28.0 BiocParallel_1.36.0
[31] terra_1.7-71 irlba_2.3.5.1 parallel_4.3.2
[34] R6_2.5.1 RColorBrewer_1.1-3 limma_3.58.1
[37] reticulate_1.35.0 GenomicRanges_1.54.1 Rcpp_1.0.12
[40] SummarizedExperiment_1.32.0 knitr_1.45 R.utils_2.12.3
[43] IRanges_2.36.0 igraph_2.0.1.1 Matrix_1.6-5
[46] tidyselect_1.2.0 rstudioapi_0.15.0 abind_1.4-5
[49] yaml_2.3.8 codetools_0.2-19 lattice_0.22-5
[52] tibble_3.2.1 Biobase_2.62.0 withr_3.0.0
[55] evaluate_0.23 pillar_1.9.0 MatrixGenerics_1.14.0
[58] checkmate_2.3.1 stats4_4.3.2 dbscan_1.1-12
[61] plotly_4.10.4 generics_0.1.3 RCurl_1.98-1.14
[64] S4Vectors_0.40.2 ggplot2_3.4.4 munsell_0.5.0
[67] scales_1.3.0 gtools_3.9.5 glue_1.7.0
[70] lazyeval_0.2.2 tools_4.3.2 GiottoVisuals_0.1.4
[73] data.table_1.15.0 ScaledMatrix_1.10.0 cowplot_1.1.3
[76] grid_4.3.2 tidyr_1.3.1 crosstalk_1.2.1
[79] colorspace_2.1-0 SingleCellExperiment_1.24.0 GenomeInfoDbData_1.2.11
[82] BiocSingular_1.18.0 cli_3.6.2 rsvd_1.0.5
[85] textshaping_0.3.7 fansi_1.0.6 S4Arrays_1.2.0
[88] viridisLite_0.4.2 dplyr_1.1.4 uwot_0.1.16
[91] gtable_0.3.4 R.methodsS3_1.8.2 digest_0.6.34
[94] progressr_0.14.0 BiocGenerics_0.48.1 SparseArray_1.2.4
[97] ggrepel_0.9.5 rjson_0.2.21 htmlwidgets_1.6.4
[100] farver_2.1.1 htmltools_0.5.7 R.oo_1.26.0
[103] lifecycle_1.0.4 httr_1.4.7 statmod_1.5.0