10X genomics recently launched a new platform to obtain spatial expression data using a Visium Spatial Gene Expression slide.
The Visium brain data to run this tutorial can be found here
Visium technology:
High resolution png from original tissue:
[]images/general_figs/mouse_brain_highres.png)
# Ensure Giotto Suite is installed.
if(!"Giotto" %in% installed.packages()) {
pak::pkg_install("drieslab/Giotto")
}
# Ensure the Python environment for Giotto has been installed.
genv_exists <- Giotto::checkGiottoEnvironment()
if(!genv_exists){
# The following command need only be run once to install the Giotto environment.
Giotto::installGiottoEnvironment()
}
library(Giotto)
# 1. set working directory
results_folder <- "path/to/results"
# Optional: Specify a path to a Python executable within a conda or miniconda
# environment. If set to NULL (default), the Python executable within the previously
# installed Giotto environment will be used.
python_path <- NULL # alternatively, "/local/python/path/python" if desired.
# 3. Create Giotto Instructions
instructions <- createGiottoInstructions(save_dir = results_folder,
save_plot = TRUE,
show_plot = FALSE,
return_plot = FALSE,
python_path = python_path)
## provide path to visium folder
data_path <- "/path/to/data/"
## directly from visium folder
visium_brain <- createGiottoVisiumObject(visium_dir = data_path,
expr_data = "raw",
png_name = "tissue_lowres_image.png",
gene_column_index = 2,
instructions = instructions)
## show associated images with giotto object
showGiottoImageNames(visium_brain) # "image" is the default name
## check metadata
pDataDT(visium_brain)
## show plot
spatPlot2D(gobject = visium_brain,
cell_color = "in_tissue",
point_size = 2,
cell_color_code = c("0" = "lightgrey", "1" = "blue"),
show_image = TRUE,
image_name = "image")
## subset on spots that were covered by tissue
metadata <- pDataDT(visium_brain)
in_tissue_barcodes <- metadata[in_tissue == 1]$cell_ID
visium_brain <- subsetGiotto(visium_brain,
cell_ids = in_tissue_barcodes)
## filter
visium_brain <- filterGiotto(gobject = visium_brain,
expression_threshold = 1,
feat_det_in_min_cells = 50,
min_det_feats_per_cell = 1000,
expression_values = "raw",
verbose = TRUE)
## normalize
visium_brain <- normalizeGiotto(gobject = visium_brain,
scalefactor = 6000,
verbose = TRUE)
## add gene & cell statistics
visium_brain <- addStatistics(gobject = visium_brain)
## visualize
spatPlot2D(gobject = visium_brain,
show_image = TRUE,
point_alpha = 0.7,
cell_color = "nr_feats",
color_as_factor = FALSE)
## highly variable features / genes (HVF)
visium_brain <- calculateHVF(gobject = visium_brain,
save_plot = TRUE)
## run PCA on expression values (default)
gene_metadata <- fDataDT(visium_brain)
featgenes <- gene_metadata[hvf == "yes" & perc_cells > 3 & mean_expr_det > 0.4]$feat_ID
## run PCA on expression values (default)
visium_brain <- runPCA(gobject = visium_brain,
feats_to_use = featgenes)
screePlot(visium_brain,
ncp = 30)
dimPlot2D(gobject = visium_brain,
dim_reduction_to_use = "pca")
## sNN network (default)
visium_brain <- createNearestNetwork(gobject = visium_brain,
dimensions_to_use = 1:10,
k = 15)
## Leiden clustering
visium_brain <- doLeidenCluster(gobject = visium_brain,
resolution = 0.4,
n_iterations = 1000)
plotUMAP(gobject = visium_brain,
cell_color = "leiden_clus",
show_NN_network = TRUE,
point_size = 2.5)
# spatial and dimension plots
spatDimPlot(gobject = visium_brain,
cell_color = "leiden_clus",
dim_point_size = 2,
spat_point_size = 2.5)
spatDimPlot(gobject = visium_brain,
cell_color = "nr_feats",
color_as_factor = FALSE,
dim_point_size = 2,
spat_point_size = 2.5)
# dimension plots grouped by cluster
spatPlot2D(visium_brain,
cell_color = "leiden_clus",
coord_fix_ratio = 1)
Plot with group by:
spatPlot2D(visium_brain,
cell_color = "leiden_clus",
group_by = "leiden_clus",
coord_fix_ratio = 1,
cow_n_col = 6,
show_legend = FALSE)
Highlight one or more groups:
spatPlot2D(visium_brain,
cell_color = "leiden_clus",
select_cell_groups = "8",
coord_fix_ratio = 1,
show_other_cells = TRUE,
cell_color_code = c("8" = "red"),
other_cell_color = "grey",
other_point_size = 1.5)
# create and show subset
DG_subset <- subsetGiottoLocs(visium_brain,
x_max = 6500, x_min = 3000,
y_max = -2500, y_min = -5500,
return_gobject = TRUE)
spatDimPlot(gobject = DG_subset,
cell_color = "leiden_clus",
spat_point_size = 5)
## ------------------ ##
## Gini markers
markers_gini <- findMarkers_one_vs_all(gobject = visium_brain,
method = "gini",
expression_values = "normalized",
cluster_column = "leiden_clus",
min_feats = 20,
min_expr_gini_score = 0.5,
min_det_gini_score = 0.5)
topgenes_gini <- markers_gini[, head(.SD, 2), by = "cluster"]$feats
# violinplot
violinPlot(visium_brain,
feats = unique(topgenes_gini),
cluster_column = "leiden_clus",
strip_text = 8,
strip_position = "right")
# cluster heatmap
plotMetaDataHeatmap(visium_brain,
selected_feats = unique(topgenes_gini),
metadata_cols = "leiden_clus",
x_text_size = 10,
y_text_size = 10)
# umap plots
dimFeatPlot2D(visium_brain,
expression_values = "scaled",
feats = markers_gini[, head(.SD, 1), by = "cluster"]$feats,
cow_n_col = 4,
point_size = 0.75)
## ------------------ ##
# Scran Markers
markers_scran <- findMarkers_one_vs_all(gobject = visium_brain,
method = "scran",
expression_values = "normalized",
cluster_column = "leiden_clus")
topgenes_scran <- markers_scran[, head(.SD, 2), by = "cluster"]$feats
# violinplot
violinPlot(visium_brain,
feats = unique(topgenes_scran),
cluster_column = "leiden_clus",
strip_text = 10,
strip_position = "right")
# cluster heatmap
plotMetaDataHeatmap(visium_brain,
selected_feats = topgenes_scran,
metadata_cols = "leiden_clus")
# umap plots
dimFeatPlot2D(visium_brain,
expression_values = "scaled",
feats = markers_scran[, head(.SD, 1), by = "cluster"]$feats,
cow_n_col = 3,
point_size = 1)
Visium spatial transcriptomics does not provide single-cell resolution, making cell type annotation a harder problem. Giotto provides several ways to calculate enrichment of specific cell-type signature gene lists:
# download data to results directory ####
# if wget is installed, set method = "wget"
# if you run into authentication issues with wgeTRUE, then add " extra = "--no-check-certificate" "
GiottoData::getSpatialDataset(dataset = "scRNA_mouse_brain",
directory = data_path)
sc_expression <- file.path(data_path, "brain_sc_expression_matrix.txt.gz")
sc_metadata <- file.path(data_path, "brain_sc_metadata.csv")
giotto_SC <- createGiottoObject(expression = sc_expression,
instructions = instructions)
giotto_SC <- addCellMetadata(giotto_SC,
new_metadata = data.table::fread(sc_metadata))
giotto_SC <- normalizeGiotto(giotto_SC)
# Create PAGE matrix
# PAGE matrix should be a binary matrix with each row represent a gene marker and each column represent a cell type
# There are several ways to create PAGE matrix
# 1.1 create binary matrix of cell signature genes
# small example #
gran_markers <- c("Nr3c2", "Gabra5", "Tubgcp2", "Ahcyl2",
"Islr2", "Rasl10a", "Tmem114", "Bhlhe22",
"Ntf3", "C1ql2")
oligo_markers <- c("Efhd1", "H2-Ab1", "Enpp6", "Ninj2",
"Bmp4", "Tnr", "Hapln2", "Neu4",
"Wfdc18", "Ccp110")
di_mesench_markers <- c("Cartpt", "Scn1a", "Lypd6b", "Drd5",
"Gpr88", "Plcxd2", "Cpne7", "Pou4f1",
"Ctxn2", "Wnt4")
PAGE_matrix_1 <- makeSignMatrixPAGE(sign_names = c("Granule_neurons",
"Oligo_dendrocytes",
"di_mesenchephalon"),
sign_list = list(gran_markers,
oligo_markers,
di_mesench_markers))
# ----
# 1.2 [shortcut] fully pre-prepared matrix for all cell types
sign_matrix_path <- system.file("extdata", "sig_matrix.txt", package = "GiottoData")
brain_sc_markers <- data.table::fread(sign_matrix_path)
PAGE_matrix <- as.matrix(brain_sc_markers[,-1])
rownames(PAGE_matrix) <- brain_sc_markers$Event
# ---
# 1.3 make PAGE matrix from single cell dataset
markers_scran <- findMarkers_one_vs_all(gobject = giotto_SC,
method = "scran",
expression_values = "normalized",
cluster_column = "Class",
min_feats = 3)
topgenes_scran <- markers_scran[, head(.SD, 10), by = "cluster"]
celltypes <- levels(factor(markers_scran$cluster))
sign_list <- list()
for (i in 1:length(celltypes)){
sign_list[[i]] <- topgenes_scran[which(topgenes_scran$cluster == celltypes[i]),]$feats
}
PAGE_matrix <- makeSignMatrixPAGE(sign_names = celltypes,
sign_list = sign_list)
# 1.4 enrichment test with PAGE
# runSpatialEnrich() can also be used as a wrapper for all currently provided enrichment options
visium_brain <- runPAGEEnrich(gobject = visium_brain,
sign_matrix = PAGE_matrix)
# 1.5 heatmap of enrichment versus annotation (e.g. clustering result)
cell_types_PAGE <- colnames(PAGE_matrix)
plotMetaDataCellsHeatmap(gobject = visium_brain,
metadata_cols = "leiden_clus",
value_cols = cell_types_PAGE,
spat_enr_names = "PAGE",
x_text_size = 8,
y_text_size = 8)
# 1.6 visualizations
spatCellPlot2D(gobject = visium_brain,
spat_enr_names = "PAGE",
cell_annotation_values = cell_types_PAGE[1:4],
cow_n_col = 2,
coord_fix_ratio = 1,
point_size = 1.25,
show_legend = TRUE)
spatDimCellPlot2D(gobject = visium_brain,
spat_enr_names = "PAGE",
cell_annotation_values = cell_types_PAGE[1:4],
cow_n_col = 1,
spat_point_size = 1,
plot_alignment = "horizontal",
save_param = list(base_width = 7, base_height = 10))
visium_brain <- runHyperGeometricEnrich(gobject = visium_brain,
expression_values = "normalized",
sign_matrix = PAGE_matrix)
cell_types_HyperGeometric <- colnames(PAGE_matrix)
spatCellPlot(gobject = visium_brain,
spat_enr_names = "hypergeometric",
cell_annotation_values = cell_types_HyperGeometric[1:4],
cow_n_col = 2,
coord_fix_ratio = NULL,
point_size = 1.75)
# Create rank matrix, not that rank matrix is different from PAGE
# A count matrix and a vector for all cell labels will be needed
rank_matrix <- makeSignMatrixRank(sc_matrix = getExpression(giotto_SC,
values = "normalized",
output = "matrix"),
sc_cluster_ids = pDataDT(giotto_SC)$Class)
colnames(rank_matrix) <- levels(factor(pDataDT(giotto_SC)$Class))
visium_brain <- runRankEnrich(gobject = visium_brain,
sign_matrix = rank_matrix,
expression_values = "normalized")
# Plot Rank enrichment result
spatCellPlot2D(gobject = visium_brain,
spat_enr_names = "rank",
cell_annotation_values = colnames(rank_matrix)[1:4],
cow_n_col = 2,
coord_fix_ratio = 1,
point_size = 1)
# Create DWLS matrix, not that DWLS matrix is different from PAGE and rank
# A count matrix a vector for a list of gene signatures and a vector for all cell labels will be needed
DWLS_matrix <- makeSignMatrixDWLSfromMatrix(
matrix = getExpression(giotto_SC,
values = "normalized",
output = "matrix"),
cell_type = pDataDT(giotto_SC)$Class,
sign_gene = topgenes_scran$feats)
visium_brain <- runDWLSDeconv(gobject = visium_brain,
sign_matrix = DWLS_matrix)
# Plot DWLS deconvolution result
spatCellPlot2D(gobject = visium_brain,
spat_enr_names = "DWLS",
cell_annotation_values = levels(factor(pDataDT(giotto_SC)$Class))[1:4],
cow_n_col = 2,
coord_fix_ratio = 1,
point_size = 1)
# Plot DWLS deconvolution result with Pie plots
spatDeconvPlot(visium_brain,
show_image = TRUE,
radius = 50)
visium_brain <- createSpatialGrid(gobject = visium_brain,
sdimx_stepsize = 400,
sdimy_stepsize = 400,
minimum_padding = 0)
showGiottoSpatGrids(visium_brain)
spatPlot2D(visium_brain,
cell_color = "leiden_clus",
show_grid = TRUE,
grid_color = "red",
spatial_grid_name = "spatial_grid")
visium_brain <- createSpatialNetwork(gobject = visium_brain,
method = "kNN",
k = 5,
maximum_distance_knn = 400,
name = "spatial_network")
showGiottoSpatNetworks(visium_brain)
spatPlot2D(gobject = visium_brain,
show_network= TRUE,
network_color = "blue",
spatial_network_name = "spatial_network")
## rank binarization
ranktest <- binSpect(visium_brain,
bin_method = "rank",
calc_hub = TRUE,
hub_min_int = 5,
spatial_network_name = "spatial_network")
spatFeatPlot2D(visium_brain,
expression_values = "scaled",
feats = ranktest$feats[1:6],
cow_n_col = 2,
point_size = 1.5)
# cluster the top 500 spatial genes into 20 clusters
my_spatial_genes <- ranktest[1:1500,]$feats
# here we use existing detectSpatialCorGenes function to calculate pairwise distances between genes (but set network_smoothing=0 to use default clustering)
spat_cor_netw_DT <- detectSpatialCorFeats(visium_brain,
method = "network",
spatial_network_name = "spatial_network",
subset_feats = my_spatial_genes)
# 2. identify most similar spatially correlated genes for one gene
top10_genes <- showSpatialCorFeats(spat_cor_netw_DT,
feats = "Mbp",
show_top_feats = 10)
spatFeatPlot2D(visium_brain,
expression_values = "scaled",
feats = top10_genes$variable[1:4],
point_size = 3)
# cluster spatial genes
spat_cor_netw_DT <- clusterSpatialCorFeats(spat_cor_netw_DT,
name = "spat_netw_clus",
k = 20)
# visualize clusters
heatmSpatialCorFeats(visium_brain,
spatCorObject = spat_cor_netw_DT,
use_clus_name = "spat_netw_clus",
heatmap_legend_param = list(title = NULL))
# 4. rank spatial correlated clusters and show genes for selected clusters
netw_ranks <- rankSpatialCorGroups(visium_brain,
spatCorObject = spat_cor_netw_DT,
use_clus_name = "spat_netw_clus")
top_netw_spat_cluster <- showSpatialCorFeats(spat_cor_netw_DT,
use_clus_name = "spat_netw_clus",
selected_clusters = 6,
show_top_feats = 1)
# 5. create metagene enrichment score for clusters
cluster_genes_DT <- showSpatialCorFeats(spat_cor_netw_DT,
use_clus_name = "spat_netw_clus",
show_top_feats = 1)
cluster_genes <- cluster_genes_DT$clus
names(cluster_genes) <- cluster_genes_DT$feat_ID
visium_brain <- createMetafeats(visium_brain,
feat_clusters = cluster_genes,
name = "cluster_metagene")
spatCellPlot(visium_brain,
spat_enr_names = "cluster_metagene",
cell_annotation_values = netw_ranks$clusters,
point_size = 1,
cow_n_col = 5)
# top 30 genes per spatial co-expression cluster
table(spat_cor_netw_DT$cor_clusters$spat_netw_clus)
coexpr_dt <- data.table::data.table(
genes = names(spat_cor_netw_DT$cor_clusters$spat_netw_clus),
cluster = spat_cor_netw_DT$cor_clusters$spat_netw_clus)
data.table::setorder(coexpr_dt, cluster)
top30_coexpr_dt <- coexpr_dt[, head(.SD, 30), by = cluster]
my_spatial_genes <- top30_coexpr_dt$genes
visium_brain <- runPCA(gobject = visium_brain,
feats_to_use = my_spatial_genes,
name = "custom_pca")
visium_brain <- runUMAP(visium_brain,
dim_reduction_name = "custom_pca",
dimensions_to_use = 1:20,
name = "custom_umap")
visium_brain <- createNearestNetwork(gobject = visium_brain,
dim_reduction_name = "custom_pca",
dimensions_to_use = 1:20,
k = 5,
name = "custom_NN")
visium_brain <- doLeidenCluster(gobject = visium_brain,
network_name = "custom_NN",
resolution = 0.15,
n_iterations = 1000,
name = "custom_leiden")
cell_metadata <- pDataDT(visium_brain)
cell_clusters <- unique(cell_metadata$custom_leiden)
giotto_colors <- getDistinctColors(length(cell_clusters))
names(giotto_colors) <- cell_clusters
spatPlot2D(visium_brain,
cell_color = "custom_leiden",
cell_color_code = giotto_colors,
coord_fix_ratio = 1)
plotUMAP(gobject = visium_brain,
cell_color = "custom_leiden",
cell_color_code = giotto_colors,
point_size = 1.5)
# do HMRF with different betas on top 30 genes per spatial co-expression module
hmrf_folder <- file.path(data_path, "HMRF")
if(!file.exists(hmrf_folder)) dir.create(hmrf_folder, recursive = TRUE)
HMRF_spatial_genes <- doHMRF(gobject = visium_brain,
expression_values = "scaled",
spatial_genes = my_spatial_genes,
k = 20,
spatial_network_name="spatial_network",
betas = c(0, 10, 5),
output_folder = file.path(hmrf_folder, "Spatial_genes/SG_topgenes_k20_scaled"))
visium_brain <- addHMRF(gobject = visium_brain,
HMRFoutput = HMRF_spatial_genes,
k = 20,
betas_to_add = c(0, 10, 20, 30, 40),
hmrf_name = "HMRF")
spatPlot2D(gobject = visium_brain,
cell_color = "HMRF_k20_b.40")
4.4.0 (2024-04-24)
R version : x86_64-apple-darwin20
Platform: macOS Sonoma 14.6.1
Running under
: default
Matrix products: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
LAPACK
:
locale1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
[
: America/New_York
time zone: internal
tzcode source
:
attached base packages1] stats graphics grDevices utils datasets methods base
[
:
other attached packages1] Giotto_4.1.0 GiottoClass_0.3.4
[
namespace (and not attached):
loaded via a [1] RColorBrewer_1.1-3 shape_1.4.6.1 rstudioapi_0.16.0
4] jsonlite_1.8.8 magrittr_2.0.3 magick_2.8.4
[7] farver_2.1.2 rmarkdown_2.27 GlobalOptions_0.1.2
[10] zlibbioc_1.50.0 ragg_1.3.2 vctrs_0.6.5
[13] Cairo_1.6-2 DelayedMatrixStats_1.26.0 GiottoUtils_0.1.10
[16] terra_1.7-78 htmltools_0.5.8.1 S4Arrays_1.4.1
[19] BiocNeighbors_1.22.0 SparseArray_1.4.8 parallelly_1.38.0
[22] htmlwidgets_1.6.4 plyr_1.8.9 plotly_4.10.4
[25] igraph_2.0.3 iterators_1.0.14 lifecycle_1.0.4
[28] pkgconfig_2.0.3 rsvd_1.0.5 Matrix_1.7-0
[31] R6_2.5.1 fastmap_1.2.0 clue_0.3-65
[34] GenomeInfoDbData_1.2.12 MatrixGenerics_1.16.0 future_1.34.0
[37] digest_0.6.36 colorspace_2.1-1 S4Vectors_0.42.1
[40] dqrng_0.4.1 irlba_2.3.5.1 textshaping_0.4.0
[43] GenomicRanges_1.56.1 beachmat_2.20.0 labeling_0.4.3
[46] RcppZiggurat_0.1.6 progressr_0.14.0 fansi_1.0.6
[49] polyclip_1.10-7 httr_1.4.7 abind_1.4-5
[52] compiler_4.4.0 doParallel_1.0.17 withr_3.0.0
[55] backports_1.5.0 BiocParallel_1.38.0 ggforce_0.4.2
[58] R.utils_2.12.3 MASS_7.3-61 DelayedArray_0.30.1
[61] rjson_0.2.21 bluster_1.14.0 gtools_3.9.5
[64] GiottoVisuals_0.2.4 tools_4.4.0 scatterpie_0.2.3
[67] future.apply_1.11.2 quadprog_1.5-8 R.oo_1.26.0
[70] glue_1.7.0 dbscan_1.2-0 grid_4.4.0
[73] checkmate_2.3.2 Rtsne_0.17 cluster_2.1.6
[76] reshape2_1.4.4 generics_0.1.3 gtable_0.3.5
[79] R.methodsS3_1.8.2 tidyr_1.3.1 data.table_1.15.4
[82] BiocSingular_1.20.0 ScaledMatrix_1.12.0 metapod_1.12.0
[85] sp_2.1-4 utf8_1.2.4 XVector_0.44.0
[88] BiocGenerics_0.50.0 foreach_1.5.2 ggrepel_0.9.5
[91] pillar_1.9.0 stringr_1.5.1 limma_3.60.4
[94] circlize_0.4.16 tweenr_2.0.3 dplyr_1.1.4
[97] lattice_0.22-6 FNN_1.1.4 deldir_2.0-4
[100] tidyselect_1.2.1 ComplexHeatmap_2.20.0 SingleCellExperiment_1.26.0
[103] locfit_1.5-9.10 scuttle_1.14.0 knitr_1.48
[106] IRanges_2.38.1 edgeR_4.2.1 SummarizedExperiment_1.34.0
[109] scattermore_1.2 stats4_4.4.0 xfun_0.46
[112] Biobase_2.64.0 statmod_1.5.0 matrixStats_1.3.0
[115] stringi_1.8.4 UCSC.utils_1.0.0 ggfun_0.1.5
[118] lazyeval_0.2.2 yaml_2.3.10 evaluate_0.24.0
[121] codetools_0.2-20 GiottoData_0.2.13 tibble_3.2.1
[124] colorRamp2_0.1.0 cli_3.6.3 RcppParallel_5.1.8
[127] uwot_0.2.2 reticulate_1.38.0 systemfonts_1.1.0
[130] munsell_0.5.1 Rcpp_1.0.13 GenomeInfoDb_1.40.1
[133] globals_0.16.3 png_0.1-8 Rfast_2.1.0
[136] parallel_4.4.0 ggplot2_3.5.1 scran_1.32.0
[139] sparseMatrixStats_1.16.0 listenv_0.9.1 SpatialExperiment_1.14.0
[142] viridisLite_0.4.2 scales_1.3.0 purrr_1.0.2
[145] crayon_1.5.3 GetoptLong_1.0.5 rlang_1.1.4
[148] cowplot_1.1.3 [