Note: Feature aggregation is mainly used with subcellular datasets
See also: Giotto object creation for a more complete introduction to raw spatial features and spatial units.
Many analysis methods work best with spatial features that have been aggregated based on some kind of functional context e.g. a cell, niche, or domain into an expression matrix format. Giotto provides tools to do this aggregation with sets of spatial unit annotations (represented as giottoPolygon
) overlapped against point features (giottoPoints
) and raster intensity-based features (giottoLargeImage
):
calculateOverlap()
and turn that into an expression matrix.These are generic functions and they can either be performed directly on the Giotto subobjects (see function documentation examples above for more info) or from the overarching giotto
analysis object.
# Ensure Giotto Suite is installed.
if(!"Giotto" %in% installed.packages()) {
pak::pkg_install("drieslab/Giotto")
}
# Ensure GiottoData is installed.
if(!"GiottoData" %in% installed.packages()) {
pak::pkg_install("drieslab/GiottoData")
}
library(Giotto)
Giotto performs feature point aggregation by converting polygonal annotations to raster masks and then extracting the overlapped points data. This is a slight approximation, but is a large speed improvement.
g <- GiottoData::loadGiottoMini("vizgen")
# aggregate subobjects
g <- calculateOverlap(g,
spatial_info = "z0",
feat_info = "rna"
)
g <- overlapToMatrix(g,
type = "point",
poly_info = "z0",
feat_info = "rna",
name = "raw"
)
Comparison of Giotto vs default outputs
Due to the approximation and possible differences by which points overlaps are called depending on the software, it is possible for there to be small differences between the values calculated from Giotto Suite vs those provided as part of the standard outputs.
Here we provide an example comparison between Giotto Suite overlaps vs original Nanostring CosMx expression matrix outputs from their legacy FFPE non-small-cell lung cancer dataset. This example is using FOV2 from the Lung12 dataset.
import data
# ** SET PATH TO FOLDER CONTAINING COSMX DATA **
data_path <- "path/to/data"
cosmx <- importCosMx(cosmx_dir = data_dir)
# fix offsets not being detected
cosmx$offsets <- data.table::fread(file.path(data_dir, "Lung12_fov_positions_file.csv"))
names(cosmx$offsets) <- c("fov", "x", "y")
# set FOV to load
cosmx$fovs <- 2
polys <- cosmx$load_polys(shift_vertical_step = TRUE)
points <- cosmx$load_transcripts()
nano_expr <- cosmx$load_expression()
nano_expr <- nano_expr[[1]][] # select the `rna` values and drop to dgCMatrix
plot(points$rna, raster = FALSE)
plot(polys[[1]], add = TRUE, border = "red")
perform Giotto aggregation
giotto_expr <- calculateOverlap(polys[[1]], points$rna) |>
overlapToMatrix()
expression matrix discrepancies
# No differences in features found
setequal(rownames(giotto_expr), rownames(nano_expr)) # TRUE
# Differences in cells found
length(colnames(giotto_expr)) # 2983
length(colnames(nano_expr)) # 2985
# What accounts for this discrepancy?
colnames(nano_expr)[which(!colnames(nano_expr) %in% colnames(giotto_expr))]
# "c_1_2_1796" "c_1_2_244"
#
plot(polys[[1]][c("c_1_2_1796", "c_1_2_244")], col = "black")
plot(polys[[1]], add = TRUE, border = "red", lwd = 0.5)
These polygons are entirely overlapped by other polygons and thus ignored
prep data for comparison
# Align matrix ordering
# Match samples on giotto-found
giotto_expr <- giotto_expr[sort(rownames(giotto_expr)), sort(colnames(giotto_expr))]
nano_expr <- nano_expr[sort(rownames(nano_expr)), sort(colnames(giotto_expr))]
# Prepare matrix comparison
# Summarise sparse matrices (i and j are matrix indices, x is value)
g_dt <- data.table::as.data.table(Matrix::summary(giotto_expr))
g_dt[, method := "giotto"]
n_dt <- data.table::as.data.table(Matrix::summary(nano_expr))
n_dt[, method := "nanostring"]
test_dt <- data.table::rbindlist(list(g_dt, n_dt))
# Combine sparse matrix indices
test_dt[, combo := paste0(i,"-",j)]
plot comparisons
library(ggplot2)
# matrix index similarity
pl_n <- ggplot()
pl_n <- pl_n + geom_tile(data = n_dt, aes(x = i, y = j, fill = log(x+1)))
pl_n <- pl_n + ggtitle("Nanostring Sparse Matrix")
pl_n <- pl_n + scale_fill_gradient(low = "blue", high = "red")
pl_n <- pl_n + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "black"))
pl_g <- ggplot()
pl_g <- pl_g + geom_tile(data = g_dt, aes(x = i, y = j, fill = log(x+1)))
pl_g <- pl_g + ggtitle("Giotto Sparse Matrix")
pl_g <- pl_g + scale_fill_gradient(low = "blue", high = "red")
pl_g <- pl_g + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "black"))
cowplot::plot_grid(pl_n, pl_g,
nrow = 2,
labels = "AUTO"
)
# directly compare differences in matrix values (counts assigned)
test_dt[, diff := diff(x), by = .(i,j)]
data.table::setorder(test_dt, var)
# plot difference in values
pl <- ggplot()
pl <- pl + geom_bar(data = test_dt, aes(x = diff))
pl <- pl + theme_bw()
pl <- pl + labs(x = "difference nanostring - Giotto")
print(pl)
Overall of the shared entries that were called by both methods (common i and j indices), there appears to be no major bias in terms of counts/values assigned. Moreover, the vast majority of these shared entries have the same values (difference of 0).
g <- calculateOverlap(g,
name_overlap = "dapi",
spatial_info = "z0",
image_names = "dapi_z0"
)
g <- overlapToMatrix(g,
poly_info = "z0",
feat_info = "dapi",
type = "intensity"
)
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS 15.0.1
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] GiottoClass_0.4.5
loaded via a namespace (and not attached):
[1] SummarizedExperiment_1.34.0 rjson_0.2.21 xfun_0.49
[4] raster_3.6-26 Biobase_2.64.0 lattice_0.22-6
[7] tools_4.4.1 stats4_4.4.1 proxy_0.4-27
[10] GiottoData_0.2.15 pkgconfig_2.0.3 KernSmooth_2.23-24
[13] Matrix_1.7-0 data.table_1.16.2 checkmate_2.3.2
[16] S4Vectors_0.42.0 lifecycle_1.0.4 GenomeInfoDbData_1.2.12
[19] compiler_4.4.1 GiottoUtils_0.2.3 terra_1.7-78
[22] codetools_0.2-20 GenomeInfoDb_1.40.0 htmltools_0.5.8.1
[25] class_7.3-22 yaml_2.3.10 exactextractr_0.10.0
[28] crayon_1.5.3 classInt_0.4-10 SingleCellExperiment_1.26.0
[31] DelayedArray_0.30.0 magick_2.8.5 abind_1.4-8
[34] gtools_3.9.5 digest_0.6.37 sf_1.0-16
[37] fastmap_1.2.0 grid_4.4.1 cli_3.6.3
[40] SparseArray_1.4.1 magrittr_2.0.3 S4Arrays_1.4.0
[43] e1071_1.7-14 withr_3.0.2 UCSC.utils_1.0.0
[46] backports_1.5.0 sp_2.1-4 rmarkdown_2.29
[49] XVector_0.44.0 httr_1.4.7 matrixStats_1.4.1
[52] igraph_2.1.1 reticulate_1.39.0 png_0.1-8
[55] SpatialExperiment_1.14.0 evaluate_1.0.1 knitr_1.49
[58] GenomicRanges_1.56.0 IRanges_2.38.0 rlang_1.1.4
[61] Rcpp_1.0.13-1 glue_1.8.0 DBI_1.2.3
[64] BiocGenerics_0.50.0 pkgload_1.3.4 rstudioapi_0.16.0
[67] jsonlite_1.8.9 R6_2.5.1 units_0.8-5
[70] MatrixGenerics_1.16.0 zlibbioc_1.50.0