1 Introduction

This vignette demonstrates how to use a dbMatrix within a Giotto object. A dbMatrix is a database-backed matrix: the matrix data are stored in a database. This allows large matrices to be stored and retrieved efficiently, and makes it possible to work with cell count matrices that are larger than memory.

2 Set up Giotto

# Ensure Giotto Suite is installed.
# requireNamespace() is used for the check rather than matching against the
# full installed.packages() matrix, which compares the name with every field
# of every installed package instead of just the package names.
if (!requireNamespace("Giotto", quietly = TRUE)) {
  devtools::install_github("drieslab/Giotto")
}

# Ensure GiottoData, a small helper package for tutorials, is installed.
if (!requireNamespace("GiottoData", quietly = TRUE)) {
  devtools::install_github("drieslab/GiottoData")
}

library(Giotto)
library(GiottoData)

# Check whether the Python environment for Giotto has been installed.
genv_exists <- checkGiottoEnvironment()

# The installation only needs to be run once, so it is skipped when the
# environment is already present.
if (!genv_exists) {
  installGiottoEnvironment()
}

3 Create Giotto object with dbMatrix

# Load the mini "visium" test dataset from the GiottoData package
visium <- GiottoData::loadGiottoMini(dataset = "visium")

# Pull the expression data out of the Giotto object as a matrix; it serves as
# the test input for the dbMatrix below
dgc <- getExpression(visium, output = "matrix")

# Create a DBI connection object backed by an in-memory DuckDB database.
# The driver comes from the duckdb package (the namespace was previously
# misspelled as "duckb", which made this call fail).
con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")

# Create a dbSparseMatrix from the extracted expression matrix using the
# dbMatrix constructor function. The matrix is registered on `con` under the
# name "dgc".
# NOTE(review): overwrite = TRUE presumably replaces an existing "dgc" table
# on the connection -- confirm against the dbMatrix documentation.
dbsm <- dbMatrix::dbMatrix(
  value = dgc,
  con = con,
  name = "dgc",
  class = "dbSparseMatrix",
  overwrite = TRUE
)

# Wrap the dbSparseMatrix in a Giotto exprObj named "raw"
expObj_db <- createExprObj(
  expression_data = dbsm,
  expression_matrix_class = "dbSparseMatrix",
  name = "raw"
)

# Build a Giotto object that contains only this cell count matrix
gobject_db <- createGiottoObject(expression = expObj_db)

4 Preprocess Giotto object with dbMatrix

# Filter the Giotto object based on the raw expression values
gobject_db_filtered <- filterGiotto(
  gobject_db,
  spat_unit = "cell",
  feat_type = "rna",
  expression_values = "raw"
)

# Scale the raw expression values across features and across cells.
# Library-size normalization and log normalization are both switched off
# in this call.
gobject_db_filtered <- normalizeGiotto(
  gobject = gobject_db_filtered,
  spat_unit = "cell",
  feat_type = "rna",
  expression_values = "raw",
  library_size_norm = FALSE,
  log_norm = FALSE,
  scale_feats = TRUE,
  scale_cells = TRUE
)