Identify the NeMO dataset

Log in to your Terra account, open the menu on the left, and select Library > Datasets.

On the datasets page, scroll down until you find the NeMO database

Load the dataset to a terra workspace

Clicking the Browse data button redirects you to the NeMO website. Use the filters menu to select the Slide-seq technology, then choose a sample to download and add the file to the cart.

When selecting the sample, you will see multiple files available to download, including the fasta files. Uncheck the fasta files boxes and keep only the expression.mex.tar.gz file.

Click the Download button and select the option “Export to terra”.

You will be directed back to your terra bio account. Select the Workspace to add your dataset.

You will find the new file under the Data tab > file section.

Open the dataset

Scroll to the right and locate the URL of the file; it should look like https://data.nemoarchive.org/biccn/grant/rf1_macosko/macosko/spatial_transcriptome/cellgroup/Slide-seq/mouse/processed/counts/2022-02-11_Puck_211013_01.matched.digital_expression.mex.tar.gz.

Open a cloud environment, using either Jupyter notebooks or RStudio. Open the terminal and load the file into your session by running the command wget <file url>. Uncompress the file; you will get a folder with three files: barcodes.tsv, features.tsv, and matrix.mtx.

Use these files to create a Giotto object and start running the analysis.

Run the analysis

You can use the following Giotto pipeline as an example. The sample 2020-12-19_Puck_201112_26 was used for running this tutorial.

Download data

Run the following commands in the terminal:

  • Get the expression data

wget https://data.nemoarchive.org/biccn/grant/rf1_macosko/macosko/spatial_transcriptome/cellgroup/Slide-seq/mouse/processed/counts/2020-12-19_Puck_201112_26.matched.digital_expression.mex.tar.gz

Extract the archive by running:

tar -xf 2020-12-19_Puck_201112_26.matched.digital_expression.mex.tar.gz

  • Get the spatial coordinates

wget https://data.nemoarchive.org/biccn/grant/rf1_macosko/macosko/spatial_transcriptome/cellgroup/Slide-seq/mouse/processed/other/2020-12-19_Puck_201112_26.BeadLocationsForR.csv.tar

Extract the archive by running:

tar -xf 2020-12-19_Puck_201112_26.BeadLocationsForR.csv.tar

Pre-processing

Read the expression files and create the expression matrix.

# Folder created by extracting the expression archive.
datadir <- "2020-12-19_Puck_201112_26.matched.digital_expression"

# Cell (bead) barcodes and feature names; both files are read without a
# header row.
barcodes <- data.table::fread(fs::path(datadir, "barcodes.tsv"),
                              header = FALSE)
features <- data.table::fread(fs::path(datadir, "features.tsv"),
                              header = FALSE)
# Non-zero entries of the count matrix, one (feature, cell, value) triplet
# per row. skip = 2 assumes exactly two lines (banner and dimensions)
# precede the entries -- TODO confirm if the file carries extra comment lines.
expression_matrix <- data.table::fread(fs::path(datadir, "matrix.mtx"),
                                       skip = 2)

colnames(barcodes) <- "cell_ID"
colnames(features) <- "feat_ID"
colnames(expression_matrix) <- c("feat_ID", "cell_ID", "value")

# In the Matrix Market coordinate format feat_ID and cell_ID are 1-based
# positions into features.tsv and barcodes.tsv. Check them before indexing.
stopifnot(
  all(expression_matrix$feat_ID >= 1L),
  all(expression_matrix$feat_ID <= nrow(features)),
  all(expression_matrix$cell_ID >= 1L),
  all(expression_matrix$cell_ID <= nrow(barcodes))
)

# Fill a matrix of fixed size (features x barcodes) by position. Features or
# cells without any non-zero entry are kept as all-zero rows/columns, so the
# names assigned below always line up with the data. Casting the triplets
# instead would silently drop them and shift or break the labels.
entry_index <- cbind(expression_matrix$feat_ID, expression_matrix$cell_ID)
entry_value <- expression_matrix$value
expression_matrix <- matrix(0,
                            nrow = nrow(features),
                            ncol = nrow(barcodes))
expression_matrix[entry_index] <- entry_value

expression_matrix <- as.data.frame(expression_matrix)
colnames(expression_matrix) <- barcodes$cell_ID
rownames(expression_matrix) <- features$feat_ID

# save the expression matrix
data.table::fwrite(expression_matrix, "expression_matrix.csv",
                   sep = ",", col.names = TRUE, row.names = TRUE)

Read the spatial coordinates file and filter the cell IDs.

# Bead coordinates extracted from the BeadLocationsForR archive
spatial_locs <- as.data.frame(
  data.table::fread("2020-12-19_Puck_201112_26.BeadLocationsForR/2020-12-19_Puck_201112_26.BeadLocationsForR.csv.gz")
)

# keep only the beads whose barcode is also present in the expression data
has_expression <- spatial_locs$barcodes %in% barcodes$cell_ID
spatial_locs <- spatial_locs[has_expression, ]

# write the filtered coordinates to disk (header row, no row names)
data.table::fwrite(
  spatial_locs,
  "spatial_locs.csv",
  sep = ",",
  row.names = FALSE,
  col.names = TRUE
)

Load package

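Load the Giotto package with library(Giotto). Then check that the Giotto Python environment is available with checkGiottoEnvironment(); if it is not, install it with installGiottoEnvironment().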

Create instructions

# Ask Giotto to save plots (save_plot = TRUE) under the "results" directory
instructions <- createGiottoInstructions(
  save_dir = "results",
  save_plot = TRUE
)

Create Giotto object

# Build the object from the two CSV files written during pre-processing
giotto_object <- createGiottoObject(
  expression = "expression_matrix.csv",
  spatial_locs = "spatial_locs.csv",
  instructions = instructions
)

# First look at the spatial positions
spatPlot2D(gobject = giotto_object, point_size = 0.5)

QC

# Detection distribution at the cell level (100 bins)
filterDistributions(
  gobject = giotto_object,
  nr_bins = 100,
  detection = "cells"
)

# Detection distribution at the feature level (100 bins)
filterDistributions(
  gobject = giotto_object,
  nr_bins = 100,
  detection = "feats"
)

Filtering

# Thresholds: at least 10 detected features per cell, and each feature
# detected in at least 10 cells
giotto_object <- filterGiotto(
  gobject = giotto_object,
  feat_det_in_min_cells = 10,
  min_det_feats_per_cell = 10
)

Normalization

# Normalize the expression values using Giotto's default settings
giotto_object <- normalizeGiotto(gobject = giotto_object)

Add statistics

# Add cell and feature statistics to the object (default settings)
giotto_object <- addStatistics(gobject = giotto_object)

HVF

# Identify highly variable features (default settings)
giotto_object <- calculateHVF(gobject = giotto_object)

Dimension reduction

# Principal component analysis with default settings
giotto_object <- runPCA(gobject = giotto_object)

# Scree plot with ncp = 30, to help choose how many components to keep
screePlot(gobject = giotto_object, ncp = 30)

Clustering

# UMAP embedding computed on the first 10 dimensions
giotto_object <- runUMAP(
  gobject = giotto_object,
  dimensions_to_use = seq_len(10)
)

# Nearest-neighbour network (default settings) used by the clustering step
giotto_object <- createNearestNetwork(gobject = giotto_object)

# Leiden clustering at resolution 1
giotto_object <- doLeidenCluster(
  gobject = giotto_object,
  resolution = 1
)

Plot

# Leiden clusters shown on the PCA embedding
plotPCA(
  gobject = giotto_object,
  point_size = 1,
  cell_color = "leiden_clus"
)

# Leiden clusters shown on the UMAP embedding
plotUMAP(
  gobject = giotto_object,
  point_size = 1,
  cell_color = "leiden_clus"
)

# Leiden clusters shown at their spatial positions
spatPlot2D(
  gobject = giotto_object,
  point_size = 1,
  cell_color = "leiden_clus"
)