Run scrublet doublet detection for raw expression. Intended for single-cell data.

doScrubletDetect(
  gobject,
  feat_type = NULL,
  spat_unit = "cell",
  expression_values = "raw",
  expected_doublet_rate = 0.06,
  min_counts = 1,
  min_cells = 1,
  min_gene_variability_pctl = 85,
  n_prin_comps = 30,
  return_gobject = TRUE,
  seed = 1234
)

Arguments

gobject

giotto object containing expression data

feat_type

feature type

spat_unit

spatial unit

expression_values

expression values to use

expected_doublet_rate

expected fraction of transcriptomes that are doublets. The default of 0.06 is from the 10x Chromium guide.

min_counts

scrublet internal data filtering, min counts found to be considered a cell

min_cells

scrublet internal data filtering. min cells expressed to be considered a feat

min_gene_variability_pctl

scrublet internal PCA generation. highly variable gene percentile cutoff

n_prin_comps

number of PCs to use in PCA for detection

return_gobject

return as gobject if TRUE, data.table with cell_ID if FALSE

seed

If a numeric is provided, then it will be used as a seed. If NULL, no seed will be set.

Value

If return_gobject = FALSE, a data.table of cell_ID, doublet scores, and classifications is returned. If TRUE, that information is appended to the input giotto object's metadata and the giotto object is returned.

See also

This function wraps the Python package scrublet (doi:10.1016/j.cels.2018.11.005).

Examples

# Should only be done with single cell data, but this is just a
# convenient example.
g <- GiottoData::loadGiottoMini("visium")
#> 1. read Giotto object
#> 2. read Giotto feature information
#> 3. read Giotto spatial information
#> 3.1 read Giotto spatial shape information
#> 3.2 read Giotto spatial centroid information
#> 3.3 read Giotto spatial overlap information
#> 4. read Giotto image information
#> python already initialized in this session
#>  active environment : '/usr/bin/python3'
#>  python version : 3.12
#> checking default envname 'giotto_env'
#> a system default python environment was found
#> Using python path:
#>  "/usr/bin/python3"

g <- doScrubletDetect(g)
#> Error: package 'scrublet' is not yet installed
#> 
#>  To install:
#> ## active python env: '/usr/bin/python3' 
#> ## python version: 3.12
#> ## restart session then use GiottoClass::set_giotto_python_path() if this is incorrect
#> reticulate::conda_install(envname = '/usr/bin/python3', packages = c('scrublet'), pip = TRUE)

pDataDT(g) # doublet_scores and doublet cols are added
#>                 cell_ID in_tissue nr_feats perc_feats total_expr leiden_clus
#>                  <char>     <int>    <int>      <num>      <num>       <num>
#>   1: AACTCGATGGCGCAGT-1         1      265   41.79811  1057.9308           2
#>   2: GGCTGGCTAGCTTAAA-1         1      279   44.00631  1064.7493           5
#>   3: GACGCCTGTTGCAGGG-1         1      219   34.54259   964.9294           2
#>   4: GAGGGCATCGCGTATC-1         1      294   46.37224  1142.7664           2
#>   5: TCAACACATTGGGTAA-1         1      261   41.16719  1063.3517           2
#>  ---                                                                        
#> 620: GGTAGTGCTCGCACCA-1         1      179   28.23344   768.4749           5
#> 621: AAGCTCGTGCCAAGTC-1         1      195   30.75710   756.0675           5
#> 622: TATTCAATTCTAATCC-1         1      247   38.95899   921.8264           5
#> 623: TTCAAAGTCTCTAGCC-1         1      384   60.56782   916.5929           6
#> 624: TTGAATATGGACTTTC-1         1      380   59.93691   912.3051           6
#>      custom_leiden
#>              <num>
#>   1:             4
#>   2:             3
#>   3:             3
#>   4:             3
#>   5:             3
#>  ---              
#> 620:             4
#> 621:             4
#> 622:             4
#> 623:             7
#> 624:             4
dimPlot2D(g, cell_color = "doublet_scores", color_as_factor = FALSE)
#> Error in plot_point_layer_ggplot(ggobject = pl, instrs = instructions(gobject),     annotated_DT_selected = annotated_DT_selected, annotated_DT_other = annotated_DT_other,     cell_color = cell_color, color_as_factor = color_as_factor,     cell_color_code = cell_color_code, cell_color_gradient = cell_color_gradient,     gradient_midpoint = gradient_midpoint, gradient_style = gradient_style,     gradient_limits = gradient_limits, select_cell_groups = select_cell_groups,     select_cells = select_cells, show_other_cells = show_other_cells,     other_cell_color = other_cell_color, other_point_size = other_point_size,     show_cluster_center = show_cluster_center, show_center_label = show_center_label,     center_point_size = center_point_size, center_point_border_col = center_point_border_col,     center_point_border_stroke = center_point_border_stroke,     label_size = label_size, label_fontface = label_fontface,     edge_alpha = edge_alpha, point_size = point_size, point_alpha = point_alpha,     point_border_col = point_border_col, point_border_stroke = point_border_stroke,     show_legend = show_legend): doublet_scores is not a color or a column name