library("dplyr")
library("Seurat")
library("knitr")
library("ggplot2")
library("BiocManager")
library("here")
#BiocManager::install("EnhancedVolcano")
library("EnhancedVolcano") #volcano plot
#install.packages('DESeq2') #for DEG
library("DESeq2")
library("tidyverse") #tidy up data
library("styler") #tidy up data
library("scCustomize") #for color scales)
library("qs") #for color scales)


if (!require("kableExtra")) {install.packages("kableExtra"); require("kableExtra")} # for color brewer
if (!require("RColorBrewer")) {install.packages("RColorBrewer"); require("RColorBrewer")} # for color brewer
if (!require("sctransform")) {install.packages("sctransform"); require("sctransform")} # for data normalization
if (!require("glmGamPoi")) {BiocManager::install('glmGamPoi'); require("glmGamPoi")} # for data normalization, sctransform
if (!require("cowplot")) {install.packages("cowplot"); require("cowplot")} # for figure layout
if (!require("patchwork")) {install.packages("patchwork"); require("patchwork")} # for figure patching
if (!require("openxlsx")) {install.packages("openxlsx"); require("openxlsx")} # to save .xlsx files

# install.packages("styler")

set.seed(12345)
# here()

1 Welcome

Welcome to the Single-Cell Omics Research and Education Club!

If this is your time to the club, I want to extend and extra-special welcome to you!

I’m Jonathan Nelson, an Assistant Professor at the University of Southern California. I’m a wet scientist turned wet+dry scientist. I’ve been working with single-cell RNAseq data for the past 5 years and I’m excited to share what I’ve learned with you.

1.1 SCORE Values

1.1.1 Learning

We believe that bioinformatics is a constantly evolving field, and that ongoing learning and professional development is essential to staying up-to-date. We encourage members to share their knowledge and experiences with each other, and to seek out opportunities for continued learning.

1.1.2 Accessibility

We believe that access to bioinformatics support should be available to everyone. We strive to create a welcoming and inclusive environment where all members can feel comfortable asking for help and contributing to the group.

1.1.3 Collaboration

We believe that working together is key to achieving success in bioinformatics. We value the diversity of perspectives and backgrounds that each member brings, and we encourage open communication and the sharing of ideas.

1.1.4 Integrity

We believe in conducting ourselves with honesty and professionalism in all our interactions. We hold ourselves to high ethical standards and respect the privacy and confidentiality of all members.

1.1.5 Empathy

We believe in approaching each other with empathy and kindness. We understand that bioinformatics can be a challenging and sometimes frustrating field, and we strive to support each other through these difficulties.

1.2 Context and Expectations

I know a lot of this has been going on in the background for everyone and I wanted to bring it to the forefront. My expectation is that we have 4-6 more meetings about spatial transcriptomics…and then we’ll re-evaluate.

Email me you would like me to add anyone:

Today’s code (this html file) will be posted to the SCORE website (https://usckrc.github.io/website/score.html)

2 The Agenda!

2.1 Music and Memes

2.2 Main Theme: Xenium Annotation



3 Music and Memes

3.1 This Months Coding Music

3.1.2 SeeSaw Hits

3.2 The Memes

3.2.1 Meme 1/2: The REAL Nobel Prize

3.2.2 Meme 1: THE Pain

3.2.3 Meme 2: INSIGHT



4 Main Theme: Xenium Annotation

4.1 Series on Spatial Transcriptomics

4.1.0.1 See SCORE #6 for Background on Spatial Transcriptomics

https://usckrc.github.io/website/SCORE_6__LZ_ST_ppt.pdf

4.1.0.2 See SCORE #7 for Starting with a Xenium Dataset

https://usckrc.github.io/website/SCORE_7.html

4.1.1 Load Xenium Object

https://satijalab.org/seurat/articles/seurat5_spatial_vignette_2#mouse-brain-10x-genomics-xenium-in-situ

xenium.obj <- LoadXenium(here("output-XETG00402__0054800__Region_1__20250822__221946"), fov = "fov")

xenium.obj

head(xenium.obj@meta.data)

4.1.1.1 Visualize Xenium object

Boundaries(xenium.obj[["fov"]])

ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75)

4.1.1.2 Filtering Out Genes with No Data

VlnPlot(xenium.obj, features = c("nFeature_Xenium", "nCount_Xenium"), ncol = 2, pt.size = 0)

# remove cells with 0 counts
xenium.obj <- subset(xenium.obj, subset = nCount_Xenium > 0)

VlnPlot(xenium.obj, features = c("nFeature_Xenium", "nCount_Xenium"), ncol = 2, pt.size = 0)

VlnPlot(xenium.obj, features = c("nFeature_Xenium", "nCount_Xenium"), ncol = 2)

4.1.2 Basic Processing

4.1.2.1 PCA Dims = 50

4.1.2.2 Res = 2

4.1.2.2.1 This takes FOREVER
4.1.2.2.2 Especially the SCTransform
4.1.2.2.2.1 Remember: 150K Cells!
xenium.obj <- SCTransform(xenium.obj, assay = "Xenium")
xenium.obj <- RunPCA(xenium.obj, npcs = 50, features = rownames(xenium.obj))
xenium.obj <- RunUMAP(xenium.obj, dims = 1:50)
xenium.obj <- FindNeighbors(xenium.obj, reduction = "pca", dims = 1:50)
xenium.obj <- FindClusters(xenium.obj, resolution = 2)

DimPlot(xenium.obj, group.by = "SCT_snn_res.2")

4.2 Real Start

4.2.1 I save/load a .qs object after basic processing

xenium.obj <- qread(here("Week 8 Xenium Annotation", "region1_xenium.qs"))

DimPlot(xenium.obj, group.by = "SCT_snn_res.2")

4.3 Now What?!?!?

4.3.1 How do you annotate this object? What would you want to know beforehand?

4.3.2 Insight from snRNAseq analysis from similar tissue

4.3.2.1 I’m still suprised at how similar this looks to a dissociated snRNAseq dataset

4.3.3 I started with what I knew about dissociated snRNaseq from this tissue

4.3.3.1 Both from the literature and my own experience and background knowledge about the kidney

4.3.4 I applied my snRNaseq guide markers

https://usckrc.github.io/website/scRNAseq_guide.html

markers.to.plot1 <- c("Lrp2",         # PTS1, PTS2, PTS3
                      "Slc5a12",      # PTS1
                      "Slc13a3",      # PTS2
                      "Slc16a9",      # PTS3
                      "Epha7",        # DTL
                      "Cryab",        # DTL
                      "Slc12a1",      # TALA, TALB, MD
                      "Cldn16",       # TALA
                      "Cldn10",       # TALB
                      "Nos1",         # MD
                      "Slc12a3",      # DCT1, DCT2
                      "Egf",          # DCT1
                      "Slc8a1",       # CNT, DCT2
                      "Aqp2",         # PC
                      "Slc4a1",       # ICA
                      "Kit",          # ICA
                      "Slc26a4",      # ICB
                      "Nphs1",        # PODO
                      "Ncam1",        # PEC
                      "Upk1b",        # URO
                      "Flt1",         # EC
                      "Emcn",         # EC
                      "Lyve1",        # LYMPH
                      "Kdr",          # LYMPH
                      "Pdgfra",       # FIB
                      "Pdgfrb",       # FIB, MES, VSMC
                      "Piezo2",       # MES
                      "Acta2",        # VSMC
                      "Ptprc",        # LYMPHO, MACRO
                      "Skap1",        # LYMPHO
                      "Cd74"          # MACRO
)

p1 <- DotPlot(xenium.obj,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  coord_flip() +
  labs(x = "Gene Features", y = "Clusters")

p1

4.3.5 Annotation and Leveling

xenium.obj@meta.data <- xenium.obj@meta.data %>%
  mutate(subclass = dplyr::case_when(
    seurat_clusters == 0  ~ "EC",
    seurat_clusters == 1  ~ "PTS2",
    seurat_clusters == 2  ~ "PTS3",
    seurat_clusters == 3  ~ "PTS1",
    seurat_clusters == 4  ~ "TALA",
    seurat_clusters == 5  ~ "PTS2",
    seurat_clusters == 6  ~ "PTS2",
    seurat_clusters == 7  ~ "PTS2",
    seurat_clusters == 8  ~ "PTS1",
    seurat_clusters == 9  ~ "TALA",
    seurat_clusters == 10 ~ "FIB",
    seurat_clusters == 11 ~ "PTS1",
    seurat_clusters == 12 ~ "MACRO",
    seurat_clusters == 13 ~ "EC",
    seurat_clusters == 14 ~ "TALB",
    seurat_clusters == 15 ~ "FIB",
    seurat_clusters == 16 ~ "PTS2",
    seurat_clusters == 17 ~ "PC",
    seurat_clusters == 18 ~ "PTS3",
    seurat_clusters == 19 ~ "DCT1",
    seurat_clusters == 20 ~ "PC",
    seurat_clusters == 21 ~ "VSMC",
    seurat_clusters == 22 ~ "ICA",
    seurat_clusters == 23 ~ "CNT",
    seurat_clusters == 24 ~ "PTS1",
    seurat_clusters == 25 ~ "FIB",
    seurat_clusters == 26 ~ "TALA",
    seurat_clusters == 27 ~ "DTL",
    seurat_clusters == 28 ~ "MES",
    seurat_clusters == 29 ~ "PTS2",
    seurat_clusters == 30 ~ "PTS1",
    seurat_clusters == 31 ~ "EC",
    seurat_clusters == 32 ~ "DCT1",
    seurat_clusters == 33 ~ "DTL",
    seurat_clusters == 34 ~ "EC",
    seurat_clusters == 35 ~ "DTL",
    seurat_clusters == 36 ~ "ICB",
    seurat_clusters == 37 ~ "PODO",
    seurat_clusters == 38 ~ "PTS2",
    seurat_clusters == 39 ~ "X1",
    seurat_clusters == 40 ~ "URO",
    seurat_clusters == 41 ~ "DCT2",
    seurat_clusters == 42 ~ "EC",
    seurat_clusters == 43 ~ "EC",
    seurat_clusters == 44 ~ "VSMC",
    seurat_clusters == 45 ~ "FIB",
    seurat_clusters == 46 ~ "PC",
    seurat_clusters == 47 ~ "MACRO",
    seurat_clusters == 48 ~ "LYMPHO",
    seurat_clusters == 49 ~ "EC",
    seurat_clusters == 50 ~ "PTS2",
    seurat_clusters == 51 ~ "MD",
    seurat_clusters == 52 ~ "VSMC",
    TRUE ~ NA_character_
  ))

subclass_order <- c("PTS1", "PTS2", "PTS3", "DTL", "TALA", "TALB", "MD", "DCT1", "DCT2", "CNT", "PC", "ICA", "ICB", "PODO", "PEC", "URO", "EC", "FIB", "MES", "VSMC", "LYMPHO", "MACRO", "X1")

xenium.obj@meta.data$subclass <- factor(xenium.obj@meta.data$subclass, levels = subclass_order)

# Change the identities to align with new subclass names and make umap

Idents(xenium.obj) <- xenium.obj@meta.data$subclass

DimPlot(xenium.obj, reduction = "umap", label = TRUE, label.size = 4) +  
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle(paste0("Subclass Annotation for Xenium"))

p2 <- DotPlot(xenium.obj,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  coord_flip() +
  labs(x = "Gene Features", y = "Clusters")

p2

4.4 Spatial vs. snRNAseq

4.4.1 Looks pretty similar…Right?!?!?

4.5 Visulaizing Annotation on Tissue

# Get cell IDs and cluster assignments
df_export <- data.frame(
  cell_id = Cells(xenium.obj),
  group   = xenium.obj@meta.data$subclass
) %>%
  tibble::rownames_to_column("row") %>%
  select(-row)

# Save as CSV
write.csv(df_export, "cellid_subclass_cluster_export.csv", row.names = FALSE)

4.6 Spatial Informed Annotation

4.6.1 Is there additional information that we could get from the Xenium dataset?

4.6.2 Lessons from Endothelial Cells

4.6.2.1 EC Populations within the Tissue

4.6.2.2 Clear Spatial Distribution of EC Populations (That Make Sense)

4.6.2.2.1 Cortical
4.6.2.2.2 Medullary
4.6.2.2.3 Glomerular
4.6.2.2.4 Large Vessel

4.6.2.3 Were these the best markers that I could be using for annotation?

4.6.2.4 I went back to the Res = 2 populations

p1

4.6.2.5 Did FindAllMarkers and Analyzed the Top Markers for each cluster

df <- FindAllMarkers(xenium.obj, only.pos = TRUE, min.pct = .25)

# Order by avg_log2FC
all_markers <- df %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, everything())

# Split by cluster (ident column)
marker_list <- split(all_markers, all_markers$cluster)

# Sort names alphanumerically
sorted_cluster_names <- names(marker_list)[order(as.numeric(names(marker_list)))]

# Create a workbook
wb <- createWorkbook()

# Add each cluster as a new worksheet
for (cluster_name in sorted_cluster_names) {
  addWorksheet(wb, sheetName = cluster_name)
  writeData(wb, sheet = cluster_name, marker_list[[cluster_name]])
}

date <- format(Sys.Date(), "%Y%m%d")

# Save workbook
saveWorkbook(wb, here(paste0(date, "_", "FindAllMarkers_By_Cluster.xlsx")), overwrite = TRUE)

4.6.2.6 I asked: How far down the DEG list do I go before I find the identifying marker gene?

4.6.2.7 Some clusters had better genes that I wasn’t using as identifying markers.

4.6.2.8 Some clusters had clear rare population identities

4.6.2.8.1 Endothelial Cells
4.6.2.8.2 Thin Ascending Limb
4.6.2.8.3 Thin Decending Limb
4.6.2.8.4 Renin Cells

4.6.2.9 What’s the difference for me in the Xenium Dataset

4.6.2.9.1 Tiny clusters aren’t random (by chance)
4.6.2.9.2 I can visually confirm their pattern within tissue

4.6.3 Currently work on creating a different annotation guide for Xenium

4.6.4 Need to go back to a snRNAseq dataset and see how well Spatial Gene Marker List works

4.7 Questions?

4.8 Community Questions

Do you have a coding problem that you’d like some support on?
Do you have a topic you’d like covered at a future meeting?

Email me:

4.9 Upcoming Schedule

4.10 Brainstorming Upcoming Topics

4.10.1 Potential Future Topics on Xenium Analysis

4.10.1.1 Spatial Cell Segmentation Algorithms

4.10.1.2 Label Transfer between snRNAseq and Xenium

4.10.1.3 Integration of Xenium and snRNAseq Datasets

4.10.1.4 Comparing Xenium and snRNAseq DEG Analysis

5 Session Info

sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] openxlsx_4.2.8              patchwork_1.3.2            
##  [3] cowplot_1.2.0               glmGamPoi_1.20.0           
##  [5] sctransform_0.4.2           RColorBrewer_1.1-3         
##  [7] kableExtra_1.4.0            qs_0.27.3                  
##  [9] scCustomize_3.2.0           styler_1.10.3              
## [11] lubridate_1.9.4             forcats_1.0.0              
## [13] stringr_1.5.1               purrr_1.1.0                
## [15] readr_2.1.5                 tidyr_1.3.1                
## [17] tibble_3.3.0                tidyverse_2.0.0            
## [19] DESeq2_1.48.2               SummarizedExperiment_1.38.1
## [21] Biobase_2.68.0              MatrixGenerics_1.20.0      
## [23] matrixStats_1.5.0           GenomicRanges_1.60.0       
## [25] GenomeInfoDb_1.44.2         IRanges_2.42.0             
## [27] S4Vectors_0.46.0            BiocGenerics_0.54.0        
## [29] generics_0.1.4              EnhancedVolcano_1.26.0     
## [31] ggrepel_0.9.6               here_1.0.1                 
## [33] BiocManager_1.30.26         ggplot2_3.5.2              
## [35] knitr_1.50                  Seurat_5.3.0               
## [37] SeuratObject_5.2.0          sp_2.2-0                   
## [39] dplyr_1.1.4                
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22        splines_4.5.1           later_1.4.4            
##   [4] R.oo_1.27.1             polyclip_1.10-7         janitor_2.2.1          
##   [7] fastDummies_1.7.5       lifecycle_1.0.4         rprojroot_2.1.1        
##  [10] globals_0.18.0          lattice_0.22-7          MASS_7.3-65            
##  [13] magrittr_2.0.3          plotly_4.11.0           sass_0.4.10            
##  [16] rmarkdown_2.29          jquerylib_0.1.4         yaml_2.3.10            
##  [19] httpuv_1.6.16           zip_2.3.3               spam_2.11-1            
##  [22] spatstat.sparse_3.1-0   reticulate_1.43.0       pbapply_1.7-4          
##  [25] abind_1.4-8             Rtsne_0.17              R.cache_0.17.0         
##  [28] R.utils_2.13.0          circlize_0.4.16         GenomeInfoDbData_1.2.14
##  [31] irlba_2.3.5.1           listenv_0.9.1           spatstat.utils_3.1-5   
##  [34] goftest_1.2-3           RSpectra_0.16-2         spatstat.random_3.4-1  
##  [37] fitdistrplus_1.2-4      parallelly_1.45.1       svglite_2.2.1          
##  [40] codetools_0.2-20        DelayedArray_0.34.1     xml2_1.4.0             
##  [43] RApiSerialize_0.1.4     shape_1.4.6.1           tidyselect_1.2.1       
##  [46] UCSC.utils_1.4.0        farver_2.1.2            spatstat.explore_3.5-2 
##  [49] jsonlite_2.0.0          progressr_0.15.1        ggridges_0.5.7         
##  [52] survival_3.8-3          systemfonts_1.2.3       tools_4.5.1            
##  [55] ica_1.0-3               Rcpp_1.1.0              glue_1.8.0             
##  [58] gridExtra_2.3           SparseArray_1.8.1       xfun_0.53              
##  [61] withr_3.0.2             fastmap_1.2.0           digest_0.6.37          
##  [64] timechange_0.3.0        R6_2.6.1                mime_0.13              
##  [67] textshaping_1.0.3       ggprism_1.0.7           colorspace_2.1-1       
##  [70] scattermore_1.2         tensor_1.5.1            spatstat.data_3.1-8    
##  [73] R.methodsS3_1.8.2       data.table_1.17.8       httr_1.4.7             
##  [76] htmlwidgets_1.6.4       S4Arrays_1.8.1          uwot_0.2.3             
##  [79] pkgconfig_2.0.3         gtable_0.3.6            lmtest_0.9-40          
##  [82] XVector_0.48.0          htmltools_0.5.8.1       dotCall64_1.2          
##  [85] scales_1.4.0            png_0.1-8               snakecase_0.11.1       
##  [88] spatstat.univar_3.1-4   rstudioapi_0.17.1       tzdb_0.5.0             
##  [91] reshape2_1.4.4          nlme_3.1-168            GlobalOptions_0.1.2    
##  [94] cachem_1.1.0            zoo_1.8-14              KernSmooth_2.23-26     
##  [97] parallel_4.5.1          miniUI_0.1.2            vipor_0.4.7            
## [100] ggrastr_1.0.2           pillar_1.11.0           grid_4.5.1             
## [103] vctrs_0.6.5             RANN_2.6.2              promises_1.3.3         
## [106] stringfish_0.17.0       beachmat_2.24.0         xtable_1.8-4           
## [109] cluster_2.1.8.1         paletteer_1.6.0         beeswarm_0.4.0         
## [112] evaluate_1.0.5          cli_3.6.5               locfit_1.5-9.12        
## [115] compiler_4.5.1          rlang_1.1.6             crayon_1.5.3           
## [118] future.apply_1.20.0     labeling_0.4.3          rematch2_2.1.2         
## [121] plyr_1.8.9              ggbeeswarm_0.7.2        stringi_1.8.7          
## [124] viridisLite_0.4.2       deldir_2.0-4            BiocParallel_1.42.2    
## [127] lazyeval_0.2.2          spatstat.geom_3.5-0     Matrix_1.7-3           
## [130] RcppHNSW_0.6.0          hms_1.1.3               future_1.67.0          
## [133] shiny_1.11.1            ROCR_1.0-11             igraph_2.1.4           
## [136] RcppParallel_5.1.11-1   bslib_0.9.0