4  Skin: Post-clustering QC and characterization

4.1 Set up Seurat workspace

# Load libraries
library(data.table)
library(devtools)
library(presto) 
library(glmGamPoi)
library(sctransform)
library(Seurat) 
library(tidyverse)
library(miQC)
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)

# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')

4.2 Load previously saved object

merged.18279.skin.singlets <- readRDS("Skin_scRNA_Part3.rds")

4.3 Set idents to preferred initial clustering resolution

Idents(merged.18279.skin.singlets) <- merged.18279.skin.singlets$RNA_snn_res.0.8
merged.18279.skin.singlets$seurat_clusters <- merged.18279.skin.singlets$RNA_snn_res.0.8
DimPlot(merged.18279.skin.singlets, reduction="umap.harmony", label = TRUE)

4.4 Plot QC

VlnPlot(merged.18279.skin.singlets, features = "nCount_RNA", group.by="seurat_clusters",pt.size=0) + NoLegend()

VlnPlot(merged.18279.skin.singlets, features = "nFeature_RNA", group.by="seurat_clusters",pt.size=0) + NoLegend()

VlnPlot(merged.18279.skin.singlets, features = "percent.mt", group.by="seurat_clusters",pt.size=0) + NoLegend()

4.5 Identify cursory marker genes of each cluster

Note layers were already joined in previous session

DefaultAssay(merged.18279.skin.singlets) <- "RNA"

vargenes <- presto::wilcoxauc(merged.18279.skin.singlets, 'seurat_clusters', seurat_assay = 'RNA')
top_vargenes <- top_markers(vargenes, n = 100, auc_min = 0.5, pct_in_min = 50, pct_out_max = 50)
top_vargenes
# A tibble: 100 × 28
    rank `0`   `1`   `10`  `11`  `12`  `13`  `14`  `15`  `16`  `17`  `18`  `19` 
   <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
 1     1 TYRO… IL7R  COL6… CNOT… DOCK4 STMN1 ENSG… LAMP3 HPGD  TM4S… CPVL  XCL1 
 2     2 BCL2… SPOC… DCN   SYNE2 RBM47 HMGB2 IL3RA FSCN1 TPSA… SPAR… WDFY4 TNFR…
 3     3 IL1RN CD3E  COL6… PARP8 FNDC… TYMS  IRF4  DAPP1 TPSB2 PECA… ENSG… XCL2 
 4     4 FCER… LTB   COL1… BCL1… MYO9B RRM2  P2RY6 MARC… CPA3  GNG11 HLA-… CTSW 
 5     5 IFI30 IL32  NNMT  SLC3… KYNU  MKI67 ENSG… NUB1  HPGDS CAV1  HLA-… SYTL3
 6     6 MARC… ICOS  CALD1 PRKCH CYRIA DUT   PLD4  CERS6 GATA2 RAMP2 C1or… CD69 
 7     7 PLAUR CD52  C1R   PPP1… ABCA1 PCLAF SLC1… CD83  KIT   A2M   DNAS… RIN3 
 8     8 S100… CD3D  SELE… RORA  DMXL2 H4C3  JCHA… CCR7  CTSG  PDLI… HDAC9 CD96 
 9     9 ENSG… CD2   CXCL… CAMK4 ATP1… MCM7  POLB  BASP1 HDC   ADGR… RUBC… CSF2 
10    10 PLEK  CD6   COL1… KLF12 FNIP2 KIF22 FCHS… CSF2… IL18… EMCN  CLEC… KLRB1
# ℹ 90 more rows
# ℹ 15 more variables: `2` <chr>, `20` <chr>, `21` <chr>, `22` <chr>,
#   `23` <chr>, `24` <chr>, `25` <chr>, `26` <chr>, `3` <chr>, `4` <chr>,
#   `5` <chr>, `6` <chr>, `7` <chr>, `8` <chr>, `9` <chr>

4.6 Plot genes from slide-tags preprint

Sourced from this preprint

goi <- c("PMEL","MLANA","CCL5","LEF1","FOXP3","PAX5","IGHG3","KCNMA1","ZNF366","CUX2","COL1A1","PLVAP")
VlnPlot(merged.18279.skin.singlets,features=goi,assay="RNA",layer="data",flip=T,sort=T,stack=T)

4.7 Plot other known markers

Sourced from this paper

tumor <- c("DCT","MLANA","MITF","PMEL","S100A1","TYR","APOC1")
endothelial <- c("PECAM1","VWF")
fibroblast <- c("COL3A1","COL1A1","COL1A2","LUM")
tcell <- c("FGFBP2","KLRD1","CD3E","CD3D","GZMB","XCL2","GZMH","CST7","GZMK","GZMA","IFNG","GNLY","CCL4","NKG7","CCL5","CD8A","CD8B","CTLA4","TNFRSF4","BATF","ITM2A")
mono <- c("LYZ","CD74","CD68")
bcell <- c("MS4A1","CD79A")

VlnPlot(merged.18279.skin.singlets,features=tumor,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.skin.singlets,features=endothelial,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.skin.singlets,features=fibroblast,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.skin.singlets,features=tcell,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.skin.singlets,features=mono,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  VlnPlot(merged.18279.skin.singlets,features=bcell,assay="RNA",layer="data",flip=T,sort=T,stack=T) +
  NoLegend()

4.8 Plot top markers identified and canonical genes as a dotplot

top_vargenes <- top_markers(vargenes, n = 5, auc_min = 0.5, pct_in_min = 50, pct_out_max = 50)
top_markers <- top_vargenes %>%
    select(-rank) %>% 
    unclass() %>% 
    stack() %>%
    pull(values) %>%
    unique() %>%
    .[!is.na(.)]

dotplotmarkers <- unique(c(top_markers,tumor,endothelial,fibroblast,tcell,mono,bcell))

# Compute aggregated expression values of these genes and cluster them to order the figure
rna <- AverageExpression(merged.18279.skin.singlets,assay="RNA",slot="data")
As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.
First group.by variable `ident` starts with a number, appending `g` to ensure valid variable names
This message is displayed once per session.
rna.sub <- rna$RNA[dotplotmarkers,]
cors.genes <- as.dist(1-cor(as.matrix(t(rna.sub)),method="pearson"))
hc.genes <- hclust(cors.genes)
dotplotmarkers.sorted <- rownames(rna.sub)[hc.genes$order]

# Plot
DotPlot(merged.18279.skin.singlets,features=dotplotmarkers.sorted,assay="RNA",cols=c("blue","red"),cluster.idents=T) + RotatedAxis()

4.9 Plot expression of key genes in UMAP space

FeaturePlot(merged.18279.skin.singlets, 
    reduction="umap.harmony", 
    features=c("CD8A","CD4","CTLA4","KLRC1","CD79A","PMEL","MLANA","LYZ","PECAM1","COL3A1","SFN","KRT19"),
    order = T,
    ncol = 2)

4.10 Get session info

sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] cellXY_0.99.0               scDblFinder_1.14.0         
 [3] harmony_1.2.0               alevinQC_1.16.1            
 [5] vctrs_0.6.5                 patchwork_1.3.0            
 [7] scater_1.28.0               scuttle_1.10.3             
 [9] speckle_1.0.0               Matrix_1.6-4               
[11] fishpond_2.6.2              readxl_1.4.3               
[13] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[15] Biobase_2.60.0              GenomicRanges_1.52.1       
[17] GenomeInfoDb_1.36.4         IRanges_2.34.1             
[19] S4Vectors_0.38.2            BiocGenerics_0.46.0        
[21] MatrixGenerics_1.12.3       matrixStats_1.2.0          
[23] flexmix_2.3-19              lattice_0.22-5             
[25] SeuratWrappers_0.3.19       miQC_1.8.0                 
[27] lubridate_1.9.3             forcats_1.0.0              
[29] stringr_1.5.1               dplyr_1.1.4                
[31] purrr_1.0.2                 readr_2.1.5                
[33] tidyr_1.3.1                 tibble_3.2.1               
[35] ggplot2_3.4.4               tidyverse_2.0.0            
[37] Seurat_5.1.0                SeuratObject_5.0.2         
[39] sp_2.1-3                    sctransform_0.4.1          
[41] glmGamPoi_1.12.2            presto_1.0.0               
[43] Rcpp_1.0.12                 devtools_2.4.5             
[45] usethis_2.2.2               data.table_1.15.0          

loaded via a namespace (and not attached):
  [1] fs_1.6.3                  spatstat.sparse_3.0-3    
  [3] bitops_1.0-7              httr_1.4.7               
  [5] RColorBrewer_1.1-3        profvis_0.3.8            
  [7] tools_4.3.1               utf8_1.2.4               
  [9] R6_2.5.1                  DT_0.31                  
 [11] lazyeval_0.2.2            uwot_0.1.16              
 [13] urlchecker_1.0.1          withr_3.0.0              
 [15] GGally_2.2.1              gridExtra_2.3            
 [17] progressr_0.14.0          cli_3.6.2                
 [19] spatstat.explore_3.2-6    fastDummies_1.7.3        
 [21] labeling_0.4.3            spatstat.data_3.0-4      
 [23] ggridges_0.5.6            pbapply_1.7-2            
 [25] Rsamtools_2.16.0          R.utils_2.12.3           
 [27] parallelly_1.37.0         sessioninfo_1.2.2        
 [29] limma_3.56.2              RSQLite_2.3.5            
 [31] BiocIO_1.10.0             generics_0.1.3           
 [33] gtools_3.9.5              ica_1.0-3                
 [35] spatstat.random_3.2-2     ggbeeswarm_0.7.2         
 [37] fansi_1.0.6               abind_1.4-5              
 [39] R.methodsS3_1.8.2         lifecycle_1.0.4          
 [41] yaml_2.3.8                edgeR_3.42.4             
 [43] Rtsne_0.17                blob_1.2.4               
 [45] grid_4.3.1                dqrng_0.3.2              
 [47] promises_1.2.1            crayon_1.5.2             
 [49] shinydashboard_0.7.2      miniUI_0.1.1.1           
 [51] beachmat_2.16.0           cowplot_1.1.3            
 [53] KEGGREST_1.40.1           metapod_1.8.0            
 [55] pillar_1.9.0              knitr_1.45               
 [57] rjson_0.2.21              xgboost_1.7.7.1          
 [59] future.apply_1.11.1       codetools_0.2-19         
 [61] leiden_0.4.3.1            glue_1.7.0               
 [63] remotes_2.4.2.1           png_0.1-8                
 [65] spam_2.10-0               org.Mm.eg.db_3.18.0      
 [67] cellranger_1.1.0          gtable_0.3.4             
 [69] cachem_1.0.8              xfun_0.42                
 [71] S4Arrays_1.2.0            mime_0.12                
 [73] survival_3.5-8            statmod_1.5.0            
 [75] bluster_1.10.0            ellipsis_0.3.2           
 [77] fitdistrplus_1.1-11       ROCR_1.0-11              
 [79] nlme_3.1-164              bit64_4.0.5              
 [81] RcppAnnoy_0.0.22          irlba_2.3.5.1            
 [83] vipor_0.4.7               KernSmooth_2.23-22       
 [85] DBI_1.2.2                 colorspace_2.1-0         
 [87] nnet_7.3-19               ggrastr_1.0.2            
 [89] tidyselect_1.2.0          bit_4.0.5                
 [91] compiler_4.3.1            BiocNeighbors_1.18.0     
 [93] DelayedArray_0.26.7       plotly_4.10.4            
 [95] rtracklayer_1.60.1        scales_1.3.0             
 [97] lmtest_0.9-40             digest_0.6.34            
 [99] goftest_1.2-3             spatstat.utils_3.0-4     
[101] rmarkdown_2.25            XVector_0.40.0           
[103] htmltools_0.5.7           pkgconfig_2.0.3          
[105] sparseMatrixStats_1.12.2  fastmap_1.1.1            
[107] rlang_1.1.3               htmlwidgets_1.6.4        
[109] shiny_1.8.0               DelayedMatrixStats_1.22.6
[111] farver_2.1.1              zoo_1.8-12               
[113] jsonlite_1.8.8            BiocParallel_1.34.2      
[115] R.oo_1.26.0               BiocSingular_1.16.0      
[117] RCurl_1.98-1.14           magrittr_2.0.3           
[119] modeltools_0.2-23         GenomeInfoDbData_1.2.10  
[121] dotCall64_1.1-1           munsell_0.5.0            
[123] viridis_0.6.5             reticulate_1.35.0        
[125] stringi_1.8.3             zlibbioc_1.46.0          
[127] MASS_7.3-60.0.1           org.Hs.eg.db_3.18.0      
[129] plyr_1.8.9                pkgbuild_1.4.3           
[131] ggstats_0.5.1             parallel_4.3.1           
[133] listenv_0.9.1             ggrepel_0.9.5            
[135] deldir_2.0-2              Biostrings_2.68.1        
[137] splines_4.3.1             tensor_1.5               
[139] hms_1.1.3                 locfit_1.5-9.8           
[141] igraph_2.0.2              spatstat.geom_3.2-8      
[143] RcppHNSW_0.6.0            reshape2_1.4.4           
[145] ScaledMatrix_1.8.1        pkgload_1.3.4            
[147] XML_3.99-0.16.1           evaluate_0.23            
[149] scran_1.28.2              BiocManager_1.30.22      
[151] tzdb_0.4.0                httpuv_1.6.14            
[153] RANN_2.6.1                polyclip_1.10-6          
[155] future_1.33.1             scattermore_1.2          
[157] rsvd_1.0.5                xtable_1.8-4             
[159] restfulr_0.0.15           svMisc_1.2.3             
[161] RSpectra_0.16-1           later_1.3.2              
[163] viridisLite_0.4.2         AnnotationDbi_1.64.1     
[165] GenomicAlignments_1.36.0  memoise_2.0.1            
[167] beeswarm_0.4.0            tximport_1.28.0          
[169] cluster_2.1.6             timechange_0.3.0         
[171] globals_0.16.2