# Load libraries
library(data.table)
library(devtools)
library(presto)
library(glmGamPoi)
library(sctransform)
library(Seurat)
library(tidyverse)
library(miQC)
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)
# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')3 Skin: Check patient sexes
3.1 Set up Seurat workspace
3.2 Load previously saved clustered object
merged.18279.skin.singlets <- readRDS("Skin_scRNA_Part2.rds")3.3 Infer sex per cell using chrX and chrY gene counts using cellXY
merged.18279.skin.singlets[['RNA']] <- JoinLayers(merged.18279.skin.singlets[['RNA']])
xyPredict <- classifySex(x=merged.18279.skin.singlets@assays$RNA$counts,genome="Hs", qc=F)Warning in asMethod(object): sparse->dense coercion: allocating vector of size
15.2 GiB
28cell/s are unable to be classified
due to an abundance of zeroes on X and Y chromosome genes
merged.18279.skin.singlets <- AddMetaData(merged.18279.skin.singlets,xyPredict$prediction,col.name="CellPredictedSex")3.4 Assign known sexes to each sample
knownSex <- as.data.frame(cbind("Sample" = unique(merged.18279.skin.singlets$Sample), "Sex" = c(rep("Male",2),rep("Male",2),rep("Female",2),rep("Male",2),rep("Female",2),rep("Male",2),rep("Female",1),rep("Male",2),rep("Male",2))))
knownSex Sample Sex
1 P101_Skin_Pre3rd_2.5mgIpi_RNA Male
2 P101_Skin_Post3rd_2.5mgIpi_RNA Male
3 P103_Skin_Pre3rd_2.5mgIpi_RNA Male
4 P103_Skin_Post3rd_2.5mgIpi_RNA Male
5 P104_Skin_Post3rd_2.5mgIpi_RNA Female
6 P104_Skin_Pre3rd_2.5mgIpi_RNA Female
7 P105_Skin_Post3rd_2.5mgIpi_RNA Male
8 P105_Skin_Pre3rd_2.5mgIpi_RNA Male
9 P106_Skin_Pre3rd_2.5mgIpi_RNA Female
10 P106_Skin_Post3rd_2.5mgIpi_RNA Female
11 P108_Skin_Pre3rd_5mgIpi_RNA Male
12 P108_Skin_Post3rd_5mgIpi_RNA Male
13 P109_Skin_Pre3rd_5mgIpi_RNA Female
14 P110_Skin_Pre3rd_5mgIpi_RNA Male
15 P110_Skin_Post3rd_5mgIpi_RNA Male
16 P111_Skin_Pre3rd_5mgIpi_RNA Male
17 P111_Skin_Post3rd_5mgIpi_RNA Male
3.5 Summarize cell-wise sex predictions per Sample and compare to known labels
If more than 80% of the individual cell sex predictions are consistent for a given Sample, we call that Sample as that sex, then match to known labels
# Show snippet first
rownames_to_column(merged.18279.skin.singlets@meta.data,var="bc") %>%
as_tibble() %>%
dplyr::select(bc,CellPredictedSex,Sample) %>%
group_by(Sample) %>%
summarize(nMale = sum(CellPredictedSex=="Male"),
nFemale = sum(CellPredictedSex=="Female"),
nCells = n()
)# A tibble: 17 × 4
Sample nMale nFemale nCells
<chr> <int> <int> <int>
1 P101_Skin_Post3rd_2.5mgIpi_RNA 1655 62 1718
2 P101_Skin_Pre3rd_2.5mgIpi_RNA 417 9 426
3 P103_Skin_Post3rd_2.5mgIpi_RNA 3436 278 3726
4 P103_Skin_Pre3rd_2.5mgIpi_RNA 782 36 818
5 P104_Skin_Post3rd_2.5mgIpi_RNA 957 4465 5434
6 P104_Skin_Pre3rd_2.5mgIpi_RNA 136 2956 3092
7 P105_Skin_Post3rd_2.5mgIpi_RNA 3213 215 3429
8 P105_Skin_Pre3rd_2.5mgIpi_RNA 2387 139 2526
9 P106_Skin_Post3rd_2.5mgIpi_RNA 249 1405 1655
10 P106_Skin_Pre3rd_2.5mgIpi_RNA 45 1076 1121
11 P108_Skin_Post3rd_5mgIpi_RNA 2589 100 2689
12 P108_Skin_Pre3rd_5mgIpi_RNA 906 31 937
13 P109_Skin_Pre3rd_5mgIpi_RNA 44 1331 1375
14 P110_Skin_Post3rd_5mgIpi_RNA 1408 72 1481
15 P110_Skin_Pre3rd_5mgIpi_RNA 1160 31 1191
16 P111_Skin_Post3rd_5mgIpi_RNA 1270 72 1342
17 P111_Skin_Pre3rd_5mgIpi_RNA 406 31 437
# Now determine sex per sample and count matching vs non-matching labels
rownames_to_column(merged.18279.skin.singlets@meta.data,var="bc") %>%
as_tibble() %>%
dplyr::select(bc,CellPredictedSex,Sample) %>%
group_by(Sample) %>%
summarize(nMale = sum(CellPredictedSex=="Male"),
nFemale = sum(CellPredictedSex=="Female"),
nCells = n()
) %>%
ungroup() %>%
mutate(PredictedSex = case_when(
nMale / nCells > 0.8 ~ "Male",
nFemale / nCells > 0.8 ~ "Female",
T ~ "other"
)
) %>%
inner_join(knownSex,by="Sample") %>%
summarize(nEqual = sum(PredictedSex==Sex), nNotEqual = sum(PredictedSex != Sex))# A tibble: 1 × 2
nEqual nNotEqual
<int> <int>
1 17 0
3.6 Since everything matches we now add Sex as a metadata label
knownSexByCell <- enframe(merged.18279.skin.singlets$Sample,name="bc",value="Sample") %>%
inner_join(knownSex,by="Sample") %>%
pull(Sex)
merged.18279.skin.singlets <- AddMetaData(merged.18279.skin.singlets,knownSexByCell,col.name="Sex")3.7 Plot UMAP of sex
DimPlot(merged.18279.skin.singlets,reduction = "umap.harmony", group.by = "Sex")
3.8 Save the updated object
saveRDS(merged.18279.skin.singlets,file="Skin_scRNA_Part3.rds")3.9 Get session info
sessionInfo()R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so; LAPACK version 3.9.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: America/New_York
tzcode source: system (glibc)
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] cellXY_0.99.0 scDblFinder_1.14.0
[3] harmony_1.2.0 alevinQC_1.16.1
[5] vctrs_0.6.5 patchwork_1.3.0
[7] scater_1.28.0 scuttle_1.10.3
[9] speckle_1.0.0 Matrix_1.6-4
[11] fishpond_2.6.2 readxl_1.4.3
[13] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[15] Biobase_2.60.0 GenomicRanges_1.52.1
[17] GenomeInfoDb_1.36.4 IRanges_2.34.1
[19] S4Vectors_0.38.2 BiocGenerics_0.46.0
[21] MatrixGenerics_1.12.3 matrixStats_1.2.0
[23] flexmix_2.3-19 lattice_0.22-5
[25] SeuratWrappers_0.3.19 miQC_1.8.0
[27] lubridate_1.9.3 forcats_1.0.0
[29] stringr_1.5.1 dplyr_1.1.4
[31] purrr_1.0.2 readr_2.1.5
[33] tidyr_1.3.1 tibble_3.2.1
[35] ggplot2_3.4.4 tidyverse_2.0.0
[37] Seurat_5.1.0 SeuratObject_5.0.2
[39] sp_2.1-3 sctransform_0.4.1
[41] glmGamPoi_1.12.2 presto_1.0.0
[43] Rcpp_1.0.12 devtools_2.4.5
[45] usethis_2.2.2 data.table_1.15.0
loaded via a namespace (and not attached):
[1] fs_1.6.3 spatstat.sparse_3.0-3
[3] bitops_1.0-7 httr_1.4.7
[5] RColorBrewer_1.1-3 profvis_0.3.8
[7] tools_4.3.1 utf8_1.2.4
[9] R6_2.5.1 DT_0.31
[11] lazyeval_0.2.2 uwot_0.1.16
[13] urlchecker_1.0.1 withr_3.0.0
[15] GGally_2.2.1 gridExtra_2.3
[17] progressr_0.14.0 cli_3.6.2
[19] spatstat.explore_3.2-6 fastDummies_1.7.3
[21] labeling_0.4.3 spatstat.data_3.0-4
[23] ggridges_0.5.6 pbapply_1.7-2
[25] Rsamtools_2.16.0 R.utils_2.12.3
[27] parallelly_1.37.0 sessioninfo_1.2.2
[29] limma_3.56.2 RSQLite_2.3.5
[31] BiocIO_1.10.0 generics_0.1.3
[33] gtools_3.9.5 ica_1.0-3
[35] spatstat.random_3.2-2 ggbeeswarm_0.7.2
[37] fansi_1.0.6 abind_1.4-5
[39] R.methodsS3_1.8.2 lifecycle_1.0.4
[41] yaml_2.3.8 edgeR_3.42.4
[43] recipes_1.0.10 Rtsne_0.17
[45] blob_1.2.4 grid_4.3.1
[47] dqrng_0.3.2 promises_1.2.1
[49] crayon_1.5.2 shinydashboard_0.7.2
[51] miniUI_0.1.1.1 beachmat_2.16.0
[53] cowplot_1.1.3 KEGGREST_1.40.1
[55] metapod_1.8.0 pillar_1.9.0
[57] knitr_1.45 rjson_0.2.21
[59] xgboost_1.7.7.1 future.apply_1.11.1
[61] codetools_0.2-19 leiden_0.4.3.1
[63] glue_1.7.0 remotes_2.4.2.1
[65] png_0.1-8 spam_2.10-0
[67] org.Mm.eg.db_3.18.0 cellranger_1.1.0
[69] gtable_0.3.4 cachem_1.0.8
[71] gower_1.0.1 xfun_0.42
[73] prodlim_2023.08.28 S4Arrays_1.2.0
[75] mime_0.12 survival_3.5-8
[77] timeDate_4032.109 iterators_1.0.14
[79] hardhat_1.3.1 lava_1.7.3
[81] statmod_1.5.0 bluster_1.10.0
[83] ellipsis_0.3.2 fitdistrplus_1.1-11
[85] ipred_0.9-14 ROCR_1.0-11
[87] nlme_3.1-164 bit64_4.0.5
[89] RcppAnnoy_0.0.22 irlba_2.3.5.1
[91] rpart_4.1.23 vipor_0.4.7
[93] KernSmooth_2.23-22 DBI_1.2.2
[95] colorspace_2.1-0 nnet_7.3-19
[97] tidyselect_1.2.0 bit_4.0.5
[99] compiler_4.3.1 BiocNeighbors_1.18.0
[101] DelayedArray_0.26.7 plotly_4.10.4
[103] rtracklayer_1.60.1 scales_1.3.0
[105] lmtest_0.9-40 digest_0.6.34
[107] goftest_1.2-3 spatstat.utils_3.0-4
[109] rmarkdown_2.25 XVector_0.40.0
[111] htmltools_0.5.7 pkgconfig_2.0.3
[113] sparseMatrixStats_1.12.2 fastmap_1.1.1
[115] rlang_1.1.3 htmlwidgets_1.6.4
[117] shiny_1.8.0 DelayedMatrixStats_1.22.6
[119] farver_2.1.1 zoo_1.8-12
[121] jsonlite_1.8.8 BiocParallel_1.34.2
[123] ModelMetrics_1.2.2.2 R.oo_1.26.0
[125] BiocSingular_1.16.0 RCurl_1.98-1.14
[127] magrittr_2.0.3 modeltools_0.2-23
[129] GenomeInfoDbData_1.2.10 dotCall64_1.1-1
[131] munsell_0.5.0 viridis_0.6.5
[133] reticulate_1.35.0 pROC_1.18.5
[135] stringi_1.8.3 zlibbioc_1.46.0
[137] MASS_7.3-60.0.1 org.Hs.eg.db_3.18.0
[139] plyr_1.8.9 pkgbuild_1.4.3
[141] ggstats_0.5.1 parallel_4.3.1
[143] listenv_0.9.1 ggrepel_0.9.5
[145] deldir_2.0-2 Biostrings_2.68.1
[147] splines_4.3.1 tensor_1.5
[149] hms_1.1.3 locfit_1.5-9.8
[151] igraph_2.0.2 spatstat.geom_3.2-8
[153] RcppHNSW_0.6.0 reshape2_1.4.4
[155] ScaledMatrix_1.8.1 pkgload_1.3.4
[157] XML_3.99-0.16.1 evaluate_0.23
[159] scran_1.28.2 BiocManager_1.30.22
[161] foreach_1.5.2 tzdb_0.4.0
[163] httpuv_1.6.14 RANN_2.6.1
[165] polyclip_1.10-6 future_1.33.1
[167] scattermore_1.2 rsvd_1.0.5
[169] xtable_1.8-4 restfulr_0.0.15
[171] svMisc_1.2.3 RSpectra_0.16-1
[173] later_1.3.2 class_7.3-22
[175] viridisLite_0.4.2 AnnotationDbi_1.64.1
[177] GenomicAlignments_1.36.0 memoise_2.0.1
[179] beeswarm_0.4.0 tximport_1.28.0
[181] cluster_2.1.6 timechange_0.3.0
[183] globals_0.16.2 caret_6.0-94