3  Skin: Check patient sexes

3.1 Set up Seurat workspace

# Load libraries
library(data.table)
library(devtools)
library(presto)
library(glmGamPoi)
library(sctransform)
library(Seurat) 
library(tidyverse)
library(miQC)   
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)

# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')

3.2 Load previously saved clustered object

merged.18279.skin.singlets <- readRDS("Skin_scRNA_Part2.rds")

3.3 Infer sex per cell using chrX and chrY gene counts using cellXY

merged.18279.skin.singlets[['RNA']] <- JoinLayers(merged.18279.skin.singlets[['RNA']])
xyPredict <- classifySex(x=merged.18279.skin.singlets@assays$RNA$counts,genome="Hs", qc=F)
Warning in asMethod(object): sparse->dense coercion: allocating vector of size
15.2 GiB
28cell/s are unable to be classified
              due to an abundance of zeroes on X and Y chromosome genes
merged.18279.skin.singlets <- AddMetaData(merged.18279.skin.singlets,xyPredict$prediction,col.name="CellPredictedSex")

3.4 Assign known sexes to each sample

knownSex <- as.data.frame(cbind("Sample" = unique(merged.18279.skin.singlets$Sample), "Sex" = c(rep("Male",2),rep("Male",2),rep("Female",2),rep("Male",2),rep("Female",2),rep("Male",2),rep("Female",1),rep("Male",2),rep("Male",2))))

knownSex
                           Sample    Sex
1   P101_Skin_Pre3rd_2.5mgIpi_RNA   Male
2  P101_Skin_Post3rd_2.5mgIpi_RNA   Male
3   P103_Skin_Pre3rd_2.5mgIpi_RNA   Male
4  P103_Skin_Post3rd_2.5mgIpi_RNA   Male
5  P104_Skin_Post3rd_2.5mgIpi_RNA Female
6   P104_Skin_Pre3rd_2.5mgIpi_RNA Female
7  P105_Skin_Post3rd_2.5mgIpi_RNA   Male
8   P105_Skin_Pre3rd_2.5mgIpi_RNA   Male
9   P106_Skin_Pre3rd_2.5mgIpi_RNA Female
10 P106_Skin_Post3rd_2.5mgIpi_RNA Female
11    P108_Skin_Pre3rd_5mgIpi_RNA   Male
12   P108_Skin_Post3rd_5mgIpi_RNA   Male
13    P109_Skin_Pre3rd_5mgIpi_RNA Female
14    P110_Skin_Pre3rd_5mgIpi_RNA   Male
15   P110_Skin_Post3rd_5mgIpi_RNA   Male
16    P111_Skin_Pre3rd_5mgIpi_RNA   Male
17   P111_Skin_Post3rd_5mgIpi_RNA   Male

3.5 Summarize cell-wise sex predictions per Sample and compare to known labels

If more than 80% of the individual cell sex predictions are consistent for a given Sample, we call that Sample as that sex, then match to known labels

# Show snippet first
rownames_to_column(merged.18279.skin.singlets@meta.data,var="bc") %>%
    as_tibble() %>%
    dplyr::select(bc,CellPredictedSex,Sample) %>%
    group_by(Sample) %>%
    summarize(nMale = sum(CellPredictedSex=="Male"), 
        nFemale = sum(CellPredictedSex=="Female"), 
        nCells = n()
    )
# A tibble: 17 × 4
   Sample                         nMale nFemale nCells
   <chr>                          <int>   <int>  <int>
 1 P101_Skin_Post3rd_2.5mgIpi_RNA  1655      62   1718
 2 P101_Skin_Pre3rd_2.5mgIpi_RNA    417       9    426
 3 P103_Skin_Post3rd_2.5mgIpi_RNA  3436     278   3726
 4 P103_Skin_Pre3rd_2.5mgIpi_RNA    782      36    818
 5 P104_Skin_Post3rd_2.5mgIpi_RNA   957    4465   5434
 6 P104_Skin_Pre3rd_2.5mgIpi_RNA    136    2956   3092
 7 P105_Skin_Post3rd_2.5mgIpi_RNA  3213     215   3429
 8 P105_Skin_Pre3rd_2.5mgIpi_RNA   2387     139   2526
 9 P106_Skin_Post3rd_2.5mgIpi_RNA   249    1405   1655
10 P106_Skin_Pre3rd_2.5mgIpi_RNA     45    1076   1121
11 P108_Skin_Post3rd_5mgIpi_RNA    2589     100   2689
12 P108_Skin_Pre3rd_5mgIpi_RNA      906      31    937
13 P109_Skin_Pre3rd_5mgIpi_RNA       44    1331   1375
14 P110_Skin_Post3rd_5mgIpi_RNA    1408      72   1481
15 P110_Skin_Pre3rd_5mgIpi_RNA     1160      31   1191
16 P111_Skin_Post3rd_5mgIpi_RNA    1270      72   1342
17 P111_Skin_Pre3rd_5mgIpi_RNA      406      31    437
# Now determine sex per sample and count matching vs non-matching labels
rownames_to_column(merged.18279.skin.singlets@meta.data,var="bc") %>%
    as_tibble() %>%
    dplyr::select(bc,CellPredictedSex,Sample) %>%
    group_by(Sample) %>%
    summarize(nMale = sum(CellPredictedSex=="Male"), 
        nFemale = sum(CellPredictedSex=="Female"), 
        nCells = n()
    ) %>%
    ungroup() %>%
    mutate(PredictedSex = case_when(
        nMale / nCells > 0.8 ~ "Male",
        nFemale / nCells > 0.8 ~ "Female",
        T ~ "other"
        )
    ) %>%
    inner_join(knownSex,by="Sample") %>%
    summarize(nEqual = sum(PredictedSex==Sex), nNotEqual = sum(PredictedSex != Sex))
# A tibble: 1 × 2
  nEqual nNotEqual
   <int>     <int>
1     17         0

3.6 Since everything matches we now add Sex as a metadata label

knownSexByCell <- enframe(merged.18279.skin.singlets$Sample,name="bc",value="Sample") %>% 
    inner_join(knownSex,by="Sample") %>% 
    pull(Sex)
    
merged.18279.skin.singlets <- AddMetaData(merged.18279.skin.singlets,knownSexByCell,col.name="Sex")

3.7 Plot UMAP of sex

DimPlot(merged.18279.skin.singlets,reduction = "umap.harmony", group.by = "Sex")

3.8 Save the updated object

saveRDS(merged.18279.skin.singlets,file="Skin_scRNA_Part3.rds")

3.9 Get session info

sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] cellXY_0.99.0               scDblFinder_1.14.0         
 [3] harmony_1.2.0               alevinQC_1.16.1            
 [5] vctrs_0.6.5                 patchwork_1.3.0            
 [7] scater_1.28.0               scuttle_1.10.3             
 [9] speckle_1.0.0               Matrix_1.6-4               
[11] fishpond_2.6.2              readxl_1.4.3               
[13] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[15] Biobase_2.60.0              GenomicRanges_1.52.1       
[17] GenomeInfoDb_1.36.4         IRanges_2.34.1             
[19] S4Vectors_0.38.2            BiocGenerics_0.46.0        
[21] MatrixGenerics_1.12.3       matrixStats_1.2.0          
[23] flexmix_2.3-19              lattice_0.22-5             
[25] SeuratWrappers_0.3.19       miQC_1.8.0                 
[27] lubridate_1.9.3             forcats_1.0.0              
[29] stringr_1.5.1               dplyr_1.1.4                
[31] purrr_1.0.2                 readr_2.1.5                
[33] tidyr_1.3.1                 tibble_3.2.1               
[35] ggplot2_3.4.4               tidyverse_2.0.0            
[37] Seurat_5.1.0                SeuratObject_5.0.2         
[39] sp_2.1-3                    sctransform_0.4.1          
[41] glmGamPoi_1.12.2            presto_1.0.0               
[43] Rcpp_1.0.12                 devtools_2.4.5             
[45] usethis_2.2.2               data.table_1.15.0          

loaded via a namespace (and not attached):
  [1] fs_1.6.3                  spatstat.sparse_3.0-3    
  [3] bitops_1.0-7              httr_1.4.7               
  [5] RColorBrewer_1.1-3        profvis_0.3.8            
  [7] tools_4.3.1               utf8_1.2.4               
  [9] R6_2.5.1                  DT_0.31                  
 [11] lazyeval_0.2.2            uwot_0.1.16              
 [13] urlchecker_1.0.1          withr_3.0.0              
 [15] GGally_2.2.1              gridExtra_2.3            
 [17] progressr_0.14.0          cli_3.6.2                
 [19] spatstat.explore_3.2-6    fastDummies_1.7.3        
 [21] labeling_0.4.3            spatstat.data_3.0-4      
 [23] ggridges_0.5.6            pbapply_1.7-2            
 [25] Rsamtools_2.16.0          R.utils_2.12.3           
 [27] parallelly_1.37.0         sessioninfo_1.2.2        
 [29] limma_3.56.2              RSQLite_2.3.5            
 [31] BiocIO_1.10.0             generics_0.1.3           
 [33] gtools_3.9.5              ica_1.0-3                
 [35] spatstat.random_3.2-2     ggbeeswarm_0.7.2         
 [37] fansi_1.0.6               abind_1.4-5              
 [39] R.methodsS3_1.8.2         lifecycle_1.0.4          
 [41] yaml_2.3.8                edgeR_3.42.4             
 [43] recipes_1.0.10            Rtsne_0.17               
 [45] blob_1.2.4                grid_4.3.1               
 [47] dqrng_0.3.2               promises_1.2.1           
 [49] crayon_1.5.2              shinydashboard_0.7.2     
 [51] miniUI_0.1.1.1            beachmat_2.16.0          
 [53] cowplot_1.1.3             KEGGREST_1.40.1          
 [55] metapod_1.8.0             pillar_1.9.0             
 [57] knitr_1.45                rjson_0.2.21             
 [59] xgboost_1.7.7.1           future.apply_1.11.1      
 [61] codetools_0.2-19          leiden_0.4.3.1           
 [63] glue_1.7.0                remotes_2.4.2.1          
 [65] png_0.1-8                 spam_2.10-0              
 [67] org.Mm.eg.db_3.18.0       cellranger_1.1.0         
 [69] gtable_0.3.4              cachem_1.0.8             
 [71] gower_1.0.1               xfun_0.42                
 [73] prodlim_2023.08.28        S4Arrays_1.2.0           
 [75] mime_0.12                 survival_3.5-8           
 [77] timeDate_4032.109         iterators_1.0.14         
 [79] hardhat_1.3.1             lava_1.7.3               
 [81] statmod_1.5.0             bluster_1.10.0           
 [83] ellipsis_0.3.2            fitdistrplus_1.1-11      
 [85] ipred_0.9-14              ROCR_1.0-11              
 [87] nlme_3.1-164              bit64_4.0.5              
 [89] RcppAnnoy_0.0.22          irlba_2.3.5.1            
 [91] rpart_4.1.23              vipor_0.4.7              
 [93] KernSmooth_2.23-22        DBI_1.2.2                
 [95] colorspace_2.1-0          nnet_7.3-19              
 [97] tidyselect_1.2.0          bit_4.0.5                
 [99] compiler_4.3.1            BiocNeighbors_1.18.0     
[101] DelayedArray_0.26.7       plotly_4.10.4            
[103] rtracklayer_1.60.1        scales_1.3.0             
[105] lmtest_0.9-40             digest_0.6.34            
[107] goftest_1.2-3             spatstat.utils_3.0-4     
[109] rmarkdown_2.25            XVector_0.40.0           
[111] htmltools_0.5.7           pkgconfig_2.0.3          
[113] sparseMatrixStats_1.12.2  fastmap_1.1.1            
[115] rlang_1.1.3               htmlwidgets_1.6.4        
[117] shiny_1.8.0               DelayedMatrixStats_1.22.6
[119] farver_2.1.1              zoo_1.8-12               
[121] jsonlite_1.8.8            BiocParallel_1.34.2      
[123] ModelMetrics_1.2.2.2      R.oo_1.26.0              
[125] BiocSingular_1.16.0       RCurl_1.98-1.14          
[127] magrittr_2.0.3            modeltools_0.2-23        
[129] GenomeInfoDbData_1.2.10   dotCall64_1.1-1          
[131] munsell_0.5.0             viridis_0.6.5            
[133] reticulate_1.35.0         pROC_1.18.5              
[135] stringi_1.8.3             zlibbioc_1.46.0          
[137] MASS_7.3-60.0.1           org.Hs.eg.db_3.18.0      
[139] plyr_1.8.9                pkgbuild_1.4.3           
[141] ggstats_0.5.1             parallel_4.3.1           
[143] listenv_0.9.1             ggrepel_0.9.5            
[145] deldir_2.0-2              Biostrings_2.68.1        
[147] splines_4.3.1             tensor_1.5               
[149] hms_1.1.3                 locfit_1.5-9.8           
[151] igraph_2.0.2              spatstat.geom_3.2-8      
[153] RcppHNSW_0.6.0            reshape2_1.4.4           
[155] ScaledMatrix_1.8.1        pkgload_1.3.4            
[157] XML_3.99-0.16.1           evaluate_0.23            
[159] scran_1.28.2              BiocManager_1.30.22      
[161] foreach_1.5.2             tzdb_0.4.0               
[163] httpuv_1.6.14             RANN_2.6.1               
[165] polyclip_1.10-6           future_1.33.1            
[167] scattermore_1.2           rsvd_1.0.5               
[169] xtable_1.8-4              restfulr_0.0.15          
[171] svMisc_1.2.3              RSpectra_0.16-1          
[173] later_1.3.2               class_7.3-22             
[175] viridisLite_0.4.2         AnnotationDbi_1.64.1     
[177] GenomicAlignments_1.36.0  memoise_2.0.1            
[179] beeswarm_0.4.0            tximport_1.28.0          
[181] cluster_2.1.6             timechange_0.3.0         
[183] globals_0.16.2            caret_6.0-94