1  IFNg: QC & Filtering

1.1 Set up Seurat workspace

# Load libraries
library(data.table)
library(sctransform)
library(Seurat)
library(tidyverse)
library(miQC)
library(SeuratWrappers)
library(fishpond)
library(Matrix)
library(patchwork)
library(alevinQC)
library(harmony)
library(scDblFinder)

# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')

1.2 Load alevin-fry output

Using U+S+A counts via snRNA

P108_IFNg_PostVax_5mgIpi_RNA <- loadFry(fryDir =  "/jsimonlab/projects/Wu/Melanoma_scRNA_Eryn/alevinfry_072023/P108_IFNg_PostVax_5mgIpi_RNA_alevin_quant_crlikeem/", outputFormat = "snRNA")
locating quant file
Reading meta data
USA mode: TRUE
Processing 62703 genes and 93474 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
P103_IFNg_PostVax_2.5mgIpi_RNA <- loadFry(fryDir =  "/jsimonlab/projects/Wu/Melanoma_scRNA_Eryn/alevinfry_072023/P103_IFNg_PostVax_2.5mgIpi_RNA_alevin_quant_crlikeem/", outputFormat = "snRNA")
locating quant file
Reading meta data
USA mode: TRUE
Processing 62703 genes and 101283 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
P101_IFNg_PostVax_2.5mgIpi_RNA <- loadFry(fryDir =  "/jsimonlab/projects/Wu/Melanoma_scRNA_Eryn/alevinfry_072023/P101_IFNg_PostVax_2.5mgIpi_RNA_alevin_quant_crlikeem/", outputFormat = "snRNA")
locating quant file
Reading meta data
USA mode: TRUE
Processing 62703 genes and 75687 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done
P104_IFNg_PostVax_2.5mgIpi_RNA <- loadFry(fryDir =  "/jsimonlab/projects/Wu/Melanoma_scRNA_Eryn/alevinfry_072023/P104_IFNg_PostVax_2.5mgIpi_RNA_alevin_quant_crlikeem/", outputFormat = "snRNA")
locating quant file
Reading meta data
USA mode: TRUE
Processing 62703 genes and 120627 barcodes
Using pre-defined output format: snrna
Building the 'counts' assay, which contains U S A
Constructing output SingleCellExperiment object
Done

1.3 Convert gene IDs to gene symbols, collapse transcripts to gene symbols

tx2gene <- read.table("/jsimonlab/genomes/hg38/gencode.v43.annotation_splici/gene_id_to_name.tsv",header=F,sep="\t",col.names=c("tx","gene"))

# Use rownames of first object to apply to all
exp.txId <- rownames(P108_IFNg_PostVax_5mgIpi_RNA)
exp.geneId <- as.vector(tx2gene$gene[match(exp.txId, tx2gene$tx)])
exp.tx.grp <- t(sparse.model.matrix(~ 0 + exp.geneId))

P108_IFNg_PostVax_5mgIpi_RNA.summarized <- exp.tx.grp %*% counts(P108_IFNg_PostVax_5mgIpi_RNA)
rownames(P108_IFNg_PostVax_5mgIpi_RNA.summarized) <- rownames(P108_IFNg_PostVax_5mgIpi_RNA.summarized) %>% str_replace_all(".+.geneId","")

P103_IFNg_PostVax_2.5mgIpi_RNA.summarized <- exp.tx.grp %*% counts(P103_IFNg_PostVax_2.5mgIpi_RNA)
rownames(P103_IFNg_PostVax_2.5mgIpi_RNA.summarized) <- rownames(P103_IFNg_PostVax_2.5mgIpi_RNA.summarized) %>% str_replace_all(".+.geneId","")

P101_IFNg_PostVax_2.5mgIpi_RNA.summarized <- exp.tx.grp %*% counts(P101_IFNg_PostVax_2.5mgIpi_RNA)
rownames(P101_IFNg_PostVax_2.5mgIpi_RNA.summarized) <- rownames(P101_IFNg_PostVax_2.5mgIpi_RNA.summarized) %>% str_replace_all(".+.geneId","")

P104_IFNg_PostVax_2.5mgIpi_RNA.summarized <- exp.tx.grp %*% counts(P104_IFNg_PostVax_2.5mgIpi_RNA)
rownames(P104_IFNg_PostVax_2.5mgIpi_RNA.summarized) <- rownames(P104_IFNg_PostVax_2.5mgIpi_RNA.summarized) %>% str_replace_all(".+.geneId","")

1.4 Create Seurat objects

P108_IFNg_PostVax_5mgIpi_RNA.seurat <- CreateSeuratObject(P108_IFNg_PostVax_5mgIpi_RNA.summarized)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
P103_IFNg_PostVax_2.5mgIpi_RNA.seurat <- CreateSeuratObject(P103_IFNg_PostVax_2.5mgIpi_RNA.summarized)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
P101_IFNg_PostVax_2.5mgIpi_RNA.seurat <- CreateSeuratObject(P101_IFNg_PostVax_2.5mgIpi_RNA.summarized)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
P104_IFNg_PostVax_2.5mgIpi_RNA.seurat <- CreateSeuratObject(P104_IFNg_PostVax_2.5mgIpi_RNA.summarized)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')

1.5 Add sample name prefix to CBs

P108_IFNg_PostVax_5mgIpi_RNA.seurat <- RenameCells(P108_IFNg_PostVax_5mgIpi_RNA.seurat,add.cell.id = "P108_IFNg_PostVax_5mgIpi_RNA")
P103_IFNg_PostVax_2.5mgIpi_RNA.seurat <- RenameCells(P103_IFNg_PostVax_2.5mgIpi_RNA.seurat,add.cell.id = "P103_IFNg_PostVax_2.5mgIpi_RNA")
P101_IFNg_PostVax_2.5mgIpi_RNA.seurat <- RenameCells(P101_IFNg_PostVax_2.5mgIpi_RNA.seurat,add.cell.id = "P101_IFNg_PostVax_2.5mgIpi_RNA")
P104_IFNg_PostVax_2.5mgIpi_RNA.seurat <- RenameCells(P104_IFNg_PostVax_2.5mgIpi_RNA.seurat,add.cell.id = "P104_IFNg_PostVax_2.5mgIpi_RNA")

1.6 Merge objects

merged.18279 <- merge(x = P108_IFNg_PostVax_5mgIpi_RNA.seurat, y=c(P103_IFNg_PostVax_2.5mgIpi_RNA.seurat, P101_IFNg_PostVax_2.5mgIpi_RNA.seurat, P104_IFNg_PostVax_2.5mgIpi_RNA.seurat))
dim(merged.18279)
[1]  61217 391071

1.7 QC filter

Use relatively loose initial filters

merged.18279 <- subset(merged.18279, subset = nCount_RNA > 1000 & nFeature_RNA > 500)
dim(merged.18279)
[1] 61217 13980
merged.18279 <- PercentageFeatureSet(merged.18279, pattern = "^MT-", col.name = "percent.mt")
merged.18279 <- RunMiQC(merged.18279, 
                        percent.mt = "percent.mt", 
                        nFeature_RNA = "nFeature_RNA", 
                        posterior.cutoff = 0.7, 
                        model.slot = "flexmix_model")
Warning: Adding a command log without an assay associated with it
merged.18279 <- subset(merged.18279, miQC.keep == "keep")

dim(merged.18279)
[1] 61217 12057
data.frame(table(str_replace_all(colnames(merged.18279),"_RNA_.+","")))
                        Var1 Freq
1 P101_IFNg_PostVax_2.5mgIpi 3200
2 P103_IFNg_PostVax_2.5mgIpi 1271
3 P104_IFNg_PostVax_2.5mgIpi 3303
4   P108_IFNg_PostVax_5mgIpi 4283

1.8 Add meta data

patient <- str_split_i(colnames(merged.18279),"_",1)
site <- str_split_i(colnames(merged.18279),"_",2)
timepoint <- str_split_i(colnames(merged.18279),"_",3)
IpiCohort <- str_split_i(colnames(merged.18279),"_",4)
assay <- str_split_i(colnames(merged.18279),"_",5)
barcode <- str_split_i(colnames(merged.18279),"_",6)
sample <- paste0(patient,"_",site,"_",timepoint,"_",IpiCohort,"_",assay)

merged.18279 <- AddMetaData(merged.18279,patient,col.name="Patient")
merged.18279 <- AddMetaData(merged.18279,site,col.name="Site")
merged.18279 <- AddMetaData(merged.18279,timepoint,col.name="Timepoint")
merged.18279 <- AddMetaData(merged.18279,IpiCohort,col.name="IpiCohort")
merged.18279 <- AddMetaData(merged.18279,assay,col.name="Assay")
merged.18279 <- AddMetaData(merged.18279,sample,col.name="Sample")

1.9 Join then re-split RNA counts layers by Sample

merged.18279[['RNA']] <- JoinLayers(merged.18279[['RNA']])
merged.18279[["RNA"]] <- split(merged.18279[["RNA"]], f = merged.18279$Sample)

1.10 Save merged object

saveRDS(merged.18279,"IFNg_scRNA_Part1.rds")

1.11 Get session info

sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] scDblFinder_1.14.0          SingleCellExperiment_1.22.0
 [3] SummarizedExperiment_1.30.2 Biobase_2.60.0             
 [5] GenomicRanges_1.52.1        GenomeInfoDb_1.36.4        
 [7] IRanges_2.34.1              S4Vectors_0.38.2           
 [9] BiocGenerics_0.46.0         MatrixGenerics_1.12.3      
[11] matrixStats_1.2.0           harmony_1.2.0              
[13] Rcpp_1.0.12                 alevinQC_1.16.1            
[15] patchwork_1.3.0             Matrix_1.6-4               
[17] fishpond_2.6.2              SeuratWrappers_0.3.19      
[19] miQC_1.8.0                  lubridate_1.9.3            
[21] forcats_1.0.0               stringr_1.5.1              
[23] dplyr_1.1.4                 purrr_1.0.2                
[25] readr_2.1.5                 tidyr_1.3.1                
[27] tibble_3.2.1                ggplot2_3.4.4              
[29] tidyverse_2.0.0             Seurat_5.1.0               
[31] SeuratObject_5.0.2          sp_2.1-3                   
[33] sctransform_0.4.1           data.table_1.15.0          

loaded via a namespace (and not attached):
  [1] spatstat.sparse_3.0-3     bitops_1.0-7             
  [3] httr_1.4.7                RColorBrewer_1.1-3       
  [5] tools_4.3.1               utf8_1.2.4               
  [7] R6_2.5.1                  DT_0.31                  
  [9] lazyeval_0.2.2            uwot_0.1.16              
 [11] withr_3.0.0               GGally_2.2.1             
 [13] gridExtra_2.3             progressr_0.14.0         
 [15] cli_3.6.2                 spatstat.explore_3.2-6   
 [17] fastDummies_1.7.3         spatstat.data_3.0-4      
 [19] ggridges_0.5.6            pbapply_1.7-2            
 [21] Rsamtools_2.16.0          R.utils_2.12.3           
 [23] scater_1.28.0             parallelly_1.37.0        
 [25] limma_3.56.2              generics_0.1.3           
 [27] BiocIO_1.10.0             gtools_3.9.5             
 [29] ica_1.0-3                 spatstat.random_3.2-2    
 [31] ggbeeswarm_0.7.2          fansi_1.0.6              
 [33] abind_1.4-5               R.methodsS3_1.8.2        
 [35] lifecycle_1.0.4           yaml_2.3.8               
 [37] edgeR_3.42.4              Rtsne_0.17               
 [39] grid_4.3.1                promises_1.2.1           
 [41] dqrng_0.3.2               crayon_1.5.2             
 [43] shinydashboard_0.7.2      miniUI_0.1.1.1           
 [45] lattice_0.22-5            beachmat_2.16.0          
 [47] cowplot_1.1.3             pillar_1.9.0             
 [49] knitr_1.45                metapod_1.8.0            
 [51] rjson_0.2.21              xgboost_1.7.7.1          
 [53] future.apply_1.11.1       codetools_0.2-19         
 [55] leiden_0.4.3.1            glue_1.7.0               
 [57] remotes_2.4.2.1           vctrs_0.6.5              
 [59] png_0.1-8                 spam_2.10-0              
 [61] gtable_0.3.4              xfun_0.42                
 [63] S4Arrays_1.2.0            mime_0.12                
 [65] survival_3.5-8            statmod_1.5.0            
 [67] bluster_1.10.0            ellipsis_0.3.2           
 [69] fitdistrplus_1.1-11       ROCR_1.0-11              
 [71] nlme_3.1-164              RcppAnnoy_0.0.22         
 [73] irlba_2.3.5.1             vipor_0.4.7              
 [75] KernSmooth_2.23-22        colorspace_2.1-0         
 [77] nnet_7.3-19               tidyselect_1.2.0         
 [79] compiler_4.3.1            BiocNeighbors_1.18.0     
 [81] DelayedArray_0.26.7       plotly_4.10.4            
 [83] rtracklayer_1.60.1        scales_1.3.0             
 [85] lmtest_0.9-40             digest_0.6.34            
 [87] goftest_1.2-3             spatstat.utils_3.0-4     
 [89] rmarkdown_2.25            XVector_0.40.0           
 [91] htmltools_0.5.7           pkgconfig_2.0.3          
 [93] sparseMatrixStats_1.12.2  fastmap_1.1.1            
 [95] rlang_1.1.3               htmlwidgets_1.6.4        
 [97] shiny_1.8.0               DelayedMatrixStats_1.22.6
 [99] farver_2.1.1              zoo_1.8-12               
[101] jsonlite_1.8.8            BiocParallel_1.34.2      
[103] R.oo_1.26.0               BiocSingular_1.16.0      
[105] RCurl_1.98-1.14           magrittr_2.0.3           
[107] modeltools_0.2-23         scuttle_1.10.3           
[109] GenomeInfoDbData_1.2.10   dotCall64_1.1-1          
[111] munsell_0.5.0             viridis_0.6.5            
[113] reticulate_1.35.0         stringi_1.8.3            
[115] zlibbioc_1.46.0           MASS_7.3-60.0.1          
[117] plyr_1.8.9                flexmix_2.3-19           
[119] ggstats_0.5.1             parallel_4.3.1           
[121] listenv_0.9.1             ggrepel_0.9.5            
[123] deldir_2.0-2              Biostrings_2.68.1        
[125] splines_4.3.1             tensor_1.5               
[127] hms_1.1.3                 locfit_1.5-9.8           
[129] igraph_2.0.2              spatstat.geom_3.2-8      
[131] RcppHNSW_0.6.0            reshape2_1.4.4           
[133] ScaledMatrix_1.8.1        XML_3.99-0.16.1          
[135] evaluate_0.23             scran_1.28.2             
[137] BiocManager_1.30.22       tzdb_0.4.0               
[139] httpuv_1.6.14             RANN_2.6.1               
[141] polyclip_1.10-6           future_1.33.1            
[143] scattermore_1.2           rsvd_1.0.5               
[145] xtable_1.8-4              restfulr_0.0.15          
[147] svMisc_1.2.3              RSpectra_0.16-1          
[149] later_1.3.2               viridisLite_0.4.2        
[151] beeswarm_0.4.0            GenomicAlignments_1.36.0 
[153] tximport_1.28.0           cluster_2.1.6            
[155] timechange_0.3.0          globals_0.16.2