9  Skin: Differential expression analysis, Post vs Pre 3rd vaccination

9.1 Set up Seurat workspace

# Load libraries
library(presto)
library(Seurat)
library(tidyverse)
library(patchwork)
library(paletteer)
library(msigdbr)
library(ComplexHeatmap)
library(circlize)
library(muscat)
library(edgeR)
library(SingleCellExperiment)
library(DESeq2)
library(scuttle)
library(gprofiler2)
library(msigdbr)

9.2 Load previous seurat object

merged.18279.skin.singlets <- readRDS("Skin_scRNA_Part8.rds")

9.3 Run pseudobulk differential expression test with muscat

9.3.1 Convert Seurat object to SingleCellExperiment

merged.18279.skin.singlets[['RNA']] <- JoinLayers(merged.18279.skin.singlets[['RNA']])
merged.sce <- as.SingleCellExperiment(merged.18279.skin.singlets, assay="RNA")

9.3.2 Prep coldata and create pseudobulk of raw counts by sub-cluster x sample

(mergedCondition.sce <- prepSCE(merged.sce,
    kid = "sub.cluster",
    gid = "Timepoint",
    sid = "Sample",
    drop=TRUE))
class: SingleCellExperiment 
dim: 61217 33397 
metadata(1): experiment_info
assays(2): counts logcounts
rownames(61217): 5-8S-rRNA 5S-rRNA ... ZZEF1 ZZZ3
rowData names(0):
colnames(33397): P101_Skin_Pre3rd_2.5mgIpi_RNA_GCACATAAGTGCCATT
  P101_Skin_Pre3rd_2.5mgIpi_RNA_GGAACTTGTCGCGGTT ...
  P111_Skin_Post3rd_5mgIpi_RNA_ACGATGTTCCTAGGGC
  P111_Skin_Post3rd_5mgIpi_RNA_GAACATCAGCCACGTC
colData names(3): cluster_id sample_id group_id
reducedDimNames(4): PCA UMAP.UNINTEGRATED INTEGRATED.HARMONY
  UMAP.HARMONY
mainExpName: RNA
altExpNames(0):
pb <- aggregateData(mergedCondition.sce,
    assay = "counts",fun="sum",
    by = c("cluster_id", "sample_id"))

9.3.3 Construct design & contrast matrix to compare Post3rd to Pre3rd timepoints

ei <- metadata(mergedCondition.sce)$experiment_info
mm <- model.matrix(~0 + ei$group_id)
dim_name <- levels(ei$group_id)
dimnames(mm) <- list(ei$sample_id, dim_name)
contrast <- makeContrasts("Post3rd-Pre3rd", levels = mm)

9.3.4 Run differential expression test via DESeq2

res <- pbDS(pb, 
            design = mm, 
            contrast = contrast, 
            method="DESeq2", 
            min_cells = 20,
            verbose = FALSE)

# Combine results from all clusters into one tibble
df <- bind_rows(res$table$`Post3rd-Pre3rd`, .id = "sub.cluster") %>% 
    as_tibble()

9.3.5 Show snippet of results from one cluster: upregulated genes in cluster 7

df %>% 
    dplyr::filter(p_adj.loc < 0.05 & sub.cluster=="7" & logFC > 0) %>% 
    dplyr::arrange(p_adj.loc) %>% 
    print(n=30)
# A tibble: 927 × 11
   sub.cluster gene     cluster_id baseMean logFC lfcSE  stat    p_val p_adj.loc
   <chr>       <chr>    <chr>         <dbl> <dbl> <dbl> <dbl>    <dbl>     <dbl>
 1 7           APOBEC3A 7             716.   4.29 0.422 10.2  3.21e-24  2.56e-20
 2 7           ISG15    7            1378.   4.40 0.441  9.99 1.74e-23  6.95e-20
 3 7           IFI6     7             648.   4.10 0.413  9.91 3.94e-23  1.05e-19
 4 7           MX1      7             361.   3.94 0.434  9.09 1.03e-19  2.05e-16
 5 7           OAS2     7              78.2  3.31 0.370  8.95 3.43e-19  5.48e-16
 6 7           IRF7     7             310.   2.48 0.280  8.84 9.97e-19  1.33e-15
 7 7           OAS1     7             131.   3.75 0.429  8.74 2.40e-18  2.73e-15
 8 7           IFI35    7             164.   3.08 0.355  8.68 4.03e-18  3.59e-15
 9 7           LY6E     7             822.   2.92 0.337  8.68 4.05e-18  3.59e-15
10 7           OAS3     7              65.9  3.21 0.390  8.24 1.78e-16  1.29e-13
11 7           XAF1     7             125.   2.78 0.353  7.88 3.26e-15  1.45e-12
12 7           SAMD9    7              56.3  3.20 0.411  7.77 7.74e-15  3.09e-12
13 7           IL15RA   7              35.5  2.68 0.345  7.76 8.59e-15  3.26e-12
14 7           PSME2    7             791.   1.62 0.210  7.72 1.16e-14  4.01e-12
15 7           LGALS9   7             352.   1.66 0.215  7.71 1.25e-14  4.15e-12
16 7           TNFSF10  7             295.   3.25 0.422  7.70 1.32e-14  4.21e-12
17 7           EPSTI1   7             289.   2.69 0.350  7.67 1.67e-14  5.12e-12
18 7           PML      7             109.   1.87 0.243  7.67 1.76e-14  5.21e-12
19 7           IFITM1   7            1481.   4.47 0.594  7.52 5.39e-14  1.34e-11
20 7           SERPING1 7             322.   2.71 0.362  7.49 7.13e-14  1.69e-11
21 7           UBE2L6   7             247.   2.13 0.285  7.48 7.22e-14  1.69e-11
22 7           IFIT3    7              82.2  6.32 0.846  7.47 8.24e-14  1.87e-11
23 7           PPA1     7             507.   2.46 0.330  7.46 8.44e-14  1.87e-11
24 7           TYMP     7            1459.   2.09 0.281  7.45 9.52e-14  2.05e-11
25 7           NMI      7              78.9  2.24 0.302  7.43 1.07e-13  2.24e-11
26 7           LYSMD2   7              80.9  1.83 0.246  7.43 1.10e-13  2.26e-11
27 7           ENSG000… 7              25.5  3.14 0.424  7.40 1.38e-13  2.75e-11
28 7           LGALS3BP 7             147.   2.38 0.323  7.38 1.59e-13  3.09e-11
29 7           SP110    7             139.   1.98 0.269  7.36 1.81e-13  3.43e-11
30 7           IFI44L   7             107.   3.88 0.529  7.33 2.27e-13  4.22e-11
# ℹ 897 more rows
# ℹ 2 more variables: p_adj.glb <dbl>, contrast <chr>

9.3.6 Write differential expression results to tsv

write_tsv(df, "Skin_scRNA_PostVsPre3rd_DiffExp_results.tsv")

9.3.7 Plot number of significant genes for all clusters

all_subclusts <- enframe(unique(merged.18279.skin.singlets$sub.cluster),name=NULL,value="sub.cluster")
df %>%
  dplyr::filter(p_adj.loc < 0.05) %>%
  group_by(sub.cluster) %>%
  summarize(Up = count(logFC > 0),Down = (-1 * count(logFC < 0)),Total = (count(logFC > 0) + count(logFC < 0))) %>%
  left_join(all_subclusts, ., by="sub.cluster") %>%
  mutate(Up = replace_na(Up, 0), Down = replace_na(Down, 0), Total = replace_na(Total, 0)) %>%
  pivot_longer(cols = c(Up,Down), names_to="Direction", values_to="NumberDE") %>%
  ggplot(aes(x = reorder(sub.cluster,-Total), y = NumberDE, fill = Direction)) +
  geom_col() +
  theme_bw() +
  xlab("Sub-cluster") +
  ylab("Number of Differentially Expressed Genes\np_adj.loc < 0.05") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

9.4 Get session info

sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    grid      stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] gprofiler2_0.2.2            scuttle_1.10.3             
 [3] DESeq2_1.40.2               SingleCellExperiment_1.22.0
 [5] SummarizedExperiment_1.30.2 Biobase_2.60.0             
 [7] GenomicRanges_1.52.1        GenomeInfoDb_1.36.4        
 [9] IRanges_2.34.1              S4Vectors_0.38.2           
[11] BiocGenerics_0.46.0         MatrixGenerics_1.12.3      
[13] matrixStats_1.2.0           edgeR_3.42.4               
[15] limma_3.56.2                muscat_1.14.0              
[17] circlize_0.4.15             ComplexHeatmap_2.16.0      
[19] msigdbr_7.5.1               paletteer_1.6.0            
[21] patchwork_1.3.0             lubridate_1.9.3            
[23] forcats_1.0.0               stringr_1.5.1              
[25] dplyr_1.1.4                 purrr_1.0.2                
[27] readr_2.1.5                 tidyr_1.3.1                
[29] tibble_3.2.1                ggplot2_3.4.4              
[31] tidyverse_2.0.0             Seurat_5.1.0               
[33] SeuratObject_5.0.2          sp_2.1-3                   
[35] presto_1.0.0                data.table_1.15.0          
[37] Rcpp_1.0.12                

loaded via a namespace (and not attached):
  [1] spatstat.sparse_3.0-3     bitops_1.0-7             
  [3] httr_1.4.7                RColorBrewer_1.1-3       
  [5] doParallel_1.0.17         numDeriv_2016.8-1.1      
  [7] backports_1.4.1           tools_4.3.1              
  [9] sctransform_0.4.1         utf8_1.2.4               
 [11] R6_2.5.1                  lazyeval_0.2.2           
 [13] uwot_0.1.16               mgcv_1.9-1               
 [15] GetoptLong_1.0.5          withr_3.0.0              
 [17] prettyunits_1.2.0         gridExtra_2.3            
 [19] SeuratWrappers_0.3.19     progressr_0.14.0         
 [21] cli_3.6.2                 spatstat.explore_3.2-6   
 [23] fastDummies_1.7.3         sandwich_3.1-0           
 [25] labeling_0.4.3            mvtnorm_1.2-4            
 [27] spatstat.data_3.0-4       blme_1.0-5               
 [29] ggridges_0.5.6            pbapply_1.7-2            
 [31] R.utils_2.12.3            scater_1.28.0            
 [33] parallelly_1.37.0         generics_0.1.3           
 [35] shape_1.4.6               vroom_1.6.5              
 [37] gtools_3.9.5              ica_1.0-3                
 [39] spatstat.random_3.2-2     Matrix_1.6-4             
 [41] ggbeeswarm_0.7.2          fansi_1.0.6              
 [43] abind_1.4-5               R.methodsS3_1.8.2        
 [45] lifecycle_1.0.4           multcomp_1.4-25          
 [47] yaml_2.3.8                gplots_3.1.3.1           
 [49] Rtsne_0.17                promises_1.2.1           
 [51] crayon_1.5.2              miniUI_0.1.1.1           
 [53] lattice_0.22-5            beachmat_2.16.0          
 [55] cowplot_1.1.3             pillar_1.9.0             
 [57] knitr_1.45                rjson_0.2.21             
 [59] boot_1.3-29               estimability_1.4.1       
 [61] future.apply_1.11.1       codetools_0.2-19         
 [63] leiden_0.4.3.1            glue_1.7.0               
 [65] remotes_2.4.2.1           vctrs_0.6.5              
 [67] png_0.1-8                 spam_2.10-0              
 [69] Rdpack_2.6                gtable_0.3.4             
 [71] rematch2_2.1.2            xfun_0.42                
 [73] rbibutils_2.2.16          S4Arrays_1.2.0           
 [75] mime_0.12                 coda_0.19-4.1            
 [77] reformulas_0.4.0          survival_3.5-8           
 [79] iterators_1.0.14          ellipsis_0.3.2           
 [81] fitdistrplus_1.1-11       TH.data_1.1-2            
 [83] ROCR_1.0-11               nlme_3.1-164             
 [85] pbkrtest_0.5.2            bit64_4.0.5              
 [87] EnvStats_2.8.1            progress_1.2.3           
 [89] RcppAnnoy_0.0.22          TMB_1.9.10               
 [91] irlba_2.3.5.1             vipor_0.4.7              
 [93] KernSmooth_2.23-22        colorspace_2.1-0         
 [95] nnet_7.3-19               tidyselect_1.2.0         
 [97] emmeans_1.10.0            bit_4.0.5                
 [99] compiler_4.3.1            BiocNeighbors_1.18.0     
[101] DelayedArray_0.26.7       plotly_4.10.4            
[103] caTools_1.18.2            scales_1.3.0             
[105] remaCor_0.0.18            lmtest_0.9-40            
[107] digest_0.6.34             goftest_1.2-3            
[109] spatstat.utils_3.0-4      minqa_1.2.6              
[111] variancePartition_1.30.2  rmarkdown_2.25           
[113] aod_1.3.3                 RhpcBLASctl_0.23-42      
[115] XVector_0.40.0            htmltools_0.5.7          
[117] pkgconfig_2.0.3           lme4_1.1-35.1            
[119] sparseMatrixStats_1.12.2  fastmap_1.1.1            
[121] rlang_1.1.3               GlobalOptions_0.1.2      
[123] htmlwidgets_1.6.4         shiny_1.8.0              
[125] DelayedMatrixStats_1.22.6 farver_2.1.1             
[127] zoo_1.8-12                jsonlite_1.8.8           
[129] BiocParallel_1.34.2       R.oo_1.26.0              
[131] BiocSingular_1.16.0       RCurl_1.98-1.14          
[133] magrittr_2.0.3            modeltools_0.2-23        
[135] GenomeInfoDbData_1.2.10   dotCall64_1.1-1          
[137] munsell_0.5.0             viridis_0.6.5            
[139] babelgene_22.9            reticulate_1.35.0        
[141] stringi_1.8.3             zlibbioc_1.46.0          
[143] MASS_7.3-60.0.1           flexmix_2.3-19           
[145] plyr_1.8.9                parallel_4.3.1           
[147] listenv_0.9.1             ggrepel_0.9.5            
[149] deldir_2.0-2              splines_4.3.1            
[151] tensor_1.5                hms_1.1.3                
[153] locfit_1.5-9.8            igraph_2.0.2             
[155] spatstat.geom_3.2-8       RcppHNSW_0.6.0           
[157] ScaledMatrix_1.8.1        reshape2_1.4.4           
[159] evaluate_0.23             BiocManager_1.30.22      
[161] nloptr_2.0.3              tzdb_0.4.0               
[163] foreach_1.5.2             httpuv_1.6.14            
[165] RANN_2.6.1                polyclip_1.10-6          
[167] future_1.33.1             clue_0.3-65              
[169] scattermore_1.2           rsvd_1.0.5               
[171] broom_1.0.5               xtable_1.8-4             
[173] RSpectra_0.16-1           later_1.3.2              
[175] viridisLite_0.4.2         lmerTest_3.1-3           
[177] glmmTMB_1.1.10            beeswarm_0.4.0           
[179] cluster_2.1.6             timechange_0.3.0         
[181] globals_0.16.2