11  Skin: Calculating Post vs Pre 3rd vaccination differences in pathway enrichments with fgsea

11.1 Set up Seurat workspace

# Load libraries
library(ggplot2)
library(Seurat)
library(tidyverse)
library(msigdbr)
library(fgsea)
library(GSEAtraining)       # This is used because KEGG_MEDICUS isn't yet available in msigdbr

11.2 Read in pseudobulk DE test results

skin_de <- read_tsv("Skin_scRNA_PostVsPre3rd_DiffExp_results.tsv")
Rows: 125307 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (4): sub.cluster, gene, cluster_id, contrast
dbl (7): baseMean, logFC, lfcSE, stat, p_val, p_adj.loc, p_adj.glb

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
skin_de
# A tibble: 125,307 × 11
   sub.cluster gene   cluster_id baseMean   logFC lfcSE   stat   p_val p_adj.loc
   <chr>       <chr>  <chr>         <dbl>   <dbl> <dbl>  <dbl>   <dbl>     <dbl>
 1 0           7SK    0            166.   -2.09   0.470 -4.44  9.12e-6  0.000168
 2 0           A2M    0             84.8  -3.11   1.07  -2.90  3.75e-3  0.0215  
 3 0           AAGAB  0             21.1   0.113  0.282  0.400 6.89e-1  0.817   
 4 0           AAK1   0            105.   -0.111  0.154 -0.724 4.69e-1  0.650   
 5 0           AAMDC  0             22.9   0.303  0.280  1.08  2.79e-1  0.471   
 6 0           AAMP   0             50.9   0.0513 0.219  0.234 8.15e-1  0.897   
 7 0           AAR2   0              9.66  0.0439 0.430  0.102 9.19e-1  0.959   
 8 0           AARS1  0             12.1  -0.160  0.327 -0.489 6.25e-1  0.773   
 9 0           AASDH… 0             14.8   0.495  0.315  1.57  1.16e-1  0.262   
10 0           AATF   0             50.2   0.489  0.233  2.10  3.58e-2  0.116   
# ℹ 125,297 more rows
# ℹ 2 more variables: p_adj.glb <dbl>, contrast <chr>

11.3 Set cluster annotations

cluster_annot <- c(
        "0" = "MonoMac_1",
        "1_0" = "CD4_T_Naive/CM",
        "1_1" = "CD4_T_CTL",
        "2_0" = "CD8_T_EM_1",
        "2_1" = "CD8_T_Naive/CM_1",
        "2_2" = "CD8_T_EM_2",
        "3" = "MonoMac_2",
        "4_0" = "CD8_T_Naive/CM_2",
        "4_1" = "CD8_T_Naive/CM_3",
        "5_0" = "NK/T_1",
        "5_1" = "NK/T_2",
        "5_2" = "NK_cells",
        "6_0" = "T_unknown_1",
        "6_1" = "CD8_T_EM_3",
        "7" = "cDC2",
        "8" = "Keratinocytes",
        "9_0" = "Treg_1",
        "9_1" = "Treg_2",
        "10" = "Fibroblasts",
        "11_0" = "T_unknown_2",
        "11_1" = "T_unknown_3",
        "12" = "MonoMac_3",
        "13_0" = "Proliferating_T_cells_1",
        "13_1" = "Proliferating_T_cells_2",
        "14" = "pDC",
        "15" = "DC_LAMP3",
        "16" = "Mast",
        "17" = "Endothelial",
        "18" = "cDC1",
        "19_0" = "NK/T_3",
        "20" = "Pericytes/smooth_muscle",
        "21" = "Basal_1",
        "22" = "MonoMac_4",
        "23" = "B_cells",
        "24" = "Basal_2",
        "25" = "Macrophages",
        "26" = "Proliferating_cDC2"
)

11.4 Set cluster orders for plotting

cluster_order <- c("0","3","12","22","25","14","18","7","26","15","23","5_2","5_0","5_1","19_0","2_1","4_0","4_1","2_0","2_2","6_1","1_0","1_1","9_0","9_1","13_0","13_1","6_0","11_0","11_1","16","8","10","17","20","21","24")

11.5 Split results by cluster, then extract ranks by log2 fold-change Post3rd vs Pre3rd

skin_de_split <- skin_de %>%
    group_by(sub.cluster) %>%
    group_split()

skin_de_split_ranks <- lapply(skin_de_split, 
                                FUN = function(x) { x %>%
                                    dplyr::select(gene,logFC) %>%
                                    dplyr::arrange(logFC) %>%
                                    deframe()
                                    }
                        )
names(skin_de_split_ranks) <- paste0("Cluster", 
                                sapply(skin_de_split, 
                                FUN = function(x) { x %>% 
                                    pull(sub.cluster) %>%
                                    unique()
                                    }
                                )
                            )

11.6 Prepare gene sets from MSigDB

c2 <- msigdbr::msigdbr(species="Homo sapiens",
        category = "C2")

immunesig <- msigdbr::msigdbr(species="Homo sapiens",
        category = "C7", 
        subcategory = "IMMUNESIGDB")

hallmark <- msigdbr::msigdbr(species="Homo sapiens",
    category = "H")                         

# Get updated KEGG MEDICUS collection from GSEAtraining package 
km <- get_msigdb(version = "2024.1.Hs", 
    collection = "c2.cp.kegg_medicus",
    gene_id_type = "symbols")

# Combine into named list
pathwaysToTest <- bind_rows(c2, immunesig, hallmark)

pathwaysToTest_split <- pathwaysToTest %>%
    dplyr::select(gs_name,human_gene_symbol) %>%
    group_by(gs_name) %>%
    group_split()

names(pathwaysToTest_split) <- sapply(pathwaysToTest_split, 
                                        FUN = function(x) { x %>% 
                                            pull(gs_name) %>%
                                            unique()
                                            }
                                        )


pathwaysToTest_split_list <- sapply(pathwaysToTest_split, 
                                    simplify = FALSE, 
                                    USE.NAMES = TRUE, 
                                    FUN = function(x) { x %>%
                                        pull(human_gene_symbol) %>%
                                        unique()
                                    }
                                )
# Add KEGG MEDICUS to the list
pathwaysToTest_split_list <- c(pathwaysToTest_split_list, km)

11.7 Iterate over DE results and run fgsea on each cluster’s ranked gene list

set.seed(123)
fgseaRes <- sapply(skin_de_split_ranks, 
    simplify = FALSE, 
    USE.NAMES = TRUE, 
    FUN = function(x) {
        fgsea(pathways = pathwaysToTest_split_list, 
              stats = x,
              minSize = 5, # Note this is the intersection of genes in the ranked list and genes in the pathway
              maxSize = 1000,
              nproc = 1
              )
        }
    )

11.8 Collapse all results into one tibble and filter for significance

Save the filtered output

bind_rows(fgseaRes,.id = "sub.cluster") %>%
    as_tibble() %>%
    mutate(leadingEdgeGenes = map_chr(leadingEdge, toString)) %>%
    dplyr::select(-leadingEdge) %>%
    dplyr::filter(padj < 0.05) %>%
    dplyr::arrange(sub.cluster,padj,NES) %>%
    write_tsv("Skin_scRNA_PostVsPre3rd_DiffExp_fgseaRes_sig.tsv")

11.12 Plot dot plot of NES and p-value for pathways of interest

bind_rows(fgseaRes,.id = "sub.cluster") %>%
    as_tibble() %>%
    mutate(leadingEdgeGenes = map_chr(leadingEdge, toString)) %>%
    dplyr::select(-leadingEdge) %>%
    dplyr::filter(pathway %in% topsigPathways) %>%
    left_join(enframe(topsigPathways,name = "PathwayGroup",value = "pathway"),by="pathway") %>%
    left_join(enframe(cluster_annot,name = "sub.cluster", value = "Cluster") %>% 
                mutate(sub.cluster = paste0("Cluster",sub.cluster)), 
              by = "sub.cluster") %>%
    mutate(logP = ifelse(padj < 0.05, -log10(padj), 0)) %>%
    ggplot(aes(x = fct_relevel(Cluster,cluster_annot[cluster_order][cluster_annot[cluster_order] %in% Cluster]), y = pathway)) +
    geom_point(aes(color = NES, size = logP, alpha = I(ifelse(logP < -log10(0.05), 0, 1)))) +
    scale_size_continuous("-log10 adj p", range = c(1, 10), breaks = c(1.3, 5, 10, 20, 30)) +
    scale_color_gradient2(low = "blue", mid = "white", high = "red", limits=c(-3,3), midpoint = 0, oob = scales::squish) +
    theme_bw() +
    facet_grid(fct_relevel(PathwayGroup,
                            c("Innate stimulation",
                              "Interferon signaling",
                              "Cytokine signaling",
                              "Antigen processing",
                              "Other recurring significant")
                           )~.,
               scales="free",
               switch="y",
               space="free") +
    scale_y_discrete(position = "right") +
    ylab("") +
    xlab("") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
          strip.text.y.left = element_text(angle = 0))

11.13 Get session info

sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] GSEAtraining_0.99.1 fgsea_1.26.0        msigdbr_7.5.1      
 [4] lubridate_1.9.3     forcats_1.0.0       stringr_1.5.1      
 [7] dplyr_1.1.4         purrr_1.0.2         readr_2.1.5        
[10] tidyr_1.3.1         tibble_3.2.1        tidyverse_2.0.0    
[13] Seurat_5.1.0        SeuratObject_5.0.2  sp_2.1-3           
[16] ggplot2_3.5.1      

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3      jsonlite_1.8.8          magrittr_2.0.3         
  [4] spatstat.utils_3.0-4    farver_2.1.1            rmarkdown_2.25         
  [7] zlibbioc_1.46.0         vctrs_0.6.5             ROCR_1.0-11            
 [10] memoise_2.0.1           spatstat.explore_3.2-6  RCurl_1.98-1.14        
 [13] htmltools_0.5.7         sctransform_0.4.1       parallelly_1.37.0      
 [16] KernSmooth_2.23-22      htmlwidgets_1.6.4       ica_1.0-3              
 [19] plyr_1.8.9              cachem_1.0.8            plotly_4.10.4          
 [22] zoo_1.8-12              igraph_2.0.2            mime_0.12              
 [25] lifecycle_1.0.4         pkgconfig_2.0.3         Matrix_1.6-4           
 [28] R6_2.5.1                fastmap_1.1.1           GenomeInfoDbData_1.2.10
 [31] fitdistrplus_1.1-11     future_1.33.1           shiny_1.8.0            
 [34] digest_0.6.34           colorspace_2.1-0        S4Vectors_0.38.2       
 [37] AnnotationDbi_1.64.1    patchwork_1.3.0         tensor_1.5             
 [40] RSpectra_0.16-1         irlba_2.3.5.1           RSQLite_2.3.5          
 [43] labeling_0.4.3          progressr_0.14.0        fansi_1.0.6            
 [46] spatstat.sparse_3.0-3   timechange_0.3.0        httr_1.4.7             
 [49] polyclip_1.10-6         abind_1.4-5             compiler_4.3.1         
 [52] bit64_4.0.5             withr_3.0.0             BiocParallel_1.34.2    
 [55] DBI_1.2.2               fastDummies_1.7.3       MASS_7.3-60.0.1        
 [58] tools_4.3.1             lmtest_0.9-40           httpuv_1.6.14          
 [61] future.apply_1.11.1     goftest_1.2-3           glue_1.7.0             
 [64] nlme_3.1-164            promises_1.2.1          grid_4.3.1             
 [67] Rtsne_0.17              cluster_2.1.6           reshape2_1.4.4         
 [70] generics_0.1.3          gtable_0.3.4            spatstat.data_3.0-4    
 [73] tzdb_0.4.0              data.table_1.15.0       hms_1.1.3              
 [76] XVector_0.40.0          utf8_1.2.4              BiocGenerics_0.46.0    
 [79] spatstat.geom_3.2-8     RcppAnnoy_0.0.22        ggrepel_0.9.5          
 [82] RANN_2.6.1              pillar_1.9.0            vroom_1.6.5            
 [85] spam_2.10-0             babelgene_22.9          RcppHNSW_0.6.0         
 [88] later_1.3.2             splines_4.3.1           lattice_0.22-5         
 [91] bit_4.0.5               survival_3.5-8          deldir_2.0-2           
 [94] tidyselect_1.2.0        Biostrings_2.68.1       miniUI_0.1.1.1         
 [97] pbapply_1.7-2           knitr_1.45              gridExtra_2.3          
[100] IRanges_2.34.1          scattermore_1.2         stats4_4.3.1           
[103] xfun_0.42               Biobase_2.60.0          matrixStats_1.2.0      
[106] DT_0.31                 stringi_1.8.3           lazyeval_0.2.2         
[109] yaml_2.3.8              evaluate_0.23           codetools_0.2-19       
[112] cli_3.6.2               uwot_0.1.16             xtable_1.8-4           
[115] reticulate_1.35.0       munsell_0.5.0           GenomeInfoDb_1.36.4    
[118] Rcpp_1.0.12             globals_0.16.2          spatstat.random_3.2-2  
[121] png_0.1-8               parallel_4.3.1          ellipsis_0.3.2         
[124] blob_1.2.4              dotCall64_1.1-1         bitops_1.0-7           
[127] listenv_0.9.1           viridisLite_0.4.2       scales_1.3.0           
[130] ggridges_0.5.6          crayon_1.5.2            leiden_0.4.3.1         
[133] rlang_1.1.3             KEGGREST_1.40.1         fastmatch_1.1-4        
[136] cowplot_1.1.3