Training reference model for pseudotime using ProjectSVR

To be note, we can build reference model on not only UMAP embeddings, but also any biological meaningful continuous variable, such as pseudotime via ProjectSVR. Here, we show how to build supported vector regression (SVR) model for pseudotime of germ cells in mTCA.

library(Seurat)
library(ProjectSVR)
library(tidyverse)

options(timeout = max(3600, getOption("timeout")))

# reference model
if (!dir.exists("models")) dir.create("models")
download.file(url = "https://zenodo.org/record/8350732/files/model.mTCA.rds", 
              destfile = "models/model.mTCA.rds")
# reference atlas
if (!dir.exists("reference")) dir.create("reference")
download.file(url = "https://zenodo.org/record/8350746/files/mTCA.seurat.slim.qs", 
              destfile = "reference/mTCA.seurat.slim.qs")

# query data
if (!dir.exists("query")) dir.create("query")
download.file(url = "https://zenodo.org/record/8350748/files/query_Ythdc2-KO.seurat.slim.qs", 
              destfile = "query/query_Ythdc2-KO.seurat.slim.qs")

Build SVR Model of Pseudotime

reference <- readRDS("models/model.mTCA.rds")
seu.ref <- qs::qread("reference/mTCA.seurat.slim.qs")
## subset germ cells
germ.cells <- rownames(subset(seu.ref@meta.data, !is.na(Pseudotime_dm)))
seu.ref <- subset(seu.ref, cells = germ.cells)
seu.ref

## An object of class Seurat 
## 32285 features across 97986 samples within 1 assay 
## Active assay: RNA (32285 features, 2000 variable features)
##  1 dimensional reduction calculated: umap

FeaturePlot(seu.ref, reduction = "umap", features = "Pseudotime_dm", raster = TRUE) + 
  scale_color_viridis_c()

## gene set scoring
top.genes <- reference$genes$gene.sets
seu.ref <- ComputeModuleScore(seu.ref, gene.sets = top.genes, method = "AUCell", cores = 10)
Assays(seu.ref)

## [1] "RNA"            "SignatureScore"

DefaultAssay(seu.ref) <- "SignatureScore"
gss.mat <- FetchData(seu.ref, vars = rownames(seu.ref))

## training model
pst.mat <- FetchData(seu.ref, vars = "Pseudotime_dm")
colnames(pst.mat) <- "pseudotime_mTCA"
pst.model <- FitEnsembleSVM(feature.mat = gss.mat, emb.mat = pst.mat, n.models = 20, cores = 10)

## save model to reference object
reference$models$pseudotime <- pst.model

saveRDS(reference, "models/model.mTCA.v2.rds")

Reference Mapping

seu.q <- qs::qread("query/query_Ythdc2-KO.seurat.slim.qs")

## map query
seu.q <- ProjectSVR::MapQuery(seu.q, reference = reference, add.map.qual = T, ncores = 10)

## label transfer
seu.q <- subset(seu.q, mapQ.p.adj < 0.1)
seu.q <- ProjectSVR::LabelTransfer(seu.q, reference, ref.label.col = "cell_type")

## majority votes
feature.mat <- FetchData(seu.q, vars = rownames(seu.q))
cell.types <- FetchData(seu.q, vars = c("knn.pred.celltype"))
knn.pred.mv <- MajorityVote(feature.mat = feature.mat, cell.types = cell.types, k = 100, min.prop = 0.3)
seu.q$knn.pred.celltype.major_votes <- knn.pred.mv$knn.pred.celltype.major_votes

## predict the pseudotime
gss.mat.q <- FetchData(seu.q, vars = rownames(seu.q))
proj.res <- ProjectNewdata(feature.mat = gss.mat.q, model = pst.model, cores = 10)

## save results to the original seurat object
seu.q$pseudotime.pred <- proj.res@embeddings[,1]

query.plot <- FetchData(seu.q, vars = c("genotype", "pseudotime.pred", "knn.pred.celltype.major_votes"))
colnames(query.plot)[3] <- "celltype"

# remove somatic cells
query.plot <- subset(query.plot, celltype != "m24-PTM_Myh11") 
 
cal_cum <- . %>% 
  arrange(pseudotime.pred) %>% 
  mutate(rank = order(pseudotime.pred)) %>% 
  mutate(cum = rank/max(rank))

ctrl_cum <- query.plot %>% subset(genotype == "WT") %>% cal_cum()
test_cum <- query.plot %>% subset(genotype == "Ythdc2-KO") %>% cal_cum()

segment.plot <- query.plot %>% group_by(celltype) %>% 
  summarise(min.pt = quantile(pseudotime.pred, 0.3),
            max.pt = quantile(pseudotime.pred, 0.7)) %>% 
  arrange(min.pt)
N <- nrow(segment.plot)

data.plot <- rbind(ctrl_cum, test_cum) 

p1 <- ggplot(data.plot, aes(pseudotime.pred, cum, color = celltype)) + geom_point()

data.plot %>% 
  ggplot(aes(pseudotime.pred, cum)) + 
  geom_jitter(inherit.aes = F, data = subset(data.plot, genotype == 'WT'), 
              aes(pseudotime.pred, -0.05, color = celltype, show.legend = TRUE), 
              height = 0.05, size = .5, alpha = 1, show.legend = F) +
  geom_jitter(inherit.aes = F, data = subset(data.plot, genotype == 'Ythdc2-KO'), 
              aes(pseudotime.pred, -0.2, color = celltype), 
              height = 0.05, size = .5, alpha = 1, show.legend = F) +
  annotate("text", x = c(38,38), y = c(-0.05, -0.2), label = c("WT", "KO")) + 
  geom_line(aes(linetype=genotype)) + 
  geom_vline(xintercept = 53, linetype="dashed", color="blue") +
  labs(x = "Pseudotime", y = "Cumulative proportion of cells") + 
  theme_bw(base_size = 15) + 
  theme(legend.title = element_blank(),
        legend.position = c(0.2, 0.85),
        legend.background = element_rect(size=.5, color="black")) + 
  cowplot::get_legend(p1)

The pseudotime characterizes the progress of spermatogenesis in mTCA, through the distribution of the predicted pseudotime between WT and Ythdc2-KO germ cells, we can interpret that the Ythdc2-KO germ cells arrest at the pre-leptotene stage at transcriptome level.

Session Info

sessionInfo()

## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.9.2    forcats_1.0.0      stringr_1.5.0      dplyr_1.1.3       
##  [5] purrr_1.0.2        readr_2.1.4        tidyr_1.3.0        tibble_3.2.1      
##  [9] ggplot2_3.4.3      tidyverse_2.0.0    ProjectSVR_0.2.0   SeuratObject_4.1.3
## [13] Seurat_4.3.0.1    
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3              scattermore_1.2            
##   [3] prabclus_2.3-2              R.methodsS3_1.8.2          
##   [5] ragg_1.2.5                  bit64_4.0.5                
##   [7] knitr_1.43                  DelayedArray_0.20.0        
##   [9] R.utils_2.12.2              irlba_2.3.5.1              
##  [11] data.table_1.14.8           KEGGREST_1.34.0            
##  [13] RCurl_1.98-1.12             doParallel_1.0.17          
##  [15] generics_0.1.3              BiocGenerics_0.40.0        
##  [17] cowplot_1.1.1               RSQLite_2.3.1              
##  [19] RApiSerialize_0.1.2         RANN_2.6.1                 
##  [21] future_1.33.0               bit_4.0.5                  
##  [23] tzdb_0.4.0                  spatstat.data_3.0-1        
##  [25] httpuv_1.6.11               SummarizedExperiment_1.24.0
##  [27] xfun_0.40                   hms_1.1.3                  
##  [29] jquerylib_0.1.4             evaluate_0.21              
##  [31] promises_1.2.1              DEoptimR_1.1-2             
##  [33] fansi_1.0.4                 igraph_1.5.1               
##  [35] DBI_1.1.3                   htmlwidgets_1.6.2          
##  [37] spatstat.geom_3.2-5         stats4_4.1.2               
##  [39] ellipsis_0.3.2              mlr3data_0.7.0             
##  [41] backports_1.4.1             annotate_1.72.0            
##  [43] MatrixGenerics_1.6.0        RcppParallel_5.1.7         
##  [45] deldir_1.0-9                vctrs_0.6.3                
##  [47] Biobase_2.54.0              here_1.0.1                 
##  [49] ROCR_1.0-11                 abind_1.4-5                
##  [51] cachem_1.0.8                withr_2.5.0                
##  [53] mlr3verse_0.2.8             mlr3learners_0.5.6         
##  [55] robustbase_0.99-0           progressr_0.14.0           
##  [57] checkmate_2.2.0             sctransform_0.3.5          
##  [59] mlr3fselect_0.11.0          mclust_6.0.0               
##  [61] goftest_1.2-3               cluster_2.1.2              
##  [63] lazyeval_0.2.2              crayon_1.5.2               
##  [65] spatstat.explore_3.2-3      pkgconfig_2.0.3            
##  [67] labeling_0.4.3              GenomeInfoDb_1.30.1        
##  [69] nlme_3.1-155                nnet_7.3-17                
##  [71] rlang_1.1.1                 globals_0.16.2             
##  [73] diptest_0.76-0              lifecycle_1.0.3            
##  [75] miniUI_0.1.1.1              palmerpenguins_0.1.1       
##  [77] rprojroot_2.0.3             polyclip_1.10-4            
##  [79] matrixStats_1.0.0           lmtest_0.9-40              
##  [81] graph_1.72.0                Matrix_1.6-1               
##  [83] zoo_1.8-12                  ggridges_0.5.4             
##  [85] GlobalOptions_0.1.2         png_0.1-8                  
##  [87] viridisLite_0.4.2           rjson_0.2.21               
##  [89] stringfish_0.15.8           bitops_1.0-7               
##  [91] R.oo_1.25.0                 KernSmooth_2.23-20         
##  [93] Biostrings_2.62.0           blob_1.2.4                 
##  [95] shape_1.4.6                 paradox_0.11.1             
##  [97] parallelly_1.36.0           spatstat.random_3.1-6      
##  [99] S4Vectors_0.32.4            scales_1.2.1               
## [101] memoise_2.0.1               GSEABase_1.56.0            
## [103] magrittr_2.0.3              plyr_1.8.8                 
## [105] ica_1.0-3                   zlibbioc_1.40.0            
## [107] compiler_4.1.2              RColorBrewer_1.1-3         
## [109] clue_0.3-64                 fitdistrplus_1.1-11        
## [111] cli_3.6.1                   XVector_0.34.0             
## [113] mlr3tuningspaces_0.4.0      mlr3filters_0.7.1          
## [115] listenv_0.9.0               patchwork_1.1.3            
## [117] pbapply_1.7-2               MASS_7.3-55                
## [119] mlr3hyperband_0.4.5         tidyselect_1.2.0           
## [121] stringi_1.7.12              textshaping_0.3.6          
## [123] highr_0.10                  yaml_2.3.7                 
## [125] ggrepel_0.9.3               grid_4.1.2                 
## [127] sass_0.4.7                  tools_4.1.2                
## [129] timechange_0.2.0            mlr3misc_0.12.0            
## [131] future.apply_1.11.0         parallel_4.1.2             
## [133] mlr3cluster_0.1.8           circlize_0.4.15            
## [135] rstudioapi_0.15.0           uuid_1.1-1                 
## [137] qs_0.25.5                   foreach_1.5.2              
## [139] AUCell_1.16.0               gridExtra_2.3              
## [141] farver_2.1.1                Rtsne_0.16                 
## [143] digest_0.6.33               shiny_1.7.5                
## [145] fpc_2.2-10                  Rcpp_1.0.11                
## [147] GenomicRanges_1.46.1        later_1.3.1                
## [149] RcppAnnoy_0.0.21            httr_1.4.7                 
## [151] AnnotationDbi_1.56.2        mlr3mbo_0.2.1              
## [153] mlr3tuning_0.19.0           ComplexHeatmap_2.10.0      
## [155] kernlab_0.9-32              colorspace_2.1-0           
## [157] XML_3.99-0.14               fs_1.6.3                   
## [159] tensor_1.5                  reticulate_1.31            
## [161] IRanges_2.28.0              splines_4.1.2              
## [163] lgr_0.4.4                   uwot_0.1.16                
## [165] bbotk_0.7.2                 spatstat.utils_3.0-3       
## [167] pkgdown_2.0.7               sp_2.0-0                   
## [169] mlr3pipelines_0.5.0-1       flexmix_2.3-19             
## [171] plotly_4.10.2               systemfonts_1.0.4          
## [173] xtable_1.8-4                jsonlite_1.8.7             
## [175] modeltools_0.2-23           R6_2.5.1                   
## [177] pillar_1.9.0                htmltools_0.5.6            
## [179] mime_0.12                   glue_1.6.2                 
## [181] fastmap_1.1.1               mlr3_0.16.1                
## [183] class_7.3-20                codetools_0.2-18           
## [185] spacefillr_0.3.2            utf8_1.2.3                 
## [187] lattice_0.20-45             bslib_0.5.1                
## [189] spatstat.sparse_3.0-2       leiden_0.4.3               
## [191] mlr3viz_0.6.1               survival_3.2-13            
## [193] rmarkdown_2.24              desc_1.4.2                 
## [195] munsell_0.5.0               GetoptLong_1.0.5           
## [197] GenomeInfoDbData_1.2.7      iterators_1.0.14           
## [199] reshape2_1.4.4              gtable_0.3.4

Compiled: November 06, 2023

Download Related Dataset

Build SVR Model of Pseudotime

Reference Mapping