Sorry for lateness,
I wanted to do something similar. This is what I did for reference:
Using a Seurat generated gene list for input into ClusterProfiler to see the GO or KEGG terms per cluster.
I'll keep the meat and potatoes of the Seurat vignette in this tutorial:
library(dplyr)
library(Seurat)
library(patchwork)
# Load the PBMC dataset
SeuratData::InstallData("pbmc3k")
pbmc <- SeuratData::LoadData("pbmc3k")
# Initialize the Seurat object with the raw (non-normalized data).
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
#Without graph.name argument
pbmc <- FindNeighbors(pbmc, graph.name ="test", dims = 1:10)
pbmc <- FindClusters(pbmc, graph.name = "test", algorithm = 1, resolution = 0.5)
#pbmc <- FindNeighbors(pbmc, dims = 1:10)
#pbmc <- FindClusters(pbmc, algorithm = 1, resolution = 0.5)
pbmc <- RunUMAP(pbmc, dims = 1:10)
DimPlot(pbmc, reduction = "umap", label = TRUE, label.size = 6)
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.25)
pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
##################################################################
#Subsetting top 100 markers with adjusted p values lower than .05#
##################################################################
top100 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 100, wt = avg_log2FC)
top100pval <- subset(top100, rowSums(top100[5] < 0.05) > 0)
Now for that you have your dataframe with top ~100 genes per cluster with a pvalue adjusted of lower than .05. We can bring this to ClusterProfiler.
#ClusterProfiler
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("clusterProfiler")
BiocManager::install("org.Hs.eg.db")
BiocManager::install("AnnotationHub")
library("clusterProfiler")
library("org.Hs.eg.db")
library("AnnotationHub")
df <- top100pval[,7:6]
dfsample <- split(df$gene,df$cluster)
length(dfsample)
#The output of length(dfsample) returns how many clusters you have
#Here there at 9 clusters (0, 1, 2, 3, 4, 5, 6, 7 and 8)
#I'm sure there's a better way but you have to make a line like below for each cluster
dfsample$`0` = bitr(dfsample$`0`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`1` = bitr(dfsample$`1`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`2` = bitr(dfsample$`2`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`3` = bitr(dfsample$`3`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`4` = bitr(dfsample$`4`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`5` = bitr(dfsample$`5`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`6` = bitr(dfsample$`6`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`7` = bitr(dfsample$`7`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`8` = bitr(dfsample$`8`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
#do the same here, a line like below for each cluster
genelist <- list("0" = dfsample$`0`$ENTREZID,
"1" = dfsample$`1`$ENTREZID,
"2" = dfsample$`2`$ENTREZID,
"3" = dfsample$`3`$ENTREZID,
"4" = dfsample$`4`$ENTREZID,
"5" = dfsample$`5`$ENTREZID,
"6" = dfsample$`6`$ENTREZID,
"7" = dfsample$`7`$ENTREZID,
"8" = dfsample$`8`$ENTREZID
)
GOclusterplot <- compareCluster(geneCluster = genelist, fun = "enrichGO", OrgDb = "org.Hs.eg.db")
dotplot(GOclusterplot)
KEGGclusterplot <- compareCluster(geneCluster = genelist, fun = "enrichKEGG")
dotplot(KEGGclusterplot)
Note: the KEGG plot doesn't show the cluster 8 for some reason. I'm thinking because it wasn't enriched for KEGG terms? Cluster 8 was also a tiny cluster... (See UMAP) Regardless, a quick and dirty way to visualize the enriched GO & KEGG terms per cluster.
Good luck!
the deg gene list from seurat
gene list input to clusterProfiler compareCluster
x1 x2 x3
Id2 Id2 Mybbp1a
Fabp3 Fabp3 Nsmce1
Id1 Id1 Ubl4a
Bex1 Bex1 Clint1
Bex3 Bex3
Tceal9 Tceal9