Okay, I found a round about way to solve this, at least for my scenario.
The gmt and fgsea information I'm using can only read gene symbols (e.g. "TRY") or entrez IDs. So I wrote a function to convert all of the information I had to either symbols or entrez IDs. The code is:
reads <- as.matrix(read.csv("genereads_ONLY4.txt", sep = '\t', row.names = 1, header = TRUE))
genes <- row.names(reads)
sum(lengths(regmatches(genes, gregexpr("\\AT[0-9]", genes, ignore.case = TRUE))))
#genes <- c("TRY", "AT2G46410", "AT5G41315", "AT2G42200", "AT1G10280")
IDconvert <- function(genes) {
for (i in genes){
if (grepl("AT[0-9]", i) == TRUE) {
if is.na(getSYMBOL(i, data='org.At.tair.db')) == TRUE) {
if is.na(getEG(i, data='org.At.tair')) == TRUE) {
i <- i
} else{
name <- getEG(i, data='org.At.tair')
name.l <- as.list(name)
newname <- as.character(name.l[[1]])
genes <- sub(i, newname, genes)
}
} else{
name <- getSYMBOL(i, data='org.At.tair')
name.l <- as.list(name)
newname <- as.character(name.l[[1]])
genes <- sub(i, newname, genes)
}
} else{
NULL
}
}
return(genes)
}
genes2 <- IDconvert(genes)
sum(lengths(regmatches(genes2, gregexpr("\\AT[0-9]", genes2, ignore.case = TRUE))))
row.names(reads) <- genes2
gmt <- read.gmt("GSEA_BIO.gmt")
gmt.ids <- read.gmt("IB_BIO_GMT.gmt")
gmt.combo <- c(gmt, gmt.ids)
#Stage 3 GO terms
names3 <- row.names(sub.break3)
sub.break3$names=names3
ranks <- sub.break3$stat
names(ranks) <- sub.break3$names
sub.break3.rank <- sort(ranks, decreasing = T)
fgseaRes3 <- fgsea(pathways = gmt.combo,
stats = sub.break3.rank,
minSize=5,
maxSize=500,
nperm=100000)
fgsea3.sig <- fgseaRes3[pval < 0.05]
pathways.stg3 <- fgsea3.sig$pathway
#Stage 1 GO terms
names1 <- row.names(sub.break1)
sub.break1$names=names1
ranks <- sub.break1$stat
names(ranks) <- sub.break1$names
sub.break1.rank <- sort(ranks, decreasing = T)
fgseaRes1 <- fgsea(pathways = gmt.combo,
stats = sub.break1.rank,
minSize=5,
maxSize=500,
nperm=100000)
fgsea1.sig <- fgseaRes1[pval < 0.05]
pathways.stg1 <- fgsea1.sig$pathway
#Stage 2 GO terms
names2 <- row.names(sub.break2)
sub.break2$names=names2
ranks <- sub.break2$stat
names(ranks) <- sub.break2$names
sub.break2.rank <- sort(ranks, decreasing = T)
fgseaRes2 <- fgsea(pathways = gmt.combo,
stats = sub.break2.rank,
minSize=5,
maxSize=500,
nperm=100000)
fgsea2.sig <- fgseaRes2[pval < 0.05]
pathways.stg2 <- fgsea2.sig$pathway
#Stage 4 GO terms
names4 <- row.names(sub.break4)
sub.break4$names=names4
ranks <- sub.break4$stat
names(ranks) <- sub.break4$names
sub.break4.rank <- sort(ranks, decreasing = T)
fgseaRes4 <- fgsea(pathways = gmt.combo,
stats = sub.break4.rank,
minSize=5,
maxSize=500,
nperm=100000)
fgsea4.sig <- fgseaRes4[pval < 0.05]
pathways.stg4 <- fgsea4.sig$pathway
#openxlsx::write.xlsx(fgsea4.sig, "fgsea_stg4_t1.xlsx")
#GO Venn-----------------------------------
group.venn(list(One = pathways.stg1,
Two = pathways.stg2,
Three = pathways.stg3,
Four = pathways.stg4),
fill = c("orange", "green", "red", "blue"))