Hi DN99,
I think this may be what you need.
First, download this file. It is the most current human Gene Ontology annotation (GAF) file: http://current.geneontology.org/annotations/goa_human.gaf.gz
Then extract the gzip file onto your Desktop so that you have ~/Desktop/goa_human.gaf. The following script will give you a master list of all gene IDs and their associated GO IDs in a data frame. You can then use the merge() function to merge the GO IDs with your gene list data frame.
# Build `GO`: a two-column data frame (GENEID, GOID) from the extracted
# GAF annotation file on the Desktop.
#
# GAF header/comment lines start with "!". They are removed by that prefix
# rather than by a fixed line count, so the script keeps working when the
# number of header lines in the download changes. Everything is done in R,
# so no external `awk` is required.
GO <- local({
  gaf_path <- "~/Desktop/goa_human.gaf"
  no_header_path <- "~/Desktop/goa_human_no_header.txt"
  stopifnot(file.exists(gaf_path))

  # Strip the "!" header lines and keep writing the header-free copy that
  # the original script produced.
  gaf_lines <- readLines(gaf_path)
  writeLines(gaf_lines[!startsWith(gaf_lines, "!")], no_header_path)

  # GAF is plain tab-delimited text without field quoting, so quoting is
  # disabled to stop a stray '"' inside a field from swallowing lines.
  annotations <- read.delim(
    no_header_path,
    header = FALSE,
    sep = "\t",
    quote = ""
  )

  # Keep only column 3 (DB Object Symbol in the GAF spec) and column 5
  # (GO ID); every other column is dropped.
  annotations[, c("V3", "V5")]
})
colnames(GO) <- c("GENEID", "GOID")
If you want more information, such as the GO terms, in the data frame as well, you can use the following script:
# Build `GENESwithGO`: the gene -> GO ID table from the extracted GAF file,
# merged with the GO term details provided by the GO.db package.
#
# GAF header/comment lines start with "!". They are removed by that prefix
# rather than by a fixed line count, so the script keeps working when the
# number of header lines in the download changes. Everything is done in R,
# so no external `awk` is required.
GO <- local({
  gaf_path <- "~/Desktop/goa_human.gaf"
  no_header_path <- "~/Desktop/goa_human_no_header.txt"
  stopifnot(file.exists(gaf_path))

  # Strip the "!" header lines and keep writing the header-free copy that
  # the original script produced.
  gaf_lines <- readLines(gaf_path)
  writeLines(gaf_lines[!startsWith(gaf_lines, "!")], no_header_path)

  # GAF is plain tab-delimited text without field quoting, so quoting is
  # disabled to stop a stray '"' inside a field from swallowing lines.
  annotations <- read.delim(
    no_header_path,
    header = FALSE,
    sep = "\t",
    quote = ""
  )

  # Keep only column 3 (DB Object Symbol in the GAF spec) and column 5
  # (GO ID); every other column is dropped.
  annotations[, c("V3", "V5")]
})
colnames(GO) <- c("GENEID", "GOID")

# Install GO.db only when it is missing, instead of on every run.
if (!requireNamespace("GO.db", quietly = TRUE)) {
  BiocManager::install("GO.db")
}
library(GO.db)

# Flatten the GOTERM annotation object into a data frame and rename its
# first column so it can serve as the merge key.
GOdb <- as.data.frame(GOTERM)
colnames(GOdb)[1] <- "GOID"

# Drop the last row of the GOTERM table, as the original script did.
# NOTE(review): presumably a placeholder entry rather than a real GO term;
# confirm against the installed GO.db version.
GOdb <- head(GOdb, -1)

# Inner join: only annotations whose GO ID is present in GO.db are kept.
GENESwithGO <- merge(GO, GOdb, by = "GOID")
rm(GOdb, GO)

# Remove the remaining `go_id` column carried over from GOdb (if present);
# the key already lives in `GOID`.
GENESwithGO$go_id <- NULL
This should create a master data frame of genes, their GO IDs, and the corresponding Gene Ontology terms. It might be overkill for your purposes, but I am including it here in case it is useful.
Hope this helps!