Question

calculating p-value for GO terms

0

Entering edit mode

19 months ago

azadeh.taati • 0

Hello scholars I have a list of GO ID, GO term, and gene ID. I want to analyze gene enrichment via topGO. but I found out, I need the p-values for my gene list. Through which package or script can I get the p-value for my gene list?

GO_ID • 1.2k views

ADD COMMENT • link updated 16 months ago by GenoMax 151k • written 19 months ago by azadeh.taati • 0

GenoMax · Answer 1 · 2023-11-07

1

Entering edit mode

19 months ago

Philipp Bayer 8.8k

You don't necessarily need p-values for TopGO, see this previous answer, specifically to look at the 'Predefined list of interesting genes' With No P Values Of Gene Difference Expression, How Could I Do Go Analysis With R Package Gostats? (in fact, there are many similar questions to yours on biostars :) )

ADD COMMENT • link 19 months ago by Philipp Bayer 8.8k

0

Entering edit mode

thank you so much I used the following script:

library(topGO)
library(AnnotationDbi)
library(S4Vectors)
library(ALL)
library(Rgraphviz)
library(ggplot2)
library(RColorBrewer)
library(wesanderson)
library(viridis)
library(tidyverse)
data(ALL)
data(geneList)
setwd("C:/Users/azadeh/Desktop/PHD_thesis/resaults/topGO")
gene.go <- read.delim("GO_Gene.txt", stringsAsFactors = F)

gene.go <- gene.go[which(gene.go$GO != ""),] # take out genes without GO terms
head(gene.go)
gene2GO <- tapply(gene.go$GO, gene.go$geneID, function(x)x)
head(gene2GO)


#gene list of chromosome ...###
#Enter the gene of the desired list as a list of characters 
x <- c(1:6)
print(x)
#Enter the gene of the desired list as a list of characters 
##belowe methods make a vector from your gene list because GOdata needs vector format to calculate GO enrichment
names(x) <-  c ("CGRA01v4_09953",
                "CGRA01v4_09952",
                "CGRA01v4_09951",
                "CGRA01v4_09950",
                "CGRA01v4_09949",
                "CGRA01v4_09948")


all_genes<- x
print(all_genes)



#prepare GOdata according to Biological Process####

GOdata <- new("topGOdata",
              ontology = "BP",
              allGenes = all_genes,
              geneSelectionFun = function(x)x,
              annot =  annFUN.gene2GO, gene2GO=gene2GO)
print(GOdata)

resultFisher <- runTest(GOdata, algorithm = "elim", statistic = "fisher")

tab <- GenTable(GOdata, raw.p.value = resultFisher, topNodes = length(resultFisher@score),
                numChar = 120)
head(tab)

# Kolmogorov-Smirnov test for Biological Process####
resultKS <- runTest(GOdata, algorithm = "weight01", statistic = "ks")
tabKS <- GenTable(GOdata, raw.p.value = resultKS, topNodes = length(resultKS@score), numChar = 120)
head(tabKS, 50)
write.csv(tabKS, 'C:/Users/azadeh/Desktop/PHD_thesis/resaults/topGO/chr6_KSresults.csv')

resultKS_1 <- runTest(GOdata, algorithm = "classic", statistic = "ks")
resultKS.elim <- runTest(GOdata, algorithm = "elim", statistic = "ks")
allRes <- GenTable(GOdata, classicFisher = resultFisher, classicKS = resultKS.elim, elimKS = resultKS.elim, orderBy = "elimKS", ranksOf = "classicFisher", topNodes = 10)
head(allRes, 70)
write.csv(allRes, 'C:/Users/azadeh/Desktop/PHD_thesis/resaults/topGO/chr6_elim.KSresults.csv')

par(cex = 0.3)
showSigOfNodes(GOdata, score(resultKS.elim), firstSigNodes = 20, useInfo = "all")

#visualize GOTerms####
goEnrichment <- GenTable(
  GOdata,
  resultKS.elim = resultKS.elim,
  orderBy = "resultKS",
  topNodes = length(resultKS.elim@score),
  numChar = 82)

goEnrichment$resultKS.elim <- as.numeric(goEnrichment$resultKS.elim)
goEnrichment <- goEnrichment[goEnrichment$resultKS.elim < 1,] # filter terms for Fisher p<0.1
goEnrichment <- goEnrichment[,c("GO.ID","Term","resultKS.elim")]

# Create a data frame
goEnrichment <- as.data.frame(goEnrichment)
# Use mutate to add a new column
goEnrichment <- goEnrichment %>% mutate(ontology = c("BP"))
print(goEnrichment)

write.csv(goEnrichment, 'C:/Users/azadeh/Desktop/PHD_thesis/resaults/topGO/chr6_goEnrichment.csv')





ntop <- 20
ggdata <- goEnrichment[1:ntop,]
ggdata$Term <- factor(ggdata$Term, levels = rev(ggdata$Term)) # fixes order
gg1 <- ggplot(ggdata,
              aes(x = Term, y = -log10(resultKS.elim), size =frequency(Term), fill = -log10(resultKS.elim)))+
  expand_limits(y = 1) +
  geom_point(shape = 21) +
  scale_size(range = c(2.5,12.5)) +
  scale_fill_continuous(low = 'royalblue', high = 'red4') +

  xlab('') + ylab('Enrichment score') +
  labs(
    title = 'GO Biological processes chromosome 6',
    subtitle = 'Top 20 terms ordered by Kolmogorov-Smirnov p-value',
    caption = 'Cut-off lines drawn at equivalents of p=0.01, p=0.001, p=0.0001') +

  geom_hline(yintercept = c(-log10(0.01), -log10(0.001), -log10(0.0001)),
             linetype = c("dotted", "longdash", "solid"),
             colour = c("black", "black", "black"),
             size = c(0.5, 1.5, 3)) +

  theme_bw(base_size = 24) +
  theme(
    legend.position = 'right',
    legend.background = element_rect(),
    plot.title = element_text(angle = 0, size = 16, face = 'bold', vjust = 1),
    plot.subtitle = element_text(angle = 0, size = 14, face = 'bold', vjust = 1),
    plot.caption = element_text(angle = 0, size = 12, face = 'bold', vjust = 1),

    axis.text.x = element_text(angle = 0, size = 12, face = 'bold', hjust = 1.10),
    axis.text.y = element_text(angle = 0, size = 12, face = 'bold', vjust = 0.5),
    axis.title = element_text(size = 12, face = 'bold'),
    axis.title.x = element_text(size = 12, face = 'bold'),
    axis.title.y = element_text(size = 12, face = 'bold'),
    axis.line = element_line(colour = 'black'),

    #Legend
    legend.key = element_blank(), # removes the border
    legend.key.size = unit(2, "cm"), # Sets overall area/size of the legend
    legend.text = element_text(size = 14, face = "bold"), # Text size
    title = element_text(size = 14, face = "bold")) +

  coord_flip()

print(gg1)

ADD REPLY • link updated 16 months ago by GenoMax 151k • written 16 months ago by azadeh.taati • 0