Hi F,
They can be reproduced using ggplot2 and VennDiagram::draw.pairwise.venn:
library(tidyverse)
library(VennDiagram)
library(GO.db)
# Grab some example data from E. coli (UniProt id and GO id columns)
# NOTE(review): this is the legacy UniProt query URL -- confirm it still resolves
uniprot_url <- "https://www.uniprot.org/uniprot/?query=organism:83333&format=tab&columns=id,go-id"
gene2go <- read_tsv(uniprot_url)
names(gene2go) <- c("Gene", "GO")
# Two independent random samples of 500 rows stand in for the real DEC/DEP lists
n_rows <- nrow(gene2go)
DECs <- gene2go[sample(n_rows, 500), ]
DEPs <- gene2go[sample(n_rows, 500), ]
# Calculate sets: a1/a2 are the two input gene lists, a3 their intersection
sets <- calculate.overlap(x = list("DECs" = DECs$Gene, "DEPs" = DEPs$Gene))
Overlap <- sets$a3
DECs_only <- setdiff(sets$a1, Overlap)
DEPs_only <- setdiff(sets$a2, Overlap)

# Build a two-column frame (Type, Gene) for one set; rep() keeps an empty
# set valid by producing a zero-row frame
make_set_frame <- function(label, genes) {
  data.frame(Type = rep(label, length(genes)), Gene = genes)
}

# Long table with one row per gene, tagged with the set it belongs to
df_sets <- rbind(
  make_set_frame("Overlap", Overlap),
  make_set_frame("DECs_only", DECs_only),
  make_set_frame("DEPs_only", DEPs_only)
)
# Combine with GO data and flatten GO: the "; "-separated GO column is split
# so that each row holds a single GO id
df_sets_go <- left_join(df_sets, gene2go, by = "Gene") %>%
  separate_rows(GO, sep = "; ")
df_sets_go$Description <- Term(df_sets_go$GO)
# Relabel the set types by NAME rather than with `levels<-`: assigning levels
# renames positionally (which would swap the groups when Type is a factor) and
# leaves the values untouched when Type is a plain character vector, so the
# labels would not match the names used in scale_fill_manual().
type_labels <- c(DECs_only = "DECs", DEPs_only = "DEPs", Overlap = "Overlap")
df_sets_go$Type <- factor(
  unname(type_labels[as.character(df_sets_go$Type)]),
  levels = c("DECs", "DEPs", "Overlap")
)
# Only look at top 20 GO terms: count rows per GO id, sort ascending and keep
# the ids of the 20 largest counts
go_counts <- sort(table(df_sets_go$GO))
GO_top20 <- names(tail(go_counts, 20))
# Barplot: row counts per top-20 GO term, stacked and coloured by set type
top_terms <- filter(df_sets_go, GO %in% GO_top20)
fill_colours <- c("DECs" = "black", "DEPs" = "white", "Overlap" = "grey")
ggplot(top_terms, aes(x = str_to_sentence(Description), fill = Type)) +
  geom_bar(color = "black") +
  scale_fill_manual(values = fill_colours) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  labs(x = "", y = "Number of DECs or DEPs") +
  theme_bw() +
  theme(legend.position = "top", legend.title = element_blank())
# Venn diagram for the whole sets (not only the genes in GO barplot)
# Start a fresh grid page so the Venn is not drawn on top of the barplot.
grid::grid.newpage()
# area1/area2 are the TOTAL set sizes (exclusive part + intersection);
# draw.pairwise.venn() subtracts cross.area itself to label the outer regions,
# so passing only the exclusive counts would under-report both sets.
draw.pairwise.venn(
  area1 = length(DECs_only) + length(Overlap),
  area2 = length(DEPs_only) + length(Overlap),
  cross.area = length(Overlap),
  category = c("DECs", "DEPs")
)
Hope it helps.
Divide it into its components:
Thank you. Suppose there are 100 DEGs, 200 DEPs and 70 overlapping genes; they are classified into different GO terms, so how do I select which terms to plot?
I'm not sure what the message is behind that plot, what do you want to show?
The relationship between the transcriptome and proteome data