Entering edit mode
5.2 years ago
Adeler001
•
0
I'm doing an RNA seq analysis and I'm trying to show my results using a volcano plot on R studio. I used the the follow script to make my volcano plot on R studio:
#Import count table#
countdata <- read.table("family1301RNA-seq.countsfixed.txt", header=TRUE, row.names=1)
#Convert to matrix#
countdata <- as.matrix(countdata) head(countdata)
#Assign condition (first four are controls, second four and third four contain two different experiments)#
condition<-factor(c("unaffected","unaffected","unaffected","affected","affected","affected"),levels=c("unaffected","affected")) subject <- factor(c("1","2","3","3","2","1"))
library(DESeq2)
#Create a coldata frame and instantiate the DESeqDataSet#
coldata <- data.frame(row.names=colnames(countdata), subject, condition)
dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~ subject + condition) dds
#pre-filtering to keep only rows that have at least 1 reads total#
keep <- rowSums(counts(dds)) > 1 dds <- dds[keep,]
#Run the DESeq#
dds <- DESeq(dds)
#Regularized log transformation for clustering/heatmaps#
rld <- rlogTransformation(dds) head(assay(rld)) hist(assay(rld))
#Colors for plots below#
library(RColorBrewer) (mycols <- brewer.pal(8, "Dark2")[1:length(unique(condition))])
#Sample distance heatmap#
sampleDists <- as.matrix(dist(t(assay(rld)))) library(gplots) png("qc-heatmap_baker.png", w=1000, h=1000, pointsize=20) heatmap.2(as.matrix(sampleDists), key=F, trace="none", col=colorpanel(100, "black", "white"), ColSideColors=mycols[condition], RowSideColors=mycols[condition], margin=c(10, 10), main="Sample Distance Matrix") dev.off()
#Principal components analysis#
rldpca <- function (rld, intgroup = "condition", ntop = 500, colors=NULL, legendpos="bottomleft", main="PCA Biplot", textcx=1, ...) { require(genefilter) require(calibrate) require(RColorBrewer) rv = rowVars(assay(rld)) select = order(rv, decreasing = TRUE)[seqlen(min(ntop, length(rv)))] pca = prcomp(t(assay(rld)[select, ])) fac = factor(apply(as.data.frame(colData(rld)[, intgroup, drop = FALSE]), 1, paste, collapse = " : ")) if (is.null(colors)) { if (nlevels(fac) >= 3) { colors = brewer.pal(nlevels(fac), "Paired") } else { colors = c("black", "red") } } pc1var <- round(summary(pca)$importance[2,1]100, digits=1) pc2var <- round(summary(pca)$importance[2,2]100, digits=1) pc1lab <- paste0("PC1 (",as.character(pc1var),"%)") pc2lab <- paste0("PC2 (",as.character(pc2var),"%)") plot(PC2~PC1, data=as.data.frame(pca$x), bg=colors[fac], pch=21, xlab=pc1lab, ylab=pc2lab, main=main, ...) with(as.data.frame(pca$x), textxy(PC1, PC2, labs=rownames(as.data.frame(pca$x)), cex=textcx)) legend(legendpos, legend=levels(fac), col=colors, pch=20)
png("qc-pca.png", 1000, 1000, pointsize=20) rld_pca(rld, colors=mycols, intgroup="condition", xlim=c(-20, 20)) dev.off()
#Get differential expression results#
res <- results(dds) table(res$padj<0.05)
#Order by adjusted p-value#
res <- res[order(res$padj), ]
#Merge with normalized count data#
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE) names(resdata)[1] <- "Gene" head(resdata)
#get significant results (FDR<0.05)
Write results#
write.csv(resdata, file="sig_diffexpr-results.csv")
#Volcano plot with significant DE genes#
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) { with(res, plot(log2FoldChange, -log10(padj), pch=20, main=main, ...)) with(subset(res, padj<sigthresh ),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="red" ,="" ...))="" with(subset(res,="" abs(log2foldchange)>lfcthresh),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="orange" ,="" ...))="" with(subset(res,="" padj<sigthresh="" &="" abs(log2foldchange)>lfcthresh),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="green" ,="" ...))="" if="" (labelsig)="" {="" require(calibrate)="" with(subset(res,="" padj<sigthresh="" &="" abs(log2foldchange)>lfcthresh),="" textxy(log2foldchange,="" -log10(padj),="" labs="Gene," cex="textcx," ...))="" }="" legend(legendpos,="" xjust="1," yjust="1," legend="c(paste("FDR<",sigthresh,sep="")," paste("|logfc|>",lfcthresh,sep="" ),="" "both"),="" pch="20," col="c("red","orange","green"))" }="" png("diffexpr-volcanoplot.png",="" 1200,="" 1000,="" pointsize="20)" volcanoplot(resdata,="" lfcthresh="1," sigthresh="0.05," textcx=".8," xlim="c(-3," 3))="" dev.off()<="" p="">
I am novice R studio user , the issue I'm having is that the gene name labels displayed on my Volcano plot overlap, making them unreadable how can I prevent this overlap of the gene labels?
Please use the formatting bar (especially the
code
option) to present your post better. I've done it for you this time.Thank you!
Hello Adeler001!
It appears that your post has been cross-posted to another site: https://support.bioconductor.org/p/124173/
This is typically not recommended as it runs the risk of annoying people in both communities.