Entering edit mode
5.5 years ago
Adeler001
•
0
I'm doing an RNA seq analysis and I'm trying to show my results using a volcano plot on R studio. I used the the follow script to make my volcano plot on R studio:
#Import count table#
countdata <- read.table("family1301RNA-seq.countsfixed.txt", header=TRUE, row.names=1)
#Convert to matrix#
countdata <- as.matrix(countdata) head(countdata)
#Assign condition (first four are controls, second four and third four contain two different experiments)#
condition<-factor(c("unaffected","unaffected","unaffected","affected","affected","affected"),levels=c("unaffected","affected")) subject <- factor(c("1","2","3","3","2","1"))
library(DESeq2)
#Create a coldata frame and instantiate the DESeqDataSet#
coldata <- data.frame(row.names=colnames(countdata), subject, condition)
dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~ subject + condition) dds
#pre-filtering to keep only rows that have at least 1 reads total#
keep <- rowSums(counts(dds)) > 1 dds <- dds[keep,]
#Run the DESeq#
dds <- DESeq(dds)
#Regularized log transformation for clustering/heatmaps#
rld <- rlogTransformation(dds) head(assay(rld)) hist(assay(rld))
#Colors for plots below#
library(RColorBrewer) (mycols <- brewer.pal(8, "Dark2")[1:length(unique(condition))])
#Sample distance heatmap#
sampleDists <- as.matrix(dist(t(assay(rld)))) library(gplots) png("qc-heatmap_baker.png", w=1000, h=1000, pointsize=20) heatmap.2(as.matrix(sampleDists), key=F, trace="none", col=colorpanel(100, "black", "white"), ColSideColors=mycols[condition], RowSideColors=mycols[condition], margin=c(10, 10), main="Sample Distance Matrix") dev.off()
#Principal components analysis#
rldpca <- function (rld, intgroup = "condition", ntop = 500, colors=NULL, legendpos="bottomleft", main="PCA Biplot", textcx=1, ...) { require(genefilter) require(calibrate) require(RColorBrewer) rv = rowVars(assay(rld)) select = order(rv, decreasing = TRUE)[seqlen(min(ntop, length(rv)))] pca = prcomp(t(assay(rld)[select, ])) fac = factor(apply(as.data.frame(colData(rld)[, intgroup, drop = FALSE]), 1, paste, collapse = " : ")) if (is.null(colors)) { if (nlevels(fac) >= 3) { colors = brewer.pal(nlevels(fac), "Paired") } else { colors = c("black", "red") } } pc1var <- round(summary(pca)$importance[2,1]100, digits=1) pc2var <- round(summary(pca)$importance[2,2]100, digits=1) pc1lab <- paste0("PC1 (",as.character(pc1var),"%)") pc2lab <- paste0("PC2 (",as.character(pc2var),"%)") plot(PC2~PC1, data=as.data.frame(pca$x), bg=colors[fac], pch=21, xlab=pc1lab, ylab=pc2lab, main=main, ...) with(as.data.frame(pca$x), textxy(PC1, PC2, labs=rownames(as.data.frame(pca$x)), cex=textcx)) legend(legendpos, legend=levels(fac), col=colors, pch=20)
png("qc-pca.png", 1000, 1000, pointsize=20) rld_pca(rld, colors=mycols, intgroup="condition", xlim=c(-20, 20)) dev.off()
#Get differential expression results#
res <- results(dds) table(res$padj<0.05)
#Order by adjusted p-value#
res <- res[order(res$padj), ]
#Merge with normalized count data#
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE) names(resdata)[1] <- "Gene" head(resdata)
#get significant results (FDR<0.05)
Write results#
write.csv(resdata, file="sig_diffexpr-results.csv")
#Volcano plot with significant DE genes#
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) { with(res, plot(log2FoldChange, -log10(padj), pch=20, main=main, ...)) with(subset(res, padj<sigthresh ),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="red" ,="" ...))="" with(subset(res,="" abs(log2foldchange)>lfcthresh),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="orange" ,="" ...))="" with(subset(res,="" padj<sigthresh="" &="" abs(log2foldchange)>lfcthresh),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="green" ,="" ...))="" if="" (labelsig)="" {="" require(calibrate)="" with(subset(res,="" padj<sigthresh="" &="" abs(log2foldchange)>lfcthresh),="" textxy(log2foldchange,="" -log10(padj),="" labs="Gene," cex="textcx," ...))="" }="" legend(legendpos,="" xjust="1," yjust="1," legend="c(paste("FDR<",sigthresh,sep="")," paste("|logfc|>",lfcthresh,sep="" ),="" "both"),="" pch="20," col="c("red","orange","green"))" }="" png("diffexpr-volcanoplot.png",="" 1200,="" 1000,="" pointsize="20)" volcanoplot(resdata,="" lfcthresh="1," sigthresh="0.05," textcx=".8," xlim="c(-3," 3))="" dev.off()<="" p="">
I am novice R studio user , the issue I'm having is that the gene name labels displayed on my Volcano plot overlap, making them unreadable how can I prevent this overlap of the gene labels?
Please use the formatting bar (especially the
data:image/s3,"s3://crabby-images/e62ca/e62ca224264879bacfb453e0b9704f8d73331f96" alt="code_formatting"
code
option) to present your post better. I've done it for you this time.Thank you!
Hello Adeler001!
It appears that your post has been cross-posted to another site: https://support.bioconductor.org/p/124173/
This is typically not recommended as it runs the risk of annoying people in both communities.