Entering edit mode
8.5 years ago
Dataminer
★
2.8k
Hi,
I have a time series data in tripicates for 4 time points. My aim is to identify signature genes for each time point.
What I am doing in R is :
##loading libraries##
library(lattice)
library(edgeR)
library(gplots)
library("RColorBrewer")
###loading data###
counts<-read.table("Rep3_CountTableDev.txt", header=T, sep="\t", row.names=1)
geneLevelCounts<-counts
###filtering out non-expressed genes. Consider only the genes with an average read count of 5 or more ###
means <- rowMeans(geneLevelCounts)
filter <- means >= 5
table(filter)
geneLevelCounts <- geneLevelCounts[filter,]
dim(geneLevelCounts)
####Overview of Data###
colors <- c(rep("yellow3",3),rep("blue",3),rep("green",3), rep("orange",3))
totCounts <- colSums(geneLevelCounts)
pdf("ResultsAllReplicates.pdf")
barplot(totCounts, las=2, col=colors)
##boxplot to visualise difference between each experiment ####
#boxplot(geneLevelCounts, las=2, col=colors)
par(mfrow=c(1,2))
boxplot(log2(geneLevelCounts+1), las=2, col=colors)
#####defining groups and normalization #######
group <- factor(c("t1","t1","t1","t2","t2","t2","t3","t3","t3","t4","4","4"))
counts <- geneLevelCounts
cds <- DGEList( counts , group = group )
names(cds)
head(cds$counts) # original count matrix
cds$samples # contains a summary of your samples
sum(cds$all.zeros) # How many genes have 0 counts across all samples
cds <- calcNormFactors(cds, method="upperquartile")
cds$samples
##measuring similarity of samples and projects
plotMDS(cds, main="MDS Plot for Count Data", labels=colnames(cds$counts))
#####multiple sample comparison ####
counts <- geneLevelCounts
cds <- DGEList( counts , group = group )
names(cds)
design <- model.matrix(~group)
design
cds <- calcNormFactors(cds, method="upperquartile")
cds$samples
#### Estimate of dispersion ###
cds <- estimateGLMCommonDisp(cds, design, verbose=TRUE)
cds <- estimateGLMTagwiseDisp(cds, design)
### Biological coefficient of variance & Mean variance plot ###
plotBCV(cds)
meanVarPlot <- plotMeanVar(cds, show.raw.vars=TRUE, show.tagwise.vars=TRUE, show.binned.common.disp.vars=FALSE, show.ave.raw.vars=FALSE, NBline = TRUE , nbins = 100 , pch = 16 , xlab ="Mean Expression (Log10 Scale)" , ylab = "Variance (Log10 Scale)" , main = "Mean-Variance Plot" )
###testing DEG ####
fit <- glmFit(cds, design)
lrt <- glmLRT(fit, coef=2:4)
#### Top DEG ###
top <- topTags(lrt, n=nrow(cds$counts))$table
head(top)
### Distribution of FDR ###
de <- rownames(top[top$FDR<0.05,])
length(de)
head(de)
hist(top$PValue, breaks=20)
###### Smear plots ####
par(mfrow=c(3,3))
plotSmear(cds, de.tags=de, pair=c("t1", "t2"))
abline(h=c(-2, 2), col="green")
plotSmear(cds, de.tags=de, pair=c("t1", "t3"))
abline(h=c(-2, 2), col="green")
plotSmear(cds, de.tags=de, pair=c("t1", "t4"))
abline(h=c(-2, 2), col="green")
plotSmear(cds, de.tags=de, pair=c("t2", "t3"))
abline(h=c(-2, 2), col="green")
plotSmear(cds, de.tags=de, pair=c("t2", "t4"))
abline(h=c(-2, 2), col="green")
plotSmear(cds, de.tags=de, pair=c("t3", "t4"))
abline(h=c(-2, 2), col="green")
###### heatmap of all DEG at 0.05 FDR ####
x<-(log(cds$counts[de,]+1))
heatmap(x, ColSideColor=colors, col=brewer.pal(11,"RdBu"), main="All Genes",Colv=NA)
##PLOTTING TOP 500 GENES deg ####
heatmap(x[1:500,], ColSideColor=colors, col=brewer.pal(11,"RdBu"), main="Top 500 genes", Colv=NA)
Of course this is one of the approach of getting the signature genes but I am not entirely sure that it is giving me signature genes between the replicates.
Also, a pairwise comaprison of the data will be better to get signature genes between each time point?
This is developmental time series data.
Any help will be good.
Kindly drop comments.
Thank you
I have similar question, did you find the answer for this? If yes, please update.