Entering edit mode
7 months ago
Ankit
▴
500
Hi everyone,
I am trying to use Diffbind for ATAC-Seq differential data analysis. Upon comparison with DESEq2 the differential peaks are quite different. Here is my code.
# ------- Diffbind -------
ss <- read.csv("/mnt/beegfs6/samplesheet.csv")
obj <- dba(sampleSheet=ss, scoreCol=5, minOverlap=1)
obj <- dba.count(obj, minOverlap=1, score=DBA_SCORE_READS, summits=FALSE, mapQCth=0)# mapQC kept 0 to match results with featurecounts default
obj <- dba.normalize(obj, method=DBA_DESEQ2, normalize = DBA_NORM_NATIVE,library = DBA_LIBSIZE_PEAKREADS) # DBA_LIBSIZE_PEAKREADS for matching results the way DESEQ2 calculate size factor using library size withing the interval
obj <- dba.contrast(obj, minMembers = 2,categories=DBA_CONDITION) # create contrast using conditions present, atleast 2 replicates should be there for each condition
obj <- dba.analyze(obj) # run in default state
# extract Differential regions , FDR < 0.05 and fold change > |2|
df_diffbind_test_all <- data.frame(dba.report(obj, method=DBA_DESEQ2, contrast = 1, th=1)) # contrast = 1, is comparison of DE vs hESC
df_diffbind_test_all0.05 <- df_diffbind_test_all %>% dplyr::filter(FDR < 0.05)
df_diffbind_test_de <- df_diffbind_test_all0.05 %>% dplyr::filter((Fold > 2 | Fold < -2))
# -------DESEq2 -----------
# Get count matrix from Diffbind rather than featurecounts in order to reproduce results
d_counts <- dba.peakset(obj, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)
rownames(d_counts) <- paste0(d_counts$CHR, "%", d_counts$START, "%", d_counts$END)
d_counts <- d_counts[,-c(1:3)]
coldata <- data.frame(fread("coldata.txt", header = T))
rownames(coldata) <- coldata$SampleID
coldata$condition <- factor(coldata$Condition)
coldata$replicate <- factor(coldata$Replicate)
coldata <- coldata[,c(1,2,5,6)]
all(rownames(coldata) == colnames(d_counts)) #should print TRUE
d_dds <- DESeq2::DESeqDataSetFromMatrix(countData = as.matrix(d_counts), colData = coldata, design = ~ condition)
# extract results
res <- DESeq2::results(d_dds, contrast=c("condition", "DE", "hESC"))
# shrinkage for logfoldchange
res_d <- lfcShrink(d_dds, contrast=c("condition", "DE", "hESC"), res=res, type="normal") # shrinkage as diffbind does for logfoldchange
res_d$threshold <- as.logical(res_d$padj < 0.05)
res_d_fdr <- data.frame(res_d[which(res_d$threshold == TRUE),])
# logfc filter 2
res_d_de <- res_d_fdr %>% dplyr::filter((log2FoldChange > 2) | (log2FoldChange < -2))
# Comparison and the results are not same
deseq2 = 37862
diffbind = 29578
Please help. What I am not doing right?