Here is my code:
####
library(bsseq)
library(bsseqData)
## ----showData-----------------------------------------------------------------
data(BS.cancer.ex)
BS.cancer.ex <- updateObject(BS.cancer.ex)
BS.cancer.ex
pData(BS.cancer.ex)
## ----smooth,eval=FALSE--------------------------------------------------------
# BS.cancer.ex.fit <- BSmooth(
# BSseq = BS.cancer.ex,
# BPPARAM = MulticoreParam(workers = 1),
# verbose = TRUE)
## ----showDataFit--------------------------------------------------------------
data(BS.cancer.ex.fit)
BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit)
BS.cancer.ex.fit
## ----cpgNumbers---------------------------------------------------------------
## The average coverage of CpGs on the two chromosomes
round(colMeans(getCoverage(BS.cancer.ex)), 1)
## Number of CpGs in two chromosomes
length(BS.cancer.ex)
## Number of CpGs which are covered by at least 1 read in all 6 samples
sum(rowSums(getCoverage(BS.cancer.ex) >= 1) == 6)
## Number of CpGs with 0 coverage in all samples
sum(rowSums(getCoverage(BS.cancer.ex)) == 0)
## ----poisson------------------------------------------------------------------
logp <- ppois(0, lambda = 4, lower.tail = FALSE, log.p = TRUE)
round(1 - exp(6 * logp), 3)
## ----smoothSplit,eval=FALSE---------------------------------------------------
# ## Split datag
# BS1 <- BS.cancer.ex[, 1]
# save(BS1, file = "BS1.rda")
# BS2 <- BS.cancer.ex[, 2]
# save(BS1, file = "BS1.rda")
# ## done splitting
#
# ## Do the following on each node
#
# ## node 1
# load("BS1.rda")
# BS1.fit <- BSmooth(BS1)
# save(BS1.fit)
# save(BS1.fit, file = "BS1.fit.rda")
# ## done node 1
#
# ## node 2
# load("BS2.rda")
# BS2.fit <- BSmooth(BS2)
# save(BS2.fit, file = "BS2.fit.rda")
# ## done node 2
#
# ## join; in a new R session
# load("BS1.fit.rda")
# load("BS2.fit.rda")
# BS.fit <- combine(BS1.fit, BS2.fit)
## ----keepLoci-----------------------------------------------------------------
BS.cov <- getCoverage(BS.cancer.ex.fit)
keepLoci.ex <- which(rowSums(BS.cov[, BS.cancer.ex$Type == "cancer"] >= 2) >= 2 &
rowSums(BS.cov[, BS.cancer.ex$Type == "normal"] >= 2) >= 2)
length(keepLoci.ex)
BS.cancer.ex.fit <- BS.cancer.ex.fit[keepLoci.ex,]
## ----BSmooth.tstat------------------------------------------------------------
BS.cancer.ex.tstat <- BSmooth.tstat(BS.cancer.ex.fit,
group1 = c("C1", "C2", "C3"),
group2 = c("N1", "N2", "N3"),
estimate.var = "group2",
local.correct = TRUE,
verbose = TRUE)
BS.cancer.ex.tstat
## ----plotTstat,fig.width=4,fig.height=4---------------------------------------
plot(BS.cancer.ex.tstat)
## ----dmrs---------------------------------------------------------------------
dmrs0 <- dmrFinder(BS.cancer.ex.tstat, cutoff = c(-4.6, 4.6))
dmrs <- subset(dmrs0, n >= 3 & abs(meanDiff) >= 0.1)
nrow(dmrs)
head(dmrs, n = 3)
## ----plotSetup----------------------------------------------------------------
pData <- pData(BS.cancer.ex.fit)
pData$col <- rep(c("red", "blue"), each = 3)
pData(BS.cancer.ex.fit) <- pData
# rm(plotRegion)
## ----plotDmr,fig.width=8,fig.height=4-----------------------------------------
# ?plotRegion
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000, addRegions = dmrs)
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000)
# names(annot.chr21)[2]
# geneTrack<-data.frame(annot.chr21[2])
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000,addRegions = dmrs,addTicks = T,annoTrack=annot.chr21)
# source("H:\\R_bsseq\\bsseq-master\\R\\plotting.R")
head(geneTrack)
class(geneTrack)
geneTrack<-data.frame(annot.chr21[2])
unique(geneTrack$group)
names(geneTrack)<-c("exon_number","group_name","chr","start","end","width","strand","id","tx_id","gene_ID","gene_name","type")
# ?plotRegion
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000,addRegions = dmrs,
addTicks = T,
# col = "black",
# annoTrack=annot.chr21[2],
#annoTrack=annoTrack,
stat = "tstat.corrected",
stat.col = "black", stat.lwd = 1,
# regionCol = "blue",
cex.anno = 0.8,
# byt = "o",
#addPoints = T, pointsMinCov = 5, highlightMain = FALSE,
geneTrack = geneTrack,
cex.gene = 1.5,
lwd = 4)
I have a small quesiton about the bsseq offical code in last part:
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000,addRegions = dmrs, addTicks = T,
# col = "black", # annoTrack=annot.chr21[2], #annoTrack=annoTrack, stat = "tstat.corrected", stat.col = "black", stat.lwd = 1, # regionCol = "blue", cex.anno = 0.8, # byt = "o", #addPoints = T, pointsMinCov = 5, highlightMain = FALSE, geneTrack = geneTrack, cex.gene = 1.5, lwd = 4)
I want to add gene information, including Exon, promoter , in the figure.
So here I used annot.chr21[2] as the geneTrack.
However, it appeared with wrong information: Error in if (direction == "+") { : argument is of length zero
I don't know why.
And I also checked the bsseq plotting source code: https://github.com/hansenlab/bsseq/blob/master/R/plotting.R
plotAnnoTrack <- function(gr, annoTrack, cex) {
## check may need to be modified
if(!all(sapply(annoTrack, function(xx) is(xx, "GRanges"))))
stop("all elements in 'annoTrack' needs to be 'GRanges'")
plot(start(gr), 1, type = "n", xaxt = "n", yaxt = "n", bty = "n",
ylim = c(0.5, length(annoTrack) + 0.5), xlim = c(start(gr), end(gr)), xlab = "", ylab = "")
lapply(seq(along = annoTrack), function(ii) {
jj <- length(annoTrack) + 1- ii
ir <- subsetByOverlaps(annoTrack[[ii]], gr)
if(length(ir) > 0)
rect(c(start(ir)-0.5,start(ir)-0.5),
c(jj+0.33 ,jj+0.33 ),
c(end(ir),end(ir)),
c(jj +0.43,jj+0.53 ),
col = c("black","#148bda"),
border = NA)
#mtext(names(annoTrack)[1], side = 2, at = jj, las = 1, line = 1,
# cex = cex, adj = 0.9,
# col = "red")
})
}
# Based on plotAnnoTrack() and
# https://jhu-genomics.slack.com/archives/hansen_gtex/p1461760583000142
# TODO: Use standard Bioconductor object for storing gene information rather
# than this ad hoc data.frame (e.g., based on TxDb.* packages)
plotGeneTrack <- function(gr, geneTrack, cex) {
geneTrack_gr <- makeGRangesFromDataFrame(geneTrack)
ol <- findOverlaps(geneTrack_gr, gr)
genes <- geneTrack[queryHits(ol), ]
plot(start(gr), 1, type = "n", xaxt = "n", yaxt = "n", bty = "n",
ylim = c(-1.5, 1.5), xlim = c(start(gr), end(gr)),
xlab = "", ylab = "", cex.lab = 4, lheight = 2, cex.axis = 1)
if (nrow(genes) > 0) {
for (g in 1:nrow(genes)) {
geneind2 = which(geneTrack$gene_name == genes$gene_name[g])
geneind2 = geneind2[which(geneTrack$isoforms[geneind2] == 1)]
direction = unique(geneTrack$strand[geneind2])
ES = geneTrack$start[geneind2]
EE = geneTrack$end[geneind2]
Exons = cbind(ES, EE)
if (direction == "+") {
lines(x = c(min(ES), max(EE)),
y = c(0.65, 0.65))
apply(Exons, 1, function(x) {
polygon(c(x[1], x[2], x[2], x[1]),
c(0.45, 0.45, 0.85, 0.85),
col = "darkgrey")
})
text((max(start(gr), min(ES)) +
min(end(gr), max(EE))) / 2, 1.2,
genes$gene_name[g], cex = cex)
} else {
lines(x = c(min(ES), max(EE)),
y = c(-0.65, -0.65))
apply(Exons, 1, function(x)
polygon(c(x[1], x[2], x[2], x[1]),
c(-0.45, -0.45, -0.85, -0.85),
col = "darkgrey"))
text((max(start(gr), min(ES)
) + min(end(gr), max(EE)
)) / 2, -1.2, genes$gene_name[g], cex = cex)
}
}
}
}
.......
But I still don't know how to solve this problem.
Error in if (direction == "+") { : argument is of length zero
I hope someone can give me some advice. I will be very thankful.