Question

Something wrong with my code concerning bsseq visualization

0

Entering edit mode

2.3 years ago

diqixiaoyaoer ▴ 20

Here is my code:

    ####
library(bsseq)
library(bsseqData)

## ----showData-----------------------------------------------------------------
data(BS.cancer.ex)
BS.cancer.ex <- updateObject(BS.cancer.ex)
BS.cancer.ex
pData(BS.cancer.ex)

## ----smooth,eval=FALSE--------------------------------------------------------
#  BS.cancer.ex.fit <- BSmooth(
#      BSseq = BS.cancer.ex,
#      BPPARAM = MulticoreParam(workers = 1),
#      verbose = TRUE)

## ----showDataFit--------------------------------------------------------------
data(BS.cancer.ex.fit)
BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit)
BS.cancer.ex.fit

## ----cpgNumbers---------------------------------------------------------------
## The average coverage of CpGs on the two chromosomes
round(colMeans(getCoverage(BS.cancer.ex)), 1)
## Number of CpGs in two chromosomes
length(BS.cancer.ex)
## Number of CpGs which are covered by at least 1 read in all 6 samples
sum(rowSums(getCoverage(BS.cancer.ex) >= 1) == 6)
## Number of CpGs with 0 coverage in all samples
sum(rowSums(getCoverage(BS.cancer.ex)) == 0)

## ----poisson------------------------------------------------------------------
logp <- ppois(0, lambda = 4, lower.tail = FALSE, log.p = TRUE)
round(1 - exp(6 * logp), 3)

## ----smoothSplit,eval=FALSE---------------------------------------------------
#  ## Split datag
#  BS1 <- BS.cancer.ex[, 1]
#  save(BS1, file = "BS1.rda")
#  BS2 <- BS.cancer.ex[, 2]
#  save(BS1, file = "BS1.rda")
#  ## done splitting
#  
#  ## Do the following on each node
#  
#  ## node 1
#  load("BS1.rda")
#  BS1.fit <- BSmooth(BS1)
#  save(BS1.fit)
#  save(BS1.fit, file = "BS1.fit.rda")
#  ## done node 1
#  
#  ## node 2
#  load("BS2.rda")
#  BS2.fit <- BSmooth(BS2)
#  save(BS2.fit, file = "BS2.fit.rda")
#  ## done node 2
#  
#  ## join; in a new R session
#  load("BS1.fit.rda")
#  load("BS2.fit.rda")
#  BS.fit <- combine(BS1.fit, BS2.fit)

## ----keepLoci-----------------------------------------------------------------
BS.cov <- getCoverage(BS.cancer.ex.fit)
keepLoci.ex <- which(rowSums(BS.cov[, BS.cancer.ex$Type == "cancer"] >= 2) >= 2 &
                       rowSums(BS.cov[, BS.cancer.ex$Type == "normal"] >= 2) >= 2)
length(keepLoci.ex)
BS.cancer.ex.fit <- BS.cancer.ex.fit[keepLoci.ex,]

## ----BSmooth.tstat------------------------------------------------------------
BS.cancer.ex.tstat <- BSmooth.tstat(BS.cancer.ex.fit, 
                                    group1 = c("C1", "C2", "C3"),
                                    group2 = c("N1", "N2", "N3"), 
                                    estimate.var = "group2",
                                    local.correct = TRUE,
                                    verbose = TRUE)
BS.cancer.ex.tstat

## ----plotTstat,fig.width=4,fig.height=4---------------------------------------
plot(BS.cancer.ex.tstat)

## ----dmrs---------------------------------------------------------------------
dmrs0 <- dmrFinder(BS.cancer.ex.tstat, cutoff = c(-4.6, 4.6))
dmrs <- subset(dmrs0, n >= 3 & abs(meanDiff) >= 0.1)
nrow(dmrs)
head(dmrs, n = 3)

## ----plotSetup----------------------------------------------------------------
pData <- pData(BS.cancer.ex.fit)
pData$col <- rep(c("red", "blue"), each = 3)
pData(BS.cancer.ex.fit) <- pData
# rm(plotRegion)
## ----plotDmr,fig.width=8,fig.height=4-----------------------------------------
# ?plotRegion
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000, addRegions = dmrs)
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000)

# names(annot.chr21)[2]
# geneTrack<-data.frame(annot.chr21[2])
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000,addRegions = dmrs,addTicks = T,annoTrack=annot.chr21)


#  source("H:\\R_bsseq\\bsseq-master\\R\\plotting.R")
head(geneTrack)
class(geneTrack)
geneTrack<-data.frame(annot.chr21[2])
unique(geneTrack$group)
names(geneTrack)<-c("exon_number","group_name","chr","start","end","width","strand","id","tx_id","gene_ID","gene_name","type")

# ?plotRegion
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000,addRegions = dmrs,
           addTicks = T,
           # col = "black",
           # annoTrack=annot.chr21[2],
           #annoTrack=annoTrack,
           stat = "tstat.corrected",
           stat.col = "black", stat.lwd = 1,
           # regionCol = "blue",
           cex.anno = 0.8,
           # byt = "o",
           #addPoints = T, pointsMinCov = 5, highlightMain = FALSE,
           geneTrack = geneTrack, 
           cex.gene = 1.5,
           lwd = 4)

I have a small quesiton about the bsseq offical code in last part:

plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000,addRegions = dmrs, addTicks = T,

           # col = "black",
           # annoTrack=annot.chr21[2],
           #annoTrack=annoTrack,
           stat = "tstat.corrected",
           stat.col = "black", stat.lwd = 1,
           # regionCol = "blue",
           cex.anno = 0.8,
           # byt = "o",
           #addPoints = T, pointsMinCov = 5, highlightMain = FALSE,
           geneTrack = geneTrack, 
           cex.gene = 1.5,
           lwd = 4)

I want to add gene information, including Exon, promoter , in the figure.

So here I used annot.chr21[2] as the geneTrack.

However, it appeared with wrong information: Error in if (direction == "+") { : argument is of length zero

I don't know why.

And I also checked the bsseq plotting source code: https://github.com/hansenlab/bsseq/blob/master/R/plotting.R

plotAnnoTrack <- function(gr, annoTrack, cex) {
    ## check may need to be modified
    if(!all(sapply(annoTrack, function(xx) is(xx, "GRanges"))))
        stop("all elements in 'annoTrack' needs to be 'GRanges'")
    plot(start(gr), 1, type = "n", xaxt = "n", yaxt = "n", bty = "n",
         ylim = c(0.5, length(annoTrack) + 0.5), xlim = c(start(gr), end(gr)), xlab = "", ylab = "")
    lapply(seq(along = annoTrack), function(ii) {
        jj <- length(annoTrack) + 1- ii
        ir <- subsetByOverlaps(annoTrack[[ii]], gr)
        if(length(ir) > 0)
            rect(c(start(ir)-0.5,start(ir)-0.5), 
                 c(jj+0.33 ,jj+0.33 ), 
                 c(end(ir),end(ir)), 
                 c(jj +0.43,jj+0.53 ), 
                 col = c("black","#148bda"), 
                 border = NA)
        #mtext(names(annoTrack)[1], side = 2, at = jj, las = 1, line = 1,
        #      cex = cex, adj = 0.9,
        #      col = "red")
        })
}

# Based on plotAnnoTrack() and
# https://jhu-genomics.slack.com/archives/hansen_gtex/p1461760583000142
# TODO: Use standard Bioconductor object for storing gene information rather
#       than this ad hoc data.frame (e.g., based on TxDb.* packages)
plotGeneTrack <- function(gr, geneTrack, cex) {
  geneTrack_gr <- makeGRangesFromDataFrame(geneTrack)
  ol <- findOverlaps(geneTrack_gr, gr)
  genes <- geneTrack[queryHits(ol), ]
  plot(start(gr), 1, type = "n", xaxt = "n", yaxt = "n", bty = "n",
       ylim = c(-1.5, 1.5), xlim = c(start(gr), end(gr)),
       xlab = "", ylab = "", cex.lab = 4, lheight = 2, cex.axis = 1)
  if (nrow(genes) > 0) {
    for (g in 1:nrow(genes)) {
      geneind2 = which(geneTrack$gene_name == genes$gene_name[g])
      geneind2 = geneind2[which(geneTrack$isoforms[geneind2] == 1)]
      direction = unique(geneTrack$strand[geneind2])
      ES = geneTrack$start[geneind2]
      EE = geneTrack$end[geneind2]
      Exons = cbind(ES, EE)
      if (direction == "+") {
        lines(x = c(min(ES), max(EE)),
              y = c(0.65, 0.65))
        apply(Exons, 1, function(x) {
          polygon(c(x[1], x[2], x[2], x[1]),
                  c(0.45, 0.45, 0.85, 0.85), 
                  col = "darkgrey")
        })
        text((max(start(gr), min(ES)) +
                min(end(gr), max(EE))) / 2, 1.2,
             genes$gene_name[g], cex = cex)
      } else {
        lines(x = c(min(ES), max(EE)),
              y = c(-0.65, -0.65))
        apply(Exons, 1, function(x)
          polygon(c(x[1], x[2], x[2], x[1]),
                  c(-0.45, -0.45, -0.85, -0.85),
                  col = "darkgrey"))
        text((max(start(gr), min(ES)
        ) + min(end(gr), max(EE)
        )) / 2, -1.2, genes$gene_name[g], cex = cex)
      }

    }
  }
}

.......

But I still don't know how to solve this problem.

Error in if (direction == "+") { : argument is of length zero

I hope someone can give me some advice. I will be very thankful.

bsseq plotRegion R • 543 views

ADD COMMENT • link 2.3 years ago by diqixiaoyaoer ▴ 20