Question

R: Unable to use analyzeFeatures because argument is not a vector

2

Entering edit mode

2.8 years ago

melissachua90 ▴ 70

I want to use the analyzeFeatures from the SGSeq library.

library(SGSeq)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(BSgenome.Hsapiens.UCSC.hg19)
library(org.Hs.eg.db)

First, I subset the variants that I want to analyze and place them in var_nt.

txf_df$variant <- with(txf_df, paste0(type, ave(1:nrow(txf_df), type, FUN=seq_along)))
var_nt <- subset(txf_df, variant %in% c("J3", "J10", "J13"))  
varnt_grange <- makeGRangesFromDataFrame(var.nt, keep.extra.columns=T)

Then, I want to subset each GRanges objects and reduce the ranges into a single vector so that I can analyze them.

for (i in gene.list$entrez) {
  # Skip gene AQP8 (Entrez ID: 343)
  if (i=="343") {
    next
  }
  else{
    varnt.grange.subset <- varnt_grange[varnt_grange$geneName %in% i]
    # Collapse the ranges of the genes into a single vector
    varnt.grange.subset <- IRanges::reduce(varnt.grange.subset)
    # Run analyzeFeatures
    sgfc_pred_subset  <- analyzeFeatures(samFile, which=varnt.grange.subset)
  }
}

Traceback

Predict features...
N60 complete.
N11 complete.
T132 complete.
T114 complete.
Process features...
Obtain counts...
Error in order(...) : argument 1 is not a vector

Data

> dput(head(txf_df))
structure(list(seqnames = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "16", class = "factor"), 
    start = c(12058964L, 12059311L, 12059311L, 12060052L, 12060198L, 
    12060198L), end = c(12059311L, 12060052L, 12061427L, 12060198L, 
    12060877L, 12061427L), width = c(348L, 742L, 2117L, 147L, 
    680L, 1230L), strand = structure(c(1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = c("+", "-", "*"), class = "factor"), type = structure(c(3L, 
    1L, 1L, 2L, 1L, 1L), .Label = c("J", "I", "F", "L", "U"), class = "factor"), 
    txName = structure(list(c("uc002dbv.3", "uc010buy.3", "uc010buz.3"
    ), c("uc002dbv.3", "uc010buy.3"), "uc010buz.3", c("uc002dbv.3", 
    "uc010buy.3"), "uc010buy.3", "uc002dbv.3"), class = "AsIs"), 
    geneName = structure(list("608", "608", "608", "608", "608", 
        "608"), class = "AsIs"), variant = c("F1", "J1", "J2", 
    "I1", "J3", "J4")), row.names = c(NA, 6L), class = "data.frame")


> dput(head(gene.list))
structure(list(Name = c("AQP8", "ZG16", "SCNN1B", "HSD11B2", 
"TNFRSF17", "MT1M"), Pvalue = c(3.24077275512836e-22, 4.14482213350182e-20, 
1.6959409570969e-15, 8.67986104627575e-15, 3.17491475337798e-14, 
3.30811992851482e-14), adjPvalue = c(8.3845272720681e-18, 1.07234838237959e-15, 
4.38773844420109e-11, 2.24565364989246e-10, 8.21413944993951e-10, 
8.55876787905354e-10), logFC = c(-3.73323340223377, -2.87787132076412, 
-1.77694418475864, -1.55599182329306, -1.96370011101605, -2.17842791093474
), entrez = c(AQP8 = "343", ZG16 = "653808", SCNN1B = "6338", 
HSD11B2 = "3291", TNFRSF17 = "608", MT1M = "4499")), row.names = c(1L, 
4L, 18L, 26L, 31L, 32L), class = "data.frame")

Bioconductor SGSeq R • 662 views

ADD COMMENT • link updated 2.0 years ago by Ram 44k • written 2.8 years ago by melissachua90 ▴ 70