R: Unable to use analyzeFeatures because argument is not a vector
0
2
Entering edit mode
2.8 years ago

I want to use the analyzeFeatures from the SGSeq library.

library(SGSeq)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(BSgenome.Hsapiens.UCSC.hg19)
library(org.Hs.eg.db)

First, I subset the variants that I want to analyze and place them in var_nt.

txf_df$variant <- with(txf_df, paste0(type, ave(1:nrow(txf_df), type, FUN=seq_along)))
var_nt <- subset(txf_df, variant %in% c("J3", "J10", "J13"))  
varnt_grange <- makeGRangesFromDataFrame(var.nt, keep.extra.columns=T)

Then, I want to subset each GRanges objects and reduce the ranges into a single vector so that I can analyze them.

for (i in gene.list$entrez) {
  # Skip gene AQP8 (Entrez ID: 343)
  if (i=="343") {
    next
  }
  else{
    varnt.grange.subset <- varnt_grange[varnt_grange$geneName %in% i]
    # Collapse the ranges of the genes into a single vector
    varnt.grange.subset <- IRanges::reduce(varnt.grange.subset)
    # Run analyzeFeatures
    sgfc_pred_subset  <- analyzeFeatures(samFile, which=varnt.grange.subset)
  }
}

Traceback

Predict features...
N60 complete.
N11 complete.
T132 complete.
T114 complete.
Process features...
Obtain counts...
Error in order(...) : argument 1 is not a vector

Data

> dput(head(txf_df))
structure(list(seqnames = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "16", class = "factor"), 
    start = c(12058964L, 12059311L, 12059311L, 12060052L, 12060198L, 
    12060198L), end = c(12059311L, 12060052L, 12061427L, 12060198L, 
    12060877L, 12061427L), width = c(348L, 742L, 2117L, 147L, 
    680L, 1230L), strand = structure(c(1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = c("+", "-", "*"), class = "factor"), type = structure(c(3L, 
    1L, 1L, 2L, 1L, 1L), .Label = c("J", "I", "F", "L", "U"), class = "factor"), 
    txName = structure(list(c("uc002dbv.3", "uc010buy.3", "uc010buz.3"
    ), c("uc002dbv.3", "uc010buy.3"), "uc010buz.3", c("uc002dbv.3", 
    "uc010buy.3"), "uc010buy.3", "uc002dbv.3"), class = "AsIs"), 
    geneName = structure(list("608", "608", "608", "608", "608", 
        "608"), class = "AsIs"), variant = c("F1", "J1", "J2", 
    "I1", "J3", "J4")), row.names = c(NA, 6L), class = "data.frame")


> dput(head(gene.list))
structure(list(Name = c("AQP8", "ZG16", "SCNN1B", "HSD11B2", 
"TNFRSF17", "MT1M"), Pvalue = c(3.24077275512836e-22, 4.14482213350182e-20, 
1.6959409570969e-15, 8.67986104627575e-15, 3.17491475337798e-14, 
3.30811992851482e-14), adjPvalue = c(8.3845272720681e-18, 1.07234838237959e-15, 
4.38773844420109e-11, 2.24565364989246e-10, 8.21413944993951e-10, 
8.55876787905354e-10), logFC = c(-3.73323340223377, -2.87787132076412, 
-1.77694418475864, -1.55599182329306, -1.96370011101605, -2.17842791093474
), entrez = c(AQP8 = "343", ZG16 = "653808", SCNN1B = "6338", 
HSD11B2 = "3291", TNFRSF17 = "608", MT1M = "4499")), row.names = c(1L, 
4L, 18L, 26L, 31L, 32L), class = "data.frame")
Bioconductor SGSeq R • 662 views
ADD COMMENT

Login before adding your answer.

Traffic: 1981 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6