Entering edit mode
2.8 years ago
Omics data mining
▴
260
Hello everyone
I am have transcript abundance data from salmon for 24 samples. I am interested in the differential transcript usgae (DTU). I am following https://bioconductor.org/packages/release/workflows/vignettes/rnaseqDTU/inst/doc/rnaseqDTU.html#salmon-quantification workflow.
library(rnaseqDTU)
samps <- read.table(file="treated_samp", header = TRUE)
names(samps) <- c("sample_id", "condition")
head(samps)
samps$condition <- factor(samps$condition)
table(samps$condition)
files <- file.path("/home/test/Salmon/treated", samps$sample_id, "quant.sf")
names(files) <- samps$sample_id
library(tximport)
txi <- tximport(files, type="salmon", txOut=TRUE,
countsFromAbundance="scaledTPM")
cts <- txi$counts
cts <- cts[rowSums(cts) > 0,]
range(colSums(cts)/1e6)
library(tidyverse)
dir <- "/home/test/"
tx2gene <- read_table(file.path(dir, "salmon_tx2gene.tsv"))
txdf <- tx2gene[,c(1:2)]
tab <- table(txdf$GENEID)
txdf$ntx <- tab[match(txdf$GENEID, names(tab))]
all(rownames(cts) %in% txdf$TXTname )
txdf <- txdf[match(rownames(cts),txdf$TXTname),]
all(rownames(cts) == txdf$TXTname)
txdf$TXNAME <- txdf$TXTname
counts <- data.frame(gene_id=txdf$GENEID,
feature_id=txdf$TXNAME,
cts)
library(DRIMSeq)
labels <- colnames(counts)
samps$sample_id <- labels[3:14]
d <- dmDSdata(counts=counts, samples=samps)
d
n <- 12
n.small <- 6
d <- dmFilter(d,
min_samps_feature_expr=n.small, min_feature_expr=7,
min_samps_feature_prop=n.small, min_feature_prop=0.1,
min_samps_gene_expr=n, min_gene_expr=10)
d
library(DEXSeq)
sample.data <- DRIMSeq::samples(d)
count.data <- round(as.matrix(counts(d)[,-c(1:2)]))
dxd <- DEXSeqDataSet(countData=count.data,
sampleData=sample.data,
design=~sample + exon + condition:exon,
featureID=counts(d)$feature_id,
groupID=counts(d)$gene_id)
system.time({
dxd <- estimateSizeFactors(dxd)
dxd <- estimateDispersions(dxd, quiet=TRUE)
dxd <- testForDEU(dxd, reducedModel=~sample + exon)
})
dxr <- DEXSeqResults(dxd, independentFiltering=FALSE)
qval <- perGeneQValue(dxr)
dxr.g <- data.frame(gene=names(qval),qval)
columns <- c("featureID","groupID","pvalue")
dxr <- as.data.frame(dxr[,columns])
head(dxr)
############# stageR following DRIMSeq
strp <- function(x) substr(x,1,15)
pConfirmation <- matrix(dxr$pvalue,ncol=1)
dimnames(pConfirmation) <- list(strp(dxr$featureID),"transcript")
pScreen <- qval
names(pScreen) <- strp(names(pScreen))
tx2gene <- as.data.frame(dxr[,c("featureID", "groupID")])
for (i in 1:2) tx2gene[,i] <- strp(tx2gene[,i])
stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation,
pScreenAdjusted=FALSE, tx2gene=tx2gene)
Until this step of analysis, everything works fine. I am getting error at the following step :
stageRObj <- stageWiseAdjustment(stageRObj, method="dtu", alpha=0.05)
**Error in `[<-`(`*tmp*`, idCon, 1, value = unlist(txLevelAdjustments)) :
subscript out of bounds**
I tried different alpha values, but every time it gives me error.
I would appreciate all the suggestions.
Hello Omics data mining,
Please use the formatting bar (especially the
code
option) to present your post better. I've done it for you this time.Thank you!
Hi, did you find a solution for this? I am facing the same exact problem right now.