I am trying to annotate the Ensembl IDs in a DESeq2 results table by adding columns of gene symbols and gene names. I have tried both biomaRt and AnnotationDbi with org.Mm.eg.db.
This is the output:
library(DESeq2)
# Loading required package: Rcpp
# Loading required package: RcppArmadillo
# Read the count table; the first column of the file supplies the row names.
counts <- read.delim("3mTA2.txt", header = TRUE, row.names = 1)
# Read the sample table that is passed as colData below.
# NOTE(review): the design formula requires a `genotype` column here, and
# presumably its rows must line up with the columns of `counts` -- confirm.
sample <- read.delim("~/sample.txt")
count.data.set <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = sample,
  design = ~ genotype
)
# Fit the model and extract the results table.
dds <- DESeq(count.data.set)
res <- results(dds)
# Load the annotation interface package and the org.Mm.eg.db annotation database.
library("AnnotationDbi")
library("org.Mm.eg.db")
# List the column types available in the database (console output follows).
columns (org.Mm.eg.db)
# [1] "ENTREZID" "PFAM" "IPI" "PROSITE" "ACCNUM"
# [6] "ALIAS" "CHR" "CHRLOC" "CHRLOCEND" "ENZYME"
# [11] "PATH" "PMID" "REFSEQ" "SYMBOL" "UNIGENE"
# [16] "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS" "GENENAME" "UNIPROT"
# [21] "GO" "EVIDENCE" "ONTOLOGY" "GOALL" "EVIDENCEALL"
# [26] "ONTOLOGYALL" "MGI"
# First attempt: map the row names of `res` from ENSEMBL to SYMBOL.
# It fails because convertIDs() is not defined yet at this point in the
# session (see the error on the next line).
res$hgnc_symbol <- convertIDs(row.names(res), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
# Error: could not find function "convertIDs"
# Map a vector of identifiers from one annotation key type to another.
#
# Arguments:
#   ids        Vector of identifiers to look up (used as `keys`).
#   from       Key type of `ids`, e.g. "ENSEMBL".
#   to         Column to retrieve, e.g. "SYMBOL".
#   db         An object inheriting from "AnnotationDb", e.g. org.Mm.eg.db.
#   ifMultiple How to treat an ID that maps to several targets:
#              "putNA" (default) returns NA for such IDs,
#              "useFirst" returns the first mapping found.
#
# Returns a vector aligned with `ids`, holding the mapped value or NA
# where there is no (unambiguous) mapping.
convertIDs <- function(ids, from, to, db, ifMultiple = c("putNA", "useFirst")) {
  stopifnot(inherits(db, "AnnotationDb"))
  ifMultiple <- match.arg(ifMultiple)

  # Warnings raised by select() are silenced here -- presumably the
  # one-to-many mapping notices, which are handled explicitly below.
  suppressWarnings(
    selRes <- AnnotationDbi::select(
      db,
      keys = ids, keytype = from, columns = c(from, to)
    )
  )

  # The code relies on column 1 holding the `from` IDs and column 2 the `to`
  # values, i.e. the order requested in `columns`.
  if (ifMultiple == "putNA") {
    # Drop every row of an ID that occurs more than once, so that the
    # match() below yields NA for ambiguous IDs.
    duplicatedIds <- selRes[duplicated(selRes[, 1]), 1]
    selRes <- selRes[!selRes[, 1] %in% duplicatedIds, ]
  }

  # match() takes the first hit per ID, which implements "useFirst".
  selRes[match(ids, selRes[, 1]), 2]
}
res$hgnc_symbol <- convertIDs(row.names(res), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
# Error in .testForValidKeys(x, keys, keytype) :
#   None of the keys entered are valid keys for 'ENSEMBL'. Please use the keys method to see a listing of valid arguments.
# Called from: .testForValidKeys(x, keys, keytype)
# Browse[1]
library("biomaRt")
# Connect to the Ensembl BioMart mouse gene dataset.
ensembl <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")
# Keep only the first ID when a row name joins several IDs with "+".
# NOTE(review): the pasted pattern was "nn+", which looks like a garbled
# "\\+" (a literal plus sign) -- confirm against the actual row names.
res$ensembl <- vapply(
  strsplit(rownames(res), split = "\\+"),
  function(parts) parts[1],
  character(1)
)
# Query BioMart once for the Entrez ID and symbol of every Ensembl gene ID.
# The pasted transcript also contained a truncated copy of this call (no
# `mart` argument, no closing parenthesis), which made the snippet unparsable.
# NOTE(review): for a mouse dataset the gene symbol is typically exposed as
# `mgi_symbol` rather than `hgnc_symbol`; check listAttributes(ensembl).
genemap <- getBM(
  attributes = c("ensembl_gene_id", "entrezgene", "hgnc_symbol"),
  filters = "ensembl_gene_id",
  values = res$ensembl,
  mart = ensembl
)
I updated the workflow to use mapIds()
http://bioconductor.org/help/workflows/rnaseqGene/#annotate