Hey,
There are two approaches here.
1. org.At.tair.db
You can use the annotation DB packages from Bioconductor, specifically org.At.tair.db.
Copying my own answer from another thread, "Biomart query returns NA when searching for entrez_id, while manual search works":
# Load the Arabidopsis thaliana (TAIR) annotation database from Bioconductor.
library(org.At.tair.db)

# TAIR locus identifiers to annotate.
genes <- c(
  "AT2G14610", "AT4G23700", "AT3G26830",
  "AT3G15950", "AT3G54830", "AT5G24105"
)

# List the identifier types that can be passed as `keytype`.
keytypes(org.At.tair.db)

# Map each TAIR ID to a gene symbol; the result is a vector named by key.
mapIds(org.At.tair.db, keys = genes, column = "SYMBOL", keytype = "TAIR")
AT2G14610 AT4G23700 AT3G26830 AT3G15950 AT3G54830 AT5G24105
"AtCAPE9" "ATCHX17" "CYP71B15" "NAI2" NA "AGP41"
# Retrieve Entrez IDs, symbols and RefSeq accessions for each TAIR ID.
# select() returns a data frame with one row per matching combination,
# so a single key can produce several rows.
# The argument is spelled `columns` (its full formal name) instead of
# relying on partial matching of `column`.
select(
  org.At.tair.db,
  keys = genes,
  columns = c("ENTREZID", "SYMBOL", "REFSEQ"),
  keytype = "TAIR"
)
TAIR ENTREZID SYMBOL REFSEQ
1 AT2G14610 815949 AtCAPE9 NM_127025
2 AT2G14610 815949 AtCAPE9 NP_179068
3 AT2G14610 815949 ATPR1 NM_127025
4 AT2G14610 815949 ATPR1 NP_179068
5 AT2G14610 815949 PR NM_127025
6 AT2G14610 815949 PR NP_179068
7 AT2G14610 815949 PR1 NM_127025
8 AT2G14610 815949 PR1 NP_179068
9 AT4G23700 828470 ATCHX17 NM_001341626
10 AT4G23700 828470 ATCHX17 NM_118501
11 AT4G23700 828470 ATCHX17 NP_001328705
12 AT4G23700 828470 ATCHX17 NP_194101
13 AT4G23700 828470 CHX17 NM_001341626
14 AT4G23700 828470 CHX17 NM_118501
15 AT4G23700 828470 CHX17 NP_001328705
16 AT4G23700 828470 CHX17 NP_194101
17 AT3G26830 822298 CYP71B15 NM_113595
18 AT3G26830 822298 CYP71B15 NP_189318
19 AT3G26830 822298 PAD3 NM_113595
20 AT3G26830 822298 PAD3 NP_189318
21 AT3G15950 820839 NAI2 NM_001035631
22 AT3G15950 820839 NAI2 NM_001338191
23 AT3G15950 820839 NAI2 NM_001338192
24 AT3G15950 820839 NAI2 NM_001338193
25 AT3G15950 820839 NAI2 NM_112465
26 AT3G15950 820839 NAI2 NP_001030708
27 AT3G15950 820839 NAI2 NP_001326807
2. biomaRt
# library() stops with an error if biomaRt is not installed, whereas
# require() only returns FALSE and lets the script fail later on.
library(biomaRt)

# Connect to the Ensembl Plants mart and select the A. thaliana gene dataset.
tair_mart <- useMart(
  biomart = "plants_mart",
  host = "plants.ensembl.org",
  dataset = "athaliana_eg_gene"
)

# Preview the first 15 attributes that can be requested from this dataset.
head(listAttributes(tair_mart), 15)

# Query annotation for the genes of interest, filtering on the Ensembl
# gene ID (the TAIR locus IDs in `genes` are used as the filter values).
annot <- getBM(
  attributes = c(
    "ensembl_gene_id", "entrezgene_id",
    "description", "external_gene_name"
  ),
  filters = "ensembl_gene_id",
  values = genes,
  mart = tair_mart
)
ensembl_gene_id entrezgene_id
1 AT2G14610 815949
2 AT3G15950 820839
3 AT3G26830 822298
4 AT3G54830 NA
5 AT4G23700 828470
6 AT5G24105 2745995
description
1 Pathogenesis-related protein 1 [Source:UniProtKB/Swiss-Prot;Acc:P33154]
2 TSA1-like protein [Source:UniProtKB/Swiss-Prot;Acc:Q9LSB4]
3 Bifunctional dihydrocamalexate synthase/camalexin synthase [Source:UniProtKB/Swiss-Prot;Acc:Q9LW27]
4
5 Cation/H(+) antiporter 17 [Source:UniProtKB/Swiss-Prot;Acc:Q9SUQ7]
6 Arabinogalactan protein 41 [Source:UniProtKB/Swiss-Prot;Acc:Q8L9T8]
external_gene_name
1 PR1
2 NAI2
3 CYP71B15
4
5 CHX17
6 AGP41
If you want a complete table from biomaRt, just use:
# With no `filters`/`values`, getBM() is not restricted to specific genes
# and returns the requested attributes for the whole dataset.
annotComplete <- getBM(
  attributes = c(
    "ensembl_gene_id", "entrezgene_id",
    "description", "external_gene_name"
  ),
  mart = tair_mart
)

# Number of rows and columns retrieved.
dim(annotComplete)
[1] 33528 4
Kevin
This is really helpful. Thank you, Kevin.