Entering edit mode
5.6 years ago
lakhujanivijay
5.9k
How can I fetch gene IDs (in RED) from NCBI gene names (in BLUE) using either EFetch or Python?
I am looking at this link and it does exactly the opposite of what I want.
from Bio import Entrez
import sys
# Contact e-mail attribute set on Bio.Entrez before any request is made.
Entrez.email = "*****@gmail.com"
# Entrez gene IDs (as strings) that the script looks up.
id_list = ["3799"]
def retrieve_annotation(id_list):
    """Fetch NCBI Gene document summaries for a list of Entrez gene IDs.

    The IDs are uploaded with EPost, then their summaries are requested with
    ESummary using the WebEnv/QueryKey pair returned by the EPost call.

    Args:
        id_list: Entrez gene IDs, e.g. ``['3799']``. Each item is passed
            through ``str`` before joining, so integers are accepted too.

    Returns:
        The parsed ESummary result exactly as produced by ``Entrez.read``.

    Note:
        If parsing the EPost reply raises ``RuntimeError``, the error is
        printed and the process exits with status -1 via ``sys.exit``.
    """
    request = Entrez.epost(
        "gene", id=",".join(str(gene_id) for gene_id in id_list)
    )
    try:
        result = Entrez.read(request)
    except RuntimeError as e:
        print("An error occurred while retrieving the annotations.")
        print("The error returned was %s" % e)
        sys.exit(-1)
    finally:
        # Release the network handle whether or not parsing succeeded.
        request.close()

    webEnv = result["WebEnv"]
    queryKey = result["QueryKey"]
    data = Entrez.esummary(db="gene", webenv=webEnv, query_key=queryKey)
    try:
        annotations = Entrez.read(data)
    finally:
        data.close()

    # The parsed result shown in this file's sample output is a mapping that
    # wraps the per-gene records under DocumentSummarySet/DocumentSummary, so
    # len(annotations) would count top-level keys rather than genes.
    # NOTE(review): the fallback presumably covers responses that are already
    # a flat sequence of records -- confirm against the Biopython version used.
    try:
        summaries = annotations["DocumentSummarySet"]["DocumentSummary"]
    except (KeyError, TypeError):
        summaries = annotations

    print("Retrieved %d annotations for %d genes" % (len(summaries),
                                                     len(id_list)))
    return annotations
def print_data(annotation):
    """Print the ID, symbol and description of every gene in *annotation*.

    Args:
        annotation: Either the mapping shown in this file's sample output
            (records nested under ``DocumentSummarySet`` ->
            ``DocumentSummary``, with the gene ID stored in each record's
            ``attributes['uid']``), or a flat iterable of records that carry
            an ``"Id"`` key.

    Raises:
        KeyError: If a record lacks ``NomenclatureSymbol`` or
            ``Description``, or has neither a ``uid`` attribute nor an
            ``"Id"`` key.
    """
    # Iterating the top-level mapping would yield its key strings, so unwrap
    # the nested record list first; fall back to the object itself when it is
    # not shaped that way (e.g. already a list of records).
    try:
        records = annotation["DocumentSummarySet"]["DocumentSummary"]
    except (KeyError, TypeError):
        records = annotation

    for gene_data in records:
        # In the nested format the ID is an attribute of the record, not a
        # key inside it.
        attributes = getattr(gene_data, "attributes", None) or {}
        if "uid" in attributes:
            gene_id = attributes["uid"]
        else:
            gene_id = gene_data["Id"]
        gene_symbol = gene_data["NomenclatureSymbol"]
        gene_name = gene_data["Description"]
        print("ID: %s - Gene Symbol: %s - Gene Name: %s"
              % (gene_id, gene_symbol, gene_name))
# Look up the configured gene IDs and dump the raw parsed result.
annotation = retrieve_annotation(id_list)
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(annotation)
Output
python ncbi.py
Retrieved 1 annotations for 1 genes
DictElement({u'DocumentSummarySet': DictElement({u'DbBuild': 'Build190501-0100m.1', u'DocumentSummary': [DictElement({u'Status': '0', u'NomenclatureSymbol': 'KIF5B', u'OtherDesignations': 'kinesin-1 heavy chain|conventional kinesin heavy chain|epididymis secretory protein Li 61|kinesin 1 (110-120kD)|kinesin heavy chain|ubiquitous kinesin heavy chain', u'Mim': ['602809'], u'Name': 'KIF5B', u'NomenclatureName': 'kinesin family member 5B', u'CurrentID': '0', u'GenomicInfo': [DictElement({u'ChrAccVer': 'NC_000010.11', u'ChrLoc': '10', u'ExonCount': '27', u'ChrStop': '32009009', u'ChrStart': '32056442'}, attributes={})], u'OtherAliases': 'HEL-S-61, KINH, KNS, KNS1, UKHC', u'Summary': '', u'GeneWeight': '9359', u'GeneticSource': 'genomic', u'MapLocation': '10p11.22', u'ChrSort': '10', u'ChrStart': '32009009', u'LocationHist': [DictElement({u'AssemblyAccVer': 'GCF_000001405.38', u'ChrAccVer': 'NC_000010.11', u'AnnotationRelease': '109', u'ChrStop': '32009009', u'ChrStart': '32056442'}, attributes={}), DictElement({u'AssemblyAccVer': 'GCF_000001405.33', u'ChrAccVer': 'NC_000010.11', u'AnnotationRelease': '108', u'ChrStop': '32009009', u'ChrStart': '32056442'}, attributes={}), DictElement({u'AssemblyAccVer': 'GCF_000306695.2', u'ChrAccVer': 'NC_018921.2', u'AnnotationRelease': '108', u'ChrStop': '32299659', u'ChrStart': '32347070'}, attributes={}), DictElement({u'AssemblyAccVer': 'GCF_000001405.28', u'ChrAccVer': 'NC_000010.11', u'AnnotationRelease': '107', u'ChrStop': '32009009', u'ChrStart': '32056442'}, attributes={}), DictElement({u'AssemblyAccVer': 'GCF_000306695.2', u'ChrAccVer': 'NC_018921.2', u'AnnotationRelease': '107', u'ChrStop': '32299659', u'ChrStart': '32347070'}, attributes={}), DictElement({u'AssemblyAccVer': 'GCF_000001405.25', u'ChrAccVer': 'NC_000010.10', u'AnnotationRelease': '105', u'ChrStop': '32297937', u'ChrStart': '32345370'}, attributes={}), DictElement({u'AssemblyAccVer': 'GCF_000002125.1', u'ChrAccVer': 'AC_000142.1', u'AnnotationRelease': '105', 
u'ChrStop': '32018110', u'ChrStart': '32065918'}, attributes={}), DictElement({u'AssemblyAccVer': 'GCF_000306695.2', u'ChrAccVer': 'NC_018921.2', u'AnnotationRelease': '105', u'ChrStop': '32299659', u'ChrStart': '32347070'}, attributes={})], u'Organism': DictElement({u'CommonName': 'human', u'ScientificName': 'Homo sapiens', u'TaxID': '9606'}, attributes={}), u'NomenclatureStatus': 'Official', u'Chromosome': '10', u'Description': 'kinesin family member 5B'}, attributes={u'uid': u'3799'})]}, attributes={u'status': u'OK'})}, attributes={})