If JSON/XML output will be useful to you, the following script can be used.
#!/usr/bin/python
from Bio import Entrez
import json
# Increase query limit to 10/s & get warnings
Entrez.email = ""
# Get one from the https://www.ncbi.nlm.nih.gov/account/settings/ page
Entrez.api_key = ""

# Search term for the "assembly" database
term = "GCF_000005845.2"

# Finds the ids associated with the assembly
def get_ids(term):
    """Search the NCBI "assembly" database and collect the matching IDs.

    Args:
        term: Query string passed to ``Entrez.esearch``.

    Returns:
        A one-element list whose single item is the ``IdList`` of the
        search result (the ID list is appended as a whole, not flattened).
    """
    ids = []
    handle = Entrez.esearch(db="assembly", term=term)
    try:
        record = Entrez.read(handle)
    finally:
        # Release the connection even if parsing the response fails.
        handle.close()
    # NOTE(review): append() nests the IdList inside ``ids``, so a caller
    # iterating the result receives all IDs at once in a single iteration.
    # extend() would flatten it, but that changes the return shape.
    ids.append(record["IdList"])
    return ids
#Fetch raw output
def get_raw_assembly_summary(id):
    """Fetch the full esummary record for an entry of the "assembly" database.

    Individual fields can be read from the returned record, e.g.
    ``record['DocumentSummarySet']['DocumentSummary'][0]['AssemblyName']``
    returns the assembly name. The equivalent XML output can be viewed at
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=assembly&id=79781&report=%22full%22

    Args:
        id: Value passed through unchanged as the ``id`` argument of
            ``Entrez.esummary``.

    Returns:
        The parsed record as returned by ``Entrez.read``.
    """
    handle = Entrez.esummary(db="assembly", id=id, report="full")
    try:
        record = Entrez.read(handle)
    finally:
        # Release the connection even if parsing the response fails.
        handle.close()
    return record


# JSON formatted output
def get_assembly_summary_json(id):
    """Fetch the full esummary record for an "assembly" entry as JSON text.

    Args:
        id: Value passed through unchanged as the ``id`` argument of
            ``Entrez.esummary``.

    Returns:
        A JSON string of the parsed record, with keys sorted and a
        4-space indent.
    """
    handle = Entrez.esummary(db="assembly", id=id, report="full")
    try:
        record = Entrez.read(handle)
    finally:
        # Release the connection even if parsing the response fails.
        handle.close()
    # Convert raw output to json
    return json.dumps(record, sort_keys=True, indent=4, separators=(',', ': '))


# Test
for assembly_ids in get_ids(term):
    # print(get_raw_assembly_summary(assembly_ids))  # For raw output
    print(get_assembly_summary_json(assembly_ids))  # JSON formatted
Wow, this answer is incredible! Thank you so much. This actually makes a lot of sense on why my previous version wasn't working. Did you write this or did you find it in the docs somewhere?
# Follow-up attempt: efetch against the "assembly" database, passing the
# accession string directly as the id.
handle = Entrez.efetch(db="assembly", id="GCF_000005845.2")
record = Entrez.read(handle)
# Bare expression: presumably run in an interactive session, where it echoes
# the parsed result.
record
['5845', '2']