Hello ! :)
Thank you very much !
Sorry for the late answer: I was writing and testing a Python script to download the genomes from PATRIC, and I also needed time to understand your answer and learn more about it :).
import requests
import json
import math
import time
import os
import sys
import subprocess
#function
class ProcessException(Exception):
    """Raised by execute() when a command exits with a non-zero status.

    Attributes:
        command: the command line that was run.
        exit_code: the exit status returned by the process.
        output: everything the process wrote to stdout/stderr.
    """

    def __init__(self, command, exit_code, output):
        Exception.__init__(
            self,
            "command %r exited with status %s" % (command, exit_code),
        )
        self.command = command
        self.exit_code = exit_code
        self.output = output


def execute(command,path):
    """
    Run a shell command, echoing its output to the console as it arrives.

    command -- command line, interpreted by the shell (shell=True)
    path    -- working directory in which the command is run

    Returns the combined stdout/stderr text of the command.
    Raises ProcessException when the command exits with a non-zero status.
    """
    process = subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into the stream we read
        cwd=path,
        # Text mode: readline() yields str on both Python 2 and 3, so the
        # '' end-of-stream sentinel below is valid on either interpreter.
        universal_newlines=True,
    )
    captured = []
    # readline() returns '' only at end-of-stream, so this ends once the
    # process has closed its output.
    for nextline in iter(process.stdout.readline, ''):
        # Keep every line: the stream is fully consumed here, so a later
        # communicate() would have nothing left to return.
        captured.append(nextline)
        sys.stdout.write(nextline)
        sys.stdout.flush()
    process.stdout.close()
    exitCode = process.wait()
    output = ''.join(captured)
    if exitCode != 0:
        raise ProcessException(command, exitCode, output)
    return output
#commands
# Script flow:
#   1. ask for an NCBI taxon ID,
#   2. query the PATRIC GenomeFinder endpoint for the matching genomes,
#   3. write "taxon_id <TAB> genome_name <TAB> genome_id" lines to
#      patricGenome_<taxonID>.txt,
#   4. download each genome's .PATRIC.faa file with wget into ./patricGenome,
#   5. record every URL whose file is missing afterwards in logFilePatric.txt.
# The statements below are valid on both Python 2 and Python 3.

try:
    read_line = raw_input  # Python 2: plain input() would eval() the typed text
except NameError:
    read_line = input  # Python 3

# Pieces of the GenomeFinder query URL, shared by the count and page requests.
GENOME_FINDER_URL = 'https://www.patricbrc.org/portal/portal/patric/GenomeFinder/GenomeFinderWindow?action=b&cacheability=PAGE&need=0&taxonId='
FACET_QUERY = '&keyword=*:*&facet={"facet":"public,genome_status,reference_genome,antimicrobial_resistance,antimicrobial_resistance_evidence,isolation_country,host_name,disease,collection_date,completion_date","facet_text":"Public,Genome Status,Reference Genome,Antimicrobial Resistance,Antimicrobial Resistance Evidence,Isolation Country,Host Name,Disease,Collection Date,Completion Date","field_facets":"genome_status,reference_genome,antimicrobial_resistance,antimicrobial_resistance_evidence,isolation_country,host_name,disease,collection_date,public","date_range_facets":"completion_date"}'
SORT_QUERY = '&sort=[{"property":"genome_name","direction":"ASC"}]'


def _genome_finder_url(taxon_id, paging=''):
    """Return the GenomeFinder URL for taxon_id; paging goes before the sort clause."""
    return GENOME_FINDER_URL + taxon_id + FACET_QUERY + paging + SORT_QUERY


# NOTE(review): presumably a warm-up request specific to the author's campus
# guest network; the response is ignored. Made best-effort so the script does
# not crash on other networks.
try:
    requests.get('https://guest.ulg.ac.be/welcome',verify=True)
except requests.exceptions.RequestException:
    pass

taxonID = read_line('Give the NCBI Tax ID of your rank or taxon : ').strip()
limit = '10000'
page_size = int(limit)

print("Make POST Request to PATRIC Server to have the number of genome")
# NOTE(review): verify=False disables TLS certificate verification; switch to
# verify=True if the server's certificate chain validates.
r = requests.post(_genome_finder_url(taxonID), verify=False)
genomeNumber = int(r.json()[u'total'])
print("Number of genome found :  %s" % genomeNumber)

# Integer ceiling division, so a final partial page is not dropped; at least
# one page is always requested.
pageNumber = max(1, -(-genomeNumber // page_size))

dicoFTP = dict()
with open("patricGenome_"+taxonID+".txt","w") as f:
    # Fetch every result page first.
    pageList = list()
    for page_index in range(pageNumber):
        # NOTE(review): 'start' is sent as a 1-based offset; confirm against
        # the server whether it expects 0-based.
        start = page_index * page_size + 1
        page = page_index + 1
        print("page  %s / %s" % (page, pageNumber))
        t0 = time.time()
        paging = '&page='+str(page)+'&start='+str(start)+'&limit='+limit
        req = requests.post(_genome_finder_url(taxonID, paging), verify=False)
        pageList.append(req)
        t1 = time.time()
        print("%s seconds" % (t1 - t0))

    # Parse each fetched page (the current response, not the last one fetched).
    for cpt, response in enumerate(pageList, 1):
        t0 = time.time()
        results = response.json()[u'results']  # decode the JSON once per page
        for dico in results:
            t1 = time.time()
            taxID, genomeName, genomeID = dico[u'taxon_id'],dico[u'genome_name'],dico[u'genome_id']
            dicoFTP[genomeID]="ftp://ftp.patricbrc.org/patric2/genomes/"+str(genomeID)+"/"+str(genomeID)+".PATRIC.faa"
            f.write(str(taxID)+"\t"+str(genomeName)+"\t"+str(genomeID)+"\n")
            t2 = time.time()
            print("parsed genome %s in %s seconds" % (genomeName, t2 - t1))
        t3 = time.time()
        print("parsed page %s in %s seconds" % (cpt, t3 - t0))

#download the genomes that were found
patricDir = os.getcwd()+"/patricGenome"
if not os.path.isdir(patricDir):
    os.mkdir(patricDir)
#logFile
with open("logFilePatric.txt","w") as log:
    #download genome
    for url in dicoFTP.values():
        print(url)
        name = os.path.split(url)[1]
        try:
            execute('wget ' + url, patricDir)
        except Exception as err:
            # One failed download must not abort the run; the missing file
            # is logged by the check below.
            print("download failed for %s : %s" % (url, err))
        # wget runs inside patricDir, so the file has to be looked up there.
        if not os.path.isfile(os.path.join(patricDir, name)):
            print("%s  not found" % name)
            log.write(url+"\n")
Maybe this code can help someone :)
Thank you !
Hi Yoan,
I am trying to get whole-genome data from the PATRIC database using your script.
After running I got this problem:
Do I need to change something now to get it working?
I know it has been quite a long time since you posted, but I hope to hear from you soon.
Thank you very much
Thanh