Entering edit mode
5.2 years ago
zack.henning
•
0
Hey all, I am trying to create a database, but I am having some issues. My code below runs, but it only pulls the last enzyme, glucose-6-phosphate dehydrogenase (and its subunits, etc.), into the table, and I am not sure why. Any help would be appreciated, thanks!
import sqlite3
# Connect to the SQLite database file my.db and obtain a cursor on it.
conn = sqlite3.connect('my.db')
c = conn.cursor()

# One CREATE TABLE statement per table, run in this order: genes, pathways,
# enzymes, then the two association tables (genes <-> enzymes and
# enzymes <-> pathways).
# NOTE: none of these use IF NOT EXISTS, so running them against a my.db that
# already contains the tables raises sqlite3.OperationalError.
schema_statements = (
    """CREATE TABLE genes (id INTEGER PRIMARY KEY AUTOINCREMENT, symbol TEXT, name TEXT, description TEXT, n_sequence TEXT)""",
    """CREATE TABLE pathways (id INT, name TEXT, description TEXT)""",
    """CREATE TABLE enzymes (id INT, name TEXT)""",
    """CREATE TABLE genes_and_enzymes (gene_id INT, enzyme_id INT)""",
    """CREATE TABLE enzymes_in_pathways (enzyme_id INT, pathway_id INT)""",
)
for ddl in schema_statements:
    c.execute(ddl)

# Persist the newly created tables.
conn.commit()
# Seed the pathways table with the three pathways used by this database.
# The values are bound as parameters rather than written inline as
# double-quoted SQL text: in SQL, double quotes denote identifiers, and SQLite
# only falls back to treating them as string literals as a legacy quirk that
# can be switched off when SQLite is built.
pathways_list = [
    (1, "glycolysis", "the catalysis of glucose"),
    (2, "TCA", "cyclic pathway"),
    (3, "PPP", "generates NAD(P)H and pentose sugars"),
]
c.executemany(
    """INSERT INTO pathways (id, name, description) VALUES (?, ?, ?);""",
    pathways_list,
)
# Persist the pathway rows.
conn.commit()
# Rows for the enzymes table, as (id, name) pairs.
# NOTE(review): "fumurase" looks like a misspelling of "fumarase"; the same
# spelling is used in the NCBI search-term lists later in this script, so any
# correction should be made in all of them together.
enzymes_list = [
    (1, "hexokinase"),
    (2, "phosphoglucose isomerase"),
    (3, "Aldolase"),
    (4, "pyruvate kinase"),
    (5, "citrate synthase"),
    (6, "malate dehydrogenase"),
    (7, "isocitrate dehydrogenase"),
    (8, "fumurase"),
    (9, "transketolase"),
    (10, "transaldolase"),
    (11, "gluconolactonase"),
    (12, "glucose-6-phosphate dehydrogenase"),
]

# Insert every (id, name) pair with one parameterized statement, then persist.
c.executemany("""INSERT INTO enzymes (id, name) VALUES (?, ?);""", enzymes_list)
conn.commit()

# Sanity check: look up the glycolysis pathway row and print it.
c.execute("SELECT * FROM pathways WHERE name = 'glycolysis';")
print(c.fetchone())
#importing our Bio import
from Bio import Entrez
from Bio import SeqIO
import time
Entrez.email = 'xxxxx@xxx.xx'  # Letting NCBI know who I am

# Enzyme names used as NCBI search terms for E. coli.
# NOTE(review): "fumurase" looks like a misspelling of "fumarase"; it is kept
# as-is here because the same spelling is stored in the enzymes table.
coli_enzymes = [
    "hexokinase",
    "phosphoglucose isomerase",
    "Aldolase",
    "pyruvate kinase",
    "citrate synthase",
    "malate dehydrogenase",
    "isocitrate dehydrogenase",
    "fumurase",
    "transketolase",
    "transaldolase",
    "gluconolactonase",
    "glucose-6-phosphate dehydrogenase",
]

# For each enzyme: search nuccore, then fetch every hit as a GenBank record
# and store one row per hit in the genes table.
for enzyme in coli_enzymes:
    # NOTE(review): idtype='symbol' does not look like a documented ESearch
    # idtype (NCBI documents 'acc'); confirm which kind of ID is returned.
    search_handle = Entrez.esearch(db='nuccore',
                                   term='E. coli [ORGN] ' + enzyme,
                                   sort='relevance',
                                   idtype='symbol')
    coli_ids = Entrez.read(search_handle)['IdList']
    search_handle.close()
    time.sleep(3)  # pause between NCBI requests

    # This loop has to sit INSIDE the enzyme loop. If it runs after the
    # enzyme loop has finished, only the search result of the last enzyme
    # is still available and only that enzyme ends up in the table.
    for i in coli_ids:
        fetch_handle = Entrez.efetch(db='nuccore', id=i, rettype='gb',
                                     retmode='text', retmax=1)
        record_coli = SeqIO.read(fetch_handle, "genbank")
        fetch_handle.close()
        # Take the sequence from the record that was just parsed.
        seq_coli = record_coli.seq
        c.execute("""INSERT INTO genes (symbol, name, description, n_sequence) VALUES (?, ?, ?, ?);""",
                  (str(i),
                   str(enzyme),
                   str(record_coli.description),
                   str(seq_coli)))
        # Commit per record so already-fetched rows survive a later failure.
        conn.commit()
        time.sleep(3)  # pause between NCBI requests
# Enzyme names used as NCBI search terms for Homo sapiens.
# NOTE(review): "fumurase" looks like a misspelling of "fumarase"; it is kept
# as-is here because the same spelling is stored in the enzymes table.
human_enzymes = [
    "hexokinase",
    "phosphoglucose isomerase",
    "Aldolase",
    "pyruvate kinase",
    "citrate synthase",
    "malate dehydrogenase",
    "isocitrate dehydrogenase",
    "fumurase",
    "transketolase",
    "transaldolase",
    "gluconolactonase",
    "glucose-6-phosphate dehydrogenase",
]

# For each enzyme: search nuccore, then fetch every hit as a GenBank record
# and store one row per hit in the genes table.
for enz in human_enzymes:
    # NOTE(review): idtype='symbol' does not look like a documented ESearch
    # idtype (NCBI documents 'acc'); confirm which kind of ID is returned.
    search_handle = Entrez.esearch(db='nuccore',
                                   term='homo sapiens [ORGN] ' + enz,
                                   sort='relevance',
                                   idtype='symbol')
    human_ids = Entrez.read(search_handle)['IdList']
    search_handle.close()
    time.sleep(3)  # pause between NCBI requests
    # Show the IDs returned for this enzyme (previously this printed the
    # literal list ['IdList'] instead of the search result).
    print(human_ids)

    # This loop has to sit INSIDE the enzyme loop. If it runs after the
    # enzyme loop has finished, only the search result of the last enzyme
    # is still available and only that enzyme ends up in the table.
    for d in human_ids:
        fetch_handle = Entrez.efetch(db='nuccore', id=d, rettype='gb',
                                     retmode='text', retmax=1)
        record_human = SeqIO.read(fetch_handle, "genbank")
        fetch_handle.close()
        # Take the sequence from the record that was just parsed.
        seq_human = record_human.seq
        c.execute("""INSERT INTO genes (symbol, name, description, n_sequence) VALUES (?, ?, ?, ?);""",
                  (str(d),
                   str(enz),
                   str(record_human.description),
                   str(seq_human)))
        # Commit per record so already-fetched rows survive a later failure.
        conn.commit()
        time.sleep(3)  # pause between NCBI requests
# Enzyme names used as NCBI search terms for Drosophila melanogaster.
# NOTE(review): "fumurase" looks like a misspelling of "fumarase"; it is kept
# as-is here because the same spelling is stored in the enzymes table.
drosophila_enzymes = [
    "hexokinase",
    "phosphoglucose isomerase",
    "Aldolase",
    "pyruvate kinase",
    "citrate synthase",
    "malate dehydrogenase",
    "isocitrate dehydrogenase",
    "fumurase",
    "transketolase",
    "transaldolase",
    "gluconolactonase",
    "glucose-6-phosphate dehydrogenase",
]

# For each enzyme: search nuccore, then fetch every hit as a GenBank record
# and store one row per hit in the genes table.
for y in drosophila_enzymes:
    # NOTE(review): idtype='symbol' does not look like a documented ESearch
    # idtype (NCBI documents 'acc'); confirm which kind of ID is returned.
    search_handle = Entrez.esearch(db='nuccore',
                                   term='drosophila melanogaster [ORGN] ' + y,
                                   sort='relevance',
                                   idtype='symbol')
    fly_ids = Entrez.read(search_handle)['IdList']
    search_handle.close()
    time.sleep(3)  # pause between NCBI requests

    # This loop has to sit INSIDE the enzyme loop. If it runs after the
    # enzyme loop has finished, only the search result of the last enzyme
    # is still available and only that enzyme ends up in the table.
    for z in fly_ids:
        fetch_handle = Entrez.efetch(db='nuccore', id=z, rettype='gb',
                                     retmode='text', retmax=1)
        record_fly = SeqIO.read(fetch_handle, "genbank")
        fetch_handle.close()
        # Take the sequence from the record that was just parsed.
        seq_fly = record_fly.seq
        # Store this loop's own ID (z) as the symbol, not a variable left
        # over from an earlier loop.
        c.execute("""INSERT INTO genes (symbol, name, description, n_sequence) VALUES (?, ?, ?, ?);""",
                  (str(z),
                   str(y),
                   str(record_fly.description),
                   str(seq_fly)))
        # Commit per record so already-fetched rows survive a later failure.
        conn.commit()
        time.sleep(3)  # pause between NCBI requests