Entering edit mode
4.6 years ago
er.doug.ragnar
▴
30
def read_Fasta(filename):
from re import sub, search
res = []
sequence = None
info = None
fh = open(filename)
for line in fh:
if search(">.*", line):
if sequence is not None and info is not None and sequence != "":
res.append(sequence)
info = line
sequence = ""
else:
if sequence is None:
return None
else:
sequence += sub("\s", "", line)
if sequence is not None and info is not None and sequence != "":
res.append(sequence)
fh.close()
return res
def read_submat_file(filename):
sm = {}
f = open(filename, "r")
line = f.readline()
tokens = line.split("\t")
ns = len(tokens)
alphabet = []
for i in range(0, ns):
alphabet.append(tokens[i][0])
for i in range(0, ns):
line = f.readline()
tokens = line.split("\t")
for j in range(0, len(tokens)):
k = alphabet[i] + alphabet[j]
sm[k] = int(tokens[j])
return sm
def needleman_wunsch (seq1, seq2, sm, g):
"""Global Alignment"""
sm = read_submat_file(f)
seq1 = read_Fasta("A.fasta")
seq2 = read_Fasta("B.fasta")
S = [[0]]
T = [[0]]
# initialize gaps in rows
for j in range(1, len(seq2)+1):
S[0].append(g * j)
T[0].append(3)
# initialize gaps in cols
for i in range(1, len(seq1)+1):
S.append([g * i])
T.append([2])
# apply the recurrence to fill the matrices
for i in range(0, len(seq1)):
for j in range(len(seq2)):
s1 = S[i][j] + score_pos(seq1[i], seq2[j], sm, g)
s2 = S[i][j+1] + g
s3 = S[i+1][j] + g
S[i+1].append(max(s1, s2, s3))
T[i+1].append(max3t(s1, s2, s3))
return (S, T)
def test_DNA_GlobalAlign():
sm = read_submat_file("blosum62.mat")
seq1 = read_Fasta("A.fasta")
seq2 = read_Fasta("B.fasta")
p = -3
res = needleman_wunsch(seq1, seq2, sm, p)
S = res[0]
T = res[1]
print("Score of optimal alignment:", S[len(seq1)][len(seq2)])
print_mat(S)
print_mat(T)
alig = recover_align(T, seq1, seq2)
print(alig[0])
print(alig[1])
test_DNA_GlobalAlign()
I have been trying to run the python script to align 2 fasta files, it reads the files but I can't seem to use the previous functions on the subsequent functions. I get these errors.
>>> test_DNA_GlobalAlign()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 6, in test_DNA_GlobalAlign
File "<stdin>", line 20, in needleman_wunsch
File "<stdin>", line 6, in score_pos