Needleman-Wunsch with Python
0
2
Entering edit mode
5.1 years ago

Hi, I'm implementing the Needleman-wunsch algorithm with python and I don't know how to use the open penalty gap. Also, i want to use the BLOSUM62 matrix. Can anyone help me? Thanks

import pandas as pd
import numpy as np
from Bio.SubsMat import MatrixInfo


seq1 ='ACGGTCT'  
seq2 = "ATGGCCTC"


pt ={'MATCH': +1, 'MISMATCH': -3, 'GAP': -4, 'OPENGAP':0.5}


blosum62 = MatrixInfo.blosum62




def match(a, b):
    if a == b:
        return pt['MATCH']
    elif a == '-' or b == '-':
        return pt['GAP']
    else:
        return pt['MISMATCH']






def n_w(s1, s2):
    m, n = len(s1), len(s2)
    matrix = np.zeros((m+1, n+1))

    #Initialization
    for i in range(m+1):
        matrix[i][0] = pt['GAP'] * i
    for j in range(n+1):
        matrix[0][j] = pt['GAP'] * j

    #Fill
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            diag = matrix[i-1][j-1] + match(s1[i-1], s2[j-1])
            delete = matrix[i-1][j] + pt['GAP']
            insert = matrix[i][j-1] + pt['GAP']
            matrix[i][j] = max(diag, delete, insert)

    print('Matrix = \n%s\n' % matrix)
    align1, align2 = '', ''
    i,j = m,n



    #Traceback
    while i > 0 and j > 0:
        score_current = matrix[i][j]
        score_diag = matrix[i-1][j-1]
        score_left = matrix[i][j-1]
        score_up = matrix[i-1][j]

        if score_current == score_diag + match(s1[i-1], s2[j-1]):
            a1,a2 = s1[i-1],s2[j-1]
            i,j = i-1,j-1
        elif score_current == score_up + pt['GAP']:
            a1,a2 = s1[i-1],'-'
            i -= 1
        elif score_current == score_left + pt['GAP']:
            a1,a2 = '-',s2[j-1]
            j -= 1
        align1 += a1
        align2 += a2


    while i > 0:
        a1,a2 = s1[i-1],'-'
        align1 += a1
        align2 += a2
        i -= 1

    while j > 0:
        a1,a2 = '-',s2[j-1]
        align1 += a1
        align2 += a2
        j -= 1

    align1 = align1[::-1]
    align2 = align2[::-1]
    seqN = len(align1)
    sym = ''
    seq_score = 0
    identity = 0
    for i in range(seqN):
        a1 = align1[i]
        a2 = align2[i]
        if a1 == a2:
            sym += a1
            identity += 1
            seq_score += match(a1, a2)

        else: 
            seq_score += match(a1, a2)
            sym += ' '

    identity = identity/seqN * 100

    print('identityity = %2.1f percent' % identity)
    print('Score = %d\n'% seq_score)

    print(align1)
    print(sym)
    print(align2)




n_w(seq1, seq2)
python alignment gap gap penalty blosum62 • 5.4k views
ADD COMMENT
1
Entering edit mode

For differentiating between gap extension and opening, you need to use an affine gap penalty model. For instance, have a look here.

ADD REPLY
0
Entering edit mode

Your indentation is wrong

ADD REPLY
1
Entering edit mode

Ok, i fixed the code

ADD REPLY
0
Entering edit mode

Yup, sorry but i'm newbie

ADD REPLY

Login before adding your answer.

Traffic: 2469 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6