Entering edit mode
2.2 years ago
anasjamshed
▴
140
I am trying to implement the NJ algorithm in python :
import sys
import numpy as np
import scipy as scipy
import itertools
from sys import maxsize
def calculateQ(d):
r = d.shape[0]
q = np.zeros((r,r))
for i in range(r):
for j in range(r):
if i == j:
q[i][j] = 0
else:
sumI = 0
sumJ = 0
for k in range(r):
sumI += d[i][k]
sumJ += d[j][k]
q[i][j] = (r-2) * d[i][j] - sumI - sumJ
return q
def findLowestPair(q):
r = q.shape[0]
minVal = maxsize
for i in range(0,r):
for j in range(i,r):
if (q[i][j] < minVal):
minVal = q[i][j]
minIndex = (i,j)
return minIndex
def doDistOfPairMembersToNewNode(i,j,d):
r = d.shape[0]
sumI = 0
sumJ = 0
for k in range(r):
sumI += d[i][k]
sumJ += d[j][k]
dfu = (1. / (2. * (r - 2.))) * ((r - 2.) * d[i][j] + sumI - sumJ)
dgu = (1. / (2. * (r - 2.))) * ((r - 2.) * d[i][j] - sumI + sumJ)
return (dfu,dgu)
def calculateNewDistanceMatrix(f,g,d):
print (d)
r = d.shape[0]
nd = np.zeros((r-1,r-1))
# Copy over the old data to this matrix
ii = jj = 1
for i in range(0,r):
if i == f or i == g:
continue
for j in range(0,r):
if j == f or j == g:
continue
nd[ii][jj] = d[i][j]
jj += 1
ii += 1
jj = 1
# Calculate the first row and column
ii = 1
for i in range (0,r):
if i == f or i == g:
continue
nd[0][ii] = (d[f][i] + d[g][i] - d[f][g]) / 2.
nd[ii][0] = (d[f][i] + d[g][i] - d[f][g]) / 2.
ii += 1
return nd
def doNeighbourJoining(d):
labels = ["Human","Chimp","Gorilla","Orangutan","Gibbon"]
while d.shape[0] > 1:
q = calculateQ(d)
lowestPair = findLowestPair(q)
print ("Joining")
print (lowestPair[0])
print (lowestPair[1])
# newlabel = "%s%s" % (labels[lowestPair[0]], labels[lowestPair[1]])
# print "lowestPair[0]=%i\tlowestPair[1]=%i" % (lowestPair[0], lowestPair[1])
# print labels
# print newlabel
# del labels[lowestPair[0]]
# del labels[lowestPair[1]]
# labels.insert(0,newlabel)
i = lowestPair[0]
j = lowestPair[1]
pairDist = doDistOfPairMembersToNewNode(i,j,d)
d = calculateNewDistanceMatrix(i,j,d)
# print d
def run(distMatrix):
doNeighbourJoining(distMatrix)
if __name__ == "__main__":
if len(sys.argv) > 1:
print ("Usage: neighbour-joining.py")
distMatrix = np.array(
[
[ 0, 79, 92, 144, 162],
[ 79, 0, 95, 154, 169],
[ 92, 102, 0, 150, 169],
[ 144, 154, 150, 0, 169],
[163, 173, 169, 169, 0],
]
)
run(distMatrix)
But I am getting the following error:
---> 47 dfu = (1. / (2. * (r - 2.))) * ((r - 2.) * d[i][j] + sumI - sumJ)
48 dgu = (1. / (2. * (r - 2.))) * ((r - 2.) * d[i][j] - sumI + sumJ)
49
ZeroDivisionError: float division by zero
The code gives a 5X5 matrix, then a 4x4 and 3x3 matrix as an output. But, it should output a 2x2 and 1X1 as well
Can anyone help me?