I have developed a script to calculate the distance matrix and the phylogenetic tree for 63 different sequences.
Script:
#Importing Libraries
import pandas as pd
#import seaborn as sns
import numpy as np
#import csv
from Bio import Phylo, AlignIO
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
import matplotlib
import matplotlib.pyplot as plt
from scipy.spatial import distance_matrix
# Pipeline: alignment -> pairwise distance matrix -> neighbor-joining tree
# (saved as an image) -> labelled distance matrix (saved as CSV).

# Read the multiple-sequence alignment (FASTA format).
aln = AlignIO.read("C://Users//USER//Desktop/63_sequences.fasta", "fasta")

# Compute the pairwise distance matrix with Biopython's 'identity' model.
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(aln)

# Build the neighbor-joining tree from the distance matrix.
constructor = DistanceTreeConstructor()
tree = constructor.nj(dm)

# Draw the tree on an explicit axes object and save it to disk.
matplotlib.rc('font', size=7)
fig = plt.figure(figsize=(6, 6), dpi=400)
axes = fig.add_subplot(1, 1, 1)
# do_show=False: do not pop up a window; the figure is only written to file.
Phylo.draw(tree, axes=axes, do_show=False)
plt.savefig('phy.jpg')

# Convert the Biopython distance matrix into a NumPy array.
a = np.array(dm)
print(a)

# Load the genus names that are used as row/column labels.
genus = pd.read_csv("C://Users//USER//Desktop/canola_root_63.csv")
genus1 = genus["genus"]
print(genus1)

# Fail early, with a readable message, when the label count does not match
# the matrix size (pandas would raise a less specific ValueError otherwise).
if len(genus1) != a.shape[0]:
    raise ValueError(
        f"Number of genus labels ({len(genus1)}) does not match the "
        f"distance matrix size ({a.shape[0]})"
    )

# NOTE(review): labels are assigned purely by position, so this assumes the
# CSV rows are in the same order as the sequences in the FASTA file - confirm.
# NOTE(review): if several sequences share a genus, the labels are not unique;
# a graph built from this table would merge those rows into one node - confirm
# whether unique labels are needed downstream.
df = pd.DataFrame(a, columns=genus1, index=genus1)
print(df)

# `df` already IS the labelled distance matrix. The former extra step
# `pd.DataFrame(distance_matrix(df.values, dm), ...)` was removed: it computed
# Euclidean distances between the rows of the matrix, and its result was
# discarded, so it never affected the saved file.

# Save the labelled distance matrix as CSV.
df.to_csv("C:\\Users\\USER\\Desktop\\phylodata.csv")
Anas Jamshed's profile photo
Anas Jamshed
7:26 AM (7 minutes ago)
to networkx-discuss
I have developed a script to calculate the distance matrix and the phylogenetic tree for 63 different sequences.
Script:
#Importing Libraries
import pandas as pd
#import seaborn as sns
import numpy as np
#import csv
from Bio import Phylo, AlignIO
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
import matplotlib
import matplotlib.pyplot as plt
from scipy.spatial import distance_matrix
# Pipeline: alignment -> pairwise distance matrix -> neighbor-joining tree
# (saved as an image) -> labelled distance matrix (saved as CSV).

# Read the multiple-sequence alignment (FASTA format).
aln = AlignIO.read("C://Users//USER//Desktop/63_sequences.fasta", "fasta")

# Compute the pairwise distance matrix with Biopython's 'identity' model.
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(aln)

# Build the neighbor-joining tree from the distance matrix.
constructor = DistanceTreeConstructor()
tree = constructor.nj(dm)

# Draw the tree on an explicit axes object and save it to disk.
matplotlib.rc('font', size=7)
fig = plt.figure(figsize=(6, 6), dpi=400)
axes = fig.add_subplot(1, 1, 1)
# do_show=False: do not pop up a window; the figure is only written to file.
Phylo.draw(tree, axes=axes, do_show=False)
plt.savefig('phy.jpg')

# Convert the Biopython distance matrix into a NumPy array.
a = np.array(dm)
print(a)

# Load the genus names that are used as row/column labels.
genus = pd.read_csv("C://Users//USER//Desktop/canola_root_63.csv")
genus1 = genus["genus"]
print(genus1)

# Fail early, with a readable message, when the label count does not match
# the matrix size (pandas would raise a less specific ValueError otherwise).
if len(genus1) != a.shape[0]:
    raise ValueError(
        f"Number of genus labels ({len(genus1)}) does not match the "
        f"distance matrix size ({a.shape[0]})"
    )

# NOTE(review): labels are assigned purely by position, so this assumes the
# CSV rows are in the same order as the sequences in the FASTA file - confirm.
# NOTE(review): if several sequences share a genus, the labels are not unique;
# a graph built from this table would merge those rows into one node - confirm
# whether unique labels are needed downstream.
df = pd.DataFrame(a, columns=genus1, index=genus1)
print(df)

# `df` already IS the labelled distance matrix. The former extra step
# `pd.DataFrame(distance_matrix(df.values, dm), ...)` was removed: it computed
# Euclidean distances between the rows of the matrix, and its result was
# discarded, so it never affected the saved file.

# Save the labelled distance matrix as CSV.
df.to_csv("C:\\Users\\USER\\Desktop\\phylodata.csv")
Result:
I need to do the following:
1) Using NetworkX:
a. Import the CSV file (the matrix file that the distance-matrix code generates) into pandas.
b. Put labels on the graph (the genus names).
2) Compare the structures of the networks using some metrics, and put the results in a table, in these three ways:
a. Through the degree distribution: every node has a degree, and distributing the degrees across all the nodes gives a probability distribution.
b. Through closeness centrality (which can be calculated in a single line with NetworkX), where you see how many hops it takes to get from one node to another.
c. Through direct comparison between the different nodes.
Can anyone help me, please?
# Compute the closeness centrality of the nodes of G; distance='weight' tells
# NetworkX to use the edge attribute named 'weight' as the edge distance in
# its shortest-path calculations.
# NOTE(review): `nx` and `G` are not defined in the script shown above -
# presumably `import networkx as nx` and a graph built from phylodata.csv;
# confirm how G and its 'weight' attribute were created.
# NOTE(review): if run in the same session as the script above, this rebinds
# `a`, which that script uses for the NumPy distance array.
a= nx.closeness_centrality(G,distance='weight')
gives me :
Is this output correct?