Entering edit mode
3.9 years ago
sharmatina189059
▴
110
#!/usr/bin/env python3
# Conserved amino acid positions
from Bio import AlignIO
import sys
#data = np.genfromtxt("/home/tina/bin/MDR_aminoglycoside.csv", delimiter=",")
# Path to the Clustal-format multiple sequence alignment to analyse.
aln = "/home/tina/cd-hit/core_header_WP_000209090.1/seqret_out_WP_000209090.1.aln"

# Read the alignment first; the length can only be reported once it exists.
# Passing the path (rather than an open handle) lets AlignIO open and close
# the file itself, so no file handle is leaked.
algnmnt = AlignIO.read(aln, "clustal")
print(f"Alignment length {algnmnt.get_alignment_length()}")  # print alignment length

# Walk the alignment column by column (0-based) and report every column in
# which all sequences carry the same character.
# NOTE: a column made up entirely of gap characters also yields a single
# distinct symbol and is therefore reported as well.
for col in range(algnmnt.get_alignment_length()):
    # algnmnt[:, col] is the column as a string; the set keeps distinct symbols.
    proteins = set(algnmnt[:, col])
    if len(proteins) == 1:
        print(f"Postion {col}: protein {''.join(proteins)}")  # print position of each match
I have written this code, which reads a Clustal `.aln` file and prints the alignment length and the conserved amino acids with their positions. Can anybody tell me how I can also print the variable sites (positions), along with the number of variations for each amino acid?