Hi,
I am trying to extract all class:2 sequences from a FASTA file, but I am getting this error:
Traceback (most recent call last): File "class-sorter.py", line 23, in <module> main(sys.argv) File "class-sorter.py", line 16, in main if "class:2" in record.description(): TypeError: 'str' object is not callable
My code:
import argparse
import itertools
import operator
import re
import sys

from Bio import SeqIO
def main(argv):
    """Copy every FASTA record whose header contains "class:2" to a new file.

    Args:
        argv: Full command line, program name first (normally ``sys.argv``).
            The two positional arguments after the program name are the
            input FASTA path and the output path.

    Raises:
        SystemExit: Raised by argparse when the arguments are missing or
            invalid.
    """
    parser = argparse.ArgumentParser(description='find a location')
    parser.add_argument('infile', help='file to process')
    parser.add_argument('outfile', help='file to produce')
    # Parse the argv we were handed (minus the program name) instead of
    # silently ignoring the parameter.
    args = parser.parse_args(argv[1:])

    with open(args.outfile, "w") as of:
        for record in SeqIO.parse(args.infile, "fasta"):
            # `description` is a plain str attribute of the record, not a
            # method; calling it raises
            # "TypeError: 'str' object is not callable".
            # NOTE: this is a substring test, so a header such as
            # "class:20" would match as well.
            if "class:2" not in record.description:
                continue
            # Terminate both the header and the sequence with a newline so
            # consecutive records do not run together in the output.
            of.write(">" + record.id + "\n")
            # A text file's write() needs a str, so convert the sequence.
            of.write(str(record.seq) + "\n")
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main(argv=sys.argv)
Here is an example of the FASTA file:
>NEIS2020_129 class:2
ATGAAAAAATCTCTGATTGCTCTGACTTTGGCAGCTTTGCCTGTTGCAGCTATGGCTGAT
GTGACTCTGTACGGTCAAGTTAAAGCCGGTGTTGAAATTTCTCGCATCAAAGCAGGCTCT
GGTGTGACTGATAATGGTCGTTCTTCTACTACTAAAACTGCAACTGAAATCGCTGACCTC
GGTTCTCGTATCGGTTTCAAAGGTCATGAACACCTGAGCAACAACCTGAACGCTATTTGG
CAAGTAGAACAAAATACTTCTGTTGCTGGTACTGACAGCGGCTGGGGTACTCGTGAATCT
TTCATCGGTTTGGAAGGTGGCTTCGGTAAAATTCGTGCTGGTAAACTCAACACTACTCTG
AAAGACAGCACCGACAGCATCGATCCATGGGAATCCAGCGATGCTAATGAACATGTATTG
TCATTGGGTACTTTGGAACGTGTAGATGAGCGTAAAGTGTCTGTTCGCTACGACTCCCCT
GTGTTCTCAGGCTTTAGCGCAAGCGTTCAATACCAACCTCGCGATAACGCCAACTCTAGC
GACAAATATACTCATGCTGCGAAAAGCCGTGAAGCATACTACGCTGGTTTGAACTATGAA
AATTCCGGTTTCTTTGGTCGCTATGCTGGTAAATTTGCAAAACATGATGTAATTACAGCT
AACGAATATGATGTTGCTGTTGATAAAGTTGCTGATGCTGCTTCTACTTTGAAAGTTGGC
GATACTTTGGCTACTGTTAAAGATCATCAAGTTCATCGTTTAGTAGCTGGTTACGACGCG
AACAATGTTTTATTTGCTGTTGCTGGTCAATATGATGCATCAAAAAATGGTGATGTAAGT
GATGCTAACTACGGTAAGAAAAACGAGCAAACCCAAGTTGCTGTAACTGGTGGCTACCGT
ATGGGCAACGTAATGCCTCGTGTTTCTTACGCTCACGGCTTCAAAGCTAAAGAAGATGGC
GAGAAACAAGCTAACAGTCAATACAACCAAGTTATCGTTGGTGCTGACTACGACTTCTCT
AAACGTACTTCTGCTTTGATTTCTGCTGGTTGGTTGAAACAAGGTAAAGGCGTTAACAAA
GTTGAATCTACTGCTGGTTTGGTTGGTCTGCGCCACAAATTCTAA
>NEIS2020_130 class:3
ATGAAAAAATCCCTGATTGCCCTGACTTTGGCAGCCCTTCCTGTTGCAGCAATGGCCGAT
GTTACCCTGTACGGCACCATCAAAGCCGGCGTAGAAACCTACCGTACTGTAAAACACACA
GACGGCAAAGTAACTGAAGTGAAAACCGGCAGCGAAATCGCCGACTTCGGTTCAAAAATC
GGCTTCAAAGGTCAAGAAGATCTCGGCAACGGCCTGAAAGCCATTTGGCAGTTGGAACAA
AGCGCCTCCATCGCCGGCGCTGACAGCGGCTGGGGCAACAAACAATCTTTCATCGGCTTG
AAAGGCGGCTTCGGTACCGTCCGCGCCGGTAACCTGAACAGCATCCTGAAAAGCACCGGC
GACAACGTCAACGCTTGGGAATCCGGCAAGGCTACCGAAGACGTGCTGCAAGTCAGCAAA
ATTTCCGCTCCGGAACACCGCTACGCATCCGTACGCTACGACTCTCCCGAATTTGCCGGC
TTCAGCGGCAGCGTACAATACGCGCCTAAAGACAATTCAGGTGCAAACGGCGAATCTTAC
CACGTTGGTTTGAACTACCAAAACAGCGGTTTCTTCGCACAATACGCCGGCTTGTTCCAA
AGACACGGCGAAGGCACTAAAGCCACAGTCGGCGAGCCTGTTGAAAAACTGCAAGTCCAC
CGTTTGGTCGGCGGTTACGACAATGATGCCCTGTACGCTTCCGTAGCCGTACAACAACAA
GATGCGAAACTGACTGATGCTTCCAATTCGCACAACTCTCAAACCGAAGTTGCCGCTACC
GTGGCATACCGTTTCGGCAACGTAACGCCCCGCGTTTCTTACGCCCACGGCTTCAAAGGC
ACTGTTGCTAAAGCAGACGGCGACAACCGTTACGACCAAGTGGTTGTCGGTGCGGAATAC
GACTTCTCCAAACGCACTTCTGCCTTGGTTTCTGCCGGCTGGTTGCAAGAAGGCAAAGGT
GCAGGCAAAACCGTATCGACTGCCAGCACCGTCGGTCTGCGCCACAAATTCTAA
Can anyone see what I am doing wrong? I cannot figure it out after googling. Thank you.
If the FASTA file is flattened, this should work:
If the FASTA file is not flattened, try seqkit: