Hello, I'm trying to extract several DNA sequences from a FASTA file using the chromosome name and coordinates. I then want to write these sequences to a new FASTA file. I want to do this exclusively in Python.
For one sequence, I am able to extract the sequence:
from Bio import SeqIO
from pyfaidx import Fasta
# Load the genome FASTA with pyfaidx; sequences are looked up by name.
genes = Fasta('Genome.fasta')
# Slice the region of interest out of chr02 before touching the output file,
# so a failed lookup does not leave an empty file behind.
seqFile = genes['chr02'][146062:148216]
# str() on the slice yields only the nucleotides, so no FASTA header line is
# written. The context manager guarantees the file is flushed and closed.
with open('chr02_18s', 'w') as f:
    f.write(str(seqFile))
This gets the correct sequence, but it only writes the nucleotide sequence to the file; it does not include the FASTA header.
If I use:
# Second attempt: hand the slice to Biopython's SeqIO.write() instead of
# writing the raw string.
genes = Fasta('/Users/eflannery/Dropbox/Genomes/PlasmoDB-25_PvivaxSal1_Genome.fasta')
f = open('chr02_18s', 'w')
# Slicing a pyfaidx record gives a 'Sequence' object (the type named in the
# AttributeError this snippet raises).
seqFile = genes['Pv_Sal1_chr02'][146062:148216]
# f.write(str(seqFile))
# NOTE(review): SeqIO.write() loops over its first argument and reads
# record.id / record.description from each item, so it presumably needs
# Biopython SeqRecord objects rather than a pyfaidx Sequence -- confirm.
SeqIO.write(seqFile, f, "fasta")
I get the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-204-1a62c837614c> in <module>()
9 seqFile = genes['Pv_Sal1_chr02'][146062:148216]
10 # f.write(str(seqFile))
---> 11 SeqIO.write(seqFile, f, "fasta")
12 # slice
13 # output_handle.write(handle)
//anaconda/lib/python2.7/site-packages/Bio/SeqIO/__init__.pyc in write(sequences, handle, format)
470 if format in _FormatToWriter:
471 writer_class = _FormatToWriter[format]
--> 472 count = writer_class(fp).write_file(sequences)
473 elif format in AlignIO._FormatToWriter:
474 # Try and turn all the records into a single alignment,
//anaconda/lib/python2.7/site-packages/Bio/SeqIO/Interfaces.pyc in write_file(self, records)
209 """
210 self.write_header()
--> 211 count = self.write_records(records)
212 self.write_footer()
213 return count
//anaconda/lib/python2.7/site-packages/Bio/SeqIO/Interfaces.pyc in write_records(self, records)
194 count = 0
195 for record in records:
--> 196 self.write_record(record)
197 count += 1
198 # Mark as true, even if there where no records
//anaconda/lib/python2.7/site-packages/Bio/SeqIO/FastaIO.pyc in write_record(self, record)
188 title = self.clean(self.record2title(record))
189 else:
--> 190 id = self.clean(record.id)
191 description = self.clean(record.description)
192 if description and description.split(None, 1)[0] == id:
AttributeError: 'Sequence' object has no attribute 'id'
I obviously don't understand what kind of record is created by the Fasta class in pyfaidx, and I can't find this information, or how to write such a record to a FASTA file, anywhere.
Thanks for the help
Thank-you!!