# Pseudocode outline only (not runnable: the `use` line is Perl-style while the
# rest is Python-style). Idea: read sequences and write out only those that
# pass all three checks named below.
use Bio::SeqIO
sequenceFile= open(standarinput,'ro') #with bioseq
outFile =open(standarinput,'rw')
for seq in sequenceFile:
# A sequence is printed only when the three nested checks all succeed.
if startAUG(seq):
if stopAtTheEnd(seq):
if NOTstopMiddle(seq):
# NOTE(review): `outfile` differs in case from `outFile` opened above - confirm
# which spelling is intended.
outfile.print(seq)
# Stub: intended to check for AUG at the beginning of seq; body not written yet.
def startAUG(seq):
#SOMETHING INSIDE HERE
# Stub: intended to check for a stop codon at the end of seq; body not written yet.
def stopAtTheEnd(seq):
# SOMETHING INSIDE HERE
# Stub: intended to check for stop codons in the middle of seq; body not written
# yet. NOTE(review): `def if` is not valid syntax - the `if` looks accidental.
def if NOTstopMiddle(seq):
# SOMETHING INSIDE HERE
That is maybe easy to code, but possibly the approach in itself is un-biological. We need to know where your sequences come from. Firstly, it is not uncommon to have fragmented sequences without the start or stop codons. Secondly, there are non-canonical start codons: http://www.pnas.org/content/105/29/10079.full, http://www.ncbi.nlm.nih.gov/pubmed/16780373.
The sequences are CDSs from TriTrypDB, and they usually have AUG as the start codon. I'd like to keep it simple and can live without rare start codons, because the goal is to run codonW on a set of sequences that does not trigger warnings.
from Bio import SeqIO
######################## Main Program ####################################
def main():
    """Copy the FASTA records that look like complete coding sequences.

    Prompts for an input and an output FASTA path, parses the input with
    SeqIO, and writes a record to the output only when its sequence passes
    startAUG, stopAtTheEnd and NOTstopMiddle. Each kept record is written as
    a ">id" header line followed by the sequence on a single line.
    """
    infile = input('input fasta file')
    out = input('output fasta file')
    # 'w' is the valid write mode ('rw' is rejected by open()); the with-block
    # closes the output file even if parsing fails part-way through.
    with open(out, 'w') as outFile:
        # Parse the FASTA file; each sequence arrives as a seq_record.
        for seq_record in SeqIO.parse(infile, "fasta"):
            sequence = seq_record.seq
            if (startAUG(sequence)
                    and stopAtTheEnd(sequence)
                    and NOTstopMiddle(sequence)):
                outFile.write(">" + seq_record.id + "\n")
                outFile.write(str(sequence) + "\n")


# NOTE(review): startAUG, stopAtTheEnd and NOTstopMiddle must already be
# defined when main() is called; if they are defined further down the file,
# move this guard below them.
if __name__ == "__main__":
    main()
######################## End Main program ####################################
###################### Functions #######################################
######## this one checks if there's AUG at the beginning
def startAUG(seq):
    """Return True if *seq* begins with the canonical start codon AUG.

    The comparison is case-insensitive and treats T as U, so DNA-alphabet
    input (ATG...) is accepted as well as RNA. *seq* may be a plain string
    or any sliceable sequence object whose str() is its bases.

    Sequences shorter than three bases return False instead of raising.
    """
    # Slicing (rather than indexing) keeps short or empty input safe.
    first_codon = str(seq[:3]).upper().replace("T", "U")
    return first_codon == "AUG"
#############################################
######## this one checks if there's stop codon at the end
def stopAtTheEnd(seq):
    """Return True if the last three bases of *seq* form a stop codon.

    The stop codons recognised are UAA, UAG and UGA, read 5'->3'. The
    comparison is case-insensitive and treats T as U, so DNA-alphabet input
    (TAA/TAG/TGA) is accepted as well as RNA.

    Sequences shorter than three bases return False instead of raising.
    """
    # Take the final codon in reading order; comparing it as one string
    # avoids getting the base order backwards when indexing from the end.
    last_codon = str(seq[-3:]).upper().replace("T", "U")
    return last_codon in ("UAA", "UAG", "UGA")
#############################################
######## stop codons in the middle
def NOTstopMiddle(seq):
    """Return True if *seq* has no in-frame stop codon before its end.

    The sequence is read in the frame that starts at its first base. Every
    complete codon that begins before the last three bases is checked
    against UAA, UAG and UGA; the final three bases are skipped because a
    terminal stop codon is expected there. Out-of-frame occurrences of the
    stop triplets are ignored.

    The comparison is case-insensitive and treats T as U. An empty or very
    short sequence has no internal codons and therefore returns True.
    """
    bases = str(seq).upper().replace("T", "U")
    stop_codons = ("UAA", "UAG", "UGA")
    # Stop before the final three bases so the terminal stop is not counted.
    internal_starts = range(0, len(bases) - 3, 3)
    return not any(bases[i:i + 3] in stop_codons for i in internal_starts)
##############################################
I've started a Python program like this:
So, any ideas on how to do this in Python or Perl?
That is maybe easy to code, but possibly the approach in itself is un-biological. We need to know where your sequences come from. Firstly, it is not uncommon to have fragmented sequences without the start or stop codons. Secondly, there are non-canonical start codons: http://www.pnas.org/content/105/29/10079.full, http://www.ncbi.nlm.nih.gov/pubmed/16780373.
The sequences are CDSs from TriTrypDB, and they usually have AUG as the start codon. I'd like to keep it simple and can live without rare start codons, because the goal is to run codonW on a set of sequences that does not trigger warnings.