i have got a big list with proteins like this below:
ID 140U_DROME Reviewed; 261 AA.
AC P81928; Q9VFM8;
FT CHAIN 1 261 RPII140-upstream gene protein.
FT /FTId=PRO_0000064352.
FT TRANSMEM 67 87 Potential.
FT TRANSMEM 131 151 Potential.
FT TRANSMEM 183 203 Potential.
FT CONFLICT 64 64 S -> F (in Ref. 1).
SQ SEQUENCE 261 AA; 29182 MW; 5DB78CF6CFC4435A CRC64;
MNFLWKGRRF LIAGILPTFE GAADEIVDKE NKTYKAFLAS KPPEETGLER LKQMFTIDEF
GSISSELNSV YQAGFLGFLI GAIYGGVTQS RVAYMNFMEN NQATAFKSHF DAKKKLQDQF
TVNFAKGGFK WGWRVGLFTT SYFGIITCMS VYRGKSSIYE YLAAGSITGS LYKVSLGLRG
MAAGGIIGGF LGGVAGVTSL LLMKASGTSM EEVRYWQYKW RLDRDENIQQ AFKKLTEDEN
PELFKAHDEK TSEHVSLDTI K
and the format i want to receive is:
>ADBR2_HUMAN|P07550|413aa
MGQPGNGSAFLLAPNGSHAPDHDVTQERDEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTN
----------------------------------MMMMMMMMMMMMMMMMMMMMMMMM-----------
//
can sοmeone fix my code please?
#!/usr/bin/perl
use strict;
use warnings;
open(IN, "<transmem_proteins.swiss") or die "i can not open the transmem_proteins.swiss, $!";
open (OUT, ">askisi1.txt");
while (<IN>)
{
if ($_=~m/^ID\s{3} (\w+_\w+)\s / )
$id=$1;
print OUT "$id ID\n";
if ($_=~m/^AC\s{3} (\w+)\w{1} and $ac==0)
$ac=$$1;
$ac++;
if ($_=~m/^SQ\s{3} SEQ\s{3}(\w+)\s{1}AA)
$aa = $1;
print OUT "$aa aa\n";
if ($_=~m/^\s{5}(.*)\n/)
{
seq = $seq.$1;
}
if ($_=~m/^FT\s{3}TRANSMEM\s+(\w+)\s+(\w+)/)
if($start[0] == NULL){
shift (@start);
}
if ($end[0] == NULL){
shift (@end);
}
push @start, $s1;
push @end, $s2;
$number_of_transmem++;
$seq = ~s/\s//g;
if ($_=~m/\/\//;
if ($tr_table[0]=NULL){
shift (@tr_table);
}
for ($a=0; $a<$aa; $a++){
$tr_table[$a]='-';
}
for ($tr=0; $tr<$number_of_transmem; $tr++)
for ($a=0; $a,aa; $a++)
if (($a+1)>= $start[$tr] and ($a+1)<=$end[$tr]){
$tr_table[$a]='M';
}
print OUT @tr_table;
print OUT "\n";
print OUT "//\n";
$ac=0;
$number_of_transmem=0;
$seq=~s/.*//;
@start=NULL;
@end=NULL;
@tr_table=NULL;
}
Thank you so much! You saved my day :D. I am trying to learn Perl, but it's all greek to me.