I have a perl script which gives the sum result of all the sequences present in the multifasta (.txt format) input file. please help me in modifying the code in such a way that it gives result separately for each sequence.
Input sequence:
>seq1
GADD
>seq2
MMGAAL
Observed output:
GA=2
AL= 1
MM= 1
.
.
.
.
.
Sum=4
Abs=19
Expected output:
Seq1:
GA= 1
AL = 0
.
.
.
Sum=1
Abs=21
Seq2:
GA=1
AL=1
MM=1
.
.
.
Sum=3
Abs=19
Code:
#!/usr/bin/perl -w
print "Please type the filename of the DNA sequence MMta: ";
$str= <STDIN>;
chomp $str;
unless ( open(DNAFILE, $str) )
{
print "Cannot open file \"$str\"\n\n";
exit;
}
@str = <DNAFILE>;
$str=join('',@str);
print"\n DNA:\n
$str\n";
$GA = 0;
$AL = 0;
$MM = 0;
$DE = 0;
$DV = 0;
$VD = 0;
$DW = 0;
$QD = 0;
$SD = 0;
$DD = 0;
$ED = 0;
$DY = 0;
$VE = 0;
$EN = 0;
$II = 0;
$KE = 0;
$NV = 0;
$VP = 0;
$FV = 0;
$SS = 0;
$WK = 0;
$KK = 0;
$abs=0;
while ($str =~ /GA/ig)
{$GA++}
while ($str =~ /AL/ig)
{$AL++}
while ($str =~ /MM/ig)
{$MM++}
while ($str =~ /DE/ig)
{$DE++}
while ($str =~ /DV/ig)
{$DV++}
while ($str =~ /VD/ig)
{$VD++}
while ($str =~ /DW/ig)
{$DW++}
while ($str =~ /QD/ig)
{$QD++}
while ($str =~ /SD/ig)
{$SD++}
while ($str =~ /DD/ig)
{$DD++}
while ($str =~ /ED/ig)
{$ED++}
while ($str =~ /DY/ig)
{$DY++}
while ($str =~ /VE/ig)
{$VE++}
while ($str =~ /EN/ig)
{$EN++}
while ($str =~ /II/ig)
{$II++}
while ($str =~ /KE/ig)
{$KE++}
while ($str =~ /NV/ig)
{$NV++}
while ($str =~ /VP/ig)
{$VP++}
while ($str =~ /FV/ig)
{$FV++}
while ($str =~ /SS/ig)
{$SS++}
while ($str =~ /WK/ig)
{$WK++}
while ($str =~ /KK/ig)
{$KK++}
$total= "$GA+$AL+$MM+$DE+$DV+$VD+$DW+$QD+$SD+$DD+$ED+$DY+$VE+$EN+$II+$KE+$NV+$VP+$FV+$SS+$WK+$KK";
while ($total=~ /0/ig)
{$abs++}
$sum= $GA+$AL+$MM+$DE+$DV+$VD+$DW+$QD+$SD+$DD+$ED+$DY+$VE+$EN+$II+$KE+$NV+$VP+$FV+$SS+$WK+$KK;
print "GA = $GA\n";
print "AL = $AL\n";
print "WK = $MM\n";
print "DE = $DE\n";
print "VP = $DV\n";
print "VD = $VD\n";
print "DW = $DW\n";
print "QD = $QD\n";
print "SD = $SD\n";
print "DD = $DD\n";
print "ED = $ED\n";
print "DY = $DY\n";
print "VE = $VE\n";
print "EN = $EN\n";
print "II = $II\n";
print "KE = $KE\n";
print "NV = $NV\n";
print "VP = $VP\n";
print "FV = $FV\n";
print "SS = $SS\n";
print "WK = $WK\n";
print "KK = $KK\n";
print "sum=$sum\n";
print "abs=$abs";
$outputfile = "countbase.txt";
unless ( open(COUNTBASE, ">$outputfile") ) {
print "Cannot open file \"$outputfile\" to write
to!!\n\n";
exit;
}
print COUNTBASE "$GA
$AL
$MM
$DE
$DV
$VD
$DW
$QD
$SD
$DD
$ED
$DY
$VE
$EN
$II
$KE
$NV
$VP
$FV
$SS
$WK
$KK
$sum
$abs";
close(COUNTBASE);
exit;
What do you mean by "
sum result of all the sequences
"