Entering edit mode
10.3 years ago
Sidney
•
0
Code below is what I have so far. output not working at all.
# Declare and initialize variables
my %rebase_hash = ( );
my @file_data = ( );
my $query = '';
my $dna = '';
my $recognition_site = '';
my $regexp = '';
my @locations = ( );
#Read in the file
@file_data=get_file_data($rebasefile);
#Extract sequence data from file being used
$dna=extract_sequence(@file_data);
###Get rebase data into hash from dnasis
%rebase_hash=parseREBASE('rebase.txt');
#Prompt user for input
do {
print "Search for what restriction site for (or quit)?: ";
$query = <STDIN>;
chomp $query;
# Exit if empty query
if ($query =~ /^\s*$/ ) {
exit;
}
# Perform the search in the DNA sequence
if ( exists $rebase_hash{$query} ) {
($recognition_site, $regexp) = split " ", $rebase_hash{$query};
# Create the restriction map
@locations = match_positions($regexp, $dna);
# Report the restriction map to the user
if (@locations) {
print "Searching for $query $recognition_site $regexp\n";
print "A restriction site for $query at locations:\n";
print join(" ", @locations), "\n";
} else {
print "A restriction enzyme $query is not in the DNA:\n";
}
}
print "\n";
} until ( $query =~ /quit/ );
print parseREBASE;
exit;
# open_file
#
# - given filename, set filehandle
sub open_file {
my($filename) = 'rebase.txt';
$filename=$_;
my $fh;
unless(open($fh, $filename)) {
print "Cannot open file $filename\n";
}
return $fh;
}
# A subroutine to get data from a file given its filename
# get_file_data
sub get_file_data {
my($filename) = @_;
# Initialize variables
my @filedata = ( );
unless( open(GET_FILE_DATA, $filename) ) {
print STDERR "Cannot open file \"$filename\"\n\n";
exit;
}
@filedata = <GET_FILE_DATA>;
close GET_FILE_DATA;
return @filedata;
}
sub extract_sequence {
my(@fasta_file_data) = @_;
my $sequence = '';
foreach my $line (@fasta_file_data) {
# discard fasta header line
if($line =~ /^>/) {
next;
# keep line, add to sequence string
} else {
$sequence .= $line;
}
}
# remove non-sequence data (in this case, whitespace) from $sequence string
$sequence=~ s/\s//g;
return $sequence;
}
sub IUB_to_regexp {
my($iub) = @_;
my $regular_expression = '';
my %iub2character_class = (
A => 'A',
C => 'C',
G => 'G',
T => 'T',
R => '[GA]',
Y => '[CT]',
M => '[AC]',
K => '[GT]',
S => '[GC]',
W => '[AT]',
B => '[CGT]',
D => '[AGT]',
H => '[ACT]',
V => '[ACG]',
N => '[ACGT]',
);
# Remove the ^ signs from the recognition sites
$iub =~ s/\^//g;
# Translate the iub sequence
for ( my$i=0; $i < length($iub);$i++) {
$regular_expression .= $iub2character_class{substr($iub,$i,1)};
}
return $regular_expression;
}
#Sub to match positions
sub match_positions {
my($regexp, $sequence) = @_;
my @positions = ( );
#Determine positions
while ($sequence=~/$regexp/ig) {
push( @positions, pos($sequence) - length($&)+ 1);
}
return @positions;
}
#Sub for parse rebase
sub parseREBASE {
my($rebasefile) = @_;
# Declare variables
my @rebasefile = ( );
my %rebase_hash = ( );
my $name;
my $site;
my $regexp;
# Read in the REBASE file
my $rebase_filehandle = open_file('rebase.txt');
while(<$rebase_filehandle>) {
# Discard header lines
(1../Rich Roberts/) and next;
# Discard blank lines
/^\s*$/ and next;
chomp;
$name = $_;
$_ = <$rebase_filehandle>;
chomp;
$site = $_;
$regexp = IUB_to_regexp($site);
# Store the data into the hash
$rebase_hash{$name} = "$site $regexp";
}
# Return the hash containing the reformatted REBASE data
return %rebase_hash;
}
A good way my teacher always taught while debugging a program is to use print statement after each and every line of the program
Thanks I will def implement this in my coding to try and debug the output I am looking for.