#!/usr/bin/perl
use strict;
use warnings;

# Usage: script.pl <input_file> <query_file>
#
# Reads one regular expression per line from <query_file>. For every line of
# <input_file> that matches at least one of those patterns, writes that line
# and the line immediately following it to result.txt. The following line is
# consumed, so it is never itself tested against the patterns.
@ARGV >= 2 or die "Usage: $0 <input_file> <query_file>\n";
my ($input_path, $query_path) = @ARGV;
my $output_path = 'result.txt';

open(my $input_fh, '<', $input_path)
    or die "Cannot open input file '$input_path': $!\n";
open(my $query_fh, '<', $query_path)
    or die "Cannot open query file '$query_path': $!\n";
open(my $output_fh, '>', $output_path)
    or die "Cannot open output file '$output_path': $!\n";

my @queries = <$query_fh>;
close($query_fh);

# Strip line endings (including a stray CR from Windows-edited files).
s/\r?\n\z// for @queries;

# Blank query lines are skipped: an empty pattern makes Perl reuse the last
# successfully matched regex, which is never what a blank line means here.
# Patterns are compiled once instead of on every input line.
my @patterns = map { qr/$_/ } grep { length } @queries;

RECORD:
while (my $line = <$input_fh>) {
    for my $pattern (@patterns) {
        next unless $line =~ $pattern;

        # The matching line may be the last one in the file.
        my $next_line = <$input_fh>;
        $next_line = '' unless defined $next_line;

        print {$output_fh} $line, $next_line;

        # Stop at the first matching pattern so that a line matched by several
        # patterns is written once and only one following line is consumed.
        next RECORD;
    }
}

# Buffered write errors only surface when the handle is closed.
close($output_fh) or die "Cannot close output file '$output_path': $!\n";
close($input_fh);
You may also try the following Perl script... and this works for fasta format input files!
use strict;
use warnings;

# Prints to STDOUT every record of file1.fasta whose text contains one of the
# IDs listed in file2.list immediately followed by a '|'.
#
# An ID is taken from each list line as the text between the first '>' (which
# must be preceded by at least one character) and the next '|' or end of line.
my $list_path  = 'file2.list';
my $fasta_path = 'file1.fasta';

my @genes;
my %seen;
open(my $list_fh, '<', $list_path)
    or die "Cannot open list file '$list_path': $!\n";
while (my $line = <$list_fh>) {
    # Drop the line ending so an ID at the end of a line does not carry a
    # newline into the search pattern.
    $line =~ s/\r?\n\z//;
    next unless $line =~ /[^>]+>([^|]+)/;

    # Keep each ID once; duplicates would add nothing to the search.
    push @genes, $1 unless $seen{$1}++;
}
close($list_fh);

# With no IDs the alternation below would be empty and match every record
# containing '|', so nothing is searched in that case.
if (@genes) {
    # IDs are literal text, not regexes: quote metacharacters such as '.'.
    my $alternation = join '|', map { quotemeta } @genes;
    my $wanted      = qr/(?:$alternation)\|/;

    open(my $fasta_fh, '<', $fasta_path)
        or die "Cannot open FASTA file '$fasta_path': $!\n";

    # Read one record at a time, splitting only where '>' starts a line, so
    # the whole file is never held in memory and a '>' inside a header
    # description does not cut a record in two.
    local $/ = "\n>";
    my $is_first = 1;
    while (my $record = <$fasta_fh>) {
        # chomp removes the "\n>" separator and reports how much it removed;
        # the final record has no separator to remove.
        my $had_separator = chomp $record;

        # Only the first record still carries its own leading '>'.
        $record =~ s/\A>// if $is_first;
        $is_first = 0;

        next unless $record =~ $wanted;

        # Restore the '>' and, when a separator was removed, the newline
        # that belonged to this record.
        print '>', $record, ($had_separator ? "\n" : '');
    }
    close($fasta_fh);
}
File 2 will be your query file, and File 1 the FASTA sequence file in this case!
This is a pretty common question: Extracting Sequence From A 3Gb Fasta File?