#!/usr/bin/perl
use strict;
use warnings;
use autodie;
$ARGV[2] or die "use newGenome.pl <VCF> <FASTA> <OUTPUT>\n";
my $vcf = shift @ARGV;
my $fas = shift @ARGV;
my $out = shift @ARGV;
# load sequences
my %seq;
$/ = "\n>";
open (my $fh, "<", $fas);
while (<$fh>) {
s/>//g;
my ($id, @seq) = split (/\n/, $_);
$seq{$id} = join "", @seq;
}
close $fh;
# parse VCF
$/ = "\n";
open (my $vh, "<", $vcf);
while (<$vh>) {
next if (m/^#/);
chomp;
my ($chr, $pos, $id, $ref, $alt, $qual, $fil, $info) = split (/\t/, $_);
next unless ($alt =~ m/^[ACGT]$/); # only valid SNPs
my $afrq = 0.5; # in case of missing values use a 50% chance
if ($info =~ m/AF=(0\.\d+)/) {
$afrq = $1;
}
my $prob = rand();
if ($prob < $afrq) {
substr ($seq{$chr}, $pos - 1, 1) = $alt);
warn "modified $chr:$pos $ref -> $alt\n";
}
}
close $vh;
# Write modified sequences
open (my $oh, ">", $out);
while (my ($id, $seq) = each %seq) {
print $oh ">$id\n";
while ($seq) {
print $oh substr ($seq, 0, 80), "\n";
substr ($seq, 0, 80) = '';
}
}
close $oh;
* Cold night, procastinating work, perl lover
Very useful! Will you teach me the ways of the force?
You don't need a teacher, just start coding to solve problems like this.