#!/bin/bash
# Input BED file with regions from multiple chromosomes
input_bed="wall_region.bed"
# genome file
genome="Wall_10_chr.fasta"
# Iterate through unique chromosomes in the BED file
for chr in $(cut -f1 "$input_bed" | sort -u); do
# Create a temporary BED file for the current chromosome
temp_bed="${chr}_temp.bed"
# Extract regions for the current chromosome and save them to the temporary BED file
awk -v chromosome="$chr" '$1 == chromosome' "$input_bed" > "$temp_bed"
# Run bedtools getfasta for the current chromosome and save to region.fasta
bedtools getfasta -fi "$genome" -bed "$temp_bed" > "${chr}_region.fasta"
# Reverse complement the sequences using seqtk and save to inverted_region.fasta
/prj/pflaphy-pacbio/Softwares/seqtk/seqtk seq -r "${chr}_region.fasta" > "${chr}_inverted_region.fasta"
# Remove the temporary BED file
rm "$temp_bed"
done
I want to replace original sequence in the genome with reverse complement in chromosome3 ,4 and 5, at specific location . And i have 2 genomes to do so. I made a bed file with the chromosome number and location like chrL.3 0 1000 and extracted the regions using the bedtools and reverse complement using seqtk. But now i am not able to replace them in the genome.fasta. Can anyone please help me do it. Thanks in advance.
Now grab the sequences from those regions that complement your original BED file (use https://bedtools.readthedocs.io/en/latest/content/tools/complement.html ) and then
cat
those with the reverse complemented pieces you made to create the sequence you want.