|
#!/bin/bash |
|
|
|
# Script to align fastq reads to a given reference, and output the necessary files sorted etc. |
|
|
|
# Script requires bwa and samtools in path - check for existence: |
|
|
|
command -v bwa >/dev/null 2>&1 || { echo >&2 "BWA doesn't appear to be installed. Aborting."; exit 1; } |
|
command -v samtools >/dev/null 2>&1 || { echo >&2 "samtools doesn't appear to be installed. Aborting."; exit 1; } |
|
# Capture inputs |
|
|
|
usage() |
|
{ |
|
cat << EOF |
|
usage: $0 options |
|
|
|
This script aligns fastq reads to a given |
|
reference, and outputs alignment maps etc. |
|
|
|
OPTIONS: |
|
-h | --help Show this message |
|
-r | --ref Reference fasta sequence |
|
-1 | --read1 First set of reads ("R1") |
|
-2 | --read2 Second set of reads ("R2") |
|
-o | --outdir Directory to output all the files to |
|
EOF |
|
} |
|
|
|
# Tolerate long arguments |
|
for arg in "$@"; do |
|
shift |
|
case "$arg" in |
|
"--help") |
|
set -- "$@" "-h" |
|
;; |
|
"--ref") |
|
set -- "$@" "-r" |
|
;; |
|
"--read1") |
|
set -- "$@" "-1" |
|
;; |
|
"--read2") |
|
set -- "$@" "-2" |
|
;; |
|
"--outdir") |
|
set == "$@" "-o" |
|
;; |
|
*) |
|
set -- "$@" "$arg" |
|
esac |
|
done |
|
# getopts assigns the arguments to variables |
|
while getopts "hr:1:2:o:" OPTION |
|
do |
|
case $OPTION in |
|
r) |
|
reference=$OPTARG |
|
;; |
|
1) |
|
read1=$OPTARG |
|
;; |
|
2) |
|
read2=$OPTARG |
|
;; |
|
o) |
|
outdir=$OPTARG |
|
;; |
|
h) |
|
usage |
|
exit |
|
;; |
|
esac |
|
done |
|
|
|
if [[ -z $reference ]] |
|
then |
|
usage |
|
echo "No Reference sequence provided. Exiting." ; exit 1 |
|
fi |
|
if [[ -z $read1 ]] |
|
then |
|
usage |
|
echo "Forward reads not supplied. Exiting." ; exit 1 |
|
fi |
|
if [[ -z $read2 ]] |
|
then |
|
usage |
|
echo "Reverse reads not supplied. Exiting." ; exit 1 |
|
fi |
|
if [[ -z $outdir ]] |
|
then |
|
outdir=$(pwd) |
|
echo "No output directory was specified. Defaulting to the current working directory:" |
|
pwd |
|
fi |
|
##### |
|
|
|
reads=("$read1" "$read2") |
|
|
|
# Index the reference sequence |
|
bwa index "$reference" |
|
echo "BWA FINISHED INDEXING THE REFERENCE" |
|
|
|
# Align reads |
|
alignedreads=() |
|
|
|
for readelement in ${reads[@]} ; do |
|
bwa mem "$reference" "$readelement" > ${readelement%.*.*}.sai |
|
echo "BWA FINISHED ALIGNING $readelement " |
|
alignedreads+=("${readelement%.*.*}.sai") |
|
done |
|
echo "BWA FINISHED ALIGNING EACH READ." |
|
|
|
bwa sampe "$reference" "${alignedreads[0]}" "${alignedreads[1]}" "$read1" "$read2" > "${reference%.*}"_aligned.sam |
|
echo "SAM FILE CREATED" |
|
|
|
samtools view -S "${reference%.*}"_aligned.sam -b -o ./"${reference%.*}"_aligned.bam |
|
echo "SAM FILE CONVERTED TO BAM" |
|
|
|
samtools sort "${reference%.*}"_aligned.bam "${reference%.*}"_aligned_sorted |
|
echo "BAM FILE SORTED" |
|
|
|
samtools index "${reference%.*}"_aligned_sorted.bam |
|
echo "BAM FILE INDEXED" |
I can't spot the issue. What happens? Are there error messages?
Unless you reads are shorter than 70bp, you should be using
bwa mem
instead ofbwa aln
.There is an error message when I run bwa sampe, but I think the issue begins with bwa aln. No error message occurs when I run bwa aln; however, the .sai output file goes to the current working directory instead of following the path in READ1_list, and the sample name is not written for the .sai file (e.g. READ1_listread1.sai instead of sample_name-read1.sai)
Put an echo in your loop to see how it looks like:
Please note that you could also do the following and pipe directly to samtools sort, without samtools view:
I see two problems in your code:
Also it is hard to find the errors without any detail, are you running this on Linux/Mac or Windows?, what are the names of the input files?, what is inside the READ1_list and READ2_list files?
That shouldn't even work. You should instead e.g.
Just try e.g.
vs.
Only the latter works in bash (as intended)..
But anyway, I wouldn't even use a for loop for this but instead just define a function and use it with find and GNU parallel. Also, why are you aligning the reads separately? Surely it would make more sense to align read pairs properly in the same runs?