Here is a utility in R custom built for you.
Step 1
Create the script file get-msa-variants.R
#!/usr/bin/env Rscript
# Parse arguments: the first trailing command-line argument is the path to the
# alignment table. Fail with a usage message instead of an opaque
# "subscript out of bounds" error when it is missing.
script_args <- commandArgs(trailingOnly = TRUE)
if (length(script_args) < 1) {
  stop("Usage: get-msa-variants.R <msa.tab>", call. = FALSE)
}
file_name <- script_args[[1]]
# Load data: a whitespace-separated table whose first column (V1) is the
# sequence name and whose second column (V2) is the aligned sequence.
# Columns are read as character so read.table() does not type-convert them
# (e.g. a name like "001" becoming 1, or a column of "T"/"F" becoming logical).
msa.tab <- read.table(file_name, colClasses = "character",
                      stringsAsFactors = FALSE)
if (ncol(msa.tab) < 2) {
  stop("Expected two columns: sequence name and aligned sequence",
       call. = FALSE)
}
ma <- msa.tab$V2
# Convert to matrix: one row per sequence, one column per alignment position.
num_sequences <- length(ma)
# matrix() would silently recycle characters and misalign every row if the
# sequences differed in length, so reject such input up front.
if (length(unique(nchar(ma))) != 1) {
  stop("All sequences in the alignment must have the same length",
       call. = FALSE)
}
ma <- paste(ma, collapse = "")
ma <- strsplit(ma, "")[[1]]
ma <- matrix(ma, nrow = num_sequences, byrow = TRUE)
rownames(ma) <- msa.tab$V1
# Calculate consensus
# Return the most frequent symbol in `x` (one column of the alignment).
# If several symbols share the highest count, one of them is drawn with
# sample(), so the result for tied columns depends on the RNG state.
get_consensus <- function(x) {
  counts <- table(x)
  top <- names(counts[counts == max(counts)])
  if (length(top) <= 1) {
    return(top)
  }
  sample(top, 1) # Break a tie randomly
}
# Consensus symbol for every alignment column.
consensus <- apply(ma, 2, get_consensus)
# Spot mutations: for each sequence, the column indices where it differs from
# the consensus. lapply() is used instead of apply(ma, 1, ...) because apply()
# simplifies its result to a vector/matrix when every row yields the same
# number of indices, which loses the per-sequence names and prints nothing.
mutations <- lapply(seq_len(nrow(ma)), function(i) {
  which(ma[i, ] != consensus)
})
names(mutations) <- rownames(ma)
# Print results, one line per mutation. Rows are addressed by position rather
# than by name so duplicated sequence names are still reported correctly.
for (i in seq_along(mutations)) {
  for (mut_idx in mutations[[i]]) {
    cat(rownames(ma)[i], "has a mutation", ma[i, mut_idx],
        "at index", mut_idx, "\n")
  }
}
## DEBUGGING
#print(file_name)
#print(msa.tab)
#print(ma)
#print(consensus)
#print(mutations)
Step 2
Make the script executable
chmod +x get-msa-variants.R
Step 3
Create an input file msa.tab containing your MSA, one sequence per line, with the sequence name and the aligned sequence separated by whitespace:
organism1 ACGTACTAC
organism2 ACGTACTCC
organism3 ACGTACTCC
organism4 ACGTACTCC
organism5 ATGTACTCC
Step 4
Run the script
./get-msa-variants.R msa.tab
Output
organism1 has a mutation A at index 8
organism5 has a mutation T at index 2
+2 for using JS