So I coded the solution. However, I strongly recommend using FastQC (https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), which produces this plot for you and also reports many other useful FASTQ statistics.
source("http://bioconductor.org/biocLite.R")
biocLite("Biostrings")
library("Biostrings")
library("tidyr")
library("ggplot2")
# Plot the per-position nucleotide frequency of the reads in a FASTQ file.
#
# Steps: read the reads, split them into a character matrix (one row per
# read, one column per position), count A/T/G/C/N in every column, convert
# the counts to fractions of reads, reshape to long format and draw one
# line per nucleotide.
fastq <- readDNAStringSet(
  "https://d28rh4a8wq0iu5.cloudfront.net/ads1/data/ERR037900_1.first1000.fastq",
  "fastq"
)

# Sequence matrix: col = position, row = sequence.
# NOTE: rbind() assumes every read has the same length -- confirm for
# other input files, otherwise shorter reads are recycled with a warning.
sequence_matrix <- do.call(
  rbind,
  strsplit(as.character(fastq), split = "")
)

# Calculate frequency by position.
# Counting against a fixed set of factor levels yields 0 (not NA) for a
# base that never occurs at a position, so no NA clean-up is needed later.
# Characters outside `bases` are not counted.
bases <- c("A", "T", "G", "C", "N")
freq <- apply(sequence_matrix, 2, function(col) {
  as.vector(table(factor(col, levels = bases)))
})
rownames(freq) <- bases

# Divide by the actual number of reads (the original hard-coded 1000).
freq <- freq / nrow(sequence_matrix)

# One row per position, with an explicit Position column for the x axis.
freq <- t(freq)
freq <- cbind(Position = seq_len(nrow(freq)), freq)

# Wide to long format transformation: one row per (position, nucleotide).
freq_to_plot <- gather(as.data.frame(freq), "Type", "Value", A:N)

# Plotting
ggplot(
  data = freq_to_plot,
  aes(x = Position, y = Value, group = Type, colour = Type)
) +
  geom_line() +
  theme_classic() +
  ylab("Frequency") +
  guides(colour = guide_legend("Nucleotide"))
This is a duplicate of the question "Visualize nucleotides for every position in R".