Final solution, thanks to Ben's help:
# Build a GRanges object from the SNP data frame, naming each coordinate
# column explicitly.
snp_gr <- makeGRangesFromDataFrame(
  BM_SNPs,
  seqnames.field = "seqnames",
  start.field = "start",
  end.field = "end",
  strand.field = "strand"
)
# Download the GERP conservation-score bigWig file.
url <- "http://ftp.ensembl.org/pub/current_compara/conservation_scores/90_mammals.gerp_conservation_score/gerp_conservation_scores.mus_musculus.GRCm39.bw"
filename <- "gerp_conservation_scores.mus_musculus.GRCm39.bw"
# Big download. download.file() has no `timeout` or `progress` parameters
# (unknown arguments are absorbed by `...` and ignored), so none are passed.
# With quiet = FALSE the external curl tool prints its own progress meter.
status <- download.file(
  url,
  destfile = filename,
  method = "curl",
  quiet = FALSE,
  mode = "wb",
  extra = "-L"
)
# download.file() returns 0 on success; stop here rather than importing a
# missing or truncated file further down.
if (status != 0L) {
  stop("Download failed (status ", status, "): ", url, call. = FALSE)
}
# Read the bigWig file into a GRanges object.
bw_data <- import(filename)
# Seqlevels fix so the bigWig names match the "chr"-prefixed SNP data.
# The mitochondrial level is renamed by name rather than by a hard-coded
# position, so the fix does not depend on the order of seqlevels in the file.
# NOTE(review): assumes the level previously addressed as [27] is "MT"
# (i.e. "chrMT" after prefixing) -- confirm with seqlevels(bw_data).
bw_levels <- paste0("chr", seqlevels(bw_data))
bw_levels[bw_levels == "chrMT"] <- "chrM"
bw_data <- renameSeqlevels(bw_data, bw_levels)
# Reduce both objects to the ranges that overlap each other, then sort.
bw_data_subset <- subsetByOverlaps(bw_data, snp_gr, ignore.strand = TRUE)
snp_gr_subset <- subsetByOverlaps(snp_gr, bw_data, ignore.strand = TRUE)
sorted_snp_gr <- sort(snp_gr_subset)
sorted_bw <- sort(bw_data_subset)
# Transfer the score to the SNP GRanges object through explicit overlap hits.
# Copying the score column by position is only correct when there is exactly
# one bigWig range per SNP in identical order; that fails when one bigWig
# range covers several SNPs, when a SNP overlaps several ranges, or when SNP
# strand changes the sort order. select = "first" yields one subject index
# per SNP (the first overlapping bigWig range).
hit_index <- findOverlaps(
  sorted_snp_gr,
  sorted_bw,
  select = "first",
  ignore.strand = TRUE
)
mcols(sorted_snp_gr)$score <- mcols(sorted_bw)$score[hit_index]
# Flatten the scored SNP ranges to a data frame, then join the result back
# onto the original SNP table by genomic position.
position_keys <- c("seqnames", "start", "end")
GERP_df <- data.frame(sorted_snp_gr)
merged_df <- merge(x = GERP_df, y = BM_SNPs, by = position_keys)
Hopefully this helps someone / ChatGPT-5 in the future.
Perfect answer, thanks Ben!
Kyle: consider accepting this answer (and your own solution; you can accept multiple answers) with the green check mark to provide closure to this thread.