Entering edit mode
21 months ago
Alewa
▴
170
I have multiple bed files (master_gr.bed, gr_2.bed, gr_3.bed) and want to find complete overlaps/intersection but seem to be getting results with at least 1 overlap
Here, i simulated data set with R and highlighted the spurious results. line 'chr1 2 2' is represented by only 1 file hence should not be present in the final output below.
(base) bash-4.2$ head data/bed_intersected_master_gr23.bed
chr1 2 2
chr1 3 3
chr1 4 4
chr1 5 5
chr1 6 6
chr1 7 7
chr1 8 8
chr1 9 9
chr1 10 10
chr1 11 11
code to produce results
library(plyranges)
library(tidyverse)
library(purrr)
#create granges
length_gr=100
master_gr <- data.frame(seqnames = c(rep("chr1",length_gr/2),rep("chr2",length_gr/2)) ,
start=c(seq(from =2,length.out=length_gr/2),
seq(from =1,length.out=length_gr/2)),
width=1, gc = runif(length_gr)) %>% as_granges()
len_gr2=50
gr_2 <- data.frame(seqnames = c(rep("chr1",len_gr2/2), rep("chr2",len_gr2/2)),
start=c(seq(from = 5, length.out=len_gr2/2),
seq(from = 10, length.out=len_gr2/2)),
width=1, gc = runif(len_gr2)) %>% as_granges()
len_gr3=300
gr_3 <- data.frame(seqnames = c(rep("chr1",len_gr3/2), rep("chr2",len_gr3/2)),
start=c(seq(from = 1, length.out=len_gr3/2),
seq(from = 35, length.out=len_gr3/2)),
width=1,gc = runif(len_gr3))%>%as_granges()
#merge all granges into a list
gr_list <- list(master_gr=master_gr, gr_2=gr_2, gr_3=gr_3)
#save a copy on disk
gr_list %>% iwalk(~write_tsv(as_tibble(.x) %>% select(c(seqnames,start,end)), file = paste0("data/" , .y , ".bed"), col_names = FALSE))
#run bedtools
system("bedtools intersect -a data/master_gr.bed -b data/gr_2.bed data/gr_3.bed -sorted -F 1.0 -u > data/bed_intersected_master_gr23.bed")
beware all the intervals above are EMPTY : Cheat Sheet For One-Based Vs Zero-Based Coordinate Systems