How about a 'stupid' solution using JavaScript?
// following array is generated with awk '{printf("{\"chrom\":\"%s\",\"start\":%s,\"end\":%s,\"type\":\"%s\"},\n",$1,$2,$3,$4);}' your.bed
var segments = [
  {"chrom":"chr1","start":7967601,"end":7984800,"type":"duplication"},
  {"chrom":"chr1","start":7967602,"end":7984810,"type":"duplication"},
  {"chrom":"chr1","start":7967602,"end":7967603,"type":"duplication"},
  {"chrom":"chr1","start":1,"end":100,"type":"duplication"},
  {"chrom":"chr1","start":10546001,"end":10553200,"type":"deletion"},
  {"chrom":"chr1","start":10546101,"end":10553000,"type":"deletion"}
];

// NOTE: this script deliberately sticks to ES5 syntax (var, function
// expressions, string concatenation) so that it keeps running under jjs.

/**
 * Tell whether two segments qualify for merging.
 *
 * Two segments qualify when they share the same chrom and type, overlap by
 * at least one position, and the overlap covers at least `minFraction` of
 * the length of EACH segment (reciprocal overlap).
 *
 * @param {{chrom: string, start: number, end: number, type: string}} a
 * @param {{chrom: string, start: number, end: number, type: string}} b
 * @param {number} minFraction - minimal overlap / length ratio, for both segments.
 * @returns {boolean} true when the two segments should be merged.
 */
function shouldMerge(a, b, minFraction) {
  if (a.chrom !== b.chrom || a.type !== b.type) {
    return false;
  }
  // Disjoint (or merely touching) intervals never merge.
  if (b.end <= a.start || b.start >= a.end) {
    return false;
  }
  var overlapLen = Math.min(a.end, b.end) - Math.max(a.start, b.start);
  // A zero-length segment gives 0/0 = NaN here; `NaN >= x` is false, so such
  // a pair is never merged.
  return overlapLen / (a.end - a.start) >= minFraction &&
         overlapLen / (b.end - b.start) >= minFraction;
}

/**
 * Repeatedly merge reciprocally-overlapping segments until no pair qualifies.
 *
 * The input array is not modified; a new array is returned. A merged segment
 * replaces the earlier of the two segments, spans from the smallest start to
 * the largest end, and the later segment is removed. After any merge the scan
 * restarts from the first segment, because the enlarged segment may now
 * qualify against segments that were skipped before.
 *
 * @param {Array<{chrom: string, start: number, end: number, type: string}>} input
 * @param {number} [minFraction=0.5] - reciprocal overlap threshold.
 * @returns {Array<{chrom: string, start: number, end: number, type: string}>}
 */
function mergeSegments(input, minFraction) {
  var threshold = (minFraction === undefined) ? 0.5 : minFraction;
  var merged = input.slice();
  var changed = true;
  while (changed) {
    changed = false;
    // Stop advancing i as soon as something was merged: the outer loop then
    // restarts the scan from the beginning.
    for (var i = 0; !changed && i + 1 < merged.length; i++) {
      var j = i + 1;
      while (j < merged.length) {
        var a = merged[i];
        var b = merged[j];
        if (!shouldMerge(a, b, threshold)) {
          j++;
          continue;
        }
        // Remove b and grow a; j is NOT advanced because the next candidate
        // has just shifted into position j.
        merged.splice(j, 1);
        merged[i] = {
          "chrom": a.chrom,
          "start": Math.min(a.start, b.start),
          "end": Math.max(a.end, b.end),
          "type": a.type
        };
        changed = true;
      }
    }
  }
  return merged;
}

/**
 * Format one segment as a tab-delimited line: chrom, start, end, type.
 *
 * @param {{chrom: string, start: number, end: number, type: string}} seg
 * @returns {string}
 */
function formatSegment(seg) {
  return seg.chrom + "\t" + seg.start + "\t" + seg.end + "\t" + seg.type;
}

// Pick an output function that exists in the current runtime: console.log
// where a console is available (Node.js, browsers), otherwise the shell's
// print(). A bare print() is undefined in Node.js and refers to the
// print-dialog function in browsers.
var emitLine = (typeof console !== "undefined" && typeof console.log === "function")
  ? function (line) { console.log(line); }
  : print;

segments = mergeSegments(segments);

// Indexed loop rather than for...in, which iterates keys (as strings) and
// would also pick up any enumerable property added to the array or prototype.
for (var k = 0; k < segments.length; k++) {
  emitLine(formatSegment(segments[k]));
}
Invoke with jjs or in a Firefox Scratchpad:
$ jjs script.js
chr1 7967601 7984810 duplication
chr1 7967602 7967603 duplication
chr1 1 100 duplication
chr1 10546001 10553200 deletion
I ran into the same problem. Have you solved it?
Hi, I also ran into the same problem ...