|
#include <iostream> |
|
#include <vector> |
|
#include <cstdlib> |
|
#include <cerrno> |
|
#include <algorithm> |
|
|
|
using namespace std; |
|
|
|
class Range |
|
{ |
|
public: |
|
int start; |
|
int end; |
|
inline bool operator < (const Range &other) const |
|
{ |
|
return start < other.start; |
|
} |
|
inline bool overlap(const Range &other) const |
|
{ |
|
return !(end<=other.start || other.end<=start); |
|
} |
|
void merge(const Range &other) |
|
{ |
|
start=min(start,other.start); |
|
end=max(end,other.end); |
|
} |
|
}; |
|
|
|
int main(int argc,char **argv) |
|
{ |
|
const char TAB='\t'; |
|
vector<Range> ranges; |
|
string line; |
|
|
|
if(!getline(cin,line,'\n')) |
|
{ |
|
cerr << "first line missing" << endl; |
|
return EXIT_FAILURE; |
|
} |
|
if(line.compare("query\tstart\tstop")!=0) |
|
{ |
|
cerr << "bad header \""<< line << "\"" << endl; |
|
return EXIT_FAILURE; |
|
} |
|
|
|
while(getline(cin,line,'\n')) |
|
{ |
|
int d1=line.find(TAB); |
|
if(d1==string::npos ) |
|
{ |
|
cerr << "no tab in "<< line << endl; |
|
return EXIT_FAILURE; |
|
} |
|
else if(d1==0) |
|
{ |
|
cerr << "no name in "<< line << endl; |
|
return EXIT_FAILURE; |
|
} |
|
int d2=line.find(TAB,d1+1); |
|
if(d2==string::npos) |
|
{ |
|
cerr << "second tab missing in "<< line << endl; |
|
return EXIT_FAILURE; |
|
} |
|
Range range; |
|
char *p=const_cast<char*>(line.c_str()); |
|
char *p2; |
|
p[d2]=0; |
|
errno=0; |
|
range.start=strtod(&p[d1+1],&p2); |
|
if(range.start<0 || errno!=0 || *p2!=0) |
|
{ |
|
cerr << "bad start in "<< line << endl; |
|
return EXIT_FAILURE; |
|
} |
|
errno=0; |
|
range.end=strtod(&p[d2+1],&p2); |
|
if(range.end<0 || errno!=0 || *p2!=0) |
|
{ |
|
cerr << "bad end in "<< line << endl; |
|
return EXIT_FAILURE; |
|
} |
|
if(range.start>range.end) |
|
{ |
|
swap(range.start,range.end); |
|
} |
|
|
|
if(ranges.empty()) |
|
{ |
|
ranges.push_back(range); |
|
} |
|
else |
|
{ |
|
vector<Range>::iterator iter= lower_bound( |
|
ranges.begin(), |
|
ranges.end(), |
|
range |
|
); |
|
|
|
int index= distance(ranges.begin(),iter); |
|
ranges.insert( iter, range ); |
|
|
|
bool done=false; |
|
do |
|
{ |
|
done=true; |
|
|
|
//remove right side |
|
while(index+1 < ranges.size() && |
|
ranges[index].overlap(ranges[index+1])) |
|
{ |
|
|
|
ranges[index].merge(ranges[index+1]); |
|
ranges.erase(ranges.begin()+(index+1)); |
|
done=false; |
|
} |
|
//remove left side |
|
while(index>0 && |
|
ranges[index-1].overlap(ranges[index])) |
|
{ |
|
|
|
ranges[index-1].merge(ranges[index]); |
|
ranges.erase(ranges.begin()+index); |
|
done=false; |
|
--index; |
|
} |
|
|
|
} while(!done); |
|
} |
|
|
|
} |
|
int sum=0; |
|
for(vector<Range>::const_iterator i=ranges.begin(); |
|
i!=ranges.end(); |
|
++i) |
|
{ |
|
sum+=((*i).end - (*i).start)+1; |
|
} |
|
cout << "coverage="<< sum << " pb" << endl; |
|
return 0; |
|
} |
This problem becomes a lot easier if the start stop are in the form of start<stop, (you'll then need a strand column of course) then in addition you sort the file by the start. That's what I would do first rather than come up with a more complex solution.
Is it sorted or not?
We need more of these challenges :)
that would be considered part of the solution
that could be considered part of the solution
not sorted -i'll make that clearer
Correction: "In this case it is 21+50+21=91" since |130-110|+1=21, not 10.
Correction: "In this case it is 21+50+21=92" since |130-110|+1=21, not 10.