1 #ifndef OLIGOFAR_SPACIALRANGES__HPP
2 #define OLIGOFAR_SPACIALRANGES__HPP
3
4 #include "defs.hpp"
5 #include "array_set.hpp"
6 #include "string-util.hpp"
7
8 BEGIN_OLIGOFAR_SCOPES
9
10 class CSpacialRanges
11 {
12 public:
13 enum EFlags { fUnique = 0x01 };
14 enum EConstants { kHalo = 16 };
15
CSpacialRanges(int min=0,int max=numeric_limits<int>::max ()-1,CSpacialRanges * p=0)16 CSpacialRanges( int min = 0, int max = numeric_limits<int>::max() - 1, CSpacialRanges * p = 0 ) :
17 m_parent( p ), m_left( 0 ), m_right( 0 ), m_flags( 0 ), m_halo(0),
18 m_min( min ), m_max( max ), // m_mid( (m_min + m_max + 1)/2 ),
19 m_from( GetMid() + 1 ), m_to( GetMid() ), m_count( 0 ),
20 m_seqdata(0) {}
21 ~CSpacialRanges();
22
23 void AddRange( int from, int to, Uint4 flags = 0, int count = 1 );
Empty() const24 bool Empty() const { return m_count == 0 && (m_left == 0 || m_left->Empty()) && (m_right == 0 || m_right->Empty()); }
GetLeftmost()25 CSpacialRanges * GetLeftmost() { return m_left ? m_left->GetLeftmost() : m_count ? this : m_right ? m_right->GetLeftmost() : 0; }
GetRightmost()26 CSpacialRanges * GetRightmost() { return m_right ? m_right->GetRightmost() : m_count ? this : m_left ? m_left->GetRightmost() : 0; }
GetTop()27 CSpacialRanges * GetTop() { return m_parent ? m_parent->GetTop() : this; }
28 CSpacialRanges * FindContaining( int from, int to );
29 CSpacialRanges * FindOverlapping( int from, int to );
30 CSpacialRanges * Right();
31 CSpacialRanges * Left();
IsLeft() const32 bool IsLeft() const { return m_parent && m_parent->m_left == this; }
IsRight() const33 bool IsRight() const { return m_parent && m_parent->m_right == this; }
IsTop() const34 bool IsTop() const { return m_parent == 0; }
35 bool Contains( int pos ) const;
36 bool Contains( int from, int to ) const;
37 bool Overlaps( int from, int to ) const;
GetRank() const38 int GetRank() const { return m_parent ? m_parent->GetRank() + 1 : 0; }
GetFrom() const39 int GetFrom() const { return m_from; }
GetTo() const40 int GetTo() const { return m_to; }
GetCount() const41 int GetCount() const { return m_count; }
GetFlags() const42 Uint4 GetFlags() const { return m_flags; }
43 void Print( ostream& o, const string& rs = "; " ) const;
44 void PrintTree( ostream& o ) const;
45 void RemoveSingletons();
46
47 const char * GetBase( int pos ) const;
48
49 void SetSeqdataFrom( const char * begin, int length, int halo = kHalo );
50 void AddDonor( int donor );
51 void AddAcceptor( int acceptor );
GetSeqdata() const52 const char * GetSeqdata() const { return m_seqdata; }
GetSeqdataLength() const53 int GetSeqdataLength() const { return m_seqdata ? m_to - m_from + 1 + 2*m_halo : 0; }
GetSeqdataOffset() const54 int GetSeqdataOffset() const { return m_from - m_halo; }
GetHalo() const55 int GetHalo() const { return m_halo; }
56
57 typedef array_set<int> TSplices;
GetSplices() const58 const TSplices& GetSplices() const { return m_donors; }
59
60 bool HasDonor( int donor ) const;
HasAcceptor(int acceptor) const61 bool HasAcceptor( int acceptor ) const { return HasDonor( -acceptor ); }
62
63 protected:
GetMid() const64 int GetMid() const { return ( m_min + m_max + 1 ) / 2; }
65 CSpacialRanges * x_Right();
66 CSpacialRanges * x_Left();
67 protected:
68 CSpacialRanges * m_parent;
69 CSpacialRanges * m_left;
70 CSpacialRanges * m_right;
71 Uint2 m_flags;
72 Uint2 m_halo;
73 int m_min;
74 int m_max;
75 int m_from;
76 int m_to;
77 int m_count;
78 char * m_seqdata;
79 TSplices m_donors; // acceptors are 'negated'
80 //array_set<int> m_acceptors;
81 };
82
GetBase(int pos) const83 inline const char * CSpacialRanges::GetBase( int pos ) const
84 {
85 if( GetCount() ) {
86 if( pos < (m_from - m_halo/2) ) return m_left ? m_left->GetBase( pos ) : 0;
87 if( pos > (m_to + m_halo/2) ) return m_right ? m_right->GetBase( pos ) : 0;
88 return GetSeqdata() + pos - GetSeqdataOffset();
89 } else {
90 if( pos < GetMid() ) return m_left ? m_left->GetBase( pos ) : 0;
91 if( pos > GetMid() ) return m_right ? m_right->GetBase( pos ) : 0;
92 return 0;
93 }
94 }
95
SetSeqdataFrom(const char * begin,int length,int halo)96 inline void CSpacialRanges::SetSeqdataFrom( const char * begin, int length, int halo )
97 {
98 delete [] m_seqdata;
99 if( m_count ) {
100 m_halo = halo;
101 int len = m_to - m_from + 1 + 2*halo;
102 m_seqdata = new char[len];
103 const char * end = begin + length;
104 const char * b = begin + m_from - halo;
105 const char * e = begin + m_to + 1 + halo;
106 int x = 0;
107 if( b < begin ) {
108 memset( m_seqdata, '\xf', x = (begin - b) );
109 b = begin;
110 }
111 if( e > end ) {
112 memset( m_seqdata + len - (e - end), '\xf', e - end );
113 e = end;
114 }
115 memcpy( m_seqdata + x, b, e - b );
116 } else { m_seqdata = 0; m_halo = 0; }
117 if( m_left ) m_left->SetSeqdataFrom( begin, length, halo );
118 if( m_right ) m_right->SetSeqdataFrom( begin, length, halo );
119 }
120
AddDonor(int donor)121 inline void CSpacialRanges::AddDonor( int donor )
122 {
123 if( donor < m_from ) {
124 if( m_left ) { m_left->AddDonor( donor ); }
125 //else cerr << this << "(" << m_from << ".." << m_to << "} skips donor " << donor << "\n";
126 return;
127 } else if( donor > m_to ) {
128 if( m_right ) { m_right->AddDonor( donor ); }
129 // else cerr << this << "(" << m_from << ".." << m_to << "} skips donor " << donor << "\n";
130 if( donor > m_to + m_halo/2 ) return;
131 }
132 //cerr << this << "(" << m_from << ".." << m_to << "} adds donor " << donor << "\n";
133 m_donors.insert( donor );
134 }
135
AddAcceptor(int acceptor)136 inline void CSpacialRanges::AddAcceptor( int acceptor )
137 {
138 if( acceptor < m_from ) {
139 if( m_left ) { m_left->AddAcceptor( acceptor ); }
140 // else cerr << this << "(" << m_from << ".." << m_to << "} skips acceptor " << acceptor << "\n";
141 if( acceptor < m_from - m_halo/2 || acceptor <= 0 ) return;
142 } else if( acceptor > m_to ) {
143 if( m_right ) { m_right->AddAcceptor( acceptor ); }
144 // else cerr << this << "(" << m_from << ".." << m_to << "} skips acceptor " << acceptor << "\n";
145 return;
146 }
147 // cerr << this << "(" << m_from << ".." << m_to << "} adds acceptor " << acceptor << "\n";
148 m_donors.insert( - acceptor );
149 }
150
Contains(int pos) const151 inline bool CSpacialRanges::Contains( int pos ) const
152 {
153 if( pos < GetMid() ) {
154 if( m_count && pos >= m_from ) return true;
155 else return m_left && m_left->Contains( pos );
156 } else {
157 if( m_count && pos <= m_to ) return true;
158 else return m_right && m_right->Contains( pos );
159 }
160 }
161
HasDonor(int pos) const162 inline bool CSpacialRanges::HasDonor( int pos ) const
163 {
164 int apos = abs( pos );
165 if( apos < GetMid() ) {
166 if( m_count && apos >= m_from ) return m_donors.find( pos ) != m_donors.end();
167 else return m_left && m_left->HasDonor( pos );
168 } else {
169 if( m_count && apos <= m_to ) return m_donors.find( pos ) != m_donors.end();
170 else return m_right && m_right->HasDonor( pos );
171 }
172 return false;
173 }
174
Contains(int from,int to) const175 inline bool CSpacialRanges::Contains( int from, int to ) const
176 {
177 if( m_count && from >= m_from && to <= m_to ) return true;
178 if( to < GetMid() && m_left ) return m_left->Contains( from, to );
179 if( from > GetMid() && m_right ) return m_right->Contains( from, to );
180 return false;
181 }
182
Overlaps(int from,int to) const183 inline bool CSpacialRanges::Overlaps( int from, int to ) const
184 {
185 if( m_count && to >= m_from && from <= m_to ) return true;
186 if( to < GetMid() && m_left ) return m_left->Overlaps( from, to );
187 if( from > GetMid() && m_right ) return m_right->Overlaps( from, to );
188 return false;
189 }
190
FindContaining(int from,int to)191 inline CSpacialRanges * CSpacialRanges::FindContaining( int from, int to )
192 {
193 if( to > from ) swap( from, to );
194 ASSERT( to <= m_max && from >= m_min );
195 if( m_count && from >= m_from && to <= m_to ) return this;
196 if( to < GetMid() && m_left ) return m_left->FindContaining( from, to );
197 if( from > GetMid() && m_right ) return m_right->FindContaining( from, to );
198 return (from >= m_from && to <= m_to && m_count > 0) ? this : 0;
199 }
200
FindOverlapping(int from,int to)201 inline CSpacialRanges * CSpacialRanges::FindOverlapping( int from, int to )
202 {
203 /*
204 if( m_count ) cerr << "\x1b[1m";
205 CRange<int> fullrange( m_min, m_max );
206 CRange<int> islerange( m_from, m_to );
207 CRange<int> lookrange( from, to );
208 //cerr << this << DISPLAY( m_min ) << DISPLAY( from ) << DISPLAY( m_left ) << DISPLAY( GetMid() ) << DISPLAY( m_right ) << DISPLAY( to ) << DISPLAY( m_max ) << "\n";
209 cerr << this << DISPLAY( fullrange ) << DISPLAY( islerange ) << DISPLAY( lookrange ) << DISPLAY( GetMid() ) << DISPLAY( m_left ) << DISPLAY( m_right ) << "\n";
210 if( m_count ) cerr << "\x1b[0m";
211 */
212 //ASSERT( from <= to );
213 if( to > from ) swap( from, to );
214 ASSERT( to <= m_max && from >= m_min );
215 if( m_count && to >= m_from && from <= m_to ) return this;
216 if( to < GetMid() && m_left ) return m_left->FindOverlapping( from, to );
217 if( from > GetMid() && m_right ) return m_right->FindOverlapping( from, to );
218 return 0;
219 }
220
RemoveSingletons()221 inline void CSpacialRanges::RemoveSingletons()
222 {
223 if( m_left ) {
224 m_left->RemoveSingletons();
225 if( m_left->Empty() ) delete m_left;
226 }
227 if( m_right ) {
228 m_right->RemoveSingletons();
229 if( m_right->Empty() ) delete m_right;
230 }
231 if( m_count == 1 && ( m_flags & fUnique ) == 0 ) {
232 m_from = GetMid() + 1;
233 m_to = GetMid();
234 m_count = 0;
235 m_flags = 0;
236 }
237 }
238
Print(ostream & o,const string & rs) const239 inline void CSpacialRanges::Print( ostream& o, const string& rs ) const
240 {
241 if( m_left ) m_left->Print( o, rs );
242 if( m_count ) {
243 o << m_from << ".." << m_to << "(" << m_count << ")" << (m_flags & fUnique ? "u" : "." );
244 if( m_donors.size() ) o << " {" << Join( ",", m_donors ) << "}";
245 o << rs;
246 }
247 if( m_right ) m_right->Print( o, rs );
248 }
249
PrintTree(ostream & o) const250 inline void CSpacialRanges::PrintTree( ostream& o ) const
251 {
252 string prefix = string( GetRank(), ' ' );
253 o << prefix << this << " {" << m_min << ".." << m_max << "\t" << GetMid() << "\t" << m_left << "\t" << m_right << "\n";
254 if( m_left ) m_left->PrintTree( o );
255 o << prefix;
256 if( m_count ) o << "[" << m_from << ".." << m_to << "]";
257 o << "(" << m_count << ")" << (m_flags & fUnique ? "u" : "") << "\n";
258 if( m_right ) m_right->PrintTree( o );
259 o << prefix << "}\n";
260 }
261
Right()262 inline CSpacialRanges * CSpacialRanges::Right()
263 {
264 for( CSpacialRanges * r = x_Right() ; r ; r = r->x_Right() ) {
265 if( r->GetCount() ) return r;
266 }
267 return 0;
268 }
269
x_Right()270 inline CSpacialRanges * CSpacialRanges::x_Right()
271 {
272 if( m_right ) return m_right->GetLeftmost();
273 if( IsLeft() ) return m_parent;
274 CSpacialRanges * r = m_parent;
275 while( r && r->IsRight() ) r = r->m_parent;
276 return r ? r->m_parent : 0;
277 }
278
Left()279 inline CSpacialRanges * CSpacialRanges::Left()
280 {
281 for( CSpacialRanges * r = x_Left() ; r ; r = r->x_Left() ) {
282 if( r->GetCount() ) return r;
283 }
284 return 0;
285 }
286
x_Left()287 inline CSpacialRanges * CSpacialRanges::x_Left()
288 {
289 if( m_left ) return m_left->GetRightmost();
290 if( IsRight() ) return m_parent;
291 CSpacialRanges * r = m_parent;
292 while( r && r->IsLeft() ) r = r->m_parent;
293 return r ? r->m_parent : 0;
294 }
295
~CSpacialRanges()296 inline CSpacialRanges::~CSpacialRanges()
297 {
298 delete m_seqdata;
299 if( m_parent ) {
300 if( m_parent->m_left == this ) m_parent->m_left = 0;
301 else if( m_parent->m_right == this ) m_parent->m_right = 0;
302 }
303 delete m_left; delete m_right;
304 }
305
AddRange(int from,int to,Uint4 flags,int count)306 inline void CSpacialRanges::AddRange( int from, int to, Uint4 flags, int count )
307 {
308 ASSERT( from >= m_min );
309 ASSERT( to <= m_max );
310 ASSERT( from <= to );
311 bool tryMergeLeft = false;
312 bool tryMergeRight = false;
313
314 if( m_count ) {
315 if( to < m_from - 1 ) {
316 if( m_left == 0 ) m_left = new CSpacialRanges( m_min, GetMid() - 1, this );
317 m_left->AddRange( from, to, flags, count );
318 return;
319 } else if( from > m_to + 1 ) {
320 if( m_right == 0 ) m_right = new CSpacialRanges( GetMid(), m_max, this );
321 m_right->AddRange( from, to, flags, count );
322 return;
323 } else {
324 ++m_count;
325 m_flags |= flags;
326 if( from < m_from ) {
327 m_from = from;
328 tryMergeLeft = true;
329 }
330 if( to > m_to ) {
331 m_to = to;
332 tryMergeRight = true;
333 }
334 }
335 } else {
336 if( to < GetMid() ) {
337 if( m_left == 0 ) m_left = new CSpacialRanges( m_min, GetMid() - 1, this );
338 m_left->AddRange( from, to, flags, count );
339 return;
340 } else if( from > GetMid() ) {
341 if( m_right == 0 ) m_right = new CSpacialRanges( GetMid(), m_max, this );
342 m_right->AddRange( from, to, flags, count );
343 return;
344 } else {
345 m_from = from;
346 m_to = to;
347 m_flags |= flags;
348 m_count += count;
349 tryMergeLeft = true;
350 tryMergeRight = true;
351 }
352 }
353
354 while( tryMergeLeft && m_left ) {
355 if( CSpacialRanges * r = m_left->GetRightmost() ) {
356 ASSERT( r->GetCount() );
357 if( r->GetTo() >= m_from - 1 ) {
358 if( m_from >= r->GetFrom() ) { m_from = r->GetFrom(); tryMergeLeft = false; }
359 m_count += r->GetCount();
360 m_flags |= r->m_flags;
361 ASSERT( r->m_right == 0 );
362 r->m_from = r->GetMid() + 1;
363 r->m_to = r->GetMid();
364 r->m_count = 0;
365 r->m_flags = 0;
366 if( ! r->m_left ) {
367 while( r != this && r->Empty() ) {
368 CSpacialRanges * x = r->m_parent;
369 delete r;
370 r = x;
371 }
372 }
373 } else tryMergeLeft = false;
374 } else tryMergeLeft = false;
375 }
376 while( tryMergeRight && m_right ) {
377 if( CSpacialRanges * l = m_right->GetLeftmost() ) {
378 ASSERT( l->GetCount() );
379 if( l->GetFrom() <= m_to + 1 ) {
380 if( m_to <= l->GetTo() ) { m_to = l->GetTo(); tryMergeRight = false; }
381 m_count += l->GetCount();
382 m_flags |= l->m_flags;
383 ASSERT( l->m_left == 0 );
384 l->m_from = l->GetMid() + 1;
385 l->m_to = l->GetMid();
386 l->m_count = 0;
387 l->m_flags = 0;
388 if( ! l->m_right ) {
389 while( l != this && l->Empty() ) {
390 CSpacialRanges * x = l->m_parent;
391 delete l;
392 l = x;
393 }
394 }
395 } else tryMergeRight = false;
396 } else tryMergeRight = false;
397 }
398 ASSERT( m_from >= m_min );
399 ASSERT( m_to <= m_max );
400 ASSERT( m_from <= GetMid() );
401 ASSERT( m_to >= GetMid() );
402 ASSERT( m_from < m_to );
403 ASSERT( m_min < m_max );
404 }
405
406 END_OLIGOFAR_SCOPES
407
408 #endif
409