1 #ifndef OLIGOFAR_SPACIALRANGES__HPP
2 #define OLIGOFAR_SPACIALRANGES__HPP
3 
4 #include "defs.hpp"
5 #include "array_set.hpp"
6 #include "string-util.hpp"
7 
8 BEGIN_OLIGOFAR_SCOPES
9 
10 class CSpacialRanges
11 {
12 public:
13     enum EFlags { fUnique = 0x01 };
14     enum EConstants { kHalo = 16 };
15 
CSpacialRanges(int min=0,int max=numeric_limits<int>::max ()-1,CSpacialRanges * p=0)16     CSpacialRanges( int min = 0, int max = numeric_limits<int>::max() - 1, CSpacialRanges * p = 0 ) :
17         m_parent( p ), m_left( 0 ), m_right( 0 ), m_flags( 0 ), m_halo(0),
18         m_min( min ), m_max( max ), // m_mid( (m_min + m_max + 1)/2 ),
19         m_from( GetMid() + 1 ), m_to( GetMid() ), m_count( 0 ),
20         m_seqdata(0) {}
21     ~CSpacialRanges();
22 
23     void AddRange( int from, int to, Uint4 flags = 0, int count = 1 );
Empty() const24     bool Empty() const { return m_count == 0 && (m_left == 0 || m_left->Empty()) && (m_right == 0 || m_right->Empty()); }
GetLeftmost()25     CSpacialRanges * GetLeftmost() { return m_left ? m_left->GetLeftmost() : m_count ? this : m_right ? m_right->GetLeftmost() : 0; }
GetRightmost()26     CSpacialRanges * GetRightmost() { return m_right ? m_right->GetRightmost() : m_count ? this : m_left ? m_left->GetRightmost() : 0; }
GetTop()27     CSpacialRanges * GetTop() { return m_parent ? m_parent->GetTop() : this; }
28     CSpacialRanges * FindContaining( int from, int to );
29     CSpacialRanges * FindOverlapping( int from, int to );
30     CSpacialRanges * Right();
31     CSpacialRanges * Left();
IsLeft() const32     bool IsLeft() const { return m_parent && m_parent->m_left == this; }
IsRight() const33     bool IsRight() const { return m_parent && m_parent->m_right == this; }
IsTop() const34     bool IsTop() const { return m_parent == 0; }
35     bool Contains( int pos ) const;
36     bool Contains( int from, int to ) const;
37     bool Overlaps( int from, int to ) const;
GetRank() const38     int GetRank() const { return m_parent ? m_parent->GetRank() + 1 : 0; }
GetFrom() const39     int GetFrom() const { return m_from; }
GetTo() const40     int GetTo() const { return m_to; }
GetCount() const41     int GetCount() const { return m_count; }
GetFlags() const42     Uint4 GetFlags() const { return m_flags; }
43     void Print( ostream& o, const string& rs = "; " ) const;
44     void PrintTree( ostream& o ) const;
45     void RemoveSingletons();
46 
47     const char * GetBase( int pos ) const;
48 
49     void SetSeqdataFrom( const char * begin, int length, int halo = kHalo );
50     void AddDonor( int donor );
51     void AddAcceptor( int acceptor );
GetSeqdata() const52     const char * GetSeqdata() const { return m_seqdata; }
GetSeqdataLength() const53     int GetSeqdataLength() const { return m_seqdata ? m_to - m_from + 1 + 2*m_halo : 0; }
GetSeqdataOffset() const54     int GetSeqdataOffset() const { return m_from - m_halo; }
GetHalo() const55     int GetHalo() const { return m_halo; }
56 
57     typedef array_set<int> TSplices;
GetSplices() const58     const TSplices& GetSplices() const { return m_donors; }
59 
60     bool HasDonor( int donor ) const;
HasAcceptor(int acceptor) const61     bool HasAcceptor( int acceptor ) const { return HasDonor( -acceptor ); }
62 
63 protected:
GetMid() const64     int GetMid() const { return ( m_min + m_max + 1 ) / 2; }
65     CSpacialRanges * x_Right();
66     CSpacialRanges * x_Left();
67 protected:
68     CSpacialRanges * m_parent;
69     CSpacialRanges * m_left;
70     CSpacialRanges * m_right;
71     Uint2 m_flags;
72     Uint2 m_halo;
73     int m_min;
74     int m_max;
75     int m_from;
76     int m_to;
77     int m_count;
78     char * m_seqdata;
79     TSplices m_donors; // acceptors are 'negated'
80     //array_set<int> m_acceptors;
81 };
82 
GetBase(int pos) const83 inline const char * CSpacialRanges::GetBase( int pos ) const
84 {
85     if( GetCount() ) {
86         if( pos < (m_from - m_halo/2) ) return m_left ? m_left->GetBase( pos ) : 0;
87         if( pos > (m_to + m_halo/2) ) return m_right ? m_right->GetBase( pos ) : 0;
88         return GetSeqdata() + pos - GetSeqdataOffset();
89     } else {
90         if( pos < GetMid() ) return m_left ? m_left->GetBase( pos ) : 0;
91         if( pos > GetMid() ) return m_right ? m_right->GetBase( pos ) : 0;
92         return 0;
93     }
94 }
95 
SetSeqdataFrom(const char * begin,int length,int halo)96 inline void CSpacialRanges::SetSeqdataFrom( const char * begin, int length, int halo )
97 {
98     delete [] m_seqdata;
99     if( m_count ) {
100         m_halo = halo;
101         int len = m_to - m_from + 1 + 2*halo;
102         m_seqdata = new char[len];
103         const char * end = begin + length;
104         const char * b = begin + m_from - halo;
105         const char * e = begin + m_to + 1 + halo;
106         int x = 0;
107         if( b < begin ) {
108             memset( m_seqdata, '\xf', x = (begin - b) );
109             b = begin;
110         }
111         if( e > end ) {
112             memset( m_seqdata + len - (e - end), '\xf', e - end );
113             e = end;
114         }
115         memcpy( m_seqdata + x, b, e - b );
116     } else { m_seqdata = 0; m_halo = 0; }
117     if( m_left ) m_left->SetSeqdataFrom( begin, length, halo );
118     if( m_right ) m_right->SetSeqdataFrom( begin, length, halo );
119 }
120 
AddDonor(int donor)121 inline void CSpacialRanges::AddDonor( int donor )
122 {
123     if( donor < m_from ) {
124         if( m_left ) { m_left->AddDonor( donor ); }
125         //else cerr << this << "(" << m_from << ".." << m_to << "} skips donor " << donor << "\n";
126         return;
127     } else if( donor > m_to ) {
128         if( m_right ) { m_right->AddDonor( donor ); }
129         // else cerr << this << "(" << m_from << ".." << m_to << "} skips donor " << donor << "\n";
130         if( donor > m_to + m_halo/2 ) return;
131     }
132         //cerr << this << "(" << m_from << ".." << m_to << "} adds donor " << donor << "\n";
133     m_donors.insert( donor );
134 }
135 
AddAcceptor(int acceptor)136 inline void CSpacialRanges::AddAcceptor( int acceptor )
137 {
138     if( acceptor < m_from ) {
139         if( m_left ) { m_left->AddAcceptor( acceptor ); }
140 //        else cerr << this << "(" << m_from << ".." << m_to << "} skips acceptor " << acceptor << "\n";
141         if( acceptor < m_from - m_halo/2 || acceptor <= 0 ) return;
142     } else if( acceptor > m_to ) {
143         if( m_right ) { m_right->AddAcceptor( acceptor ); }
144 //        else cerr << this << "(" << m_from << ".." << m_to << "} skips acceptor " << acceptor << "\n";
145         return;
146     }
147     //    cerr << this << "(" << m_from << ".." << m_to << "} adds acceptor " << acceptor << "\n";
148     m_donors.insert( - acceptor );
149 }
150 
Contains(int pos) const151 inline bool CSpacialRanges::Contains( int pos ) const
152 {
153     if( pos < GetMid() ) {
154         if( m_count && pos >= m_from ) return true;
155         else return m_left && m_left->Contains( pos );
156     } else {
157         if( m_count && pos <= m_to ) return true;
158         else return m_right && m_right->Contains( pos );
159     }
160 }
161 
HasDonor(int pos) const162 inline bool CSpacialRanges::HasDonor( int pos ) const
163 {
164     int apos = abs( pos );
165     if( apos < GetMid() ) {
166         if( m_count && apos >= m_from ) return m_donors.find( pos ) != m_donors.end();
167         else return m_left && m_left->HasDonor( pos );
168     } else {
169         if( m_count && apos <= m_to ) return m_donors.find( pos ) != m_donors.end();
170         else return m_right && m_right->HasDonor( pos );
171     }
172     return false;
173 }
174 
Contains(int from,int to) const175 inline bool CSpacialRanges::Contains( int from, int to ) const
176 {
177     if( m_count && from >= m_from && to <= m_to ) return true;
178     if( to < GetMid() && m_left ) return m_left->Contains( from, to );
179     if( from > GetMid() && m_right ) return m_right->Contains( from, to );
180     return false;
181 }
182 
Overlaps(int from,int to) const183 inline bool CSpacialRanges::Overlaps( int from, int to ) const
184 {
185     if( m_count && to >= m_from && from <= m_to ) return true;
186     if( to < GetMid() && m_left ) return m_left->Overlaps( from, to );
187     if( from > GetMid() && m_right ) return m_right->Overlaps( from, to );
188     return false;
189 }
190 
FindContaining(int from,int to)191 inline CSpacialRanges * CSpacialRanges::FindContaining( int from, int to )
192 {
193     if( to > from ) swap( from, to );
194     ASSERT( to <= m_max && from >= m_min );
195     if( m_count && from >= m_from && to <= m_to ) return this;
196     if( to < GetMid() && m_left ) return m_left->FindContaining( from, to );
197     if( from > GetMid() && m_right ) return m_right->FindContaining( from, to );
198     return (from >= m_from && to <= m_to && m_count > 0) ? this : 0;
199 }
200 
FindOverlapping(int from,int to)201 inline CSpacialRanges * CSpacialRanges::FindOverlapping( int from, int to )
202 {
203     /*
204     if( m_count ) cerr << "\x1b[1m";
205     CRange<int> fullrange( m_min, m_max );
206     CRange<int> islerange( m_from, m_to );
207     CRange<int> lookrange( from, to );
208     //cerr << this << DISPLAY( m_min ) << DISPLAY( from ) << DISPLAY( m_left ) << DISPLAY( GetMid() ) << DISPLAY( m_right ) << DISPLAY( to ) << DISPLAY( m_max ) << "\n";
209     cerr << this << DISPLAY( fullrange ) << DISPLAY( islerange ) << DISPLAY( lookrange ) << DISPLAY( GetMid() ) << DISPLAY( m_left ) << DISPLAY( m_right ) << "\n";
210     if( m_count ) cerr << "\x1b[0m";
211     */
212     //ASSERT( from <= to );
213     if( to > from ) swap( from, to );
214     ASSERT( to <= m_max && from >= m_min );
215     if( m_count && to >= m_from && from <= m_to ) return this;
216     if( to < GetMid() && m_left ) return m_left->FindOverlapping( from, to );
217     if( from > GetMid() && m_right ) return m_right->FindOverlapping( from, to );
218     return 0;
219 }
220 
RemoveSingletons()221 inline void CSpacialRanges::RemoveSingletons()
222 {
223     if( m_left ) {
224         m_left->RemoveSingletons();
225         if( m_left->Empty() ) delete m_left;
226     }
227     if( m_right ) {
228         m_right->RemoveSingletons();
229         if( m_right->Empty() ) delete m_right;
230     }
231     if( m_count == 1 && ( m_flags & fUnique ) == 0 ) {
232         m_from = GetMid() + 1;
233         m_to = GetMid();
234         m_count = 0;
235         m_flags = 0;
236     }
237 }
238 
Print(ostream & o,const string & rs) const239 inline void CSpacialRanges::Print( ostream& o, const string& rs ) const
240 {
241     if( m_left ) m_left->Print( o, rs );
242     if( m_count ) {
243         o << m_from << ".." << m_to << "(" << m_count << ")" << (m_flags & fUnique ? "u" : "." );
244         if( m_donors.size() ) o << " {" << Join( ",", m_donors ) << "}";
245         o <<  rs;
246     }
247     if( m_right ) m_right->Print( o, rs );
248 }
249 
PrintTree(ostream & o) const250 inline void CSpacialRanges::PrintTree( ostream& o ) const
251 {
252     string prefix = string( GetRank(), ' ' );
253     o << prefix << this << " {" << m_min << ".." << m_max << "\t" << GetMid() << "\t" << m_left << "\t" << m_right << "\n";
254     if( m_left ) m_left->PrintTree( o );
255     o << prefix;
256     if( m_count ) o << "[" << m_from << ".." << m_to << "]";
257     o << "(" << m_count << ")" << (m_flags & fUnique ? "u" : "") << "\n";
258     if( m_right ) m_right->PrintTree( o );
259     o << prefix << "}\n";
260 }
261 
Right()262 inline CSpacialRanges * CSpacialRanges::Right()
263 {
264     for( CSpacialRanges * r = x_Right() ; r ; r = r->x_Right() ) {
265         if( r->GetCount() ) return r;
266     }
267     return 0;
268 }
269 
x_Right()270 inline CSpacialRanges * CSpacialRanges::x_Right()
271 {
272     if( m_right ) return m_right->GetLeftmost();
273     if( IsLeft() ) return m_parent;
274     CSpacialRanges * r = m_parent;
275     while( r && r->IsRight() ) r = r->m_parent;
276     return r ? r->m_parent : 0;
277 }
278 
Left()279 inline CSpacialRanges * CSpacialRanges::Left()
280 {
281     for( CSpacialRanges * r = x_Left() ; r ; r = r->x_Left() ) {
282         if( r->GetCount() ) return r;
283     }
284     return 0;
285 }
286 
x_Left()287 inline CSpacialRanges * CSpacialRanges::x_Left()
288 {
289     if( m_left ) return m_left->GetRightmost();
290     if( IsRight() ) return m_parent;
291     CSpacialRanges * r = m_parent;
292     while( r && r->IsLeft() ) r = r->m_parent;
293     return r ? r->m_parent : 0;
294 }
295 
~CSpacialRanges()296 inline CSpacialRanges::~CSpacialRanges()
297 {
298     delete m_seqdata;
299     if( m_parent ) {
300         if( m_parent->m_left == this ) m_parent->m_left = 0;
301         else if( m_parent->m_right == this ) m_parent->m_right = 0;
302     }
303     delete m_left; delete m_right;
304 }
305 
AddRange(int from,int to,Uint4 flags,int count)306 inline void CSpacialRanges::AddRange( int from, int to, Uint4 flags, int count )
307 {
308     ASSERT( from >= m_min );
309     ASSERT( to <= m_max );
310     ASSERT( from <= to );
311     bool tryMergeLeft = false;
312     bool tryMergeRight = false;
313 
314     if( m_count ) {
315         if( to < m_from - 1 ) {
316             if( m_left == 0 ) m_left = new CSpacialRanges( m_min, GetMid() - 1, this );
317             m_left->AddRange( from, to, flags, count );
318             return;
319         } else if( from > m_to + 1 ) {
320             if( m_right == 0 ) m_right = new CSpacialRanges( GetMid(), m_max, this );
321             m_right->AddRange( from, to, flags, count );
322             return;
323         } else {
324             ++m_count;
325             m_flags |= flags;
326             if( from < m_from ) {
327                 m_from = from;
328                 tryMergeLeft = true;
329             }
330             if( to > m_to ) {
331                 m_to = to;
332                 tryMergeRight = true;
333             }
334         }
335     } else {
336         if( to < GetMid() ) {
337             if( m_left == 0 ) m_left = new CSpacialRanges( m_min, GetMid() - 1, this );
338             m_left->AddRange( from, to, flags, count );
339             return;
340         } else if( from > GetMid() ) {
341             if( m_right == 0 ) m_right = new CSpacialRanges( GetMid(), m_max, this );
342             m_right->AddRange( from, to, flags, count );
343             return;
344         } else {
345             m_from = from;
346             m_to = to;
347             m_flags |= flags;
348             m_count += count;
349             tryMergeLeft = true;
350             tryMergeRight = true;
351         }
352     }
353 
354     while( tryMergeLeft && m_left ) {
355         if( CSpacialRanges * r = m_left->GetRightmost() ) {
356             ASSERT( r->GetCount() );
357             if( r->GetTo() >= m_from - 1 ) {
358                 if( m_from >= r->GetFrom() ) { m_from = r->GetFrom(); tryMergeLeft = false; }
359                 m_count += r->GetCount();
360                 m_flags |= r->m_flags;
361                 ASSERT( r->m_right == 0 );
362                 r->m_from = r->GetMid() + 1;
363                 r->m_to = r->GetMid();
364                 r->m_count = 0;
365                 r->m_flags = 0;
366                 if( ! r->m_left ) {
367                     while( r != this && r->Empty() ) {
368                         CSpacialRanges * x = r->m_parent;
369                         delete r;
370                         r = x;
371                     }
372                 }
373             } else tryMergeLeft = false;
374         } else tryMergeLeft = false;
375     }
376     while( tryMergeRight && m_right ) {
377         if( CSpacialRanges * l = m_right->GetLeftmost() ) {
378             ASSERT( l->GetCount() );
379             if( l->GetFrom() <= m_to + 1 ) {
380                 if( m_to <= l->GetTo() ) { m_to = l->GetTo(); tryMergeRight = false; }
381                 m_count += l->GetCount();
382                 m_flags |= l->m_flags;
383                 ASSERT( l->m_left == 0 );
384                 l->m_from = l->GetMid() + 1;
385                 l->m_to = l->GetMid();
386                 l->m_count = 0;
387                 l->m_flags = 0;
388                 if( ! l->m_right ) {
389                     while( l != this && l->Empty() ) {
390                         CSpacialRanges * x = l->m_parent;
391                         delete l;
392                         l = x;
393                     }
394                 }
395             } else tryMergeRight = false;
396         } else tryMergeRight = false;
397     }
398     ASSERT( m_from >= m_min );
399     ASSERT( m_to <= m_max );
400     ASSERT( m_from <= GetMid() );
401     ASSERT( m_to >= GetMid() );
402     ASSERT( m_from < m_to );
403     ASSERT( m_min < m_max );
404 }
405 
406 END_OLIGOFAR_SCOPES
407 
408 #endif
409