1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' (http://kmer.sourceforge.net)
10  *  both originally distributed by Applera Corporation under the GNU General
11  *  Public License, version 2.
12  *
13  *  Canu branched from Celera Assembler at its revision 4587.
14  *  Canu branched from the kmer project at its revision 1994.
15  *
16  *  Modifications by:
17  *
18  *    Brian P. Walenz beginning on 2018-JUL-20
19  *      are a 'United States Government Work', and
20  *      are released in the public domain
21  *
22  *  File 'README.licenses' in the root directory of this distribution contains
23  *  full conditions and disclaimers for each license.
24  */
25 
26 #ifndef INTERVALS_H
27 #define INTERVALS_H
28 
29 #include "types.H"
30 
31 //  The interval coordinates use the usual C semantics of [bgn, end) -
32 //  'x=bgn' is inside the interval, but 'x=end' is not.
33 
34 template <class iNum>
35 class intervals {
36 private:
37   struct _ir {
38     iNum      _bgn;
39     iNum      _end;
40     uint32    _num;
41   };
42 
43 public:
intervals()44   intervals()    {                  };
~intervals()45   ~intervals()   { delete [] _list; };
46 
clear(void)47   void      clear(void) {
48     _isSorted   = true;
49     _isSquashed = true;
50     _listLen    = 0;
51   };
52 
53   //  Accessors.
54 
size(void)55   uint32    size(void) const         { return(_listLen); };
56 
bgn(uint32 idx)57   iNum      bgn (uint32 idx) const   { return(_list[idx]._bgn); };
end(uint32 idx)58   iNum      end (uint32 idx) const   { return(_list[idx]._end); };
span(uint32 idx)59   iNum      span(uint32 idx) const   { return(_list[idx]._end - _list[idx]._bgn); };
60 
count(uint32 idx)61   uint32    count(uint32 idx) const  { return(_list[idx]._num); };
62 
63   //  Modifiers.
64 
bgn(uint32 idx)65   iNum     &bgn (uint32 idx)         { return(_list[idx]._bgn); };
end(uint32 idx)66   iNum     &end (uint32 idx)         { return(_list[idx]._end); };
67 
count(uint32 idx)68   uint32   &count(uint32 idx)        { return(_list[idx]._num); };
69 
clear(uint32 idx)70   void      clear(uint32 idx) {
71     _list[idx]._bgn = iNum();
72     _list[idx]._end = iNum();
73     _list[idx]._num = 0;
74   }
75 
76   //  Creation.
77   //
78   //  Add a single interval to the list of intervals specified by either
79   //   - the position of the end points
80   //   - the position of the start and the length of the span
81   //
82   //  Add(intervals) will copy all the intervals from B into this object,
83   //  no further processing (sorting, squashing or filtering) is performed.
84   //
85   //  Remove the interval at position 'idx' in our list.  Doing so will
86   //  greatly screw up interation over the intervals, and it is suggested
87   //  to instead change the span of the interval to zero and then filter
88   //  them out after iteration is complete.
89 
90   void      add_position(iNum bgn, iNum end);
add_span(iNum bgn,iNum len)91   void      add_span    (iNum bgn, iNum len) {
92     if (len < 0)
93       add_position(bgn+len, bgn);
94     else
95       add_position(bgn, bgn+len);
96   };
97 
98   void      add(intervals<iNum> const &B);
99 
100   void      remove(uint32 idx);
101 
102   //  Sort intervals by increasing coordinate, breaking ties with the end
103   //  coordinate.
104   //
105   //  Combine intervals that overlap by at least 'minOverlap' into one item.
106   //
107   //  Discard intervals that are smaller than minLength or larger than
108   //  maxLength.
109 
110   void      sort(void);
111   void      squash(iNum minOverlap=0);
112   void      filter(iNum minLength, iNum maxLength);
113 
114   //  setToUnion - populate this intervals object with all the intervals in A
115   //  and B.  If both A and B are squashed, this intervals object will also
116   //  be squashed.
117   //
118   //  setToIntersection - each interval in A (B) is intersected with all
119   //  intervals in B (A), and the resulting interval is added to this object.
120   //
121   //  setToContained - each interval in A that is contained fully in some
122   //  interval in B is added to this intervals object.
123 #if 0
124   void      setToUnion       (intervals<iNum> const &A, intervals<iNum> const &B);
125   void      setToIntersection(intervals<iNum> const &A, intervals<iNum> const &B);
126   void      setToContained   (intervals<iNum> const &A, intervals<iNum> const &B);
127 #endif
128   //  setToUnion - copy the intervals in A that oveerlap with the interval
129   //  bgn-end.
130   //
131   //  setToIntersection - copy the intervals in A that intersect with the
132   //  interval bgn-end, and trim them to that range.
133   //
134   //  setToContained - copy the intervals in A that are contained within the
135   //  interval bgn-end.
136   //
137   //  setToInversion
138   //   - if A is squashed, intervals that fill the 'holes' in A, bounded by
139   //     bgn and end) are added to this object.
140   //   - if A is not squashed, each interval in A will contribute 0, 1 or 2
141   //     new intervals to this object, representing the holes, bounded by bgn and end,
142   //     created by only that single interval in A.
143   //
144   //                   bgn[               ]end
145   //                --------  ---------     ----  A
146   //                --------  ---------           union
147   //                      --  ---------           intersection
148   //                          ---------           contained
149   //                        --         ----       inversion
150 #if 0
151   void      setToUnion       (iNum bgn, iNum end, intervals<iNum> const &A);
152   void      setToIntersection(iNum bgn, iNum end, intervals<iNum> const &A);
153   void      setToContained   (iNum bgn, iNum end, intervals<iNum> const &A);
154 #endif
155   void      setToInversion   (iNum bgn, iNum end, intervals<iNum> const &A);
156 
157   //  Helper functions.
158 private:
159   void      setToInversion1(iNum bgn, iNum end, intervals<iNum> const &A);
160   void      setToInversion2(iNum bgn, iNum end, intervals<iNum> const &A);
161 
162 private:
163   bool     _isSorted   = true;
164   bool     _isSquashed = true;
165 
166   uint32   _listMax    = 0;
167   uint32   _listLen    = 0;
168   _ir     *_list       = nullptr;
169 };
170 
171 
172 
173 template <class iNum>
174 class intervalsDepth {
175 private:
176   struct _idp {     //  An intervalDepthPosition stores the position
177     iNum   _pos;    //  of a change in depth, and the delta of that
178     int32  _dlt;    //  change (which is either +1 or -1).
179   };
180 
181   struct _idr {     //  An intervalDepthRegion has the coordinates
182     iNum   _bgn;    //  of the region and the depth.
183     iNum   _end;
184     uint32 _dpt;
185   };
186 
187 public:
intervalsDepth()188   intervalsDepth() {
189   };
intervalsDepth(intervals<iNum> const & IL)190   intervalsDepth(intervals<iNum> const &IL) {
191     computeDepth(IL);
192   };
~intervalsDepth()193   ~intervalsDepth() {
194     delete [] _list;
195   };
196 
size(void)197   uint32    size(void)          { return(_listLen); };
198 
bgn(uint32 idx)199   iNum      bgn (uint32 idx)    { return(_list[idx]._bgn); };
end(uint32 idx)200   iNum      end (uint32 idx)    { return(_list[idx]._end); };
span(uint32 idx)201   iNum      span(uint32 idx)    { return(_list[idx]._end - _list[idx]._bgn); };
202 
depth(uint32 idx)203   uint32    depth(uint32 idx)   { return(_list[idx]._dpt); };
204 
205   void      computeDepth(intervals<iNum> const &IL);
206 
207 private:
208   void      computeDepth(uint32 idplen, _idp *idp);
209 
210   uint32   _listLen = 0;
211   _idr    *_list    = nullptr;
212 };
213 
214 
215 
216 #define INTERVALS_IMPLEMENTATION
217 #include "intervals-implementation.H"
218 #undef  INTERVALS_IMPLEMENTATION
219 
220 
221 #endif  //  INTERVALS_H
222