1 2 /****************************************************************************** 3 * 4 * This file is part of canu, a software program that assembles whole-genome 5 * sequencing reads into contigs. 6 * 7 * This software is based on: 8 * 'Celera Assembler' (http://wgs-assembler.sourceforge.net) 9 * the 'kmer package' (http://kmer.sourceforge.net) 10 * both originally distributed by Applera Corporation under the GNU General 11 * Public License, version 2. 12 * 13 * Canu branched from Celera Assembler at its revision 4587. 14 * Canu branched from the kmer project at its revision 1994. 15 * 16 * Modifications by: 17 * 18 * Brian P. Walenz beginning on 2018-JUL-20 19 * are a 'United States Government Work', and 20 * are released in the public domain 21 * 22 * File 'README.licenses' in the root directory of this distribution contains 23 * full conditions and disclaimers for each license. 24 */ 25 26 #ifndef INTERVALS_H 27 #define INTERVALS_H 28 29 #include "types.H" 30 31 // The interval coordinates use the usual C semantics of [bgn, end) - 32 // 'x=bgn' is inside the interval, but 'x=end' is not. 33 34 template <class iNum> 35 class intervals { 36 private: 37 struct _ir { 38 iNum _bgn; 39 iNum _end; 40 uint32 _num; 41 }; 42 43 public: intervals()44 intervals() { }; ~intervals()45 ~intervals() { delete [] _list; }; 46 clear(void)47 void clear(void) { 48 _isSorted = true; 49 _isSquashed = true; 50 _listLen = 0; 51 }; 52 53 // Accessors. 54 size(void)55 uint32 size(void) const { return(_listLen); }; 56 bgn(uint32 idx)57 iNum bgn (uint32 idx) const { return(_list[idx]._bgn); }; end(uint32 idx)58 iNum end (uint32 idx) const { return(_list[idx]._end); }; span(uint32 idx)59 iNum span(uint32 idx) const { return(_list[idx]._end - _list[idx]._bgn); }; 60 count(uint32 idx)61 uint32 count(uint32 idx) const { return(_list[idx]._num); }; 62 63 // Modifiers. 64 bgn(uint32 idx)65 iNum &bgn (uint32 idx) { return(_list[idx]._bgn); }; end(uint32 idx)66 iNum &end (uint32 idx) { return(_list[idx]._end); }; 67 count(uint32 idx)68 uint32 &count(uint32 idx) { return(_list[idx]._num); }; 69 clear(uint32 idx)70 void clear(uint32 idx) { 71 _list[idx]._bgn = iNum(); 72 _list[idx]._end = iNum(); 73 _list[idx]._num = 0; 74 } 75 76 // Creation. 77 // 78 // Add a single interval to the list of intervals specified by either 79 // - the position of the end points 80 // - the position of the start and the length of the span 81 // 82 // Add(intervals) will copy all the intervals from B into this object, 83 // no further processing (sorting, squashing or filtering) is performed. 84 // 85 // Remove the interval at position 'idx' in our list. Doing so will 86 // greatly screw up interation over the intervals, and it is suggested 87 // to instead change the span of the interval to zero and then filter 88 // them out after iteration is complete. 89 90 void add_position(iNum bgn, iNum end); add_span(iNum bgn,iNum len)91 void add_span (iNum bgn, iNum len) { 92 if (len < 0) 93 add_position(bgn+len, bgn); 94 else 95 add_position(bgn, bgn+len); 96 }; 97 98 void add(intervals<iNum> const &B); 99 100 void remove(uint32 idx); 101 102 // Sort intervals by increasing coordinate, breaking ties with the end 103 // coordinate. 104 // 105 // Combine intervals that overlap by at least 'minOverlap' into one item. 106 // 107 // Discard intervals that are smaller than minLength or larger than 108 // maxLength. 109 110 void sort(void); 111 void squash(iNum minOverlap=0); 112 void filter(iNum minLength, iNum maxLength); 113 114 // setToUnion - populate this intervals object with all the intervals in A 115 // and B. If both A and B are squashed, this intervals object will also 116 // be squashed. 117 // 118 // setToIntersection - each interval in A (B) is intersected with all 119 // intervals in B (A), and the resulting interval is added to this object. 120 // 121 // setToContained - each interval in A that is contained fully in some 122 // interval in B is added to this intervals object. 123 #if 0 124 void setToUnion (intervals<iNum> const &A, intervals<iNum> const &B); 125 void setToIntersection(intervals<iNum> const &A, intervals<iNum> const &B); 126 void setToContained (intervals<iNum> const &A, intervals<iNum> const &B); 127 #endif 128 // setToUnion - copy the intervals in A that oveerlap with the interval 129 // bgn-end. 130 // 131 // setToIntersection - copy the intervals in A that intersect with the 132 // interval bgn-end, and trim them to that range. 133 // 134 // setToContained - copy the intervals in A that are contained within the 135 // interval bgn-end. 136 // 137 // setToInversion 138 // - if A is squashed, intervals that fill the 'holes' in A, bounded by 139 // bgn and end) are added to this object. 140 // - if A is not squashed, each interval in A will contribute 0, 1 or 2 141 // new intervals to this object, representing the holes, bounded by bgn and end, 142 // created by only that single interval in A. 143 // 144 // bgn[ ]end 145 // -------- --------- ---- A 146 // -------- --------- union 147 // -- --------- intersection 148 // --------- contained 149 // -- ---- inversion 150 #if 0 151 void setToUnion (iNum bgn, iNum end, intervals<iNum> const &A); 152 void setToIntersection(iNum bgn, iNum end, intervals<iNum> const &A); 153 void setToContained (iNum bgn, iNum end, intervals<iNum> const &A); 154 #endif 155 void setToInversion (iNum bgn, iNum end, intervals<iNum> const &A); 156 157 // Helper functions. 158 private: 159 void setToInversion1(iNum bgn, iNum end, intervals<iNum> const &A); 160 void setToInversion2(iNum bgn, iNum end, intervals<iNum> const &A); 161 162 private: 163 bool _isSorted = true; 164 bool _isSquashed = true; 165 166 uint32 _listMax = 0; 167 uint32 _listLen = 0; 168 _ir *_list = nullptr; 169 }; 170 171 172 173 template <class iNum> 174 class intervalsDepth { 175 private: 176 struct _idp { // An intervalDepthPosition stores the position 177 iNum _pos; // of a change in depth, and the delta of that 178 int32 _dlt; // change (which is either +1 or -1). 179 }; 180 181 struct _idr { // An intervalDepthRegion has the coordinates 182 iNum _bgn; // of the region and the depth. 183 iNum _end; 184 uint32 _dpt; 185 }; 186 187 public: intervalsDepth()188 intervalsDepth() { 189 }; intervalsDepth(intervals<iNum> const & IL)190 intervalsDepth(intervals<iNum> const &IL) { 191 computeDepth(IL); 192 }; ~intervalsDepth()193 ~intervalsDepth() { 194 delete [] _list; 195 }; 196 size(void)197 uint32 size(void) { return(_listLen); }; 198 bgn(uint32 idx)199 iNum bgn (uint32 idx) { return(_list[idx]._bgn); }; end(uint32 idx)200 iNum end (uint32 idx) { return(_list[idx]._end); }; span(uint32 idx)201 iNum span(uint32 idx) { return(_list[idx]._end - _list[idx]._bgn); }; 202 depth(uint32 idx)203 uint32 depth(uint32 idx) { return(_list[idx]._dpt); }; 204 205 void computeDepth(intervals<iNum> const &IL); 206 207 private: 208 void computeDepth(uint32 idplen, _idp *idp); 209 210 uint32 _listLen = 0; 211 _idr *_list = nullptr; 212 }; 213 214 215 216 #define INTERVALS_IMPLEMENTATION 217 #include "intervals-implementation.H" 218 #undef INTERVALS_IMPLEMENTATION 219 220 221 #endif // INTERVALS_H 222