1 /*
2  *  This file is part of libcxxsupport.
3  *
4  *  libcxxsupport is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  libcxxsupport is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with libcxxsupport; if not, write to the Free Software
16  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 /*
20  *  libcxxsupport is being developed at the Max-Planck-Institut fuer Astrophysik
21  *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
22  *  (DLR).
23  */
24 
25 /*! \file rangeset.h
26  *  Class for storing sets of ranges of integer numbers
27  *
28  *  Copyright (C) 2011-2021 Max-Planck-Society
29  *  \author Martin Reinecke
30  */
31 
32 #ifndef DUCC0_RANGESET_H
33 #define DUCC0_RANGESET_H
34 
35 #include <algorithm>
36 #include <vector>
37 #include <cstddef>
38 #include <iostream>
39 #include "ducc0/infra/error_handling.h"
40 #include "ducc0/math/math_utils.h"
41 
42 namespace ducc0 {
43 
44 /// Class for storing sets of ranges of integer numbers
45 template<typename T> class rangeset
46   {
47   private:
48     typedef std::vector<T> rtype;
49     typedef typename rtype::iterator iterator;
50     typedef typename rtype::const_iterator c_iterator;
51     rtype r;
52 
53     /// Returns the index of the last number in \c r which is <= \a val.
54     /// If \a val is smaller than all numbers in \c r, returns -1.
iiv(const T & val)55     ptrdiff_t iiv (const T &val) const
56       { return ptrdiff_t(std::upper_bound(r.begin(),r.end(),val)-r.begin())-1; }
57 
addRemove(T a,T b,ptrdiff_t v)58     void addRemove (T a, T b, ptrdiff_t v)
59       {
60       ptrdiff_t pos1=iiv(a), pos2=iiv(b);
61       if ((pos1>=0) && (r[size_t(pos1)]==a)) --pos1;
62       // first to delete is at pos1+1; last is at pos2
63       bool insert_a = (pos1&1)==v;
64       bool insert_b = (pos2&1)==v;
65       ptrdiff_t rmstart=pos1+1+(insert_a ? 1 : 0);
66       ptrdiff_t rmend  =pos2-(insert_b ? 1 : 0);
67 
68       MR_assert((rmend-rmstart)&1,"cannot happen");
69 
70       if (insert_a && insert_b && (pos1+1>pos2)) // insert
71         {
72         r.insert(r.begin()+pos1+1,2,a);
73         r[size_t(pos1+2)]=b;
74         }
75       else
76         {
77         if (insert_a) r[size_t(pos1+1)]=a;
78         if (insert_b) r[size_t(pos2)]=b;
79         r.erase(r.begin()+rmstart,r.begin()+rmend+1);
80         }
81       }
82 
83     /*! Estimate a good strategy for set operations involving two rangesets. */
strategy(size_t sza,size_t szb)84     static int strategy (size_t sza, size_t szb)
85       {
86       const double fct1 = 1.;
87       const double fct2 = 1.;
88       size_t slo = sza<szb ? sza : szb,
89             shi = sza<szb ? szb : sza;
90       double cost1 = fct1 * (sza+szb);
91       double cost2 = fct2 * slo * std::max(1u,ilog2(shi));
92       return (cost1<=cost2) ? 1 : (slo==sza) ? 2 : 3;
93       }
94 
generalUnion1(const rangeset & a,const rangeset & b,bool flip_a,bool flip_b)95     void generalUnion1 (const rangeset &a, const rangeset &b,
96       bool flip_a, bool flip_b)
97       {
98       bool state_a=flip_a, state_b=flip_b, state_res=state_a||state_b;
99       size_t ia=0, ea=a.r.size(), ib=0, eb=b.r.size();
100       bool runa = ia!=ea, runb = ib!=eb;
101       while(runa||runb)
102         {
103         T va = runa ? a.r[ia] : T(0),
104           vb = runb ? b.r[ib] : T(0);
105         bool adv_a = runa && (!runb || (va<=vb)),
106              adv_b = runb && (!runa || (vb<=va));
107         if (adv_a) { state_a=!state_a; ++ia; runa = ia!=ea; }
108         if (adv_b) { state_b=!state_b; ++ib; runb = ib!=eb; }
109         if ((state_a||state_b)!=state_res)
110           { r.push_back(adv_a ? va : vb); state_res = !state_res; }
111         }
112       }
generalUnion2(const rangeset & a,const rangeset & b,bool flip_a,bool flip_b)113     void generalUnion2 (const rangeset &a, const rangeset &b,
114       bool flip_a, bool flip_b)
115       {
116       ptrdiff_t iva = flip_a ? 0 : -1;
117       ptrdiff_t asz=ptrdiff_t(a.r.size()), bsz=ptrdiff_t(b.r.size());
118       while (iva<asz)
119         {
120         ptrdiff_t ivb = (iva==-1) ? -1 : b.iiv(a.r[iva]);
121         bool state_b = flip_b^((ivb&1)==0);
122         if ((iva>-1) && (!state_b)) r.push_back(a.r[iva]);
123         while((ivb<bsz-1)&&((iva==asz-1)||(b.r[ivb+1]<a.r[iva+1])))
124           { ++ivb; state_b=!state_b; r.push_back(b.r[ivb]); }
125         if ((iva<asz-1)&&(!state_b)) r.push_back(a.r[iva+1]);
126         iva+=2;
127         }
128       }
generalUnion(const rangeset & a,const rangeset & b,bool flip_a,bool flip_b)129     void generalUnion (const rangeset &a, const rangeset &b,
130       bool flip_a, bool flip_b)
131       {
132       MR_assert((this!=&a)&&(this!=&b), "cannot overwrite the rangeset");
133       if (a.r.empty())
134         {
135         if (flip_a) clear(); else *this=b;
136         return;
137         }
138       if (b.r.empty())
139         {
140         if (flip_b) clear(); else *this=a;
141         return;
142         }
143 
144       clear();
145       int strat = strategy (a.nranges(), b.nranges());
146       (strat==1) ? generalUnion1(a,b,flip_a,flip_b) :
147         ((strat==2) ? generalUnion2(a,b,flip_a,flip_b)
148                     : generalUnion2(b,a,flip_b,flip_a));
149       }
generalXor(const rangeset & a,const rangeset & b)150     void generalXor (const rangeset &a, const rangeset &b)
151       {
152       clear();
153       bool state_a=false, state_b=false, state_res=state_a||state_b;
154       size_t ia=0, ea=a.r.size(), ib=0, eb=b.r.size();
155       bool runa = ia!=ea, runb = ib!=eb;
156       while(runa||runb)
157         {
158         T va = runa ? a.r[ia] : T(0),
159           vb = runb ? b.r[ib] : T(0);
160         bool adv_a = runa && (!runb || (va<=vb)),
161              adv_b = runb && (!runa || (vb<=va));
162         if (adv_a) { state_a=!state_a; ++ia; runa = ia!=ea; }
163         if (adv_b) { state_b=!state_b; ++ib; runb = ib!=eb; }
164         if ((state_a^state_b)!=state_res)
165           { r.push_back(adv_a ? va : vb); state_res = !state_res; }
166         }
167       }
168 
generalAllOrNothing1(const rangeset & a,const rangeset & b,bool flip_a,bool flip_b)169     static bool generalAllOrNothing1 (const rangeset &a, const rangeset &b,
170       bool flip_a, bool flip_b)
171       {
172       bool state_a=flip_a, state_b=flip_b, state_res=state_a||state_b;
173       size_t ia=0, ea=a.r.size(), ib=0, eb=b.r.size();
174       bool runa = ia!=ea, runb = ib!=eb;
175       while(runa||runb)
176         {
177         T va = runa ? a.r[ia] : T(0),
178           vb = runb ? b.r[ib] : T(0);
179         bool adv_a = runa && (!runb || (va<=vb)),
180              adv_b = runb && (!runa || (vb<=va));
181         if (adv_a) { state_a=!state_a; ++ia; runa = ia!=ea; }
182         if (adv_b) { state_b=!state_b; ++ib; runb = ib!=eb; }
183         if ((state_a||state_b)!=state_res)
184           return false;
185         }
186       return true;
187       }
generalAllOrNothing2(const rangeset & a,const rangeset & b,bool flip_a,bool flip_b)188     static bool generalAllOrNothing2 (const rangeset &a, const rangeset &b,
189       bool flip_a, bool flip_b)
190       {
191       ptrdiff_t iva = flip_a ? 0 : -1;
192       ptrdiff_t asz=ptrdiff_t(a.r.size()), bsz=ptrdiff_t(b.r.size());
193       while (iva<asz)
194         {
195         if (iva==-1) // implies that flip_a==false
196           { if ((!flip_b)||(b.r[0]<a.r[0])) return false; }
197         else if (iva==asz-1) // implies that flip_a==false
198           { if ((!flip_b)||(b.r[bsz-1]>a.r[asz-1])) return false; }
199         else
200           {
201           ptrdiff_t ivb=b.iiv(a.r[iva]);
202           if ((ivb!=bsz-1)&&(b.r[ivb+1]<a.r[iva+1])) return false;
203           if (flip_b==((ivb&1)==0)) return false;
204           }
205         iva+=2;
206         }
207       return true;
208       }
generalAllOrNothing(const rangeset & a,const rangeset & b,bool flip_a,bool flip_b)209     static bool generalAllOrNothing (const rangeset &a, const rangeset &b,
210       bool flip_a, bool flip_b)
211       {
212       if (a.r.empty())
213         return flip_a ? true : b.r.empty();
214       if (b.r.empty())
215         return flip_b ? true : a.r.empty();
216       int strat = strategy (a.nranges(), b.nranges());
217       return (strat==1) ? generalAllOrNothing1(a,b,flip_a,flip_b) :
218                ((strat==2) ? generalAllOrNothing2(a,b,flip_a,flip_b)
219                            : generalAllOrNothing2(b,a,flip_b,flip_a));
220       }
221 
222   public:
223     /// Removes all entries.
clear()224     void clear() { r.clear(); }
225     /// Reserves space for \a n ranges.
reserve(size_t n)226     void reserve(size_t n) { r.reserve(2*n); }
227     /// Returns the current number of ranges.
nranges()228     size_t nranges() const { return r.size()>>1; }
229     /// Returns the current number of ranges.
size()230     size_t size() const { return nranges(); }
231     /// Returns `true` iff there are no ranges in the rangeset.
empty()232     bool empty() const { return r.empty(); }
233     /// Returns the current vector of ranges.
data()234     const rtype &data() const { return r; }
checkConsistency()235     void checkConsistency() const
236       {
237       MR_assert((r.size()&1)==0,"invalid number of entries");
238       for (size_t i=1; i<r.size(); ++i)
239         MR_assert(r[i]>r[i-1],"inconsistent entries");
240       }
setData(const rtype & inp)241     void setData (const rtype &inp)
242       {
243       r=inp;
244       checkConsistency();
245       }
246 
247     /// Returns the first value of range \a i.
ivbegin(size_t i)248     const T &ivbegin (size_t i) const { return r[2*i]; }
249     /// Returns the one-past-last value of range \a i.
ivend(size_t i)250     const T &ivend (size_t i) const { return r[2*i+1]; }
251     /// Returns the length of range \a i.
ivlen(size_t i)252     T ivlen (size_t i) const { return r[2*i+1]-r[2*i]; }
253 
254     /// Appends \a [v1;v2[ to the rangeset. \a v1 must be larger
255     /// than the minimum of the last range in the rangeset.
append(const T & v1,const T & v2)256     void append(const T &v1, const T &v2)
257       {
258       if (v2<=v1) return;
259       if ((!r.empty()) && (v1<=r.back()))
260         {
261         MR_assert (v1>=r[r.size()-2],"bad append operation");
262         if (v2>r.back()) r.back()=v2;
263         }
264       else
265         { r.push_back(v1); r.push_back(v2); }
266       }
267     /// Appends \a [v;v+1[ to the rangeset. \a v must be larger
268     /// than the minimum of the last range in the rangeset.
append(const T & v)269     void append(const T &v)
270       { append(v,v+1); }
271 
272     /// Appends \a other to the rangeset. All values in \a other must be larger
273     /// than the minimum of the last range in the rangeset.
append(const rangeset & other)274     void append (const rangeset &other)
275       {
276       for (size_t j=0; j<other.nranges(); ++j)
277         append(other.ivbegin(j),other.ivend(j));
278       }
279 
280     /// After this operation, the rangeset contains the union of itself
281     /// with \a [v1;v2[.
add(const T & v1,const T & v2)282     void add(const T &v1, const T &v2)
283       {
284       if (v2<=v1) return;
285       if (r.empty() || (v1>=r[r.size()-2])) append(v1,v2);
286       addRemove(v1,v2,1);
287       }
288     /// After this operation, the rangeset contains the union of itself
289     /// with \a [v;v+1[.
add(const T & v)290     void add(const T &v) { add(v,v+1); }
291 
292     /// Removes all values within \a [v1;v2[ from the rangeset.
remove(const T & v1,const T & v2)293     void remove(const T &v1, const T &v2)
294       {
295       if (v2<=v1) return;
296       if (r.empty()) return;
297       if ((v2<=r[0])||(v1>=r.back())) return;
298       if ((v1<=r[0]) && (v2>=r.back())) { r.clear(); return; }
299       addRemove(v1,v2,0);
300       }
301     /// Removes the value \a v from the rangeset.
remove(const T & v)302     void remove(const T &v) { remove(v,v+1); }
303 
304     /// Removes all values not within \a [a;b[ from the rangeset.
intersect(const T & a,const T & b)305     void intersect (const T &a, const T &b)
306       {
307       if (r.empty()) return; // nothing to remove
308       if ((b<=r[0]) || (a>=r.back())) { r.clear(); return; } // no overlap
309       if ((a<=r[0]) && (b>=r.back())) return; // full rangeset in interval
310 
311       ptrdiff_t pos2=iiv(b);
312       if ((pos2>=0) && (r[size_t(pos2)]==b)) --pos2;
313       bool insert_b = (pos2&1)==0;
314       r.erase(r.begin()+pos2+1,r.end());
315       if (insert_b) r.push_back(b);
316 
317       ptrdiff_t pos1=iiv(a);
318       bool insert_a = (pos1&1)==0;
319       if (insert_a) r[size_t(pos1--)]=a;
320       if (pos1>=0)
321         r.erase(r.begin(),r.begin()+pos1+1);
322       }
323 
324     /// Returns the total number of elements in the rangeset.
nval()325     size_t nval() const
326       {
327       size_t result(0);
328       for (size_t i=0; i<r.size(); i+=2)
329         result+=size_t(r[i+1]-r[i]);
330       return result;
331       }
332 
333     /// After this operation, \a res contains all elements of the rangeset
334     /// in ascending order.
toVector(std::vector<T> & res)335     void toVector (std::vector<T> &res) const
336       {
337       res.clear();
338       res.reserve(nval());
339       for (size_t i=0; i<r.size(); i+=2)
340         for (T m(r[i]); m<r[i+1]; ++m)
341           res.push_back(m);
342       }
343 
344     /// Returns a vector containing all elements of the rangeset in ascending
345     /// order.
toVector()346     std::vector<T> toVector() const
347       {
348       std::vector<T> res;
349       toVector(res);
350       return res;
351       }
352 
353     /// Returns the union of this rangeset and \a other.
op_or(const rangeset & other)354     rangeset op_or (const rangeset &other) const
355       {
356       rangeset res;
357       res.generalUnion (*this,other,false,false);
358       return res;
359       }
360     /// Returns the intersection of this rangeset and \a other.
op_and(const rangeset & other)361     rangeset op_and (const rangeset &other) const
362       {
363       rangeset res;
364       res.generalUnion (*this,other,true,true);
365       return res;
366       }
367     /// Returns the part of this rangeset which is not in \a other.
op_andnot(const rangeset & other)368     rangeset op_andnot (const rangeset &other) const
369       {
370       rangeset res;
371       res.generalUnion (*this,other,true,false);
372       return res;
373       }
374     /// Returns the parts of this rangeset and \a other, which are not in
375     /// both rangesets.
op_xor(const rangeset & other)376     rangeset op_xor (const rangeset &other) const
377       {
378       rangeset res;
379       res.generalXor (*this,other);
380       return res;
381       }
382 
383     /// Returns the index of the interval containing \a v; if no such interval
384     /// exists, -1 is returned.
findInterval(const T & v)385     ptrdiff_t findInterval (const T &v) const
386       {
387       ptrdiff_t res = iiv(v);
388       return (res&1) ? -1 : res>>1;
389       }
390 
391     /// Returns \a true iff the rangeset is identical to \a other.
392     bool operator== (const rangeset &other) const
393       { return r==other.r; }
394 
395     /// Returns \a true iff the rangeset contains all values in the range
396     /// \a [a;b[.
contains(T a,T b)397     bool contains (T a,T b) const
398       {
399       ptrdiff_t res=iiv(a);
400       if (res&1) return false;
401       return (b<=r[res+1]);
402       }
403     /// Returns \a true iff the rangeset contains the value \a v.
contains(T v)404     bool contains (T v) const
405       { return !(iiv(v)&1); }
406     /// Returns \a true iff the rangeset contains all values stored in \a other.
contains(const rangeset & other)407     bool contains (const rangeset &other) const
408       { return generalAllOrNothing(*this,other,false,true); }
409     /// Returns true if any of the numbers [a;b[ are contained in the set.
overlaps(T a,T b)410     bool overlaps (T a,T b) const
411       {
412       ptrdiff_t res=iiv(a);
413       if ((res&1)==0) return true;
414       if (res==ptrdiff_t(r.size())-1) return false; // beyond the end of the set
415       return (r[res+1]<b);
416       }
417     /// Returns true if there is overlap between the set and \a other.
overlaps(const rangeset & other)418     bool overlaps (const rangeset &other) const
419       { return !generalAllOrNothing(*this,other,true,true); }
420   };
421 
422 template<typename T> inline std::ostream &operator<< (std::ostream &os,
423   const rangeset<T> &rs)
424   {
425   os << "{ ";
426   for (size_t i=0; i<rs.nranges(); ++i)
427     os << "["<<rs.ivbegin(i)<<";"<<rs.ivend(i)<<"[ ";
428   return os << "}";
429   }
430 
431 }
432 
433 #endif
434