1 // interval-set.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to represent and operate on sets of intervals.
20 
21 #ifndef FST_LIB_INTERVAL_SET_H__
22 #define FST_LIB_INTERVAL_SET_H__
23 
24 #include <iostream>
25 #include <vector>
26 using std::vector;
27 
28 
29 #include <fst/util.h>
30 
31 
32 namespace fst {
33 
34 // Stores and operates on a set of half-open integral intervals [a,b)
35 // of signed integers of type T.
36 template <typename T>
37 class IntervalSet {
38  public:
39   struct Interval {
40     T begin;
41     T end;
42 
IntervalInterval43     Interval() : begin(-1), end(-1) {}
44 
IntervalInterval45     Interval(T b, T e) : begin(b), end(e) {}
46 
47     bool operator<(const Interval &i) const {
48       return begin < i.begin || (begin == i.begin && end > i.end);
49     }
50 
51     bool operator==(const Interval &i) const {
52       return begin == i.begin && end == i.end;
53     }
54 
55     bool operator!=(const Interval &i) const {
56       return begin != i.begin || end != i.end;
57     }
58 
ReadInterval59     istream &Read(istream &strm) {
60       T n;
61       ReadType(strm, &n);
62       begin = n;
63       ReadType(strm, &n);
64       end = n;
65       return strm;
66     }
67 
WriteInterval68     ostream &Write(ostream &strm) const {
69       T n = begin;
70       WriteType(strm, n);
71       n = end;
72       WriteType(strm, n);
73       return strm;
74     }
75   };
76 
IntervalSet()77   IntervalSet() : count_(-1) {}
78 
79   // Returns the interval set as a vector.
Intervals()80   vector<Interval> *Intervals() { return &intervals_; }
81 
Intervals()82   const vector<Interval> *Intervals() const { return &intervals_; }
83 
Empty()84   bool Empty() const { return intervals_.empty(); }
85 
Size()86   T Size() const { return intervals_.size(); }
87 
88   // Number of points in the intervals (undefined if not normalized).
Count()89   T Count() const { return count_; }
90 
Clear()91   void Clear() {
92     intervals_.clear();
93     count_ = 0;
94   }
95 
96   // Adds an interval set to the set. The result may not be normalized.
Union(const IntervalSet<T> & iset)97   void Union(const IntervalSet<T> &iset) {
98     const vector<Interval> *intervals = iset.Intervals();
99     for (typename vector<Interval>::const_iterator it = intervals->begin();
100          it != intervals->end(); ++it)
101       intervals_.push_back(*it);
102   }
103 
104   // Requires intervals be normalized.
Member(T value)105   bool Member(T value) const {
106     Interval interval(value, value);
107     typename vector<Interval>::const_iterator lb =
108         lower_bound(intervals_.begin(), intervals_.end(), interval);
109     if (lb == intervals_.begin())
110       return false;
111     return (--lb)->end > value;
112   }
113 
114   // Requires intervals be normalized.
115   bool operator==(const IntervalSet<T>& iset) const {
116     return *(iset.Intervals()) == intervals_;
117   }
118 
119   // Requires intervals be normalized.
120   bool operator!=(const IntervalSet<T>& iset) const {
121     return *(iset.Intervals()) != intervals_;
122   }
123 
Singleton()124   bool Singleton() const {
125     return intervals_.size() == 1 &&
126         intervals_[0].begin + 1 == intervals_[0].end;
127   }
128 
129 
130   // Sorts; collapses overlapping and adjacent interals; sets count.
131   void Normalize();
132 
133   // Intersects an interval set with the set. Requires intervals be
134   // normalized. The result is normalized.
135   void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
136 
137   // Complements the set w.r.t [0, maxval). Requires intervals be
138   // normalized. The result is normalized.
139   void Complement(T maxval, IntervalSet<T> *oset) const;
140 
141   // Subtract an interval set from the set. Requires intervals be
142   // normalized. The result is normalized.
143   void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
144 
145   // Determines if an interval set overlaps with the set. Requires
146   // intervals be normalized.
147   bool Overlaps(const IntervalSet<T> &iset) const;
148 
149   // Determines if an interval set overlaps with the set but neither
150   // is contained in the other. Requires intervals be normalized.
151   bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
152 
153   // Determines if an interval set is contained within the set. Requires
154   // intervals be normalized.
155   bool Contains(const IntervalSet<T> &iset) const;
156 
Read(istream & strm)157   istream &Read(istream &strm) {
158     ReadType(strm, &intervals_);
159     return ReadType(strm, &count_);
160   }
161 
Write(ostream & strm)162   ostream &Write(ostream &strm) const {
163     WriteType(strm, intervals_);
164     return WriteType(strm, count_);
165   }
166 
167  private:
168   vector<Interval> intervals_;
169   T count_;
170 };
171 
172 // Sorts; collapses overlapping and adjacent interavls; sets count.
173 template <typename T>
Normalize()174 void IntervalSet<T>::Normalize() {
175   sort(intervals_.begin(), intervals_.end());
176 
177   count_ = 0;
178   T size = 0;
179   for (T i = 0; i < intervals_.size(); ++i) {
180     Interval &inti = intervals_[i];
181     if (inti.begin == inti.end)
182       continue;
183     for (T j = i + 1; j < intervals_.size(); ++j) {
184       Interval &intj = intervals_[j];
185       if (intj.begin > inti.end)
186         break;
187       if (intj.end > inti.end)
188         inti.end = intj.end;
189       ++i;
190     }
191     count_ += inti.end - inti.begin;
192     intervals_[size++] = inti;
193   }
194   intervals_.resize(size);
195 }
196 
197 // Intersects an interval set with the set. Requires intervals be normalized.
198 // The result is normalized.
199 template <typename T>
Intersect(const IntervalSet<T> & iset,IntervalSet<T> * oset)200 void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
201                                IntervalSet<T> *oset) const {
202   const vector<Interval> *iintervals = iset.Intervals();
203   vector<Interval> *ointervals = oset->Intervals();
204   typename vector<Interval>::const_iterator it1 = intervals_.begin();
205   typename vector<Interval>::const_iterator it2 = iintervals->begin();
206 
207   ointervals->clear();
208   oset->count_ = 0;
209 
210   while (it1 != intervals_.end() && it2 != iintervals->end()) {
211     if (it1->end <= it2->begin) {
212       ++it1;
213     } else if (it2->end <= it1->begin) {
214       ++it2;
215     } else {
216       Interval interval;
217       interval.begin = max(it1->begin, it2->begin);
218       interval.end = min(it1->end, it2->end);
219       ointervals->push_back(interval);
220       oset->count_ += interval.end - interval.begin;
221       if ((it1->end) < (it2->end))
222         ++it1;
223       else
224         ++it2;
225     }
226   }
227 }
228 
229 // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
230 // The result is normalized.
231 template <typename T>
Complement(T maxval,IntervalSet<T> * oset)232 void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
233   vector<Interval> *ointervals = oset->Intervals();
234   ointervals->clear();
235   oset->count_ = 0;
236 
237   Interval interval;
238   interval.begin = 0;
239   for (typename vector<Interval>::const_iterator it = intervals_.begin();
240        it != intervals_.end();
241        ++it) {
242     interval.end = min(it->begin, maxval);
243     if ((interval.begin) < (interval.end)) {
244       ointervals->push_back(interval);
245       oset->count_ += interval.end - interval.begin;
246     }
247     interval.begin = it->end;
248   }
249   interval.end = maxval;
250   if ((interval.begin) < (interval.end)) {
251     ointervals->push_back(interval);
252     oset->count_ += interval.end - interval.begin;
253   }
254 }
255 
256 // Subtract an interval set from the set. Requires intervals be normalized.
257 // The result is normalized.
258 template <typename T>
Difference(const IntervalSet<T> & iset,IntervalSet<T> * oset)259 void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
260                                 IntervalSet<T> *oset) const {
261   if (intervals_.empty()) {
262     oset->Intervals()->clear();
263     oset->count_ = 0;
264   } else {
265     IntervalSet<T> cset;
266     iset.Complement(intervals_.back().end, &cset);
267     Intersect(cset, oset);
268   }
269 }
270 
271 // Determines if an interval set overlaps with the set. Requires
272 // intervals be normalized.
273 template <typename T>
Overlaps(const IntervalSet<T> & iset)274 bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
275   const vector<Interval> *intervals = iset.Intervals();
276   typename vector<Interval>::const_iterator it1 = intervals_.begin();
277   typename vector<Interval>::const_iterator it2 = intervals->begin();
278 
279   while (it1 != intervals_.end() && it2 != intervals->end()) {
280     if (it1->end <= it2->begin) {
281       ++it1;
282     } else if (it2->end <= it1->begin) {
283       ++it2;
284     } else {
285       return true;
286     }
287   }
288   return false;
289 }
290 
291 // Determines if an interval set overlaps with the set but neither
292 // is contained in the other. Requires intervals be normalized.
293 template <typename T>
StrictlyOverlaps(const IntervalSet<T> & iset)294 bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
295   const vector<Interval> *intervals = iset.Intervals();
296   typename vector<Interval>::const_iterator it1 = intervals_.begin();
297   typename vector<Interval>::const_iterator it2 = intervals->begin();
298   bool only1 = false;   // point in intervals_ but not intervals
299   bool only2 = false;   // point in intervals but not intervals_
300   bool overlap = false; // point in both intervals_ and intervals
301 
302   while (it1 != intervals_.end() && it2 != intervals->end()) {
303     if (it1->end <= it2->begin) {  // no overlap - it1 first
304       only1 = true;
305       ++it1;
306     } else if (it2->end <= it1->begin) {  // no overlap - it2 first
307       only2 = true;
308       ++it2;
309     } else if (it2->begin == it1->begin && it2->end == it1->end) {  // equals
310       overlap = true;
311       ++it1;
312       ++it2;
313     } else if (it2->begin <= it1->begin && it2->end >= it1->end) {  // 1 c 2
314       only2 = true;
315       overlap = true;
316       ++it1;
317     } else if (it1->begin <= it2->begin && it1->end >= it2->end) {  // 2 c 1
318       only1 = true;
319       overlap = true;
320       ++it2;
321     } else {  // strict overlap
322       only1 = true;
323       only2 = true;
324       overlap = true;
325     }
326     if (only1 == true && only2 == true && overlap == true)
327       return true;
328   }
329   if (it1 != intervals_.end())
330     only1 = true;
331   if (it2 != intervals->end())
332     only2 = true;
333 
334   return only1 == true && only2 == true && overlap == true;
335 }
336 
337 // Determines if an interval set is contained within the set. Requires
338 // intervals be normalized.
339 template <typename T>
Contains(const IntervalSet<T> & iset)340 bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
341   if (iset.Count() > Count())
342     return false;
343 
344   const vector<Interval> *intervals = iset.Intervals();
345   typename vector<Interval>::const_iterator it1 = intervals_.begin();
346   typename vector<Interval>::const_iterator it2 = intervals->begin();
347 
348   while (it1 != intervals_.end() && it2 != intervals->end()) {
349     if ((it1->end) <= (it2->begin)) {  // no overlap - it1 first
350       ++it1;
351     } else if ((it2->begin) < (it1->begin) ||
352                (it2->end) > (it1->end)) {  // no C
353       return false;
354     } else if (it2->end == it1->end) {
355       ++it1;
356       ++it2;
357     } else {
358       ++it2;
359     }
360   }
361   return it2 == intervals->end();
362 }
363 
364 template <typename T>
365 ostream &operator<<(ostream &strm, const IntervalSet<T> &s)  {
366   typedef typename IntervalSet<T>::Interval Interval;
367   const vector<Interval> *intervals = s.Intervals();
368   strm << "{";
369   for (typename vector<Interval>::const_iterator it = intervals->begin();
370        it != intervals->end();
371        ++it) {
372     if (it != intervals->begin())
373       strm << ",";
374     strm << "[" << it->begin << "," << it->end << ")";
375   }
376   strm << "}";
377   return strm;
378 }
379 
380 }  // namespace fst
381 
382 #endif  // FST_LIB_INTERVAL_SET_H__
383