1 // interval-set.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to represent and operate on sets of intervals.
20
21 #ifndef FST_LIB_INTERVAL_SET_H__
22 #define FST_LIB_INTERVAL_SET_H__
23
24 #include <iostream>
25 #include <vector>
26 using std::vector;
27
28
29 #include <fst/util.h>
30
31
32 namespace fst {
33
34 // Stores and operates on a set of half-open integral intervals [a,b)
35 // of signed integers of type T.
36 template <typename T>
37 class IntervalSet {
38 public:
39 struct Interval {
40 T begin;
41 T end;
42
IntervalInterval43 Interval() : begin(-1), end(-1) {}
44
IntervalInterval45 Interval(T b, T e) : begin(b), end(e) {}
46
47 bool operator<(const Interval &i) const {
48 return begin < i.begin || (begin == i.begin && end > i.end);
49 }
50
51 bool operator==(const Interval &i) const {
52 return begin == i.begin && end == i.end;
53 }
54
55 bool operator!=(const Interval &i) const {
56 return begin != i.begin || end != i.end;
57 }
58
ReadInterval59 istream &Read(istream &strm) {
60 T n;
61 ReadType(strm, &n);
62 begin = n;
63 ReadType(strm, &n);
64 end = n;
65 return strm;
66 }
67
WriteInterval68 ostream &Write(ostream &strm) const {
69 T n = begin;
70 WriteType(strm, n);
71 n = end;
72 WriteType(strm, n);
73 return strm;
74 }
75 };
76
IntervalSet()77 IntervalSet() : count_(-1) {}
78
79 // Returns the interval set as a vector.
Intervals()80 vector<Interval> *Intervals() { return &intervals_; }
81
Intervals()82 const vector<Interval> *Intervals() const { return &intervals_; }
83
Empty()84 bool Empty() const { return intervals_.empty(); }
85
Size()86 T Size() const { return intervals_.size(); }
87
88 // Number of points in the intervals (undefined if not normalized).
Count()89 T Count() const { return count_; }
90
Clear()91 void Clear() {
92 intervals_.clear();
93 count_ = 0;
94 }
95
96 // Adds an interval set to the set. The result may not be normalized.
Union(const IntervalSet<T> & iset)97 void Union(const IntervalSet<T> &iset) {
98 const vector<Interval> *intervals = iset.Intervals();
99 for (typename vector<Interval>::const_iterator it = intervals->begin();
100 it != intervals->end(); ++it)
101 intervals_.push_back(*it);
102 }
103
104 // Requires intervals be normalized.
Member(T value)105 bool Member(T value) const {
106 Interval interval(value, value);
107 typename vector<Interval>::const_iterator lb =
108 lower_bound(intervals_.begin(), intervals_.end(), interval);
109 if (lb == intervals_.begin())
110 return false;
111 return (--lb)->end > value;
112 }
113
114 // Requires intervals be normalized.
115 bool operator==(const IntervalSet<T>& iset) const {
116 return *(iset.Intervals()) == intervals_;
117 }
118
119 // Requires intervals be normalized.
120 bool operator!=(const IntervalSet<T>& iset) const {
121 return *(iset.Intervals()) != intervals_;
122 }
123
Singleton()124 bool Singleton() const {
125 return intervals_.size() == 1 &&
126 intervals_[0].begin + 1 == intervals_[0].end;
127 }
128
129
130 // Sorts; collapses overlapping and adjacent interals; sets count.
131 void Normalize();
132
133 // Intersects an interval set with the set. Requires intervals be
134 // normalized. The result is normalized.
135 void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
136
137 // Complements the set w.r.t [0, maxval). Requires intervals be
138 // normalized. The result is normalized.
139 void Complement(T maxval, IntervalSet<T> *oset) const;
140
141 // Subtract an interval set from the set. Requires intervals be
142 // normalized. The result is normalized.
143 void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
144
145 // Determines if an interval set overlaps with the set. Requires
146 // intervals be normalized.
147 bool Overlaps(const IntervalSet<T> &iset) const;
148
149 // Determines if an interval set overlaps with the set but neither
150 // is contained in the other. Requires intervals be normalized.
151 bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
152
153 // Determines if an interval set is contained within the set. Requires
154 // intervals be normalized.
155 bool Contains(const IntervalSet<T> &iset) const;
156
Read(istream & strm)157 istream &Read(istream &strm) {
158 ReadType(strm, &intervals_);
159 return ReadType(strm, &count_);
160 }
161
Write(ostream & strm)162 ostream &Write(ostream &strm) const {
163 WriteType(strm, intervals_);
164 return WriteType(strm, count_);
165 }
166
167 private:
168 vector<Interval> intervals_;
169 T count_;
170 };
171
172 // Sorts; collapses overlapping and adjacent interavls; sets count.
173 template <typename T>
Normalize()174 void IntervalSet<T>::Normalize() {
175 sort(intervals_.begin(), intervals_.end());
176
177 count_ = 0;
178 T size = 0;
179 for (T i = 0; i < intervals_.size(); ++i) {
180 Interval &inti = intervals_[i];
181 if (inti.begin == inti.end)
182 continue;
183 for (T j = i + 1; j < intervals_.size(); ++j) {
184 Interval &intj = intervals_[j];
185 if (intj.begin > inti.end)
186 break;
187 if (intj.end > inti.end)
188 inti.end = intj.end;
189 ++i;
190 }
191 count_ += inti.end - inti.begin;
192 intervals_[size++] = inti;
193 }
194 intervals_.resize(size);
195 }
196
197 // Intersects an interval set with the set. Requires intervals be normalized.
198 // The result is normalized.
199 template <typename T>
Intersect(const IntervalSet<T> & iset,IntervalSet<T> * oset)200 void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
201 IntervalSet<T> *oset) const {
202 const vector<Interval> *iintervals = iset.Intervals();
203 vector<Interval> *ointervals = oset->Intervals();
204 typename vector<Interval>::const_iterator it1 = intervals_.begin();
205 typename vector<Interval>::const_iterator it2 = iintervals->begin();
206
207 ointervals->clear();
208 oset->count_ = 0;
209
210 while (it1 != intervals_.end() && it2 != iintervals->end()) {
211 if (it1->end <= it2->begin) {
212 ++it1;
213 } else if (it2->end <= it1->begin) {
214 ++it2;
215 } else {
216 Interval interval;
217 interval.begin = max(it1->begin, it2->begin);
218 interval.end = min(it1->end, it2->end);
219 ointervals->push_back(interval);
220 oset->count_ += interval.end - interval.begin;
221 if ((it1->end) < (it2->end))
222 ++it1;
223 else
224 ++it2;
225 }
226 }
227 }
228
229 // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
230 // The result is normalized.
231 template <typename T>
Complement(T maxval,IntervalSet<T> * oset)232 void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
233 vector<Interval> *ointervals = oset->Intervals();
234 ointervals->clear();
235 oset->count_ = 0;
236
237 Interval interval;
238 interval.begin = 0;
239 for (typename vector<Interval>::const_iterator it = intervals_.begin();
240 it != intervals_.end();
241 ++it) {
242 interval.end = min(it->begin, maxval);
243 if ((interval.begin) < (interval.end)) {
244 ointervals->push_back(interval);
245 oset->count_ += interval.end - interval.begin;
246 }
247 interval.begin = it->end;
248 }
249 interval.end = maxval;
250 if ((interval.begin) < (interval.end)) {
251 ointervals->push_back(interval);
252 oset->count_ += interval.end - interval.begin;
253 }
254 }
255
256 // Subtract an interval set from the set. Requires intervals be normalized.
257 // The result is normalized.
258 template <typename T>
Difference(const IntervalSet<T> & iset,IntervalSet<T> * oset)259 void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
260 IntervalSet<T> *oset) const {
261 if (intervals_.empty()) {
262 oset->Intervals()->clear();
263 oset->count_ = 0;
264 } else {
265 IntervalSet<T> cset;
266 iset.Complement(intervals_.back().end, &cset);
267 Intersect(cset, oset);
268 }
269 }
270
271 // Determines if an interval set overlaps with the set. Requires
272 // intervals be normalized.
273 template <typename T>
Overlaps(const IntervalSet<T> & iset)274 bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
275 const vector<Interval> *intervals = iset.Intervals();
276 typename vector<Interval>::const_iterator it1 = intervals_.begin();
277 typename vector<Interval>::const_iterator it2 = intervals->begin();
278
279 while (it1 != intervals_.end() && it2 != intervals->end()) {
280 if (it1->end <= it2->begin) {
281 ++it1;
282 } else if (it2->end <= it1->begin) {
283 ++it2;
284 } else {
285 return true;
286 }
287 }
288 return false;
289 }
290
291 // Determines if an interval set overlaps with the set but neither
292 // is contained in the other. Requires intervals be normalized.
293 template <typename T>
StrictlyOverlaps(const IntervalSet<T> & iset)294 bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
295 const vector<Interval> *intervals = iset.Intervals();
296 typename vector<Interval>::const_iterator it1 = intervals_.begin();
297 typename vector<Interval>::const_iterator it2 = intervals->begin();
298 bool only1 = false; // point in intervals_ but not intervals
299 bool only2 = false; // point in intervals but not intervals_
300 bool overlap = false; // point in both intervals_ and intervals
301
302 while (it1 != intervals_.end() && it2 != intervals->end()) {
303 if (it1->end <= it2->begin) { // no overlap - it1 first
304 only1 = true;
305 ++it1;
306 } else if (it2->end <= it1->begin) { // no overlap - it2 first
307 only2 = true;
308 ++it2;
309 } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals
310 overlap = true;
311 ++it1;
312 ++it2;
313 } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2
314 only2 = true;
315 overlap = true;
316 ++it1;
317 } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1
318 only1 = true;
319 overlap = true;
320 ++it2;
321 } else { // strict overlap
322 only1 = true;
323 only2 = true;
324 overlap = true;
325 }
326 if (only1 == true && only2 == true && overlap == true)
327 return true;
328 }
329 if (it1 != intervals_.end())
330 only1 = true;
331 if (it2 != intervals->end())
332 only2 = true;
333
334 return only1 == true && only2 == true && overlap == true;
335 }
336
337 // Determines if an interval set is contained within the set. Requires
338 // intervals be normalized.
339 template <typename T>
Contains(const IntervalSet<T> & iset)340 bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
341 if (iset.Count() > Count())
342 return false;
343
344 const vector<Interval> *intervals = iset.Intervals();
345 typename vector<Interval>::const_iterator it1 = intervals_.begin();
346 typename vector<Interval>::const_iterator it2 = intervals->begin();
347
348 while (it1 != intervals_.end() && it2 != intervals->end()) {
349 if ((it1->end) <= (it2->begin)) { // no overlap - it1 first
350 ++it1;
351 } else if ((it2->begin) < (it1->begin) ||
352 (it2->end) > (it1->end)) { // no C
353 return false;
354 } else if (it2->end == it1->end) {
355 ++it1;
356 ++it2;
357 } else {
358 ++it2;
359 }
360 }
361 return it2 == intervals->end();
362 }
363
364 template <typename T>
365 ostream &operator<<(ostream &strm, const IntervalSet<T> &s) {
366 typedef typename IntervalSet<T>::Interval Interval;
367 const vector<Interval> *intervals = s.Intervals();
368 strm << "{";
369 for (typename vector<Interval>::const_iterator it = intervals->begin();
370 it != intervals->end();
371 ++it) {
372 if (it != intervals->begin())
373 strm << ",";
374 strm << "[" << it->begin << "," << it->end << ")";
375 }
376 strm << "}";
377 return strm;
378 }
379
380 } // namespace fst
381
382 #endif // FST_LIB_INTERVAL_SET_H__
383