1 // Copyright (C) 2018-2019, Cloudflare, Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright notice,
9 //       this list of conditions and the following disclaimer.
10 //
11 //     * Redistributions in binary form must reproduce the above copyright
12 //       notice, this list of conditions and the following disclaimer in the
13 //       documentation and/or other materials provided with the distribution.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
16 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
19 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 use std::ops::Range;
28 
29 use std::collections::btree_map;
30 use std::collections::BTreeMap;
31 use std::collections::Bound;
32 
33 #[derive(Clone, PartialEq, PartialOrd)]
34 pub struct RangeSet {
35     inner: BTreeMap<u64, u64>,
36 
37     capacity: usize,
38 }
39 
40 impl RangeSet {
new(capacity: usize) -> Self41     pub fn new(capacity: usize) -> Self {
42         RangeSet {
43             inner: BTreeMap::default(),
44             capacity,
45         }
46     }
47 
48     // TODO: use RangeInclusive
insert(&mut self, item: Range<u64>)49     pub fn insert(&mut self, item: Range<u64>) {
50         let mut start = item.start;
51         let mut end = item.end;
52 
53         // Check if preceding existing range overlaps with the new one.
54         if let Some(r) = self.prev_to(start) {
55             // New range overlaps with existing range in the set, merge them.
56             if range_overlaps(&r, &item) {
57                 self.inner.remove(&r.start);
58 
59                 start = std::cmp::min(start, r.start);
60                 end = std::cmp::max(end, r.end);
61             }
62         }
63 
64         // Check if following existing ranges overlap with the new one.
65         while let Some(r) = self.next_to(start) {
66             // Existing range is fully contained in the new range, remove it.
67             if item.contains(&r.start) && item.contains(&r.end) {
68                 self.inner.remove(&r.start);
69                 continue;
70             }
71 
72             // New range doesn't overlap anymore, we are done.
73             if !range_overlaps(&r, &item) {
74                 break;
75             }
76 
77             // New range overlaps with existing range in the set, merge them.
78             self.inner.remove(&r.start);
79 
80             start = std::cmp::min(start, r.start);
81             end = std::cmp::max(end, r.end);
82         }
83 
84         if self.inner.len() >= self.capacity {
85             if let Some(first) = self.inner.keys().next().copied() {
86                 self.inner.remove(&first);
87             }
88         }
89 
90         self.inner.insert(start, end);
91     }
92 
remove_until(&mut self, largest: u64)93     pub fn remove_until(&mut self, largest: u64) {
94         let ranges: Vec<Range<u64>> = self
95             .inner
96             .range((Bound::Unbounded, Bound::Included(&largest)))
97             .map(|(&s, &e)| (s..e))
98             .collect();
99 
100         for r in ranges {
101             self.inner.remove(&r.start);
102 
103             if r.end > largest + 1 {
104                 let start = largest + 1;
105                 self.insert(start..r.end);
106             }
107         }
108     }
109 
push_item(&mut self, item: u64)110     pub fn push_item(&mut self, item: u64) {
111         #[allow(clippy::range_plus_one)]
112         self.insert(item..item + 1);
113     }
114 
first(&self) -> Option<u64>115     pub fn first(&self) -> Option<u64> {
116         self.flatten().next()
117     }
118 
last(&self) -> Option<u64>119     pub fn last(&self) -> Option<u64> {
120         self.flatten().next_back()
121     }
122 
len(&self) -> usize123     pub fn len(&self) -> usize {
124         self.inner.len()
125     }
126 
iter(&self) -> Iter127     pub fn iter(&self) -> Iter {
128         Iter {
129             inner: self.inner.iter(),
130         }
131     }
132 
flatten(&self) -> Flatten133     pub fn flatten(&self) -> Flatten {
134         Flatten {
135             inner: self.inner.iter(),
136             next: 0,
137             end: 0,
138         }
139     }
140 
prev_to(&self, item: u64) -> Option<Range<u64>>141     fn prev_to(&self, item: u64) -> Option<Range<u64>> {
142         self.inner
143             .range((Bound::Unbounded, Bound::Included(item)))
144             .map(|(&s, &e)| (s..e))
145             .next_back()
146     }
147 
next_to(&self, item: u64) -> Option<Range<u64>>148     fn next_to(&self, item: u64) -> Option<Range<u64>> {
149         self.inner
150             .range((Bound::Included(item), Bound::Unbounded))
151             .map(|(&s, &e)| (s..e))
152             .next()
153     }
154 }
155 
156 impl Default for RangeSet {
default() -> Self157     fn default() -> Self {
158         Self::new(std::usize::MAX)
159     }
160 }
161 
162 // This implements comparison between `RangeSet` and standard `Range`. The idea
163 // is that a `RangeSet` with no gaps (i.e. that only contains a single range)
164 // is basically equvalent to a normal `Range` so they should be comparable.
165 impl PartialEq<Range<u64>> for RangeSet {
eq(&self, other: &Range<u64>) -> bool166     fn eq(&self, other: &Range<u64>) -> bool {
167         // If there is more than one range it means that the range set is not
168         // contiguous, so can't be equal to a single range.
169         if self.inner.len() != 1 {
170             return false;
171         }
172 
173         // Get the first and only range in the set.
174         let (first_start, first_end) = self.inner.iter().next().unwrap();
175 
176         if (*first_start..*first_end) != *other {
177             return false;
178         }
179 
180         true
181     }
182 }
183 
184 impl std::fmt::Debug for RangeSet {
fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result185     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
186         let ranges: Vec<Range<u64>> = self
187             .iter()
188             .map(|mut r| {
189                 r.end -= 1;
190                 r
191             })
192             .collect();
193 
194         write!(f, "{:?}", ranges)
195     }
196 }
197 
198 pub struct Iter<'a> {
199     inner: btree_map::Iter<'a, u64, u64>,
200 }
201 
202 impl<'a> Iterator for Iter<'a> {
203     type Item = Range<u64>;
204 
next(&mut self) -> Option<Range<u64>>205     fn next(&mut self) -> Option<Range<u64>> {
206         let (&start, &end) = self.inner.next()?;
207         Some(start..end)
208     }
209 }
210 
211 impl<'a> DoubleEndedIterator for Iter<'a> {
next_back(&mut self) -> Option<Range<u64>>212     fn next_back(&mut self) -> Option<Range<u64>> {
213         let (&start, &end) = self.inner.next_back()?;
214         Some(start..end)
215     }
216 }
217 
218 impl<'a> ExactSizeIterator for Iter<'a> {
len(&self) -> usize219     fn len(&self) -> usize {
220         self.inner.len()
221     }
222 }
223 
224 pub struct Flatten<'a> {
225     inner: btree_map::Iter<'a, u64, u64>,
226     next: u64,
227     end: u64,
228 }
229 
230 impl<'a> Iterator for Flatten<'a> {
231     type Item = u64;
232 
next(&mut self) -> Option<u64>233     fn next(&mut self) -> Option<u64> {
234         if self.next == self.end {
235             let (&start, &end) = self.inner.next()?;
236 
237             self.next = start;
238             self.end = end;
239         }
240 
241         let next = self.next;
242         self.next += 1;
243 
244         Some(next)
245     }
246 }
247 
248 impl<'a> DoubleEndedIterator for Flatten<'a> {
next_back(&mut self) -> Option<u64>249     fn next_back(&mut self) -> Option<u64> {
250         if self.next == self.end {
251             let (&start, &end) = self.inner.next_back()?;
252 
253             self.next = start;
254             self.end = end;
255         }
256 
257         self.end -= 1;
258 
259         Some(self.end)
260     }
261 }
262 
range_overlaps(r: &Range<u64>, other: &Range<u64>) -> bool263 fn range_overlaps(r: &Range<u64>, other: &Range<u64>) -> bool {
264     other.start >= r.start && other.start <= r.end ||
265         other.end >= r.start && other.end <= r.end
266 }
267 
268 #[cfg(test)]
269 mod tests {
270     use super::*;
271 
272     #[test]
insert_non_overlapping()273     fn insert_non_overlapping() {
274         let mut r = RangeSet::default();
275         assert_eq!(r.inner.len(), 0);
276         let empty: &[u64] = &[];
277         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &empty);
278 
279         r.insert(4..7);
280         assert_eq!(r.inner.len(), 1);
281         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6]);
282 
283         r.insert(9..12);
284         assert_eq!(r.inner.len(), 2);
285         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
286     }
287 
288     #[test]
insert_contained()289     fn insert_contained() {
290         let mut r = RangeSet::default();
291 
292         r.insert(4..7);
293         r.insert(9..12);
294         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
295 
296         r.insert(4..7);
297         assert_eq!(r.inner.len(), 2);
298         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
299 
300         r.insert(4..6);
301         assert_eq!(r.inner.len(), 2);
302         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
303 
304         r.insert(5..6);
305         assert_eq!(r.inner.len(), 2);
306         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
307 
308         r.insert(10..11);
309         assert_eq!(r.inner.len(), 2);
310         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
311 
312         r.insert(9..11);
313         assert_eq!(r.inner.len(), 2);
314         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
315     }
316 
317     #[test]
insert_overlapping()318     fn insert_overlapping() {
319         let mut r = RangeSet::default();
320 
321         r.insert(3..6);
322         r.insert(9..12);
323         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[3, 4, 5, 9, 10, 11]);
324 
325         r.insert(5..7);
326         assert_eq!(r.inner.len(), 2);
327         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[3, 4, 5, 6, 9, 10, 11]);
328 
329         r.insert(10..15);
330         assert_eq!(r.inner.len(), 2);
331         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
332             3, 4, 5, 6, 9, 10, 11, 12, 13, 14
333         ]);
334 
335         r.insert(2..5);
336         assert_eq!(r.inner.len(), 2);
337         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
338             2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14
339         ]);
340 
341         r.insert(8..10);
342         assert_eq!(r.inner.len(), 2);
343         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
344             2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14
345         ]);
346 
347         r.insert(6..10);
348         assert_eq!(r.inner.len(), 1);
349         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
350             2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
351         ]);
352     }
353 
354     #[test]
insert_overlapping_multi()355     fn insert_overlapping_multi() {
356         let mut r = RangeSet::default();
357 
358         r.insert(3..6);
359         r.insert(16..20);
360         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
361             3, 4, 5, 16, 17, 18, 19
362         ]);
363 
364         r.insert(10..11);
365         assert_eq!(r.inner.len(), 3);
366         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
367             3, 4, 5, 10, 16, 17, 18, 19
368         ]);
369 
370         r.insert(13..14);
371         assert_eq!(r.inner.len(), 4);
372         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
373             3, 4, 5, 10, 13, 16, 17, 18, 19
374         ]);
375 
376         r.insert(4..17);
377         assert_eq!(r.inner.len(), 1);
378         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
379             3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
380         ]);
381     }
382 
383     #[test]
prev_to()384     fn prev_to() {
385         let mut r = RangeSet::default();
386 
387         r.insert(4..7);
388         r.insert(9..12);
389 
390         assert_eq!(r.prev_to(2), None);
391         assert_eq!(r.prev_to(4), Some(4..7));
392         assert_eq!(r.prev_to(15), Some(9..12));
393         assert_eq!(r.prev_to(5), Some(4..7));
394         assert_eq!(r.prev_to(8), Some(4..7));
395     }
396 
397     #[test]
next_to()398     fn next_to() {
399         let mut r = RangeSet::default();
400 
401         r.insert(4..7);
402         r.insert(9..12);
403 
404         assert_eq!(r.next_to(2), Some(4..7));
405         assert_eq!(r.next_to(12), None);
406         assert_eq!(r.next_to(15), None);
407         assert_eq!(r.next_to(5), Some(9..12));
408         assert_eq!(r.next_to(8), Some(9..12));
409     }
410 
411     #[test]
push_item()412     fn push_item() {
413         let mut r = RangeSet::default();
414 
415         r.insert(4..7);
416         r.insert(9..12);
417         assert_eq!(r.inner.len(), 2);
418         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
419 
420         r.push_item(15);
421         assert_eq!(r.inner.len(), 3);
422         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
423             4, 5, 6, 9, 10, 11, 15
424         ]);
425 
426         r.push_item(15);
427         assert_eq!(r.inner.len(), 3);
428         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
429             4, 5, 6, 9, 10, 11, 15
430         ]);
431 
432         r.push_item(1);
433         assert_eq!(r.inner.len(), 4);
434         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
435             1, 4, 5, 6, 9, 10, 11, 15
436         ]);
437 
438         r.push_item(12);
439         r.push_item(13);
440         r.push_item(14);
441         assert_eq!(r.inner.len(), 3);
442         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
443             1, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15
444         ]);
445 
446         r.push_item(2);
447         r.push_item(3);
448         assert_eq!(r.inner.len(), 2);
449         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
450             1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15
451         ]);
452 
453         r.push_item(8);
454         r.push_item(7);
455         assert_eq!(r.inner.len(), 1);
456         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
457             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
458         ]);
459     }
460 
461     #[test]
flatten_rev()462     fn flatten_rev() {
463         let mut r = RangeSet::default();
464         assert_eq!(r.inner.len(), 0);
465 
466         let empty: &[u64] = &[];
467         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &empty);
468 
469         r.insert(4..7);
470         assert_eq!(r.inner.len(), 1);
471         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6]);
472         assert_eq!(&r.flatten().rev().collect::<Vec<u64>>(), &[6, 5, 4]);
473 
474         r.insert(9..12);
475         assert_eq!(r.inner.len(), 2);
476         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[4, 5, 6, 9, 10, 11]);
477         assert_eq!(&r.flatten().rev().collect::<Vec<u64>>(), &[
478             11, 10, 9, 6, 5, 4
479         ]);
480     }
481 
482     #[test]
flatten_one()483     fn flatten_one() {
484         let mut r = RangeSet::default();
485         assert_eq!(r.inner.len(), 0);
486 
487         let empty: &[u64] = &[];
488         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &empty);
489 
490         r.insert(0..1);
491         assert_eq!(r.inner.len(), 1);
492         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[0]);
493         assert_eq!(&r.flatten().rev().collect::<Vec<u64>>(), &[0]);
494     }
495 
496     #[test]
remove_largest()497     fn remove_largest() {
498         let mut r = RangeSet::default();
499 
500         r.insert(3..6);
501         r.insert(9..11);
502         r.insert(13..14);
503         r.insert(16..20);
504         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
505             3, 4, 5, 9, 10, 13, 16, 17, 18, 19
506         ]);
507 
508         r.remove_until(2);
509         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
510             3, 4, 5, 9, 10, 13, 16, 17, 18, 19
511         ]);
512 
513         r.remove_until(4);
514         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
515             5, 9, 10, 13, 16, 17, 18, 19
516         ]);
517 
518         r.remove_until(6);
519         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[
520             9, 10, 13, 16, 17, 18, 19
521         ]);
522 
523         r.remove_until(10);
524         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[13, 16, 17, 18, 19]);
525 
526         r.remove_until(17);
527         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[18, 19]);
528 
529         r.remove_until(18);
530         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &[19]);
531 
532         r.remove_until(20);
533 
534         let empty: &[u64] = &[];
535         assert_eq!(&r.flatten().collect::<Vec<u64>>(), &empty);
536     }
537 
538     #[test]
eq_range()539     fn eq_range() {
540         let mut r = RangeSet::default();
541         assert_ne!(r, 0..0);
542 
543         let expected = 3..20;
544 
545         r.insert(3..6);
546         assert_ne!(r, expected);
547 
548         r.insert(16..20);
549         assert_ne!(r, expected);
550 
551         r.insert(10..11);
552         assert_ne!(r, expected);
553 
554         r.insert(13..14);
555         assert_ne!(r, expected);
556 
557         r.insert(4..17);
558         assert_eq!(r, expected);
559     }
560 
561     #[test]
first_last()562     fn first_last() {
563         let mut r = RangeSet::default();
564         assert_eq!(r.first(), None);
565         assert_eq!(r.last(), None);
566 
567         r.insert(10..11);
568         assert_eq!(r.first(), Some(10));
569         assert_eq!(r.last(), Some(10));
570 
571         r.insert(13..14);
572         assert_eq!(r.first(), Some(10));
573         assert_eq!(r.last(), Some(13));
574 
575         r.insert(3..6);
576         assert_eq!(r.first(), Some(3));
577         assert_eq!(r.last(), Some(13));
578 
579         r.insert(16..20);
580         assert_eq!(r.first(), Some(3));
581         assert_eq!(r.last(), Some(19));
582 
583         r.insert(4..17);
584         assert_eq!(r.first(), Some(3));
585         assert_eq!(r.last(), Some(19));
586     }
587 
588     #[test]
capacity()589     fn capacity() {
590         let mut r = RangeSet::new(3);
591         assert_eq!(r.first(), None);
592         assert_eq!(r.last(), None);
593 
594         r.insert(10..11);
595         assert_eq!(r.first(), Some(10));
596         assert_eq!(r.last(), Some(10));
597 
598         r.insert(13..14);
599         assert_eq!(r.first(), Some(10));
600         assert_eq!(r.last(), Some(13));
601 
602         r.insert(3..6);
603         assert_eq!(r.first(), Some(3));
604         assert_eq!(r.last(), Some(13));
605 
606         r.insert(16..20);
607         assert_eq!(r.first(), Some(10));
608         assert_eq!(r.last(), Some(19));
609 
610         r.insert(4..17);
611         assert_eq!(r.first(), Some(4));
612         assert_eq!(r.last(), Some(19));
613     }
614 }
615