1
2import copy
3import re
4import sys
5from itertools import chain, count, groupby
6
7__all__ = 'intspan spanlist intspanlist TheRest ParseError'.split()
8
9_PY2 = sys.version_info[0] == 2
10if not _PY2:
11    basestring = str
12
13
14# Define regular expressions for spans and spans that may contain
15# star (TheRest) markers
16SPANRE = re.compile(r'^\s*(?P<start>-?\d+)\s*(-\s*(?P<stop>-?\d+))?\s*$')
17SPANRESTAR = re.compile(
18    r'^\s*((?P<star>\*)|(?P<start>-?\d+)\s*(-\s*(?P<stop>-?\d+))?)\s*$')
19
20
21class ParseError(ValueError):
22    """Exception raised if string specification can't be parsed."""
23    pass
24
25
26class Rester(object):
27    """
28    Singleton to represent "the rest of the values."
29    """
30
31    def __repr__(self):
32        return 'TheRest'
33
34    def __str__(self):
35        return '*'
36
37
38TheRest = Rester()
39
40
41def _parse_range(datum):
42
43    """
44    Parser for intspan and intspan list.
45    """
46
47    def parse_chunk(chunk):
48        """
49        Parse each comma-separated chunk. Hyphens (-) can indicate ranges,
50        or negative numbers. Returns a list of specified values. NB Designed
51        to parse correct input correctly. Results of incorrect input are
52        undefined.
53        """
54        m = SPANRESTAR.search(chunk)
55        if m:
56            if m.group('star'):
57                return [TheRest]
58            start = int(m.group('start'))
59            if not m.group('stop'):
60                return [start]
61            stop = int(m.group('stop'))
62            if start > stop:
63                raise ParseError('start value should exceed stop ({0})'.format(chunk))
64            return list(range(start, stop + 1))
65        else:
66            raise ParseError("Can't parse chunk '{0}'".format(chunk))
67
68    if isinstance(datum, basestring):
69        result = []
70        for part in datum.split(','):
71            chunk = part.strip()
72            if chunk:
73                result.extend(parse_chunk(chunk))
74        return result
75
76    return datum if hasattr(datum, '__iter__') else [datum]
77
78
79def spanlist(spec=None):
80    """
81    Given a string specification like the ones given to ``intspan``,
82    return a list of the included items, in the same item given. Thus,
83    ``spanlist("3,1-4")`` yields ``[3, 1, 2, 4]``. Experimental partial
84    implementation of ability to have ordered intspans.
85    """
86    if spec is None or (isinstance(spec, basestring) and spec.strip() == ''):
87        return []
88    rawitems = _parse_range(spec)
89    seen = set()
90    items = []
91    for i in rawitems:
92        if i in seen:
93            continue
94        items.append(i)
95        seen.add(i)
96    return items
97
98
99def _as_range(iterable):
100    """
101    Return a tuple representing the bounds of the range.
102    """
103    l = list(iterable)
104    return (l[0], l[-1])
105
106
107def _as_range_str(iterable):
108    """
109    Return a string representing the range as a string span.
110    """
111    l = list(iterable)
112    if len(l) > 1:
113        return '{0}-{1}'.format(l[0], l[-1])
114    return '{0}'.format(l[0])
115
116
117def _noRestDiff(a, b):
118    """
119    Special difference that, in case difference cannot be computed
120    because of ``TypeError`` (indicating that a ``Rester`` object)
121    has been found), returns a difference indicating the spanned items
122    / group has ended.
123    """
124    try:
125        return a - b
126    except TypeError:
127        return 2  # anything more than 1 signals "the next thing is in
128                  # another span, not this current one"
129
130
131class intspan(set):  # pylint: disable=invalid-name
132    """
133    A set of integers, expressed as an ordered sequence of spans.
134    Because ``intspan('1-3,14,29,92-97')`` is clearer than
135    ``[1, 2, 3, 14, 29, 92, 93, 94, 95, 96, 97]``.
136    """
137
138    def __init__(self, initial=None):
139        """
140        Construct a new ``intspan``.
141
142        :param iterable|str initial: Optional initial list of items.
143        :returns: the intspan
144        :rtype: intspan
145        """
146        super(intspan, self).__init__()
147        if initial:
148            self.update(initial)
149
150    def copy(self):
151        """
152        Return a new set with the same members.
153        """
154        return copy.copy(self)
155
156    def update(self, items):
157        """
158        Add multiple items.
159
160        :param iterable|str items: Items to add. May be an intspan-style string.
161        """
162        super(intspan, self).update(_parse_range(items))
163        return self
164
165    def intersection_update(self, items):
166        super(intspan, self).intersection_update(_parse_range(items))
167        return self
168
169    def difference_update(self, items):
170        super(intspan, self).difference_update(_parse_range(items))
171        return self
172
173    def symmetric_difference_update(self, items):
174        super(intspan, self).symmetric_difference_update(
175            _parse_range(items))
176        return self
177
178    def discard(self, items):
179        """
180        Discard items.
181
182        :param iterable|str items: Items to remove. May be an intspan-style string.
183        """
184        for item in _parse_range(items):
185            super(intspan, self).discard(item)
186
187    def remove(self, items):
188        for item in _parse_range(items):
189            super(intspan, self).remove(item)
190
191    def add(self, items):
192        """
193        Add items.
194
195        :param iterable|str items: Items to add. May be an intspan-style string.
196        """
197        for item in _parse_range(items):
198            super(intspan, self).add(item)
199
200    def issubset(self, items):
201        return super(intspan, self).issubset(_parse_range(items))
202
203    def issuperset(self, items):
204        return super(intspan, self).issuperset(_parse_range(items))
205
206    def union(self, items):
207        return intspan(super(intspan, self).union(_parse_range(items)))
208
209    def intersection(self, items):
210        return intspan(super(intspan, self).intersection(_parse_range(items)))
211
212    def difference(self, items):
213        return intspan(super(intspan, self).difference(_parse_range(items)))
214
215    def symmetric_difference(self, items):
216        return intspan(super(intspan, self).symmetric_difference(_parse_range(items)))
217
218    __le__   = issubset
219    __ge__   = issuperset
220    __or__   = union
221    __and__  = intersection
222    __sub__  = difference
223    __xor__  = symmetric_difference
224    __ior__  = update
225    __iand__ = intersection_update
226    __isub__ = difference_update
227    __ixor__ = symmetric_difference_update
228
229    def __eq__(self, items):
230        return super(intspan, self).__eq__(_parse_range(items))
231
232    def __lt__(self, items):
233        return super(intspan, self).__lt__(_parse_range(items))
234
235    def __gt__(self, items):
236        return super(intspan, self).__gt__(_parse_range(items))
237
238    def __iter__(self):
239        """
240        Iterate in ascending order.
241        """
242        return iter(sorted(super(intspan, self).__iter__()))
243
244    def pop(self):
245        """
246        Remove and return an arbitrary element; raises KeyError if empty.
247
248        :returns: Arbitrary member of the set (which is removed)
249        :rtype: int
250        :raises KeyError: If the set is empty.
251        """
252        if self:
253            min_item = min(self)
254            self.discard(min_item)
255            return min_item
256        else:
257            raise KeyError('pop from an empty set')
258
259        # This method added only for PyPy, which otherwise would get the wrong
260        # answer (unordered).
261
262
263    def universe(self, low=None, high=None):
264        """
265        Return the "universe" or "covering set" of the given intspan--that
266        is, all of the integers between its minimum and missing values.
267        Optionally allows the bounds of the universe set to be manually specified.
268
269        :param int low: Low bound of universe.
270        :param int high: High bound of universe.
271        :returns: the universe or covering set
272        :rtype: intspan
273        """
274        if not self and low is None and high is None:
275            return intspan()
276        low = low if low is not None else min(self)
277        high = high if high is not None else max(self)
278        universe = self.__class__.from_range(low, high)
279        return universe
280
281    def complement(self, low=None, high=None):
282        """
283        Return the complement of the given intspan--that is, all of the
284        'missing' elements between its minimum and missing values.
285        Optionally allows the universe set to be manually specified.
286
287        :param int low: Low bound of universe to complement against.
288        :param int high: High bound of universe to complement against.
289        :returns: the complement set
290        :rtype: intspan
291        :raises ValueError: if the set is empty (thus the compelement is infinite)
292        """
293        if not self:
294            raise ValueError('cannot represent infinite set')
295        return self.universe(low, high) - self
296
297    @classmethod
298    def from_range(cls, low, high):
299        """
300        Construct an intspan from the low value to the high value,
301        inclusive. I.e., closed range, not the more typical Python
302        half-open range.
303
304        :param int low: Low bound of set to construct.
305        :param int high: High bound of set to construct.
306        :returns: New intspan low-high.
307        :rtype: intspan
308        """
309        return cls(range(low, high + 1))
310
311    @classmethod
312    def from_ranges(cls, ranges):
313        """
314        Construct an intspan from a sequence of (low, high) value
315        sequences (lists or tuples, say). Note that these values are
316        inclusive, closed ranges, not the more typical Python
317        half-open ranges.
318
319        :param list ranges: List of closed/inclusive ranges, each a tuple.
320        :returns: intspan combining the ranges
321        :rtype: intspan
322        """
323        return cls(chain(*(range(r[0], r[1] + 1) for r in ranges)))
324
325    def __repr__(self):
326        """
327        Return the representation.
328        """
329        clsname = self.__class__.__name__
330        return '{0}({1!r})'.format(clsname, self.__str__())
331
332    def __str__(self):
333        """
334        Return the stringification.
335        """
336        items = sorted(self)
337        gk = lambda n, c=count(): n - next(c)
338        return ','.join(_as_range_str(g) for _, g in groupby(items, key=gk))
339
340    def ranges(self):
341        """
342        Return a list of the set's contiguous (inclusive) ranges.
343
344        :returns: List of all contained ranges.
345        :rtype: list
346        """
347        items = sorted(self)
348        gk = lambda n, c=count(): n - next(c)
349        return [_as_range(g) for _, g in groupby(items, key=gk)]
350
351    # see Jeff Mercado's answer to http://codereview.stackexchange.com/questions/5196/grouping-consecutive-numbers-into-ranges-in-python-3-2
352    # see also: http://stackoverflow.com/questions/2927213/python-finding-n-consecutive-numbers-in-a-list
353
354
355# It might be interesting to have a metaclass factory that could create
356# spansets of things other than integers. For example, enumerateds defined
357# by giving a universe of possible options. Or characters. The Ranger
358# package seems to do some of this http://pythonhosted.org/ranger/
359
360
361class intspanlist(list):  # pylint: disable=invalid-name
362    """
363    An ordered version of ``intspan``. Is to ``list`` what ``intspan``
364    is to ``set``, except that it is somewhat set-like, in that items
365    are not intended to be repeated. Works fine as an immutable
366    data structure. Still some issues if one mutates an instance. Not
367    terrible problems, but the set-like nature where there is only
368    one entry for each included integer may be broken.
369    """
370
371    def __init__(self, initial=None, universe=None):
372        """
373        Construct a new ``intspanlist``
374
375        :param iterable|str initial: Optional initial list of items.
376        :returns: the intspanlist
377        :rtype: intspanlist
378        """
379        super(intspanlist, self).__init__()
380        if initial:
381            self.extend(initial)
382        if universe is not None:
383            try:
384                restIndex = self.index(TheRest)
385                remaining = sorted(intspan(universe) - set(self))
386                self[restIndex + 1:restIndex + 1] = remaining  # splice
387                self.pop(restIndex)
388            except ValueError:
389                pass
390
391    def therest_update(self, universe, inplace=True):
392        """
393        If the receiving ``intspanlist`` contains a ``TheRest`` marker,
394        replace it with the contents of the universe. Generally done
395        *in situ*, but if value of ``inplace`` kwarg false, returns
396        an edited copy.
397        """
398        toedit = self if inplace else self.copy()
399        try:
400            restIndex = toedit.index(TheRest)
401            remaining = sorted(intspan(universe) - set(toedit))
402            toedit[restIndex + 1:restIndex + 1] = remaining  # splice
403            toedit.pop(restIndex)
404        except ValueError:
405            pass
406        return toedit
407
408    def copy(self):
409        """
410        Return a copy of the intspanlist.
411        """
412        return copy.copy(self)
413
414    def append(self, item):
415        """
416        Add to the end of the intspanlist
417
418        :param int item: Item to add
419        """
420        self.extend(spanlist(item))
421
422    def extend(self, items):
423        """
424        Add a collection to the end of the intspanlist
425
426        :param iterable items: integers to add
427        """
428        seen = set(self)
429        for newitem in spanlist(items):
430            if newitem in seen:
431                continue
432            super(intspanlist, self).append(newitem)
433
434    def __eq__(self, items):
435        return super(intspanlist, self).__eq__(spanlist(items))
436
437    def __lt__(self, items):
438        return super(intspanlist, self).__lt__(spanlist(items))
439
440    def __gt__(self, items):
441        return super(intspanlist, self).__gt__(spanlist(items))
442
443    def __add__(self, other):
444        return intspanlist(list(self) + list(other))
445
446    def __sub__(self, other):
447        result = self[:]
448        for o in other:
449            try:
450                result.remove(o)
451            except ValueError:
452                pass
453        return result
454
455    def __iadd__(self, other):
456        self.extend(other)
457        return self
458
459    def __isub__(self, other):
460        for o in other:
461            try:
462                self.remove(o)
463            except ValueError:
464                pass
465        return self
466
467    def __radd__(self, other):
468        return intspanlist(list(other) + list(self))
469
470    def __rsub__(self, other):
471        raise NotImplementedError("operation doesn't make sense")
472
473    def complement(self, low=None, high=None):
474        """
475        Return the complement of the given intspanlist--that is, all of the
476        'missing' elements between its minimum and missing values.
477        Optionally allows the universe set to be manually specified.
478
479        :param int low: Low bound of universe to complement against.
480        :param int high: High bound of universe to complement against.
481        :returns: the complement set
482        :rtype: intspanlist
483        :raises ValueError: if the set is empty (thus the compelement is infinite)
484        """
485        cls = self.__class__
486        if not self:
487            raise ValueError('cannot represent infinite set')
488        low = low if low is not None else min(self)
489        high = high if high is not None else max(self)
490        universe = cls.from_range(low, high)
491        result = []
492        contained = set(self)
493        for x in universe:
494            if x not in contained:
495                result.append(x)
496        return cls(result)
497
498    @classmethod
499    def from_range(cls, low, high):
500        """
501        Construct an intspanlist from the low value to the high value,
502        inclusive. I.e., closed range, not the more typical Python
503        half-open range.
504
505        :param int low: Low bound.
506        :param int high: High bound.
507        :returns: New intspanlist low-high.
508        :rtype: intspanlist
509        """
510        return cls(range(low, high + 1))
511
512    @classmethod
513    def from_ranges(cls, ranges):
514        """
515        Construct an intspanlist from a sequence of (low, high) value
516        sequences (lists or tuples, say). Note that these values are
517        inclusive, closed ranges, not the more typical Python
518        half-open ranges.
519
520
521        :param list ranges: List of closed/inclusive ranges, each a tuple.
522        :returns: intspanlist combining the ranges
523        :rtype: intspanlist
524        """
525        return cls(chain(*(range(r[0], r[1] + 1) for r in ranges)))
526
527    def __repr__(self):
528        """
529        Return the representation.
530        """
531        clsname = self.__class__.__name__
532        return '{0}({1!r})'.format(clsname, self.__str__())
533
534    def __str__(self):
535        """
536        Return the stringification.
537        """
538        gk = lambda n, c=count(): _noRestDiff(n, next(c))
539        return ','.join(_as_range_str(g) for _, g in groupby(self, key=gk))
540
541    def ranges(self):
542        """
543        Return a list of the set's contiguous (inclusive) ranges.
544        """
545        gk = lambda n, c=count(): _noRestDiff(n, next(c))
546        return [_as_range(g) for _, g in groupby(self, key=gk)]
547