1 2import copy 3import re 4import sys 5from itertools import chain, count, groupby 6 7__all__ = 'intspan spanlist intspanlist TheRest ParseError'.split() 8 9_PY2 = sys.version_info[0] == 2 10if not _PY2: 11 basestring = str 12 13 14# Define regular expressions for spans and spans that may contain 15# star (TheRest) markers 16SPANRE = re.compile(r'^\s*(?P<start>-?\d+)\s*(-\s*(?P<stop>-?\d+))?\s*$') 17SPANRESTAR = re.compile( 18 r'^\s*((?P<star>\*)|(?P<start>-?\d+)\s*(-\s*(?P<stop>-?\d+))?)\s*$') 19 20 21class ParseError(ValueError): 22 """Exception raised if string specification can't be parsed.""" 23 pass 24 25 26class Rester(object): 27 """ 28 Singleton to represent "the rest of the values." 29 """ 30 31 def __repr__(self): 32 return 'TheRest' 33 34 def __str__(self): 35 return '*' 36 37 38TheRest = Rester() 39 40 41def _parse_range(datum): 42 43 """ 44 Parser for intspan and intspan list. 45 """ 46 47 def parse_chunk(chunk): 48 """ 49 Parse each comma-separated chunk. Hyphens (-) can indicate ranges, 50 or negative numbers. Returns a list of specified values. NB Designed 51 to parse correct input correctly. Results of incorrect input are 52 undefined. 53 """ 54 m = SPANRESTAR.search(chunk) 55 if m: 56 if m.group('star'): 57 return [TheRest] 58 start = int(m.group('start')) 59 if not m.group('stop'): 60 return [start] 61 stop = int(m.group('stop')) 62 if start > stop: 63 raise ParseError('start value should exceed stop ({0})'.format(chunk)) 64 return list(range(start, stop + 1)) 65 else: 66 raise ParseError("Can't parse chunk '{0}'".format(chunk)) 67 68 if isinstance(datum, basestring): 69 result = [] 70 for part in datum.split(','): 71 chunk = part.strip() 72 if chunk: 73 result.extend(parse_chunk(chunk)) 74 return result 75 76 return datum if hasattr(datum, '__iter__') else [datum] 77 78 79def spanlist(spec=None): 80 """ 81 Given a string specification like the ones given to ``intspan``, 82 return a list of the included items, in the same item given. Thus, 83 ``spanlist("3,1-4")`` yields ``[3, 1, 2, 4]``. Experimental partial 84 implementation of ability to have ordered intspans. 85 """ 86 if spec is None or (isinstance(spec, basestring) and spec.strip() == ''): 87 return [] 88 rawitems = _parse_range(spec) 89 seen = set() 90 items = [] 91 for i in rawitems: 92 if i in seen: 93 continue 94 items.append(i) 95 seen.add(i) 96 return items 97 98 99def _as_range(iterable): 100 """ 101 Return a tuple representing the bounds of the range. 102 """ 103 l = list(iterable) 104 return (l[0], l[-1]) 105 106 107def _as_range_str(iterable): 108 """ 109 Return a string representing the range as a string span. 110 """ 111 l = list(iterable) 112 if len(l) > 1: 113 return '{0}-{1}'.format(l[0], l[-1]) 114 return '{0}'.format(l[0]) 115 116 117def _noRestDiff(a, b): 118 """ 119 Special difference that, in case difference cannot be computed 120 because of ``TypeError`` (indicating that a ``Rester`` object) 121 has been found), returns a difference indicating the spanned items 122 / group has ended. 123 """ 124 try: 125 return a - b 126 except TypeError: 127 return 2 # anything more than 1 signals "the next thing is in 128 # another span, not this current one" 129 130 131class intspan(set): # pylint: disable=invalid-name 132 """ 133 A set of integers, expressed as an ordered sequence of spans. 134 Because ``intspan('1-3,14,29,92-97')`` is clearer than 135 ``[1, 2, 3, 14, 29, 92, 93, 94, 95, 96, 97]``. 136 """ 137 138 def __init__(self, initial=None): 139 """ 140 Construct a new ``intspan``. 141 142 :param iterable|str initial: Optional initial list of items. 143 :returns: the intspan 144 :rtype: intspan 145 """ 146 super(intspan, self).__init__() 147 if initial: 148 self.update(initial) 149 150 def copy(self): 151 """ 152 Return a new set with the same members. 153 """ 154 return copy.copy(self) 155 156 def update(self, items): 157 """ 158 Add multiple items. 159 160 :param iterable|str items: Items to add. May be an intspan-style string. 161 """ 162 super(intspan, self).update(_parse_range(items)) 163 return self 164 165 def intersection_update(self, items): 166 super(intspan, self).intersection_update(_parse_range(items)) 167 return self 168 169 def difference_update(self, items): 170 super(intspan, self).difference_update(_parse_range(items)) 171 return self 172 173 def symmetric_difference_update(self, items): 174 super(intspan, self).symmetric_difference_update( 175 _parse_range(items)) 176 return self 177 178 def discard(self, items): 179 """ 180 Discard items. 181 182 :param iterable|str items: Items to remove. May be an intspan-style string. 183 """ 184 for item in _parse_range(items): 185 super(intspan, self).discard(item) 186 187 def remove(self, items): 188 for item in _parse_range(items): 189 super(intspan, self).remove(item) 190 191 def add(self, items): 192 """ 193 Add items. 194 195 :param iterable|str items: Items to add. May be an intspan-style string. 196 """ 197 for item in _parse_range(items): 198 super(intspan, self).add(item) 199 200 def issubset(self, items): 201 return super(intspan, self).issubset(_parse_range(items)) 202 203 def issuperset(self, items): 204 return super(intspan, self).issuperset(_parse_range(items)) 205 206 def union(self, items): 207 return intspan(super(intspan, self).union(_parse_range(items))) 208 209 def intersection(self, items): 210 return intspan(super(intspan, self).intersection(_parse_range(items))) 211 212 def difference(self, items): 213 return intspan(super(intspan, self).difference(_parse_range(items))) 214 215 def symmetric_difference(self, items): 216 return intspan(super(intspan, self).symmetric_difference(_parse_range(items))) 217 218 __le__ = issubset 219 __ge__ = issuperset 220 __or__ = union 221 __and__ = intersection 222 __sub__ = difference 223 __xor__ = symmetric_difference 224 __ior__ = update 225 __iand__ = intersection_update 226 __isub__ = difference_update 227 __ixor__ = symmetric_difference_update 228 229 def __eq__(self, items): 230 return super(intspan, self).__eq__(_parse_range(items)) 231 232 def __lt__(self, items): 233 return super(intspan, self).__lt__(_parse_range(items)) 234 235 def __gt__(self, items): 236 return super(intspan, self).__gt__(_parse_range(items)) 237 238 def __iter__(self): 239 """ 240 Iterate in ascending order. 241 """ 242 return iter(sorted(super(intspan, self).__iter__())) 243 244 def pop(self): 245 """ 246 Remove and return an arbitrary element; raises KeyError if empty. 247 248 :returns: Arbitrary member of the set (which is removed) 249 :rtype: int 250 :raises KeyError: If the set is empty. 251 """ 252 if self: 253 min_item = min(self) 254 self.discard(min_item) 255 return min_item 256 else: 257 raise KeyError('pop from an empty set') 258 259 # This method added only for PyPy, which otherwise would get the wrong 260 # answer (unordered). 261 262 263 def universe(self, low=None, high=None): 264 """ 265 Return the "universe" or "covering set" of the given intspan--that 266 is, all of the integers between its minimum and missing values. 267 Optionally allows the bounds of the universe set to be manually specified. 268 269 :param int low: Low bound of universe. 270 :param int high: High bound of universe. 271 :returns: the universe or covering set 272 :rtype: intspan 273 """ 274 if not self and low is None and high is None: 275 return intspan() 276 low = low if low is not None else min(self) 277 high = high if high is not None else max(self) 278 universe = self.__class__.from_range(low, high) 279 return universe 280 281 def complement(self, low=None, high=None): 282 """ 283 Return the complement of the given intspan--that is, all of the 284 'missing' elements between its minimum and missing values. 285 Optionally allows the universe set to be manually specified. 286 287 :param int low: Low bound of universe to complement against. 288 :param int high: High bound of universe to complement against. 289 :returns: the complement set 290 :rtype: intspan 291 :raises ValueError: if the set is empty (thus the compelement is infinite) 292 """ 293 if not self: 294 raise ValueError('cannot represent infinite set') 295 return self.universe(low, high) - self 296 297 @classmethod 298 def from_range(cls, low, high): 299 """ 300 Construct an intspan from the low value to the high value, 301 inclusive. I.e., closed range, not the more typical Python 302 half-open range. 303 304 :param int low: Low bound of set to construct. 305 :param int high: High bound of set to construct. 306 :returns: New intspan low-high. 307 :rtype: intspan 308 """ 309 return cls(range(low, high + 1)) 310 311 @classmethod 312 def from_ranges(cls, ranges): 313 """ 314 Construct an intspan from a sequence of (low, high) value 315 sequences (lists or tuples, say). Note that these values are 316 inclusive, closed ranges, not the more typical Python 317 half-open ranges. 318 319 :param list ranges: List of closed/inclusive ranges, each a tuple. 320 :returns: intspan combining the ranges 321 :rtype: intspan 322 """ 323 return cls(chain(*(range(r[0], r[1] + 1) for r in ranges))) 324 325 def __repr__(self): 326 """ 327 Return the representation. 328 """ 329 clsname = self.__class__.__name__ 330 return '{0}({1!r})'.format(clsname, self.__str__()) 331 332 def __str__(self): 333 """ 334 Return the stringification. 335 """ 336 items = sorted(self) 337 gk = lambda n, c=count(): n - next(c) 338 return ','.join(_as_range_str(g) for _, g in groupby(items, key=gk)) 339 340 def ranges(self): 341 """ 342 Return a list of the set's contiguous (inclusive) ranges. 343 344 :returns: List of all contained ranges. 345 :rtype: list 346 """ 347 items = sorted(self) 348 gk = lambda n, c=count(): n - next(c) 349 return [_as_range(g) for _, g in groupby(items, key=gk)] 350 351 # see Jeff Mercado's answer to http://codereview.stackexchange.com/questions/5196/grouping-consecutive-numbers-into-ranges-in-python-3-2 352 # see also: http://stackoverflow.com/questions/2927213/python-finding-n-consecutive-numbers-in-a-list 353 354 355# It might be interesting to have a metaclass factory that could create 356# spansets of things other than integers. For example, enumerateds defined 357# by giving a universe of possible options. Or characters. The Ranger 358# package seems to do some of this http://pythonhosted.org/ranger/ 359 360 361class intspanlist(list): # pylint: disable=invalid-name 362 """ 363 An ordered version of ``intspan``. Is to ``list`` what ``intspan`` 364 is to ``set``, except that it is somewhat set-like, in that items 365 are not intended to be repeated. Works fine as an immutable 366 data structure. Still some issues if one mutates an instance. Not 367 terrible problems, but the set-like nature where there is only 368 one entry for each included integer may be broken. 369 """ 370 371 def __init__(self, initial=None, universe=None): 372 """ 373 Construct a new ``intspanlist`` 374 375 :param iterable|str initial: Optional initial list of items. 376 :returns: the intspanlist 377 :rtype: intspanlist 378 """ 379 super(intspanlist, self).__init__() 380 if initial: 381 self.extend(initial) 382 if universe is not None: 383 try: 384 restIndex = self.index(TheRest) 385 remaining = sorted(intspan(universe) - set(self)) 386 self[restIndex + 1:restIndex + 1] = remaining # splice 387 self.pop(restIndex) 388 except ValueError: 389 pass 390 391 def therest_update(self, universe, inplace=True): 392 """ 393 If the receiving ``intspanlist`` contains a ``TheRest`` marker, 394 replace it with the contents of the universe. Generally done 395 *in situ*, but if value of ``inplace`` kwarg false, returns 396 an edited copy. 397 """ 398 toedit = self if inplace else self.copy() 399 try: 400 restIndex = toedit.index(TheRest) 401 remaining = sorted(intspan(universe) - set(toedit)) 402 toedit[restIndex + 1:restIndex + 1] = remaining # splice 403 toedit.pop(restIndex) 404 except ValueError: 405 pass 406 return toedit 407 408 def copy(self): 409 """ 410 Return a copy of the intspanlist. 411 """ 412 return copy.copy(self) 413 414 def append(self, item): 415 """ 416 Add to the end of the intspanlist 417 418 :param int item: Item to add 419 """ 420 self.extend(spanlist(item)) 421 422 def extend(self, items): 423 """ 424 Add a collection to the end of the intspanlist 425 426 :param iterable items: integers to add 427 """ 428 seen = set(self) 429 for newitem in spanlist(items): 430 if newitem in seen: 431 continue 432 super(intspanlist, self).append(newitem) 433 434 def __eq__(self, items): 435 return super(intspanlist, self).__eq__(spanlist(items)) 436 437 def __lt__(self, items): 438 return super(intspanlist, self).__lt__(spanlist(items)) 439 440 def __gt__(self, items): 441 return super(intspanlist, self).__gt__(spanlist(items)) 442 443 def __add__(self, other): 444 return intspanlist(list(self) + list(other)) 445 446 def __sub__(self, other): 447 result = self[:] 448 for o in other: 449 try: 450 result.remove(o) 451 except ValueError: 452 pass 453 return result 454 455 def __iadd__(self, other): 456 self.extend(other) 457 return self 458 459 def __isub__(self, other): 460 for o in other: 461 try: 462 self.remove(o) 463 except ValueError: 464 pass 465 return self 466 467 def __radd__(self, other): 468 return intspanlist(list(other) + list(self)) 469 470 def __rsub__(self, other): 471 raise NotImplementedError("operation doesn't make sense") 472 473 def complement(self, low=None, high=None): 474 """ 475 Return the complement of the given intspanlist--that is, all of the 476 'missing' elements between its minimum and missing values. 477 Optionally allows the universe set to be manually specified. 478 479 :param int low: Low bound of universe to complement against. 480 :param int high: High bound of universe to complement against. 481 :returns: the complement set 482 :rtype: intspanlist 483 :raises ValueError: if the set is empty (thus the compelement is infinite) 484 """ 485 cls = self.__class__ 486 if not self: 487 raise ValueError('cannot represent infinite set') 488 low = low if low is not None else min(self) 489 high = high if high is not None else max(self) 490 universe = cls.from_range(low, high) 491 result = [] 492 contained = set(self) 493 for x in universe: 494 if x not in contained: 495 result.append(x) 496 return cls(result) 497 498 @classmethod 499 def from_range(cls, low, high): 500 """ 501 Construct an intspanlist from the low value to the high value, 502 inclusive. I.e., closed range, not the more typical Python 503 half-open range. 504 505 :param int low: Low bound. 506 :param int high: High bound. 507 :returns: New intspanlist low-high. 508 :rtype: intspanlist 509 """ 510 return cls(range(low, high + 1)) 511 512 @classmethod 513 def from_ranges(cls, ranges): 514 """ 515 Construct an intspanlist from a sequence of (low, high) value 516 sequences (lists or tuples, say). Note that these values are 517 inclusive, closed ranges, not the more typical Python 518 half-open ranges. 519 520 521 :param list ranges: List of closed/inclusive ranges, each a tuple. 522 :returns: intspanlist combining the ranges 523 :rtype: intspanlist 524 """ 525 return cls(chain(*(range(r[0], r[1] + 1) for r in ranges))) 526 527 def __repr__(self): 528 """ 529 Return the representation. 530 """ 531 clsname = self.__class__.__name__ 532 return '{0}({1!r})'.format(clsname, self.__str__()) 533 534 def __str__(self): 535 """ 536 Return the stringification. 537 """ 538 gk = lambda n, c=count(): _noRestDiff(n, next(c)) 539 return ','.join(_as_range_str(g) for _, g in groupby(self, key=gk)) 540 541 def ranges(self): 542 """ 543 Return a list of the set's contiguous (inclusive) ranges. 544 """ 545 gk = lambda n, c=count(): _noRestDiff(n, next(c)) 546 return [_as_range(g) for _, g in groupby(self, key=gk)] 547