1import warnings 2 3from collections import Counter, defaultdict, deque, abc 4from collections.abc import Sequence 5from concurrent.futures import ThreadPoolExecutor 6from functools import partial, reduce, wraps 7from heapq import merge, heapify, heapreplace, heappop 8from itertools import ( 9 chain, 10 compress, 11 count, 12 cycle, 13 dropwhile, 14 groupby, 15 islice, 16 repeat, 17 starmap, 18 takewhile, 19 tee, 20 zip_longest, 21) 22from math import exp, factorial, floor, log 23from queue import Empty, Queue 24from random import random, randrange, uniform 25from operator import itemgetter, mul, sub, gt, lt 26from sys import hexversion, maxsize 27from time import monotonic 28 29from .recipes import ( 30 consume, 31 flatten, 32 pairwise, 33 powerset, 34 take, 35 unique_everseen, 36) 37 38__all__ = [ 39 'AbortThread', 40 'adjacent', 41 'always_iterable', 42 'always_reversible', 43 'bucket', 44 'callback_iter', 45 'chunked', 46 'circular_shifts', 47 'collapse', 48 'collate', 49 'consecutive_groups', 50 'consumer', 51 'countable', 52 'count_cycle', 53 'mark_ends', 54 'difference', 55 'distinct_combinations', 56 'distinct_permutations', 57 'distribute', 58 'divide', 59 'exactly_n', 60 'filter_except', 61 'first', 62 'groupby_transform', 63 'ilen', 64 'interleave_longest', 65 'interleave', 66 'intersperse', 67 'islice_extended', 68 'iterate', 69 'ichunked', 70 'is_sorted', 71 'last', 72 'locate', 73 'lstrip', 74 'make_decorator', 75 'map_except', 76 'map_reduce', 77 'nth_or_last', 78 'nth_permutation', 79 'nth_product', 80 'numeric_range', 81 'one', 82 'only', 83 'padded', 84 'partitions', 85 'set_partitions', 86 'peekable', 87 'repeat_last', 88 'replace', 89 'rlocate', 90 'rstrip', 91 'run_length', 92 'sample', 93 'seekable', 94 'SequenceView', 95 'side_effect', 96 'sliced', 97 'sort_together', 98 'split_at', 99 'split_after', 100 'split_before', 101 'split_when', 102 'split_into', 103 'spy', 104 'stagger', 105 'strip', 106 'substrings', 107 'substrings_indexes', 108 
'time_limited', 109 'unique_to_each', 110 'unzip', 111 'windowed', 112 'with_iter', 113 'UnequalIterablesError', 114 'zip_equal', 115 'zip_offset', 116 'windowed_complete', 117 'all_unique', 118 'value_chain', 119 'product_index', 120 'combination_index', 121 'permutation_index', 122] 123 124_marker = object() 125 126 127def chunked(iterable, n, strict=False): 128 """Break *iterable* into lists of length *n*: 129 130 >>> list(chunked([1, 2, 3, 4, 5, 6], 3)) 131 [[1, 2, 3], [4, 5, 6]] 132 133 By the default, the last yielded list will have fewer than *n* elements 134 if the length of *iterable* is not divisible by *n*: 135 136 >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3)) 137 [[1, 2, 3], [4, 5, 6], [7, 8]] 138 139 To use a fill-in value instead, see the :func:`grouper` recipe. 140 141 If the length of *iterable* is not divisible by *n* and *strict* is 142 ``True``, then ``ValueError`` will be raised before the last 143 list is yielded. 144 145 """ 146 iterator = iter(partial(take, n, iter(iterable)), []) 147 if strict: 148 149 def ret(): 150 for chunk in iterator: 151 if len(chunk) != n: 152 raise ValueError('iterable is not divisible by n.') 153 yield chunk 154 155 return iter(ret()) 156 else: 157 return iterator 158 159 160def first(iterable, default=_marker): 161 """Return the first item of *iterable*, or *default* if *iterable* is 162 empty. 163 164 >>> first([0, 1, 2, 3]) 165 0 166 >>> first([], 'some default') 167 'some default' 168 169 If *default* is not provided and there are no items in the iterable, 170 raise ``ValueError``. 171 172 :func:`first` is useful when you have a generator of expensive-to-retrieve 173 values and want any arbitrary one. It is marginally shorter than 174 ``next(iter(iterable), default)``. 175 176 """ 177 try: 178 return next(iter(iterable)) 179 except StopIteration as e: 180 if default is _marker: 181 raise ValueError( 182 'first() was called on an empty iterable, and no ' 183 'default value was provided.' 
184 ) from e 185 return default 186 187 188def last(iterable, default=_marker): 189 """Return the last item of *iterable*, or *default* if *iterable* is 190 empty. 191 192 >>> last([0, 1, 2, 3]) 193 3 194 >>> last([], 'some default') 195 'some default' 196 197 If *default* is not provided and there are no items in the iterable, 198 raise ``ValueError``. 199 """ 200 try: 201 if isinstance(iterable, Sequence): 202 return iterable[-1] 203 # Work around https://bugs.python.org/issue38525 204 elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0): 205 return next(reversed(iterable)) 206 else: 207 return deque(iterable, maxlen=1)[-1] 208 except (IndexError, TypeError, StopIteration): 209 if default is _marker: 210 raise ValueError( 211 'last() was called on an empty iterable, and no default was ' 212 'provided.' 213 ) 214 return default 215 216 217def nth_or_last(iterable, n, default=_marker): 218 """Return the nth or the last item of *iterable*, 219 or *default* if *iterable* is empty. 220 221 >>> nth_or_last([0, 1, 2, 3], 2) 222 2 223 >>> nth_or_last([0, 1], 2) 224 1 225 >>> nth_or_last([], 0, 'some default') 226 'some default' 227 228 If *default* is not provided and there are no items in the iterable, 229 raise ``ValueError``. 230 """ 231 return last(islice(iterable, n + 1), default=default) 232 233 234class peekable: 235 """Wrap an iterator to allow lookahead and prepending elements. 236 237 Call :meth:`peek` on the result to get the value that will be returned 238 by :func:`next`. This won't advance the iterator: 239 240 >>> p = peekable(['a', 'b']) 241 >>> p.peek() 242 'a' 243 >>> next(p) 244 'a' 245 246 Pass :meth:`peek` a default value to return that instead of raising 247 ``StopIteration`` when the iterator is exhausted. 
248 249 >>> p = peekable([]) 250 >>> p.peek('hi') 251 'hi' 252 253 peekables also offer a :meth:`prepend` method, which "inserts" items 254 at the head of the iterable: 255 256 >>> p = peekable([1, 2, 3]) 257 >>> p.prepend(10, 11, 12) 258 >>> next(p) 259 10 260 >>> p.peek() 261 11 262 >>> list(p) 263 [11, 12, 1, 2, 3] 264 265 peekables can be indexed. Index 0 is the item that will be returned by 266 :func:`next`, index 1 is the item after that, and so on: 267 The values up to the given index will be cached. 268 269 >>> p = peekable(['a', 'b', 'c', 'd']) 270 >>> p[0] 271 'a' 272 >>> p[1] 273 'b' 274 >>> next(p) 275 'a' 276 277 Negative indexes are supported, but be aware that they will cache the 278 remaining items in the source iterator, which may require significant 279 storage. 280 281 To check whether a peekable is exhausted, check its truth value: 282 283 >>> p = peekable(['a', 'b']) 284 >>> if p: # peekable has items 285 ... list(p) 286 ['a', 'b'] 287 >>> if not p: # peekable is exhausted 288 ... list(p) 289 [] 290 291 """ 292 293 def __init__(self, iterable): 294 self._it = iter(iterable) 295 self._cache = deque() 296 297 def __iter__(self): 298 return self 299 300 def __bool__(self): 301 try: 302 self.peek() 303 except StopIteration: 304 return False 305 return True 306 307 def peek(self, default=_marker): 308 """Return the item that will be next returned from ``next()``. 309 310 Return ``default`` if there are no items left. If ``default`` is not 311 provided, raise ``StopIteration``. 312 313 """ 314 if not self._cache: 315 try: 316 self._cache.append(next(self._it)) 317 except StopIteration: 318 if default is _marker: 319 raise 320 return default 321 return self._cache[0] 322 323 def prepend(self, *items): 324 """Stack up items to be the next ones returned from ``next()`` or 325 ``self.peek()``. 
The items will be returned in 326 first in, first out order:: 327 328 >>> p = peekable([1, 2, 3]) 329 >>> p.prepend(10, 11, 12) 330 >>> next(p) 331 10 332 >>> list(p) 333 [11, 12, 1, 2, 3] 334 335 It is possible, by prepending items, to "resurrect" a peekable that 336 previously raised ``StopIteration``. 337 338 >>> p = peekable([]) 339 >>> next(p) 340 Traceback (most recent call last): 341 ... 342 StopIteration 343 >>> p.prepend(1) 344 >>> next(p) 345 1 346 >>> next(p) 347 Traceback (most recent call last): 348 ... 349 StopIteration 350 351 """ 352 self._cache.extendleft(reversed(items)) 353 354 def __next__(self): 355 if self._cache: 356 return self._cache.popleft() 357 358 return next(self._it) 359 360 def _get_slice(self, index): 361 # Normalize the slice's arguments 362 step = 1 if (index.step is None) else index.step 363 if step > 0: 364 start = 0 if (index.start is None) else index.start 365 stop = maxsize if (index.stop is None) else index.stop 366 elif step < 0: 367 start = -1 if (index.start is None) else index.start 368 stop = (-maxsize - 1) if (index.stop is None) else index.stop 369 else: 370 raise ValueError('slice step cannot be zero') 371 372 # If either the start or stop index is negative, we'll need to cache 373 # the rest of the iterable in order to slice from the right side. 374 if (start < 0) or (stop < 0): 375 self._cache.extend(self._it) 376 # Otherwise we'll need to find the rightmost index and cache to that 377 # point. 
378 else: 379 n = min(max(start, stop) + 1, maxsize) 380 cache_len = len(self._cache) 381 if n >= cache_len: 382 self._cache.extend(islice(self._it, n - cache_len)) 383 384 return list(self._cache)[index] 385 386 def __getitem__(self, index): 387 if isinstance(index, slice): 388 return self._get_slice(index) 389 390 cache_len = len(self._cache) 391 if index < 0: 392 self._cache.extend(self._it) 393 elif index >= cache_len: 394 self._cache.extend(islice(self._it, index + 1 - cache_len)) 395 396 return self._cache[index] 397 398 399def collate(*iterables, **kwargs): 400 """Return a sorted merge of the items from each of several already-sorted 401 *iterables*. 402 403 >>> list(collate('ACDZ', 'AZ', 'JKL')) 404 ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z'] 405 406 Works lazily, keeping only the next value from each iterable in memory. Use 407 :func:`collate` to, for example, perform a n-way mergesort of items that 408 don't fit in memory. 409 410 If a *key* function is specified, the iterables will be sorted according 411 to its result: 412 413 >>> key = lambda s: int(s) # Sort by numeric value, not by string 414 >>> list(collate(['1', '10'], ['2', '11'], key=key)) 415 ['1', '2', '10', '11'] 416 417 418 If the *iterables* are sorted in descending order, set *reverse* to 419 ``True``: 420 421 >>> list(collate([5, 3, 1], [4, 2, 0], reverse=True)) 422 [5, 4, 3, 2, 1, 0] 423 424 If the elements of the passed-in iterables are out of order, you might get 425 unexpected results. 426 427 On Python 3.5+, this function is an alias for :func:`heapq.merge`. 428 429 """ 430 warnings.warn( 431 "collate is no longer part of more_itertools, use heapq.merge", 432 DeprecationWarning, 433 ) 434 return merge(*iterables, **kwargs) 435 436 437def consumer(func): 438 """Decorator that automatically advances a PEP-342-style "reverse iterator" 439 to its first yield point so you don't have to call ``next()`` on it 440 manually. 441 442 >>> @consumer 443 ... def tally(): 444 ... i = 0 445 ... 
while True: 446 ... print('Thing number %s is %s.' % (i, (yield))) 447 ... i += 1 448 ... 449 >>> t = tally() 450 >>> t.send('red') 451 Thing number 0 is red. 452 >>> t.send('fish') 453 Thing number 1 is fish. 454 455 Without the decorator, you would have to call ``next(t)`` before 456 ``t.send()`` could be used. 457 458 """ 459 460 @wraps(func) 461 def wrapper(*args, **kwargs): 462 gen = func(*args, **kwargs) 463 next(gen) 464 return gen 465 466 return wrapper 467 468 469def ilen(iterable): 470 """Return the number of items in *iterable*. 471 472 >>> ilen(x for x in range(1000000) if x % 3 == 0) 473 333334 474 475 This consumes the iterable, so handle with care. 476 477 """ 478 # This approach was selected because benchmarks showed it's likely the 479 # fastest of the known implementations at the time of writing. 480 # See GitHub tracker: #236, #230. 481 counter = count() 482 deque(zip(iterable, counter), maxlen=0) 483 return next(counter) 484 485 486def iterate(func, start): 487 """Return ``start``, ``func(start)``, ``func(func(start))``, ... 488 489 >>> from itertools import islice 490 >>> list(islice(iterate(lambda x: 2*x, 1), 10)) 491 [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] 492 493 """ 494 while True: 495 yield start 496 start = func(start) 497 498 499def with_iter(context_manager): 500 """Wrap an iterable in a ``with`` statement, so it closes once exhausted. 501 502 For example, this will close the file when the iterator is exhausted:: 503 504 upper_lines = (line.upper() for line in with_iter(open('foo'))) 505 506 Any context manager which returns an iterable is a candidate for 507 ``with_iter``. 508 509 """ 510 with context_manager as iterable: 511 yield from iterable 512 513 514def one(iterable, too_short=None, too_long=None): 515 """Return the first item from *iterable*, which is expected to contain only 516 that item. Raise an exception if *iterable* is empty or has more than one 517 item. 
518 519 :func:`one` is useful for ensuring that an iterable contains only one item. 520 For example, it can be used to retrieve the result of a database query 521 that is expected to return a single row. 522 523 If *iterable* is empty, ``ValueError`` will be raised. You may specify a 524 different exception with the *too_short* keyword: 525 526 >>> it = [] 527 >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL 528 Traceback (most recent call last): 529 ... 530 ValueError: too many items in iterable (expected 1)' 531 >>> too_short = IndexError('too few items') 532 >>> one(it, too_short=too_short) # doctest: +IGNORE_EXCEPTION_DETAIL 533 Traceback (most recent call last): 534 ... 535 IndexError: too few items 536 537 Similarly, if *iterable* contains more than one item, ``ValueError`` will 538 be raised. You may specify a different exception with the *too_long* 539 keyword: 540 541 >>> it = ['too', 'many'] 542 >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL 543 Traceback (most recent call last): 544 ... 545 ValueError: Expected exactly one item in iterable, but got 'too', 546 'many', and perhaps more. 547 >>> too_long = RuntimeError 548 >>> one(it, too_long=too_long) # doctest: +IGNORE_EXCEPTION_DETAIL 549 Traceback (most recent call last): 550 ... 551 RuntimeError 552 553 Note that :func:`one` attempts to advance *iterable* twice to ensure there 554 is only one item. See :func:`spy` or :func:`peekable` to check iterable 555 contents less destructively. 
556 557 """ 558 it = iter(iterable) 559 560 try: 561 first_value = next(it) 562 except StopIteration as e: 563 raise ( 564 too_short or ValueError('too few items in iterable (expected 1)') 565 ) from e 566 567 try: 568 second_value = next(it) 569 except StopIteration: 570 pass 571 else: 572 msg = ( 573 'Expected exactly one item in iterable, but got {!r}, {!r}, ' 574 'and perhaps more.'.format(first_value, second_value) 575 ) 576 raise too_long or ValueError(msg) 577 578 return first_value 579 580 581def distinct_permutations(iterable, r=None): 582 """Yield successive distinct permutations of the elements in *iterable*. 583 584 >>> sorted(distinct_permutations([1, 0, 1])) 585 [(0, 1, 1), (1, 0, 1), (1, 1, 0)] 586 587 Equivalent to ``set(permutations(iterable))``, except duplicates are not 588 generated and thrown away. For larger input sequences this is much more 589 efficient. 590 591 Duplicate permutations arise when there are duplicated elements in the 592 input iterable. The number of items returned is 593 `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of 594 items input, and each `x_i` is the count of a distinct item in the input 595 sequence. 596 597 If *r* is given, only the *r*-length permutations are yielded. 
598 599 >>> sorted(distinct_permutations([1, 0, 1], r=2)) 600 [(0, 1), (1, 0), (1, 1)] 601 >>> sorted(distinct_permutations(range(3), r=2)) 602 [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] 603 604 """ 605 # Algorithm: https://w.wiki/Qai 606 def _full(A): 607 while True: 608 # Yield the permutation we have 609 yield tuple(A) 610 611 # Find the largest index i such that A[i] < A[i + 1] 612 for i in range(size - 2, -1, -1): 613 if A[i] < A[i + 1]: 614 break 615 # If no such index exists, this permutation is the last one 616 else: 617 return 618 619 # Find the largest index j greater than j such that A[i] < A[j] 620 for j in range(size - 1, i, -1): 621 if A[i] < A[j]: 622 break 623 624 # Swap the value of A[i] with that of A[j], then reverse the 625 # sequence from A[i + 1] to form the new permutation 626 A[i], A[j] = A[j], A[i] 627 A[i + 1 :] = A[: i - size : -1] # A[i + 1:][::-1] 628 629 # Algorithm: modified from the above 630 def _partial(A, r): 631 # Split A into the first r items and the last r items 632 head, tail = A[:r], A[r:] 633 right_head_indexes = range(r - 1, -1, -1) 634 left_tail_indexes = range(len(tail)) 635 636 while True: 637 # Yield the permutation we have 638 yield tuple(head) 639 640 # Starting from the right, find the first index of the head with 641 # value smaller than the maximum value of the tail - call it i. 642 pivot = tail[-1] 643 for i in right_head_indexes: 644 if head[i] < pivot: 645 break 646 pivot = head[i] 647 else: 648 return 649 650 # Starting from the left, find the first value of the tail 651 # with a value greater than head[i] and swap. 652 for j in left_tail_indexes: 653 if tail[j] > head[i]: 654 head[i], tail[j] = tail[j], head[i] 655 break 656 # If we didn't find one, start from the right and find the first 657 # index of the head with a value greater than head[i] and swap. 
658 else: 659 for j in right_head_indexes: 660 if head[j] > head[i]: 661 head[i], head[j] = head[j], head[i] 662 break 663 664 # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)] 665 tail += head[: i - r : -1] # head[i + 1:][::-1] 666 i += 1 667 head[i:], tail[:] = tail[: r - i], tail[r - i :] 668 669 items = sorted(iterable) 670 671 size = len(items) 672 if r is None: 673 r = size 674 675 if 0 < r <= size: 676 return _full(items) if (r == size) else _partial(items, r) 677 678 return iter(() if r else ((),)) 679 680 681def intersperse(e, iterable, n=1): 682 """Intersperse filler element *e* among the items in *iterable*, leaving 683 *n* items between each filler element. 684 685 >>> list(intersperse('!', [1, 2, 3, 4, 5])) 686 [1, '!', 2, '!', 3, '!', 4, '!', 5] 687 688 >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2)) 689 [1, 2, None, 3, 4, None, 5] 690 691 """ 692 if n == 0: 693 raise ValueError('n must be > 0') 694 elif n == 1: 695 # interleave(repeat(e), iterable) -> e, x_0, e, e, x_1, e, x_2... 696 # islice(..., 1, None) -> x_0, e, e, x_1, e, x_2... 697 return islice(interleave(repeat(e), iterable), 1, None) 698 else: 699 # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]... 700 # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]... 701 # flatten(...) -> x_0, x_1, e, x_2, x_3... 702 filler = repeat([e]) 703 chunks = chunked(iterable, n) 704 return flatten(islice(interleave(filler, chunks), 1, None)) 705 706 707def unique_to_each(*iterables): 708 """Return the elements from each of the input iterables that aren't in the 709 other input iterables. 710 711 For example, suppose you have a set of packages, each with a set of 712 dependencies:: 713 714 {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}} 715 716 If you remove one package, which dependencies can also be removed? 717 718 If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not 719 associated with ``pkg_2`` or ``pkg_3``. 
Similarly, ``C`` is only needed for 720 ``pkg_2``, and ``D`` is only needed for ``pkg_3``:: 721 722 >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'}) 723 [['A'], ['C'], ['D']] 724 725 If there are duplicates in one input iterable that aren't in the others 726 they will be duplicated in the output. Input order is preserved:: 727 728 >>> unique_to_each("mississippi", "missouri") 729 [['p', 'p'], ['o', 'u', 'r']] 730 731 It is assumed that the elements of each iterable are hashable. 732 733 """ 734 pool = [list(it) for it in iterables] 735 counts = Counter(chain.from_iterable(map(set, pool))) 736 uniques = {element for element in counts if counts[element] == 1} 737 return [list(filter(uniques.__contains__, it)) for it in pool] 738 739 740def windowed(seq, n, fillvalue=None, step=1): 741 """Return a sliding window of width *n* over the given iterable. 742 743 >>> all_windows = windowed([1, 2, 3, 4, 5], 3) 744 >>> list(all_windows) 745 [(1, 2, 3), (2, 3, 4), (3, 4, 5)] 746 747 When the window is larger than the iterable, *fillvalue* is used in place 748 of missing values: 749 750 >>> list(windowed([1, 2, 3], 4)) 751 [(1, 2, 3, None)] 752 753 Each window will advance in increments of *step*: 754 755 >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2)) 756 [(1, 2, 3), (3, 4, 5), (5, 6, '!')] 757 758 To slide into the iterable's items, use :func:`chain` to add filler items 759 to the left: 760 761 >>> iterable = [1, 2, 3, 4] 762 >>> n = 3 763 >>> padding = [None] * (n - 1) 764 >>> list(windowed(chain(padding, iterable), 3)) 765 [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)] 766 """ 767 if n < 0: 768 raise ValueError('n must be >= 0') 769 if n == 0: 770 yield tuple() 771 return 772 if step < 1: 773 raise ValueError('step must be >= 1') 774 775 window = deque(maxlen=n) 776 i = n 777 for _ in map(window.append, seq): 778 i -= 1 779 if not i: 780 i = step 781 yield tuple(window) 782 783 size = len(window) 784 if size < n: 785 yield tuple(chain(window, 
repeat(fillvalue, n - size))) 786 elif 0 < i < min(step, n): 787 window += (fillvalue,) * i 788 yield tuple(window) 789 790 791def substrings(iterable): 792 """Yield all of the substrings of *iterable*. 793 794 >>> [''.join(s) for s in substrings('more')] 795 ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more'] 796 797 Note that non-string iterables can also be subdivided. 798 799 >>> list(substrings([0, 1, 2])) 800 [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)] 801 802 """ 803 # The length-1 substrings 804 seq = [] 805 for item in iter(iterable): 806 seq.append(item) 807 yield (item,) 808 seq = tuple(seq) 809 item_count = len(seq) 810 811 # And the rest 812 for n in range(2, item_count + 1): 813 for i in range(item_count - n + 1): 814 yield seq[i : i + n] 815 816 817def substrings_indexes(seq, reverse=False): 818 """Yield all substrings and their positions in *seq* 819 820 The items yielded will be a tuple of the form ``(substr, i, j)``, where 821 ``substr == seq[i:j]``. 822 823 This function only works for iterables that support slicing, such as 824 ``str`` objects. 825 826 >>> for item in substrings_indexes('more'): 827 ... print(item) 828 ('m', 0, 1) 829 ('o', 1, 2) 830 ('r', 2, 3) 831 ('e', 3, 4) 832 ('mo', 0, 2) 833 ('or', 1, 3) 834 ('re', 2, 4) 835 ('mor', 0, 3) 836 ('ore', 1, 4) 837 ('more', 0, 4) 838 839 Set *reverse* to ``True`` to yield the same items in the opposite order. 840 841 842 """ 843 r = range(1, len(seq) + 1) 844 if reverse: 845 r = reversed(r) 846 return ( 847 (seq[i : i + L], i, i + L) for L in r for i in range(len(seq) - L + 1) 848 ) 849 850 851class bucket: 852 """Wrap *iterable* and return an object that buckets it iterable into 853 child iterables based on a *key* function. 
854 855 >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3'] 856 >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character 857 >>> sorted(list(s)) # Get the keys 858 ['a', 'b', 'c'] 859 >>> a_iterable = s['a'] 860 >>> next(a_iterable) 861 'a1' 862 >>> next(a_iterable) 863 'a2' 864 >>> list(s['b']) 865 ['b1', 'b2', 'b3'] 866 867 The original iterable will be advanced and its items will be cached until 868 they are used by the child iterables. This may require significant storage. 869 870 By default, attempting to select a bucket to which no items belong will 871 exhaust the iterable and cache all values. 872 If you specify a *validator* function, selected buckets will instead be 873 checked against it. 874 875 >>> from itertools import count 876 >>> it = count(1, 2) # Infinite sequence of odd numbers 877 >>> key = lambda x: x % 10 # Bucket by last digit 878 >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only 879 >>> s = bucket(it, key=key, validator=validator) 880 >>> 2 in s 881 False 882 >>> list(s[2]) 883 [] 884 885 """ 886 887 def __init__(self, iterable, key, validator=None): 888 self._it = iter(iterable) 889 self._key = key 890 self._cache = defaultdict(deque) 891 self._validator = validator or (lambda x: True) 892 893 def __contains__(self, value): 894 if not self._validator(value): 895 return False 896 897 try: 898 item = next(self[value]) 899 except StopIteration: 900 return False 901 else: 902 self._cache[value].appendleft(item) 903 904 return True 905 906 def _get_values(self, value): 907 """ 908 Helper to yield items from the parent iterator that match *value*. 909 Items that don't match are stored in the local cache as they 910 are encountered. 911 """ 912 while True: 913 # If we've cached some items that match the target value, emit 914 # the first one and evict it from the cache. 
915 if self._cache[value]: 916 yield self._cache[value].popleft() 917 # Otherwise we need to advance the parent iterator to search for 918 # a matching item, caching the rest. 919 else: 920 while True: 921 try: 922 item = next(self._it) 923 except StopIteration: 924 return 925 item_value = self._key(item) 926 if item_value == value: 927 yield item 928 break 929 elif self._validator(item_value): 930 self._cache[item_value].append(item) 931 932 def __iter__(self): 933 for item in self._it: 934 item_value = self._key(item) 935 if self._validator(item_value): 936 self._cache[item_value].append(item) 937 938 yield from self._cache.keys() 939 940 def __getitem__(self, value): 941 if not self._validator(value): 942 return iter(()) 943 944 return self._get_values(value) 945 946 947def spy(iterable, n=1): 948 """Return a 2-tuple with a list containing the first *n* elements of 949 *iterable*, and an iterator with the same items as *iterable*. 950 This allows you to "look ahead" at the items in the iterable without 951 advancing it. 952 953 There is one item in the list by default: 954 955 >>> iterable = 'abcdefg' 956 >>> head, iterable = spy(iterable) 957 >>> head 958 ['a'] 959 >>> list(iterable) 960 ['a', 'b', 'c', 'd', 'e', 'f', 'g'] 961 962 You may use unpacking to retrieve items instead of lists: 963 964 >>> (head,), iterable = spy('abcdefg') 965 >>> head 966 'a' 967 >>> (first, second), iterable = spy('abcdefg', 2) 968 >>> first 969 'a' 970 >>> second 971 'b' 972 973 The number of items requested can be larger than the number of items in 974 the iterable: 975 976 >>> iterable = [1, 2, 3, 4, 5] 977 >>> head, iterable = spy(iterable, 10) 978 >>> head 979 [1, 2, 3, 4, 5] 980 >>> list(iterable) 981 [1, 2, 3, 4, 5] 982 983 """ 984 it = iter(iterable) 985 head = take(n, it) 986 987 return head.copy(), chain(head, it) 988 989 990def interleave(*iterables): 991 """Return a new iterable yielding from each iterable in turn, 992 until the shortest is exhausted. 
993 994 >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8])) 995 [1, 4, 6, 2, 5, 7] 996 997 For a version that doesn't terminate after the shortest iterable is 998 exhausted, see :func:`interleave_longest`. 999 1000 """ 1001 return chain.from_iterable(zip(*iterables)) 1002 1003 1004def interleave_longest(*iterables): 1005 """Return a new iterable yielding from each iterable in turn, 1006 skipping any that are exhausted. 1007 1008 >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8])) 1009 [1, 4, 6, 2, 5, 7, 3, 8] 1010 1011 This function produces the same output as :func:`roundrobin`, but may 1012 perform better for some inputs (in particular when the number of iterables 1013 is large). 1014 1015 """ 1016 i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker)) 1017 return (x for x in i if x is not _marker) 1018 1019 1020def collapse(iterable, base_type=None, levels=None): 1021 """Flatten an iterable with multiple levels of nesting (e.g., a list of 1022 lists of tuples) into non-iterable types. 1023 1024 >>> iterable = [(1, 2), ([3, 4], [[5], [6]])] 1025 >>> list(collapse(iterable)) 1026 [1, 2, 3, 4, 5, 6] 1027 1028 Binary and text strings are not considered iterable and 1029 will not be collapsed. 
1030 1031 To avoid collapsing other types, specify *base_type*: 1032 1033 >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']] 1034 >>> list(collapse(iterable, base_type=tuple)) 1035 ['ab', ('cd', 'ef'), 'gh', 'ij'] 1036 1037 Specify *levels* to stop flattening after a certain level: 1038 1039 >>> iterable = [('a', ['b']), ('c', ['d'])] 1040 >>> list(collapse(iterable)) # Fully flattened 1041 ['a', 'b', 'c', 'd'] 1042 >>> list(collapse(iterable, levels=1)) # Only one level flattened 1043 ['a', ['b'], 'c', ['d']] 1044 1045 """ 1046 1047 def walk(node, level): 1048 if ( 1049 ((levels is not None) and (level > levels)) 1050 or isinstance(node, (str, bytes)) 1051 or ((base_type is not None) and isinstance(node, base_type)) 1052 ): 1053 yield node 1054 return 1055 1056 try: 1057 tree = iter(node) 1058 except TypeError: 1059 yield node 1060 return 1061 else: 1062 for child in tree: 1063 yield from walk(child, level + 1) 1064 1065 yield from walk(iterable, 0) 1066 1067 1068def side_effect(func, iterable, chunk_size=None, before=None, after=None): 1069 """Invoke *func* on each item in *iterable* (or on each *chunk_size* group 1070 of items) before yielding the item. 1071 1072 `func` must be a function that takes a single argument. Its return value 1073 will be discarded. 1074 1075 *before* and *after* are optional functions that take no arguments. They 1076 will be executed before iteration starts and after it ends, respectively. 1077 1078 `side_effect` can be used for logging, updating progress bars, or anything 1079 that is not functionally "pure." 
1080 1081 Emitting a status message: 1082 1083 >>> from more_itertools import consume 1084 >>> func = lambda item: print('Received {}'.format(item)) 1085 >>> consume(side_effect(func, range(2))) 1086 Received 0 1087 Received 1 1088 1089 Operating on chunks of items: 1090 1091 >>> pair_sums = [] 1092 >>> func = lambda chunk: pair_sums.append(sum(chunk)) 1093 >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2)) 1094 [0, 1, 2, 3, 4, 5] 1095 >>> list(pair_sums) 1096 [1, 5, 9] 1097 1098 Writing to a file-like object: 1099 1100 >>> from io import StringIO 1101 >>> from more_itertools import consume 1102 >>> f = StringIO() 1103 >>> func = lambda x: print(x, file=f) 1104 >>> before = lambda: print(u'HEADER', file=f) 1105 >>> after = f.close 1106 >>> it = [u'a', u'b', u'c'] 1107 >>> consume(side_effect(func, it, before=before, after=after)) 1108 >>> f.closed 1109 True 1110 1111 """ 1112 try: 1113 if before is not None: 1114 before() 1115 1116 if chunk_size is None: 1117 for item in iterable: 1118 func(item) 1119 yield item 1120 else: 1121 for chunk in chunked(iterable, chunk_size): 1122 func(chunk) 1123 yield from chunk 1124 finally: 1125 if after is not None: 1126 after() 1127 1128 1129def sliced(seq, n, strict=False): 1130 """Yield slices of length *n* from the sequence *seq*. 1131 1132 >>> list(sliced((1, 2, 3, 4, 5, 6), 3)) 1133 [(1, 2, 3), (4, 5, 6)] 1134 1135 By the default, the last yielded slice will have fewer than *n* elements 1136 if the length of *seq* is not divisible by *n*: 1137 1138 >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3)) 1139 [(1, 2, 3), (4, 5, 6), (7, 8)] 1140 1141 If the length of *seq* is not divisible by *n* and *strict* is 1142 ``True``, then ``ValueError`` will be raised before the last 1143 slice is yielded. 1144 1145 This function will only work for iterables that support slicing. 1146 For non-sliceable iterables, see :func:`chunked`. 
1147 1148 """ 1149 iterator = takewhile(len, (seq[i : i + n] for i in count(0, n))) 1150 if strict: 1151 1152 def ret(): 1153 for _slice in iterator: 1154 if len(_slice) != n: 1155 raise ValueError("seq is not divisible by n.") 1156 yield _slice 1157 1158 return iter(ret()) 1159 else: 1160 return iterator 1161 1162 1163def split_at(iterable, pred, maxsplit=-1, keep_separator=False): 1164 """Yield lists of items from *iterable*, where each list is delimited by 1165 an item where callable *pred* returns ``True``. 1166 1167 >>> list(split_at('abcdcba', lambda x: x == 'b')) 1168 [['a'], ['c', 'd', 'c'], ['a']] 1169 1170 >>> list(split_at(range(10), lambda n: n % 2 == 1)) 1171 [[0], [2], [4], [6], [8], []] 1172 1173 At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, 1174 then there is no limit on the number of splits: 1175 1176 >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2)) 1177 [[0], [2], [4, 5, 6, 7, 8, 9]] 1178 1179 By default, the delimiting items are not included in the output. 1180 The include them, set *keep_separator* to ``True``. 
def split_before(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends just before
    an item for which callable *pred* returns ``True``:

    >>> list(split_before('OneTwo', lambda s: s.isupper()))
    [['O', 'n', 'e'], ['T', 'w', 'o']]

    >>> list(split_before(range(10), lambda n: n % 3 == 0))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

    At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
    then there is no limit on the number of splits:

    >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
    """
    # A maxsplit of zero means no splitting at all.
    if maxsplit == 0:
        yield list(iterable)
        return

    group = []
    iterator = iter(iterable)
    for item in iterator:
        # A split happens before a matching item, but never at the very
        # start (which would emit an empty leading group).
        if pred(item) and group:
            yield group
            maxsplit -= 1
            if maxsplit == 0:
                # Split budget exhausted: the rest is one final group.
                yield [item] + list(iterator)
                return
            group = []
        group.append(item)
    if group:
        yield group
def split_when(iterable, pred, maxsplit=-1):
    """Split *iterable* into pieces based on the output of *pred*.
    *pred* should be a function that takes successive pairs of items and
    returns ``True`` if the iterable should be split in between them.

    For example, to find runs of increasing numbers, split the iterable when
    element ``i`` is larger than element ``i + 1``:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y))
    [[1, 2, 3, 3], [2, 5], [2, 4], [2]]

    At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
    then there is no limit on the number of splits:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2],
    ...                 lambda x, y: x > y, maxsplit=2))
    [[1, 2, 3, 3], [2, 5], [2, 4, 2]]

    """
    # A maxsplit of zero means the input passes through as a single piece.
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    try:
        previous = next(iterator)
    except StopIteration:
        # Empty input: yield nothing at all.
        return

    group = [previous]
    for current in iterator:
        # *pred* sees each adjacent (previous, current) pair exactly once.
        if pred(previous, current):
            yield group
            maxsplit -= 1
            if maxsplit == 0:
                # Split budget exhausted: everything left is one piece.
                yield [current] + list(iterator)
                return
            group = []
        group.append(current)
        previous = current

    yield group
def padded(iterable, fillvalue=None, n=None, next_multiple=False):
    """Yield the elements from *iterable*, followed by *fillvalue*, such that
    at least *n* items are emitted.

    >>> list(padded([1, 2, 3], '?', 5))
    [1, 2, 3, '?', '?']

    If *next_multiple* is ``True``, *fillvalue* will be emitted until the
    number of items emitted is a multiple of *n*::

    >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
    [1, 2, 3, 4, None, None]

    If *n* is ``None``, *fillvalue* will be emitted indefinitely.

    """
    iterator = iter(iterable)
    if n is None:
        # No target length: pad forever.
        yield from chain(iterator, repeat(fillvalue))
        return
    if n < 1:
        raise ValueError('n must be at least 1')

    # Pass the items through, keeping count of how many were emitted.
    emitted = 0
    for emitted, item in enumerate(iterator, 1):
        yield item

    # How many fill values are still owed; a negative deficit means the
    # input was already long enough and repeat() emits nothing.
    if next_multiple:
        deficit = (n - emitted) % n
    else:
        deficit = n - emitted
    yield from repeat(fillvalue, deficit)
def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
    """Yield tuples whose elements are offset from *iterable*.
    The amount by which the `i`-th item in each tuple is offset is given by
    the `i`-th item in *offsets*.

    >>> list(stagger([0, 1, 2, 3]))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3)]
    >>> list(stagger(range(8), offsets=(0, 2, 4)))
    [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]

    By default the output ends when the final element of a tuple is the
    last item in the iterable. To continue until the *first* element of a
    tuple is the last item in the iterable, set *longest* to ``True``::

    >>> list(stagger([0, 1, 2, 3], longest=True))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]

    Positions beyond the ends of the sequence are filled with ``None``
    unless some other *fillvalue* is specified.

    """
    # One independent copy of the input per requested offset;
    # zip_offset does the actual shifting and zipping.
    copies = tee(iterable, len(offsets))

    return zip_offset(
        *copies, offsets=offsets, longest=longest, fillvalue=fillvalue
    )
def zip_offset(*iterables, offsets, longest=False, fillvalue=None):
    """``zip`` the input *iterables* together, but offset the `i`-th iterable
    by the `i`-th item in *offsets*.

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]

    This can be used as a lightweight alternative to SciPy or pandas to analyze
    data sets in which some series have a lead or lag relationship.

    By default, the sequence will end when the shortest iterable is exhausted.
    To continue until the longest iterable is exhausted, set *longest* to
    ``True``.

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]

    Positions beyond the ends of a sequence are filled with ``None`` unless
    some other *fillvalue* is specified.

    """
    if len(iterables) != len(offsets):
        raise ValueError("Number of iterables and offsets didn't match")

    def shift(source, amount):
        # Negative offsets prepend fill values; positive offsets skip
        # leading items; zero leaves the iterable untouched.
        if amount < 0:
            return chain(repeat(fillvalue, -amount), source)
        if amount > 0:
            return islice(source, amount, None)
        return source

    shifted = [shift(source, amount) for source, amount in zip(iterables, offsets)]

    if longest:
        return zip_longest(*shifted, fillvalue=fillvalue)
    return zip(*shifted)
If each 1592 iterable represents a column of data, the key list determines which 1593 columns are used for sorting. 1594 1595 By default, all iterables are sorted using the ``0``-th iterable:: 1596 1597 >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')] 1598 >>> sort_together(iterables) 1599 [(1, 2, 3, 4), ('d', 'c', 'b', 'a')] 1600 1601 Set a different key list to sort according to another iterable. 1602 Specifying multiple keys dictates how ties are broken:: 1603 1604 >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')] 1605 >>> sort_together(iterables, key_list=(1, 2)) 1606 [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] 1607 1608 To sort by a function of the elements of the iterable, pass a *key* 1609 function. Its arguments are the elements of the iterables corresponding to 1610 the key list:: 1611 1612 >>> names = ('a', 'b', 'c') 1613 >>> lengths = (1, 2, 3) 1614 >>> widths = (5, 2, 1) 1615 >>> def area(length, width): 1616 ... return length * width 1617 >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) 1618 [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] 1619 1620 Set *reverse* to ``True`` to sort in descending order. 
def unzip(iterable):
    """The inverse of :func:`zip`, this function disaggregates the elements
    of the zipped *iterable*.

    The ``i``-th iterable contains the ``i``-th element from each element
    of the zipped iterable. The first element is used to determine the
    length of the remaining elements.

    >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    >>> letters, numbers = unzip(iterable)
    >>> list(letters)
    ['a', 'b', 'c', 'd']
    >>> list(numbers)
    [1, 2, 3, 4]

    This is similar to using ``zip(*iterable)``, but it avoids reading
    *iterable* into memory. Note, however, that this function uses
    :func:`itertools.tee` and thus may require significant storage.

    """
    head, iterable = spy(iter(iterable))
    if not head:
        # Nothing to disaggregate, e.g. zip([], [], [])
        return ()
    # spy returns a one-item list as head; its single element tells us how
    # many output iterables to produce.
    head = head[0]
    copies = tee(iterable, len(head))

    def getter_for(index):
        def getter(obj):
            try:
                return obj[index]
            except IndexError:
                # For "improperly zipped" input such as
                # iter([(1, 2, 3), (4, 5), (6,)]), a later tuple may be
                # shorter than the first. Raising StopIteration from the
                # mapped function ends that output iterable at the first
                # length mismatch instead of propagating IndexError.
                raise StopIteration

        return getter

    return tuple(map(getter_for(i), it) for i, it in enumerate(copies))
def always_iterable(obj, base_type=(str, bytes)):
    """If *obj* is iterable, return an iterator over its items::

        >>> obj = (1, 2, 3)
        >>> list(always_iterable(obj))
        [1, 2, 3]

    If *obj* is not iterable, return a one-item iterable containing *obj*::

        >>> obj = 1
        >>> list(always_iterable(obj))
        [1]

    If *obj* is ``None``, return an empty iterable:

        >>> obj = None
        >>> list(always_iterable(None))
        []

    By default, binary and text strings are not considered iterable::

        >>> obj = 'foo'
        >>> list(always_iterable(obj))
        ['foo']

    If *base_type* is set, objects for which ``isinstance(obj, base_type)``
    returns ``True`` won't be considered iterable.

        >>> obj = {'a': 1}
        >>> list(always_iterable(obj))  # Iterate over the dict's keys
        ['a']
        >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
        [{'a': 1}]

    Set *base_type* to ``None`` to avoid any special handling and treat objects
    Python considers iterable as iterable:

        >>> obj = 'foo'
        >>> list(always_iterable(obj, base_type=None))
        ['f', 'o', 'o']
    """
    # None is special-cased to an empty iterator rather than (None,).
    if obj is None:
        return iter(())

    # Instances of *base_type* (strings and bytes by default) are treated
    # as single items even though they are technically iterable.
    treat_as_atom = base_type is not None and isinstance(obj, base_type)
    if not treat_as_atom:
        try:
            return iter(obj)
        except TypeError:
            # Not iterable at all: fall through and wrap it.
            pass
    return iter((obj,))
def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None):
    """An extension of :func:`itertools.groupby` that can apply transformations
    to the grouped data.

    * *keyfunc* is a function computing a key value for each item in *iterable*
    * *valuefunc* is a function that transforms the individual items from
      *iterable* after grouping
    * *reducefunc* is a function that transforms each group of items

    >>> iterable = 'aAAbBBcCC'
    >>> keyfunc = lambda k: k.upper()
    >>> valuefunc = lambda v: v.lower()
    >>> reducefunc = lambda g: ''.join(g)
    >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc))
    [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')]

    Each optional argument defaults to an identity function if not specified.

    :func:`groupby_transform` is useful when grouping elements of an iterable
    using a separate iterable as the key. To do this, :func:`zip` the iterables
    and pass a *keyfunc* that extracts the first element and a *valuefunc*
    that extracts the second element::

        >>> from operator import itemgetter
        >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3]
        >>> values = 'abcdefghi'
        >>> iterable = zip(keys, values)
        >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1))
        >>> [(k, ''.join(g)) for k, g in grouper]
        [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')]

    Note that the order of items in the iterable is significant.
    Only adjacent items are grouped together, so if you don't want any
    duplicate groups, you should sort the iterable by the key function.

    """
    # Start from a plain itertools.groupby and wrap it with lazy
    # transformations only when the corresponding function was supplied.
    result = groupby(iterable, keyfunc)
    if valuefunc:
        result = ((key, map(valuefunc, group)) for key, group in result)
    if reducefunc:
        result = ((key, reducefunc(group)) for key, group in result)

    return result
1925 1926 ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* 1927 is a ``datetime.timedelta`` object: 1928 1929 >>> import datetime 1930 >>> start = datetime.datetime(2019, 1, 1) 1931 >>> stop = datetime.datetime(2019, 1, 3) 1932 >>> step = datetime.timedelta(days=1) 1933 >>> items = iter(numeric_range(start, stop, step)) 1934 >>> next(items) 1935 datetime.datetime(2019, 1, 1, 0, 0) 1936 >>> next(items) 1937 datetime.datetime(2019, 1, 2, 0, 0) 1938 1939 """ 1940 1941 _EMPTY_HASH = hash(range(0, 0)) 1942 1943 def __init__(self, *args): 1944 argc = len(args) 1945 if argc == 1: 1946 (self._stop,) = args 1947 self._start = type(self._stop)(0) 1948 self._step = type(self._stop - self._start)(1) 1949 elif argc == 2: 1950 self._start, self._stop = args 1951 self._step = type(self._stop - self._start)(1) 1952 elif argc == 3: 1953 self._start, self._stop, self._step = args 1954 elif argc == 0: 1955 raise TypeError( 1956 'numeric_range expected at least ' 1957 '1 argument, got {}'.format(argc) 1958 ) 1959 else: 1960 raise TypeError( 1961 'numeric_range expected at most ' 1962 '3 arguments, got {}'.format(argc) 1963 ) 1964 1965 self._zero = type(self._step)(0) 1966 if self._step == self._zero: 1967 raise ValueError('numeric_range() arg 3 must not be zero') 1968 self._growing = self._step > self._zero 1969 self._init_len() 1970 1971 def __bool__(self): 1972 if self._growing: 1973 return self._start < self._stop 1974 else: 1975 return self._start > self._stop 1976 1977 def __contains__(self, elem): 1978 if self._growing: 1979 if self._start <= elem < self._stop: 1980 return (elem - self._start) % self._step == self._zero 1981 else: 1982 if self._start >= elem > self._stop: 1983 return (self._start - elem) % (-self._step) == self._zero 1984 1985 return False 1986 1987 def __eq__(self, other): 1988 if isinstance(other, numeric_range): 1989 empty_self = not bool(self) 1990 empty_other = not bool(other) 1991 if empty_self or empty_other: 1992 return 
empty_self and empty_other # True if both empty 1993 else: 1994 return ( 1995 self._start == other._start 1996 and self._step == other._step 1997 and self._get_by_index(-1) == other._get_by_index(-1) 1998 ) 1999 else: 2000 return False 2001 2002 def __getitem__(self, key): 2003 if isinstance(key, int): 2004 return self._get_by_index(key) 2005 elif isinstance(key, slice): 2006 step = self._step if key.step is None else key.step * self._step 2007 2008 if key.start is None or key.start <= -self._len: 2009 start = self._start 2010 elif key.start >= self._len: 2011 start = self._stop 2012 else: # -self._len < key.start < self._len 2013 start = self._get_by_index(key.start) 2014 2015 if key.stop is None or key.stop >= self._len: 2016 stop = self._stop 2017 elif key.stop <= -self._len: 2018 stop = self._start 2019 else: # -self._len < key.stop < self._len 2020 stop = self._get_by_index(key.stop) 2021 2022 return numeric_range(start, stop, step) 2023 else: 2024 raise TypeError( 2025 'numeric range indices must be ' 2026 'integers or slices, not {}'.format(type(key).__name__) 2027 ) 2028 2029 def __hash__(self): 2030 if self: 2031 return hash((self._start, self._get_by_index(-1), self._step)) 2032 else: 2033 return self._EMPTY_HASH 2034 2035 def __iter__(self): 2036 values = (self._start + (n * self._step) for n in count()) 2037 if self._growing: 2038 return takewhile(partial(gt, self._stop), values) 2039 else: 2040 return takewhile(partial(lt, self._stop), values) 2041 2042 def __len__(self): 2043 return self._len 2044 2045 def _init_len(self): 2046 if self._growing: 2047 start = self._start 2048 stop = self._stop 2049 step = self._step 2050 else: 2051 start = self._stop 2052 stop = self._start 2053 step = -self._step 2054 distance = stop - start 2055 if distance <= self._zero: 2056 self._len = 0 2057 else: # distance > 0 and step > 0: regular euclidean division 2058 q, r = divmod(distance, step) 2059 self._len = int(q) + int(r != self._zero) 2060 2061 def 
def count_cycle(iterable, n=None):
    """Cycle through the items from *iterable* up to *n* times, yielding
    the number of completed cycles along with each item. If *n* is omitted the
    process repeats indefinitely.

    >>> list(count_cycle('AB', 3))
    [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]

    """
    # Materialize the input once so it can be replayed on every cycle.
    items = tuple(iterable)
    if not items:
        return iter(())
    if n is None:
        rounds = count()
    else:
        rounds = range(n)
    return ((cycle_number, item) for cycle_number in rounds for item in items)
def locate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``.

    *pred* defaults to :func:`bool`, which will select truthy items:

    >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
    [1, 2, 4]

    Set *pred* to a custom function to, e.g., find the indexes for a particular
    item.

    >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
    [1, 3]

    If *window_size* is given, then the *pred* function will be called with
    that many items. This enables searching for sub-sequences:

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(locate(iterable, pred=pred, window_size=3))
    [1, 5, 9]

    Use with :func:`seekable` to find indexes and then retrieve the associated
    items:

    >>> from itertools import count
    >>> from more_itertools import seekable
    >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count())
    >>> it = seekable(source)
    >>> pred = lambda x: x > 100
    >>> indexes = locate(it, pred=pred)
    >>> i = next(indexes)
    >>> it.seek(i)
    >>> next(it)
    106

    """
    if window_size is not None:
        if window_size < 1:
            raise ValueError('window size must be at least 1')
        # Slide a window over the input; short trailing windows are padded
        # with the module-level sentinel, and *pred* receives the window's
        # items as separate arguments.
        windows = windowed(iterable, window_size, fillvalue=_marker)
        return compress(count(), starmap(pred, windows))

    # No window: keep the indexes of items for which pred(item) is true.
    return compress(count(), map(pred, iterable))
2228 2229 For example, to remove a set of items from the end of an iterable: 2230 2231 >>> iterable = (None, False, None, 1, 2, None, 3, False, None) 2232 >>> pred = lambda x: x in {None, False, ''} 2233 >>> list(rstrip(iterable, pred)) 2234 [None, False, None, 1, 2, None, 3] 2235 2236 This function is analogous to :func:`str.rstrip`. 2237 2238 """ 2239 cache = [] 2240 cache_append = cache.append 2241 cache_clear = cache.clear 2242 for x in iterable: 2243 if pred(x): 2244 cache_append(x) 2245 else: 2246 yield from cache 2247 cache_clear() 2248 yield x 2249 2250 2251def strip(iterable, pred): 2252 """Yield the items from *iterable*, but strip any from the 2253 beginning and end for which *pred* returns ``True``. 2254 2255 For example, to remove a set of items from both ends of an iterable: 2256 2257 >>> iterable = (None, False, None, 1, 2, None, 3, False, None) 2258 >>> pred = lambda x: x in {None, False, ''} 2259 >>> list(strip(iterable, pred)) 2260 [1, 2, None, 3] 2261 2262 This function is analogous to :func:`str.strip`. 2263 2264 """ 2265 return rstrip(lstrip(iterable, pred), pred) 2266 2267 2268class islice_extended: 2269 """An extension of :func:`itertools.islice` that supports negative values 2270 for *stop*, *start*, and *step*. 2271 2272 >>> iterable = iter('abcdefgh') 2273 >>> list(islice_extended(iterable, -4, -1)) 2274 ['e', 'f', 'g'] 2275 2276 Slices with negative values require some caching of *iterable*, but this 2277 function takes care to minimize the amount of memory required. 

    For example, you can use a negative step with an infinite iterator:

    >>> from itertools import count
    >>> list(islice_extended(count(), 110, 99, -2))
    [110, 108, 106, 104, 102, 100]

    You can also use slice notation directly:

    >>> iterable = map(str, count())
    >>> it = islice_extended(iterable)[10:20:2]
    >>> list(it)
    ['10', '12', '14', '16', '18']

    """

    def __init__(self, iterable, *args):
        it = iter(iterable)
        if args:
            self._iterable = _islice_helper(it, slice(*args))
        else:
            self._iterable = it

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterable)

    def __getitem__(self, key):
        if isinstance(key, slice):
            return islice_extended(_islice_helper(self._iterable, key))

        raise TypeError('islice_extended.__getitem__ argument must be a slice')


def _islice_helper(it, s):
    # Generator implementing extended slicing of iterator *it* per the slice
    # object *s*. Negative indexes are resolved by caching just enough items
    # in a bounded deque rather than materializing the whole iterable.
    start = s.start
    stop = s.stop
    if s.step == 0:
        raise ValueError('step argument must be a non-zero integer or None.')
    step = s.step or 1

    if step > 0:
        start = 0 if (start is None) else start

        if start < 0:
            # Consume all but the last -start items
            cache = deque(enumerate(it, 1), maxlen=-start)
            len_iter = cache[-1][0] if cache else 0

            # Adjust start to be positive
            i = max(len_iter + start, 0)

            # Adjust stop to be positive
            if stop is None:
                j = len_iter
            elif stop >= 0:
                j = min(stop, len_iter)
            else:
                j = max(len_iter + stop, 0)

            # Slice the cache
            n = j - i
            if n <= 0:
                return

            for index, item in islice(cache, 0, n, step):
                yield item
        elif (stop is not None) and (stop < 0):
            # Advance to the start position
            next(islice(it, start, start), None)

            # When stop is negative, we have to carry -stop items while
            # iterating
            cache = deque(islice(it, -stop), maxlen=-stop)

            for index, item in enumerate(it):
                cached_item = cache.popleft()
                if index % step == 0:
                    yield cached_item
                cache.append(item)
        else:
            # When both start and stop are positive we have the normal case
            yield from islice(it, start, stop, step)
    else:
        start = -1 if (start is None) else start

        if (stop is not None) and (stop < 0):
            # Consume all but the last items
            n = -stop - 1
            cache = deque(enumerate(it, 1), maxlen=n)
            len_iter = cache[-1][0] if cache else 0

            # If start and stop are both negative they are comparable and
            # we can just slice. Otherwise we can adjust start to be negative
            # and then slice.
            if start < 0:
                i, j = start, stop
            else:
                i, j = min(start - len_iter, -1), None

            for index, item in list(cache)[i:j:step]:
                yield item
        else:
            # Advance to the stop position
            if stop is not None:
                m = stop + 1
                next(islice(it, m, m), None)

            # stop is positive, so if start is negative they are not comparable
            # and we need the rest of the items.
            if start < 0:
                i = start
                n = None
            # stop is None and start is positive, so we just need items up to
            # the start index.
            elif stop is None:
                i = None
                n = start + 1
            # Both stop and start are positive, so they are comparable.
            else:
                i = None
                n = start - stop
                if n <= 0:
                    return

            cache = list(islice(it, n))

            yield from cache[i::step]


def always_reversible(iterable):
    """An extension of :func:`reversed` that supports all iterables, not
    just those which implement the ``Reversible`` or ``Sequence`` protocols.

    >>> print(*always_reversible(x for x in range(3)))
    2 1 0

    If the iterable is already reversible, this function returns the
    result of :func:`reversed()`.
    If the iterable is not reversible,
    this function will cache the remaining items in the iterable and
    yield them in reverse order, which may require significant storage.
    """
    try:
        return reversed(iterable)
    except TypeError:
        # Not reversible: materialize the remaining items and reverse those.
        return reversed(list(iterable))


def consecutive_groups(iterable, ordering=lambda x: x):
    """Yield groups of consecutive items using :func:`itertools.groupby`.
    The *ordering* function determines whether two items are adjacent by
    returning their position.

    By default, the ordering function is the identity function. This is
    suitable for finding runs of numbers:

    >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
    >>> for group in consecutive_groups(iterable):
    ...     print(list(group))
    [1]
    [10, 11, 12]
    [20]
    [30, 31, 32, 33]
    [40]

    For finding runs of adjacent letters, try using the :meth:`index` method
    of a string of letters:

    >>> from string import ascii_lowercase
    >>> iterable = 'abcdfgilmnop'
    >>> ordering = ascii_lowercase.index
    >>> for group in consecutive_groups(iterable, ordering):
    ...     print(list(group))
    ['a', 'b', 'c', 'd']
    ['f', 'g']
    ['i']
    ['l', 'm', 'n', 'o', 'p']

    Each group of consecutive items is an iterator that shares its source with
    *iterable*. When an output group is advanced, the previous group is
    no longer available unless its elements are copied (e.g., into a ``list``).

    >>> iterable = [1, 2, 11, 12, 21, 22]
    >>> saved_groups = []
    >>> for group in consecutive_groups(iterable):
    ...     saved_groups.append(list(group))  # Copy group elements
    >>> saved_groups
    [[1, 2], [11, 12], [21, 22]]

    """
    # Within a run of consecutive items, (index - ordering(item)) is
    # constant, so that difference serves as the groupby key.
    for k, g in groupby(
        enumerate(iterable), key=lambda x: x[0] - ordering(x[1])
    ):
        yield map(itemgetter(1), g)


def difference(iterable, func=sub, *, initial=None):
    """This function is the inverse of :func:`itertools.accumulate`. By default
    it will compute the first difference of *iterable* using
    :func:`operator.sub`:

    >>> from itertools import accumulate
    >>> iterable = accumulate([0, 1, 2, 3, 4])  # produces 0, 1, 3, 6, 10
    >>> list(difference(iterable))
    [0, 1, 2, 3, 4]

    *func* defaults to :func:`operator.sub`, but other functions can be
    specified. They will be applied as follows::

        A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...

    For example, to do progressive division:

    >>> iterable = [1, 2, 6, 24, 120]
    >>> func = lambda x, y: x // y
    >>> list(difference(iterable, func))
    [1, 2, 3, 4, 5]

    If the *initial* keyword is set, the first element will be skipped when
    computing successive differences.

    >>> it = [10, 11, 13, 16]  # from accumulate([1, 2, 3], initial=10)
    >>> list(difference(it, initial=10))
    [1, 2, 3]

    """
    a, b = tee(iterable)
    try:
        first = [next(b)]
    except StopIteration:
        return iter([])

    # When *initial* is given, the leading item is the seed value rather
    # than data, so it is omitted from the output.
    if initial is not None:
        first = []

    return chain(first, starmap(func, zip(b, a)))


class SequenceView(Sequence):
    """Return a read-only view of the sequence object *target*.

    :class:`SequenceView` objects are analogous to Python's built-in
    "dictionary view" types. They provide a dynamic view of a sequence's items,
    meaning that when the sequence updates, so does the view.

    >>> seq = ['0', '1', '2']
    >>> view = SequenceView(seq)
    >>> view
    SequenceView(['0', '1', '2'])
    >>> seq.append('3')
    >>> view
    SequenceView(['0', '1', '2', '3'])

    Sequence views support indexing, slicing, and length queries. They act
    like the underlying sequence, except they don't allow assignment:

    >>> view[1]
    '1'
    >>> view[1:-1]
    ['1', '2']
    >>> len(view)
    4

    Sequence views are useful as an alternative to copying, as they don't
    require (much) extra storage.

    """

    def __init__(self, target):
        if not isinstance(target, Sequence):
            raise TypeError
        self._target = target

    def __getitem__(self, index):
        return self._target[index]

    def __len__(self):
        return len(self._target)

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, repr(self._target))


class seekable:
    """Wrap an iterator to allow for seeking backward and forward. This
    progressively caches the items in the source iterable so they can be
    re-visited.

    Call :meth:`seek` with an index to seek to that position in the source
    iterable.

    To "reset" an iterator, seek to ``0``:

    >>> from itertools import count
    >>> it = seekable((str(n) for n in count()))
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> it.seek(0)
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> next(it)
    '3'

    You can also seek forward:

    >>> it = seekable((str(n) for n in range(20)))
    >>> it.seek(10)
    >>> next(it)
    '10'
    >>> it.seek(20)  # Seeking past the end of the source isn't a problem
    >>> list(it)
    []
    >>> it.seek(0)  # Resetting works even after hitting the end
    >>> next(it), next(it), next(it)
    ('0', '1', '2')

    Call :meth:`peek` to look ahead one item without advancing the iterator:

    >>> it = seekable('1234')
    >>> it.peek()
    '1'
    >>> list(it)
    ['1', '2', '3', '4']
    >>> it.peek(default='empty')
    'empty'

    Before the iterator is at its end, calling :func:`bool` on it will return
    ``True``. After it will return ``False``:

    >>> it = seekable('5678')
    >>> bool(it)
    True
    >>> list(it)
    ['5', '6', '7', '8']
    >>> bool(it)
    False

    You may view the contents of the cache with the :meth:`elements` method.
    That returns a :class:`SequenceView`, a view that updates automatically:

    >>> it = seekable((str(n) for n in range(10)))
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> elements = it.elements()
    >>> elements
    SequenceView(['0', '1', '2'])
    >>> next(it)
    '3'
    >>> elements
    SequenceView(['0', '1', '2', '3'])

    By default, the cache grows as the source iterable progresses, so beware of
    wrapping very large or infinite iterables. Supply *maxlen* to limit the
    size of the cache (this of course limits how far back you can seek).

    >>> from itertools import count
    >>> it = seekable((str(n) for n in count()), maxlen=2)
    >>> next(it), next(it), next(it), next(it)
    ('0', '1', '2', '3')
    >>> list(it.elements())
    ['2', '3']
    >>> it.seek(0)
    >>> next(it), next(it), next(it), next(it)
    ('2', '3', '4', '5')
    >>> next(it)
    '6'

    """

    def __init__(self, iterable, maxlen=None):
        self._source = iter(iterable)
        # With maxlen, the cache is a bounded deque, so old items fall off
        # and can no longer be seeked back to.
        if maxlen is None:
            self._cache = []
        else:
            self._cache = deque([], maxlen)
        # _index is None while consuming new items from the source;
        # otherwise it's the cache position the next __next__ will replay.
        self._index = None

    def __iter__(self):
        return self

    def __next__(self):
        if self._index is not None:
            try:
                item = self._cache[self._index]
            except IndexError:
                # The whole cache has been replayed; resume the source.
                self._index = None
            else:
                self._index += 1
                return item

        item = next(self._source)
        self._cache.append(item)
        return item

    def __bool__(self):
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        try:
            peeked = next(self)
        except StopIteration:
            if default is _marker:
                raise
            return default
        # Rewind one position so the peeked item is served again next time.
        if self._index is None:
            self._index = len(self._cache)
        self._index -= 1
        return peeked

    def elements(self):
        return SequenceView(self._cache)

    def seek(self, index):
        self._index = index
        remainder = index - len(self._cache)
        # Seeking past the cache: pull items from the source to catch up.
        if remainder > 0:
            consume(self, remainder)


class run_length:
    """
    :func:`run_length.encode` compresses an iterable with run-length encoding.
    It yields groups of repeated items with the count of how many times they
    were repeated:

    >>> uncompressed = 'abbcccdddd'
    >>> list(run_length.encode(uncompressed))
    [('a', 1), ('b', 2), ('c', 3), ('d', 4)]

    :func:`run_length.decode` decompresses an iterable that was previously
    compressed with run-length encoding.
    It yields the items of the decompressed iterable:

    >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    >>> list(run_length.decode(compressed))
    ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']

    """

    @staticmethod
    def encode(iterable):
        # Each groupby group's length is its run count.
        return ((k, ilen(g)) for k, g in groupby(iterable))

    @staticmethod
    def decode(iterable):
        return chain.from_iterable(repeat(k, n) for k, n in iterable)


def exactly_n(iterable, n, predicate=bool):
    """Return ``True`` if exactly ``n`` items in the iterable are ``True``
    according to the *predicate* function.

    >>> exactly_n([True, True, False], 2)
    True
    >>> exactly_n([True, True, False], 1)
    False
    >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
    True

    The iterable will be advanced until ``n + 1`` truthy items are encountered,
    so avoid calling it on infinite iterables.

    """
    # Taking at most n + 1 matches short-circuits as soon as there are too
    # many, without consuming the rest of the iterable.
    return len(take(n + 1, filter(predicate, iterable))) == n


def circular_shifts(iterable):
    """Return a list of circular shifts of *iterable*.

    >>> circular_shifts(range(4))
    [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]
    """
    lst = list(iterable)
    # The first len(lst) windows over the cycled sequence are exactly the
    # circular shifts.
    return take(len(lst), windowed(cycle(lst), len(lst)))


def make_decorator(wrapping_func, result_index=0):
    """Return a decorator version of *wrapping_func*, which is a function that
    modifies an iterable. *result_index* is the position in that function's
    signature where the iterable goes.

    This lets you use itertools on the "production end," i.e. at function
    definition. This can augment what the function returns without changing the
    function's code.

    For example, to produce a decorator version of :func:`chunked`:

    >>> from more_itertools import chunked
    >>> chunker = make_decorator(chunked, result_index=0)
    >>> @chunker(3)
    ... def iter_range(n):
    ...     return iter(range(n))
    ...
    >>> list(iter_range(9))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    To only allow truthy items to be returned:

    >>> truth_serum = make_decorator(filter, result_index=1)
    >>> @truth_serum(bool)
    ... def boolean_test():
    ...     return [0, 1, '', ' ', False, True]
    ...
    >>> list(boolean_test())
    [1, ' ', True]

    The :func:`peekable` and :func:`seekable` wrappers make for practical
    decorators:

    >>> from more_itertools import peekable
    >>> peekable_function = make_decorator(peekable)
    >>> @peekable_function()
    ... def str_range(*args):
    ...     return (str(x) for x in range(*args))
    ...
    >>> it = str_range(1, 20, 2)
    >>> next(it), next(it), next(it)
    ('1', '3', '5')
    >>> it.peek()
    '7'
    >>> next(it)
    '7'

    """
    # See https://sites.google.com/site/bbayles/index/decorator_factory for
    # notes on how this works.
    def decorator(*wrapping_args, **wrapping_kwargs):
        def outer_wrapper(f):
            def inner_wrapper(*args, **kwargs):
                # Call the decorated function, then splice its result into
                # wrapping_func's arguments at result_index.
                result = f(*args, **kwargs)
                wrapping_args_ = list(wrapping_args)
                wrapping_args_.insert(result_index, result)
                return wrapping_func(*wrapping_args_, **wrapping_kwargs)

            return inner_wrapper

        return outer_wrapper

    return decorator


def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
    """Return a dictionary that maps the items in *iterable* to categories
    defined by *keyfunc*, transforms them with *valuefunc*, and
    then summarizes them by category with *reducefunc*.

    *valuefunc* defaults to the identity function if it is unspecified.
    If *reducefunc* is unspecified, no summarization takes place:

    >>> keyfunc = lambda x: x.upper()
    >>> result = map_reduce('abbccc', keyfunc)
    >>> sorted(result.items())
    [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]

    Specifying *valuefunc* transforms the categorized items:

    >>> keyfunc = lambda x: x.upper()
    >>> valuefunc = lambda x: 1
    >>> result = map_reduce('abbccc', keyfunc, valuefunc)
    >>> sorted(result.items())
    [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]

    Specifying *reducefunc* summarizes the categorized items:

    >>> keyfunc = lambda x: x.upper()
    >>> valuefunc = lambda x: 1
    >>> reducefunc = sum
    >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc)
    >>> sorted(result.items())
    [('A', 1), ('B', 2), ('C', 3)]

    You may want to filter the input iterable before applying the map/reduce
    procedure:

    >>> all_items = range(30)
    >>> items = [x for x in all_items if 10 <= x <= 20]  # Filter
    >>> keyfunc = lambda x: x % 2  # Evens map to 0; odds to 1
    >>> categories = map_reduce(items, keyfunc=keyfunc)
    >>> sorted(categories.items())
    [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])]
    >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum)
    >>> sorted(summaries.items())
    [(0, 90), (1, 75)]

    Note that all items in the iterable are gathered into a list before the
    summarization step, which may require significant storage.

    The returned object is a :obj:`collections.defaultdict` with the
    ``default_factory`` set to ``None``, such that it behaves like a normal
    dictionary.

    """
    valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc

    ret = defaultdict(list)
    for item in iterable:
        key = keyfunc(item)
        value = valuefunc(item)
        ret[key].append(value)

    if reducefunc is not None:
        for key, value_list in ret.items():
            ret[key] = reducefunc(value_list)

    # Disable defaulting so missing keys raise KeyError like a plain dict.
    ret.default_factory = None
    return ret


def rlocate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``, starting from the right and moving left.

    *pred* defaults to :func:`bool`, which will select truthy items:

    >>> list(rlocate([0, 1, 1, 0, 1, 0, 0]))  # Truthy at 1, 2, and 4
    [4, 2, 1]

    Set *pred* to a custom function to, e.g., find the indexes for a particular
    item:

    >>> iterable = iter('abcb')
    >>> pred = lambda x: x == 'b'
    >>> list(rlocate(iterable, pred))
    [3, 1]

    If *window_size* is given, then the *pred* function will be called with
    that many items. This enables searching for sub-sequences:

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(rlocate(iterable, pred=pred, window_size=3))
    [9, 5, 1]

    Beware, this function won't return anything for infinite iterables.
    If *iterable* is reversible, ``rlocate`` will reverse it and search from
    the right. Otherwise, it will search from the left and return the results
    in reverse order.

    See :func:`locate` for other example applications.
2923 2924 """ 2925 if window_size is None: 2926 try: 2927 len_iter = len(iterable) 2928 return (len_iter - i - 1 for i in locate(reversed(iterable), pred)) 2929 except TypeError: 2930 pass 2931 2932 return reversed(list(locate(iterable, pred, window_size))) 2933 2934 2935def replace(iterable, pred, substitutes, count=None, window_size=1): 2936 """Yield the items from *iterable*, replacing the items for which *pred* 2937 returns ``True`` with the items from the iterable *substitutes*. 2938 2939 >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1] 2940 >>> pred = lambda x: x == 0 2941 >>> substitutes = (2, 3) 2942 >>> list(replace(iterable, pred, substitutes)) 2943 [1, 1, 2, 3, 1, 1, 2, 3, 1, 1] 2944 2945 If *count* is given, the number of replacements will be limited: 2946 2947 >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0] 2948 >>> pred = lambda x: x == 0 2949 >>> substitutes = [None] 2950 >>> list(replace(iterable, pred, substitutes, count=2)) 2951 [1, 1, None, 1, 1, None, 1, 1, 0] 2952 2953 Use *window_size* to control the number of items passed as arguments to 2954 *pred*. This allows for locating and replacing subsequences. 
2955 2956 >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5] 2957 >>> window_size = 3 2958 >>> pred = lambda *args: args == (0, 1, 2) # 3 items passed to pred 2959 >>> substitutes = [3, 4] # Splice in these items 2960 >>> list(replace(iterable, pred, substitutes, window_size=window_size)) 2961 [3, 4, 5, 3, 4, 5] 2962 2963 """ 2964 if window_size < 1: 2965 raise ValueError('window_size must be at least 1') 2966 2967 # Save the substitutes iterable, since it's used more than once 2968 substitutes = tuple(substitutes) 2969 2970 # Add padding such that the number of windows matches the length of the 2971 # iterable 2972 it = chain(iterable, [_marker] * (window_size - 1)) 2973 windows = windowed(it, window_size) 2974 2975 n = 0 2976 for w in windows: 2977 # If the current window matches our predicate (and we haven't hit 2978 # our maximum number of replacements), splice in the substitutes 2979 # and then consume the following windows that overlap with this one. 2980 # For example, if the iterable is (0, 1, 2, 3, 4...) 2981 # and the window size is 2, we have (0, 1), (1, 2), (2, 3)... 2982 # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2) 2983 if pred(*w): 2984 if (count is None) or (n < count): 2985 n += 1 2986 yield from substitutes 2987 consume(windows, window_size - 1) 2988 continue 2989 2990 # If there was no match (or we've reached the replacement limit), 2991 # yield the first item from the window. 2992 if w and (w[0] is not _marker): 2993 yield w[0] 2994 2995 2996def partitions(iterable): 2997 """Yield all possible order-preserving partitions of *iterable*. 2998 2999 >>> iterable = 'abc' 3000 >>> for part in partitions(iterable): 3001 ... print([''.join(p) for p in part]) 3002 ['abc'] 3003 ['a', 'bc'] 3004 ['ab', 'c'] 3005 ['a', 'b', 'c'] 3006 3007 This is unrelated to :func:`partition`. 
3008 3009 """ 3010 sequence = list(iterable) 3011 n = len(sequence) 3012 for i in powerset(range(1, n)): 3013 yield [sequence[i:j] for i, j in zip((0,) + i, i + (n,))] 3014 3015 3016def set_partitions(iterable, k=None): 3017 """ 3018 Yield the set partitions of *iterable* into *k* parts. Set partitions are 3019 not order-preserving. 3020 3021 >>> iterable = 'abc' 3022 >>> for part in set_partitions(iterable, 2): 3023 ... print([''.join(p) for p in part]) 3024 ['a', 'bc'] 3025 ['ab', 'c'] 3026 ['b', 'ac'] 3027 3028 3029 If *k* is not given, every set partition is generated. 3030 3031 >>> iterable = 'abc' 3032 >>> for part in set_partitions(iterable): 3033 ... print([''.join(p) for p in part]) 3034 ['abc'] 3035 ['a', 'bc'] 3036 ['ab', 'c'] 3037 ['b', 'ac'] 3038 ['a', 'b', 'c'] 3039 3040 """ 3041 L = list(iterable) 3042 n = len(L) 3043 if k is not None: 3044 if k < 1: 3045 raise ValueError( 3046 "Can't partition in a negative or zero number of groups" 3047 ) 3048 elif k > n: 3049 return 3050 3051 def set_partitions_helper(L, k): 3052 n = len(L) 3053 if k == 1: 3054 yield [L] 3055 elif n == k: 3056 yield [[s] for s in L] 3057 else: 3058 e, *M = L 3059 for p in set_partitions_helper(M, k - 1): 3060 yield [[e], *p] 3061 for p in set_partitions_helper(M, k): 3062 for i in range(len(p)): 3063 yield p[:i] + [[e] + p[i]] + p[i + 1 :] 3064 3065 if k is None: 3066 for k in range(1, n + 1): 3067 yield from set_partitions_helper(L, k) 3068 else: 3069 yield from set_partitions_helper(L, k) 3070 3071 3072class time_limited: 3073 """ 3074 Yield items from *iterable* until *limit_seconds* have passed. 3075 If the time limit expires before all items have been yielded, the 3076 ``timed_out`` parameter will be set to ``True``. 3077 3078 >>> from time import sleep 3079 >>> def generator(): 3080 ... yield 1 3081 ... yield 2 3082 ... sleep(0.2) 3083 ... 
yield 3 3084 >>> iterable = time_limited(0.1, generator()) 3085 >>> list(iterable) 3086 [1, 2] 3087 >>> iterable.timed_out 3088 True 3089 3090 Note that the time is checked before each item is yielded, and iteration 3091 stops if the time elapsed is greater than *limit_seconds*. If your time 3092 limit is 1 second, but it takes 2 seconds to generate the first item from 3093 the iterable, the function will run for 2 seconds and not yield anything. 3094 3095 """ 3096 3097 def __init__(self, limit_seconds, iterable): 3098 if limit_seconds < 0: 3099 raise ValueError('limit_seconds must be positive') 3100 self.limit_seconds = limit_seconds 3101 self._iterable = iter(iterable) 3102 self._start_time = monotonic() 3103 self.timed_out = False 3104 3105 def __iter__(self): 3106 return self 3107 3108 def __next__(self): 3109 item = next(self._iterable) 3110 if monotonic() - self._start_time > self.limit_seconds: 3111 self.timed_out = True 3112 raise StopIteration 3113 3114 return item 3115 3116 3117def only(iterable, default=None, too_long=None): 3118 """If *iterable* has only one item, return it. 3119 If it has zero items, return *default*. 3120 If it has more than one item, raise the exception given by *too_long*, 3121 which is ``ValueError`` by default. 3122 3123 >>> only([], default='missing') 3124 'missing' 3125 >>> only([1]) 3126 1 3127 >>> only([1, 2]) # doctest: +IGNORE_EXCEPTION_DETAIL 3128 Traceback (most recent call last): 3129 ... 3130 ValueError: Expected exactly one item in iterable, but got 1, 2, 3131 and perhaps more.' 3132 >>> only([1, 2], too_long=TypeError) # doctest: +IGNORE_EXCEPTION_DETAIL 3133 Traceback (most recent call last): 3134 ... 3135 TypeError 3136 3137 Note that :func:`only` attempts to advance *iterable* twice to ensure there 3138 is only one item. See :func:`spy` or :func:`peekable` to check 3139 iterable contents less destructively. 
3140 """ 3141 it = iter(iterable) 3142 first_value = next(it, default) 3143 3144 try: 3145 second_value = next(it) 3146 except StopIteration: 3147 pass 3148 else: 3149 msg = ( 3150 'Expected exactly one item in iterable, but got {!r}, {!r}, ' 3151 'and perhaps more.'.format(first_value, second_value) 3152 ) 3153 raise too_long or ValueError(msg) 3154 3155 return first_value 3156 3157 3158def ichunked(iterable, n): 3159 """Break *iterable* into sub-iterables with *n* elements each. 3160 :func:`ichunked` is like :func:`chunked`, but it yields iterables 3161 instead of lists. 3162 3163 If the sub-iterables are read in order, the elements of *iterable* 3164 won't be stored in memory. 3165 If they are read out of order, :func:`itertools.tee` is used to cache 3166 elements as necessary. 3167 3168 >>> from itertools import count 3169 >>> all_chunks = ichunked(count(), 4) 3170 >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks) 3171 >>> list(c_2) # c_1's elements have been cached; c_3's haven't been 3172 [4, 5, 6, 7] 3173 >>> list(c_1) 3174 [0, 1, 2, 3] 3175 >>> list(c_3) 3176 [8, 9, 10, 11] 3177 3178 """ 3179 source = iter(iterable) 3180 3181 while True: 3182 # Check to see whether we're at the end of the source iterable 3183 item = next(source, _marker) 3184 if item is _marker: 3185 return 3186 3187 # Clone the source and yield an n-length slice 3188 source, it = tee(chain([item], source)) 3189 yield islice(it, n) 3190 3191 # Advance the source iterable 3192 consume(source, n) 3193 3194 3195def distinct_combinations(iterable, r): 3196 """Yield the distinct combinations of *r* items taken from *iterable*. 3197 3198 >>> list(distinct_combinations([0, 0, 1], 2)) 3199 [(0, 0), (0, 1)] 3200 3201 Equivalent to ``set(combinations(iterable))``, except duplicates are not 3202 generated and thrown away. For larger input sequences this is much more 3203 efficient. 
3204 3205 """ 3206 if r < 0: 3207 raise ValueError('r must be non-negative') 3208 elif r == 0: 3209 yield () 3210 return 3211 pool = tuple(iterable) 3212 generators = [unique_everseen(enumerate(pool), key=itemgetter(1))] 3213 current_combo = [None] * r 3214 level = 0 3215 while generators: 3216 try: 3217 cur_idx, p = next(generators[-1]) 3218 except StopIteration: 3219 generators.pop() 3220 level -= 1 3221 continue 3222 current_combo[level] = p 3223 if level + 1 == r: 3224 yield tuple(current_combo) 3225 else: 3226 generators.append( 3227 unique_everseen( 3228 enumerate(pool[cur_idx + 1 :], cur_idx + 1), 3229 key=itemgetter(1), 3230 ) 3231 ) 3232 level += 1 3233 3234 3235def filter_except(validator, iterable, *exceptions): 3236 """Yield the items from *iterable* for which the *validator* function does 3237 not raise one of the specified *exceptions*. 3238 3239 *validator* is called for each item in *iterable*. 3240 It should be a function that accepts one argument and raises an exception 3241 if that item is not valid. 3242 3243 >>> iterable = ['1', '2', 'three', '4', None] 3244 >>> list(filter_except(int, iterable, ValueError, TypeError)) 3245 ['1', '2', '4'] 3246 3247 If an exception other than one given by *exceptions* is raised by 3248 *validator*, it is raised like normal. 3249 """ 3250 for item in iterable: 3251 try: 3252 validator(item) 3253 except exceptions: 3254 pass 3255 else: 3256 yield item 3257 3258 3259def map_except(function, iterable, *exceptions): 3260 """Transform each item from *iterable* with *function* and yield the 3261 result, unless *function* raises one of the specified *exceptions*. 3262 3263 *function* is called to transform each item in *iterable*. 3264 It should be a accept one argument. 

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(map_except(int, iterable, ValueError, TypeError))
    [1, 2, 4]

    If an exception other than one given by *exceptions* is raised by
    *function*, it is raised like normal.
    """
    for item in iterable:
        try:
            yield function(item)
        except exceptions:
            pass


def _sample_unweighted(iterable, k):
    # Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li:
    # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))".

    # Fill up the reservoir (collection of samples) with the first `k` samples
    reservoir = take(k, iterable)

    # Generate random number that's the largest in a sample of k U(0,1) numbers
    # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic
    W = exp(log(random()) / k)

    # The number of elements to skip before changing the reservoir is a random
    # number with a geometric distribution. Sample it using random() and logs.
    next_index = k + floor(log(random()) / log(1 - W))

    for index, element in enumerate(iterable, k):

        if index == next_index:
            # Replace a uniformly-random reservoir slot with this element.
            reservoir[randrange(k)] = element
            # The new W is the largest in a sample of k U(0, `old_W`) numbers
            W *= exp(log(random()) / k)
            next_index += floor(log(random()) / log(1 - W)) + 1

    return reservoir


def _sample_weighted(iterable, k, weights):
    # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. :
    # "Weighted random sampling with a reservoir".

    # Log-transform for numerical stability for weights that are small/large
    weight_keys = (log(random()) / weight for weight in weights)

    # Fill up the reservoir (collection of samples) with the first `k`
    # weight-keys and elements, then heapify the list.
    reservoir = take(k, zip(weight_keys, iterable))
    heapify(reservoir)

    # The number of jumps before changing the reservoir is a random variable
    # with an exponential distribution. Sample it using random() and logs.
    smallest_weight_key, _ = reservoir[0]
    weights_to_skip = log(random()) / smallest_weight_key

    for weight, element in zip(weights, iterable):
        if weight >= weights_to_skip:
            # The notation here is consistent with the paper, but we store
            # the weight-keys in log-space for better numerical stability.
            smallest_weight_key, _ = reservoir[0]
            t_w = exp(weight * smallest_weight_key)
            r_2 = uniform(t_w, 1)  # generate U(t_w, 1)
            weight_key = log(r_2) / weight
            heapreplace(reservoir, (weight_key, element))
            smallest_weight_key, _ = reservoir[0]
            weights_to_skip = log(random()) / smallest_weight_key
        else:
            weights_to_skip -= weight

    # Equivalent to [element for weight_key, element in sorted(reservoir)]
    return [heappop(reservoir)[1] for _ in range(k)]


def sample(iterable, k, weights=None):
    """Return a *k*-length list of elements chosen (without replacement)
    from the *iterable*. Like :func:`random.sample`, but works on iterables
    of unknown length.

    >>> iterable = range(100)
    >>> sample(iterable, 5)  # doctest: +SKIP
    [81, 60, 96, 16, 4]

    An iterable with *weights* may also be given:

    >>> iterable = range(100)
    >>> weights = (i * i + 1 for i in range(100))
    >>> sampled = sample(iterable, 5, weights=weights)  # doctest: +SKIP
    [79, 67, 74, 66, 78]

    The algorithm can also be used to generate weighted random permutations.
    The relative weight of each item determines the probability that it
    appears late in the permutation.
3360 3361 >>> data = "abcdefgh" 3362 >>> weights = range(1, len(data) + 1) 3363 >>> sample(data, k=len(data), weights=weights) # doctest: +SKIP 3364 ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f'] 3365 """ 3366 if k == 0: 3367 return [] 3368 3369 iterable = iter(iterable) 3370 if weights is None: 3371 return _sample_unweighted(iterable, k) 3372 else: 3373 weights = iter(weights) 3374 return _sample_weighted(iterable, k, weights) 3375 3376 3377def is_sorted(iterable, key=None, reverse=False): 3378 """Returns ``True`` if the items of iterable are in sorted order, and 3379 ``False`` otherwise. *key* and *reverse* have the same meaning that they do 3380 in the built-in :func:`sorted` function. 3381 3382 >>> is_sorted(['1', '2', '3', '4', '5'], key=int) 3383 True 3384 >>> is_sorted([5, 4, 3, 1, 2], reverse=True) 3385 False 3386 3387 The function returns ``False`` after encountering the first out-of-order 3388 item. If there are no out-of-order items, the iterable is exhausted. 3389 """ 3390 3391 compare = lt if reverse else gt 3392 it = iterable if (key is None) else map(key, iterable) 3393 return not any(starmap(compare, pairwise(it))) 3394 3395 3396class AbortThread(BaseException): 3397 pass 3398 3399 3400class callback_iter: 3401 """Convert a function that uses callbacks to an iterator. 3402 3403 Let *func* be a function that takes a `callback` keyword argument. 3404 For example: 3405 3406 >>> def func(callback=None): 3407 ... for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]: 3408 ... if callback: 3409 ... callback(i, c) 3410 ... return 4 3411 3412 3413 Use ``with callback_iter(func)`` to get an iterator over the parameters 3414 that are delivered to the callback. 3415 3416 >>> with callback_iter(func) as it: 3417 ... for args, kwargs in it: 3418 ... print(args) 3419 (1, 'a') 3420 (2, 'b') 3421 (3, 'c') 3422 3423 The function will be called in a background thread. The ``done`` property 3424 indicates whether it has completed execution. 
3425 3426 >>> it.done 3427 True 3428 3429 If it completes successfully, its return value will be available 3430 in the ``result`` property. 3431 3432 >>> it.result 3433 4 3434 3435 Notes: 3436 3437 * If the function uses some keyword argument besides ``callback``, supply 3438 *callback_kwd*. 3439 * If it finished executing, but raised an exception, accessing the 3440 ``result`` property will raise the same exception. 3441 * If it hasn't finished executing, accessing the ``result`` 3442 property from within the ``with`` block will raise ``RuntimeError``. 3443 * If it hasn't finished executing, accessing the ``result`` property from 3444 outside the ``with`` block will raise a 3445 ``more_itertools.AbortThread`` exception. 3446 * Provide *wait_seconds* to adjust how frequently the it is polled for 3447 output. 3448 3449 """ 3450 3451 def __init__(self, func, callback_kwd='callback', wait_seconds=0.1): 3452 self._func = func 3453 self._callback_kwd = callback_kwd 3454 self._aborted = False 3455 self._future = None 3456 self._wait_seconds = wait_seconds 3457 self._executor = ThreadPoolExecutor(max_workers=1) 3458 self._iterator = self._reader() 3459 3460 def __enter__(self): 3461 return self 3462 3463 def __exit__(self, exc_type, exc_value, traceback): 3464 self._aborted = True 3465 self._executor.shutdown() 3466 3467 def __iter__(self): 3468 return self 3469 3470 def __next__(self): 3471 return next(self._iterator) 3472 3473 @property 3474 def done(self): 3475 if self._future is None: 3476 return False 3477 return self._future.done() 3478 3479 @property 3480 def result(self): 3481 if not self.done: 3482 raise RuntimeError('Function has not yet completed') 3483 3484 return self._future.result() 3485 3486 def _reader(self): 3487 q = Queue() 3488 3489 def callback(*args, **kwargs): 3490 if self._aborted: 3491 raise AbortThread('canceled by user') 3492 3493 q.put((args, kwargs)) 3494 3495 self._future = self._executor.submit( 3496 self._func, **{self._callback_kwd: 
callback} 3497 ) 3498 3499 while True: 3500 try: 3501 item = q.get(timeout=self._wait_seconds) 3502 except Empty: 3503 pass 3504 else: 3505 q.task_done() 3506 yield item 3507 3508 if self._future.done(): 3509 break 3510 3511 remaining = [] 3512 while True: 3513 try: 3514 item = q.get_nowait() 3515 except Empty: 3516 break 3517 else: 3518 q.task_done() 3519 remaining.append(item) 3520 q.join() 3521 yield from remaining 3522 3523 3524def windowed_complete(iterable, n): 3525 """ 3526 Yield ``(beginning, middle, end)`` tuples, where: 3527 3528 * Each ``middle`` has *n* items from *iterable* 3529 * Each ``beginning`` has the items before the ones in ``middle`` 3530 * Each ``end`` has the items after the ones in ``middle`` 3531 3532 >>> iterable = range(7) 3533 >>> n = 3 3534 >>> for beginning, middle, end in windowed_complete(iterable, n): 3535 ... print(beginning, middle, end) 3536 () (0, 1, 2) (3, 4, 5, 6) 3537 (0,) (1, 2, 3) (4, 5, 6) 3538 (0, 1) (2, 3, 4) (5, 6) 3539 (0, 1, 2) (3, 4, 5) (6,) 3540 (0, 1, 2, 3) (4, 5, 6) () 3541 3542 Note that *n* must be at least 0 and most equal to the length of 3543 *iterable*. 3544 3545 This function will exhaust the iterable and may require significant 3546 storage. 3547 """ 3548 if n < 0: 3549 raise ValueError('n must be >= 0') 3550 3551 seq = tuple(iterable) 3552 size = len(seq) 3553 3554 if n > size: 3555 raise ValueError('n must be <= len(seq)') 3556 3557 for i in range(size - n + 1): 3558 beginning = seq[:i] 3559 middle = seq[i : i + n] 3560 end = seq[i + n :] 3561 yield beginning, middle, end 3562 3563 3564def all_unique(iterable, key=None): 3565 """ 3566 Returns ``True`` if all the elements of *iterable* are unique (no two 3567 elements are equal). 3568 3569 >>> all_unique('ABCB') 3570 False 3571 3572 If a *key* function is specified, it will be used to make comparisons. 
3573 3574 >>> all_unique('ABCb') 3575 True 3576 >>> all_unique('ABCb', str.lower) 3577 False 3578 3579 The function returns as soon as the first non-unique element is 3580 encountered. Iterables with a mix of hashable and unhashable items can 3581 be used, but the function will be slower for unhashable items. 3582 """ 3583 seenset = set() 3584 seenset_add = seenset.add 3585 seenlist = [] 3586 seenlist_add = seenlist.append 3587 for element in map(key, iterable) if key else iterable: 3588 try: 3589 if element in seenset: 3590 return False 3591 seenset_add(element) 3592 except TypeError: 3593 if element in seenlist: 3594 return False 3595 seenlist_add(element) 3596 return True 3597 3598 3599def nth_product(index, *args): 3600 """Equivalent to ``list(product(*args))[index]``. 3601 3602 The products of *args* can be ordered lexicographically. 3603 :func:`nth_product` computes the product at sort position *index* without 3604 computing the previous products. 3605 3606 >>> nth_product(8, range(2), range(2), range(2), range(2)) 3607 (1, 0, 0, 0) 3608 3609 ``IndexError`` will be raised if the given *index* is invalid. 3610 """ 3611 pools = list(map(tuple, reversed(args))) 3612 ns = list(map(len, pools)) 3613 3614 c = reduce(mul, ns) 3615 3616 if index < 0: 3617 index += c 3618 3619 if not 0 <= index < c: 3620 raise IndexError 3621 3622 result = [] 3623 for pool, n in zip(pools, ns): 3624 result.append(pool[index % n]) 3625 index //= n 3626 3627 return tuple(reversed(result)) 3628 3629 3630def nth_permutation(iterable, r, index): 3631 """Equivalent to ``list(permutations(iterable, r))[index]``` 3632 3633 The subsequences of *iterable* that are of length *r* where order is 3634 important can be ordered lexicographically. :func:`nth_permutation` 3635 computes the subsequence at sort position *index* directly, without 3636 computing the previous subsequences. 
3637 3638 >>> nth_permutation('ghijk', 2, 5) 3639 ('h', 'i') 3640 3641 ``ValueError`` will be raised If *r* is negative or greater than the length 3642 of *iterable*. 3643 ``IndexError`` will be raised if the given *index* is invalid. 3644 """ 3645 pool = list(iterable) 3646 n = len(pool) 3647 3648 if r is None or r == n: 3649 r, c = n, factorial(n) 3650 elif not 0 <= r < n: 3651 raise ValueError 3652 else: 3653 c = factorial(n) // factorial(n - r) 3654 3655 if index < 0: 3656 index += c 3657 3658 if not 0 <= index < c: 3659 raise IndexError 3660 3661 if c == 0: 3662 return tuple() 3663 3664 result = [0] * r 3665 q = index * factorial(n) // c if r < n else index 3666 for d in range(1, n + 1): 3667 q, i = divmod(q, d) 3668 if 0 <= n - d < r: 3669 result[n - d] = i 3670 if q == 0: 3671 break 3672 3673 return tuple(map(pool.pop, result)) 3674 3675 3676def value_chain(*args): 3677 """Yield all arguments passed to the function in the same order in which 3678 they were passed. If an argument itself is iterable then iterate over its 3679 values. 3680 3681 >>> list(value_chain(1, 2, 3, [4, 5, 6])) 3682 [1, 2, 3, 4, 5, 6] 3683 3684 Binary and text strings are not considered iterable and are emitted 3685 as-is: 3686 3687 >>> list(value_chain('12', '34', ['56', '78'])) 3688 ['12', '34', '56', '78'] 3689 3690 3691 Multiple levels of nesting are not flattened. 3692 3693 """ 3694 for value in args: 3695 if isinstance(value, (str, bytes)): 3696 yield value 3697 continue 3698 try: 3699 yield from value 3700 except TypeError: 3701 yield value 3702 3703 3704def product_index(element, *args): 3705 """Equivalent to ``list(product(*args)).index(element)`` 3706 3707 The products of *args* can be ordered lexicographically. 3708 :func:`product_index` computes the first index of *element* without 3709 computing the previous products. 
3710 3711 >>> product_index([8, 2], range(10), range(5)) 3712 42 3713 3714 ``ValueError`` will be raised if the given *element* isn't in the product 3715 of *args*. 3716 """ 3717 index = 0 3718 3719 for x, pool in zip_longest(element, args, fillvalue=_marker): 3720 if x is _marker or pool is _marker: 3721 raise ValueError('element is not a product of args') 3722 3723 pool = tuple(pool) 3724 index = index * len(pool) + pool.index(x) 3725 3726 return index 3727 3728 3729def combination_index(element, iterable): 3730 """Equivalent to ``list(combinations(iterable, r)).index(element)`` 3731 3732 The subsequences of *iterable* that are of length *r* can be ordered 3733 lexicographically. :func:`combination_index` computes the index of the 3734 first *element*, without computing the previous combinations. 3735 3736 >>> combination_index('adf', 'abcdefg') 3737 10 3738 3739 ``ValueError`` will be raised if the given *element* isn't one of the 3740 combinations of *iterable*. 3741 """ 3742 element = enumerate(element) 3743 k, y = next(element, (None, None)) 3744 if k is None: 3745 return 0 3746 3747 indexes = [] 3748 pool = enumerate(iterable) 3749 for n, x in pool: 3750 if x == y: 3751 indexes.append(n) 3752 tmp, y = next(element, (None, None)) 3753 if tmp is None: 3754 break 3755 else: 3756 k = tmp 3757 else: 3758 raise ValueError('element is not a combination of iterable') 3759 3760 n, _ = last(pool, default=(n, None)) 3761 3762 # Python versiosn below 3.8 don't have math.comb 3763 index = 1 3764 for i, j in enumerate(reversed(indexes), start=1): 3765 j = n - j 3766 if i <= j: 3767 index += factorial(j) // (factorial(i) * factorial(j - i)) 3768 3769 return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index 3770 3771 3772def permutation_index(element, iterable): 3773 """Equivalent to ``list(permutations(iterable, r)).index(element)``` 3774 3775 The subsequences of *iterable* that are of length *r* where order is 3776 important can be ordered 
lexicographically. :func:`permutation_index` 3777 computes the index of the first *element* directly, without computing 3778 the previous permutations. 3779 3780 >>> permutation_index([1, 3, 2], range(5)) 3781 19 3782 3783 ``ValueError`` will be raised if the given *element* isn't one of the 3784 permutations of *iterable*. 3785 """ 3786 index = 0 3787 pool = list(iterable) 3788 for i, x in zip(range(len(pool), -1, -1), element): 3789 r = pool.index(x) 3790 index = index * i + r 3791 del pool[r] 3792 3793 return index 3794 3795 3796class countable: 3797 """Wrap *iterable* and keep a count of how many items have been consumed. 3798 3799 The ``items_seen`` attribute starts at ``0`` and increments as the iterable 3800 is consumed: 3801 3802 >>> iterable = map(str, range(10)) 3803 >>> it = countable(iterable) 3804 >>> it.items_seen 3805 0 3806 >>> next(it), next(it) 3807 ('0', '1') 3808 >>> list(it) 3809 ['2', '3', '4', '5', '6', '7', '8', '9'] 3810 >>> it.items_seen 3811 10 3812 """ 3813 3814 def __init__(self, iterable): 3815 self._it = iter(iterable) 3816 self.items_seen = 0 3817 3818 def __iter__(self): 3819 return self 3820 3821 def __next__(self): 3822 item = next(self._it) 3823 self.items_seen += 1 3824 3825 return item 3826