1# Natural Language Toolkit: Utility functions 2# 3# Copyright (C) 2001-2019 NLTK Project 4# Author: Steven Bird <stevenbird1@gmail.com> 5# URL: <http://nltk.org/> 6# For license information, see LICENSE.TXT 7from __future__ import print_function 8 9import sys 10import inspect 11import locale 12import re 13import types 14import textwrap 15import pydoc 16import bisect 17import os 18 19from itertools import islice, chain, combinations 20from pprint import pprint 21from collections import defaultdict, deque 22from sys import version_info 23 24from six import class_types, string_types, text_type 25from six.moves.urllib.request import ( 26 build_opener, 27 install_opener, 28 getproxies, 29 ProxyHandler, 30 ProxyBasicAuthHandler, 31 ProxyDigestAuthHandler, 32 HTTPPasswordMgrWithDefaultRealm, 33) 34 35from nltk.internals import slice_bounds, raise_unorderable_types 36from nltk.collections import * 37from nltk.compat import python_2_unicode_compatible 38 39 40###################################################################### 41# Short usage message 42###################################################################### 43 44 45def usage(obj, selfname='self'): 46 str(obj) # In case it's lazy, this will load it. 47 48 if not isinstance(obj, class_types): 49 obj = obj.__class__ 50 51 print('%s supports the following operations:' % obj.__name__) 52 for (name, method) in sorted(pydoc.allmethods(obj).items()): 53 if name.startswith('_'): 54 continue 55 if getattr(method, '__deprecated__', False): 56 continue 57 58 if sys.version_info[0] >= 3: 59 getargspec = inspect.getfullargspec 60 else: 61 getargspec = inspect.getargspec 62 args, varargs, varkw, defaults = getargspec(method)[:4] 63 if ( 64 args 65 and args[0] == 'self' 66 and (defaults is None or len(args) > len(defaults)) 67 ): 68 args = args[1:] 69 name = '%s.%s' % (selfname, name) 70 argspec = inspect.formatargspec(args, varargs, varkw, defaults) 71 print( 72 textwrap.fill( 73 '%s%s' % (name, argspec), 74 initial_indent=' - ', 75 subsequent_indent=' ' * (len(name) + 5), 76 ) 77 ) 78 79 80########################################################################## 81# IDLE 82########################################################################## 83 84 85def in_idle(): 86 """ 87 Return True if this function is run within idle. Tkinter 88 programs that are run in idle should never call ``Tk.mainloop``; so 89 this function should be used to gate all calls to ``Tk.mainloop``. 90 91 :warning: This function works by checking ``sys.stdin``. If the 92 user has modified ``sys.stdin``, then it may return incorrect 93 results. 94 :rtype: bool 95 """ 96 import sys 97 98 return sys.stdin.__class__.__name__ in ('PyShell', 'RPCProxy') 99 100 101########################################################################## 102# PRETTY PRINTING 103########################################################################## 104 105 106def pr(data, start=0, end=None): 107 """ 108 Pretty print a sequence of data items 109 110 :param data: the data stream to print 111 :type data: sequence or iter 112 :param start: the start position 113 :type start: int 114 :param end: the end position 115 :type end: int 116 """ 117 pprint(list(islice(data, start, end))) 118 119 120def print_string(s, width=70): 121 """ 122 Pretty print a string, breaking lines on whitespace 123 124 :param s: the string to print, consisting of words and spaces 125 :type s: str 126 :param width: the display width 127 :type width: int 128 """ 129 print('\n'.join(textwrap.wrap(s, width=width))) 130 131 132def tokenwrap(tokens, separator=" ", width=70): 133 """ 134 Pretty print a list of text tokens, breaking lines on whitespace 135 136 :param tokens: the tokens to print 137 :type tokens: list 138 :param separator: the string to use to separate tokens 139 :type separator: str 140 :param width: the display width (default=70) 141 :type width: int 142 """ 143 return '\n'.join(textwrap.wrap(separator.join(tokens), width=width)) 144 145 146########################################################################## 147# Python version 148########################################################################## 149 150 151def py25(): 152 return version_info[0] == 2 and version_info[1] == 5 153 154 155def py26(): 156 return version_info[0] == 2 and version_info[1] == 6 157 158 159def py27(): 160 return version_info[0] == 2 and version_info[1] == 7 161 162 163########################################################################## 164# Indexing 165########################################################################## 166 167 168class Index(defaultdict): 169 def __init__(self, pairs): 170 defaultdict.__init__(self, list) 171 for key, value in pairs: 172 self[key].append(value) 173 174 175###################################################################### 176## Regexp display (thanks to David Mertz) 177###################################################################### 178 179 180def re_show(regexp, string, left="{", right="}"): 181 """ 182 Return a string with markers surrounding the matched substrings. 183 Search str for substrings matching ``regexp`` and wrap the matches 184 with braces. This is convenient for learning about regular expressions. 185 186 :param regexp: The regular expression. 187 :type regexp: str 188 :param string: The string being matched. 189 :type string: str 190 :param left: The left delimiter (printed before the matched substring) 191 :type left: str 192 :param right: The right delimiter (printed after the matched substring) 193 :type right: str 194 :rtype: str 195 """ 196 print(re.compile(regexp, re.M).sub(left + r"\g<0>" + right, string.rstrip())) 197 198 199########################################################################## 200# READ FROM FILE OR STRING 201########################################################################## 202 203# recipe from David Mertz 204def filestring(f): 205 if hasattr(f, 'read'): 206 return f.read() 207 elif isinstance(f, string_types): 208 with open(f, 'r') as infile: 209 return infile.read() 210 else: 211 raise ValueError("Must be called with a filename or file-like object") 212 213 214########################################################################## 215# Breadth-First Search 216########################################################################## 217 218 219def breadth_first(tree, children=iter, maxdepth=-1): 220 """Traverse the nodes of a tree in breadth-first order. 221 (No need to check for cycles.) 222 The first argument should be the tree root; 223 children should be a function taking as argument a tree node 224 and returning an iterator of the node's children. 225 """ 226 queue = deque([(tree, 0)]) 227 228 while queue: 229 node, depth = queue.popleft() 230 yield node 231 232 if depth != maxdepth: 233 try: 234 queue.extend((c, depth + 1) for c in children(node)) 235 except TypeError: 236 pass 237 238 239########################################################################## 240# Guess Character Encoding 241########################################################################## 242 243# adapted from io.py in the docutils extension module (http://docutils.sourceforge.net) 244# http://www.pyzine.com/Issue008/Section_Articles/article_Encodings.html 245 246 247def guess_encoding(data): 248 """ 249 Given a byte string, attempt to decode it. 250 Tries the standard 'UTF8' and 'latin-1' encodings, 251 Plus several gathered from locale information. 252 253 The calling program *must* first call:: 254 255 locale.setlocale(locale.LC_ALL, '') 256 257 If successful it returns ``(decoded_unicode, successful_encoding)``. 258 If unsuccessful it raises a ``UnicodeError``. 259 """ 260 successful_encoding = None 261 # we make 'utf-8' the first encoding 262 encodings = ['utf-8'] 263 # 264 # next we add anything we can learn from the locale 265 try: 266 encodings.append(locale.nl_langinfo(locale.CODESET)) 267 except AttributeError: 268 pass 269 try: 270 encodings.append(locale.getlocale()[1]) 271 except (AttributeError, IndexError): 272 pass 273 try: 274 encodings.append(locale.getdefaultlocale()[1]) 275 except (AttributeError, IndexError): 276 pass 277 # 278 # we try 'latin-1' last 279 encodings.append('latin-1') 280 for enc in encodings: 281 # some of the locale calls 282 # may have returned None 283 if not enc: 284 continue 285 try: 286 decoded = text_type(data, enc) 287 successful_encoding = enc 288 289 except (UnicodeError, LookupError): 290 pass 291 else: 292 break 293 if not successful_encoding: 294 raise UnicodeError( 295 'Unable to decode input data. ' 296 'Tried the following encodings: %s.' 297 % ', '.join([repr(enc) for enc in encodings if enc]) 298 ) 299 else: 300 return (decoded, successful_encoding) 301 302 303########################################################################## 304# Remove repeated elements from a list deterministcally 305########################################################################## 306 307 308def unique_list(xs): 309 seen = set() 310 # not seen.add(x) here acts to make the code shorter without using if statements, seen.add(x) always returns None. 311 return [x for x in xs if x not in seen and not seen.add(x)] 312 313 314########################################################################## 315# Invert a dictionary 316########################################################################## 317 318 319def invert_dict(d): 320 inverted_dict = defaultdict(list) 321 for key in d: 322 if hasattr(d[key], '__iter__'): 323 for term in d[key]: 324 inverted_dict[term].append(key) 325 else: 326 inverted_dict[d[key]] = key 327 return inverted_dict 328 329 330########################################################################## 331# Utilities for directed graphs: transitive closure, and inversion 332# The graph is represented as a dictionary of sets 333########################################################################## 334 335 336def transitive_closure(graph, reflexive=False): 337 """ 338 Calculate the transitive closure of a directed graph, 339 optionally the reflexive transitive closure. 340 341 The algorithm is a slight modification of the "Marking Algorithm" of 342 Ioannidis & Ramakrishnan (1998) "Efficient Transitive Closure Algorithms". 343 344 :param graph: the initial graph, represented as a dictionary of sets 345 :type graph: dict(set) 346 :param reflexive: if set, also make the closure reflexive 347 :type reflexive: bool 348 :rtype: dict(set) 349 """ 350 if reflexive: 351 base_set = lambda k: set([k]) 352 else: 353 base_set = lambda k: set() 354 # The graph U_i in the article: 355 agenda_graph = dict((k, graph[k].copy()) for k in graph) 356 # The graph M_i in the article: 357 closure_graph = dict((k, base_set(k)) for k in graph) 358 for i in graph: 359 agenda = agenda_graph[i] 360 closure = closure_graph[i] 361 while agenda: 362 j = agenda.pop() 363 closure.add(j) 364 closure |= closure_graph.setdefault(j, base_set(j)) 365 agenda |= agenda_graph.get(j, base_set(j)) 366 agenda -= closure 367 return closure_graph 368 369 370def invert_graph(graph): 371 """ 372 Inverts a directed graph. 373 374 :param graph: the graph, represented as a dictionary of sets 375 :type graph: dict(set) 376 :return: the inverted graph 377 :rtype: dict(set) 378 """ 379 inverted = {} 380 for key in graph: 381 for value in graph[key]: 382 inverted.setdefault(value, set()).add(key) 383 return inverted 384 385 386########################################################################## 387# HTML Cleaning 388########################################################################## 389 390 391def clean_html(html): 392 raise NotImplementedError( 393 "To remove HTML markup, use BeautifulSoup's get_text() function" 394 ) 395 396 397def clean_url(url): 398 raise NotImplementedError( 399 "To remove HTML markup, use BeautifulSoup's get_text() function" 400 ) 401 402 403########################################################################## 404# FLATTEN LISTS 405########################################################################## 406 407 408def flatten(*args): 409 """ 410 Flatten a list. 411 412 >>> from nltk.util import flatten 413 >>> flatten(1, 2, ['b', 'a' , ['c', 'd']], 3) 414 [1, 2, 'b', 'a', 'c', 'd', 3] 415 416 :param args: items and lists to be combined into a single list 417 :rtype: list 418 """ 419 420 x = [] 421 for l in args: 422 if not isinstance(l, (list, tuple)): 423 l = [l] 424 for item in l: 425 if isinstance(item, (list, tuple)): 426 x.extend(flatten(item)) 427 else: 428 x.append(item) 429 return x 430 431 432########################################################################## 433# Ngram iteration 434########################################################################## 435 436 437def pad_sequence( 438 sequence, 439 n, 440 pad_left=False, 441 pad_right=False, 442 left_pad_symbol=None, 443 right_pad_symbol=None, 444): 445 """ 446 Returns a padded sequence of items before ngram extraction. 447 448 >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>')) 449 ['<s>', 1, 2, 3, 4, 5, '</s>'] 450 >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>')) 451 ['<s>', 1, 2, 3, 4, 5] 452 >>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>')) 453 [1, 2, 3, 4, 5, '</s>'] 454 455 :param sequence: the source data to be padded 456 :type sequence: sequence or iter 457 :param n: the degree of the ngrams 458 :type n: int 459 :param pad_left: whether the ngrams should be left-padded 460 :type pad_left: bool 461 :param pad_right: whether the ngrams should be right-padded 462 :type pad_right: bool 463 :param left_pad_symbol: the symbol to use for left padding (default is None) 464 :type left_pad_symbol: any 465 :param right_pad_symbol: the symbol to use for right padding (default is None) 466 :type right_pad_symbol: any 467 :rtype: sequence or iter 468 """ 469 sequence = iter(sequence) 470 if pad_left: 471 sequence = chain((left_pad_symbol,) * (n - 1), sequence) 472 if pad_right: 473 sequence = chain(sequence, (right_pad_symbol,) * (n - 1)) 474 return sequence 475 476 477# add a flag to pad the sequence so we get peripheral ngrams? 478 479 480def ngrams( 481 sequence, 482 n, 483 pad_left=False, 484 pad_right=False, 485 left_pad_symbol=None, 486 right_pad_symbol=None, 487): 488 """ 489 Return the ngrams generated from a sequence of items, as an iterator. 490 For example: 491 492 >>> from nltk.util import ngrams 493 >>> list(ngrams([1,2,3,4,5], 3)) 494 [(1, 2, 3), (2, 3, 4), (3, 4, 5)] 495 496 Wrap with list for a list version of this function. Set pad_left 497 or pad_right to true in order to get additional ngrams: 498 499 >>> list(ngrams([1,2,3,4,5], 2, pad_right=True)) 500 [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)] 501 >>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>')) 502 [(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')] 503 >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>')) 504 [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)] 505 >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>')) 506 [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')] 507 508 509 :param sequence: the source data to be converted into ngrams 510 :type sequence: sequence or iter 511 :param n: the degree of the ngrams 512 :type n: int 513 :param pad_left: whether the ngrams should be left-padded 514 :type pad_left: bool 515 :param pad_right: whether the ngrams should be right-padded 516 :type pad_right: bool 517 :param left_pad_symbol: the symbol to use for left padding (default is None) 518 :type left_pad_symbol: any 519 :param right_pad_symbol: the symbol to use for right padding (default is None) 520 :type right_pad_symbol: any 521 :rtype: sequence or iter 522 """ 523 sequence = pad_sequence( 524 sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol 525 ) 526 527 history = [] 528 while n > 1: 529 # PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator 530 try: 531 next_item = next(sequence) 532 except StopIteration: 533 # no more data, terminate the generator 534 return 535 history.append(next_item) 536 n -= 1 537 for item in sequence: 538 history.append(item) 539 yield tuple(history) 540 del history[0] 541 542 543def bigrams(sequence, **kwargs): 544 """ 545 Return the bigrams generated from a sequence of items, as an iterator. 546 For example: 547 548 >>> from nltk.util import bigrams 549 >>> list(bigrams([1,2,3,4,5])) 550 [(1, 2), (2, 3), (3, 4), (4, 5)] 551 552 Use bigrams for a list version of this function. 553 554 :param sequence: the source data to be converted into bigrams 555 :type sequence: sequence or iter 556 :rtype: iter(tuple) 557 """ 558 559 for item in ngrams(sequence, 2, **kwargs): 560 yield item 561 562 563def trigrams(sequence, **kwargs): 564 """ 565 Return the trigrams generated from a sequence of items, as an iterator. 566 For example: 567 568 >>> from nltk.util import trigrams 569 >>> list(trigrams([1,2,3,4,5])) 570 [(1, 2, 3), (2, 3, 4), (3, 4, 5)] 571 572 Use trigrams for a list version of this function. 573 574 :param sequence: the source data to be converted into trigrams 575 :type sequence: sequence or iter 576 :rtype: iter(tuple) 577 """ 578 579 for item in ngrams(sequence, 3, **kwargs): 580 yield item 581 582 583def everygrams(sequence, min_len=1, max_len=-1, **kwargs): 584 """ 585 Returns all possible ngrams generated from a sequence of items, as an iterator. 586 587 >>> sent = 'a b c'.split() 588 >>> list(everygrams(sent)) 589 [('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c'), ('a', 'b', 'c')] 590 >>> list(everygrams(sent, max_len=2)) 591 [('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c')] 592 593 :param sequence: the source data to be converted into trigrams 594 :type sequence: sequence or iter 595 :param min_len: minimum length of the ngrams, aka. n-gram order/degree of ngram 596 :type min_len: int 597 :param max_len: maximum length of the ngrams (set to length of sequence by default) 598 :type max_len: int 599 :rtype: iter(tuple) 600 """ 601 602 if max_len == -1: 603 max_len = len(sequence) 604 for n in range(min_len, max_len + 1): 605 for ng in ngrams(sequence, n, **kwargs): 606 yield ng 607 608 609def skipgrams(sequence, n, k, **kwargs): 610 """ 611 Returns all possible skipgrams generated from a sequence of items, as an iterator. 612 Skipgrams are ngrams that allows tokens to be skipped. 613 Refer to http://homepages.inf.ed.ac.uk/ballison/pdf/lrec_skipgrams.pdf 614 615 >>> sent = "Insurgents killed in ongoing fighting".split() 616 >>> list(skipgrams(sent, 2, 2)) 617 [('Insurgents', 'killed'), ('Insurgents', 'in'), ('Insurgents', 'ongoing'), ('killed', 'in'), ('killed', 'ongoing'), ('killed', 'fighting'), ('in', 'ongoing'), ('in', 'fighting'), ('ongoing', 'fighting')] 618 >>> list(skipgrams(sent, 3, 2)) 619 [('Insurgents', 'killed', 'in'), ('Insurgents', 'killed', 'ongoing'), ('Insurgents', 'killed', 'fighting'), ('Insurgents', 'in', 'ongoing'), ('Insurgents', 'in', 'fighting'), ('Insurgents', 'ongoing', 'fighting'), ('killed', 'in', 'ongoing'), ('killed', 'in', 'fighting'), ('killed', 'ongoing', 'fighting'), ('in', 'ongoing', 'fighting')] 620 621 :param sequence: the source data to be converted into trigrams 622 :type sequence: sequence or iter 623 :param n: the degree of the ngrams 624 :type n: int 625 :param k: the skip distance 626 :type k: int 627 :rtype: iter(tuple) 628 """ 629 630 # Pads the sequence as desired by **kwargs. 631 if 'pad_left' in kwargs or 'pad_right' in kwargs: 632 sequence = pad_sequence(sequence, n, **kwargs) 633 634 # Note when iterating through the ngrams, the pad_right here is not 635 # the **kwargs padding, it's for the algorithm to detect the SENTINEL 636 # object on the right pad to stop inner loop. 637 SENTINEL = object() 638 for ngram in ngrams(sequence, n + k, pad_right=True, right_pad_symbol=SENTINEL): 639 head = ngram[:1] 640 tail = ngram[1:] 641 for skip_tail in combinations(tail, n - 1): 642 if skip_tail[-1] is SENTINEL: 643 continue 644 yield head + skip_tail 645 646 647###################################################################### 648# Binary Search in a File 649###################################################################### 650 651# inherited from pywordnet, by Oliver Steele 652def binary_search_file(file, key, cache={}, cacheDepth=-1): 653 """ 654 Return the line from the file with first word key. 655 Searches through a sorted file using the binary search algorithm. 656 657 :type file: file 658 :param file: the file to be searched through. 659 :type key: str 660 :param key: the identifier we are searching for. 661 """ 662 663 key = key + ' ' 664 keylen = len(key) 665 start = 0 666 currentDepth = 0 667 668 if hasattr(file, 'name'): 669 end = os.stat(file.name).st_size - 1 670 else: 671 file.seek(0, 2) 672 end = file.tell() - 1 673 file.seek(0) 674 675 while start < end: 676 lastState = start, end 677 middle = (start + end) // 2 678 679 if cache.get(middle): 680 offset, line = cache[middle] 681 682 else: 683 line = "" 684 while True: 685 file.seek(max(0, middle - 1)) 686 if middle > 0: 687 file.discard_line() 688 offset = file.tell() 689 line = file.readline() 690 if line != "": 691 break 692 # at EOF; try to find start of the last line 693 middle = (start + middle) // 2 694 if middle == end - 1: 695 return None 696 if currentDepth < cacheDepth: 697 cache[middle] = (offset, line) 698 699 if offset > end: 700 assert end != middle - 1, "infinite loop" 701 end = middle - 1 702 elif line[:keylen] == key: 703 return line 704 elif line > key: 705 assert end != middle - 1, "infinite loop" 706 end = middle - 1 707 elif line < key: 708 start = offset + len(line) - 1 709 710 currentDepth += 1 711 thisState = start, end 712 713 if lastState == thisState: 714 # Detects the condition where we're searching past the end 715 # of the file, which is otherwise difficult to detect 716 return None 717 718 return None 719 720 721###################################################################### 722# Proxy configuration 723###################################################################### 724 725 726def set_proxy(proxy, user=None, password=''): 727 """ 728 Set the HTTP proxy for Python to download through. 729 730 If ``proxy`` is None then tries to set proxy from environment or system 731 settings. 732 733 :param proxy: The HTTP proxy server to use. For example: 734 'http://proxy.example.com:3128/' 735 :param user: The username to authenticate with. Use None to disable 736 authentication. 737 :param password: The password to authenticate with. 738 """ 739 from nltk import compat 740 741 if proxy is None: 742 # Try and find the system proxy settings 743 try: 744 proxy = getproxies()['http'] 745 except KeyError: 746 raise ValueError('Could not detect default proxy settings') 747 748 # Set up the proxy handler 749 proxy_handler = ProxyHandler({'https': proxy, 'http': proxy}) 750 opener = build_opener(proxy_handler) 751 752 if user is not None: 753 # Set up basic proxy authentication if provided 754 password_manager = HTTPPasswordMgrWithDefaultRealm() 755 password_manager.add_password(realm=None, uri=proxy, user=user, passwd=password) 756 opener.add_handler(ProxyBasicAuthHandler(password_manager)) 757 opener.add_handler(ProxyDigestAuthHandler(password_manager)) 758 759 # Overide the existing url opener 760 install_opener(opener) 761 762 763###################################################################### 764# ElementTree pretty printing from http://www.effbot.org/zone/element-lib.htm 765###################################################################### 766 767 768def elementtree_indent(elem, level=0): 769 """ 770 Recursive function to indent an ElementTree._ElementInterface 771 used for pretty printing. Run indent on elem and then output 772 in the normal way. 773 774 :param elem: element to be indented. will be modified. 775 :type elem: ElementTree._ElementInterface 776 :param level: level of indentation for this element 777 :type level: nonnegative integer 778 :rtype: ElementTree._ElementInterface 779 :return: Contents of elem indented to reflect its structure 780 """ 781 782 i = "\n" + level * " " 783 if len(elem): 784 if not elem.text or not elem.text.strip(): 785 elem.text = i + " " 786 for elem in elem: 787 elementtree_indent(elem, level + 1) 788 if not elem.tail or not elem.tail.strip(): 789 elem.tail = i 790 else: 791 if level and (not elem.tail or not elem.tail.strip()): 792 elem.tail = i 793 794 795###################################################################### 796# Mathematical approximations 797###################################################################### 798 799 800def choose(n, k): 801 """ 802 This function is a fast way to calculate binomial coefficients, commonly 803 known as nCk, i.e. the number of combinations of n things taken k at a time. 804 (https://en.wikipedia.org/wiki/Binomial_coefficient). 805 806 This is the *scipy.special.comb()* with long integer computation but this 807 approximation is faster, see https://github.com/nltk/nltk/issues/1181 808 809 >>> choose(4, 2) 810 6 811 >>> choose(6, 2) 812 15 813 814 :param n: The number of things. 815 :type n: int 816 :param r: The number of times a thing is taken. 817 :type r: int 818 """ 819 if 0 <= k <= n: 820 ntok, ktok = 1, 1 821 for t in range(1, min(k, n - k) + 1): 822 ntok *= n 823 ktok *= t 824 n -= 1 825 return ntok // ktok 826 else: 827 return 0 828