1# Natural Language Toolkit: Utility functions
2#
3# Copyright (C) 2001-2019 NLTK Project
4# Author: Steven Bird <stevenbird1@gmail.com>
5# URL: <http://nltk.org/>
6# For license information, see LICENSE.TXT
7from __future__ import print_function
8
9import sys
10import inspect
11import locale
12import re
13import types
14import textwrap
15import pydoc
16import bisect
17import os
18
19from itertools import islice, chain, combinations
20from pprint import pprint
21from collections import defaultdict, deque
22from sys import version_info
23
24from six import class_types, string_types, text_type
25from six.moves.urllib.request import (
26    build_opener,
27    install_opener,
28    getproxies,
29    ProxyHandler,
30    ProxyBasicAuthHandler,
31    ProxyDigestAuthHandler,
32    HTTPPasswordMgrWithDefaultRealm,
33)
34
35from nltk.internals import slice_bounds, raise_unorderable_types
36from nltk.collections import *
37from nltk.compat import python_2_unicode_compatible
38
39
40######################################################################
41# Short usage message
42######################################################################
43
44
45def usage(obj, selfname='self'):
46    str(obj)  # In case it's lazy, this will load it.
47
48    if not isinstance(obj, class_types):
49        obj = obj.__class__
50
51    print('%s supports the following operations:' % obj.__name__)
52    for (name, method) in sorted(pydoc.allmethods(obj).items()):
53        if name.startswith('_'):
54            continue
55        if getattr(method, '__deprecated__', False):
56            continue
57
58        if sys.version_info[0] >= 3:
59            getargspec = inspect.getfullargspec
60        else:
61            getargspec = inspect.getargspec
62        args, varargs, varkw, defaults = getargspec(method)[:4]
63        if (
64            args
65            and args[0] == 'self'
66            and (defaults is None or len(args) > len(defaults))
67        ):
68            args = args[1:]
69            name = '%s.%s' % (selfname, name)
70        argspec = inspect.formatargspec(args, varargs, varkw, defaults)
71        print(
72            textwrap.fill(
73                '%s%s' % (name, argspec),
74                initial_indent='  - ',
75                subsequent_indent=' ' * (len(name) + 5),
76            )
77        )
78
79
80##########################################################################
81# IDLE
82##########################################################################
83
84
85def in_idle():
86    """
87    Return True if this function is run within idle.  Tkinter
88    programs that are run in idle should never call ``Tk.mainloop``; so
89    this function should be used to gate all calls to ``Tk.mainloop``.
90
91    :warning: This function works by checking ``sys.stdin``.  If the
92        user has modified ``sys.stdin``, then it may return incorrect
93        results.
94    :rtype: bool
95    """
96    import sys
97
98    return sys.stdin.__class__.__name__ in ('PyShell', 'RPCProxy')
99
100
101##########################################################################
102# PRETTY PRINTING
103##########################################################################
104
105
106def pr(data, start=0, end=None):
107    """
108    Pretty print a sequence of data items
109
110    :param data: the data stream to print
111    :type data: sequence or iter
112    :param start: the start position
113    :type start: int
114    :param end: the end position
115    :type end: int
116    """
117    pprint(list(islice(data, start, end)))
118
119
120def print_string(s, width=70):
121    """
122    Pretty print a string, breaking lines on whitespace
123
124    :param s: the string to print, consisting of words and spaces
125    :type s: str
126    :param width: the display width
127    :type width: int
128    """
129    print('\n'.join(textwrap.wrap(s, width=width)))
130
131
132def tokenwrap(tokens, separator=" ", width=70):
133    """
134    Pretty print a list of text tokens, breaking lines on whitespace
135
136    :param tokens: the tokens to print
137    :type tokens: list
138    :param separator: the string to use to separate tokens
139    :type separator: str
140    :param width: the display width (default=70)
141    :type width: int
142    """
143    return '\n'.join(textwrap.wrap(separator.join(tokens), width=width))
144
145
146##########################################################################
147# Python version
148##########################################################################
149
150
151def py25():
152    return version_info[0] == 2 and version_info[1] == 5
153
154
155def py26():
156    return version_info[0] == 2 and version_info[1] == 6
157
158
159def py27():
160    return version_info[0] == 2 and version_info[1] == 7
161
162
163##########################################################################
164# Indexing
165##########################################################################
166
167
168class Index(defaultdict):
169    def __init__(self, pairs):
170        defaultdict.__init__(self, list)
171        for key, value in pairs:
172            self[key].append(value)
173
174
175######################################################################
176## Regexp display (thanks to David Mertz)
177######################################################################
178
179
180def re_show(regexp, string, left="{", right="}"):
181    """
182    Return a string with markers surrounding the matched substrings.
183    Search str for substrings matching ``regexp`` and wrap the matches
184    with braces.  This is convenient for learning about regular expressions.
185
186    :param regexp: The regular expression.
187    :type regexp: str
188    :param string: The string being matched.
189    :type string: str
190    :param left: The left delimiter (printed before the matched substring)
191    :type left: str
192    :param right: The right delimiter (printed after the matched substring)
193    :type right: str
194    :rtype: str
195    """
196    print(re.compile(regexp, re.M).sub(left + r"\g<0>" + right, string.rstrip()))
197
198
199##########################################################################
200# READ FROM FILE OR STRING
201##########################################################################
202
203# recipe from David Mertz
204def filestring(f):
205    if hasattr(f, 'read'):
206        return f.read()
207    elif isinstance(f, string_types):
208        with open(f, 'r') as infile:
209            return infile.read()
210    else:
211        raise ValueError("Must be called with a filename or file-like object")
212
213
214##########################################################################
215# Breadth-First Search
216##########################################################################
217
218
219def breadth_first(tree, children=iter, maxdepth=-1):
220    """Traverse the nodes of a tree in breadth-first order.
221    (No need to check for cycles.)
222    The first argument should be the tree root;
223    children should be a function taking as argument a tree node
224    and returning an iterator of the node's children.
225    """
226    queue = deque([(tree, 0)])
227
228    while queue:
229        node, depth = queue.popleft()
230        yield node
231
232        if depth != maxdepth:
233            try:
234                queue.extend((c, depth + 1) for c in children(node))
235            except TypeError:
236                pass
237
238
239##########################################################################
240# Guess Character Encoding
241##########################################################################
242
243# adapted from io.py in the docutils extension module (http://docutils.sourceforge.net)
244# http://www.pyzine.com/Issue008/Section_Articles/article_Encodings.html
245
246
247def guess_encoding(data):
248    """
249    Given a byte string, attempt to decode it.
250    Tries the standard 'UTF8' and 'latin-1' encodings,
251    Plus several gathered from locale information.
252
253    The calling program *must* first call::
254
255        locale.setlocale(locale.LC_ALL, '')
256
257    If successful it returns ``(decoded_unicode, successful_encoding)``.
258    If unsuccessful it raises a ``UnicodeError``.
259    """
260    successful_encoding = None
261    # we make 'utf-8' the first encoding
262    encodings = ['utf-8']
263    #
264    # next we add anything we can learn from the locale
265    try:
266        encodings.append(locale.nl_langinfo(locale.CODESET))
267    except AttributeError:
268        pass
269    try:
270        encodings.append(locale.getlocale()[1])
271    except (AttributeError, IndexError):
272        pass
273    try:
274        encodings.append(locale.getdefaultlocale()[1])
275    except (AttributeError, IndexError):
276        pass
277    #
278    # we try 'latin-1' last
279    encodings.append('latin-1')
280    for enc in encodings:
281        # some of the locale calls
282        # may have returned None
283        if not enc:
284            continue
285        try:
286            decoded = text_type(data, enc)
287            successful_encoding = enc
288
289        except (UnicodeError, LookupError):
290            pass
291        else:
292            break
293    if not successful_encoding:
294        raise UnicodeError(
295            'Unable to decode input data. '
296            'Tried the following encodings: %s.'
297            % ', '.join([repr(enc) for enc in encodings if enc])
298        )
299    else:
300        return (decoded, successful_encoding)
301
302
303##########################################################################
304# Remove repeated elements from a list deterministcally
305##########################################################################
306
307
308def unique_list(xs):
309    seen = set()
310    # not seen.add(x) here acts to make the code shorter without using if statements, seen.add(x) always returns None.
311    return [x for x in xs if x not in seen and not seen.add(x)]
312
313
314##########################################################################
315# Invert a dictionary
316##########################################################################
317
318
319def invert_dict(d):
320    inverted_dict = defaultdict(list)
321    for key in d:
322        if hasattr(d[key], '__iter__'):
323            for term in d[key]:
324                inverted_dict[term].append(key)
325        else:
326            inverted_dict[d[key]] = key
327    return inverted_dict
328
329
330##########################################################################
331# Utilities for directed graphs: transitive closure, and inversion
332# The graph is represented as a dictionary of sets
333##########################################################################
334
335
336def transitive_closure(graph, reflexive=False):
337    """
338    Calculate the transitive closure of a directed graph,
339    optionally the reflexive transitive closure.
340
341    The algorithm is a slight modification of the "Marking Algorithm" of
342    Ioannidis & Ramakrishnan (1998) "Efficient Transitive Closure Algorithms".
343
344    :param graph: the initial graph, represented as a dictionary of sets
345    :type graph: dict(set)
346    :param reflexive: if set, also make the closure reflexive
347    :type reflexive: bool
348    :rtype: dict(set)
349    """
350    if reflexive:
351        base_set = lambda k: set([k])
352    else:
353        base_set = lambda k: set()
354    # The graph U_i in the article:
355    agenda_graph = dict((k, graph[k].copy()) for k in graph)
356    # The graph M_i in the article:
357    closure_graph = dict((k, base_set(k)) for k in graph)
358    for i in graph:
359        agenda = agenda_graph[i]
360        closure = closure_graph[i]
361        while agenda:
362            j = agenda.pop()
363            closure.add(j)
364            closure |= closure_graph.setdefault(j, base_set(j))
365            agenda |= agenda_graph.get(j, base_set(j))
366            agenda -= closure
367    return closure_graph
368
369
370def invert_graph(graph):
371    """
372    Inverts a directed graph.
373
374    :param graph: the graph, represented as a dictionary of sets
375    :type graph: dict(set)
376    :return: the inverted graph
377    :rtype: dict(set)
378    """
379    inverted = {}
380    for key in graph:
381        for value in graph[key]:
382            inverted.setdefault(value, set()).add(key)
383    return inverted
384
385
386##########################################################################
387# HTML Cleaning
388##########################################################################
389
390
391def clean_html(html):
392    raise NotImplementedError(
393        "To remove HTML markup, use BeautifulSoup's get_text() function"
394    )
395
396
397def clean_url(url):
398    raise NotImplementedError(
399        "To remove HTML markup, use BeautifulSoup's get_text() function"
400    )
401
402
403##########################################################################
404# FLATTEN LISTS
405##########################################################################
406
407
408def flatten(*args):
409    """
410    Flatten a list.
411
412        >>> from nltk.util import flatten
413        >>> flatten(1, 2, ['b', 'a' , ['c', 'd']], 3)
414        [1, 2, 'b', 'a', 'c', 'd', 3]
415
416    :param args: items and lists to be combined into a single list
417    :rtype: list
418    """
419
420    x = []
421    for l in args:
422        if not isinstance(l, (list, tuple)):
423            l = [l]
424        for item in l:
425            if isinstance(item, (list, tuple)):
426                x.extend(flatten(item))
427            else:
428                x.append(item)
429    return x
430
431
432##########################################################################
433# Ngram iteration
434##########################################################################
435
436
437def pad_sequence(
438    sequence,
439    n,
440    pad_left=False,
441    pad_right=False,
442    left_pad_symbol=None,
443    right_pad_symbol=None,
444):
445    """
446    Returns a padded sequence of items before ngram extraction.
447
448        >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
449        ['<s>', 1, 2, 3, 4, 5, '</s>']
450        >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
451        ['<s>', 1, 2, 3, 4, 5]
452        >>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
453        [1, 2, 3, 4, 5, '</s>']
454
455    :param sequence: the source data to be padded
456    :type sequence: sequence or iter
457    :param n: the degree of the ngrams
458    :type n: int
459    :param pad_left: whether the ngrams should be left-padded
460    :type pad_left: bool
461    :param pad_right: whether the ngrams should be right-padded
462    :type pad_right: bool
463    :param left_pad_symbol: the symbol to use for left padding (default is None)
464    :type left_pad_symbol: any
465    :param right_pad_symbol: the symbol to use for right padding (default is None)
466    :type right_pad_symbol: any
467    :rtype: sequence or iter
468    """
469    sequence = iter(sequence)
470    if pad_left:
471        sequence = chain((left_pad_symbol,) * (n - 1), sequence)
472    if pad_right:
473        sequence = chain(sequence, (right_pad_symbol,) * (n - 1))
474    return sequence
475
476
477# add a flag to pad the sequence so we get peripheral ngrams?
478
479
480def ngrams(
481    sequence,
482    n,
483    pad_left=False,
484    pad_right=False,
485    left_pad_symbol=None,
486    right_pad_symbol=None,
487):
488    """
489    Return the ngrams generated from a sequence of items, as an iterator.
490    For example:
491
492        >>> from nltk.util import ngrams
493        >>> list(ngrams([1,2,3,4,5], 3))
494        [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
495
496    Wrap with list for a list version of this function.  Set pad_left
497    or pad_right to true in order to get additional ngrams:
498
499        >>> list(ngrams([1,2,3,4,5], 2, pad_right=True))
500        [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]
501        >>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
502        [(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
503        >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
504        [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)]
505        >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
506        [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
507
508
509    :param sequence: the source data to be converted into ngrams
510    :type sequence: sequence or iter
511    :param n: the degree of the ngrams
512    :type n: int
513    :param pad_left: whether the ngrams should be left-padded
514    :type pad_left: bool
515    :param pad_right: whether the ngrams should be right-padded
516    :type pad_right: bool
517    :param left_pad_symbol: the symbol to use for left padding (default is None)
518    :type left_pad_symbol: any
519    :param right_pad_symbol: the symbol to use for right padding (default is None)
520    :type right_pad_symbol: any
521    :rtype: sequence or iter
522    """
523    sequence = pad_sequence(
524        sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
525    )
526
527    history = []
528    while n > 1:
529        # PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator
530        try:
531            next_item = next(sequence)
532        except StopIteration:
533            # no more data, terminate the generator
534            return
535        history.append(next_item)
536        n -= 1
537    for item in sequence:
538        history.append(item)
539        yield tuple(history)
540        del history[0]
541
542
543def bigrams(sequence, **kwargs):
544    """
545    Return the bigrams generated from a sequence of items, as an iterator.
546    For example:
547
548        >>> from nltk.util import bigrams
549        >>> list(bigrams([1,2,3,4,5]))
550        [(1, 2), (2, 3), (3, 4), (4, 5)]
551
552    Use bigrams for a list version of this function.
553
554    :param sequence: the source data to be converted into bigrams
555    :type sequence: sequence or iter
556    :rtype: iter(tuple)
557    """
558
559    for item in ngrams(sequence, 2, **kwargs):
560        yield item
561
562
563def trigrams(sequence, **kwargs):
564    """
565    Return the trigrams generated from a sequence of items, as an iterator.
566    For example:
567
568        >>> from nltk.util import trigrams
569        >>> list(trigrams([1,2,3,4,5]))
570        [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
571
572    Use trigrams for a list version of this function.
573
574    :param sequence: the source data to be converted into trigrams
575    :type sequence: sequence or iter
576    :rtype: iter(tuple)
577    """
578
579    for item in ngrams(sequence, 3, **kwargs):
580        yield item
581
582
583def everygrams(sequence, min_len=1, max_len=-1, **kwargs):
584    """
585    Returns all possible ngrams generated from a sequence of items, as an iterator.
586
587        >>> sent = 'a b c'.split()
588        >>> list(everygrams(sent))
589        [('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c'), ('a', 'b', 'c')]
590        >>> list(everygrams(sent, max_len=2))
591        [('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c')]
592
593    :param sequence: the source data to be converted into trigrams
594    :type sequence: sequence or iter
595    :param min_len: minimum length of the ngrams, aka. n-gram order/degree of ngram
596    :type  min_len: int
597    :param max_len: maximum length of the ngrams (set to length of sequence by default)
598    :type  max_len: int
599    :rtype: iter(tuple)
600    """
601
602    if max_len == -1:
603        max_len = len(sequence)
604    for n in range(min_len, max_len + 1):
605        for ng in ngrams(sequence, n, **kwargs):
606            yield ng
607
608
609def skipgrams(sequence, n, k, **kwargs):
610    """
611    Returns all possible skipgrams generated from a sequence of items, as an iterator.
612    Skipgrams are ngrams that allows tokens to be skipped.
613    Refer to http://homepages.inf.ed.ac.uk/ballison/pdf/lrec_skipgrams.pdf
614
615        >>> sent = "Insurgents killed in ongoing fighting".split()
616        >>> list(skipgrams(sent, 2, 2))
617        [('Insurgents', 'killed'), ('Insurgents', 'in'), ('Insurgents', 'ongoing'), ('killed', 'in'), ('killed', 'ongoing'), ('killed', 'fighting'), ('in', 'ongoing'), ('in', 'fighting'), ('ongoing', 'fighting')]
618        >>> list(skipgrams(sent, 3, 2))
619        [('Insurgents', 'killed', 'in'), ('Insurgents', 'killed', 'ongoing'), ('Insurgents', 'killed', 'fighting'), ('Insurgents', 'in', 'ongoing'), ('Insurgents', 'in', 'fighting'), ('Insurgents', 'ongoing', 'fighting'), ('killed', 'in', 'ongoing'), ('killed', 'in', 'fighting'), ('killed', 'ongoing', 'fighting'), ('in', 'ongoing', 'fighting')]
620
621    :param sequence: the source data to be converted into trigrams
622    :type sequence: sequence or iter
623    :param n: the degree of the ngrams
624    :type n: int
625    :param k: the skip distance
626    :type  k: int
627    :rtype: iter(tuple)
628    """
629
630    # Pads the sequence as desired by **kwargs.
631    if 'pad_left' in kwargs or 'pad_right' in kwargs:
632        sequence = pad_sequence(sequence, n, **kwargs)
633
634    # Note when iterating through the ngrams, the pad_right here is not
635    # the **kwargs padding, it's for the algorithm to detect the SENTINEL
636    # object on the right pad to stop inner loop.
637    SENTINEL = object()
638    for ngram in ngrams(sequence, n + k, pad_right=True, right_pad_symbol=SENTINEL):
639        head = ngram[:1]
640        tail = ngram[1:]
641        for skip_tail in combinations(tail, n - 1):
642            if skip_tail[-1] is SENTINEL:
643                continue
644            yield head + skip_tail
645
646
647######################################################################
648# Binary Search in a File
649######################################################################
650
651# inherited from pywordnet, by Oliver Steele
652def binary_search_file(file, key, cache={}, cacheDepth=-1):
653    """
654    Return the line from the file with first word key.
655    Searches through a sorted file using the binary search algorithm.
656
657    :type file: file
658    :param file: the file to be searched through.
659    :type key: str
660    :param key: the identifier we are searching for.
661    """
662
663    key = key + ' '
664    keylen = len(key)
665    start = 0
666    currentDepth = 0
667
668    if hasattr(file, 'name'):
669        end = os.stat(file.name).st_size - 1
670    else:
671        file.seek(0, 2)
672        end = file.tell() - 1
673        file.seek(0)
674
675    while start < end:
676        lastState = start, end
677        middle = (start + end) // 2
678
679        if cache.get(middle):
680            offset, line = cache[middle]
681
682        else:
683            line = ""
684            while True:
685                file.seek(max(0, middle - 1))
686                if middle > 0:
687                    file.discard_line()
688                offset = file.tell()
689                line = file.readline()
690                if line != "":
691                    break
692                # at EOF; try to find start of the last line
693                middle = (start + middle) // 2
694                if middle == end - 1:
695                    return None
696            if currentDepth < cacheDepth:
697                cache[middle] = (offset, line)
698
699        if offset > end:
700            assert end != middle - 1, "infinite loop"
701            end = middle - 1
702        elif line[:keylen] == key:
703            return line
704        elif line > key:
705            assert end != middle - 1, "infinite loop"
706            end = middle - 1
707        elif line < key:
708            start = offset + len(line) - 1
709
710        currentDepth += 1
711        thisState = start, end
712
713        if lastState == thisState:
714            # Detects the condition where we're searching past the end
715            # of the file, which is otherwise difficult to detect
716            return None
717
718    return None
719
720
721######################################################################
722# Proxy configuration
723######################################################################
724
725
726def set_proxy(proxy, user=None, password=''):
727    """
728    Set the HTTP proxy for Python to download through.
729
730    If ``proxy`` is None then tries to set proxy from environment or system
731    settings.
732
733    :param proxy: The HTTP proxy server to use. For example:
734        'http://proxy.example.com:3128/'
735    :param user: The username to authenticate with. Use None to disable
736        authentication.
737    :param password: The password to authenticate with.
738    """
739    from nltk import compat
740
741    if proxy is None:
742        # Try and find the system proxy settings
743        try:
744            proxy = getproxies()['http']
745        except KeyError:
746            raise ValueError('Could not detect default proxy settings')
747
748    # Set up the proxy handler
749    proxy_handler = ProxyHandler({'https': proxy, 'http': proxy})
750    opener = build_opener(proxy_handler)
751
752    if user is not None:
753        # Set up basic proxy authentication if provided
754        password_manager = HTTPPasswordMgrWithDefaultRealm()
755        password_manager.add_password(realm=None, uri=proxy, user=user, passwd=password)
756        opener.add_handler(ProxyBasicAuthHandler(password_manager))
757        opener.add_handler(ProxyDigestAuthHandler(password_manager))
758
759    # Overide the existing url opener
760    install_opener(opener)
761
762
763######################################################################
764# ElementTree pretty printing from http://www.effbot.org/zone/element-lib.htm
765######################################################################
766
767
768def elementtree_indent(elem, level=0):
769    """
770    Recursive function to indent an ElementTree._ElementInterface
771    used for pretty printing. Run indent on elem and then output
772    in the normal way.
773
774    :param elem: element to be indented. will be modified.
775    :type elem: ElementTree._ElementInterface
776    :param level: level of indentation for this element
777    :type level: nonnegative integer
778    :rtype:   ElementTree._ElementInterface
779    :return:  Contents of elem indented to reflect its structure
780    """
781
782    i = "\n" + level * "  "
783    if len(elem):
784        if not elem.text or not elem.text.strip():
785            elem.text = i + "  "
786        for elem in elem:
787            elementtree_indent(elem, level + 1)
788        if not elem.tail or not elem.tail.strip():
789            elem.tail = i
790    else:
791        if level and (not elem.tail or not elem.tail.strip()):
792            elem.tail = i
793
794
795######################################################################
796# Mathematical approximations
797######################################################################
798
799
800def choose(n, k):
801    """
802    This function is a fast way to calculate binomial coefficients, commonly
803    known as nCk, i.e. the number of combinations of n things taken k at a time.
804    (https://en.wikipedia.org/wiki/Binomial_coefficient).
805
806    This is the *scipy.special.comb()* with long integer computation but this
807    approximation is faster, see https://github.com/nltk/nltk/issues/1181
808
809        >>> choose(4, 2)
810        6
811        >>> choose(6, 2)
812        15
813
814    :param n: The number of things.
815    :type n: int
816    :param r: The number of times a thing is taken.
817    :type r: int
818    """
819    if 0 <= k <= n:
820        ntok, ktok = 1, 1
821        for t in range(1, min(k, n - k) + 1):
822            ntok *= n
823            ktok *= t
824            n -= 1
825        return ntok // ktok
826    else:
827        return 0
828