1# Natural Language Toolkit: Word Sense Disambiguation Algorithms
2#
3# Authors: Liling Tan <alvations@gmail.com>,
4#          Dmitrijs Milajevs <dimazest@gmail.com>
5#
6# Copyright (C) 2001-2019 NLTK Project
7# URL: <http://nltk.org/>
8# For license information, see LICENSE.TXT
9
10from nltk.corpus import wordnet
11
12
def lesk(context_sentence, ambiguous_word, pos=None, synsets=None):
    """Return a synset for an ambiguous word in a context.

    Each candidate synset is scored by the number of distinct context words
    that also appear among the whitespace-separated tokens of its definition
    (exact, case-sensitive string match).  The candidate with the highest
    score is returned; ties are broken by comparing the synsets themselves,
    because the maximum is taken over ``(score, synset)`` pairs.

    :param iter context_sentence: The context sentence where the ambiguous word
         occurs, passed as an iterable of words.
    :param str ambiguous_word: The ambiguous word that requires WSD.
    :param str pos: A specified Part-of-Speech (POS).  When given (and
         non-empty), only candidates whose ``pos()`` equals it are considered.
    :param iter synsets: Possible synsets of the ambiguous word.  Any
         iterable is accepted, including one-shot iterators and generators.
         When omitted, the candidates are looked up with
         ``wordnet.synsets(ambiguous_word)``.
    :return: ``lesk_sense`` The Synset() object with the highest signature
         overlaps, or ``None`` if there is no candidate synset left.

    This function is an implementation of the original Lesk algorithm (1986) [1].

    Usage example::

        >>> lesk(['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.'], 'bank', 'n')
        Synset('savings_bank.n.02')

    [1] Lesk, Michael. "Automatic sense disambiguation using machine
    readable dictionaries: how to tell a pine cone from an ice cream
    cone." Proceedings of the 5th Annual International Conference on
    Systems Documentation. ACM, 1986.
    http://dl.acm.org/citation.cfm?id=318728
    """

    context = set(context_sentence)
    if synsets is None:
        synsets = wordnet.synsets(ambiguous_word)

    # Always materialise the candidates into a list.  ``synsets`` may be a
    # one-shot iterator, which is truthy even when empty; testing it directly
    # would let an empty iterator reach ``max()`` and raise ValueError.
    if pos:
        synsets = [ss for ss in synsets if str(ss.pos()) == pos]
    else:
        synsets = list(synsets)

    if not synsets:
        return None

    # Pair each synset with its overlap count; tuple comparison makes the
    # count the primary key and the synset itself the tie-breaker.
    _, sense = max(
        (len(context.intersection(ss.definition().split())), ss) for ss in synsets
    )

    return sense
52