# Natural Language Toolkit: Word Sense Disambiguation Algorithms
#
# Authors: Liling Tan <alvations@gmail.com>,
#          Dmitrijs Milajevs <dimazest@gmail.com>
#
# Copyright (C) 2001-2019 NLTK Project
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

from nltk.corpus import wordnet


def lesk(context_sentence, ambiguous_word, pos=None, synsets=None):
    """Return a synset for an ambiguous word in a context.

    :param iter context_sentence: The context sentence where the ambiguous word
         occurs, passed as an iterable of words.
    :param str ambiguous_word: The ambiguous word that requires WSD.
    :param str pos: A specified Part-of-Speech (POS). When given, only
         candidates whose ``str(ss.pos())`` equals it are considered.
    :param iter synsets: Possible synsets of the ambiguous word. When omitted,
         they are looked up with ``wordnet.synsets(ambiguous_word)``. Any
         iterable is accepted, including a one-shot iterator.
    :return: ``lesk_sense`` The Synset() object with the highest signature
         overlaps, or ``None`` when there is no candidate synset left.

    This function is an implementation of the original Lesk algorithm (1986) [1].

    The overlap of a candidate is the number of distinct context words that
    also appear among the whitespace-separated tokens of its definition.
    Candidates with the same overlap are ranked by comparing the synset
    objects themselves.

    Usage example::

        >>> lesk(['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.'], 'bank', 'n')
        Synset('savings_bank.n.02')

    [1] Lesk, Michael. "Automatic sense disambiguation using machine
    readable dictionaries: how to tell a pine cone from an ice cream
    cone." Proceedings of the 5th Annual International Conference on
    Systems Documentation. ACM, 1986.
    http://dl.acm.org/citation.cfm?id=318728
    """

    context = set(context_sentence)
    if synsets is None:
        synsets = wordnet.synsets(ambiguous_word)

    # Always materialize the candidates into a list. An iterator object is
    # truthy even when it yields nothing, so without this an empty iterator
    # would slip past the emptiness check below and make max() raise
    # ValueError instead of this function returning None.
    if pos:
        candidates = [ss for ss in synsets if str(ss.pos()) == pos]
    else:
        candidates = list(synsets)

    if not candidates:
        return None

    # Compare (overlap, synset) tuples so that equal overlaps fall back to
    # the ordering of the synsets themselves, which keeps tie-breaking stable.
    _, sense = max(
        (len(context.intersection(ss.definition().split())), ss)
        for ss in candidates
    )

    return sense