# Natural Language Toolkit: Dispersion Plots
#
# Copyright (C) 2001-2019 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

"""
A utility for displaying lexical dispersion.
"""


def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Each target word gets its own row on the y axis; a tick is drawn at
    every token offset (x axis) where that word occurs in ``text``.  The
    first word in ``words`` is drawn on the top row.

    :param text: The source text
    :type text: list(str) or enum(str)
    :param words: The target words; this sequence is not modified
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: the title displayed above the plot
    :type title: str
    :raise ValueError: if matplotlib cannot be imported
    """

    try:
        from matplotlib import pylab
    except ImportError:
        raise ValueError(
            'The plot function requires matplotlib to be installed. '
            'See http://matplotlib.org/'
        )

    text = list(text)
    # Work on a reversed *copy*: reversing in place would silently flip the
    # caller's list on every call (and would fail for tuples).
    words = list(words)[::-1]

    if ignore_case:
        words_to_comp = [word.lower() for word in words]
        text_to_comp = [token.lower() for token in text]
    else:
        words_to_comp = words
        text_to_comp = text

    # Map each target word to the row(s) it occupies so that every token
    # needs a single lookup instead of a scan over all target words.
    # A list of rows keeps duplicate target words plotted on each of their rows.
    rows_by_word = {}
    for row, word in enumerate(words_to_comp):
        rows_by_word.setdefault(word, []).append(row)

    points = [
        (offset, row)
        for offset, token in enumerate(text_to_comp)
        for row in rows_by_word.get(token, ())
    ]
    if points:
        x, y = list(zip(*points))
    else:
        # No occurrences at all: still draw the (empty) axes.
        x = y = ()

    # NOTE(review): matplotlib documents ``scalex`` as a boolean; 0.1 is kept
    # unchanged from the original call -- confirm whether True was intended.
    pylab.plot(x, y, "b|", scalex=0.1)
    pylab.yticks(list(range(len(words))), words, color="b")
    pylab.ylim(-1, len(words))
    pylab.title(title)
    pylab.xlabel("Word Offset")
    pylab.show()


if __name__ == '__main__':
    import nltk.compat
    from nltk.corpus import gutenberg

    words = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
    dispersion_plot(gutenberg.words('austen-sense.txt'), words)