1# Natural Language Toolkit: Dispersion Plots
2#
3# Copyright (C) 2001-2019 NLTK Project
4# Author: Steven Bird <stevenbird1@gmail.com>
5# URL: <http://nltk.org/>
6# For license information, see LICENSE.TXT
7
8"""
9A utility for displaying lexical dispersion.
10"""
11
12
def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Every occurrence of a target word is drawn as a vertical tick at the
    offset of the matching token in ``text``.  The target words label the
    y-axis, with the first entry of ``words`` on the highest row.

    :param text: The source text
    :type text: list(str) or enum(str)
    :param words: The target words; the sequence passed in is not modified
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: The title displayed on the plot
    :type title: str
    :raise ValueError: if matplotlib cannot be imported
    """

    try:
        from matplotlib import pylab
    except ImportError:
        # ValueError (rather than ImportError) is kept for backward
        # compatibility with callers that already catch it.
        raise ValueError(
            'The plot function requires matplotlib to be installed. '
            'See http://matplotlib.org/'
        )

    # Work on a reversed *copy*: reversing in place would silently reorder
    # the caller's list (and would fail outright for tuples).
    labels = list(words)[::-1]

    def normalize(token):
        return token.lower() if ignore_case else token

    # Map each (normalized) target word to the row(s) it occupies, so every
    # token of the text needs one dictionary lookup instead of a comparison
    # against every target word.  A word listed more than once keeps a row
    # for each listing.
    rows_by_word = {}
    for row, word in enumerate(labels):
        rows_by_word.setdefault(normalize(word), []).append(row)

    xs, ys = [], []
    for offset, token in enumerate(text):
        for row in rows_by_word.get(normalize(token), ()):
            xs.append(offset)
            ys.append(row)

    # NOTE(review): matplotlib documents ``scalex`` as a boolean; 0.1 is
    # truthy, so this presumably behaves like the default -- confirm intent.
    pylab.plot(xs, ys, "b|", scalex=0.1)
    pylab.yticks(list(range(len(labels))), labels, color="b")
    # Leave one row of padding below the first and above the last word.
    pylab.ylim(-1, len(labels))
    pylab.title(title)
    pylab.xlabel("Word Offset")
    pylab.show()
59
60
if __name__ == '__main__':
    # Demo: plot the dispersion of four words across the tokens of the
    # 'austen-sense.txt' file of the gutenberg corpus.
    import nltk.compat
    from nltk.corpus import gutenberg

    target_words = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
    corpus_tokens = gutenberg.words('austen-sense.txt')
    dispersion_plot(corpus_tokens, target_words)
67