1# -*- coding: utf-8 -*-
2# This file is part of pygal
3#
4# A python svg graph plotting library
5# Copyright © 2012-2016 Kozea
6#
7# This library is free software: you can redistribute it and/or modify it under
8# the terms of the GNU Lesser General Public License as published by the Free
9# Software Foundation, either version 3 of the License, or (at your option) any
10# later version.
11#
12# This library is distributed in the hope that it will be useful, but WITHOUT
13# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
15# details.
16#
17# You should have received a copy of the GNU Lesser General Public License
18# along with pygal. If not, see <http://www.gnu.org/licenses/>.
19
20"""
Box plot: a convenient way to display series as boxes with whiskers and
outliers. Different types are available through the box_mode option.
23"""
24
25from __future__ import division
26
27from bisect import bisect_left, bisect_right
28
29from pygal.graph.graph import Graph
30from pygal.util import alter, decorate
31
32
33class Box(Graph):
34
35    """
36    Box plot
37    For each series, shows the median value, the 25th and 75th percentiles,
38    and the values within
39    1.5 times the interquartile range of the 25th and 75th percentiles.
40
41    See http://en.wikipedia.org/wiki/Box_plot
42    """
43
44    _series_margin = .06
45
46    def _value_format(self, value, serie):
47        """
48        Format value for dual value display.
49        """
50        if self.box_mode == "extremes":
51            return (
52                'Min: %s\nQ1 : %s\nQ2 : %s\nQ3 : %s\nMax: %s' % tuple(
53                    map(self._y_format, serie.points[1:6])))
54        elif self.box_mode in ["tukey", "stdev", "pstdev"]:
55            return (
56                'Min: %s\nLower Whisker: %s\nQ1: %s\nQ2: %s\nQ3: %s\n'
57                'Upper Whisker: %s\nMax: %s' % tuple(map(
58                    self._y_format, serie.points)))
59        elif self.box_mode == '1.5IQR':
60            # 1.5IQR mode
61            return 'Q1: %s\nQ2: %s\nQ3: %s' % tuple(map(
62                self._y_format, serie.points[2:5]))
63        else:
64            return self._y_format(serie.points)
65
66    def _compute(self):
67        """
68        Compute parameters necessary for later steps
69        within the rendering process
70        """
71        for serie in self.series:
72            serie.points, serie.outliers = \
73                self._box_points(serie.values, self.box_mode)
74
75        self._x_pos = [
76            (i + .5) / self._order for i in range(self._order)]
77
78        if self._min:
79            self._box.ymin = min(self._min, self.zero)
80        if self._max:
81            self._box.ymax = max(self._max, self.zero)
82
83    def _plot(self):
84        """Plot the series data"""
85        for serie in self.series:
86            self._boxf(serie)
87
    @property
    def _len(self):
        """
        Fixed length reported by this graph type: always 7.

        The value matches the size of the tuple produced by ``_box_points``
        (min, q0, q1, q2, q3, q4, max).
        NOTE(review): presumably read by the Graph base class -- confirm.
        """
        return 7
92
93    def _boxf(self, serie):
94        """For a specific series, draw the box plot."""
95        serie_node = self.svg.serie(serie)
96        # Note: q0 and q4 do not literally mean the zero-th quartile
97        # and the fourth quartile, but rather the distance from 1.5 times
98        # the inter-quartile range to Q1 and Q3, respectively.
99        boxes = self.svg.node(serie_node['plot'], class_="boxes")
100
101        metadata = serie.metadata.get(0)
102
103        box = decorate(
104            self.svg,
105            self.svg.node(boxes, class_='box'),
106            metadata)
107
108        val = self._format(serie, 0)
109
110        x_center, y_center = self._draw_box(
111            box, serie.points[1:6], serie.outliers, serie.index, metadata)
112        self._tooltip_data(box, val, x_center, y_center, "centered",
113                           self._get_x_label(serie.index))
114        self._static_value(serie_node, val, x_center, y_center, metadata)
115
116    def _draw_box(self, parent_node, quartiles, outliers, box_index, metadata):
117        """
118        Return the center of a bounding box defined by a box plot.
119        Draws a box plot on self.svg.
120        """
121        width = (self.view.x(1) - self.view.x(0)) / self._order
122        series_margin = width * self._series_margin
123        left_edge = self.view.x(0) + width * box_index + series_margin
124        width -= 2 * series_margin
125
126        # draw lines for whiskers - bottom, median, and top
127        for i, whisker in enumerate(
128                (quartiles[0], quartiles[2], quartiles[4])):
129            whisker_width = width if i == 1 else width / 2
130            shift = (width - whisker_width) / 2
131            xs = left_edge + shift
132            xe = left_edge + width - shift
133            alter(self.svg.line(
134                parent_node,
135                coords=[(xs, self.view.y(whisker)),
136                        (xe, self.view.y(whisker))],
137                class_='reactive tooltip-trigger',
138                attrib={'stroke-width': 3}), metadata)
139
140        # draw lines connecting whiskers to box (Q1 and Q3)
141        alter(self.svg.line(
142            parent_node,
143            coords=[(left_edge + width / 2, self.view.y(quartiles[0])),
144                    (left_edge + width / 2, self.view.y(quartiles[1]))],
145            class_='reactive tooltip-trigger',
146            attrib={'stroke-width': 2}), metadata)
147        alter(self.svg.line(
148            parent_node,
149            coords=[(left_edge + width / 2, self.view.y(quartiles[4])),
150                    (left_edge + width / 2, self.view.y(quartiles[3]))],
151            class_='reactive tooltip-trigger',
152            attrib={'stroke-width': 2}), metadata)
153
154        # box, bounded by Q1 and Q3
155        alter(self.svg.node(
156            parent_node,
157            tag='rect',
158            x=left_edge,
159            y=self.view.y(quartiles[1]),
160            height=self.view.y(quartiles[3]) - self.view.y(quartiles[1]),
161            width=width,
162            class_='subtle-fill reactive tooltip-trigger'), metadata)
163
164        # draw outliers
165        for o in outliers:
166            alter(self.svg.node(
167                parent_node,
168                tag='circle',
169                cx=left_edge + width / 2,
170                cy=self.view.y(o),
171                r=3,
172                class_='subtle-fill reactive tooltip-trigger'), metadata)
173
174        return (left_edge + width / 2, self.view.y(
175            sum(quartiles) / len(quartiles)))
176
177    @staticmethod
178    def _box_points(values, mode='extremes'):
179        """
180        Default mode: (mode='extremes' or unset)
181            Return a 7-tuple of 2x minimum, Q1, Median, Q3,
182        and 2x maximum for a list of numeric values.
183        1.5IQR mode: (mode='1.5IQR')
184            Return a 7-tuple of min, Q1 - 1.5 * IQR, Q1, Median, Q3,
185        Q3 + 1.5 * IQR and max for a list of numeric values.
186        Tukey mode: (mode='tukey')
187            Return a 7-tuple of min, q[0..4], max and a list of outliers
188        Outliers are considered values x: x < q1 - IQR or x > q3 + IQR
189        SD mode: (mode='stdev')
190            Return a 7-tuple of min, q[0..4], max and a list of outliers
191        Outliers are considered values x: x < q2 - SD or x > q2 + SD
192        SDp mode: (mode='pstdev')
193            Return a 7-tuple of min, q[0..4], max and a list of outliers
194        Outliers are considered values x: x < q2 - SDp or x > q2 + SDp
195
196        The iterator values may include None values.
197
198        Uses quartile definition from  Mendenhall, W. and
199        Sincich, T. L. Statistics for Engineering and the
200        Sciences, 4th ed. Prentice-Hall, 1995.
201        """
202        def median(seq):
203            n = len(seq)
204            if n % 2 == 0:  # seq has an even length
205                return (seq[n // 2] + seq[n // 2 - 1]) / 2
206            else:  # seq has an odd length
207                return seq[n // 2]
208
209        def mean(seq):
210            return sum(seq) / len(seq)
211
212        def stdev(seq):
213            m = mean(seq)
214            l = len(seq)
215            v = sum((n - m)**2 for n in seq) / (l - 1)  # variance
216            return v**0.5  # sqrt
217
218        def pstdev(seq):
219            m = mean(seq)
220            l = len(seq)
221            v = sum((n - m)**2 for n in seq) / l  # variance
222            return v**0.5  # sqrt
223
224        outliers = []
225        # sort the copy in case the originals must stay in original order
226        s = sorted([x for x in values if x is not None])
227        n = len(s)
228        if not n:
229            return (0, 0, 0, 0, 0, 0, 0), []
230        elif n == 1:
231            return (s[0], s[0], s[0], s[0], s[0], s[0], s[0]), []
232        else:
233            q2 = median(s)
234            # See 'Method 3' in http://en.wikipedia.org/wiki/Quartile
235            if n % 2 == 0:  # even
236                q1 = median(s[:n // 2])
237                q3 = median(s[n // 2:])
238            else:  # odd
239                if n == 1:  # special case
240                    q1 = s[0]
241                    q3 = s[0]
242                elif n % 4 == 1:  # n is of form 4n + 1 where n >= 1
243                    m = (n - 1) // 4
244                    q1 = 0.25 * s[m - 1] + 0.75 * s[m]
245                    q3 = 0.75 * s[3 * m] + 0.25 * s[3 * m + 1]
246                else:  # n is of form 4n + 3 where n >= 1
247                    m = (n - 3) // 4
248                    q1 = 0.75 * s[m] + 0.25 * s[m + 1]
249                    q3 = 0.25 * s[3 * m + 1] + 0.75 * s[3 * m + 2]
250
251            iqr = q3 - q1
252            min_s = s[0]
253            max_s = s[-1]
254            if mode == 'extremes':
255                q0 = min_s
256                q4 = max_s
257            elif mode == 'tukey':
258                # the lowest datum still within 1.5 IQR of the lower quartile,
259                # and the highest datum still within 1.5 IQR of the upper
260                # quartile [Tukey box plot, Wikipedia ]
261                b0 = bisect_left(s, q1 - 1.5 * iqr)
262                b4 = bisect_right(s, q3 + 1.5 * iqr)
263                q0 = s[b0]
264                q4 = s[b4 - 1]
265                outliers = s[:b0] + s[b4:]
266            elif mode == 'stdev':
267                # one standard deviation above and below the mean of the data
268                sd = stdev(s)
269                b0 = bisect_left(s, q2 - sd)
270                b4 = bisect_right(s, q2 + sd)
271                q0 = s[b0]
272                q4 = s[b4 - 1]
273                outliers = s[:b0] + s[b4:]
274            elif mode == 'pstdev':
275                # one population standard deviation above and below
276                # the mean of the data
277                sdp = pstdev(s)
278                b0 = bisect_left(s, q2 - sdp)
279                b4 = bisect_right(s, q2 + sdp)
280                q0 = s[b0]
281                q4 = s[b4 - 1]
282                outliers = s[:b0] + s[b4:]
283            elif mode == '1.5IQR':
284                # 1.5IQR mode
285                q0 = q1 - 1.5 * iqr
286                q4 = q3 + 1.5 * iqr
287            return (min_s, q0, q1, q2, q3, q4, max_s), outliers
288