# -*- coding: utf-8 -*-
# This file is part of pygal
#
# A python svg graph plotting library
# Copyright © 2012-2016 Kozea
#
# This library is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with pygal. If not, see <http://www.gnu.org/licenses/>.

"""
Box plot: a convenient way to display series as box with whiskers and outliers
Different types are available through the box_mode option
"""

from __future__ import division

from bisect import bisect_left, bisect_right

from pygal.graph.graph import Graph
from pygal.util import alter, decorate


class Box(Graph):

    """
    Box plot
    For each series, shows the median value, the 25th and 75th percentiles,
    and the values within
    1.5 times the interquartile range of the 25th and 75th percentiles.

    See http://en.wikipedia.org/wiki/Box_plot
    """

    # Fraction of a box slot left empty on each side of the box
    _series_margin = .06

    def _value_format(self, value, serie):
        """
        Format value for dual value display.

        Builds the text from ``serie.points`` (the 7-tuple stored by
        ``_compute``); which entries are shown depends on
        ``self.box_mode``. The ``value`` argument is not used.
        """
        if self.box_mode == "extremes":
            # points[1:6] is (q0, q1, q2, q3, q4); in this mode q0/q4 are
            # the minimum and maximum.
            return (
                'Min: %s\nQ1 : %s\nQ2 : %s\nQ3 : %s\nMax: %s' % tuple(
                    map(self._y_format, serie.points[1:6])))
        elif self.box_mode in ["tukey", "stdev", "pstdev"]:
            # All seven points: min, lower whisker, quartiles,
            # upper whisker, max.
            return (
                'Min: %s\nLower Whisker: %s\nQ1: %s\nQ2: %s\nQ3: %s\n'
                'Upper Whisker: %s\nMax: %s' % tuple(map(
                    self._y_format, serie.points)))
        elif self.box_mode == '1.5IQR':
            # 1.5IQR mode: only the three quartiles are reported
            return 'Q1: %s\nQ2: %s\nQ3: %s' % tuple(map(
                self._y_format, serie.points[2:5]))
        else:
            return self._y_format(serie.points)

    def _compute(self):
        """
        Compute parameters necessary for later steps
        within the rendering process
        """
        # Attach the box statistics and the outliers to every serie
        for serie in self.series:
            serie.points, serie.outliers = \
                self._box_points(serie.values, self.box_mode)

        # One x position per box, centred in its slot
        self._x_pos = [
            (i + .5) / self._order for i in range(self._order)]

        # NOTE(review): these truthiness tests also skip a bound equal to 0,
        # leaving whatever self._box already holds -- confirm that is intended.
        if self._min:
            self._box.ymin = min(self._min, self.zero)
        if self._max:
            self._box.ymax = max(self._max, self.zero)

    def _plot(self):
        """Plot the series data"""
        for serie in self.series:
            self._boxf(serie)

    @property
    def _len(self):
        """Len is always 7 here"""
        return 7

    def _boxf(self, serie):
        """For a specific series, draw the box plot."""
        serie_node = self.svg.serie(serie)
        # Note: serie.points is (min, q0, q1, q2, q3, q4, max) as returned by
        # _box_points. q0 and q4 are the whisker positions; their meaning
        # depends on box_mode (extremes, 1.5 * IQR fences, nearest datum
        # inside a fence...). Only q0..q4 are passed on for drawing.
        boxes = self.svg.node(serie_node['plot'], class_="boxes")

        metadata = serie.metadata.get(0)

        box = decorate(
            self.svg,
            self.svg.node(boxes, class_='box'),
            metadata)

        val = self._format(serie, 0)

        x_center, y_center = self._draw_box(
            box, serie.points[1:6], serie.outliers, serie.index, metadata)
        self._tooltip_data(box, val, x_center, y_center, "centered",
                           self._get_x_label(serie.index))
        self._static_value(serie_node, val, x_center, y_center, metadata)

    def _draw_box(self, parent_node, quartiles, outliers, box_index, metadata):
        """
        Return the center of a bounding box defined by a box plot.
        Draws a box plot on self.svg.

        ``quartiles`` is the 5-sequence (q0, q1, q2, q3, q4), ``outliers``
        the values drawn as individual circles and ``box_index`` the slot
        of the box along the x axis. The returned pair is the x of the
        box's vertical axis and the view y of the mean of ``quartiles``.
        """
        width = (self.view.x(1) - self.view.x(0)) / self._order
        series_margin = width * self._series_margin
        left_edge = self.view.x(0) + width * box_index + series_margin
        width -= 2 * series_margin
        # x coordinate of the vertical axis of the box
        x_middle = left_edge + width / 2

        # draw lines for whiskers - bottom, median, and top
        for i, whisker in enumerate(
                (quartiles[0], quartiles[2], quartiles[4])):
            # the median line spans the whole box, the caps only half of it
            whisker_width = width if i == 1 else width / 2
            shift = (width - whisker_width) / 2
            xs = left_edge + shift
            xe = left_edge + width - shift
            alter(self.svg.line(
                parent_node,
                coords=[(xs, self.view.y(whisker)),
                        (xe, self.view.y(whisker))],
                class_='reactive tooltip-trigger',
                attrib={'stroke-width': 3}), metadata)

        # draw lines connecting whiskers to box (Q1 and Q3)
        alter(self.svg.line(
            parent_node,
            coords=[(x_middle, self.view.y(quartiles[0])),
                    (x_middle, self.view.y(quartiles[1]))],
            class_='reactive tooltip-trigger',
            attrib={'stroke-width': 2}), metadata)
        alter(self.svg.line(
            parent_node,
            coords=[(x_middle, self.view.y(quartiles[4])),
                    (x_middle, self.view.y(quartiles[3]))],
            class_='reactive tooltip-trigger',
            attrib={'stroke-width': 2}), metadata)

        # box, bounded by Q1 and Q3
        alter(self.svg.node(
            parent_node,
            tag='rect',
            x=left_edge,
            y=self.view.y(quartiles[1]),
            height=self.view.y(quartiles[3]) - self.view.y(quartiles[1]),
            width=width,
            class_='subtle-fill reactive tooltip-trigger'), metadata)

        # draw outliers
        for o in outliers:
            alter(self.svg.node(
                parent_node,
                tag='circle',
                cx=x_middle,
                cy=self.view.y(o),
                r=3,
                class_='subtle-fill reactive tooltip-trigger'), metadata)

        return (x_middle, self.view.y(
            sum(quartiles) / len(quartiles)))

    @staticmethod
    def _box_points(values, mode='extremes'):
        """
        Return ``(points, outliers)`` for a list of numeric values, where
        ``points`` is the 7-tuple (min, q0, Q1, median, Q3, q4, max) and
        ``outliers`` is a list. q0 and q4 depend on the mode:

        Default mode: (mode='extremes' or unset)
            q0 is the minimum and q4 the maximum (so the tuple holds each
            of them twice). No outliers.
        1.5IQR mode: (mode='1.5IQR')
            q0 = Q1 - 1.5 * IQR and q4 = Q3 + 1.5 * IQR. No outliers.
        Tukey mode: (mode='tukey')
            q0 / q4 are the lowest / highest datum still within 1.5 * IQR
            of Q1 / Q3.
            Outliers are values x: x < Q1 - 1.5 * IQR or x > Q3 + 1.5 * IQR
        SD mode: (mode='stdev')
            q0 / q4 are the lowest / highest datum within one sample
            standard deviation of the median.
            Outliers are values x: x < median - SD or x > median + SD
        SDp mode: (mode='pstdev')
            Same as SD mode with the population standard deviation.

        The iterator values may include None values; they are ignored.
        With no usable value the points are all 0, with a single value
        they are all that value; both cases return no outliers.

        Raises ValueError for any other mode when there are at least two
        usable values.

        Uses quartile definition from  Mendenhall, W. and
        Sincich, T. L. Statistics for Engineering and the
        Sciences, 4th ed. Prentice-Hall, 1995.
        """
        def median(seq):
            n = len(seq)
            if n % 2 == 0:  # seq has an even length
                return (seq[n // 2] + seq[n // 2 - 1]) / 2
            # seq has an odd length
            return seq[n // 2]

        def mean(seq):
            return sum(seq) / len(seq)

        def stdev(seq):
            # sample standard deviation (n - 1 denominator)
            m = mean(seq)
            variance = sum((x - m) ** 2 for x in seq) / (len(seq) - 1)
            return variance ** 0.5

        def pstdev(seq):
            # population standard deviation (n denominator)
            m = mean(seq)
            variance = sum((x - m) ** 2 for x in seq) / len(seq)
            return variance ** 0.5

        # sort a copy in case the originals must stay in original order
        s = sorted([x for x in values if x is not None])
        n = len(s)
        if not n:
            return (0, 0, 0, 0, 0, 0, 0), []
        if n == 1:
            return (s[0], s[0], s[0], s[0], s[0], s[0], s[0]), []

        # From here on n >= 2
        q2 = median(s)
        # See 'Method 3' in http://en.wikipedia.org/wiki/Quartile
        if n % 2 == 0:  # even
            q1 = median(s[:n // 2])
            q3 = median(s[n // 2:])
        elif n % 4 == 1:  # n is of form 4m + 1 where m >= 1
            m = (n - 1) // 4
            q1 = 0.25 * s[m - 1] + 0.75 * s[m]
            q3 = 0.75 * s[3 * m] + 0.25 * s[3 * m + 1]
        else:  # n is of form 4m + 3 where m >= 0
            m = (n - 3) // 4
            q1 = 0.75 * s[m] + 0.25 * s[m + 1]
            q3 = 0.25 * s[3 * m + 1] + 0.75 * s[3 * m + 2]

        iqr = q3 - q1
        min_s = s[0]
        max_s = s[-1]

        # Modes whose whiskers are not snapped to a datum: no outliers
        if mode == 'extremes':
            return (min_s, min_s, q1, q2, q3, max_s, max_s), []
        if mode == '1.5IQR':
            return (
                min_s, q1 - 1.5 * iqr, q1, q2, q3, q3 + 1.5 * iqr, max_s), []

        # Remaining modes: compute a [low, high] fence, snap the whiskers
        # to the outermost data inside it, everything outside is an outlier
        if mode == 'tukey':
            # 1.5 IQR below the lower quartile and above the upper quartile
            low = q1 - 1.5 * iqr
            high = q3 + 1.5 * iqr
        elif mode == 'stdev':
            # one sample standard deviation around the median
            spread = stdev(s)
            low = q2 - spread
            high = q2 + spread
        elif mode == 'pstdev':
            # one population standard deviation around the median
            spread = pstdev(s)
            low = q2 - spread
            high = q2 + spread
        else:
            # Previously this fell through and failed with an
            # UnboundLocalError on q0; report the real problem instead.
            raise ValueError(
                'Unknown box mode %r (expected one of: extremes, tukey, '
                'stdev, pstdev, 1.5IQR)' % (mode,))

        b0 = bisect_left(s, low)     # index of the first datum >= low
        b4 = bisect_right(s, high)   # index just past the last datum <= high
        q0 = s[b0]
        q4 = s[b4 - 1]
        outliers = s[:b0] + s[b4:]
        return (min_s, q0, q1, q2, q3, q4, max_s), outliers