""" Test cases for .boxplot method """

import itertools
import string

import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame, MultiIndex, Series, date_range, timedelta_range
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase, _check_plot_works

import pandas.plotting as plotting

pytestmark = pytest.mark.slow


@td.skip_if_no_mpl
class TestDataFramePlots(TestPlotBase):
    """Tests for ``DataFrame.boxplot`` and ``DataFrame.plot(kind="box")``."""

    def test_boxplot_legacy1(self):
        """Smoke-test ``boxplot`` with ``column``, ``by``, ``notch`` and dict return."""
        df = DataFrame(
            np.random.randn(6, 4),
            index=list(string.ascii_letters[:6]),
            columns=["one", "two", "three", "four"],
        )
        df["indic"] = ["foo", "bar"] * 3
        df["indic2"] = ["foo", "bar", "foo"] * 2

        _check_plot_works(df.boxplot, return_type="dict")
        _check_plot_works(df.boxplot, column=["one", "two"], return_type="dict")
        # _check_plot_works adds an ax so catch warning. see GH #13188
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, column=["one", "two"], by="indic")
        _check_plot_works(df.boxplot, column="one", by=["indic", "indic2"])
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, by="indic")
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, by=["indic", "indic2"])
        _check_plot_works(plotting._core.boxplot, data=df["one"], return_type="dict")
        _check_plot_works(df.boxplot, notch=1, return_type="dict")
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, by="indic", notch=1)

    def test_boxplot_legacy2(self):
        """Check that a passed ``ax`` is reused and the dict return lists all lines."""
        df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])
        df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
        df["Y"] = Series(["A"] * 10)
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, by="X")

        # When ax is supplied and required number of axes is 1,
        # passed ax should be used:
        _, ax = self.plt.subplots()
        axes = df.boxplot("Col1", by="X", ax=ax)
        ax_axes = ax.axes
        assert ax_axes is axes

        _, ax = self.plt.subplots()
        axes = df.groupby("Y").boxplot(ax=ax, return_type="axes")
        ax_axes = ax.axes
        assert ax_axes is axes["A"]

        # Multiple columns with an ax argument should use same figure
        fig, ax = self.plt.subplots()
        with tm.assert_produces_warning(UserWarning):
            axes = df.boxplot(
                column=["Col1", "Col2"], by="X", ax=ax, return_type="axes"
            )
        assert axes["Col1"].get_figure() is fig

        # When by is None, check that all relevant lines are present in the
        # dict
        _, ax = self.plt.subplots()
        d = df.boxplot(ax=ax, return_type="dict")
        lines = list(itertools.chain.from_iterable(d.values()))
        assert len(ax.get_lines()) == len(lines)

    def test_boxplot_return_type_none(self):
        """Check the default call (no ``return_type``, no ``by``) returns an Axes."""
        # GH 12216; return_type=None & by=None -> axes
        result = self.hist_df.boxplot()
        assert isinstance(result, self.plt.Axes)

    def test_boxplot_return_type_legacy(self):
        """Check each ``return_type`` value and that an invalid one raises."""
        # API change in https://github.com/pandas-dev/pandas/pull/7096
        df = DataFrame(
            np.random.randn(6, 4),
            index=list(string.ascii_letters[:6]),
            columns=["one", "two", "three", "four"],
        )
        # match on the message so an unrelated ValueError cannot pass the test
        with pytest.raises(ValueError, match="return_type must be"):
            df.boxplot(return_type="NOTATYPE")

        result = df.boxplot()
        self._check_box_return_type(result, "axes")

        with tm.assert_produces_warning(False):
            result = df.boxplot(return_type="dict")
        self._check_box_return_type(result, "dict")

        with tm.assert_produces_warning(False):
            result = df.boxplot(return_type="axes")
        self._check_box_return_type(result, "axes")

        with tm.assert_produces_warning(False):
            result = df.boxplot(return_type="both")
        self._check_box_return_type(result, "both")

    def test_boxplot_axis_limits(self):
        """Check y-limits cover each column's data and which axes share y."""

        def _check_ax_limits(col, ax):
            y_min, y_max = ax.get_ylim()
            assert y_min <= col.min()
            assert y_max >= col.max()

        df = self.hist_df.copy()
        df["age"] = np.random.randint(1, 20, df.shape[0])
        # One full row
        height_ax, weight_ax = df.boxplot(["height", "weight"], by="category")
        _check_ax_limits(df["height"], height_ax)
        _check_ax_limits(df["weight"], weight_ax)
        assert weight_ax._sharey == height_ax

        # Two rows, one partial
        p = df.boxplot(["height", "weight", "age"], by="category")
        height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0]
        dummy_ax = p[1, 1]

        _check_ax_limits(df["height"], height_ax)
        _check_ax_limits(df["weight"], weight_ax)
        _check_ax_limits(df["age"], age_ax)
        assert weight_ax._sharey == height_ax
        assert age_ax._sharey == height_ax
        assert dummy_ax._sharey is None

    def test_boxplot_empty_column(self):
        """Check that a frame with an all-NaN column can still be plotted."""
        df = DataFrame(np.random.randn(20, 4))
        df.loc[:, 0] = np.nan
        _check_plot_works(df.boxplot, return_type="axes")

    def test_figsize(self):
        """Check that ``figsize`` sets the width and height of the figure."""
        df = DataFrame(np.random.rand(10, 5), columns=["A", "B", "C", "D", "E"])
        result = df.boxplot(return_type="axes", figsize=(12, 8))
        assert result.figure.bbox_inches.width == 12
        assert result.figure.bbox_inches.height == 8

    def test_fontsize(self):
        """Check that ``fontsize`` is applied to both x and y tick labels."""
        df = DataFrame({"a": [1, 2, 3, 4, 5, 6]})
        self._check_ticks_props(
            df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16
        )

    def test_boxplot_numeric_data(self):
        """Check that only columns ``b`` and ``c`` get x tick labels in a box plot."""
        # GH 22799
        df = DataFrame(
            {
                "a": date_range("2012-01-01", periods=100),
                "b": np.random.randn(100),
                "c": np.random.randn(100) + 2,
                "d": date_range("2012-01-01", periods=100).astype(str),
                "e": date_range("2012-01-01", periods=100, tz="UTC"),
                "f": timedelta_range("1 days", periods=100),
            }
        )
        ax = df.plot(kind="box")
        assert [x.get_text() for x in ax.get_xticklabels()] == ["b", "c"]

    @pytest.mark.parametrize(
        "colors_kwd, expected",
        [
            (
                {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"},
                {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"},
            ),
            ({"boxes": "r"}, {"boxes": "r"}),
            ("r", {"boxes": "r", "whiskers": "r", "medians": "r", "caps": "r"}),
        ],
    )
    def test_color_kwd(self, colors_kwd, expected):
        """Check that ``color`` (dict or single value) colors the expected artists."""
        # GH: 26214
        df = DataFrame(np.random.rand(10, 2))
        result = df.boxplot(color=colors_kwd, return_type="dict")
        for k, v in expected.items():
            assert result[k][0].get_color() == v

    @pytest.mark.parametrize(
        "dict_colors, msg",
        [({"boxes": "r", "invalid_key": "r"}, "invalid key 'invalid_key'")],
    )
    def test_color_kwd_errors(self, dict_colors, msg):
        """Check that an unknown key in the ``color`` dict raises ValueError."""
        # GH: 26214
        df = DataFrame(np.random.rand(10, 2))
        with pytest.raises(ValueError, match=msg):
            df.boxplot(color=dict_colors, return_type="dict")

    @pytest.mark.parametrize(
        "props, expected",
        [
            ("boxprops", "boxes"),
            ("whiskerprops", "whiskers"),
            ("capprops", "caps"),
            ("medianprops", "medians"),
        ],
    )
    def test_specified_props_kwd(self, props, expected):
        """Check that a ``*props`` keyword's color reaches the matching artists."""
        # GH 30346
        df = DataFrame({k: np.random.random(100) for k in "ABC"})
        kwd = {props: {"color": "C1"}}
        result = df.boxplot(return_type="dict", **kwd)

        assert result[expected][0].get_color() == "C1"


@td.skip_if_no_mpl
class TestDataFrameGroupByPlots(TestPlotBase):
    """Tests for ``DataFrameGroupBy.boxplot`` and ``DataFrame.boxplot(by=...)``."""

    def test_boxplot_legacy1(self):
        """Check axes layout for a groupby on ``gender``, with and without subplots."""
        grouped = self.hist_df.groupby(by="gender")
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(grouped.boxplot, return_type="axes")
        self._check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2))
        axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    def test_boxplot_legacy2(self):
        """Check axes layout for a groupby on a MultiIndex level (ten groups)."""
        tuples = zip(string.ascii_letters[:10], range(10))
        df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
        grouped = df.groupby(level=1)
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(grouped.boxplot, return_type="axes")
        self._check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3))

        axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    def test_boxplot_legacy3(self):
        """Check axes layout for a column-wise groupby on an unstacked frame."""
        tuples = zip(string.ascii_letters[:10], range(10))
        df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
        grouped = df.unstack(level=1).groupby(level=0, axis=1)
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(grouped.boxplot, return_type="axes")
        self._check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2))
        axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    def test_grouped_plot_fignums(self):
        """Check figure and result counts for groupby ``plot`` and ``boxplot``."""
        n = 10
        weight = Series(np.random.normal(166, 20, size=n))
        height = Series(np.random.normal(60, 10, size=n))
        with tm.RNGContext(42):
            gender = np.random.choice(["male", "female"], size=n)
        df = DataFrame({"height": height, "weight": weight, "gender": gender})
        gb = df.groupby("gender")

        res = gb.plot()
        assert len(self.plt.get_fignums()) == 2
        assert len(res) == 2
        tm.close()

        res = gb.boxplot(return_type="axes")
        assert len(self.plt.get_fignums()) == 1
        assert len(res) == 2
        tm.close()

        # now works with GH 5610 as gender is excluded
        # (only needs to run without raising; the result is not inspected)
        df.groupby("gender").hist()
        tm.close()

    def test_grouped_box_return_type(self):
        """Check return types and keys for ``boxplot(by=...)`` and groupby boxplot."""
        df = self.hist_df

        # old style: return_type=None
        result = df.boxplot(by="gender")
        assert isinstance(result, np.ndarray)
        self._check_box_return_type(
            result, None, expected_keys=["height", "weight", "category"]
        )

        # now for groupby
        result = df.groupby("gender").boxplot(return_type="dict")
        self._check_box_return_type(result, "dict", expected_keys=["Male", "Female"])

        columns2 = "X B C D A G Y N Q O".split()
        df2 = DataFrame(np.random.randn(50, 10), columns=columns2)
        categories2 = "A B C D E F G H I J".split()
        df2["category"] = categories2 * 5

        for t in ["dict", "axes", "both"]:
            returned = df.groupby("classroom").boxplot(return_type=t)
            self._check_box_return_type(returned, t, expected_keys=["A", "B", "C"])

            returned = df.boxplot(by="classroom", return_type=t)
            self._check_box_return_type(
                returned, t, expected_keys=["height", "weight", "category"]
            )

            returned = df2.groupby("category").boxplot(return_type=t)
            self._check_box_return_type(returned, t, expected_keys=categories2)

            returned = df2.boxplot(by="category", return_type=t)
            self._check_box_return_type(returned, t, expected_keys=columns2)

    def test_grouped_box_layout(self):
        """Check ``layout`` validation and the resulting axes grid shapes."""
        df = self.hist_df

        msg = "Layout of 1x1 must be larger than required size 2"
        with pytest.raises(ValueError, match=msg):
            df.boxplot(column=["weight", "height"], by=df.gender, layout=(1, 1))

        msg = "The 'layout' keyword is not supported when 'by' is None"
        with pytest.raises(ValueError, match=msg):
            df.boxplot(
                column=["height", "weight", "category"],
                layout=(2, 1),
                return_type="dict",
            )

        msg = "At least one dimension of layout must be positive"
        with pytest.raises(ValueError, match=msg):
            df.boxplot(column=["weight", "height"], by=df.gender, layout=(-1, -1))

        # The return values below are not inspected; each check reads the
        # axes of the current figure instead.

        # _check_plot_works adds an ax so catch warning. see GH #13188
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("gender").boxplot, column="height", return_type="dict"
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2))

        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("category").boxplot, column="height", return_type="dict"
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))

        # GH 6769
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("classroom").boxplot, column="height", return_type="dict"
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))

        # GH 5897
        axes = df.boxplot(
            column=["height", "weight", "category"], by="gender", return_type="axes"
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
        for ax in [axes["height"]]:
            self._check_visible(ax.get_xticklabels(), visible=False)
            self._check_visible([ax.xaxis.get_label()], visible=False)
        for ax in [axes["weight"], axes["category"]]:
            self._check_visible(ax.get_xticklabels())
            self._check_visible([ax.xaxis.get_label()])

        df.groupby("classroom").boxplot(
            column=["height", "weight", "category"], return_type="dict"
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))

        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("category").boxplot,
                column="height",
                layout=(3, 2),
                return_type="dict",
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("category").boxplot,
                column="height",
                layout=(3, -1),
                return_type="dict",
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))

        df.boxplot(
            column=["height", "weight", "category"], by="gender", layout=(4, 1)
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(4, 1))

        df.boxplot(
            column=["height", "weight", "category"], by="gender", layout=(-1, 1)
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(3, 1))

        df.groupby("classroom").boxplot(
            column=["height", "weight", "category"], layout=(1, 4), return_type="dict"
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4))

        df.groupby("classroom").boxplot(
            column=["height", "weight", "category"], layout=(1, -1), return_type="dict"
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3))

    def test_grouped_box_multiple_axes(self):
        """Check drawing onto user-supplied axes arrays and the wrong-count error."""
        # GH 6970, GH 7069
        df = self.hist_df

        # check warning to ignore sharex / sharey
        # this check should be done in the first function which
        # passes multiple axes to plot, hist or boxplot
        # location should be changed if other test is added
        # which has earlier alphabetical order
        with tm.assert_produces_warning(UserWarning):
            _, axes = self.plt.subplots(2, 2)
            df.groupby("category").boxplot(column="height", return_type="axes", ax=axes)
            self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))

        fig, axes = self.plt.subplots(2, 3)
        with tm.assert_produces_warning(UserWarning):
            returned = df.boxplot(
                column=["height", "weight", "category"],
                by="gender",
                return_type="axes",
                ax=axes[0],
            )
        returned = np.array(list(returned.values))
        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
        tm.assert_numpy_array_equal(returned, axes[0])
        assert returned[0].figure is fig

        # draw on second row
        with tm.assert_produces_warning(UserWarning):
            returned = df.groupby("classroom").boxplot(
                column=["height", "weight", "category"], return_type="axes", ax=axes[1]
            )
        returned = np.array(list(returned.values))
        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
        tm.assert_numpy_array_equal(returned, axes[1])
        assert returned[0].figure is fig

        # Create the axes outside the raises block so that only the boxplot
        # call itself can satisfy the expected ValueError.
        _, axes = self.plt.subplots(2, 3)
        msg = "The number of passed axes must be 3"
        with pytest.raises(ValueError, match=msg):
            # pass different number of axes from required
            with tm.assert_produces_warning(UserWarning):
                df.groupby("classroom").boxplot(ax=axes)

    def test_fontsize(self):
        """Check that ``fontsize`` is applied to tick labels when ``by`` is given."""
        df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]})
        self._check_ticks_props(
            df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16
        )

    @pytest.mark.parametrize(
        "col, expected_xticklabel",
        [
            ("v", ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]),
            (["v"], ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]),
            ("v1", ["(a, v1)", "(b, v1)", "(c, v1)", "(d, v1)", "(e, v1)"]),
            (
                ["v", "v1"],
                [
                    "(a, v)",
                    "(a, v1)",
                    "(b, v)",
                    "(b, v1)",
                    "(c, v)",
                    "(c, v1)",
                    "(d, v)",
                    "(d, v1)",
                    "(e, v)",
                    "(e, v1)",
                ],
            ),
            (
                None,
                [
                    "(a, v)",
                    "(a, v1)",
                    "(b, v)",
                    "(b, v1)",
                    "(c, v)",
                    "(c, v1)",
                    "(d, v)",
                    "(d, v1)",
                    "(e, v)",
                    "(e, v1)",
                ],
            ),
        ],
    )
    def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel):
        """Check the ``(group, column)`` x tick labels when ``subplots=False``."""
        # GH 16748
        df = DataFrame(
            {
                "cat": np.random.choice(list("abcde"), 100),
                "v": np.random.rand(100),
                "v1": np.random.rand(100),
            }
        )
        grouped = df.groupby("cat")

        axes = _check_plot_works(
            grouped.boxplot, subplots=False, column=col, return_type="axes"
        )

        result_xticklabel = [x.get_text() for x in axes.get_xticklabels()]
        assert expected_xticklabel == result_xticklabel

    def test_boxplot_multiindex_column(self):
        """Check x tick labels when selecting MultiIndex columns by tuple."""
        # GH 16748
        arrays = [
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
        ]
        tuples = list(zip(*arrays))
        index = MultiIndex.from_tuples(tuples, names=["first", "second"])
        df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index)

        col = [("bar", "one"), ("bar", "two")]
        axes = _check_plot_works(df.boxplot, column=col, return_type="axes")

        expected_xticklabel = ["(bar, one)", "(bar, two)"]
        result_xticklabel = [x.get_text() for x in axes.get_xticklabels()]
        assert expected_xticklabel == result_xticklabel