1""" Test cases for .boxplot method """
2
3import itertools
4import string
5
6import numpy as np
7import pytest
8
9import pandas.util._test_decorators as td
10
11from pandas import DataFrame, MultiIndex, Series, date_range, timedelta_range
12import pandas._testing as tm
13from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
14
15import pandas.plotting as plotting
16
# Module-level pytest mark: tags every test collected from this file as "slow".
pytestmark = pytest.mark.slow
18
19
@td.skip_if_no_mpl
class TestDataFramePlots(TestPlotBase):
    """Tests for ``DataFrame.boxplot`` and ``DataFrame.plot(kind="box")``."""

    def test_boxplot_legacy1(self):
        """Run ``boxplot`` with assorted ``column``/``by``/``notch`` arguments."""
        df = DataFrame(
            np.random.randn(6, 4),
            index=list(string.ascii_letters[:6]),
            columns=["one", "two", "three", "four"],
        )
        df["indic"] = ["foo", "bar"] * 3
        df["indic2"] = ["foo", "bar", "foo"] * 2

        _check_plot_works(df.boxplot, return_type="dict")
        _check_plot_works(df.boxplot, column=["one", "two"], return_type="dict")
        # _check_plot_works adds an ax so catch warning. see GH #13188
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, column=["one", "two"], by="indic")
        _check_plot_works(df.boxplot, column="one", by=["indic", "indic2"])
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, by="indic")
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, by=["indic", "indic2"])
        _check_plot_works(plotting._core.boxplot, data=df["one"], return_type="dict")
        _check_plot_works(df.boxplot, notch=1, return_type="dict")
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, by="indic", notch=1)

    def test_boxplot_legacy2(self):
        """Check that a passed ``ax`` is reused and that all lines are returned."""
        df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])
        df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
        df["Y"] = Series(["A"] * 10)
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.boxplot, by="X")

        # When ax is supplied and required number of axes is 1,
        # passed ax should be used:
        _, ax = self.plt.subplots()
        axes = df.boxplot("Col1", by="X", ax=ax)
        assert ax.axes is axes

        _, ax = self.plt.subplots()
        axes = df.groupby("Y").boxplot(ax=ax, return_type="axes")
        assert ax.axes is axes["A"]

        # Multiple columns with an ax argument should use same figure
        fig, ax = self.plt.subplots()
        with tm.assert_produces_warning(UserWarning):
            axes = df.boxplot(
                column=["Col1", "Col2"], by="X", ax=ax, return_type="axes"
            )
        assert axes["Col1"].get_figure() is fig

        # When by is None, check that all relevant lines are present in the
        # dict
        _, ax = self.plt.subplots()
        d = df.boxplot(ax=ax, return_type="dict")
        lines = list(itertools.chain.from_iterable(d.values()))
        assert len(ax.get_lines()) == len(lines)

    def test_boxplot_return_type_none(self):
        """GH 12216: ``return_type=None`` with ``by=None`` returns an Axes."""
        result = self.hist_df.boxplot()
        assert isinstance(result, self.plt.Axes)

    def test_boxplot_return_type_legacy(self):
        """Check every ``return_type`` value, including an invalid one.

        API change in https://github.com/pandas-dev/pandas/pull/7096
        """
        df = DataFrame(
            np.random.randn(6, 4),
            index=list(string.ascii_letters[:6]),
            columns=["one", "two", "three", "four"],
        )
        # Pin the message so an unrelated ValueError cannot satisfy the check.
        with pytest.raises(ValueError, match="return_type must be"):
            df.boxplot(return_type="NOTATYPE")

        # Default return type is checked as "axes".
        result = df.boxplot()
        self._check_box_return_type(result, "axes")

        # Explicit return types must not emit any warning.
        for return_type in ["dict", "axes", "both"]:
            with tm.assert_produces_warning(False):
                result = df.boxplot(return_type=return_type)
            self._check_box_return_type(result, return_type)

    def test_boxplot_axis_limits(self):
        """Check y-limits cover the data and that the y-axis is shared."""

        def _check_ax_limits(col, ax):
            # The visible y-range must contain every value of the column.
            y_min, y_max = ax.get_ylim()
            assert y_min <= col.min()
            assert y_max >= col.max()

        df = self.hist_df.copy()
        df["age"] = np.random.randint(1, 20, df.shape[0])
        # One full row
        height_ax, weight_ax = df.boxplot(["height", "weight"], by="category")
        _check_ax_limits(df["height"], height_ax)
        _check_ax_limits(df["weight"], weight_ax)
        assert weight_ax._sharey == height_ax

        # Two rows, one partial
        p = df.boxplot(["height", "weight", "age"], by="category")
        height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0]
        dummy_ax = p[1, 1]

        _check_ax_limits(df["height"], height_ax)
        _check_ax_limits(df["weight"], weight_ax)
        _check_ax_limits(df["age"], age_ax)
        assert weight_ax._sharey == height_ax
        assert age_ax._sharey == height_ax
        # The unused grid slot is not tied to the shared y-axis.
        assert dummy_ax._sharey is None

    def test_boxplot_empty_column(self):
        """Check that a column containing only NaN can still be plotted."""
        df = DataFrame(np.random.randn(20, 4))
        df.loc[:, 0] = np.nan
        _check_plot_works(df.boxplot, return_type="axes")

    def test_figsize(self):
        """Check that ``figsize`` sets the width and height of the figure."""
        df = DataFrame(np.random.rand(10, 5), columns=["A", "B", "C", "D", "E"])
        result = df.boxplot(return_type="axes", figsize=(12, 8))
        assert result.figure.bbox_inches.width == 12
        assert result.figure.bbox_inches.height == 8

    def test_fontsize(self):
        """Check that ``fontsize`` is applied to the x and y tick labels."""
        df = DataFrame({"a": [1, 2, 3, 4, 5, 6]})
        self._check_ticks_props(
            df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16
        )

    def test_boxplot_numeric_data(self):
        """GH 22799: only the float columns appear in ``plot(kind="box")``."""
        df = DataFrame(
            {
                "a": date_range("2012-01-01", periods=100),
                "b": np.random.randn(100),
                "c": np.random.randn(100) + 2,
                "d": date_range("2012-01-01", periods=100).astype(str),
                "e": date_range("2012-01-01", periods=100, tz="UTC"),
                "f": timedelta_range("1 days", periods=100),
            }
        )
        ax = df.plot(kind="box")
        assert [x.get_text() for x in ax.get_xticklabels()] == ["b", "c"]

    @pytest.mark.parametrize(
        "colors_kwd, expected",
        [
            (
                {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"},
                {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"},
            ),
            ({"boxes": "r"}, {"boxes": "r"}),
            ("r", {"boxes": "r", "whiskers": "r", "medians": "r", "caps": "r"}),
        ],
    )
    def test_color_kwd(self, colors_kwd, expected):
        """GH 26214: ``color`` as a dict or a string colors the artists."""
        df = DataFrame(np.random.rand(10, 2))
        result = df.boxplot(color=colors_kwd, return_type="dict")
        # Only the first artist of each kind is inspected.
        for k, v in expected.items():
            assert result[k][0].get_color() == v

    @pytest.mark.parametrize(
        "dict_colors, msg",
        [({"boxes": "r", "invalid_key": "r"}, "invalid key 'invalid_key'")],
    )
    def test_color_kwd_errors(self, dict_colors, msg):
        """GH 26214: an unknown key in the ``color`` dict raises ValueError."""
        df = DataFrame(np.random.rand(10, 2))
        with pytest.raises(ValueError, match=msg):
            df.boxplot(color=dict_colors, return_type="dict")

    @pytest.mark.parametrize(
        "props, expected",
        [
            ("boxprops", "boxes"),
            ("whiskerprops", "whiskers"),
            ("capprops", "caps"),
            ("medianprops", "medians"),
        ],
    )
    def test_specified_props_kwd(self, props, expected):
        """GH 30346: a color passed via a ``*props`` keyword is honoured."""
        df = DataFrame({k: np.random.random(100) for k in "ABC"})
        kwd = {props: {"color": "C1"}}
        result = df.boxplot(return_type="dict", **kwd)

        assert result[expected][0].get_color() == "C1"
214
215
@td.skip_if_no_mpl
class TestDataFrameGroupByPlots(TestPlotBase):
    """Tests for boxplots of grouped data (``groupby().boxplot`` and ``by=``)."""

    def test_boxplot_legacy1(self):
        """Check the axes layout when grouping ``hist_df`` by ``gender``."""
        grouped = self.hist_df.groupby(by="gender")
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(grouped.boxplot, return_type="axes")
        self._check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2))
        axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    def test_boxplot_legacy2(self):
        """Check the axes layout when grouping by a MultiIndex row level."""
        tuples = zip(string.ascii_letters[:10], range(10))
        df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
        grouped = df.groupby(level=1)
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(grouped.boxplot, return_type="axes")
        self._check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3))

        axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    def test_boxplot_legacy3(self):
        """Check the axes layout when grouping along the columns (``axis=1``)."""
        tuples = zip(string.ascii_letters[:10], range(10))
        df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
        grouped = df.unstack(level=1).groupby(level=0, axis=1)
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(grouped.boxplot, return_type="axes")
        self._check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2))
        axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    def test_grouped_plot_fignums(self):
        """Check how many figures grouped ``plot`` and ``boxplot`` open."""
        n = 10
        weight = Series(np.random.normal(166, 20, size=n))
        height = Series(np.random.normal(60, 10, size=n))
        with tm.RNGContext(42):
            gender = np.random.choice(["male", "female"], size=n)
        df = DataFrame({"height": height, "weight": weight, "gender": gender})
        gb = df.groupby("gender")

        res = gb.plot()
        assert len(self.plt.get_fignums()) == 2
        assert len(res) == 2
        tm.close()

        res = gb.boxplot(return_type="axes")
        assert len(self.plt.get_fignums()) == 1
        assert len(res) == 2
        tm.close()

        # now works with GH 5610 as gender is excluded
        df.groupby("gender").hist()
        tm.close()

    def test_grouped_box_return_type(self):
        """Check the container returned for each ``return_type`` when grouped."""
        df = self.hist_df

        # old style: return_type=None
        result = df.boxplot(by="gender")
        assert isinstance(result, np.ndarray)
        self._check_box_return_type(
            result, None, expected_keys=["height", "weight", "category"]
        )

        # now for groupby
        result = df.groupby("gender").boxplot(return_type="dict")
        self._check_box_return_type(result, "dict", expected_keys=["Male", "Female"])

        columns2 = "X B C D A G Y N Q O".split()
        df2 = DataFrame(np.random.randn(50, 10), columns=columns2)
        categories2 = "A B C D E F G H I J".split()
        df2["category"] = categories2 * 5

        for t in ["dict", "axes", "both"]:
            # groupby().boxplot is keyed by group, boxplot(by=...) by column
            returned = df.groupby("classroom").boxplot(return_type=t)
            self._check_box_return_type(returned, t, expected_keys=["A", "B", "C"])

            returned = df.boxplot(by="classroom", return_type=t)
            self._check_box_return_type(
                returned, t, expected_keys=["height", "weight", "category"]
            )

            returned = df2.groupby("category").boxplot(return_type=t)
            self._check_box_return_type(returned, t, expected_keys=categories2)

            returned = df2.boxplot(by="category", return_type=t)
            self._check_box_return_type(returned, t, expected_keys=columns2)

    def test_grouped_box_layout(self):
        """Check ``layout`` validation and the resulting subplot grids."""
        df = self.hist_df

        msg = "Layout of 1x1 must be larger than required size 2"
        with pytest.raises(ValueError, match=msg):
            df.boxplot(column=["weight", "height"], by=df.gender, layout=(1, 1))

        msg = "The 'layout' keyword is not supported when 'by' is None"
        with pytest.raises(ValueError, match=msg):
            df.boxplot(
                column=["height", "weight", "category"],
                layout=(2, 1),
                return_type="dict",
            )

        msg = "At least one dimension of layout must be positive"
        with pytest.raises(ValueError, match=msg):
            df.boxplot(column=["weight", "height"], by=df.gender, layout=(-1, -1))

        # _check_plot_works adds an ax so catch warning. see GH #13188
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("gender").boxplot, column="height", return_type="dict"
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2))

        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("category").boxplot, column="height", return_type="dict"
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))

        # GH 6769
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("classroom").boxplot, column="height", return_type="dict"
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))

        # GH 5897
        axes = df.boxplot(
            column=["height", "weight", "category"], by="gender", return_type="axes"
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
        for ax in [axes["height"]]:
            self._check_visible(ax.get_xticklabels(), visible=False)
            self._check_visible([ax.xaxis.get_label()], visible=False)
        for ax in [axes["weight"], axes["category"]]:
            self._check_visible(ax.get_xticklabels())
            self._check_visible([ax.xaxis.get_label()])

        df.groupby("classroom").boxplot(
            column=["height", "weight", "category"], return_type="dict"
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))

        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("category").boxplot,
                column="height",
                layout=(3, 2),
                return_type="dict",
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))
        # A -1 entry in the layout is checked against the inferred dimension.
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(
                df.groupby("category").boxplot,
                column="height",
                layout=(3, -1),
                return_type="dict",
            )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))

        df.boxplot(column=["height", "weight", "category"], by="gender", layout=(4, 1))
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(4, 1))

        df.boxplot(
            column=["height", "weight", "category"], by="gender", layout=(-1, 1)
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(3, 1))

        df.groupby("classroom").boxplot(
            column=["height", "weight", "category"], layout=(1, 4), return_type="dict"
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4))

        df.groupby("classroom").boxplot(
            column=["height", "weight", "category"], layout=(1, -1), return_type="dict"
        )
        self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3))

    def test_grouped_box_multiple_axes(self):
        """Check grouped boxplots drawn onto caller-supplied arrays of axes.

        GH 6970, GH 7069
        """
        df = self.hist_df

        # check warning to ignore sharex / sharey
        # this check should be done in the first function which
        # passes multiple axes to plot, hist or boxplot
        # location should be changed if other test is added
        # which has earlier alphabetical order
        fig, axes = self.plt.subplots(2, 2)
        # Only the plotting call is wrapped, so the warning must come from it.
        with tm.assert_produces_warning(UserWarning):
            df.groupby("category").boxplot(column="height", return_type="axes", ax=axes)
        self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))

        fig, axes = self.plt.subplots(2, 3)
        with tm.assert_produces_warning(UserWarning):
            returned = df.boxplot(
                column=["height", "weight", "category"],
                by="gender",
                return_type="axes",
                ax=axes[0],
            )
        returned = np.array(list(returned.values))
        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
        tm.assert_numpy_array_equal(returned, axes[0])
        assert returned[0].figure is fig

        # draw on second row
        with tm.assert_produces_warning(UserWarning):
            returned = df.groupby("classroom").boxplot(
                column=["height", "weight", "category"], return_type="axes", ax=axes[1]
            )
        returned = np.array(list(returned.values))
        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
        tm.assert_numpy_array_equal(returned, axes[1])
        assert returned[0].figure is fig

        # Create the figure outside ``pytest.raises`` so that only the plotting
        # call can satisfy the expected error, and pin the error message.
        _, axes = self.plt.subplots(2, 3)
        msg = "The number of passed axes must be 3, the same as the output plot"
        with pytest.raises(ValueError, match=msg):
            # pass different number of axes from required
            with tm.assert_produces_warning(UserWarning):
                df.groupby("classroom").boxplot(ax=axes)

    def test_fontsize(self):
        """Check that ``fontsize`` is applied to tick labels when using ``by``."""
        df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]})
        self._check_ticks_props(
            df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16
        )

    @pytest.mark.parametrize(
        "col, expected_xticklabel",
        [
            ("v", ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]),
            (["v"], ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]),
            ("v1", ["(a, v1)", "(b, v1)", "(c, v1)", "(d, v1)", "(e, v1)"]),
            (
                ["v", "v1"],
                [
                    "(a, v)",
                    "(a, v1)",
                    "(b, v)",
                    "(b, v1)",
                    "(c, v)",
                    "(c, v1)",
                    "(d, v)",
                    "(d, v1)",
                    "(e, v)",
                    "(e, v1)",
                ],
            ),
            (
                None,
                [
                    "(a, v)",
                    "(a, v1)",
                    "(b, v)",
                    "(b, v1)",
                    "(c, v)",
                    "(c, v1)",
                    "(d, v)",
                    "(d, v1)",
                    "(e, v)",
                    "(e, v1)",
                ],
            ),
        ],
    )
    def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel):
        """GH 16748: check x tick labels of ``subplots=False`` grouped boxplots."""
        df = DataFrame(
            {
                "cat": np.random.choice(list("abcde"), 100),
                "v": np.random.rand(100),
                "v1": np.random.rand(100),
            }
        )
        grouped = df.groupby("cat")

        axes = _check_plot_works(
            grouped.boxplot, subplots=False, column=col, return_type="axes"
        )

        result_xticklabel = [x.get_text() for x in axes.get_xticklabels()]
        assert expected_xticklabel == result_xticklabel

    def test_boxplot_multiindex_column(self):
        """GH 16748: check x tick labels when selecting MultiIndex columns."""
        arrays = [
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
        ]
        tuples = list(zip(*arrays))
        index = MultiIndex.from_tuples(tuples, names=["first", "second"])
        df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index)

        col = [("bar", "one"), ("bar", "two")]
        axes = _check_plot_works(df.boxplot, column=col, return_type="axes")

        expected_xticklabel = ["(bar, one)", "(bar, two)"]
        result_xticklabel = [x.get_text() for x in axes.get_xticklabels()]
        assert expected_xticklabel == result_xticklabel
518