1from datetime import timedelta
2from decimal import Decimal
3import re
4
5from dateutil.tz import tzlocal
6import numpy as np
7import pytest
8
9from pandas.compat import is_platform_windows
10import pandas.util._test_decorators as td
11
12import pandas as pd
13from pandas import (
14    Categorical,
15    DataFrame,
16    Index,
17    MultiIndex,
18    Series,
19    Timestamp,
20    date_range,
21    isna,
22    notna,
23    to_datetime,
24    to_timedelta,
25)
26import pandas._testing as tm
27import pandas.core.algorithms as algorithms
28import pandas.core.nanops as nanops
29
30
def assert_stat_op_calc(
    opname,
    alternative,
    frame,
    has_skipna=True,
    check_dtype=True,
    check_dates=False,
    rtol=1e-5,
    atol=1e-8,
    skipna_alternative=None,
):
    """
    Check that operator opname works as advertised on frame

    Parameters
    ----------
    opname : string
        Name of the operator to test on frame
    alternative : function
        Function that opname is tested against; i.e. "frame.opname()" should
        equal "alternative(frame)".
    frame : DataFrame
        The object that the tests are executed on
    has_skipna : bool, default True
        Whether the method "opname" has the kwarg "skipna"
    check_dtype : bool, default True
        Whether the dtypes of the result of "frame.opname()" and
        "alternative(frame)" should be checked.
    check_dates : bool, default False
        Whether opname should additionally be run on a small frame that
        holds a datetime column
    rtol : float, default 1e-5
        Relative tolerance.
    atol : float, default 1e-8
        Absolute tolerance.
    skipna_alternative : function, default None
        NaN-safe version of alternative
    """
    f = getattr(frame, opname)

    if check_dates:
        # mean/median are expected to emit a FutureWarning on datetime data
        expected_warning = FutureWarning if opname in ["mean", "median"] else None
        df = DataFrame({"b": date_range("1/1/2001", periods=2)})
        with tm.assert_produces_warning(expected_warning):
            result = getattr(df, opname)()
        assert isinstance(result, Series)

        # same check once a numeric column sits next to the datetime column
        df["a"] = range(len(df))
        with tm.assert_produces_warning(expected_warning):
            result = getattr(df, opname)()
        assert isinstance(result, Series)
        assert len(result)

    if has_skipna:

        def wrapper(x):
            return alternative(x.values)

        skipna_wrapper = tm._make_skipna_wrapper(alternative, skipna_alternative)
        result0 = f(axis=0, skipna=False)
        result1 = f(axis=1, skipna=False)
        tm.assert_series_equal(
            result0, frame.apply(wrapper), check_dtype=check_dtype, rtol=rtol, atol=atol
        )
        # HACK: win32
        tm.assert_series_equal(
            result1,
            frame.apply(wrapper, axis=1),
            check_dtype=False,
            rtol=rtol,
            atol=atol,
        )
    else:
        skipna_wrapper = alternative

    result0 = f(axis=0)
    result1 = f(axis=1)
    tm.assert_series_equal(
        result0,
        frame.apply(skipna_wrapper),
        check_dtype=check_dtype,
        rtol=rtol,
        atol=atol,
    )

    # the row-wise default result is only compared for these two operators
    if opname in ["sum", "prod"]:
        expected = frame.apply(skipna_wrapper, axis=1)
        tm.assert_series_equal(
            result1, expected, check_dtype=False, rtol=rtol, atol=atol
        )

    # check dtypes
    if check_dtype:
        lcd_dtype = frame.values.dtype
        assert lcd_dtype == result0.dtype
        assert lcd_dtype == result1.dtype

    # bad axis
    with pytest.raises(ValueError, match="No axis named 2"):
        f(axis=2)

    # all NA case
    if has_skipna:
        # np.nan instead of the np.NaN alias, which NumPy 2.0 removed
        all_na = frame * np.nan
        r0 = getattr(all_na, opname)(axis=0)
        r1 = getattr(all_na, opname)(axis=1)
        if opname in ["sum", "prod"]:
            unit = 1 if opname == "prod" else 0  # result for empty sum/prod
            expected = Series(unit, index=r0.index, dtype=r0.dtype)
            tm.assert_series_equal(r0, expected)
            expected = Series(unit, index=r1.index, dtype=r1.dtype)
            tm.assert_series_equal(r1, expected)
142
143
def assert_stat_op_api(opname, float_frame, float_string_frame, has_numeric_only=False):
    """
    Smoke-test the call signature of reduction opname on frames

    Nothing is asserted about the returned values; the calls only have to
    complete without raising.

    Parameters
    ----------
    opname : string
        Name of the operator to test on frame
    float_frame : DataFrame
        DataFrame with columns of type float
    float_string_frame : DataFrame
        DataFrame with both float and string columns
    has_numeric_only : bool, default False
        Whether the method "opname" has the kwarg "numeric_only"
    """
    both_axes = (0, 1)

    # the plain call has to work on the mixed-type frame
    for axis in both_axes:
        getattr(float_string_frame, opname)(axis=axis)

    if not has_numeric_only:
        return

    for axis in both_axes:
        getattr(float_string_frame, opname)(axis=axis, numeric_only=True)
    for axis in both_axes:
        getattr(float_frame, opname)(axis=axis, numeric_only=False)
168
169
def assert_bool_op_calc(opname, alternative, frame, has_skipna=True):
    """
    Check that bool operator opname works as advertised on frame

    Parameters
    ----------
    opname : string
        Name of the operator to test on frame
    alternative : function
        Function that opname is tested against; i.e. "frame.opname()" should
        equal "alternative(frame)".
    frame : DataFrame
        The object that the tests are executed on
    has_skipna : bool, default True
        Whether the method "opname" has the kwarg "skipna"
    """
    f = getattr(frame, opname)

    if has_skipna:

        def skipna_wrapper(x):
            nona = x.dropna().values
            return alternative(nona)

        def wrapper(x):
            return alternative(x.values)

        result0 = f(axis=0, skipna=False)
        result1 = f(axis=1, skipna=False)

        tm.assert_series_equal(result0, frame.apply(wrapper))
        tm.assert_series_equal(
            result1, frame.apply(wrapper, axis=1), check_dtype=False
        )  # HACK: win32
    else:
        # without a skipna kwarg the default call is compared to alternative
        skipna_wrapper = alternative

    result0 = f(axis=0)
    result1 = f(axis=1)

    tm.assert_series_equal(result0, frame.apply(skipna_wrapper))
    tm.assert_series_equal(
        result1, frame.apply(skipna_wrapper, axis=1), check_dtype=False
    )

    # bad axis
    with pytest.raises(ValueError, match="No axis named 2"):
        f(axis=2)

    # all NA case
    if has_skipna:
        # np.nan instead of the np.NaN alias, which NumPy 2.0 removed
        all_na = frame * np.nan
        r0 = getattr(all_na, opname)(axis=0)
        r1 = getattr(all_na, opname)(axis=1)
        if opname == "any":
            assert not r0.any()
            assert not r1.any()
        else:
            assert r0.all()
            assert r1.all()
231
232
def assert_bool_op_api(
    opname, bool_frame_with_na, float_string_frame, has_bool_only=False
):
    """
    Check that API for boolean operator opname works as advertised on frame

    Parameters
    ----------
    opname : string
        Name of the operator to test on frame
    bool_frame_with_na : DataFrame
        Frame on which opname is called with bool_only=False; only used when
        has_bool_only is True
    float_string_frame : DataFrame
        DataFrame with both float and string columns; a copy is used, so the
        caller's frame is left unmodified
    has_bool_only : bool, default False
        Whether the method "opname" has the kwarg "bool_only"
    """
    # make sure op works on mixed-type frame; work on a copy so the extra
    # "_bool_" column does not leak into the frame owned by the caller
    mixed = float_string_frame.copy()
    mixed["_bool_"] = np.random.randn(len(mixed)) > 0.5
    getattr(mixed, opname)(axis=0)
    getattr(mixed, opname)(axis=1)

    if has_bool_only:
        getattr(mixed, opname)(axis=0, bool_only=True)
        getattr(mixed, opname)(axis=1, bool_only=True)
        getattr(bool_frame_with_na, opname)(axis=0, bool_only=False)
        getattr(bool_frame_with_na, opname)(axis=1, bool_only=False)
261
262
263class TestDataFrameAnalytics:
264
265    # ---------------------------------------------------------------------
266    # Reductions
267
268    def test_stat_op_api(self, float_frame, float_string_frame):
269        assert_stat_op_api(
270            "count", float_frame, float_string_frame, has_numeric_only=True
271        )
272        assert_stat_op_api(
273            "sum", float_frame, float_string_frame, has_numeric_only=True
274        )
275
276        assert_stat_op_api("nunique", float_frame, float_string_frame)
277        assert_stat_op_api("mean", float_frame, float_string_frame)
278        assert_stat_op_api("product", float_frame, float_string_frame)
279        assert_stat_op_api("median", float_frame, float_string_frame)
280        assert_stat_op_api("min", float_frame, float_string_frame)
281        assert_stat_op_api("max", float_frame, float_string_frame)
282        assert_stat_op_api("mad", float_frame, float_string_frame)
283        assert_stat_op_api("var", float_frame, float_string_frame)
284        assert_stat_op_api("std", float_frame, float_string_frame)
285        assert_stat_op_api("sem", float_frame, float_string_frame)
286        assert_stat_op_api("median", float_frame, float_string_frame)
287
288        try:
289            from scipy.stats import kurtosis, skew  # noqa:F401
290
291            assert_stat_op_api("skew", float_frame, float_string_frame)
292            assert_stat_op_api("kurt", float_frame, float_string_frame)
293        except ImportError:
294            pass
295
296    def test_stat_op_calc(self, float_frame_with_na, mixed_float_frame):
297        def count(s):
298            return notna(s).sum()
299
300        def nunique(s):
301            return len(algorithms.unique1d(s.dropna()))
302
303        def mad(x):
304            return np.abs(x - x.mean()).mean()
305
306        def var(x):
307            return np.var(x, ddof=1)
308
309        def std(x):
310            return np.std(x, ddof=1)
311
312        def sem(x):
313            return np.std(x, ddof=1) / np.sqrt(len(x))
314
315        def skewness(x):
316            from scipy.stats import skew  # noqa:F811
317
318            if len(x) < 3:
319                return np.nan
320            return skew(x, bias=False)
321
322        def kurt(x):
323            from scipy.stats import kurtosis  # noqa:F811
324
325            if len(x) < 4:
326                return np.nan
327            return kurtosis(x, bias=False)
328
329        assert_stat_op_calc(
330            "nunique",
331            nunique,
332            float_frame_with_na,
333            has_skipna=False,
334            check_dtype=False,
335            check_dates=True,
336        )
337
338        # GH#32571 check_less_precise is needed on apparently-random
339        #  py37-npdev builds and OSX-PY36-min_version builds
340        # mixed types (with upcasting happening)
341        assert_stat_op_calc(
342            "sum",
343            np.sum,
344            mixed_float_frame.astype("float32"),
345            check_dtype=False,
346            rtol=1e-3,
347        )
348
349        assert_stat_op_calc(
350            "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum
351        )
352        assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True)
353        assert_stat_op_calc(
354            "product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod
355        )
356
357        assert_stat_op_calc("mad", mad, float_frame_with_na)
358        assert_stat_op_calc("var", var, float_frame_with_na)
359        assert_stat_op_calc("std", std, float_frame_with_na)
360        assert_stat_op_calc("sem", sem, float_frame_with_na)
361
362        assert_stat_op_calc(
363            "count",
364            count,
365            float_frame_with_na,
366            has_skipna=False,
367            check_dtype=False,
368            check_dates=True,
369        )
370
371        try:
372            from scipy import kurtosis, skew  # noqa:F401
373
374            assert_stat_op_calc("skew", skewness, float_frame_with_na)
375            assert_stat_op_calc("kurt", kurt, float_frame_with_na)
376        except ImportError:
377            pass
378
379    # TODO: Ensure warning isn't emitted in the first place
380    @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning")
381    def test_median(self, float_frame_with_na, int_frame):
382        def wrapper(x):
383            if isna(x).any():
384                return np.nan
385            return np.median(x)
386
387        assert_stat_op_calc("median", wrapper, float_frame_with_na, check_dates=True)
388        assert_stat_op_calc(
389            "median", wrapper, int_frame, check_dtype=False, check_dates=True
390        )
391
392    @pytest.mark.parametrize(
393        "method", ["sum", "mean", "prod", "var", "std", "skew", "min", "max"]
394    )
395    def test_stat_operators_attempt_obj_array(self, method):
396        # GH#676
397        data = {
398            "a": [
399                -0.00049987540199591344,
400                -0.0016467257772919831,
401                0.00067695870775883013,
402            ],
403            "b": [-0, -0, 0.0],
404            "c": [
405                0.00031111847529610595,
406                0.0014902627951905339,
407                -0.00094099200035979691,
408            ],
409        }
410        df1 = DataFrame(data, index=["foo", "bar", "baz"], dtype="O")
411
412        df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object)
413
414        for df in [df1, df2]:
415            assert df.values.dtype == np.object_
416            result = getattr(df, method)(1)
417            expected = getattr(df.astype("f8"), method)(1)
418
419            if method in ["sum", "prod"]:
420                tm.assert_series_equal(result, expected)
421
422    @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"])
423    def test_mixed_ops(self, op):
424        # GH#16116
425        df = DataFrame(
426            {
427                "int": [1, 2, 3, 4],
428                "float": [1.0, 2.0, 3.0, 4.0],
429                "str": ["a", "b", "c", "d"],
430            }
431        )
432
433        result = getattr(df, op)()
434        assert len(result) == 2
435
436        with pd.option_context("use_bottleneck", False):
437            result = getattr(df, op)()
438            assert len(result) == 2
439
440    def test_reduce_mixed_frame(self):
441        # GH 6806
442        df = DataFrame(
443            {
444                "bool_data": [True, True, False, False, False],
445                "int_data": [10, 20, 30, 40, 50],
446                "string_data": ["a", "b", "c", "d", "e"],
447            }
448        )
449        df.reindex(columns=["bool_data", "int_data", "string_data"])
450        test = df.sum(axis=0)
451        tm.assert_numpy_array_equal(
452            test.values, np.array([2, 150, "abcde"], dtype=object)
453        )
454        tm.assert_series_equal(test, df.T.sum(axis=1))
455
456    def test_nunique(self):
457        df = DataFrame({"A": [1, 1, 1], "B": [1, 2, 3], "C": [1, np.nan, 3]})
458        tm.assert_series_equal(df.nunique(), Series({"A": 1, "B": 3, "C": 2}))
459        tm.assert_series_equal(
460            df.nunique(dropna=False), Series({"A": 1, "B": 3, "C": 3})
461        )
462        tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2}))
463        tm.assert_series_equal(
464            df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})
465        )
466
467    @pytest.mark.parametrize("tz", [None, "UTC"])
468    def test_mean_mixed_datetime_numeric(self, tz):
469        # https://github.com/pandas-dev/pandas/issues/24752
470        df = DataFrame({"A": [1, 1], "B": [Timestamp("2000", tz=tz)] * 2})
471        with tm.assert_produces_warning(FutureWarning):
472            result = df.mean()
473        expected = Series([1.0], index=["A"])
474        tm.assert_series_equal(result, expected)
475
476    @pytest.mark.parametrize("tz", [None, "UTC"])
477    def test_mean_excludes_datetimes(self, tz):
478        # https://github.com/pandas-dev/pandas/issues/24752
479        # Our long-term desired behavior is unclear, but the behavior in
480        # 0.24.0rc1 was buggy.
481        df = DataFrame({"A": [Timestamp("2000", tz=tz)] * 2})
482        with tm.assert_produces_warning(FutureWarning):
483            result = df.mean()
484
485        expected = Series(dtype=np.float64)
486        tm.assert_series_equal(result, expected)
487
488    def test_mean_mixed_string_decimal(self):
489        # GH 11670
490        # possible bug when calculating mean of DataFrame?
491
492        d = [
493            {"A": 2, "B": None, "C": Decimal("628.00")},
494            {"A": 1, "B": None, "C": Decimal("383.00")},
495            {"A": 3, "B": None, "C": Decimal("651.00")},
496            {"A": 2, "B": None, "C": Decimal("575.00")},
497            {"A": 4, "B": None, "C": Decimal("1114.00")},
498            {"A": 1, "B": "TEST", "C": Decimal("241.00")},
499            {"A": 2, "B": None, "C": Decimal("572.00")},
500            {"A": 4, "B": None, "C": Decimal("609.00")},
501            {"A": 3, "B": None, "C": Decimal("820.00")},
502            {"A": 5, "B": None, "C": Decimal("1223.00")},
503        ]
504
505        df = DataFrame(d)
506
507        result = df.mean()
508        expected = Series([2.7, 681.6], index=["A", "C"])
509        tm.assert_series_equal(result, expected)
510
511    def test_var_std(self, datetime_frame):
512        result = datetime_frame.std(ddof=4)
513        expected = datetime_frame.apply(lambda x: x.std(ddof=4))
514        tm.assert_almost_equal(result, expected)
515
516        result = datetime_frame.var(ddof=4)
517        expected = datetime_frame.apply(lambda x: x.var(ddof=4))
518        tm.assert_almost_equal(result, expected)
519
520        arr = np.repeat(np.random.random((1, 1000)), 1000, 0)
521        result = nanops.nanvar(arr, axis=0)
522        assert not (result < 0).any()
523
524        with pd.option_context("use_bottleneck", False):
525            result = nanops.nanvar(arr, axis=0)
526            assert not (result < 0).any()
527
528    @pytest.mark.parametrize("meth", ["sem", "var", "std"])
529    def test_numeric_only_flag(self, meth):
530        # GH 9201
531        df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"])
532        # set one entry to a number in str format
533        df1.loc[0, "foo"] = "100"
534
535        df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"])
536        # set one entry to a non-number str
537        df2.loc[0, "foo"] = "a"
538
539        result = getattr(df1, meth)(axis=1, numeric_only=True)
540        expected = getattr(df1[["bar", "baz"]], meth)(axis=1)
541        tm.assert_series_equal(expected, result)
542
543        result = getattr(df2, meth)(axis=1, numeric_only=True)
544        expected = getattr(df2[["bar", "baz"]], meth)(axis=1)
545        tm.assert_series_equal(expected, result)
546
547        # df1 has all numbers, df2 has a letter inside
548        msg = r"unsupported operand type\(s\) for -: 'float' and 'str'"
549        with pytest.raises(TypeError, match=msg):
550            getattr(df1, meth)(axis=1, numeric_only=False)
551        msg = "could not convert string to float: 'a'"
552        with pytest.raises(TypeError, match=msg):
553            getattr(df2, meth)(axis=1, numeric_only=False)
554
555    def test_sem(self, datetime_frame):
556        result = datetime_frame.sem(ddof=4)
557        expected = datetime_frame.apply(lambda x: x.std(ddof=4) / np.sqrt(len(x)))
558        tm.assert_almost_equal(result, expected)
559
560        arr = np.repeat(np.random.random((1, 1000)), 1000, 0)
561        result = nanops.nansem(arr, axis=0)
562        assert not (result < 0).any()
563
564        with pd.option_context("use_bottleneck", False):
565            result = nanops.nansem(arr, axis=0)
566            assert not (result < 0).any()
567
568    @td.skip_if_no_scipy
569    def test_kurt(self):
570        index = MultiIndex(
571            levels=[["bar"], ["one", "two", "three"], [0, 1]],
572            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
573        )
574        df = DataFrame(np.random.randn(6, 3), index=index)
575
576        kurt = df.kurt()
577        kurt2 = df.kurt(level=0).xs("bar")
578        tm.assert_series_equal(kurt, kurt2, check_names=False)
579        assert kurt.name is None
580        assert kurt2.name == "bar"
581
582    @pytest.mark.parametrize(
583        "dropna, expected",
584        [
585            (
586                True,
587                {
588                    "A": [12],
589                    "B": [10.0],
590                    "C": [1.0],
591                    "D": ["a"],
592                    "E": Categorical(["a"], categories=["a"]),
593                    "F": to_datetime(["2000-1-2"]),
594                    "G": to_timedelta(["1 days"]),
595                },
596            ),
597            (
598                False,
599                {
600                    "A": [12],
601                    "B": [10.0],
602                    "C": [np.nan],
603                    "D": np.array([np.nan], dtype=object),
604                    "E": Categorical([np.nan], categories=["a"]),
605                    "F": [pd.NaT],
606                    "G": to_timedelta([pd.NaT]),
607                },
608            ),
609            (
610                True,
611                {
612                    "H": [8, 9, np.nan, np.nan],
613                    "I": [8, 9, np.nan, np.nan],
614                    "J": [1, np.nan, np.nan, np.nan],
615                    "K": Categorical(["a", np.nan, np.nan, np.nan], categories=["a"]),
616                    "L": to_datetime(["2000-1-2", "NaT", "NaT", "NaT"]),
617                    "M": to_timedelta(["1 days", "nan", "nan", "nan"]),
618                    "N": [0, 1, 2, 3],
619                },
620            ),
621            (
622                False,
623                {
624                    "H": [8, 9, np.nan, np.nan],
625                    "I": [8, 9, np.nan, np.nan],
626                    "J": [1, np.nan, np.nan, np.nan],
627                    "K": Categorical([np.nan, "a", np.nan, np.nan], categories=["a"]),
628                    "L": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]),
629                    "M": to_timedelta(["nan", "1 days", "nan", "nan"]),
630                    "N": [0, 1, 2, 3],
631                },
632            ),
633        ],
634    )
635    def test_mode_dropna(self, dropna, expected):
636
637        df = DataFrame(
638            {
639                "A": [12, 12, 19, 11],
640                "B": [10, 10, np.nan, 3],
641                "C": [1, np.nan, np.nan, np.nan],
642                "D": [np.nan, np.nan, "a", np.nan],
643                "E": Categorical([np.nan, np.nan, "a", np.nan]),
644                "F": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]),
645                "G": to_timedelta(["1 days", "nan", "nan", "nan"]),
646                "H": [8, 8, 9, 9],
647                "I": [9, 9, 8, 8],
648                "J": [1, 1, np.nan, np.nan],
649                "K": Categorical(["a", np.nan, "a", np.nan]),
650                "L": to_datetime(["2000-1-2", "2000-1-2", "NaT", "NaT"]),
651                "M": to_timedelta(["1 days", "nan", "1 days", "nan"]),
652                "N": np.arange(4, dtype="int64"),
653            }
654        )
655
656        result = df[sorted(expected.keys())].mode(dropna=dropna)
657        expected = DataFrame(expected)
658        tm.assert_frame_equal(result, expected)
659
660    def test_mode_sortwarning(self):
661        # Check for the warning that is raised when the mode
662        # results cannot be sorted
663
664        df = DataFrame({"A": [np.nan, np.nan, "a", "a"]})
665        expected = DataFrame({"A": ["a", np.nan]})
666
667        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
668            result = df.mode(dropna=False)
669            result = result.sort_values(by="A").reset_index(drop=True)
670
671        tm.assert_frame_equal(result, expected)
672
673    def test_operators_timedelta64(self):
674        df = DataFrame(
675            {
676                "A": date_range("2012-1-1", periods=3, freq="D"),
677                "B": date_range("2012-1-2", periods=3, freq="D"),
678                "C": Timestamp("20120101") - timedelta(minutes=5, seconds=5),
679            }
680        )
681
682        diffs = DataFrame({"A": df["A"] - df["C"], "B": df["A"] - df["B"]})
683
684        # min
685        result = diffs.min()
686        assert result[0] == diffs.loc[0, "A"]
687        assert result[1] == diffs.loc[0, "B"]
688
689        result = diffs.min(axis=1)
690        assert (result == diffs.loc[0, "B"]).all()
691
692        # max
693        result = diffs.max()
694        assert result[0] == diffs.loc[2, "A"]
695        assert result[1] == diffs.loc[2, "B"]
696
697        result = diffs.max(axis=1)
698        assert (result == diffs["A"]).all()
699
700        # abs
701        result = diffs.abs()
702        result2 = abs(diffs)
703        expected = DataFrame({"A": df["A"] - df["C"], "B": df["B"] - df["A"]})
704        tm.assert_frame_equal(result, expected)
705        tm.assert_frame_equal(result2, expected)
706
707        # mixed frame
708        mixed = diffs.copy()
709        mixed["C"] = "foo"
710        mixed["D"] = 1
711        mixed["E"] = 1.0
712        mixed["F"] = Timestamp("20130101")
713
714        # results in an object array
715        result = mixed.min()
716        expected = Series(
717            [
718                pd.Timedelta(timedelta(seconds=5 * 60 + 5)),
719                pd.Timedelta(timedelta(days=-1)),
720                "foo",
721                1,
722                1.0,
723                Timestamp("20130101"),
724            ],
725            index=mixed.columns,
726        )
727        tm.assert_series_equal(result, expected)
728
729        # excludes numeric
730        result = mixed.min(axis=1)
731        expected = Series([1, 1, 1.0], index=[0, 1, 2])
732        tm.assert_series_equal(result, expected)
733
734        # works when only those columns are selected
735        result = mixed[["A", "B"]].min(1)
736        expected = Series([timedelta(days=-1)] * 3)
737        tm.assert_series_equal(result, expected)
738
739        result = mixed[["A", "B"]].min()
740        expected = Series(
741            [timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], index=["A", "B"]
742        )
743        tm.assert_series_equal(result, expected)
744
745        # GH 3106
746        df = DataFrame(
747            {
748                "time": date_range("20130102", periods=5),
749                "time2": date_range("20130105", periods=5),
750            }
751        )
752        df["off1"] = df["time2"] - df["time"]
753        assert df["off1"].dtype == "timedelta64[ns]"
754
755        df["off2"] = df["time"] - df["time2"]
756        df._consolidate_inplace()
757        assert df["off1"].dtype == "timedelta64[ns]"
758        assert df["off2"].dtype == "timedelta64[ns]"
759
760    def test_std_timedelta64_skipna_false(self):
761        # GH#37392
762        tdi = pd.timedelta_range("1 Day", periods=10)
763        df = DataFrame({"A": tdi, "B": tdi})
764        df.iloc[-2, -1] = pd.NaT
765
766        result = df.std(skipna=False)
767        expected = Series(
768            [df["A"].std(), pd.NaT], index=["A", "B"], dtype="timedelta64[ns]"
769        )
770        tm.assert_series_equal(result, expected)
771
772        result = df.std(axis=1, skipna=False)
773        expected = Series([pd.Timedelta(0)] * 8 + [pd.NaT, pd.Timedelta(0)])
774        tm.assert_series_equal(result, expected)
775
776    def test_sum_corner(self):
777        empty_frame = DataFrame()
778
779        axis0 = empty_frame.sum(0)
780        axis1 = empty_frame.sum(1)
781        assert isinstance(axis0, Series)
782        assert isinstance(axis1, Series)
783        assert len(axis0) == 0
784        assert len(axis1) == 0
785
786    @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)])
787    @pytest.mark.parametrize("numeric_only", [None, True, False])
788    def test_sum_prod_nanops(self, method, unit, numeric_only):
789        idx = ["a", "b", "c"]
790        df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]})
791        # The default
792        result = getattr(df, method)(numeric_only=numeric_only)
793        expected = Series([unit, unit, unit], index=idx, dtype="float64")
794
795        # min_count=1
796        result = getattr(df, method)(numeric_only=numeric_only, min_count=1)
797        expected = Series([unit, unit, np.nan], index=idx)
798        tm.assert_series_equal(result, expected)
799
800        # min_count=0
801        result = getattr(df, method)(numeric_only=numeric_only, min_count=0)
802        expected = Series([unit, unit, unit], index=idx, dtype="float64")
803        tm.assert_series_equal(result, expected)
804
805        result = getattr(df.iloc[1:], method)(numeric_only=numeric_only, min_count=1)
806        expected = Series([unit, np.nan, np.nan], index=idx)
807        tm.assert_series_equal(result, expected)
808
809        # min_count > 1
810        df = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5})
811        result = getattr(df, method)(numeric_only=numeric_only, min_count=5)
812        expected = Series(result, index=["A", "B"])
813        tm.assert_series_equal(result, expected)
814
815        result = getattr(df, method)(numeric_only=numeric_only, min_count=6)
816        expected = Series(result, index=["A", "B"])
817        tm.assert_series_equal(result, expected)
818
819    def test_sum_nanops_timedelta(self):
820        # prod isn't defined on timedeltas
821        idx = ["a", "b", "c"]
822        df = DataFrame({"a": [0, 0], "b": [0, np.nan], "c": [np.nan, np.nan]})
823
824        df2 = df.apply(pd.to_timedelta)
825
826        # 0 by default
827        result = df2.sum()
828        expected = Series([0, 0, 0], dtype="m8[ns]", index=idx)
829        tm.assert_series_equal(result, expected)
830
831        # min_count=0
832        result = df2.sum(min_count=0)
833        tm.assert_series_equal(result, expected)
834
835        # min_count=1
836        result = df2.sum(min_count=1)
837        expected = Series([0, 0, np.nan], dtype="m8[ns]", index=idx)
838        tm.assert_series_equal(result, expected)
839
840    def test_sum_nanops_min_count(self):
841        # https://github.com/pandas-dev/pandas/issues/39738
842        df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
843        result = df.sum(min_count=10)
844        expected = Series([np.nan, np.nan], index=["x", "y"])
845        tm.assert_series_equal(result, expected)
846
847    def test_sum_object(self, float_frame):
848        values = float_frame.values.astype(int)
849        frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns)
850        deltas = frame * timedelta(1)
851        deltas.sum()
852
853    def test_sum_bool(self, float_frame):
854        # ensure this works, bug report
855        bools = np.isnan(float_frame)
856        bools.sum(1)
857        bools.sum(0)
858
859    def test_sum_mixed_datetime(self):
860        # GH#30886
861        df = DataFrame(
862            {"A": pd.date_range("2000", periods=4), "B": [1, 2, 3, 4]}
863        ).reindex([2, 3, 4])
864        result = df.sum()
865
866        expected = Series({"B": 7.0})
867        tm.assert_series_equal(result, expected)
868
869    def test_mean_corner(self, float_frame, float_string_frame):
870        # unit test when have object data
871        the_mean = float_string_frame.mean(axis=0)
872        the_sum = float_string_frame.sum(axis=0, numeric_only=True)
873        tm.assert_index_equal(the_sum.index, the_mean.index)
874        assert len(the_mean.index) < len(float_string_frame.columns)
875
876        # xs sum mixed type, just want to know it works...
877        the_mean = float_string_frame.mean(axis=1)
878        the_sum = float_string_frame.sum(axis=1, numeric_only=True)
879        tm.assert_index_equal(the_sum.index, the_mean.index)
880
881        # take mean of boolean column
882        float_frame["bool"] = float_frame["A"] > 0
883        means = float_frame.mean(0)
884        assert means["bool"] == float_frame["bool"].values.mean()
885
886    def test_mean_datetimelike(self):
887        # GH#24757 check that datetimelike are excluded by default, handled
888        #  correctly with numeric_only=True
889
890        df = DataFrame(
891            {
892                "A": np.arange(3),
893                "B": pd.date_range("2016-01-01", periods=3),
894                "C": pd.timedelta_range("1D", periods=3),
895                "D": pd.period_range("2016", periods=3, freq="A"),
896            }
897        )
898        result = df.mean(numeric_only=True)
899        expected = Series({"A": 1.0})
900        tm.assert_series_equal(result, expected)
901
902        with tm.assert_produces_warning(FutureWarning):
903            # in the future datetime columns will be included
904            result = df.mean()
905        expected = Series({"A": 1.0, "C": df.loc[1, "C"]})
906        tm.assert_series_equal(result, expected)
907
908    def test_mean_datetimelike_numeric_only_false(self):
909        df = DataFrame(
910            {
911                "A": np.arange(3),
912                "B": pd.date_range("2016-01-01", periods=3),
913                "C": pd.timedelta_range("1D", periods=3),
914            }
915        )
916
917        # datetime(tz) and timedelta work
918        result = df.mean(numeric_only=False)
919        expected = Series({"A": 1, "B": df.loc[1, "B"], "C": df.loc[1, "C"]})
920        tm.assert_series_equal(result, expected)
921
922        # mean of period is not allowed
923        df["D"] = pd.period_range("2016", periods=3, freq="A")
924
925        with pytest.raises(TypeError, match="mean is not implemented for Period"):
926            df.mean(numeric_only=False)
927
928    def test_mean_extensionarray_numeric_only_true(self):
929        # https://github.com/pandas-dev/pandas/issues/33256
930        arr = np.random.randint(1000, size=(10, 5))
931        df = DataFrame(arr, dtype="Int64")
932        result = df.mean(numeric_only=True)
933        expected = DataFrame(arr).mean()
934        tm.assert_series_equal(result, expected)
935
936    def test_stats_mixed_type(self, float_string_frame):
937        # don't blow up
938        float_string_frame.std(1)
939        float_string_frame.var(1)
940        float_string_frame.mean(1)
941        float_string_frame.skew(1)
942
943    def test_sum_bools(self):
944        df = DataFrame(index=range(1), columns=range(10))
945        bools = isna(df)
946        assert bools.sum(axis=1)[0] == 10
947
948    # ----------------------------------------------------------------------
949    # Index of max / min
950
951    def test_idxmin(self, float_frame, int_frame):
952        frame = float_frame
953        frame.iloc[5:10] = np.nan
954        frame.iloc[15:20, -2:] = np.nan
955        for skipna in [True, False]:
956            for axis in [0, 1]:
957                for df in [frame, int_frame]:
958                    result = df.idxmin(axis=axis, skipna=skipna)
959                    expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
960                    tm.assert_series_equal(result, expected)
961
962        msg = "No axis named 2 for object type DataFrame"
963        with pytest.raises(ValueError, match=msg):
964            frame.idxmin(axis=2)
965
966    def test_idxmax(self, float_frame, int_frame):
967        frame = float_frame
968        frame.iloc[5:10] = np.nan
969        frame.iloc[15:20, -2:] = np.nan
970        for skipna in [True, False]:
971            for axis in [0, 1]:
972                for df in [frame, int_frame]:
973                    result = df.idxmax(axis=axis, skipna=skipna)
974                    expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
975                    tm.assert_series_equal(result, expected)
976
977        msg = "No axis named 2 for object type DataFrame"
978        with pytest.raises(ValueError, match=msg):
979            frame.idxmax(axis=2)
980
981    def test_idxmax_mixed_dtype(self):
982        # don't cast to object, which would raise in nanops
983        dti = pd.date_range("2016-01-01", periods=3)
984
985        df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti})
986
987        result = df.idxmax()
988        expected = Series([1, 0, 2], index=[1, 2, 3])
989        tm.assert_series_equal(result, expected)
990
991        result = df.idxmin()
992        expected = Series([0, 2, 0], index=[1, 2, 3])
993        tm.assert_series_equal(result, expected)
994
995    # ----------------------------------------------------------------------
996    # Logical reductions
997
998    @pytest.mark.parametrize("opname", ["any", "all"])
999    def test_any_all(self, opname, bool_frame_with_na, float_string_frame):
1000        assert_bool_op_calc(
1001            opname, getattr(np, opname), bool_frame_with_na, has_skipna=True
1002        )
1003        assert_bool_op_api(
1004            opname, bool_frame_with_na, float_string_frame, has_bool_only=True
1005        )
1006
1007    def test_any_all_extra(self):
1008        df = DataFrame(
1009            {
1010                "A": [True, False, False],
1011                "B": [True, True, False],
1012                "C": [True, True, True],
1013            },
1014            index=["a", "b", "c"],
1015        )
1016        result = df[["A", "B"]].any(1)
1017        expected = Series([True, True, False], index=["a", "b", "c"])
1018        tm.assert_series_equal(result, expected)
1019
1020        result = df[["A", "B"]].any(1, bool_only=True)
1021        tm.assert_series_equal(result, expected)
1022
1023        result = df.all(1)
1024        expected = Series([True, False, False], index=["a", "b", "c"])
1025        tm.assert_series_equal(result, expected)
1026
1027        result = df.all(1, bool_only=True)
1028        tm.assert_series_equal(result, expected)
1029
1030        # Axis is None
1031        result = df.all(axis=None).item()
1032        assert result is False
1033
1034        result = df.any(axis=None).item()
1035        assert result is True
1036
1037        result = df[["C"]].all(axis=None).item()
1038        assert result is True
1039
1040    def test_any_datetime(self):
1041
1042        # GH 23070
1043        float_data = [1, np.nan, 3, np.nan]
1044        datetime_data = [
1045            Timestamp("1960-02-15"),
1046            Timestamp("1960-02-16"),
1047            pd.NaT,
1048            pd.NaT,
1049        ]
1050        df = DataFrame({"A": float_data, "B": datetime_data})
1051
1052        result = df.any(1)
1053        expected = Series([True, True, True, False])
1054        tm.assert_series_equal(result, expected)
1055
1056    def test_any_all_bool_only(self):
1057
1058        # GH 25101
1059        df = DataFrame(
1060            {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]}
1061        )
1062
1063        result = df.all(bool_only=True)
1064        expected = Series(dtype=np.bool_)
1065        tm.assert_series_equal(result, expected)
1066
1067        df = DataFrame(
1068            {
1069                "col1": [1, 2, 3],
1070                "col2": [4, 5, 6],
1071                "col3": [None, None, None],
1072                "col4": [False, False, True],
1073            }
1074        )
1075
1076        result = df.all(bool_only=True)
1077        expected = Series({"col4": False})
1078        tm.assert_series_equal(result, expected)
1079
1080    @pytest.mark.parametrize(
1081        "func, data, expected",
1082        [
1083            (np.any, {}, False),
1084            (np.all, {}, True),
1085            (np.any, {"A": []}, False),
1086            (np.all, {"A": []}, True),
1087            (np.any, {"A": [False, False]}, False),
1088            (np.all, {"A": [False, False]}, False),
1089            (np.any, {"A": [True, False]}, True),
1090            (np.all, {"A": [True, False]}, False),
1091            (np.any, {"A": [True, True]}, True),
1092            (np.all, {"A": [True, True]}, True),
1093            (np.any, {"A": [False], "B": [False]}, False),
1094            (np.all, {"A": [False], "B": [False]}, False),
1095            (np.any, {"A": [False, False], "B": [False, True]}, True),
1096            (np.all, {"A": [False, False], "B": [False, True]}, False),
1097            # other types
1098            (np.all, {"A": Series([0.0, 1.0], dtype="float")}, False),
1099            (np.any, {"A": Series([0.0, 1.0], dtype="float")}, True),
1100            (np.all, {"A": Series([0, 1], dtype=int)}, False),
1101            (np.any, {"A": Series([0, 1], dtype=int)}, True),
1102            pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns]")}, False),
1103            pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, False),
1104            pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns]")}, True),
1105            pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, True),
1106            pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns]")}, True),
1107            pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True),
1108            pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns]")}, True),
1109            pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True),
1110            pytest.param(np.all, {"A": Series([0, 1], dtype="m8[ns]")}, False),
1111            pytest.param(np.any, {"A": Series([0, 1], dtype="m8[ns]")}, True),
1112            pytest.param(np.all, {"A": Series([1, 2], dtype="m8[ns]")}, True),
1113            pytest.param(np.any, {"A": Series([1, 2], dtype="m8[ns]")}, True),
1114            # np.all on Categorical raises, so the reduction drops the
1115            #  column, so all is being done on an empty Series, so is True
1116            (np.all, {"A": Series([0, 1], dtype="category")}, True),
1117            (np.any, {"A": Series([0, 1], dtype="category")}, False),
1118            (np.all, {"A": Series([1, 2], dtype="category")}, True),
1119            (np.any, {"A": Series([1, 2], dtype="category")}, False),
1120            # Mix GH#21484
1121            pytest.param(
1122                np.all,
1123                {
1124                    "A": Series([10, 20], dtype="M8[ns]"),
1125                    "B": Series([10, 20], dtype="m8[ns]"),
1126                },
1127                True,
1128            ),
1129        ],
1130    )
1131    def test_any_all_np_func(self, func, data, expected):
1132        # GH 19976
1133        data = DataFrame(data)
1134        result = func(data)
1135        assert isinstance(result, np.bool_)
1136        assert result.item() is expected
1137
1138        # method version
1139        result = getattr(DataFrame(data), func.__name__)(axis=None)
1140        assert isinstance(result, np.bool_)
1141        assert result.item() is expected
1142
1143    def test_any_all_object(self):
1144        # GH 19976
1145        result = np.all(DataFrame(columns=["a", "b"])).item()
1146        assert result is True
1147
1148        result = np.any(DataFrame(columns=["a", "b"])).item()
1149        assert result is False
1150
1151    def test_any_all_object_bool_only(self):
1152        df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object)
1153        df._consolidate_inplace()
1154        df["C"] = Series([True, True])
1155
1156        # The underlying bug is in DataFrame._get_bool_data, so we check
1157        #  that while we're here
1158        res = df._get_bool_data()
1159        expected = df[["B", "C"]]
1160        tm.assert_frame_equal(res, expected)
1161
1162        res = df.all(bool_only=True, axis=0)
1163        expected = Series([False, True], index=["B", "C"])
1164        tm.assert_series_equal(res, expected)
1165
1166        # operating on a subset of columns should not produce a _larger_ Series
1167        res = df[["B", "C"]].all(bool_only=True, axis=0)
1168        tm.assert_series_equal(res, expected)
1169
1170        assert not df.all(bool_only=True, axis=None)
1171
1172        res = df.any(bool_only=True, axis=0)
1173        expected = Series([True, True], index=["B", "C"])
1174        tm.assert_series_equal(res, expected)
1175
1176        # operating on a subset of columns should not produce a _larger_ Series
1177        res = df[["B", "C"]].any(bool_only=True, axis=0)
1178        tm.assert_series_equal(res, expected)
1179
1180        assert df.any(bool_only=True, axis=None)
1181
1182    @pytest.mark.parametrize("method", ["any", "all"])
1183    def test_any_all_level_axis_none_raises(self, method):
1184        df = DataFrame(
1185            {"A": 1},
1186            index=MultiIndex.from_product(
1187                [["A", "B"], ["a", "b"]], names=["out", "in"]
1188            ),
1189        )
1190        xpr = "Must specify 'axis' when aggregating by level."
1191        with pytest.raises(ValueError, match=xpr):
1192            getattr(df, method)(axis=None, level="out")
1193
1194    # ---------------------------------------------------------------------
1195    # Unsorted
1196
1197    def test_series_broadcasting(self):
1198        # smoke test for numpy warnings
1199        # GH 16378, GH 16306
1200        df = DataFrame([1.0, 1.0, 1.0])
1201        df_nan = DataFrame({"A": [np.nan, 2.0, np.nan]})
1202        s = Series([1, 1, 1])
1203        s_nan = Series([np.nan, np.nan, 1])
1204
1205        with tm.assert_produces_warning(None):
1206            df_nan.clip(lower=s, axis=0)
1207            for op in ["lt", "le", "gt", "ge", "eq", "ne"]:
1208                getattr(df, op)(s_nan, axis=0)
1209
1210
class TestDataFrameReductions:
    """Tests for DataFrame min/max on datetimelike data and any/all extras."""

    def test_min_max_dt64_with_NaT(self):
        """min/max skip NaT when a Timestamp exists and give NaT otherwise."""
        # Both NaT and Timestamp are in DataFrame.
        df = DataFrame({"foo": [pd.NaT, pd.NaT, Timestamp("2012-05-01")]})

        res = df.min()
        exp = Series([Timestamp("2012-05-01")], index=["foo"])
        tm.assert_series_equal(res, exp)

        res = df.max()
        exp = Series([Timestamp("2012-05-01")], index=["foo"])
        tm.assert_series_equal(res, exp)

        # GH12941, only NaTs are in DataFrame.
        df = DataFrame({"foo": [pd.NaT, pd.NaT]})

        res = df.min()
        exp = Series([pd.NaT], index=["foo"])
        tm.assert_series_equal(res, exp)

        res = df.max()
        exp = Series([pd.NaT], index=["foo"])
        tm.assert_series_equal(res, exp)

    def test_min_max_dt64_with_NaT_skipna_false(self, tz_naive_fixture):
        """Row-wise min/max with ``skipna=False`` propagate NaT, keep dtype."""
        # GH#36907
        tz = tz_naive_fixture
        if isinstance(tz, tzlocal) and is_platform_windows():
            pytest.xfail(
                reason="GH#37659 OSError raised within tzlocal bc Windows "
                "chokes in times before 1970-01-01"
            )

        df = DataFrame(
            {
                "a": [
                    Timestamp("2020-01-01 08:00:00", tz=tz),
                    Timestamp("1920-02-01 09:00:00", tz=tz),
                ],
                "b": [Timestamp("2020-02-01 08:00:00", tz=tz), pd.NaT],
            }
        )

        # Row 1 contains a NaT, so with skipna=False its result is NaT.
        res = df.min(axis=1, skipna=False)
        expected = Series([df.loc[0, "a"], pd.NaT])
        assert expected.dtype == df["a"].dtype

        tm.assert_series_equal(res, expected)

        res = df.max(axis=1, skipna=False)
        expected = Series([df.loc[0, "b"], pd.NaT])
        assert expected.dtype == df["a"].dtype

        tm.assert_series_equal(res, expected)

    def test_min_max_dt64_api_consistency_with_NaT(self):
        """min/max on an empty datetime frame behave like the Series methods."""
        # Calling the following min/max functions returned an error for
        # dataframes but returned NaT for series. These tests check that the
        # API is consistent in min/max calls on empty Series/DataFrames.
        # See GH:33704 for more information
        df = DataFrame({"x": pd.to_datetime([])})
        expected_dt_series = Series(pd.to_datetime([]))
        # check axis 0
        assert (df.min(axis=0).x is pd.NaT) == (expected_dt_series.min() is pd.NaT)
        assert (df.max(axis=0).x is pd.NaT) == (expected_dt_series.max() is pd.NaT)

        # check axis 1
        tm.assert_series_equal(df.min(axis=1), expected_dt_series)
        tm.assert_series_equal(df.max(axis=1), expected_dt_series)

    def test_min_max_dt64_api_consistency_empty_df(self):
        """min/max on an empty frame behave like the Series methods."""
        # check DataFrame/Series api consistency when calling min/max on an empty
        # DataFrame/Series.
        df = DataFrame({"x": []})
        expected_float_series = Series([], dtype=float)
        # check axis 0
        assert np.isnan(df.min(axis=0).x) == np.isnan(expected_float_series.min())
        assert np.isnan(df.max(axis=0).x) == np.isnan(expected_float_series.max())
        # check axis 1: cover both reductions (min was previously checked twice
        # and max not at all)
        tm.assert_series_equal(df.min(axis=1), expected_float_series)
        tm.assert_series_equal(df.max(axis=1), expected_float_series)

    @pytest.mark.parametrize(
        "initial",
        ["2018-10-08 13:36:45+00:00", "2018-10-08 13:36:45+03:00"],  # Non-UTC timezone
    )
    @pytest.mark.parametrize("method", ["min", "max"])
    def test_preserve_timezone(self, initial: str, method):
        """Row-wise min/max of a single tz-aware value returns it unchanged."""
        # GH 28552
        initial_dt = pd.to_datetime(initial)
        expected = Series([initial_dt])
        df = DataFrame([expected])
        result = getattr(df, method)(axis=1)
        tm.assert_series_equal(result, expected)

    def test_frame_any_all_with_level(self):
        """``any`` / ``all`` with ``level=0`` reduce within each outer label."""
        df = DataFrame(
            {"data": [False, False, True, False, True, False, True]},
            index=[
                ["one", "one", "two", "one", "two", "two", "two"],
                [0, 1, 0, 2, 1, 2, 3],
            ],
        )

        result = df.any(level=0)
        ex = DataFrame({"data": [False, True]}, index=["one", "two"])
        tm.assert_frame_equal(result, ex)

        result = df.all(level=0)
        ex = DataFrame({"data": [False, False]}, index=["one", "two"])
        tm.assert_frame_equal(result, ex)

    def test_frame_any_with_timedelta(self):
        """``any`` treats a non-zero timedelta as truthy on both axes."""
        # GH#17667
        df = DataFrame(
            {
                "a": Series([0, 0]),
                "t": Series([pd.to_timedelta(0, "s"), pd.to_timedelta(1, "ms")]),
            }
        )

        result = df.any(axis=0)
        expected = Series(data=[False, True], index=["a", "t"])
        tm.assert_series_equal(result, expected)

        result = df.any(axis=1)
        expected = Series(data=[False, True])
        tm.assert_series_equal(result, expected)
1339
1340
class TestNuisanceColumns:
    """Reductions on frames holding columns the operation cannot handle."""

    @pytest.mark.parametrize("method", ["any", "all"])
    def test_any_all_categorical_dtype_nuisance_column(self, method):
        """``any`` / ``all`` raise or ignore a categorical column."""
        # GH#36076 DataFrame should match Series behavior
        ser = Series([0, 1], dtype="category", name="A")
        df = ser.to_frame()
        msg = "does not implement reduction"

        # Double-check the Series behavior is to raise; the numpy function and
        #  the DataFrame method with bool_only=False must raise the same way.
        raising_calls = [
            lambda: getattr(ser, method)(),
            lambda: getattr(np, method)(ser),
            lambda: getattr(df, method)(bool_only=False),
        ]
        for call in raising_calls:
            with pytest.raises(TypeError, match=msg):
                call()

        # With bool_only=None, operating on this column raises and is ignored,
        #  so we expect an empty result.
        empty = Series([], index=Index([]), dtype=bool)
        tm.assert_series_equal(getattr(df, method)(bool_only=None), empty)
        tm.assert_series_equal(getattr(np, method)(df, axis=0), empty)

    def test_median_categorical_dtype_nuisance_column(self):
        """``median`` raises or ignores a categorical column."""
        # GH#21020 DataFrame.median should match Series.median
        df = DataFrame({"A": Categorical([1, 2, 2, 2, 3])})
        ser = df["A"]
        msg = "does not implement reduction"

        # Double-check the Series behavior is to raise
        with pytest.raises(TypeError, match=msg):
            ser.median()

        with pytest.raises(TypeError, match=msg):
            df.median(numeric_only=False)

        empty = Series([], index=Index([]), dtype=np.float64)
        tm.assert_series_equal(df.median(), empty)

        # same thing, but with an additional non-categorical column
        df["B"] = df["A"].astype(int)

        with pytest.raises(TypeError, match=msg):
            df.median(numeric_only=False)

        only_b = Series([2.0], index=["B"])
        tm.assert_series_equal(df.median(), only_b)

        # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead
        #  of expected.values

    @pytest.mark.parametrize("method", ["min", "max"])
    def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method):
        """``min`` / ``max`` raise or ignore an unordered categorical column."""
        # GH#28949 DataFrame.min should behave like Series.min
        cat = Categorical(["a", "b", "c", "b"], ordered=False)
        ser = Series(cat)
        df = ser.to_frame("A")
        msg = "is not ordered for operation"

        # Double-check the Series behavior; the numpy function and the
        #  DataFrame method with numeric_only=False must raise the same way.
        raising_calls = [
            lambda: getattr(ser, method)(),
            lambda: getattr(np, method)(ser),
            lambda: getattr(df, method)(numeric_only=False),
        ]
        for call in raising_calls:
            with pytest.raises(TypeError, match=msg):
                call()

        empty = Series([], index=Index([]), dtype=np.float64)
        tm.assert_series_equal(getattr(df, method)(), empty)
        tm.assert_series_equal(getattr(np, method)(df), empty)

        # same thing, but with an additional non-categorical column
        df["B"] = df["A"].astype(object)
        extreme = "a" if method == "min" else "c"
        only_b = Series([extreme], index=["B"])
        tm.assert_series_equal(getattr(df, method)(), only_b)
        tm.assert_series_equal(getattr(np, method)(df), only_b)

    def test_reduction_object_block_splits_nuisance_columns(self):
        """``mean`` keeps object columns holding numbers, drops string ones."""
        # GH#37827
        df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object)

        # We should only exclude "B", not "A"
        tm.assert_series_equal(df.mean(), Series([1.0], index=["A"]))

        # Same behavior but heterogeneous dtype
        df["C"] = df["A"].astype(int) + 4
        tm.assert_series_equal(df.mean(), Series([1.0, 5.0], index=["A", "C"]))
1447
1448
1449def test_sum_timedelta64_skipna_false():
1450    # GH#17235
1451    arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2)
1452    arr[-1, -1] = "Nat"
1453
1454    df = DataFrame(arr)
1455
1456    result = df.sum(skipna=False)
1457    expected = Series([pd.Timedelta(seconds=12), pd.NaT])
1458    tm.assert_series_equal(result, expected)
1459
1460    result = df.sum(axis=0, skipna=False)
1461    tm.assert_series_equal(result, expected)
1462
1463    result = df.sum(axis=1, skipna=False)
1464    expected = Series(
1465        [
1466            pd.Timedelta(seconds=1),
1467            pd.Timedelta(seconds=5),
1468            pd.Timedelta(seconds=9),
1469            pd.NaT,
1470        ]
1471    )
1472    tm.assert_series_equal(result, expected)
1473
1474
def test_mixed_frame_with_integer_sum():
    """``sum`` on a frame mixing a string column with an Int64 column."""
    # https://github.com/pandas-dev/pandas/issues/34520
    column_names = list("ab")
    frame = DataFrame([["a", 1]], columns=column_names).astype({"b": "Int64"})
    tm.assert_series_equal(frame.sum(), Series(["a", 1], index=column_names))
1482
1483
@pytest.mark.parametrize("numeric_only", [True, False, None])
@pytest.mark.parametrize("method", ["min", "max"])
def test_minmax_extensionarray(method, numeric_only):
    """min/max of an Int64 column return the exact int64 extreme."""
    # https://github.com/pandas-dev/pandas/issues/32651
    limits = np.iinfo("int64")
    values = Series([limits.max, None, limits.min], dtype=pd.Int64Dtype())
    frame = DataFrame({"Int64": values})

    # np.iinfo exposes .min / .max, matching the method names under test.
    extreme = getattr(limits, method)
    expected = Series([extreme], index=Index(["Int64"], dtype="object"))

    result = getattr(frame, method)(numeric_only=numeric_only)
    tm.assert_series_equal(result, expected)
1496
1497
def test_prod_sum_min_count_mixed_object():
    """prod/sum with ``min_count`` on a column mixing int, str and bool."""
    # https://github.com/pandas-dev/pandas/issues/41074
    df = DataFrame([1, "a", True])
    reduce_kwargs = {"axis": 0, "min_count": 1, "numeric_only": False}

    tm.assert_series_equal(df.prod(**reduce_kwargs), Series(["a"]))

    msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")
    with pytest.raises(TypeError, match=msg):
        df.sum(**reduce_kwargs)
1509