1from functools import partial
2import operator
3import warnings
4
5import numpy as np
6import pytest
7
8import pandas.util._test_decorators as td
9
10from pandas.core.dtypes.common import is_integer_dtype
11
12import pandas as pd
13from pandas import Series, isna
14import pandas._testing as tm
15from pandas.core.arrays import DatetimeArray
16import pandas.core.nanops as nanops
17
# Snapshot of the bottleneck switch taken at import time; the test class below
# turns the switch off in setup_method and writes this value back in
# teardown_method.
use_bn = nanops._USE_BOTTLENECK
# True when this numpy exposes complex128; check_results uses it to pick the
# dtype ("c16" or "f8") that object arrays are cast to before comparison.
has_c16 = hasattr(np, "complex128")
20
21
@pytest.fixture(params=[True, False])
def skipna(request):
    """
    Fixture to pass skipna to nanops functions.

    Parametrized over ``True`` and ``False``; the value of the current
    parametrization is returned unchanged.
    """
    return request.param
28
29
30class TestnanopsDataFrame:
    def setup_method(self, method):
        """
        Build the arrays used by the tests of this class.

        Seeds numpy's global random state, switches bottleneck off in
        ``nanops`` and stores float, complex, int, bool, bytes/unicode string,
        datetime64, timedelta64 and object arrays, together with variants
        padded with NaN and/or inf blocks, as instance attributes.

        ``method`` is not used here.
        """
        # fixed seed: the random arrays below are the same on every run
        np.random.seed(11235)
        # teardown_method writes the original value back
        nanops._USE_BOTTLENECK = False

        arr_shape = (11, 7)

        # base arrays, one per dtype family
        self.arr_float = np.random.randn(*arr_shape)
        self.arr_float1 = np.random.randn(*arr_shape)
        self.arr_complex = self.arr_float + self.arr_float1 * 1j
        self.arr_int = np.random.randint(-10, 10, arr_shape)
        self.arr_bool = np.random.randint(0, 2, arr_shape) == 0
        self.arr_str = np.abs(self.arr_float).astype("S")
        self.arr_utf = np.abs(self.arr_float).astype("U")
        self.arr_date = np.random.randint(0, 20000, arr_shape).astype("M8[ns]")
        self.arr_tdelta = np.random.randint(0, 20000, arr_shape).astype("m8[ns]")

        # all-NaN block and float arrays with a NaN block stacked below/above
        self.arr_nan = np.tile(np.nan, arr_shape)
        self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan])
        self.arr_float1_nan = np.vstack([self.arr_float1, self.arr_nan])
        self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1])
        self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan])

        # every entry is +inf or -inf, following the sign of arr_float
        self.arr_inf = self.arr_float * np.inf
        self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf])

        self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf])
        self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan, self.arr_inf])
        self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan, self.arr_inf])
        # object array holding every base array; check_funs rebuilds this
        # attribute from the dtypes a given test allows
        self.arr_obj = np.vstack(
            [
                self.arr_float.astype("O"),
                self.arr_int.astype("O"),
                self.arr_bool.astype("O"),
                self.arr_complex.astype("O"),
                self.arr_str.astype("O"),
                self.arr_utf.astype("O"),
                self.arr_date.astype("O"),
                self.arr_tdelta.astype("O"),
            ]
        )

        # complex variants; "invalid value" floating point warnings raised
        # while combining NaN/inf with 1j are silenced
        with np.errstate(invalid="ignore"):
            self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j
            self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj])

            self.arr_nan_infj = self.arr_inf * 1j
            self.arr_complex_nan_infj = np.vstack([self.arr_complex, self.arr_nan_infj])

        # 2-D aliases (same objects as above)
        self.arr_float_2d = self.arr_float
        self.arr_float1_2d = self.arr_float1

        self.arr_nan_2d = self.arr_nan
        self.arr_float_nan_2d = self.arr_float_nan
        self.arr_float1_nan_2d = self.arr_float1_nan
        self.arr_nan_float1_2d = self.arr_nan_float1

        # 1-D versions: first column of each 2-D array
        self.arr_float_1d = self.arr_float[:, 0]
        self.arr_float1_1d = self.arr_float1[:, 0]

        self.arr_nan_1d = self.arr_nan[:, 0]
        self.arr_float_nan_1d = self.arr_float_nan[:, 0]
        self.arr_float1_nan_1d = self.arr_float1_nan[:, 0]
        self.arr_nan_float1_1d = self.arr_nan_float1[:, 0]
94
    def teardown_method(self, method):
        """
        Restore the bottleneck switch that setup_method turned off.

        ``method`` is not used here.
        """
        # use_bn is the value captured when this module was imported
        nanops._USE_BOTTLENECK = use_bn
97
    def check_results(self, targ, res, axis, check_dtype=True):
        """
        Assert that ``res`` (nanops output) matches ``targ`` (reference output).

        Parameters
        ----------
        targ : scalar or ndarray
            Expected value.
        res : scalar or ndarray
            Value under test.
        axis : int or None
            Axis the reduction was computed over.
        check_dtype : bool, default True
            Forwarded to ``tm.assert_almost_equal``.

        Raises
        ------
        AssertionError
            If the values differ and the complex/object fallback comparison
            does not apply or also fails.
        """
        # use the ``asm8`` attribute when the result has one
        # NOTE(review): presumably Timestamp/Timedelta scalars -- confirm
        res = getattr(res, "asm8", res)

        # for reductions along a non-zero axis whose shapes disagree, keep
        # only the leading targ.shape[0] rows of the result
        if (
            axis != 0
            and hasattr(targ, "shape")
            and targ.ndim
            and targ.shape != res.shape
        ):
            res = np.split(res, [targ.shape[0]], axis=0)[0]

        try:
            tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
        except AssertionError:

            # timedelta targets get no fallback comparison: the failure stands
            if hasattr(targ, "dtype") and targ.dtype == "m8[ns]":
                raise

            # There are sometimes rounding errors with
            # complex and object dtypes.
            # If it isn't one of those, re-raise the error.
            if not hasattr(res, "dtype") or res.dtype.kind not in ["c", "O"]:
                raise
            # convert object dtypes to something that can be split into
            # real and imaginary parts
            if res.dtype.kind == "O":
                if targ.dtype.kind != "O":
                    res = res.astype(targ.dtype)
                else:
                    # both sides are object: cast both to the same numeric dtype
                    cast_dtype = "c16" if has_c16 else "f8"
                    res = res.astype(cast_dtype)
                    targ = targ.astype(cast_dtype)
            # there should never be a case where numpy returns an object
            # but nanops doesn't, so make that an exception
            elif targ.dtype.kind == "O":
                raise
            # second attempt: compare real and imaginary parts separately
            tm.assert_almost_equal(np.real(targ), np.real(res), check_dtype=check_dtype)
            tm.assert_almost_equal(np.imag(targ), np.imag(res), check_dtype=check_dtype)
137
138    def check_fun_data(
139        self,
140        testfunc,
141        targfunc,
142        testarval,
143        targarval,
144        skipna,
145        check_dtype=True,
146        empty_targfunc=None,
147        **kwargs,
148    ):
149        for axis in list(range(targarval.ndim)) + [None]:
150            targartempval = targarval if skipna else testarval
151            if skipna and empty_targfunc and isna(targartempval).all():
152                targ = empty_targfunc(targartempval, axis=axis, **kwargs)
153            else:
154                targ = targfunc(targartempval, axis=axis, **kwargs)
155
156            res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs)
157            self.check_results(targ, res, axis, check_dtype=check_dtype)
158            if skipna:
159                res = testfunc(testarval, axis=axis, **kwargs)
160                self.check_results(targ, res, axis, check_dtype=check_dtype)
161            if axis is None:
162                res = testfunc(testarval, skipna=skipna, **kwargs)
163                self.check_results(targ, res, axis, check_dtype=check_dtype)
164            if skipna and axis is None:
165                res = testfunc(testarval, **kwargs)
166                self.check_results(targ, res, axis, check_dtype=check_dtype)
167
168        if testarval.ndim <= 1:
169            return
170
171        # Recurse on lower-dimension
172        testarval2 = np.take(testarval, 0, axis=-1)
173        targarval2 = np.take(targarval, 0, axis=-1)
174        self.check_fun_data(
175            testfunc,
176            targfunc,
177            testarval2,
178            targarval2,
179            skipna=skipna,
180            check_dtype=check_dtype,
181            empty_targfunc=empty_targfunc,
182            **kwargs,
183        )
184
185    def check_fun(
186        self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs
187    ):
188
189        targar = testar
190        if testar.endswith("_nan") and hasattr(self, testar[:-4]):
191            targar = testar[:-4]
192
193        testarval = getattr(self, testar)
194        targarval = getattr(self, targar)
195        self.check_fun_data(
196            testfunc,
197            targfunc,
198            testarval,
199            targarval,
200            skipna=skipna,
201            empty_targfunc=empty_targfunc,
202            **kwargs,
203        )
204
205    def check_funs(
206        self,
207        testfunc,
208        targfunc,
209        skipna,
210        allow_complex=True,
211        allow_all_nan=True,
212        allow_date=True,
213        allow_tdelta=True,
214        allow_obj=True,
215        **kwargs,
216    ):
217        self.check_fun(testfunc, targfunc, "arr_float", skipna, **kwargs)
218        self.check_fun(testfunc, targfunc, "arr_float_nan", skipna, **kwargs)
219        self.check_fun(testfunc, targfunc, "arr_int", skipna, **kwargs)
220        self.check_fun(testfunc, targfunc, "arr_bool", skipna, **kwargs)
221        objs = [
222            self.arr_float.astype("O"),
223            self.arr_int.astype("O"),
224            self.arr_bool.astype("O"),
225        ]
226
227        if allow_all_nan:
228            self.check_fun(testfunc, targfunc, "arr_nan", skipna, **kwargs)
229
230        if allow_complex:
231            self.check_fun(testfunc, targfunc, "arr_complex", skipna, **kwargs)
232            self.check_fun(testfunc, targfunc, "arr_complex_nan", skipna, **kwargs)
233            if allow_all_nan:
234                self.check_fun(testfunc, targfunc, "arr_nan_nanj", skipna, **kwargs)
235            objs += [self.arr_complex.astype("O")]
236
237        if allow_date:
238            targfunc(self.arr_date)
239            self.check_fun(testfunc, targfunc, "arr_date", skipna, **kwargs)
240            objs += [self.arr_date.astype("O")]
241
242        if allow_tdelta:
243            try:
244                targfunc(self.arr_tdelta)
245            except TypeError:
246                pass
247            else:
248                self.check_fun(testfunc, targfunc, "arr_tdelta", skipna, **kwargs)
249                objs += [self.arr_tdelta.astype("O")]
250
251        if allow_obj:
252            self.arr_obj = np.vstack(objs)
253            # some nanops handle object dtypes better than their numpy
254            # counterparts, so the numpy functions need to be given something
255            # else
256            if allow_obj == "convert":
257                targfunc = partial(
258                    self._badobj_wrap, func=targfunc, allow_complex=allow_complex
259                )
260            self.check_fun(testfunc, targfunc, "arr_obj", skipna, **kwargs)
261
262    def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
263        if value.dtype.kind == "O":
264            if allow_complex:
265                value = value.astype("c16")
266            else:
267                value = value.astype("f8")
268        return func(value, **kwargs)
269
270    @pytest.mark.parametrize(
271        "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)]
272    )
273    def test_nan_funcs(self, nan_op, np_op, skipna):
274        self.check_funs(nan_op, np_op, skipna, allow_all_nan=False, allow_date=False)
275
276    def test_nansum(self, skipna):
277        self.check_funs(
278            nanops.nansum,
279            np.sum,
280            skipna,
281            allow_date=False,
282            check_dtype=False,
283            empty_targfunc=np.nansum,
284        )
285
286    def test_nanmean(self, skipna):
287        self.check_funs(
288            nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False
289        )
290
291    def test_nanmean_overflow(self):
292        # GH 10155
293        # In the previous implementation mean can overflow for int dtypes, it
294        # is now consistent with numpy
295
296        for a in [2 ** 55, -(2 ** 55), 20150515061816532]:
297            s = Series(a, index=range(500), dtype=np.int64)
298            result = s.mean()
299            np_result = s.values.mean()
300            assert result == a
301            assert result == np_result
302            assert result.dtype == np.float64
303
304    @pytest.mark.parametrize(
305        "dtype",
306        [
307            np.int16,
308            np.int32,
309            np.int64,
310            np.float32,
311            np.float64,
312            getattr(np, "float128", None),
313        ],
314    )
315    def test_returned_dtype(self, dtype):
316        if dtype is None:
317            # no float128 available
318            return
319
320        s = Series(range(10), dtype=dtype)
321        group_a = ["mean", "std", "var", "skew", "kurt"]
322        group_b = ["min", "max"]
323        for method in group_a + group_b:
324            result = getattr(s, method)()
325            if is_integer_dtype(dtype) and method in group_a:
326                assert result.dtype == np.float64
327            else:
328                assert result.dtype == dtype
329
330    def test_nanmedian(self, skipna):
331        with warnings.catch_warnings(record=True):
332            warnings.simplefilter("ignore", RuntimeWarning)
333            self.check_funs(
334                nanops.nanmedian,
335                np.median,
336                skipna,
337                allow_complex=False,
338                allow_date=False,
339                allow_obj="convert",
340            )
341
342    @pytest.mark.parametrize("ddof", range(3))
343    def test_nanvar(self, ddof, skipna):
344        self.check_funs(
345            nanops.nanvar,
346            np.var,
347            skipna,
348            allow_complex=False,
349            allow_date=False,
350            allow_obj="convert",
351            ddof=ddof,
352        )
353
354    @pytest.mark.parametrize("ddof", range(3))
355    def test_nanstd(self, ddof, skipna):
356        self.check_funs(
357            nanops.nanstd,
358            np.std,
359            skipna,
360            allow_complex=False,
361            allow_date=False,
362            allow_obj="convert",
363            ddof=ddof,
364        )
365
366    @td.skip_if_no_scipy
367    @pytest.mark.parametrize("ddof", range(3))
368    def test_nansem(self, ddof, skipna):
369        from scipy.stats import sem
370
371        with np.errstate(invalid="ignore"):
372            self.check_funs(
373                nanops.nansem,
374                sem,
375                skipna,
376                allow_complex=False,
377                allow_date=False,
378                allow_tdelta=False,
379                allow_obj="convert",
380                ddof=ddof,
381            )
382
383    @pytest.mark.parametrize(
384        "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)]
385    )
386    def test_nanops_with_warnings(self, nan_op, np_op, skipna):
387        with warnings.catch_warnings(record=True):
388            warnings.simplefilter("ignore", RuntimeWarning)
389            self.check_funs(nan_op, np_op, skipna, allow_obj=False)
390
391    def _argminmax_wrap(self, value, axis=None, func=None):
392        res = func(value, axis)
393        nans = np.min(value, axis)
394        nullnan = isna(nans)
395        if res.ndim:
396            res[nullnan] = -1
397        elif (
398            hasattr(nullnan, "all")
399            and nullnan.all()
400            or not hasattr(nullnan, "all")
401            and nullnan
402        ):
403            res = -1
404        return res
405
406    def test_nanargmax(self, skipna):
407        with warnings.catch_warnings(record=True):
408            warnings.simplefilter("ignore", RuntimeWarning)
409            func = partial(self._argminmax_wrap, func=np.argmax)
410            self.check_funs(nanops.nanargmax, func, skipna, allow_obj=False)
411
412    def test_nanargmin(self, skipna):
413        with warnings.catch_warnings(record=True):
414            warnings.simplefilter("ignore", RuntimeWarning)
415            func = partial(self._argminmax_wrap, func=np.argmin)
416            self.check_funs(nanops.nanargmin, func, skipna, allow_obj=False)
417
418    def _skew_kurt_wrap(self, values, axis=None, func=None):
419        if not isinstance(values.dtype.type, np.floating):
420            values = values.astype("f8")
421        result = func(values, axis=axis, bias=False)
422        # fix for handling cases where all elements in an axis are the same
423        if isinstance(result, np.ndarray):
424            result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
425            return result
426        elif np.max(values) == np.min(values):
427            return 0.0
428        return result
429
430    @td.skip_if_no_scipy
431    def test_nanskew(self, skipna):
432        from scipy.stats import skew
433
434        func = partial(self._skew_kurt_wrap, func=skew)
435        with np.errstate(invalid="ignore"):
436            self.check_funs(
437                nanops.nanskew,
438                func,
439                skipna,
440                allow_complex=False,
441                allow_date=False,
442                allow_tdelta=False,
443            )
444
445    @td.skip_if_no_scipy
446    def test_nankurt(self, skipna):
447        from scipy.stats import kurtosis
448
449        func1 = partial(kurtosis, fisher=True)
450        func = partial(self._skew_kurt_wrap, func=func1)
451        with np.errstate(invalid="ignore"):
452            self.check_funs(
453                nanops.nankurt,
454                func,
455                skipna,
456                allow_complex=False,
457                allow_date=False,
458                allow_tdelta=False,
459            )
460
461    def test_nanprod(self, skipna):
462        self.check_funs(
463            nanops.nanprod,
464            np.prod,
465            skipna,
466            allow_date=False,
467            allow_tdelta=False,
468            empty_targfunc=np.nanprod,
469        )
470
471    def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
472        res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs)
473        res01 = checkfun(
474            self.arr_float_2d,
475            self.arr_float1_2d,
476            min_periods=len(self.arr_float_2d) - 1,
477            **kwargs,
478        )
479        tm.assert_almost_equal(targ0, res00)
480        tm.assert_almost_equal(targ0, res01)
481
482        res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs)
483        res11 = checkfun(
484            self.arr_float_nan_2d,
485            self.arr_float1_nan_2d,
486            min_periods=len(self.arr_float_2d) - 1,
487            **kwargs,
488        )
489        tm.assert_almost_equal(targ1, res10)
490        tm.assert_almost_equal(targ1, res11)
491
492        targ2 = np.nan
493        res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs)
494        res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs)
495        res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs)
496        res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs)
497        res24 = checkfun(
498            self.arr_float_nan_2d,
499            self.arr_nan_float1_2d,
500            min_periods=len(self.arr_float_2d) - 1,
501            **kwargs,
502        )
503        res25 = checkfun(
504            self.arr_float_2d,
505            self.arr_float1_2d,
506            min_periods=len(self.arr_float_2d) + 1,
507            **kwargs,
508        )
509        tm.assert_almost_equal(targ2, res20)
510        tm.assert_almost_equal(targ2, res21)
511        tm.assert_almost_equal(targ2, res22)
512        tm.assert_almost_equal(targ2, res23)
513        tm.assert_almost_equal(targ2, res24)
514        tm.assert_almost_equal(targ2, res25)
515
516    def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs):
517        res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs)
518        res01 = checkfun(
519            self.arr_float_1d,
520            self.arr_float1_1d,
521            min_periods=len(self.arr_float_1d) - 1,
522            **kwargs,
523        )
524        tm.assert_almost_equal(targ0, res00)
525        tm.assert_almost_equal(targ0, res01)
526
527        res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs)
528        res11 = checkfun(
529            self.arr_float_nan_1d,
530            self.arr_float1_nan_1d,
531            min_periods=len(self.arr_float_1d) - 1,
532            **kwargs,
533        )
534        tm.assert_almost_equal(targ1, res10)
535        tm.assert_almost_equal(targ1, res11)
536
537        targ2 = np.nan
538        res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs)
539        res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs)
540        res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs)
541        res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs)
542        res24 = checkfun(
543            self.arr_float_nan_1d,
544            self.arr_nan_float1_1d,
545            min_periods=len(self.arr_float_1d) - 1,
546            **kwargs,
547        )
548        res25 = checkfun(
549            self.arr_float_1d,
550            self.arr_float1_1d,
551            min_periods=len(self.arr_float_1d) + 1,
552            **kwargs,
553        )
554        tm.assert_almost_equal(targ2, res20)
555        tm.assert_almost_equal(targ2, res21)
556        tm.assert_almost_equal(targ2, res22)
557        tm.assert_almost_equal(targ2, res23)
558        tm.assert_almost_equal(targ2, res24)
559        tm.assert_almost_equal(targ2, res25)
560
561    def test_nancorr(self):
562        targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
563        targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
564        self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1)
565        targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
566        targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
567        self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
568
569    def test_nancorr_pearson(self):
570        targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
571        targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
572        self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson")
573        targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
574        targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
575        self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
576
577    @td.skip_if_no_scipy
578    def test_nancorr_kendall(self):
579        from scipy.stats import kendalltau
580
581        targ0 = kendalltau(self.arr_float_2d, self.arr_float1_2d)[0]
582        targ1 = kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
583        self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="kendall")
584        targ0 = kendalltau(self.arr_float_1d, self.arr_float1_1d)[0]
585        targ1 = kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
586        self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="kendall")
587
588    @td.skip_if_no_scipy
589    def test_nancorr_spearman(self):
590        from scipy.stats import spearmanr
591
592        targ0 = spearmanr(self.arr_float_2d, self.arr_float1_2d)[0]
593        targ1 = spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
594        self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="spearman")
595        targ0 = spearmanr(self.arr_float_1d, self.arr_float1_1d)[0]
596        targ1 = spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
597        self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="spearman")
598
599    @td.skip_if_no_scipy
600    def test_invalid_method(self):
601        targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
602        targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
603        msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'"
604        with pytest.raises(ValueError, match=msg):
605            self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo")
606
607    def test_nancov(self):
608        targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1]
609        targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
610        self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1)
611        targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1]
612        targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
613        self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
614
615    def check_nancomp(self, checkfun, targ0):
616        arr_float = self.arr_float
617        arr_float1 = self.arr_float1
618        arr_nan = self.arr_nan
619        arr_nan_nan = self.arr_nan_nan
620        arr_float_nan = self.arr_float_nan
621        arr_float1_nan = self.arr_float1_nan
622        arr_nan_float1 = self.arr_nan_float1
623
624        while targ0.ndim:
625            res0 = checkfun(arr_float, arr_float1)
626            tm.assert_almost_equal(targ0, res0)
627
628            if targ0.ndim > 1:
629                targ1 = np.vstack([targ0, arr_nan])
630            else:
631                targ1 = np.hstack([targ0, arr_nan])
632            res1 = checkfun(arr_float_nan, arr_float1_nan)
633            tm.assert_numpy_array_equal(targ1, res1, check_dtype=False)
634
635            targ2 = arr_nan_nan
636            res2 = checkfun(arr_float_nan, arr_nan_float1)
637            tm.assert_numpy_array_equal(targ2, res2, check_dtype=False)
638
639            # Lower dimension for next step in the loop
640            arr_float = np.take(arr_float, 0, axis=-1)
641            arr_float1 = np.take(arr_float1, 0, axis=-1)
642            arr_nan = np.take(arr_nan, 0, axis=-1)
643            arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1)
644            arr_float_nan = np.take(arr_float_nan, 0, axis=-1)
645            arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1)
646            arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1)
647            targ0 = np.take(targ0, 0, axis=-1)
648
649    @pytest.mark.parametrize(
650        "op,nanop",
651        [
652            (operator.eq, nanops.naneq),
653            (operator.ne, nanops.nanne),
654            (operator.gt, nanops.nangt),
655            (operator.ge, nanops.nange),
656            (operator.lt, nanops.nanlt),
657            (operator.le, nanops.nanle),
658        ],
659    )
660    def test_nan_comparison(self, op, nanop):
661        targ0 = op(self.arr_float, self.arr_float1)
662        self.check_nancomp(nanop, targ0)
663
664    def check_bool(self, func, value, correct):
665        while getattr(value, "ndim", True):
666            res0 = func(value)
667            if correct:
668                assert res0
669            else:
670                assert not res0
671
672            if not hasattr(value, "ndim"):
673                break
674
675            # Reduce dimension for next step in the loop
676            value = np.take(value, 0, axis=-1)
677
678    def test__has_infs(self):
679        pairs = [
680            ("arr_complex", False),
681            ("arr_int", False),
682            ("arr_bool", False),
683            ("arr_str", False),
684            ("arr_utf", False),
685            ("arr_complex", False),
686            ("arr_complex_nan", False),
687            ("arr_nan_nanj", False),
688            ("arr_nan_infj", True),
689            ("arr_complex_nan_infj", True),
690        ]
691        pairs_float = [
692            ("arr_float", False),
693            ("arr_nan", False),
694            ("arr_float_nan", False),
695            ("arr_nan_nan", False),
696            ("arr_float_inf", True),
697            ("arr_inf", True),
698            ("arr_nan_inf", True),
699            ("arr_float_nan_inf", True),
700            ("arr_nan_nan_inf", True),
701        ]
702
703        for arr, correct in pairs:
704            val = getattr(self, arr)
705            self.check_bool(nanops._has_infs, val, correct)
706
707        for arr, correct in pairs_float:
708            val = getattr(self, arr)
709            self.check_bool(nanops._has_infs, val, correct)
710            self.check_bool(nanops._has_infs, val.astype("f4"), correct)
711            self.check_bool(nanops._has_infs, val.astype("f2"), correct)
712
713    def test__bn_ok_dtype(self):
714        assert nanops._bn_ok_dtype(self.arr_float.dtype, "test")
715        assert nanops._bn_ok_dtype(self.arr_complex.dtype, "test")
716        assert nanops._bn_ok_dtype(self.arr_int.dtype, "test")
717        assert nanops._bn_ok_dtype(self.arr_bool.dtype, "test")
718        assert nanops._bn_ok_dtype(self.arr_str.dtype, "test")
719        assert nanops._bn_ok_dtype(self.arr_utf.dtype, "test")
720        assert not nanops._bn_ok_dtype(self.arr_date.dtype, "test")
721        assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, "test")
722        assert not nanops._bn_ok_dtype(self.arr_obj.dtype, "test")
723
724
class TestEnsureNumeric:
    """
    Checks for ``nanops._ensure_numeric`` on scalars, arrays and strings.
    """

    def test_numeric_values(self):
        # integer, float and complex scalars come back equal to the input
        for scalar in (1, 1.1, 1 + 2j):
            assert nanops._ensure_numeric(scalar) == scalar

    def test_ndarray(self):
        expected = np.array([1, 2, 3])
        # numeric ndarray, object ndarray, convertible string ndarray
        candidates = (
            expected,
            expected.astype(object),
            np.array(["1", "2", "3"], dtype=object),
        )
        for candidate in candidates:
            assert np.allclose(nanops._ensure_numeric(candidate), expected)

        # non-convertible string ndarray
        words = np.array(["foo", "bar", "baz"], dtype=object)
        with pytest.raises(TypeError, match=r"Could not convert .* to numeric"):
            nanops._ensure_numeric(words)

    def test_convertable_values(self):
        conversions = (("1", 1.0), ("1.1", 1.1), ("1+1j", 1 + 1j))
        for text, number in conversions:
            assert np.allclose(nanops._ensure_numeric(text), number)

    def test_non_convertable_values(self):
        with pytest.raises(TypeError, match="Could not convert foo to numeric"):
            nanops._ensure_numeric("foo")

        # with the wrong type, python raises TypeError for us
        wrong_type_msg = "argument must be a string or a number"
        for container in ({}, []):
            with pytest.raises(TypeError, match=wrong_type_msg):
                nanops._ensure_numeric(container)
771
772
class TestNanvarFixedValues:
    """
    Check nanops.nanvar and nanops.nanstd against known variances.

    The fixture data is a large normal sample whose variance is fixed in
    ``setup_method``; individual tests add NaNs, a second row, or use
    precomputed reference values.
    """

    # xref GH10242

    def setup_method(self, method):
        # Samples from a normal distribution.
        self.variance = variance = 3.0
        self.samples = self.prng.normal(scale=variance ** 0.5, size=100000)

    def test_nanvar_all_finite(self):
        # With no NaNs present the sample variance should match the
        # variance the samples were drawn with (within 1%).
        samples = self.samples
        actual_variance = nanops.nanvar(samples)
        tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2)

    def test_nanvar_nans(self):
        # Interleave the samples with NaN: every odd position stays NaN.
        samples = np.nan * np.ones(2 * self.samples.shape[0])
        samples[::2] = self.samples

        actual_variance = nanops.nanvar(samples, skipna=True)
        tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2)

        actual_variance = nanops.nanvar(samples, skipna=False)
        tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2)

    def test_nanstd_nans(self):
        # Interleave the samples with NaN: every odd position stays NaN.
        samples = np.nan * np.ones(2 * self.samples.shape[0])
        samples[::2] = self.samples

        actual_std = nanops.nanstd(samples, skipna=True)
        tm.assert_almost_equal(actual_std, self.variance ** 0.5, rtol=1e-2)

        # Exercise nanstd (not nanvar) so that NaN propagation is checked
        # for the function this test is named after.
        actual_std = nanops.nanstd(samples, skipna=False)
        tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2)

    def test_nanvar_axis(self):
        # Generate some sample data.
        samples_norm = self.samples
        samples_unif = self.prng.uniform(size=samples_norm.shape[0])
        samples = np.vstack([samples_norm, samples_unif])

        # Row 0 is the normal sample, row 1 the uniform sample, whose
        # expected variance is 1/12.
        actual_variance = nanops.nanvar(samples, axis=1)
        tm.assert_almost_equal(
            actual_variance, np.array([self.variance, 1.0 / 12]), rtol=1e-2
        )

    def test_nanvar_ddof(self):
        n = 5
        samples = self.prng.uniform(size=(10000, n + 1))
        samples[:, -1] = np.nan  # Force use of our own algorithm.

        # Average the per-row variances over all rows for each ddof.
        variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean()
        variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean()
        variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean()

        # The unbiased estimate.
        var = 1.0 / 12
        tm.assert_almost_equal(variance_1, var, rtol=1e-2)

        # The underestimated variance.
        tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, rtol=1e-2)

        # The overestimated variance.
        tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, rtol=1e-2)

    def test_ground_truth(self):
        # Test against values that were precomputed with Numpy.
        samples = np.empty((4, 4))
        samples[:3, :3] = np.array(
            [
                [0.97303362, 0.21869576, 0.55560287],
                [0.72980153, 0.03109364, 0.99155171],
                [0.09317602, 0.60078248, 0.15871292],
            ]
        )
        # Pad the 3x3 data with a NaN-only last row and last column.
        samples[3] = samples[:, 3] = np.nan

        # Actual variances along axis=0, 1 for ddof=0, 1, 2
        variance = np.array(
            [
                [
                    [0.13762259, 0.05619224, 0.11568816],
                    [0.20643388, 0.08428837, 0.17353224],
                    [0.41286776, 0.16857673, 0.34706449],
                ],
                [
                    [0.09519783, 0.16435395, 0.05082054],
                    [0.14279674, 0.24653093, 0.07623082],
                    [0.28559348, 0.49306186, 0.15246163],
                ],
            ]
        )

        # Test nanvar.
        for axis in range(2):
            for ddof in range(3):
                var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof)
                tm.assert_almost_equal(var[:3], variance[axis, ddof])
                # The all-NaN row/column has no valid observations.
                assert np.isnan(var[3])

        # Test nanstd.
        for axis in range(2):
            for ddof in range(3):
                std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof)
                tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
                assert np.isnan(std[3])

    def test_nanstd_roundoff(self):
        # Regression test for GH 10242 (test data taken from GH 10489). Ensure
        # that variance is stable.
        data = Series(766897346 * np.ones(10))
        for ddof in range(3):
            result = data.std(ddof=ddof)
            assert result == 0.0

    @property
    def prng(self):
        """
        Return a new RandomState seeded with 1234.

        A fresh generator is built on every access, so each use of
        ``self.prng`` starts from the same seeded state.
        """
        return np.random.RandomState(1234)
890
891
class TestNanskewFixedValues:
    """
    Check nanops.nanskew on fixed inputs and against a reference value.
    """

    # xref GH 11974

    @property
    def prng(self):
        """
        Return a new RandomState seeded with 1234 on every access.
        """
        return np.random.RandomState(1234)

    def setup_method(self, method):
        # Test data + skewness value (computed with scipy.stats.skew)
        self.actual_skew = -0.1875895205961754
        self.samples = np.sin(np.linspace(0, 1, 200))

    def test_constant_series(self):
        # xref GH 11974
        for level in (3075.2, 3075.3, 3075.5):
            flat = np.full(300, level)
            assert nanops.nanskew(flat) == 0.0

    def test_all_finite(self):
        # Each draw below comes from a freshly seeded generator.
        left_tailed = self.prng.beta(0.3, 0.1, size=100)
        assert nanops.nanskew(left_tailed) < 0

        right_tailed = self.prng.beta(0.1, 0.3, size=100)
        assert nanops.nanskew(right_tailed) > 0

    def test_ground_truth(self):
        tm.assert_almost_equal(nanops.nanskew(self.samples), self.actual_skew)

    def test_axis(self):
        # Second row is entirely NaN, so its skew is expected to be NaN.
        nan_row = np.full(len(self.samples), np.nan)
        stacked = np.vstack([self.samples, nan_row])
        expected = np.array([self.actual_skew, np.nan])
        tm.assert_almost_equal(nanops.nanskew(stacked, axis=1), expected)

    def test_nans(self):
        # A single trailing NaN with skipna=False yields NaN.
        with_nan = np.hstack([self.samples, np.nan])
        assert np.isnan(nanops.nanskew(with_nan, skipna=False))

    def test_nans_skipna(self):
        # A single trailing NaN is ignored when skipna=True.
        with_nan = np.hstack([self.samples, np.nan])
        observed = nanops.nanskew(with_nan, skipna=True)
        tm.assert_almost_equal(observed, self.actual_skew)
939
940
class TestNankurtFixedValues:
    """
    Check nanops.nankurt on fixed inputs and against a reference value.
    """

    # xref GH 11974

    @property
    def prng(self):
        """
        Return a new RandomState seeded with 1234 on every access.
        """
        return np.random.RandomState(1234)

    def setup_method(self, method):
        # Test data + kurtosis value (computed with scipy.stats.kurtosis)
        self.actual_kurt = -1.2058303433799713
        self.samples = np.sin(np.linspace(0, 1, 200))

    def test_constant_series(self):
        # xref GH 11974
        for level in (3075.2, 3075.3, 3075.5):
            flat = np.full(300, level)
            assert nanops.nankurt(flat) == 0.0

    def test_all_finite(self):
        # Each draw below comes from a freshly seeded generator.
        first_draw = self.prng.beta(0.3, 0.1, size=100)
        assert nanops.nankurt(first_draw) < 0

        second_draw = self.prng.beta(0.1, 0.3, size=100)
        assert nanops.nankurt(second_draw) > 0

    def test_ground_truth(self):
        tm.assert_almost_equal(nanops.nankurt(self.samples), self.actual_kurt)

    def test_axis(self):
        # Second row is entirely NaN, so its kurtosis is expected to be NaN.
        nan_row = np.full(len(self.samples), np.nan)
        stacked = np.vstack([self.samples, nan_row])
        expected = np.array([self.actual_kurt, np.nan])
        tm.assert_almost_equal(nanops.nankurt(stacked, axis=1), expected)

    def test_nans(self):
        # A single trailing NaN with skipna=False yields NaN.
        with_nan = np.hstack([self.samples, np.nan])
        assert np.isnan(nanops.nankurt(with_nan, skipna=False))

    def test_nans_skipna(self):
        # A single trailing NaN is ignored when skipna=True.
        with_nan = np.hstack([self.samples, np.nan])
        observed = nanops.nankurt(with_nan, skipna=True)
        tm.assert_almost_equal(observed, self.actual_kurt)
988
989
class TestDatetime64NaNOps:
    """
    Check nanops.nanmean on datetime64 / timedelta64 data.
    """

    # Enabling mean changes the behavior of DataFrame.mean
    # See https://github.com/pandas-dev/pandas/issues/24752
    def test_nanmean(self):
        dti = pd.date_range("2016-01-01", periods=3)
        # The middle of the three dates is the expected mean.
        expected = dti[1]

        for container in (dti, DatetimeArray(dti), Series(dti)):
            assert nanops.nanmean(container) == expected

        # Inserting a NaT must not change the result.
        dti_with_nat = dti.insert(1, pd.NaT)

        for container in (
            dti_with_nat,
            DatetimeArray(dti_with_nat),
            Series(dti_with_nat),
        ):
            assert nanops.nanmean(container) == expected

    @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
    def test_nanmean_skipna_false(self, dtype):
        # 4x3 array holding 0..11 viewed as ``dtype``; last element set to NaT.
        arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
        arr[-1, -1] = "NaT"

        # Full reduction: the single NaT makes the result NaT.
        overall = nanops.nanmean(arr, skipna=False)
        assert overall is pd.NaT

        # Reduction along axis 0: only the last column contains the NaT.
        by_column = nanops.nanmean(arr, axis=0, skipna=False)
        expected_by_column = np.array([4, 5, "NaT"], dtype=arr.dtype)
        tm.assert_numpy_array_equal(by_column, expected_by_column)

        # Reduction along axis 1: each complete row averages to its middle
        # element; the last row contains the NaT.
        by_row = nanops.nanmean(arr, axis=1, skipna=False)
        expected_by_row = np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]])
        tm.assert_numpy_array_equal(by_row, expected_by_row)
1023
1024
def test_use_bottleneck():
    """
    Check that the "use_bottleneck" option can be switched on and off.

    The test is a no-op when ``nanops._BOTTLENECK_INSTALLED`` is false.
    """
    if not nanops._BOTTLENECK_INSTALLED:
        return

    try:
        pd.set_option("use_bottleneck", True)
        assert pd.get_option("use_bottleneck")

        pd.set_option("use_bottleneck", False)
        assert not pd.get_option("use_bottleneck")
    finally:
        # Restore the value captured in ``use_bn`` even if an assertion
        # above fails, so the changed option cannot leak into other tests.
        pd.set_option("use_bottleneck", use_bn)
1036
1037
@pytest.mark.parametrize(
    "numpy_op, expected",
    [
        (np.sum, 10),
        (np.nansum, 10),
        (np.mean, 2.5),
        (np.nanmean, 2.5),
        (np.median, 2.5),
        (np.nanmedian, 2.5),
        (np.min, 1),
        (np.max, 4),
        (np.nanmin, 1),
        (np.nanmax, 4),
    ],
)
def test_numpy_ops(numpy_op, expected):
    """
    Applying a numpy reduction directly to a Series gives the scalar result.
    """
    # GH8383
    ser = Series([1, 2, 3, 4])
    assert numpy_op(ser) == expected
1057
1058
@pytest.mark.parametrize(
    "operation",
    [
        nanops.nanany,
        nanops.nanall,
        nanops.nansum,
        nanops.nanmean,
        nanops.nanmedian,
        nanops.nanstd,
        nanops.nanvar,
        nanops.nansem,
        nanops.nanargmax,
        nanops.nanargmin,
        nanops.nanmax,
        nanops.nanmin,
        nanops.nanskew,
        nanops.nankurt,
        nanops.nanprod,
    ],
)
def test_nanops_independent_of_mask_param(operation):
    """
    Each nanops reduction gives the same result with or without ``mask``.
    """
    # GH22764
    ser = Series([1, 2, np.nan, 3, np.nan, 4])
    na_mask = ser.isna()
    without_mask = operation(ser)
    with_mask = operation(ser, mask=na_mask)
    assert without_mask == with_mask
1086