1from functools import partial 2import operator 3import warnings 4 5import numpy as np 6import pytest 7 8import pandas.util._test_decorators as td 9 10from pandas.core.dtypes.common import is_integer_dtype 11 12import pandas as pd 13from pandas import Series, isna 14import pandas._testing as tm 15from pandas.core.arrays import DatetimeArray 16import pandas.core.nanops as nanops 17 18use_bn = nanops._USE_BOTTLENECK 19has_c16 = hasattr(np, "complex128") 20 21 22@pytest.fixture(params=[True, False]) 23def skipna(request): 24 """ 25 Fixture to pass skipna to nanops functions. 26 """ 27 return request.param 28 29 30class TestnanopsDataFrame: 31 def setup_method(self, method): 32 np.random.seed(11235) 33 nanops._USE_BOTTLENECK = False 34 35 arr_shape = (11, 7) 36 37 self.arr_float = np.random.randn(*arr_shape) 38 self.arr_float1 = np.random.randn(*arr_shape) 39 self.arr_complex = self.arr_float + self.arr_float1 * 1j 40 self.arr_int = np.random.randint(-10, 10, arr_shape) 41 self.arr_bool = np.random.randint(0, 2, arr_shape) == 0 42 self.arr_str = np.abs(self.arr_float).astype("S") 43 self.arr_utf = np.abs(self.arr_float).astype("U") 44 self.arr_date = np.random.randint(0, 20000, arr_shape).astype("M8[ns]") 45 self.arr_tdelta = np.random.randint(0, 20000, arr_shape).astype("m8[ns]") 46 47 self.arr_nan = np.tile(np.nan, arr_shape) 48 self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan]) 49 self.arr_float1_nan = np.vstack([self.arr_float1, self.arr_nan]) 50 self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1]) 51 self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan]) 52 53 self.arr_inf = self.arr_float * np.inf 54 self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf]) 55 56 self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf]) 57 self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan, self.arr_inf]) 58 self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan, self.arr_inf]) 59 self.arr_obj = np.vstack( 60 [ 61 self.arr_float.astype("O"), 62 self.arr_int.astype("O"), 63 self.arr_bool.astype("O"), 64 self.arr_complex.astype("O"), 65 self.arr_str.astype("O"), 66 self.arr_utf.astype("O"), 67 self.arr_date.astype("O"), 68 self.arr_tdelta.astype("O"), 69 ] 70 ) 71 72 with np.errstate(invalid="ignore"): 73 self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j 74 self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj]) 75 76 self.arr_nan_infj = self.arr_inf * 1j 77 self.arr_complex_nan_infj = np.vstack([self.arr_complex, self.arr_nan_infj]) 78 79 self.arr_float_2d = self.arr_float 80 self.arr_float1_2d = self.arr_float1 81 82 self.arr_nan_2d = self.arr_nan 83 self.arr_float_nan_2d = self.arr_float_nan 84 self.arr_float1_nan_2d = self.arr_float1_nan 85 self.arr_nan_float1_2d = self.arr_nan_float1 86 87 self.arr_float_1d = self.arr_float[:, 0] 88 self.arr_float1_1d = self.arr_float1[:, 0] 89 90 self.arr_nan_1d = self.arr_nan[:, 0] 91 self.arr_float_nan_1d = self.arr_float_nan[:, 0] 92 self.arr_float1_nan_1d = self.arr_float1_nan[:, 0] 93 self.arr_nan_float1_1d = self.arr_nan_float1[:, 0] 94 95 def teardown_method(self, method): 96 nanops._USE_BOTTLENECK = use_bn 97 98 def check_results(self, targ, res, axis, check_dtype=True): 99 res = getattr(res, "asm8", res) 100 101 if ( 102 axis != 0 103 and hasattr(targ, "shape") 104 and targ.ndim 105 and targ.shape != res.shape 106 ): 107 res = np.split(res, [targ.shape[0]], axis=0)[0] 108 109 try: 110 tm.assert_almost_equal(targ, res, check_dtype=check_dtype) 111 except AssertionError: 112 113 # handle timedelta dtypes 114 if hasattr(targ, "dtype") and targ.dtype == "m8[ns]": 115 raise 116 117 # There are sometimes rounding errors with 118 # complex and object dtypes. 119 # If it isn't one of those, re-raise the error. 120 if not hasattr(res, "dtype") or res.dtype.kind not in ["c", "O"]: 121 raise 122 # convert object dtypes to something that can be split into 123 # real and imaginary parts 124 if res.dtype.kind == "O": 125 if targ.dtype.kind != "O": 126 res = res.astype(targ.dtype) 127 else: 128 cast_dtype = "c16" if has_c16 else "f8" 129 res = res.astype(cast_dtype) 130 targ = targ.astype(cast_dtype) 131 # there should never be a case where numpy returns an object 132 # but nanops doesn't, so make that an exception 133 elif targ.dtype.kind == "O": 134 raise 135 tm.assert_almost_equal(np.real(targ), np.real(res), check_dtype=check_dtype) 136 tm.assert_almost_equal(np.imag(targ), np.imag(res), check_dtype=check_dtype) 137 138 def check_fun_data( 139 self, 140 testfunc, 141 targfunc, 142 testarval, 143 targarval, 144 skipna, 145 check_dtype=True, 146 empty_targfunc=None, 147 **kwargs, 148 ): 149 for axis in list(range(targarval.ndim)) + [None]: 150 targartempval = targarval if skipna else testarval 151 if skipna and empty_targfunc and isna(targartempval).all(): 152 targ = empty_targfunc(targartempval, axis=axis, **kwargs) 153 else: 154 targ = targfunc(targartempval, axis=axis, **kwargs) 155 156 res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs) 157 self.check_results(targ, res, axis, check_dtype=check_dtype) 158 if skipna: 159 res = testfunc(testarval, axis=axis, **kwargs) 160 self.check_results(targ, res, axis, check_dtype=check_dtype) 161 if axis is None: 162 res = testfunc(testarval, skipna=skipna, **kwargs) 163 self.check_results(targ, res, axis, check_dtype=check_dtype) 164 if skipna and axis is None: 165 res = testfunc(testarval, **kwargs) 166 self.check_results(targ, res, axis, check_dtype=check_dtype) 167 168 if testarval.ndim <= 1: 169 return 170 171 # Recurse on lower-dimension 172 testarval2 = np.take(testarval, 0, axis=-1) 173 targarval2 = np.take(targarval, 0, axis=-1) 174 self.check_fun_data( 175 testfunc, 176 targfunc, 177 testarval2, 178 targarval2, 179 skipna=skipna, 180 check_dtype=check_dtype, 181 empty_targfunc=empty_targfunc, 182 **kwargs, 183 ) 184 185 def check_fun( 186 self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs 187 ): 188 189 targar = testar 190 if testar.endswith("_nan") and hasattr(self, testar[:-4]): 191 targar = testar[:-4] 192 193 testarval = getattr(self, testar) 194 targarval = getattr(self, targar) 195 self.check_fun_data( 196 testfunc, 197 targfunc, 198 testarval, 199 targarval, 200 skipna=skipna, 201 empty_targfunc=empty_targfunc, 202 **kwargs, 203 ) 204 205 def check_funs( 206 self, 207 testfunc, 208 targfunc, 209 skipna, 210 allow_complex=True, 211 allow_all_nan=True, 212 allow_date=True, 213 allow_tdelta=True, 214 allow_obj=True, 215 **kwargs, 216 ): 217 self.check_fun(testfunc, targfunc, "arr_float", skipna, **kwargs) 218 self.check_fun(testfunc, targfunc, "arr_float_nan", skipna, **kwargs) 219 self.check_fun(testfunc, targfunc, "arr_int", skipna, **kwargs) 220 self.check_fun(testfunc, targfunc, "arr_bool", skipna, **kwargs) 221 objs = [ 222 self.arr_float.astype("O"), 223 self.arr_int.astype("O"), 224 self.arr_bool.astype("O"), 225 ] 226 227 if allow_all_nan: 228 self.check_fun(testfunc, targfunc, "arr_nan", skipna, **kwargs) 229 230 if allow_complex: 231 self.check_fun(testfunc, targfunc, "arr_complex", skipna, **kwargs) 232 self.check_fun(testfunc, targfunc, "arr_complex_nan", skipna, **kwargs) 233 if allow_all_nan: 234 self.check_fun(testfunc, targfunc, "arr_nan_nanj", skipna, **kwargs) 235 objs += [self.arr_complex.astype("O")] 236 237 if allow_date: 238 targfunc(self.arr_date) 239 self.check_fun(testfunc, targfunc, "arr_date", skipna, **kwargs) 240 objs += [self.arr_date.astype("O")] 241 242 if allow_tdelta: 243 try: 244 targfunc(self.arr_tdelta) 245 except TypeError: 246 pass 247 else: 248 self.check_fun(testfunc, targfunc, "arr_tdelta", skipna, **kwargs) 249 objs += [self.arr_tdelta.astype("O")] 250 251 if allow_obj: 252 self.arr_obj = np.vstack(objs) 253 # some nanops handle object dtypes better than their numpy 254 # counterparts, so the numpy functions need to be given something 255 # else 256 if allow_obj == "convert": 257 targfunc = partial( 258 self._badobj_wrap, func=targfunc, allow_complex=allow_complex 259 ) 260 self.check_fun(testfunc, targfunc, "arr_obj", skipna, **kwargs) 261 262 def _badobj_wrap(self, value, func, allow_complex=True, **kwargs): 263 if value.dtype.kind == "O": 264 if allow_complex: 265 value = value.astype("c16") 266 else: 267 value = value.astype("f8") 268 return func(value, **kwargs) 269 270 @pytest.mark.parametrize( 271 "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)] 272 ) 273 def test_nan_funcs(self, nan_op, np_op, skipna): 274 self.check_funs(nan_op, np_op, skipna, allow_all_nan=False, allow_date=False) 275 276 def test_nansum(self, skipna): 277 self.check_funs( 278 nanops.nansum, 279 np.sum, 280 skipna, 281 allow_date=False, 282 check_dtype=False, 283 empty_targfunc=np.nansum, 284 ) 285 286 def test_nanmean(self, skipna): 287 self.check_funs( 288 nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False 289 ) 290 291 def test_nanmean_overflow(self): 292 # GH 10155 293 # In the previous implementation mean can overflow for int dtypes, it 294 # is now consistent with numpy 295 296 for a in [2 ** 55, -(2 ** 55), 20150515061816532]: 297 s = Series(a, index=range(500), dtype=np.int64) 298 result = s.mean() 299 np_result = s.values.mean() 300 assert result == a 301 assert result == np_result 302 assert result.dtype == np.float64 303 304 @pytest.mark.parametrize( 305 "dtype", 306 [ 307 np.int16, 308 np.int32, 309 np.int64, 310 np.float32, 311 np.float64, 312 getattr(np, "float128", None), 313 ], 314 ) 315 def test_returned_dtype(self, dtype): 316 if dtype is None: 317 # no float128 available 318 return 319 320 s = Series(range(10), dtype=dtype) 321 group_a = ["mean", "std", "var", "skew", "kurt"] 322 group_b = ["min", "max"] 323 for method in group_a + group_b: 324 result = getattr(s, method)() 325 if is_integer_dtype(dtype) and method in group_a: 326 assert result.dtype == np.float64 327 else: 328 assert result.dtype == dtype 329 330 def test_nanmedian(self, skipna): 331 with warnings.catch_warnings(record=True): 332 warnings.simplefilter("ignore", RuntimeWarning) 333 self.check_funs( 334 nanops.nanmedian, 335 np.median, 336 skipna, 337 allow_complex=False, 338 allow_date=False, 339 allow_obj="convert", 340 ) 341 342 @pytest.mark.parametrize("ddof", range(3)) 343 def test_nanvar(self, ddof, skipna): 344 self.check_funs( 345 nanops.nanvar, 346 np.var, 347 skipna, 348 allow_complex=False, 349 allow_date=False, 350 allow_obj="convert", 351 ddof=ddof, 352 ) 353 354 @pytest.mark.parametrize("ddof", range(3)) 355 def test_nanstd(self, ddof, skipna): 356 self.check_funs( 357 nanops.nanstd, 358 np.std, 359 skipna, 360 allow_complex=False, 361 allow_date=False, 362 allow_obj="convert", 363 ddof=ddof, 364 ) 365 366 @td.skip_if_no_scipy 367 @pytest.mark.parametrize("ddof", range(3)) 368 def test_nansem(self, ddof, skipna): 369 from scipy.stats import sem 370 371 with np.errstate(invalid="ignore"): 372 self.check_funs( 373 nanops.nansem, 374 sem, 375 skipna, 376 allow_complex=False, 377 allow_date=False, 378 allow_tdelta=False, 379 allow_obj="convert", 380 ddof=ddof, 381 ) 382 383 @pytest.mark.parametrize( 384 "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)] 385 ) 386 def test_nanops_with_warnings(self, nan_op, np_op, skipna): 387 with warnings.catch_warnings(record=True): 388 warnings.simplefilter("ignore", RuntimeWarning) 389 self.check_funs(nan_op, np_op, skipna, allow_obj=False) 390 391 def _argminmax_wrap(self, value, axis=None, func=None): 392 res = func(value, axis) 393 nans = np.min(value, axis) 394 nullnan = isna(nans) 395 if res.ndim: 396 res[nullnan] = -1 397 elif ( 398 hasattr(nullnan, "all") 399 and nullnan.all() 400 or not hasattr(nullnan, "all") 401 and nullnan 402 ): 403 res = -1 404 return res 405 406 def test_nanargmax(self, skipna): 407 with warnings.catch_warnings(record=True): 408 warnings.simplefilter("ignore", RuntimeWarning) 409 func = partial(self._argminmax_wrap, func=np.argmax) 410 self.check_funs(nanops.nanargmax, func, skipna, allow_obj=False) 411 412 def test_nanargmin(self, skipna): 413 with warnings.catch_warnings(record=True): 414 warnings.simplefilter("ignore", RuntimeWarning) 415 func = partial(self._argminmax_wrap, func=np.argmin) 416 self.check_funs(nanops.nanargmin, func, skipna, allow_obj=False) 417 418 def _skew_kurt_wrap(self, values, axis=None, func=None): 419 if not isinstance(values.dtype.type, np.floating): 420 values = values.astype("f8") 421 result = func(values, axis=axis, bias=False) 422 # fix for handling cases where all elements in an axis are the same 423 if isinstance(result, np.ndarray): 424 result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0 425 return result 426 elif np.max(values) == np.min(values): 427 return 0.0 428 return result 429 430 @td.skip_if_no_scipy 431 def test_nanskew(self, skipna): 432 from scipy.stats import skew 433 434 func = partial(self._skew_kurt_wrap, func=skew) 435 with np.errstate(invalid="ignore"): 436 self.check_funs( 437 nanops.nanskew, 438 func, 439 skipna, 440 allow_complex=False, 441 allow_date=False, 442 allow_tdelta=False, 443 ) 444 445 @td.skip_if_no_scipy 446 def test_nankurt(self, skipna): 447 from scipy.stats import kurtosis 448 449 func1 = partial(kurtosis, fisher=True) 450 func = partial(self._skew_kurt_wrap, func=func1) 451 with np.errstate(invalid="ignore"): 452 self.check_funs( 453 nanops.nankurt, 454 func, 455 skipna, 456 allow_complex=False, 457 allow_date=False, 458 allow_tdelta=False, 459 ) 460 461 def test_nanprod(self, skipna): 462 self.check_funs( 463 nanops.nanprod, 464 np.prod, 465 skipna, 466 allow_date=False, 467 allow_tdelta=False, 468 empty_targfunc=np.nanprod, 469 ) 470 471 def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs): 472 res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs) 473 res01 = checkfun( 474 self.arr_float_2d, 475 self.arr_float1_2d, 476 min_periods=len(self.arr_float_2d) - 1, 477 **kwargs, 478 ) 479 tm.assert_almost_equal(targ0, res00) 480 tm.assert_almost_equal(targ0, res01) 481 482 res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs) 483 res11 = checkfun( 484 self.arr_float_nan_2d, 485 self.arr_float1_nan_2d, 486 min_periods=len(self.arr_float_2d) - 1, 487 **kwargs, 488 ) 489 tm.assert_almost_equal(targ1, res10) 490 tm.assert_almost_equal(targ1, res11) 491 492 targ2 = np.nan 493 res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs) 494 res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs) 495 res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs) 496 res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs) 497 res24 = checkfun( 498 self.arr_float_nan_2d, 499 self.arr_nan_float1_2d, 500 min_periods=len(self.arr_float_2d) - 1, 501 **kwargs, 502 ) 503 res25 = checkfun( 504 self.arr_float_2d, 505 self.arr_float1_2d, 506 min_periods=len(self.arr_float_2d) + 1, 507 **kwargs, 508 ) 509 tm.assert_almost_equal(targ2, res20) 510 tm.assert_almost_equal(targ2, res21) 511 tm.assert_almost_equal(targ2, res22) 512 tm.assert_almost_equal(targ2, res23) 513 tm.assert_almost_equal(targ2, res24) 514 tm.assert_almost_equal(targ2, res25) 515 516 def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs): 517 res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs) 518 res01 = checkfun( 519 self.arr_float_1d, 520 self.arr_float1_1d, 521 min_periods=len(self.arr_float_1d) - 1, 522 **kwargs, 523 ) 524 tm.assert_almost_equal(targ0, res00) 525 tm.assert_almost_equal(targ0, res01) 526 527 res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs) 528 res11 = checkfun( 529 self.arr_float_nan_1d, 530 self.arr_float1_nan_1d, 531 min_periods=len(self.arr_float_1d) - 1, 532 **kwargs, 533 ) 534 tm.assert_almost_equal(targ1, res10) 535 tm.assert_almost_equal(targ1, res11) 536 537 targ2 = np.nan 538 res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs) 539 res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs) 540 res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs) 541 res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs) 542 res24 = checkfun( 543 self.arr_float_nan_1d, 544 self.arr_nan_float1_1d, 545 min_periods=len(self.arr_float_1d) - 1, 546 **kwargs, 547 ) 548 res25 = checkfun( 549 self.arr_float_1d, 550 self.arr_float1_1d, 551 min_periods=len(self.arr_float_1d) + 1, 552 **kwargs, 553 ) 554 tm.assert_almost_equal(targ2, res20) 555 tm.assert_almost_equal(targ2, res21) 556 tm.assert_almost_equal(targ2, res22) 557 tm.assert_almost_equal(targ2, res23) 558 tm.assert_almost_equal(targ2, res24) 559 tm.assert_almost_equal(targ2, res25) 560 561 def test_nancorr(self): 562 targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] 563 targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] 564 self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1) 565 targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1] 566 targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] 567 self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson") 568 569 def test_nancorr_pearson(self): 570 targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] 571 targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] 572 self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson") 573 targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1] 574 targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] 575 self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson") 576 577 @td.skip_if_no_scipy 578 def test_nancorr_kendall(self): 579 from scipy.stats import kendalltau 580 581 targ0 = kendalltau(self.arr_float_2d, self.arr_float1_2d)[0] 582 targ1 = kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0] 583 self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="kendall") 584 targ0 = kendalltau(self.arr_float_1d, self.arr_float1_1d)[0] 585 targ1 = kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0] 586 self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="kendall") 587 588 @td.skip_if_no_scipy 589 def test_nancorr_spearman(self): 590 from scipy.stats import spearmanr 591 592 targ0 = spearmanr(self.arr_float_2d, self.arr_float1_2d)[0] 593 targ1 = spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0] 594 self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="spearman") 595 targ0 = spearmanr(self.arr_float_1d, self.arr_float1_1d)[0] 596 targ1 = spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0] 597 self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="spearman") 598 599 @td.skip_if_no_scipy 600 def test_invalid_method(self): 601 targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] 602 targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] 603 msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'" 604 with pytest.raises(ValueError, match=msg): 605 self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo") 606 607 def test_nancov(self): 608 targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1] 609 targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] 610 self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1) 611 targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1] 612 targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] 613 self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1) 614 615 def check_nancomp(self, checkfun, targ0): 616 arr_float = self.arr_float 617 arr_float1 = self.arr_float1 618 arr_nan = self.arr_nan 619 arr_nan_nan = self.arr_nan_nan 620 arr_float_nan = self.arr_float_nan 621 arr_float1_nan = self.arr_float1_nan 622 arr_nan_float1 = self.arr_nan_float1 623 624 while targ0.ndim: 625 res0 = checkfun(arr_float, arr_float1) 626 tm.assert_almost_equal(targ0, res0) 627 628 if targ0.ndim > 1: 629 targ1 = np.vstack([targ0, arr_nan]) 630 else: 631 targ1 = np.hstack([targ0, arr_nan]) 632 res1 = checkfun(arr_float_nan, arr_float1_nan) 633 tm.assert_numpy_array_equal(targ1, res1, check_dtype=False) 634 635 targ2 = arr_nan_nan 636 res2 = checkfun(arr_float_nan, arr_nan_float1) 637 tm.assert_numpy_array_equal(targ2, res2, check_dtype=False) 638 639 # Lower dimension for next step in the loop 640 arr_float = np.take(arr_float, 0, axis=-1) 641 arr_float1 = np.take(arr_float1, 0, axis=-1) 642 arr_nan = np.take(arr_nan, 0, axis=-1) 643 arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1) 644 arr_float_nan = np.take(arr_float_nan, 0, axis=-1) 645 arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1) 646 arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1) 647 targ0 = np.take(targ0, 0, axis=-1) 648 649 @pytest.mark.parametrize( 650 "op,nanop", 651 [ 652 (operator.eq, nanops.naneq), 653 (operator.ne, nanops.nanne), 654 (operator.gt, nanops.nangt), 655 (operator.ge, nanops.nange), 656 (operator.lt, nanops.nanlt), 657 (operator.le, nanops.nanle), 658 ], 659 ) 660 def test_nan_comparison(self, op, nanop): 661 targ0 = op(self.arr_float, self.arr_float1) 662 self.check_nancomp(nanop, targ0) 663 664 def check_bool(self, func, value, correct): 665 while getattr(value, "ndim", True): 666 res0 = func(value) 667 if correct: 668 assert res0 669 else: 670 assert not res0 671 672 if not hasattr(value, "ndim"): 673 break 674 675 # Reduce dimension for next step in the loop 676 value = np.take(value, 0, axis=-1) 677 678 def test__has_infs(self): 679 pairs = [ 680 ("arr_complex", False), 681 ("arr_int", False), 682 ("arr_bool", False), 683 ("arr_str", False), 684 ("arr_utf", False), 685 ("arr_complex", False), 686 ("arr_complex_nan", False), 687 ("arr_nan_nanj", False), 688 ("arr_nan_infj", True), 689 ("arr_complex_nan_infj", True), 690 ] 691 pairs_float = [ 692 ("arr_float", False), 693 ("arr_nan", False), 694 ("arr_float_nan", False), 695 ("arr_nan_nan", False), 696 ("arr_float_inf", True), 697 ("arr_inf", True), 698 ("arr_nan_inf", True), 699 ("arr_float_nan_inf", True), 700 ("arr_nan_nan_inf", True), 701 ] 702 703 for arr, correct in pairs: 704 val = getattr(self, arr) 705 self.check_bool(nanops._has_infs, val, correct) 706 707 for arr, correct in pairs_float: 708 val = getattr(self, arr) 709 self.check_bool(nanops._has_infs, val, correct) 710 self.check_bool(nanops._has_infs, val.astype("f4"), correct) 711 self.check_bool(nanops._has_infs, val.astype("f2"), correct) 712 713 def test__bn_ok_dtype(self): 714 assert nanops._bn_ok_dtype(self.arr_float.dtype, "test") 715 assert nanops._bn_ok_dtype(self.arr_complex.dtype, "test") 716 assert nanops._bn_ok_dtype(self.arr_int.dtype, "test") 717 assert nanops._bn_ok_dtype(self.arr_bool.dtype, "test") 718 assert nanops._bn_ok_dtype(self.arr_str.dtype, "test") 719 assert nanops._bn_ok_dtype(self.arr_utf.dtype, "test") 720 assert not nanops._bn_ok_dtype(self.arr_date.dtype, "test") 721 assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, "test") 722 assert not nanops._bn_ok_dtype(self.arr_obj.dtype, "test") 723 724 725class TestEnsureNumeric: 726 def test_numeric_values(self): 727 # Test integer 728 assert nanops._ensure_numeric(1) == 1 729 730 # Test float 731 assert nanops._ensure_numeric(1.1) == 1.1 732 733 # Test complex 734 assert nanops._ensure_numeric(1 + 2j) == 1 + 2j 735 736 def test_ndarray(self): 737 # Test numeric ndarray 738 values = np.array([1, 2, 3]) 739 assert np.allclose(nanops._ensure_numeric(values), values) 740 741 # Test object ndarray 742 o_values = values.astype(object) 743 assert np.allclose(nanops._ensure_numeric(o_values), values) 744 745 # Test convertible string ndarray 746 s_values = np.array(["1", "2", "3"], dtype=object) 747 assert np.allclose(nanops._ensure_numeric(s_values), values) 748 749 # Test non-convertible string ndarray 750 s_values = np.array(["foo", "bar", "baz"], dtype=object) 751 msg = r"Could not convert .* to numeric" 752 with pytest.raises(TypeError, match=msg): 753 nanops._ensure_numeric(s_values) 754 755 def test_convertable_values(self): 756 assert np.allclose(nanops._ensure_numeric("1"), 1.0) 757 assert np.allclose(nanops._ensure_numeric("1.1"), 1.1) 758 assert np.allclose(nanops._ensure_numeric("1+1j"), 1 + 1j) 759 760 def test_non_convertable_values(self): 761 msg = "Could not convert foo to numeric" 762 with pytest.raises(TypeError, match=msg): 763 nanops._ensure_numeric("foo") 764 765 # with the wrong type, python raises TypeError for us 766 msg = "argument must be a string or a number" 767 with pytest.raises(TypeError, match=msg): 768 nanops._ensure_numeric({}) 769 with pytest.raises(TypeError, match=msg): 770 nanops._ensure_numeric([]) 771 772 773class TestNanvarFixedValues: 774 775 # xref GH10242 776 777 def setup_method(self, method): 778 # Samples from a normal distribution. 779 self.variance = variance = 3.0 780 self.samples = self.prng.normal(scale=variance ** 0.5, size=100000) 781 782 def test_nanvar_all_finite(self): 783 samples = self.samples 784 actual_variance = nanops.nanvar(samples) 785 tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2) 786 787 def test_nanvar_nans(self): 788 samples = np.nan * np.ones(2 * self.samples.shape[0]) 789 samples[::2] = self.samples 790 791 actual_variance = nanops.nanvar(samples, skipna=True) 792 tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2) 793 794 actual_variance = nanops.nanvar(samples, skipna=False) 795 tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2) 796 797 def test_nanstd_nans(self): 798 samples = np.nan * np.ones(2 * self.samples.shape[0]) 799 samples[::2] = self.samples 800 801 actual_std = nanops.nanstd(samples, skipna=True) 802 tm.assert_almost_equal(actual_std, self.variance ** 0.5, rtol=1e-2) 803 804 actual_std = nanops.nanvar(samples, skipna=False) 805 tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2) 806 807 def test_nanvar_axis(self): 808 # Generate some sample data. 809 samples_norm = self.samples 810 samples_unif = self.prng.uniform(size=samples_norm.shape[0]) 811 samples = np.vstack([samples_norm, samples_unif]) 812 813 actual_variance = nanops.nanvar(samples, axis=1) 814 tm.assert_almost_equal( 815 actual_variance, np.array([self.variance, 1.0 / 12]), rtol=1e-2 816 ) 817 818 def test_nanvar_ddof(self): 819 n = 5 820 samples = self.prng.uniform(size=(10000, n + 1)) 821 samples[:, -1] = np.nan # Force use of our own algorithm. 822 823 variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean() 824 variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean() 825 variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean() 826 827 # The unbiased estimate. 828 var = 1.0 / 12 829 tm.assert_almost_equal(variance_1, var, rtol=1e-2) 830 831 # The underestimated variance. 832 tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, rtol=1e-2) 833 834 # The overestimated variance. 835 tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, rtol=1e-2) 836 837 def test_ground_truth(self): 838 # Test against values that were precomputed with Numpy. 839 samples = np.empty((4, 4)) 840 samples[:3, :3] = np.array( 841 [ 842 [0.97303362, 0.21869576, 0.55560287], 843 [0.72980153, 0.03109364, 0.99155171], 844 [0.09317602, 0.60078248, 0.15871292], 845 ] 846 ) 847 samples[3] = samples[:, 3] = np.nan 848 849 # Actual variances along axis=0, 1 for ddof=0, 1, 2 850 variance = np.array( 851 [ 852 [ 853 [0.13762259, 0.05619224, 0.11568816], 854 [0.20643388, 0.08428837, 0.17353224], 855 [0.41286776, 0.16857673, 0.34706449], 856 ], 857 [ 858 [0.09519783, 0.16435395, 0.05082054], 859 [0.14279674, 0.24653093, 0.07623082], 860 [0.28559348, 0.49306186, 0.15246163], 861 ], 862 ] 863 ) 864 865 # Test nanvar. 866 for axis in range(2): 867 for ddof in range(3): 868 var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof) 869 tm.assert_almost_equal(var[:3], variance[axis, ddof]) 870 assert np.isnan(var[3]) 871 872 # Test nanstd. 873 for axis in range(2): 874 for ddof in range(3): 875 std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof) 876 tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5) 877 assert np.isnan(std[3]) 878 879 def test_nanstd_roundoff(self): 880 # Regression test for GH 10242 (test data taken from GH 10489). Ensure 881 # that variance is stable. 882 data = Series(766897346 * np.ones(10)) 883 for ddof in range(3): 884 result = data.std(ddof=ddof) 885 assert result == 0.0 886 887 @property 888 def prng(self): 889 return np.random.RandomState(1234) 890 891 892class TestNanskewFixedValues: 893 894 # xref GH 11974 895 896 def setup_method(self, method): 897 # Test data + skewness value (computed with scipy.stats.skew) 898 self.samples = np.sin(np.linspace(0, 1, 200)) 899 self.actual_skew = -0.1875895205961754 900 901 def test_constant_series(self): 902 # xref GH 11974 903 for val in [3075.2, 3075.3, 3075.5]: 904 data = val * np.ones(300) 905 skew = nanops.nanskew(data) 906 assert skew == 0.0 907 908 def test_all_finite(self): 909 alpha, beta = 0.3, 0.1 910 left_tailed = self.prng.beta(alpha, beta, size=100) 911 assert nanops.nanskew(left_tailed) < 0 912 913 alpha, beta = 0.1, 0.3 914 right_tailed = self.prng.beta(alpha, beta, size=100) 915 assert nanops.nanskew(right_tailed) > 0 916 917 def test_ground_truth(self): 918 skew = nanops.nanskew(self.samples) 919 tm.assert_almost_equal(skew, self.actual_skew) 920 921 def test_axis(self): 922 samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))]) 923 skew = nanops.nanskew(samples, axis=1) 924 tm.assert_almost_equal(skew, np.array([self.actual_skew, np.nan])) 925 926 def test_nans(self): 927 samples = np.hstack([self.samples, np.nan]) 928 skew = nanops.nanskew(samples, skipna=False) 929 assert np.isnan(skew) 930 931 def test_nans_skipna(self): 932 samples = np.hstack([self.samples, np.nan]) 933 skew = nanops.nanskew(samples, skipna=True) 934 tm.assert_almost_equal(skew, self.actual_skew) 935 936 @property 937 def prng(self): 938 return np.random.RandomState(1234) 939 940 941class TestNankurtFixedValues: 942 943 # xref GH 11974 944 945 def setup_method(self, method): 946 # Test data + kurtosis value (computed with scipy.stats.kurtosis) 947 self.samples = np.sin(np.linspace(0, 1, 200)) 948 self.actual_kurt = -1.2058303433799713 949 950 def test_constant_series(self): 951 # xref GH 11974 952 for val in [3075.2, 3075.3, 3075.5]: 953 data = val * np.ones(300) 954 kurt = nanops.nankurt(data) 955 assert kurt == 0.0 956 957 def test_all_finite(self): 958 alpha, beta = 0.3, 0.1 959 left_tailed = self.prng.beta(alpha, beta, size=100) 960 assert nanops.nankurt(left_tailed) < 0 961 962 alpha, beta = 0.1, 0.3 963 right_tailed = self.prng.beta(alpha, beta, size=100) 964 assert nanops.nankurt(right_tailed) > 0 965 966 def test_ground_truth(self): 967 kurt = nanops.nankurt(self.samples) 968 tm.assert_almost_equal(kurt, self.actual_kurt) 969 970 def test_axis(self): 971 samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))]) 972 kurt = nanops.nankurt(samples, axis=1) 973 tm.assert_almost_equal(kurt, np.array([self.actual_kurt, np.nan])) 974 975 def test_nans(self): 976 samples = np.hstack([self.samples, np.nan]) 977 kurt = nanops.nankurt(samples, skipna=False) 978 assert np.isnan(kurt) 979 980 def test_nans_skipna(self): 981 samples = np.hstack([self.samples, np.nan]) 982 kurt = nanops.nankurt(samples, skipna=True) 983 tm.assert_almost_equal(kurt, self.actual_kurt) 984 985 @property 986 def prng(self): 987 return np.random.RandomState(1234) 988 989 990class TestDatetime64NaNOps: 991 # Enabling mean changes the behavior of DataFrame.mean 992 # See https://github.com/pandas-dev/pandas/issues/24752 993 def test_nanmean(self): 994 dti = pd.date_range("2016-01-01", periods=3) 995 expected = dti[1] 996 997 for obj in [dti, DatetimeArray(dti), Series(dti)]: 998 result = nanops.nanmean(obj) 999 assert result == expected 1000 1001 dti2 = dti.insert(1, pd.NaT) 1002 1003 for obj in [dti2, DatetimeArray(dti2), Series(dti2)]: 1004 result = nanops.nanmean(obj) 1005 assert result == expected 1006 1007 @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) 1008 def test_nanmean_skipna_false(self, dtype): 1009 arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3) 1010 1011 arr[-1, -1] = "NaT" 1012 1013 result = nanops.nanmean(arr, skipna=False) 1014 assert result is pd.NaT 1015 1016 result = nanops.nanmean(arr, axis=0, skipna=False) 1017 expected = np.array([4, 5, "NaT"], dtype=arr.dtype) 1018 tm.assert_numpy_array_equal(result, expected) 1019 1020 result = nanops.nanmean(arr, axis=1, skipna=False) 1021 expected = np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]]) 1022 tm.assert_numpy_array_equal(result, expected) 1023 1024 1025def test_use_bottleneck(): 1026 1027 if nanops._BOTTLENECK_INSTALLED: 1028 1029 pd.set_option("use_bottleneck", True) 1030 assert pd.get_option("use_bottleneck") 1031 1032 pd.set_option("use_bottleneck", False) 1033 assert not pd.get_option("use_bottleneck") 1034 1035 pd.set_option("use_bottleneck", use_bn) 1036 1037 1038@pytest.mark.parametrize( 1039 "numpy_op, expected", 1040 [ 1041 (np.sum, 10), 1042 (np.nansum, 10), 1043 (np.mean, 2.5), 1044 (np.nanmean, 2.5), 1045 (np.median, 2.5), 1046 (np.nanmedian, 2.5), 1047 (np.min, 1), 1048 (np.max, 4), 1049 (np.nanmin, 1), 1050 (np.nanmax, 4), 1051 ], 1052) 1053def test_numpy_ops(numpy_op, expected): 1054 # GH8383 1055 result = numpy_op(Series([1, 2, 3, 4])) 1056 assert result == expected 1057 1058 1059@pytest.mark.parametrize( 1060 "operation", 1061 [ 1062 nanops.nanany, 1063 nanops.nanall, 1064 nanops.nansum, 1065 nanops.nanmean, 1066 nanops.nanmedian, 1067 nanops.nanstd, 1068 nanops.nanvar, 1069 nanops.nansem, 1070 nanops.nanargmax, 1071 nanops.nanargmin, 1072 nanops.nanmax, 1073 nanops.nanmin, 1074 nanops.nanskew, 1075 nanops.nankurt, 1076 nanops.nanprod, 1077 ], 1078) 1079def test_nanops_independent_of_mask_param(operation): 1080 # GH22764 1081 s = Series([1, 2, np.nan, 3, np.nan, 4]) 1082 mask = s.isna() 1083 median_expected = operation(s) 1084 median_result = operation(s, mask=mask) 1085 assert median_expected == median_result 1086