import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame, Series, date_range
import pandas._testing as tm


def test_centered_axis_validation():
    """Centered rolling accepts valid axes and rejects out-of-range ones."""
    # ok
    Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean()

    # bad axis
    msg = "No axis named 1 for object type Series"
    with pytest.raises(ValueError, match=msg):
        Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean()

    # ok ok
    DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean()
    DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean()

    # bad axis
    msg = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()


@td.skip_if_no_scipy
def test_cmov_mean():
    """Centered 5-point rolling mean matches precomputed values (NaN edges)."""
    # GH 8238
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    result = Series(vals).rolling(5, center=True).mean()
    expected_values = [
        np.nan,
        np.nan,
        9.962,
        11.27,
        11.564,
        12.516,
        12.818,
        12.952,
        np.nan,
        np.nan,
    ]
    expected = Series(expected_values)
    tm.assert_series_equal(expected, result)


@td.skip_if_no_scipy
def test_cmov_window():
    """Centered 5-point "boxcar" window mean matches the same expected values
    as the unweighted centered mean in ``test_cmov_mean``."""
    # GH 8238
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    result = Series(vals).rolling(5, win_type="boxcar", center=True).mean()
    expected_values = [
        np.nan,
        np.nan,
        9.962,
        11.27,
        11.564,
        12.516,
        12.818,
        12.952,
        np.nan,
        np.nan,
    ]
    expected = Series(expected_values)
    tm.assert_series_equal(expected, result)


@td.skip_if_no_scipy
def test_cmov_window_corner():
    """Corner cases for the "boxcar" window mean: all-NaN input, empty input,
    and a window longer than the data."""
    # GH 8238
    # all nan
    vals = Series([np.nan] * 10)
    result = vals.rolling(5, center=True, win_type="boxcar").mean()
    assert np.isnan(result).all()

    # empty
    vals = Series([], dtype=object)
    result = vals.rolling(5, center=True, win_type="boxcar").mean()
    assert len(result) == 0

    # shorter than window
    vals = Series(np.random.randn(5))
    result = vals.rolling(10, win_type="boxcar").mean()
    assert np.isnan(result).all()
    assert len(result) == 5


@td.skip_if_no_scipy
@pytest.mark.parametrize(
    "f,xp",
    [
        (
            "mean",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [9.252, 9.392],
                [8.644, 9.906],
                [8.87, 10.208],
                [6.81, 8.588],
                [7.792, 8.644],
                [9.05, 7.824],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "std",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [3.789706, 4.068313],
                [3.429232, 3.237411],
                [3.589269, 3.220810],
                [3.405195, 2.380655],
                [3.281839, 2.369869],
                [3.676846, 1.801799],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "var",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [14.36187, 16.55117],
                [11.75963, 10.48083],
                [12.88285, 10.37362],
                [11.59535, 5.66752],
                [10.77047, 5.61628],
                [13.51920, 3.24648],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "sum",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [46.26, 46.96],
                [43.22, 49.53],
                [44.35, 51.04],
                [34.05, 42.94],
                [38.96, 43.22],
                [45.25, 39.12],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
    ],
)
def test_cmov_window_frame(f, xp):
    """Centered "boxcar" window aggregation on a two-column DataFrame.

    Parameters
    ----------
    f : str
        Name of the aggregation method looked up on the rolling object.
    xp : list of list of float
        Expected per-row, per-column results for that aggregation.
    """
    # Gh 8238
    df = DataFrame(
        np.array(
            [
                [12.18, 3.64],
                [10.18, 9.16],
                [13.24, 14.61],
                [4.51, 8.11],
                [6.15, 11.44],
                [9.14, 6.21],
                [11.31, 10.67],
                [2.94, 6.51],
                [9.42, 8.39],
                [12.44, 7.34],
            ]
        )
    )
    xp = DataFrame(np.array(xp))

    roll = df.rolling(5, win_type="boxcar", center=True)
    rs = getattr(roll, f)()

    tm.assert_frame_equal(xp, rs)


@td.skip_if_no_scipy
def test_cmov_window_na_min_periods():
    """With NaNs present and ``min_periods=4``, the "boxcar" window mean must
    equal the plain centered rolling mean."""
    # min_periods
    vals = Series(np.random.randn(10))
    vals[4] = np.nan
    vals[8] = np.nan

    xp = vals.rolling(5, min_periods=4, center=True).mean()
    rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean()
    tm.assert_series_equal(xp, rs)


@td.skip_if_no_scipy
def test_cmov_window_regular(win_types):
    """Centered 5-point weighted mean for each parameter-free window type.

    ``win_types`` is not defined in this file (presumably a pytest fixture
    from a conftest); its value must be one of the keys of ``xps`` below.
    """
    # GH 8238
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    xps = {
        "hamming": [
            np.nan,
            np.nan,
            8.71384,
            9.56348,
            12.38009,
            14.03687,
            13.8567,
            11.81473,
            np.nan,
            np.nan,
        ],
        "triang": [
            np.nan,
            np.nan,
            9.28667,
            10.34667,
            12.00556,
            13.33889,
            13.38,
            12.33667,
            np.nan,
            np.nan,
        ],
        "barthann": [
            np.nan,
            np.nan,
            8.4425,
            9.1925,
            12.5575,
            14.3675,
            14.0825,
            11.5675,
            np.nan,
            np.nan,
        ],
        "bohman": [
            np.nan,
            np.nan,
            7.61599,
            9.1764,
            12.83559,
            14.17267,
            14.65923,
            11.10401,
            np.nan,
            np.nan,
        ],
        "blackmanharris": [
            np.nan,
            np.nan,
            6.97691,
            9.16438,
            13.05052,
            14.02156,
            15.10512,
            10.74574,
            np.nan,
            np.nan,
        ],
        "nuttall": [
            np.nan,
            np.nan,
            7.04618,
            9.16786,
            13.02671,
            14.03559,
            15.05657,
            10.78514,
            np.nan,
            np.nan,
        ],
        "blackman": [
            np.nan,
            np.nan,
            7.73345,
            9.17869,
            12.79607,
            14.20036,
            14.57726,
            11.16988,
            np.nan,
            np.nan,
        ],
        "bartlett": [
            np.nan,
            np.nan,
            8.4425,
            9.1925,
            12.5575,
            14.3675,
            14.0825,
            11.5675,
            np.nan,
            np.nan,
        ],
    }

    xp = Series(xps[win_types])
    rs = Series(vals).rolling(5, win_type=win_types, center=True).mean()
    tm.assert_series_equal(xp, rs)


@td.skip_if_no_scipy
def test_cmov_window_regular_linear_range(win_types):
    """On the linear ramp 0..9 the centered 5-point weighted mean must return
    the input value itself, with two NaNs at each edge.

    ``win_types`` is not defined in this file (presumably a pytest fixture).
    """
    # GH 8238
    vals = np.array(range(10), dtype=float)
    xp = vals.copy()
    xp[:2] = np.nan
    xp[-2:] = np.nan
    xp = Series(xp)

    rs = Series(vals).rolling(5, win_type=win_types, center=True).mean()
    tm.assert_series_equal(xp, rs)


@td.skip_if_no_scipy
def test_cmov_window_regular_missing_data(win_types):
    """Weighted 5-point mean with one NaN in the data and ``min_periods=3``.

    Unlike the other ``cmov`` tests this one does not pass ``center=True``.
    ``win_types`` is not defined in this file (presumably a pytest fixture);
    its value must be one of the keys of ``xps`` below.
    """
    # GH 8238
    vals = np.array(
        [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48]
    )
    xps = {
        "bartlett": [
            np.nan,
            np.nan,
            9.70333,
            10.5225,
            8.4425,
            9.1925,
            12.5575,
            14.3675,
            15.61667,
            13.655,
        ],
        "blackman": [
            np.nan,
            np.nan,
            9.04582,
            11.41536,
            7.73345,
            9.17869,
            12.79607,
            14.20036,
            15.8706,
            13.655,
        ],
        "barthann": [
            np.nan,
            np.nan,
            9.70333,
            10.5225,
            8.4425,
            9.1925,
            12.5575,
            14.3675,
            15.61667,
            13.655,
        ],
        "bohman": [
            np.nan,
            np.nan,
            8.9444,
            11.56327,
            7.61599,
            9.1764,
            12.83559,
            14.17267,
            15.90976,
            13.655,
        ],
        "hamming": [
            np.nan,
            np.nan,
            9.59321,
            10.29694,
            8.71384,
            9.56348,
            12.38009,
            14.20565,
            15.24694,
            13.69758,
        ],
        "nuttall": [
            np.nan,
            np.nan,
            8.47693,
            12.2821,
            7.04618,
            9.16786,
            13.02671,
            14.03673,
            16.08759,
            13.65553,
        ],
        "triang": [
            np.nan,
            np.nan,
            9.33167,
            9.76125,
            9.28667,
            10.34667,
            12.00556,
            13.82125,
            14.49429,
            13.765,
        ],
        "blackmanharris": [
            np.nan,
            np.nan,
            8.42526,
            12.36824,
            6.97691,
            9.16438,
            13.05052,
            14.02175,
            16.1098,
            13.65509,
        ],
    }

    xp = Series(xps[win_types])
    rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean()
    tm.assert_series_equal(xp, rs)


@td.skip_if_no_scipy
def test_cmov_window_special(win_types_special):
    """Centered 5-point weighted mean for window types that need extra
    keyword arguments, which are forwarded through ``mean(**kwargs)``.

    ``win_types_special`` is not defined in this file (presumably a pytest
    fixture); its value must be a key of both ``kwds`` and ``xps`` below.
    """
    # GH 8238
    kwds = {
        "kaiser": {"beta": 1.0},
        "gaussian": {"std": 1.0},
        "general_gaussian": {"p": 2.0, "sig": 2.0},
        "exponential": {"tau": 10},
    }

    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])

    xps = {
        "gaussian": [
            np.nan,
            np.nan,
            8.97297,
            9.76077,
            12.24763,
            13.89053,
            13.65671,
            12.01002,
            np.nan,
            np.nan,
        ],
        "general_gaussian": [
            np.nan,
            np.nan,
            9.85011,
            10.71589,
            11.73161,
            13.08516,
            12.95111,
            12.74577,
            np.nan,
            np.nan,
        ],
        "kaiser": [
            np.nan,
            np.nan,
            9.86851,
            11.02969,
            11.65161,
            12.75129,
            12.90702,
            12.83757,
            np.nan,
            np.nan,
        ],
        "exponential": [
            np.nan,
            np.nan,
            9.83364,
            11.10472,
            11.64551,
            12.66138,
            12.92379,
            12.83770,
            np.nan,
            np.nan,
        ],
    }

    xp = Series(xps[win_types_special])
    rs = (
        Series(vals)
        .rolling(5, win_type=win_types_special, center=True)
        .mean(**kwds[win_types_special])
    )
    tm.assert_series_equal(xp, rs)


@td.skip_if_no_scipy
def test_cmov_window_special_linear_range(win_types_special):
    """On the linear ramp 0..9 the centered 5-point mean of each keyword-
    parameterised window must return the input value, NaN at the edges.

    ``win_types_special`` is not defined in this file (presumably a pytest
    fixture); its value must be a key of ``kwds`` below.
    """
    # GH 8238
    # NOTE(review): "slepian" has no counterpart in test_cmov_window_special;
    # the entry is only used if the fixture actually yields that name.
    kwds = {
        "kaiser": {"beta": 1.0},
        "gaussian": {"std": 1.0},
        "general_gaussian": {"p": 2.0, "sig": 2.0},
        "slepian": {"width": 0.5},
        "exponential": {"tau": 10},
    }

    vals = np.array(range(10), dtype=float)
    xp = vals.copy()
    xp[:2] = np.nan
    xp[-2:] = np.nan
    xp = Series(xp)

    rs = (
        Series(vals)
        .rolling(5, win_type=win_types_special, center=True)
        .mean(**kwds[win_types_special])
    )
    tm.assert_series_equal(xp, rs)


def test_rolling_min_min_periods():
    """Rolling min with a window longer than the data and ``min_periods=1``
    yields the running minimum; ``min_periods > window`` raises ValueError."""
    a = Series([1, 2, 3, 4, 5])
    result = a.rolling(window=100, min_periods=1).min()
    expected = Series(np.ones(len(a)))
    tm.assert_series_equal(result, expected)
    msg = "min_periods 5 must be <= window 3"
    with pytest.raises(ValueError, match=msg):
        Series([1, 2, 3]).rolling(window=3, min_periods=5).min()


def test_rolling_max_min_periods():
    """Rolling max with a window longer than the data and ``min_periods=1``
    reproduces an increasing series; ``min_periods > window`` raises."""
    a = Series([1, 2, 3, 4, 5], dtype=np.float64)
    b = a.rolling(window=100, min_periods=1).max()
    tm.assert_almost_equal(a, b)
    msg = "min_periods 5 must be <= window 3"
    with pytest.raises(ValueError, match=msg):
        Series([1, 2, 3]).rolling(window=3, min_periods=5).max()


def test_rolling_quantile_np_percentile():
    """Default quantile behavior agrees with ``np.percentile``.

    Checks both ``DataFrame.quantile`` and the rolling quantile computed over
    a window that spans every row of the frame.
    """
    # #9413: Tests that rolling window's quantile default behavior
    # is analogous to Numpy's percentile
    row = 10
    col = 5
    idx = date_range("20100101", periods=row, freq="B")
    df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx)

    quantiles = [0.25, 0.5, 0.75]
    df_quantile = df.quantile(quantiles, axis=0)
    np_percentile = np.percentile(df, [25, 50, 75], axis=0)

    tm.assert_almost_equal(df_quantile.values, np.array(np_percentile))

    # The assertion above never touches the rolling code path. A window that
    # covers all rows must give the same percentiles in its final row.
    for q, expected in zip(quantiles, np_percentile):
        rolled = df.rolling(window=row).quantile(q).iloc[-1]
        tm.assert_almost_equal(rolled.values, expected)


@pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1])
@pytest.mark.parametrize(
    "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
)
@pytest.mark.parametrize(
    "data",
    [
        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
        [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0],
        [0.0, np.nan, 0.2, np.nan, 0.4],
        [np.nan, np.nan, np.nan, np.nan],
        [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5],
        [0.5],
        [np.nan, 0.7, 0.6],
    ],
)
def test_rolling_quantile_interpolation_options(quantile, interpolation, data):
    """The last value of an expanding-window quantile equals
    ``Series.quantile`` for every interpolation option.

    Note that the window used here is ``expanding(min_periods=1)``, not
    ``rolling``; both results being NaN also counts as agreement.
    """
    # Tests that rolling window's quantile behavior is analogous to
    # Series' quantile for each interpolation option
    s = Series(data)

    q1 = s.quantile(quantile, interpolation)
    q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1]

    if np.isnan(q1):
        assert np.isnan(q2)
    else:
        assert q1 == q2


def test_invalid_quantile_value():
    """An unsupported ``interpolation`` name raises ValueError.

    Despite the test name, the quantile value itself (0.5) is valid; the
    invalid argument is the interpolation method.
    """
    data = np.arange(5)
    s = Series(data)

    msg = "Interpolation 'invalid' is not supported"
    with pytest.raises(ValueError, match=msg):
        s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid")


def test_rolling_quantile_param():
    """Quantiles outside [0, 1] raise ValueError; a string raises TypeError."""
    ser = Series([0.0, 0.1, 0.5, 0.9, 1.0])
    msg = "quantile value -0.1 not in \\[0, 1\\]"
    with pytest.raises(ValueError, match=msg):
        ser.rolling(3).quantile(-0.1)

    msg = "quantile value 10.0 not in \\[0, 1\\]"
    with pytest.raises(ValueError, match=msg):
        ser.rolling(3).quantile(10.0)

    msg = "must be real number, not str"
    with pytest.raises(TypeError, match=msg):
        ser.rolling(3).quantile("foo")


def test_rolling_std_1obs():
    """Rolling std over single-observation windows: NaN with the default
    ``ddof``, 0.0 with ``ddof=0``, and NaN when ``min_periods`` is not met."""
    vals = Series([1.0, 2.0, 3.0, 4.0, 5.0])

    result = vals.rolling(1, min_periods=1).std()
    expected = Series([np.nan] * 5)
    tm.assert_series_equal(result, expected)

    result = vals.rolling(1, min_periods=1).std(ddof=0)
    expected = Series([0.0] * 5)
    tm.assert_series_equal(result, expected)

    # At index 2 the window holds a single non-NaN value, fewer than
    # min_periods=2, so the result must be NaN.
    result = Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std()
    assert np.isnan(result[2])


def test_rolling_std_neg_sqrt():
    """Rolling and exponentially weighted std stay finite on nearly constant
    data (from index 2 onward)."""
    # unit test from Bottleneck

    # Test move_nanstd for neg sqrt.

    a = Series(
        [
            0.0011448196318903589,
            0.00028718669878572767,
            0.00028718669878572767,
            0.00028718669878572767,
            0.00028718669878572767,
        ]
    )
    b = a.rolling(window=3).std()
    assert np.isfinite(b[2:]).all()

    b = a.ewm(span=3).std()
    assert np.isfinite(b[2:]).all()