"""Tests for float scalar and float slice indexers on Series and DataFrame."""
import numpy as np
import pytest

from pandas import DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series
import pandas._testing as tm


def gen_obj(klass, index):
    """
    Build a test object of type ``klass`` labelled by ``index``.

    A Series gets the values ``0..len(index)-1``; anything else yields a
    square DataFrame of random normals using ``index`` for both rows and
    columns.
    """
    if klass is Series:
        return Series(np.arange(len(index)), index=index)
    return DataFrame(
        np.random.randn(len(index), len(index)), index=index, columns=index
    )


class TestFloatIndexers:
    def check(self, result, original, indexer, getitem):
        """
        Compare ``result`` against the positional selection it should equal.

        For a Series the expectation is always ``original.iloc[indexer]``.
        For a frame, ``getitem=True`` means the lookup went through
        ``__getitem__`` and therefore selected columns, so the indexer is
        applied on the column axis instead.
        """
        if not isinstance(original, Series) and getitem:
            expected = original.iloc[:, indexer]
        else:
            expected = original.iloc[indexer]

        tm.assert_almost_equal(result, expected)

    @pytest.mark.parametrize(
        "index_func",
        [
            tm.makeStringIndex,
            tm.makeUnicodeIndex,
            tm.makeCategoricalIndex,
            tm.makeDateIndex,
            tm.makeTimedeltaIndex,
            tm.makePeriodIndex,
        ],
    )
    def test_scalar_non_numeric(self, index_func, frame_or_series):
        """Float scalars are not valid labels on non-numeric indexes (GH 4892)."""
        # float_indexers should raise exceptions
        # on appropriate Index types & accessors
        i = index_func(5)
        s = gen_obj(frame_or_series, i)

        # getting
        with pytest.raises(KeyError, match=r"^3\.0$"):
            s[3.0]

        with pytest.raises(KeyError, match=r"^3\.0$"):
            s.loc[3.0]

        # contains
        assert 3.0 not in s

        # setting with an indexer
        inferred = s.index.inferred_type
        if inferred == "categorical":
            # Value or Type Error; nothing is asserted for this case
            return
        if inferred in ("datetime64", "timedelta64", "period"):
            # TODO: setting with a float key should probably work here, but it
            # is inconsistent between Series and DataFrame at the moment, so
            # nothing is asserted for datetime-like indexes.
            return

        # setting with a new float label enlarges the object and the index
        # ends up object-dtype
        s2 = s.copy()
        s2.loc[3.0] = 10
        assert s2.index.is_object()

        s2 = s.copy()
        s2[3.0] = 0
        assert s2.index.is_object()

    @pytest.mark.parametrize(
        "index_func",
        [
            tm.makeStringIndex,
            tm.makeUnicodeIndex,
            tm.makeCategoricalIndex,
            tm.makeDateIndex,
            tm.makeTimedeltaIndex,
            tm.makePeriodIndex,
        ],
    )
    def test_scalar_non_numeric_series_fallback(self, index_func):
        """Integer keys fall back to positional selection; float keys do not."""
        # fallsback to position selection, series only
        i = index_func(5)
        s = Series(np.arange(len(i)), index=i)
        s[3]
        with pytest.raises(KeyError, match=r"^3\.0$"):
            s[3.0]

    def test_scalar_with_mixed(self):
        """Float scalar lookups on a string index and on a mixed index."""
        s2 = Series([1, 2, 3], index=["a", "b", "c"])
        s3 = Series([1, 2, 3], index=["a", "b", 1.5])

        # lookup in a pure string index with an invalid indexer
        with pytest.raises(KeyError, match=r"^1\.0$"):
            s2[1.0]

        with pytest.raises(KeyError, match=r"^1\.0$"):
            s2.loc[1.0]

        result = s2.loc["b"]
        expected = 2
        assert result == expected

        # mixed index so we have label
        # indexing
        with pytest.raises(KeyError, match=r"^1\.0$"):
            s3[1.0]

        result = s3[1]
        expected = 2
        assert result == expected

        with pytest.raises(KeyError, match=r"^1\.0$"):
            s3.loc[1.0]

        result = s3.loc[1.5]
        expected = 3
        assert result == expected

    @pytest.mark.parametrize(
        "idxr,getitem", [(lambda x: x.loc, False), (lambda x: x, True)]
    )
    @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
    def test_scalar_integer(self, index_func, frame_or_series, idxr, getitem):
        """Scalar float indexers on integer indexes coerce to the equal int."""
        # integer index
        i = index_func(5)
        obj = gen_obj(frame_or_series, i)

        # coerce to equal int
        result = idxr(obj)[3.0]
        self.check(result, obj, 3, getitem)

        if isinstance(obj, Series):

            def compare(x, y):
                assert x == y

            expected = 100
        else:
            compare = tm.assert_series_equal
            if getitem:
                expected = Series(100, index=range(len(obj)), name=3)
            else:
                expected = Series(100.0, index=range(len(obj)), name=3)

        s2 = obj.copy()
        idxr(s2)[3.0] = 100

        # the value is readable back through both the float and the int key
        result = idxr(s2)[3.0]
        compare(result, expected)

        result = idxr(s2)[3]
        compare(result, expected)

    @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
    def test_scalar_integer_contains_float(self, index_func, frame_or_series):
        """``in`` with a float equal to an integer label is True."""
        # integer index
        index = index_func(5)
        obj = gen_obj(frame_or_series, index)

        # coerce to equal int
        assert 3.0 in obj

    def test_scalar_float(self, frame_or_series):
        """Scalar float indexers work on a float index."""
        index = Index(np.arange(5.0))
        s = gen_obj(frame_or_series, index)

        # assert all operations except for iloc are ok
        indexer = index[3]
        for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]:

            # getting
            result = idxr(s)[indexer]
            self.check(result, s, 3, getitem)

            # NOTE(review): this section was labelled "setting" but nothing is
            # assigned; it only re-reads the value from a copy.
            s2 = s.copy()

            result = idxr(s2)[indexer]
            self.check(result, s, 3, getitem)

            # random float is a KeyError
            with pytest.raises(KeyError, match=r"^3\.5$"):
                idxr(s)[3.5]

        # contains
        assert 3.0 in s

        # iloc succeeds with an integer
        expected = s.iloc[3]
        s2 = s.copy()

        s2.iloc[3] = expected
        result = s2.iloc[3]
        self.check(result, s, 3, False)

    @pytest.mark.parametrize(
        "index_func",
        [
            tm.makeStringIndex,
            tm.makeUnicodeIndex,
            tm.makeDateIndex,
            tm.makeTimedeltaIndex,
            tm.makePeriodIndex,
        ],
    )
    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
    def test_slice_non_numeric(self, index_func, idx, frame_or_series):
        """Float slices raise TypeError on non-numeric indexes (GH 4892)."""
        # float_indexers should raise exceptions
        # on appropriate Index types & accessors
        index = index_func(5)
        s = gen_obj(frame_or_series, index)

        # message shared by the getitem and setitem loops below
        label_msg = (
            "cannot do (slice|positional) indexing "
            fr"on {type(index).__name__} with these indexers "
            r"\[(3|4)(\.0)?\] "
            r"of type (float|int)"
        )

        # getitem
        msg = (
            "cannot do positional indexing "
            fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s.iloc[idx]

        for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]:
            with pytest.raises(TypeError, match=label_msg):
                idxr(s)[idx]

        # setitem
        msg = "slice indices must be integers or None or have an __index__ method"
        with pytest.raises(TypeError, match=msg):
            s.iloc[idx] = 0

        for idxr in [lambda x: x.loc, lambda x: x]:
            with pytest.raises(TypeError, match=label_msg):
                idxr(s)[idx] = 0

    def test_slice_integer(self):
        """Float slices via ``.loc`` on integer indexes; ``[]`` raises."""
        # same as above, but for Integer based indexes
        # these coerce to a like integer
        # oob indicates if we are out of bounds
        # of positional indexing
        for index, oob in [
            (Int64Index(range(5)), False),
            (RangeIndex(5), False),
            (Int64Index(range(5)) + 10, True),
        ]:

            # s is an in-range index
            s = Series(range(5), index=index)

            # getitem
            for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:

                result = s.loc[idx]

                # these are all label indexing
                # except getitem which is positional
                # empty
                indexer = slice(0, 0) if oob else slice(3, 5)
                self.check(result, s, indexer, False)

            # getitem out-of-bounds
            for idx in [slice(-6, 6), slice(-6.0, 6.0)]:

                result = s.loc[idx]

                # these are all label indexing
                # except getitem which is positional
                # empty
                indexer = slice(0, 0) if oob else slice(-6, 6)
                self.check(result, s, indexer, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                fr"on {type(index).__name__} with these indexers \[-6\.0\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[slice(-6.0, 6.0)]

            # getitem odd floats
            for idx, res1 in [
                (slice(2.5, 4), slice(3, 5)),
                (slice(2, 3.5), slice(2, 4)),
                (slice(2.5, 3.5), slice(3, 4)),
            ]:

                result = s.loc[idx]
                res = slice(0, 0) if oob else res1

                self.check(result, s, res, False)

                # positional indexing
                msg = (
                    "cannot do slice indexing "
                    fr"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
                    "type float"
                )
                with pytest.raises(TypeError, match=msg):
                    s[idx]

    @pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
    def test_integer_positional_indexing(self, idx):
        """make sure that we are raising on positional indexing
        w.r.t. an integer index
        """
        s = Series(range(2, 6), index=range(2, 6))

        result = s[2:4]
        expected = s.iloc[2:4]
        tm.assert_series_equal(result, expected)

        klass = RangeIndex
        msg = (
            "cannot do (slice|positional) indexing "
            fr"on {klass.__name__} with these indexers \[(2|4)\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[idx]
        with pytest.raises(TypeError, match=msg):
            s.iloc[idx]

    @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
    def test_slice_integer_frame_getitem(self, index_func):
        """Float slices on a DataFrame with an integer row index."""
        # similar to above, but on the getitem dim (of a DataFrame)
        index = index_func(5)

        s = DataFrame(np.random.randn(5, 2), index=index)

        # getitem
        for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:

            result = s.loc[idx]
            indexer = slice(0, 2)
            self.check(result, s, indexer, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                fr"on {type(index).__name__} with these indexers \[(0|1)\.0\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[idx]

        # getitem out-of-bounds
        for idx in [slice(-10, 10), slice(-10.0, 10.0)]:

            result = s.loc[idx]
            self.check(result, s, slice(-10, 10), True)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            fr"on {type(index).__name__} with these indexers \[-10\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[slice(-10.0, 10.0)]

        # getitem odd floats
        for idx, res in [
            (slice(0.5, 1), slice(1, 2)),
            (slice(0, 0.5), slice(0, 1)),
            (slice(0.5, 1.5), slice(1, 2)),
        ]:

            result = s.loc[idx]
            self.check(result, s, res, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                fr"on {type(index).__name__} with these indexers \[0\.5\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[idx]

    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
    @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
    def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func):
        """``.loc`` accepts a float slice on an integer index; ``[]`` raises."""
        # similar to above, but on the getitem dim (of a DataFrame)
        index = index_func(5)

        s = DataFrame(np.random.randn(5, 2), index=index)

        # setitem
        sc = s.copy()
        sc.loc[idx] = 0
        result = sc.loc[idx].values.ravel()
        assert (result == 0).all()

        # positional indexing
        msg = (
            "cannot do slice indexing "
            fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[idx] = 0

        with pytest.raises(TypeError, match=msg):
            s[idx]

    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
    def test_slice_float(self, idx, frame_or_series):
        """Float slices select and assign by label on a float index."""
        # same as above, but for floats
        index = Index(np.arange(5.0)) + 0.1
        s = gen_obj(frame_or_series, index)

        expected = s.iloc[3:4]
        for idxr in [lambda x: x.loc, lambda x: x]:

            # getitem
            result = idxr(s)[idx]
            assert isinstance(result, type(s))
            tm.assert_equal(result, expected)

            # setitem
            s2 = s.copy()
            idxr(s2)[idx] = 0
            result = idxr(s2)[idx].values.ravel()
            assert (result == 0).all()

    def test_floating_index_doc_example(self):
        """On a float index ``[]`` and ``.loc`` are label based, ``.iloc`` is not."""
        index = Index([1.5, 2, 3, 4.5, 5])
        s = Series(range(5), index=index)
        assert s[3] == 2
        assert s.loc[3] == 2
        assert s.iloc[3] == 3

    def test_floating_misc(self):
        """Scalar, slice and list indexing of a Series with a float index."""
        # related 236
        # scalar/slicing of a float index
        s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)

        # label based slicing
        result1 = s[1.0:3.0]
        result2 = s.loc[1.0:3.0]
        tm.assert_series_equal(result1, result2)

        # exact indexing when found
        assert s[5.0] == s.loc[5.0]
        assert s[5] == s.loc[5]
        assert s[5.0] == s[5]

        # value not found (and no fallbacking at all)

        # scalar integers
        with pytest.raises(KeyError, match=r"^4$"):
            s.loc[4]
        with pytest.raises(KeyError, match=r"^4$"):
            s[4]

        # fancy floats/integers create the correct entry (as nan)
        # fancy tests
        expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
        for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
            tm.assert_series_equal(s[fancy_idx], expected)
            tm.assert_series_equal(s.loc[fancy_idx], expected)

        expected = Series([2, 0], index=Index([5, 0], dtype="int64"))
        for fancy_idx in [[5, 0], np.array([5, 0])]:  # int
            tm.assert_series_equal(s[fancy_idx], expected)
            tm.assert_series_equal(s.loc[fancy_idx], expected)

        # all should return the same as we are slicing 'the same'
        result1 = s.loc[2:5]
        result2 = s.loc[2.0:5.0]
        result3 = s.loc[2.0:5]
        result4 = s.loc[2.1:5]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)
        tm.assert_series_equal(result1, result4)

        # previously this did fallback indexing
        result1 = s[2:5]
        result2 = s[2.0:5.0]
        result3 = s[2.0:5]
        result4 = s[2.1:5]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)
        tm.assert_series_equal(result1, result4)

        # combined test
        tm.assert_series_equal(s.loc[2:5], s[2:5])

        # list selection
        result1 = s[[0.0, 5, 10]]
        result2 = s.loc[[0.0, 5, 10]]
        result3 = s.iloc[[0, 2, 4]]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)

        with pytest.raises(KeyError, match="with any missing labels"):
            s[[1.6, 5, 10]]
        with pytest.raises(KeyError, match="with any missing labels"):
            s.loc[[1.6, 5, 10]]

        with pytest.raises(KeyError, match="with any missing labels"):
            s[[0, 1, 2]]
        with pytest.raises(KeyError, match="with any missing labels"):
            s.loc[[0, 1, 2]]

        result1 = s.loc[[2.5, 5]]
        tm.assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0]))

        result1 = s[[2.5]]
        result2 = s.loc[[2.5]]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, Series([1], index=[2.5]))

    def test_floating_tuples(self):
        """Tuple values survive float-label lookups (see gh-13509)."""
        s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo")

        result = s[0.0]
        assert result == (1, 1)

        # a duplicated float label returns every matching row
        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
        s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo")

        result = s[0.0]
        tm.assert_series_equal(result, expected)

    def test_float64index_slicing_bug(self):
        """Smoke test for the repr of ``value_counts`` (GH 5557)."""
        # GH 5557, related to slicing a float index
        ser = {
            256: 2321.0,
            1: 78.0,
            2: 2716.0,
            3: 0.0,
            4: 369.0,
            5: 0.0,
            6: 269.0,
            7: 0.0,
            8: 0.0,
            9: 0.0,
            10: 3536.0,
            11: 0.0,
            12: 24.0,
            13: 0.0,
            14: 931.0,
            15: 0.0,
            16: 101.0,
            17: 78.0,
            18: 9643.0,
            19: 0.0,
            20: 0.0,
            21: 0.0,
            22: 63761.0,
            23: 0.0,
            24: 446.0,
            25: 0.0,
            26: 34773.0,
            27: 0.0,
            28: 729.0,
            29: 78.0,
            30: 0.0,
            31: 0.0,
            32: 3374.0,
            33: 0.0,
            34: 1391.0,
            35: 0.0,
            36: 361.0,
            37: 0.0,
            38: 61808.0,
            39: 0.0,
            40: 0.0,
            41: 0.0,
            42: 6677.0,
            43: 0.0,
            44: 802.0,
            45: 0.0,
            46: 2691.0,
            47: 0.0,
            48: 3582.0,
            49: 0.0,
            50: 734.0,
            51: 0.0,
            52: 627.0,
            53: 70.0,
            54: 2584.0,
            55: 0.0,
            56: 324.0,
            57: 0.0,
            58: 605.0,
            59: 0.0,
            60: 0.0,
            61: 0.0,
            62: 3989.0,
            63: 10.0,
            64: 42.0,
            65: 0.0,
            66: 904.0,
            67: 0.0,
            68: 88.0,
            69: 70.0,
            70: 8172.0,
            71: 0.0,
            72: 0.0,
            73: 0.0,
            74: 64902.0,
            75: 0.0,
            76: 347.0,
            77: 0.0,
            78: 36605.0,
            79: 0.0,
            80: 379.0,
            81: 70.0,
            82: 0.0,
            83: 0.0,
            84: 3001.0,
            85: 0.0,
            86: 1630.0,
            87: 7.0,
            88: 364.0,
            89: 0.0,
            90: 67404.0,
            91: 9.0,
            92: 0.0,
            93: 0.0,
            94: 7685.0,
            95: 0.0,
            96: 1017.0,
            97: 0.0,
            98: 2831.0,
            99: 0.0,
            100: 2963.0,
            101: 0.0,
            102: 854.0,
            103: 0.0,
            104: 0.0,
            105: 0.0,
            106: 0.0,
            107: 0.0,
            108: 0.0,
            109: 0.0,
            110: 0.0,
            111: 0.0,
            112: 0.0,
            113: 0.0,
            114: 0.0,
            115: 0.0,
            116: 0.0,
            117: 0.0,
            118: 0.0,
            119: 0.0,
            120: 0.0,
            121: 0.0,
            122: 0.0,
            123: 0.0,
            124: 0.0,
            125: 0.0,
            126: 67744.0,
            127: 22.0,
            128: 264.0,
            129: 0.0,
            260: 197.0,
            268: 0.0,
            265: 0.0,
            269: 0.0,
            261: 0.0,
            266: 1198.0,
            267: 0.0,
            262: 2629.0,
            258: 775.0,
            257: 0.0,
            263: 0.0,
            259: 0.0,
            264: 163.0,
            250: 10326.0,
            251: 0.0,
            252: 1228.0,
            253: 0.0,
            254: 2769.0,
            255: 0.0,
        }

        # smoke test for the repr
        s = Series(ser)
        result = s.value_counts()
        str(result)