""" test positional based indexing with iloc """

from datetime import datetime
import re
from warnings import catch_warnings, simplefilter

import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalDtype,
    DataFrame,
    Index,
    NaT,
    Series,
    concat,
    date_range,
    isna,
)
import pandas._testing as tm
from pandas.api.types import is_scalar
from pandas.core.indexing import IndexingError
from pandas.tests.indexing.common import Base

# We pass through the error message from numpy
_slice_iloc_msg = re.escape(
    "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) "
    "and integer or boolean arrays are valid indices"
)


class TestiLoc(Base):
    # Tests that go through the shared ``Base.check_result`` helper.
    # NOTE(review): ``check_result`` is defined outside this file; presumably
    # it applies the given indexer to each fixture kind listed in ``typs`` and
    # tolerates the ``fails`` exception -- confirm against the Base class.

    def test_iloc_getitem_int(self):
        # integer
        self.check_result(
            "iloc",
            2,
            typs=["labels", "mixed", "ts", "floats", "empty"],
            fails=IndexError,
        )

    def test_iloc_getitem_neg_int(self):
        # neg integer
        self.check_result(
            "iloc",
            -1,
            typs=["labels", "mixed", "ts", "floats", "empty"],
            fails=IndexError,
        )

    def test_iloc_getitem_list_int(self):
        self.check_result(
            "iloc",
            [0, 1, 2],
            typs=["labels", "mixed", "ts", "floats", "empty"],
            fails=IndexError,
        )

        # array of ints (GH5006), make sure that a single indexer is returning
        # the correct type


class TestiLoc2:
    # TODO: better name, just separating out things that don't rely on base class

    def test_is_scalar_access(self):
        # GH#32085 index with duplicates doesn't matter for _is_scalar_access
        index = Index([1, 2, 1])
        ser = Series(range(3), index=index)

        assert ser.iloc._is_scalar_access((1,))

        df = ser.to_frame()
        assert df.iloc._is_scalar_access((1, 0))

    def test_iloc_exceeds_bounds(self):

        # GH6296
        # iloc should allow indexers that exceed the bounds
        df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE"))

        # lists of positions should raise IndexError!
        msg = "positional indexers are out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.iloc[:, [0, 1, 2, 3, 4, 5]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[1, 30]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[1, -30]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[100]]

        s = df["A"]
        with pytest.raises(IndexError, match=msg):
            s.iloc[[100]]
        with pytest.raises(IndexError, match=msg):
            s.iloc[[-100]]

        # still raise on a single indexer
        msg = "single positional indexer is out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.iloc[30]
        with pytest.raises(IndexError, match=msg):
            df.iloc[-30]

        # GH10779
        # single positive/negative indexer exceeding Series bounds should raise
        # an IndexError
        with pytest.raises(IndexError, match=msg):
            s.iloc[30]
        with pytest.raises(IndexError, match=msg):
            s.iloc[-30]

        # slices are ok
        result = df.iloc[:, 4:10]  # 0 < start < len < stop
        expected = df.iloc[:, 4:]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -4:-10]  # stop < 0 < start < len
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:4:-1]  # 0 < stop < len < start (down)
        expected = df.iloc[:, :4:-1]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 4:-10:-1]  # stop < 0 < start < len (down)
        expected = df.iloc[:, 4::-1]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -10:4]  # start < 0 < stop < len
        expected = df.iloc[:, :4]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:4]  # 0 < stop < len < start
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -10:-11:-1]  # stop < start < 0 < len (down)
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:11]  # 0 < len < start < stop
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        # slice bounds exceeding is ok
        result = s.iloc[18:30]
        expected = s.iloc[18:]
        tm.assert_series_equal(result, expected)

        result = s.iloc[30:]
        expected = s.iloc[:0]
        tm.assert_series_equal(result, expected)

        result = s.iloc[30::-1]
        expected = s.iloc[::-1]
        tm.assert_series_equal(result, expected)

        # doc example
        def check(result, expected):
            # repr and dtypes are evaluated only to make sure they do not raise
            str(result)
            result.dtypes
            tm.assert_frame_equal(result, expected)

        dfl = DataFrame(np.random.randn(5, 2), columns=list("AB"))
        check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
        check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
        check(dfl.iloc[4:6], dfl.iloc[[4]])

        msg = "positional indexers are out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            dfl.iloc[[4, 5, 6]]
        msg = "single positional indexer is out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            dfl.iloc[:, 4]

    @pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))])
    @pytest.mark.parametrize(
        "index_vals,column_vals",
        [
            (slice(None), ["A", "D"]),
            (["1", "2"], slice(None)),
            ([datetime(2019, 1, 1)], slice(None)),
        ],
    )
    def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals):
        # GH 25753
        df = DataFrame(
            np.random.randn(len(index), len(columns)), index=index, columns=columns
        )
        msg = ".iloc requires numeric indexers, got"
        with pytest.raises(IndexError, match=msg):
            df.iloc[index_vals, column_vals]

    @pytest.mark.parametrize("dims", [1, 2])
    def test_iloc_getitem_invalid_scalar(self, dims):
        # GH 21982

        if dims == 1:
            s = Series(np.arange(10))
        else:
            s = DataFrame(np.arange(100).reshape(10, 10))

        with pytest.raises(TypeError, match="Cannot index by location index"):
            s.iloc["a"]

    def test_iloc_array_not_mutating_negative_indices(self):

        # GH 21867
        array_with_neg_numbers = np.array([1, 2, -1])
        array_copy = array_with_neg_numbers.copy()
        df = DataFrame(
            {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]},
            index=[1, 2, 3],
        )
        # the indexer passed to iloc must be left untouched on both axes
        df.iloc[array_with_neg_numbers]
        tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
        df.iloc[:, array_with_neg_numbers]
        tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)

    def test_iloc_getitem_neg_int_can_reach_first_index(self):
        # GH10547 and GH10779
        # negative integers should be able to reach index 0
        df = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]})
        s = df["A"]

        expected = df.iloc[0]
        result = df.iloc[-3]
        tm.assert_series_equal(result, expected)

        expected = df.iloc[[0]]
        result = df.iloc[[-3]]
        tm.assert_frame_equal(result, expected)

        expected = s.iloc[0]
        result = s.iloc[-3]
        assert result == expected

        expected = s.iloc[[0]]
        result = s.iloc[[-3]]
        tm.assert_series_equal(result, expected)

        # check the length 1 Series case highlighted in GH10547
        expected = Series(["a"], index=["A"])
        result = expected.iloc[[-1]]
        tm.assert_series_equal(result, expected)

    def test_iloc_getitem_dups(self):
        # GH 6766
        df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
        df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
        df = concat([df1, df2], axis=1)

        # cross-sectional indexing
        result = df.iloc[0, 0]
        assert isna(result)

        result = df.iloc[0, :]
        expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0)
        tm.assert_series_equal(result, expected)

    def test_iloc_getitem_array(self):
        # TODO: test something here?
        pass

    def test_iloc_getitem_bool(self):
        # TODO: test something here?
        pass

    @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
    def test_iloc_getitem_bool_diff_len(self, index):
        # GH26658
        s = Series([1, 2, 3])
        msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}"
        with pytest.raises(IndexError, match=msg):
            _ = s.iloc[index]

    def test_iloc_getitem_slice(self):
        # TODO: test something here?
        pass

    def test_iloc_getitem_slice_dups(self):

        df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"])
        df2 = DataFrame(
            np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
        )

        # axis=1
        df = concat([df1, df2], axis=1)
        tm.assert_frame_equal(df.iloc[:, :4], df1)
        tm.assert_frame_equal(df.iloc[:, 4:], df2)

        df = concat([df2, df1], axis=1)
        tm.assert_frame_equal(df.iloc[:, :2], df2)
        tm.assert_frame_equal(df.iloc[:, 2:], df1)

        exp = concat([df2, df1.iloc[:, [0]]], axis=1)
        tm.assert_frame_equal(df.iloc[:, 0:3], exp)

        # axis=0
        df = concat([df, df], axis=0)
        tm.assert_frame_equal(df.iloc[0:10, :2], df2)
        tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
        tm.assert_frame_equal(df.iloc[10:, :2], df2)
        tm.assert_frame_equal(df.iloc[10:, 2:], df1)

    def test_iloc_setitem(self):
        df = DataFrame(
            np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3)
        )

        df.iloc[1, 1] = 1
        result = df.iloc[1, 1]
        assert result == 1

        df.iloc[:, 2:3] = 0
        # build the expectation independently of ``df``; reading it back from
        # the same slice would make the comparison a tautology
        expected = DataFrame(0.0, index=df.index, columns=df.columns[2:3])
        result = df.iloc[:, 2:3]
        tm.assert_frame_equal(result, expected)

        # GH5771
        s = Series(0, index=[4, 5, 6])
        s.iloc[1:2] += 1
        expected = Series([0, 1, 0], index=[4, 5, 6])
        tm.assert_series_equal(s, expected)

    def test_iloc_setitem_list(self):

        # setitem with an iloc list
        df = DataFrame(
            np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
        )
        # result discarded on purpose; presumably a smoke check that the
        # list/list lookup itself does not raise before it is used for setitem
        df.iloc[[0, 1], [1, 2]]
        df.iloc[[0, 1], [1, 2]] += 100

        expected = DataFrame(
            np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
            index=["A", "B", "C"],
            columns=["A", "B", "C"],
        )
        tm.assert_frame_equal(df, expected)

    def test_iloc_setitem_pandas_object(self):
        # GH 17193
        s_orig = Series([0, 1, 2, 3])
        expected = Series([0, -1, -2, 3])

        s = s_orig.copy()
        s.iloc[Series([1, 2])] = [-1, -2]
        tm.assert_series_equal(s, expected)

        s = s_orig.copy()
        s.iloc[Index([1, 2])] = [-1, -2]
        tm.assert_series_equal(s, expected)

    def test_iloc_setitem_dups(self):

        # GH 6766
        # iloc with a mask aligning from another iloc
        df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
        df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
        df = concat([df1, df2], axis=1)

        expected = df.fillna(3)
        inds = np.isnan(df.iloc[:, 0])
        mask = inds[inds].index
        df.iloc[mask, 0] = df.iloc[mask, 2]
        tm.assert_frame_equal(df, expected)

        # del a dup column across blocks
        expected = DataFrame({0: [1, 2], 1: [3, 4]})
        expected.columns = ["B", "B"]
        del df["A"]
        tm.assert_frame_equal(df, expected)

        # assign back to self
        df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
        tm.assert_frame_equal(df, expected)

        # reversed x 2
        df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
        df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
        tm.assert_frame_equal(df, expected)

    def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(self):
        # Same as the "assign back to self" check in test_iloc_setitem_dups
        # but on a DataFrame with multiple blocks
        df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"])

        df.iloc[:, 0] = df.iloc[:, 0].astype("f8")
        # guard: the setup above must really have produced two blocks
        assert len(df._mgr.blocks) == 2
        expected = df.copy()

        # assign back to self
        df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]

        tm.assert_frame_equal(df, expected)

    # TODO: GH#27620 this test used to compare iloc against ix; check if this
    #  is redundant with another test comparing iloc against loc
    def test_iloc_getitem_frame(self):
        df = DataFrame(
            np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2)
        )

        result = df.iloc[2]
        exp = df.loc[4]
        tm.assert_series_equal(result, exp)

        result = df.iloc[2, 2]
        exp = df.loc[4, 4]
        assert result == exp

        # slice
        result = df.iloc[4:8]
        expected = df.loc[8:14]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 2:3]
        expected = df.loc[:, 4:5]
        tm.assert_frame_equal(result, expected)

        # list of integers
        result = df.iloc[[0, 1, 3]]
        expected = df.loc[[0, 2, 6]]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[[0, 1, 3], [0, 1]]
        expected = df.loc[[0, 2, 6], [0, 2]]
        tm.assert_frame_equal(result, expected)

        # neg indices
        result = df.iloc[[-1, 1, 3], [-1, 1]]
        expected = df.loc[[18, 2, 6], [6, 2]]
        tm.assert_frame_equal(result, expected)

        # dups indices
        result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
        expected = df.loc[[18, 18, 2, 6], [6, 2]]
        tm.assert_frame_equal(result, expected)

        # with index-like
        s = Series(index=range(1, 5), dtype=object)
        result = df.iloc[s.index]
        expected = df.loc[[2, 4, 6, 8]]
        tm.assert_frame_equal(result, expected)

    def test_iloc_getitem_labelled_frame(self):
        # try with labelled frame
        df = DataFrame(
            np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD")
        )

        result = df.iloc[1, 1]
        exp = df.loc["b", "B"]
        assert result == exp

        result = df.iloc[:, 2:3]
        expected = df.loc[:, ["C"]]
        tm.assert_frame_equal(result, expected)

        # negative indexing
        result = df.iloc[-1, -1]
        exp = df.loc["j", "D"]
        assert result == exp

        # out-of-bounds exception
        msg = "single positional indexer is out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.iloc[10, 5]

        # trying to use a label
        msg = (
            r"Location based indexing can only have \[integer, integer "
            r"slice \(START point is INCLUDED, END point is EXCLUDED\), "
            r"listlike of integers, boolean array\] types"
        )
        with pytest.raises(ValueError, match=msg):
            df.iloc["j", "D"]

    def test_iloc_getitem_doc_issue(self):

        # multi axis slicing issue with single block
        # surfaced in GH 6059

        arr = np.random.randn(6, 4)
        index = date_range("20130101", periods=6)
        columns = list("ABCD")
        df = DataFrame(arr, index=index, columns=columns)

        # defines ref_locs
        df.describe()

        result = df.iloc[3:5, 0:2]
        str(result)
        result.dtypes

        expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=columns[0:2])
        tm.assert_frame_equal(result, expected)

        # for dups
        df.columns = list("aaaa")
        result = df.iloc[3:5, 0:2]
        str(result)
        result.dtypes

        expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=list("aa"))
        tm.assert_frame_equal(result, expected)

        # related
        arr = np.random.randn(6, 4)
        index = list(range(0, 12, 2))
        columns = list(range(0, 8, 2))
        df = DataFrame(arr, index=index, columns=columns)

        df._mgr.blocks[0].mgr_locs
        result = df.iloc[1:5, 2:4]
        str(result)
        result.dtypes
        expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4])
        tm.assert_frame_equal(result, expected)

    def test_iloc_setitem_series(self):
        df = DataFrame(
            np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD")
        )

        df.iloc[1, 1] = 1
        result = df.iloc[1, 1]
        assert result == 1

        df.iloc[:, 2:3] = 0
        # independent expectation; comparing the slice with itself proves nothing
        expected = DataFrame(0.0, index=df.index, columns=df.columns[2:3])
        result = df.iloc[:, 2:3]
        tm.assert_frame_equal(result, expected)

        s = Series(np.random.randn(10), index=range(0, 20, 2))

        s.iloc[1] = 1
        result = s.iloc[1]
        assert result == 1

        s.iloc[:4] = 0
        # independent expectation, see above
        expected = Series(0.0, index=s.index[:4])
        result = s.iloc[:4]
        tm.assert_series_equal(result, expected)

        s = Series([-1] * 6)
        s.iloc[0::2] = [0, 2, 4]
        s.iloc[1::2] = [1, 3, 5]
        result = s
        expected = Series([0, 1, 2, 3, 4, 5])
        tm.assert_series_equal(result, expected)

    def test_iloc_setitem_list_of_lists(self):

        # GH 7551
        # list-of-list is set incorrectly in mixed vs. single dtyped frames
        df = DataFrame(
            {"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")}
        )
        df.iloc[2:4] = [[10, 11], [12, 13]]
        expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame(
            {"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")}
        )
        df.iloc[2:4] = [["x", 11], ["y", 13]]
        expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]})
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
    @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
    def test_iloc_setitem_with_scalar_index(self, indexer, value):
        # GH #19474
        # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
        # elementwise, not using "setter('A', ['Z'])".

        df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
        df.iloc[0, indexer] = value
        result = df.iloc[0, 0]

        assert is_scalar(result) and result == "Z"

    def test_iloc_mask(self):

        # GH 3631, iloc with a mask (of a series) should raise
        df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
        mask = df.a % 2 == 0
        msg = "iLocation based boolean indexing cannot use an indexable as a mask"
        with pytest.raises(ValueError, match=msg):
            df.iloc[mask]
        mask.index = range(len(mask))
        msg = "iLocation based boolean indexing on an integer type is not available"
        with pytest.raises(NotImplementedError, match=msg):
            df.iloc[mask]

        # ndarray ok
        result = df.iloc[np.array([True] * len(mask), dtype=bool)]
        tm.assert_frame_equal(result, df)

        # the possibilities
        locs = np.arange(4)
        nums = 2 ** locs
        reps = [bin(num) for num in nums]
        df = DataFrame({"locs": locs, "nums": nums}, reps)

        # (mask index source, accessor) -> either the binary repr of the
        # selected ``nums`` sum or the error message that must be raised
        expected = {
            (None, ""): "0b1100",
            (None, ".loc"): "0b1100",
            (None, ".iloc"): "0b1100",
            ("index", ""): "0b11",
            ("index", ".loc"): "0b11",
            ("index", ".iloc"): (
                "iLocation based boolean indexing cannot use an indexable as a mask"
            ),
            ("locs", ""): "Unalignable boolean Series provided as indexer "
            "(index of the boolean Series and of the indexed "
            "object do not match).",
            ("locs", ".loc"): "Unalignable boolean Series provided as indexer "
            "(index of the boolean Series and of the "
            "indexed object do not match).",
            ("locs", ".iloc"): (
                "iLocation based boolean indexing on an "
                "integer type is not available"
            ),
        }

        # UserWarnings from reindex of a boolean mask
        with catch_warnings(record=True):
            simplefilter("ignore", UserWarning)
            for idx in [None, "index", "locs"]:
                mask = (df.nums > 2).values
                if idx:
                    mask = Series(mask, list(reversed(getattr(df, idx))))
                for method in ["", ".loc", ".iloc"]:
                    try:
                        if method:
                            accessor = getattr(df, method[1:])
                        else:
                            accessor = df
                        ans = str(bin(accessor[mask]["nums"].sum()))
                    except (ValueError, IndexingError, NotImplementedError) as e:
                        ans = str(e)

                    key = (
                        idx,
                        method,
                    )
                    r = expected.get(key)
                    if r != ans:
                        # ``r`` is the expectation, ``ans`` what was observed
                        raise AssertionError(
                            f"[{key}]: expected [{r}], received [{ans}]"
                        )

    def test_iloc_non_unique_indexing(self):

        # GH 4017, non-unique indexing (on the axis)
        df = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000})
        idx = np.arange(30) * 99
        expected = df.iloc[idx]

        df3 = concat([df, 2 * df, 3 * df])
        result = df3.iloc[idx]

        tm.assert_frame_equal(result, expected)

        df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
        df2 = concat([df2, 2 * df2, 3 * df2])

        with pytest.raises(KeyError, match="with any missing labels"):
            df2.loc[idx]

    def test_iloc_empty_list_indexer_is_ok(self):

        df = tm.makeCustomDataframe(5, 2)
        # vertical empty
        tm.assert_frame_equal(
            df.iloc[:, []],
            df.iloc[:, :0],
            check_index_type=True,
            check_column_type=True,
        )
        # horizontal empty
        tm.assert_frame_equal(
            df.iloc[[], :],
            df.iloc[:0, :],
            check_index_type=True,
            check_column_type=True,
        )
        # horizontal empty
        tm.assert_frame_equal(
            df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
        )

    def test_identity_slice_returns_new_object(self):
        # GH13873
        original_df = DataFrame({"a": [1, 2, 3]})
        sliced_df = original_df.iloc[:]
        assert sliced_df is not original_df

        # should be a shallow copy
        original_df["a"] = [4, 4, 4]
        assert (sliced_df["a"] == 4).all()

        original_series = Series([1, 2, 3, 4, 5, 6])
        sliced_series = original_series.iloc[:]
        assert sliced_series is not original_series

        # should also be a shallow copy
        original_series[:3] = [7, 8, 9]
        assert all(sliced_series[:3] == [7, 8, 9])

    def test_indexing_zerodim_np_array(self):
        # GH24919
        df = DataFrame([[1, 2], [3, 4]])
        result = df.iloc[np.array(0)]
        s = Series([1, 2], name=0)
        tm.assert_series_equal(result, s)

    def test_series_indexing_zerodim_np_array(self):
        # GH24919
        s = Series([1, 2])
        result = s.iloc[np.array(0)]
        assert result == 1

    @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457")
    def test_iloc_setitem_categorical_updates_inplace(self):
        # Mixed dtype ensures we go through take_split_path in setitem_with_indexer
        cat = Categorical(["A", "B", "C"])
        df = DataFrame({1: cat, 2: [1, 2, 3]})

        # This should modify our original values in-place
        df.iloc[:, 0] = cat[::-1]

        expected = Categorical(["C", "B", "A"])
        tm.assert_categorical_equal(cat, expected)

    def test_iloc_with_boolean_operation(self):
        # GH 20627
        result = DataFrame([[0, 1], [2, 3], [4, 5], [6, np.nan]])
        result.iloc[result.index <= 2] *= 2
        expected = DataFrame([[0, 2], [4, 6], [8, 10], [6, np.nan]])
        tm.assert_frame_equal(result, expected)

        result.iloc[result.index > 2] *= 2
        expected = DataFrame([[0, 2], [4, 6], [8, 10], [12, np.nan]])
        tm.assert_frame_equal(result, expected)

        result.iloc[[True, True, False, False]] *= 2
        expected = DataFrame([[0, 4], [8, 12], [8, 10], [12, np.nan]])
        tm.assert_frame_equal(result, expected)

        result.iloc[[False, False, True, True]] /= 2
        expected = DataFrame([[0.0, 4.0], [8.0, 12.0], [4.0, 5.0], [6.0, np.nan]])
        tm.assert_frame_equal(result, expected)

    def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self):
        # GH#29521
        df = DataFrame({"x": Categorical("a b c d e".split())})
        result = df.iloc[0]
        raw_cat = Categorical(["a"], categories=["a", "b", "c", "d", "e"])
        expected = Series(raw_cat, index=["x"], name=0, dtype="category")

        tm.assert_series_equal(result, expected)

    def test_iloc_getitem_categorical_values(self):
        # GH#14580
        # test iloc() on Series with Categorical data

        ser = Series([1, 2, 3]).astype("category")

        # get slice
        result = ser.iloc[0:2]
        expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

        # get list of indexes
        result = ser.iloc[[0, 1]]
        expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

        # get boolean array
        result = ser.iloc[[True, False, False]]
        expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("value", [None, NaT, np.nan])
    def test_iloc_setitem_td64_values_cast_na(self, value):
        # GH#18586
        series = Series([0, 1, 2], dtype="timedelta64[ns]")
        series.iloc[0] = value
        expected = Series([NaT, 1, 2], dtype="timedelta64[ns]")
        tm.assert_series_equal(series, expected)

    def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self):
        idx = Index([])
        obj = DataFrame(np.random.randn(len(idx), len(idx)), index=idx, columns=idx)
        nd3 = np.random.randint(5, size=(2, 2, 2))

        msg = f"Cannot set values with ndim > {obj.ndim}"
        with pytest.raises(ValueError, match=msg):
            obj.iloc[nd3] = 0

    @pytest.mark.parametrize("indexer", [lambda x: x.loc, lambda x: x.iloc])
    def test_iloc_getitem_read_only_values(self, indexer):
        # GH#10043 this is fundamentally a test for iloc, but test loc while
        #  we're here
        rw_array = np.eye(10)
        rw_df = DataFrame(rw_array)

        ro_array = np.eye(10)
        ro_array.setflags(write=False)
        ro_df = DataFrame(ro_array)

        tm.assert_frame_equal(indexer(rw_df)[[1, 2, 3]], indexer(ro_df)[[1, 2, 3]])
        tm.assert_frame_equal(indexer(rw_df)[[1]], indexer(ro_df)[[1]])
        tm.assert_series_equal(indexer(rw_df)[1], indexer(ro_df)[1])
        tm.assert_frame_equal(indexer(rw_df)[1:3], indexer(ro_df)[1:3])

    def test_iloc_getitem_readonly_key(self):
        # GH#17192 iloc with read-only array raising TypeError
        df = DataFrame({"data": np.ones(100, dtype="float64")})
        indices = np.array([1, 3, 6])
        indices.flags.writeable = False

        result = df.iloc[indices]
        expected = df.loc[[1, 3, 6]]
        tm.assert_frame_equal(result, expected)

        result = df["data"].iloc[indices]
        expected = df["data"].loc[[1, 3, 6]]
        tm.assert_series_equal(result, expected)

    def test_iloc_assign_series_to_df_cell(self):
        # GH 37593
        df = DataFrame(columns=["a"], index=[0])
        df.iloc[0, 0] = Series([1, 2, 3])
        expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0])
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("klass", [list, np.array])
    def test_iloc_setitem_bool_indexer(self, klass):
        # GH#36741
        df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
        indexer = klass([True, False, False])
        df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
        expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
    def test_iloc_setitem_pure_position_based(self, indexer):
        # GH#22046
        df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
        df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
        df2.iloc[:, indexer] = df1.iloc[:, [0]]
        expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
        tm.assert_frame_equal(df2, expected)

    def test_iloc_setitem_dictionary_value(self):
        # GH#37728
        df = DataFrame({"x": [1, 2], "y": [2, 2]})
        rhs = {"x": 9, "y": 99}
        df.iloc[1] = rhs
        expected = DataFrame({"x": [1, 9], "y": [2, 99]})
        tm.assert_frame_equal(df, expected)


class TestILocErrors:
    # NB: this test should work for _any_ Series we can pass as
    #  series_with_simple_index
    def test_iloc_float_raises(self, series_with_simple_index, frame_or_series):
        # GH#4892
        # float_indexers should raise exceptions
        # on appropriate Index types & accessors
        # this duplicates the code below
        # but is specifically testing for the error
        # message

        obj = series_with_simple_index
        if frame_or_series is DataFrame:
            obj = obj.to_frame()

        msg = "Cannot index by location index with a non-integer key"
        with pytest.raises(TypeError, match=msg):
            obj.iloc[3.0]

        with pytest.raises(IndexError, match=_slice_iloc_msg):
            obj.iloc[3.0] = 0


class TestILocSetItemDuplicateColumns:
    def test_iloc_setitem_scalar_duplicate_columns(self):
        # GH#15686, duplicate columns and mixed dtype
        df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
        df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
        df = concat([df1, df2], axis=1)
        df.iloc[0, 0] = -1

        assert df.iloc[0, 0] == -1
        # the other "A" column (position 2) must be left untouched
        assert df.iloc[0, 2] == 3
        assert df.dtypes.iloc[2] == np.int64

    def test_iloc_setitem_list_duplicate_columns(self):
        # GH#22036 setting with same-sized list
        df = DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"])

        df.iloc[:, 2] = ["str3"]

        expected = DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"])
        tm.assert_frame_equal(df, expected)

    def test_iloc_setitem_series_duplicate_columns(self):
        df = DataFrame(
            np.arange(8, dtype=np.int64).reshape(2, 4), columns=["A", "B", "A", "B"]
        )
        df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64)
        # only position 0 was assigned; the second "A" column keeps its dtype
        assert df.dtypes.iloc[2] == np.int64


class TestILocCallable:
    def test_frame_iloc_getitem_callable(self):
        # GH#11485
        df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))

        # return location
        res = df.iloc[lambda x: [1, 3]]
        tm.assert_frame_equal(res, df.iloc[[1, 3]])

        res = df.iloc[lambda x: [1, 3], :]
        tm.assert_frame_equal(res, df.iloc[[1, 3], :])

        res = df.iloc[lambda x: [1, 3], lambda x: 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[lambda x: [1, 3], lambda x: [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

        # mixture
        res = df.iloc[[1, 3], lambda x: 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[[1, 3], lambda x: [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

        res = df.iloc[lambda x: [1, 3], 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[lambda x: [1, 3], [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

    def test_frame_iloc_setitem_callable(self):
        # GH#11485
        df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))

        # return location
        res = df.copy()
        res.iloc[lambda x: [1, 3]] = 0
        exp = df.copy()
        exp.iloc[[1, 3]] = 0
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], :] = -1
        exp = df.copy()
        exp.iloc[[1, 3], :] = -1
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], lambda x: 0] = 5
        exp = df.copy()
        exp.iloc[[1, 3], 0] = 5
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], lambda x: [0]] = 25
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = 25
        tm.assert_frame_equal(res, exp)

        # mixture
        res = df.copy()
        res.iloc[[1, 3], lambda x: 0] = -3
        exp = df.copy()
        exp.iloc[[1, 3], 0] = -3
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[[1, 3], lambda x: [0]] = -5
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = -5
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], 0] = 10
        exp = df.copy()
        exp.iloc[[1, 3], 0] = 10
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], [0]] = [-5, -5]
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = [-5, -5]
        tm.assert_frame_equal(res, exp)


class TestILocSeries:
    def test_iloc(self):
        ser = Series(np.random.randn(10), index=list(range(0, 20, 2)))

        for i in range(len(ser)):
            result = ser.iloc[i]
            exp = ser[ser.index[i]]
            tm.assert_almost_equal(result, exp)

        # pass a slice
        result = ser.iloc[slice(1, 3)]
        expected = ser.loc[2:4]
        tm.assert_series_equal(result, expected)

        # test slice is a view
        result[:] = 0
        assert (ser[1:3] == 0).all()

        # list of integers
        result = ser.iloc[[0, 2, 3, 4, 5]]
        expected = ser.reindex(ser.index[[0, 2, 3, 4, 5]])
        tm.assert_series_equal(result, expected)

    def test_iloc_getitem_nonunique(self):
        ser = Series([0, 1, 2], index=[0, 1, 0])
        assert ser.iloc[2] == 2

    def test_iloc_setitem_pure_position_based(self):
        # GH#22046
        ser1 = Series([1, 2, 3])
        ser2 = Series([4, 5, 6], index=[1, 0, 2])
        ser1.iloc[1:3] = ser2.iloc[1:3]
        expected = Series([1, 5, 6])
        tm.assert_series_equal(ser1, expected)