1""" test label based indexing with loc """ 2from datetime import datetime, time, timedelta 3from io import StringIO 4import re 5 6from dateutil.tz import gettz 7import numpy as np 8import pytest 9 10import pandas.util._test_decorators as td 11 12import pandas as pd 13from pandas import ( 14 Categorical, 15 CategoricalIndex, 16 DataFrame, 17 Index, 18 MultiIndex, 19 Series, 20 SparseDtype, 21 Timedelta, 22 Timestamp, 23 date_range, 24 timedelta_range, 25 to_datetime, 26 to_timedelta, 27) 28import pandas._testing as tm 29from pandas.api.types import is_scalar 30from pandas.tests.indexing.common import Base 31 32 33class TestLoc(Base): 34 def test_loc_getitem_int(self): 35 36 # int label 37 self.check_result("loc", 2, typs=["labels"], fails=KeyError) 38 39 def test_loc_getitem_label(self): 40 41 # label 42 self.check_result("loc", "c", typs=["empty"], fails=KeyError) 43 44 def test_loc_getitem_label_out_of_range(self): 45 46 # out of range label 47 self.check_result( 48 "loc", "f", typs=["ints", "uints", "labels", "mixed", "ts"], fails=KeyError 49 ) 50 self.check_result("loc", "f", typs=["floats"], fails=KeyError) 51 self.check_result("loc", "f", typs=["floats"], fails=KeyError) 52 self.check_result("loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError) 53 self.check_result("loc", 20, typs=["labels"], fails=KeyError) 54 self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError) 55 self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError) 56 57 def test_loc_getitem_label_list(self): 58 # TODO: test something here? 59 # list of labels 60 pass 61 62 def test_loc_getitem_label_list_with_missing(self): 63 self.check_result("loc", [0, 1, 2], typs=["empty"], fails=KeyError) 64 self.check_result( 65 "loc", [0, 2, 10], typs=["ints", "uints", "floats"], axes=0, fails=KeyError 66 ) 67 68 self.check_result( 69 "loc", [3, 6, 7], typs=["ints", "uints", "floats"], axes=1, fails=KeyError 70 ) 71 72 # GH 17758 - MultiIndex and missing keys 73 self.check_result( 74 "loc", [(1, 3), (1, 4), (2, 5)], typs=["multi"], axes=0, fails=KeyError 75 ) 76 77 def test_loc_getitem_label_list_fails(self): 78 # fails 79 self.check_result( 80 "loc", [20, 30, 40], typs=["ints", "uints"], axes=1, fails=KeyError 81 ) 82 83 def test_loc_getitem_label_array_like(self): 84 # TODO: test something? 85 # array like 86 pass 87 88 def test_loc_getitem_bool(self): 89 # boolean indexers 90 b = [True, False, True, False] 91 92 self.check_result("loc", b, typs=["empty"], fails=IndexError) 93 94 def test_loc_getitem_label_slice(self): 95 96 # label slices (with ints) 97 98 # real label slices 99 100 # GH 14316 101 102 self.check_result( 103 "loc", 104 slice(1, 3), 105 typs=["labels", "mixed", "empty", "ts", "floats"], 106 fails=TypeError, 107 ) 108 109 self.check_result( 110 "loc", slice("20130102", "20130104"), typs=["ts"], axes=1, fails=TypeError 111 ) 112 113 self.check_result("loc", slice(2, 8), typs=["mixed"], axes=0, fails=TypeError) 114 self.check_result("loc", slice(2, 8), typs=["mixed"], axes=1, fails=KeyError) 115 116 self.check_result( 117 "loc", slice(2, 4, 2), typs=["mixed"], axes=0, fails=TypeError 118 ) 119 120 def test_setitem_from_duplicate_axis(self): 121 # GH#34034 122 df = DataFrame( 123 [[20, "a"], [200, "a"], [200, "a"]], 124 columns=["col1", "col2"], 125 index=[10, 1, 1], 126 ) 127 df.loc[1, "col1"] = np.arange(2) 128 expected = DataFrame( 129 [[20, "a"], [0, "a"], [1, "a"]], columns=["col1", "col2"], index=[10, 1, 1] 130 ) 131 tm.assert_frame_equal(df, expected) 132 133 134class TestLoc2: 135 # TODO: better name, just separating out things that rely on base class 136 137 def test_loc_getitem_missing_unicode_key(self): 138 df = DataFrame({"a": [1]}) 139 with pytest.raises(KeyError, match="\u05d0"): 140 df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError 141 142 def test_loc_getitem_dups(self): 143 # GH 5678 144 # repeated getitems on a dup index returning a ndarray 145 df = DataFrame( 146 np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)] 147 ) 148 expected = df.loc["A", 0] 149 result = df.loc[:, 0].loc["A"] 150 tm.assert_series_equal(result, expected) 151 152 def test_loc_getitem_dups2(self): 153 154 # GH4726 155 # dup indexing with iloc/loc 156 df = DataFrame( 157 [[1, 2, "foo", "bar", Timestamp("20130101")]], 158 columns=["a", "a", "a", "a", "a"], 159 index=[1], 160 ) 161 expected = Series( 162 [1, 2, "foo", "bar", Timestamp("20130101")], 163 index=["a", "a", "a", "a", "a"], 164 name=1, 165 ) 166 167 result = df.iloc[0] 168 tm.assert_series_equal(result, expected) 169 170 result = df.loc[1] 171 tm.assert_series_equal(result, expected) 172 173 def test_loc_setitem_dups(self): 174 175 # GH 6541 176 df_orig = DataFrame( 177 { 178 "me": list("rttti"), 179 "foo": list("aaade"), 180 "bar": np.arange(5, dtype="float64") * 1.34 + 2, 181 "bar2": np.arange(5, dtype="float64") * -0.34 + 2, 182 } 183 ).set_index("me") 184 185 indexer = ( 186 "r", 187 ["bar", "bar2"], 188 ) 189 df = df_orig.copy() 190 df.loc[indexer] *= 2.0 191 tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) 192 193 indexer = ( 194 "r", 195 "bar", 196 ) 197 df = df_orig.copy() 198 df.loc[indexer] *= 2.0 199 assert df.loc[indexer] == 2.0 * df_orig.loc[indexer] 200 201 indexer = ( 202 "t", 203 ["bar", "bar2"], 204 ) 205 df = df_orig.copy() 206 df.loc[indexer] *= 2.0 207 tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) 208 209 def test_loc_setitem_slice(self): 210 # GH10503 211 212 # assigning the same type should not change the type 213 df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")}) 214 ix = df1["a"] == 1 215 newb1 = df1.loc[ix, "b"] + 1 216 df1.loc[ix, "b"] = newb1 217 expected = DataFrame( 218 {"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")} 219 ) 220 tm.assert_frame_equal(df1, expected) 221 222 # assigning a new type should get the inferred type 223 df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") 224 ix = df1["a"] == 1 225 newb2 = df2.loc[ix, "b"] 226 df1.loc[ix, "b"] = newb2 227 expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") 228 tm.assert_frame_equal(df2, expected) 229 230 def test_loc_setitem_dtype(self): 231 # GH31340 232 df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}) 233 cols = ["a", "b", "c"] 234 df.loc[:, cols] = df.loc[:, cols].astype("float32") 235 236 expected = DataFrame( 237 {"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}, dtype="float32" 238 ) # id is inferred as object 239 240 tm.assert_frame_equal(df, expected) 241 242 def test_getitem_label_list_with_missing(self): 243 s = Series(range(3), index=["a", "b", "c"]) 244 245 # consistency 246 with pytest.raises(KeyError, match="with any missing labels"): 247 s[["a", "d"]] 248 249 s = Series(range(3)) 250 with pytest.raises(KeyError, match="with any missing labels"): 251 s[[0, 3]] 252 253 @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) 254 def test_loc_getitem_bool_diff_len(self, index): 255 # GH26658 256 s = Series([1, 2, 3]) 257 msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}" 258 with pytest.raises(IndexError, match=msg): 259 _ = s.loc[index] 260 261 def test_loc_getitem_int_slice(self): 262 # TODO: test something here? 263 pass 264 265 def test_loc_to_fail(self): 266 267 # GH3449 268 df = DataFrame( 269 np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"] 270 ) 271 272 # raise a KeyError? 273 msg = ( 274 r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are " 275 r"in the \[index\]\"" 276 ) 277 with pytest.raises(KeyError, match=msg): 278 df.loc[[1, 2], [1, 2]] 279 280 # GH 7496 281 # loc should not fallback 282 283 s = Series(dtype=object) 284 s.loc[1] = 1 285 s.loc["a"] = 2 286 287 with pytest.raises(KeyError, match=r"^-1$"): 288 s.loc[-1] 289 290 msg = ( 291 r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are " 292 r"in the \[index\]\"" 293 ) 294 with pytest.raises(KeyError, match=msg): 295 s.loc[[-1, -2]] 296 297 msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\"" 298 with pytest.raises(KeyError, match=msg): 299 s.loc[["4"]] 300 301 s.loc[-1] = 3 302 with pytest.raises(KeyError, match="with any missing labels"): 303 s.loc[[-1, -2]] 304 305 s["a"] = 2 306 msg = ( 307 r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are " 308 r"in the \[index\]\"" 309 ) 310 with pytest.raises(KeyError, match=msg): 311 s.loc[[-2]] 312 313 del s["a"] 314 315 with pytest.raises(KeyError, match=msg): 316 s.loc[[-2]] = 0 317 318 # inconsistency between .loc[values] and .loc[values,:] 319 # GH 7999 320 df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) 321 322 msg = ( 323 r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are " 324 r"in the \[index\]\"" 325 ) 326 with pytest.raises(KeyError, match=msg): 327 df.loc[[3], :] 328 329 with pytest.raises(KeyError, match=msg): 330 df.loc[[3]] 331 332 def test_loc_getitem_list_with_fail(self): 333 # 15747 334 # should KeyError if *any* missing labels 335 336 s = Series([1, 2, 3]) 337 338 s.loc[[2]] 339 340 with pytest.raises( 341 KeyError, 342 match=re.escape( 343 "\"None of [Int64Index([3], dtype='int64')] are in the [index]\"" 344 ), 345 ): 346 s.loc[[3]] 347 348 # a non-match and a match 349 with pytest.raises(KeyError, match="with any missing labels"): 350 s.loc[[2, 3]] 351 352 def test_loc_index(self): 353 # gh-17131 354 # a boolean index should index like a boolean numpy array 355 356 df = DataFrame( 357 np.random.random(size=(5, 10)), 358 index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"], 359 ) 360 361 mask = df.index.map(lambda x: "alpha" in x) 362 expected = df.loc[np.array(mask)] 363 364 result = df.loc[mask] 365 tm.assert_frame_equal(result, expected) 366 367 result = df.loc[mask.values] 368 tm.assert_frame_equal(result, expected) 369 370 result = df.loc[pd.array(mask, dtype="boolean")] 371 tm.assert_frame_equal(result, expected) 372 373 def test_loc_general(self): 374 375 df = DataFrame( 376 np.random.rand(4, 4), 377 columns=["A", "B", "C", "D"], 378 index=["A", "B", "C", "D"], 379 ) 380 381 # want this to work 382 result = df.loc[:, "A":"B"].iloc[0:2, :] 383 assert (result.columns == ["A", "B"]).all() 384 assert (result.index == ["A", "B"]).all() 385 386 # mixed type 387 result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0] 388 expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0) 389 tm.assert_series_equal(result, expected) 390 assert result.dtype == object 391 392 def test_loc_setitem_consistency(self): 393 # GH 6149 394 # coerce similarly for setitem and loc when rows have a null-slice 395 expected = DataFrame( 396 { 397 "date": Series(0, index=range(5), dtype=np.int64), 398 "val": Series(range(5), dtype=np.int64), 399 } 400 ) 401 402 df = DataFrame( 403 { 404 "date": date_range("2000-01-01", "2000-01-5"), 405 "val": Series(range(5), dtype=np.int64), 406 } 407 ) 408 df.loc[:, "date"] = 0 409 tm.assert_frame_equal(df, expected) 410 411 df = DataFrame( 412 { 413 "date": date_range("2000-01-01", "2000-01-5"), 414 "val": Series(range(5), dtype=np.int64), 415 } 416 ) 417 df.loc[:, "date"] = np.array(0, dtype=np.int64) 418 tm.assert_frame_equal(df, expected) 419 420 df = DataFrame( 421 { 422 "date": date_range("2000-01-01", "2000-01-5"), 423 "val": Series(range(5), dtype=np.int64), 424 } 425 ) 426 df.loc[:, "date"] = np.array([0, 0, 0, 0, 0], dtype=np.int64) 427 tm.assert_frame_equal(df, expected) 428 429 expected = DataFrame( 430 { 431 "date": Series("foo", index=range(5)), 432 "val": Series(range(5), dtype=np.int64), 433 } 434 ) 435 df = DataFrame( 436 { 437 "date": date_range("2000-01-01", "2000-01-5"), 438 "val": Series(range(5), dtype=np.int64), 439 } 440 ) 441 df.loc[:, "date"] = "foo" 442 tm.assert_frame_equal(df, expected) 443 444 expected = DataFrame( 445 { 446 "date": Series(1.0, index=range(5)), 447 "val": Series(range(5), dtype=np.int64), 448 } 449 ) 450 df = DataFrame( 451 { 452 "date": date_range("2000-01-01", "2000-01-5"), 453 "val": Series(range(5), dtype=np.int64), 454 } 455 ) 456 df.loc[:, "date"] = 1.0 457 tm.assert_frame_equal(df, expected) 458 459 # GH 15494 460 # setting on frame with single row 461 df = DataFrame({"date": Series([Timestamp("20180101")])}) 462 df.loc[:, "date"] = "string" 463 expected = DataFrame({"date": Series(["string"])}) 464 tm.assert_frame_equal(df, expected) 465 466 def test_loc_setitem_consistency_empty(self): 467 # empty (essentially noops) 468 expected = DataFrame(columns=["x", "y"]) 469 expected["x"] = expected["x"].astype(np.int64) 470 df = DataFrame(columns=["x", "y"]) 471 df.loc[:, "x"] = 1 472 tm.assert_frame_equal(df, expected) 473 474 df = DataFrame(columns=["x", "y"]) 475 df["x"] = 1 476 tm.assert_frame_equal(df, expected) 477 478 def test_loc_setitem_consistency_slice_column_len(self): 479 # .loc[:,column] setting with slice == len of the column 480 # GH10408 481 data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat 482Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse 483Region,Site,RespondentID,,,,, 484Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, 485Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes 486Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, 487Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" 488 489 df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) 490 df.loc[:, ("Respondent", "StartDate")] = pd.to_datetime( 491 df.loc[:, ("Respondent", "StartDate")] 492 ) 493 df.loc[:, ("Respondent", "EndDate")] = pd.to_datetime( 494 df.loc[:, ("Respondent", "EndDate")] 495 ) 496 df.loc[:, ("Respondent", "Duration")] = ( 497 df.loc[:, ("Respondent", "EndDate")] 498 - df.loc[:, ("Respondent", "StartDate")] 499 ) 500 501 df.loc[:, ("Respondent", "Duration")] = df.loc[ 502 :, ("Respondent", "Duration") 503 ].astype("timedelta64[s]") 504 expected = Series( 505 [1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration") 506 ) 507 tm.assert_series_equal(df[("Respondent", "Duration")], expected) 508 509 @pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"]) 510 def test_loc_assign_non_ns_datetime(self, unit): 511 # GH 27395, non-ns dtype assignment via .loc should work 512 # and return the same result when using simple assignment 513 df = DataFrame( 514 { 515 "timestamp": [ 516 np.datetime64("2017-02-11 12:41:29"), 517 np.datetime64("1991-11-07 04:22:37"), 518 ] 519 } 520 ) 521 522 df.loc[:, unit] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]") 523 df["expected"] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]") 524 expected = Series(df.loc[:, "expected"], name=unit) 525 tm.assert_series_equal(df.loc[:, unit], expected) 526 527 def test_loc_modify_datetime(self): 528 # see gh-28837 529 df = DataFrame.from_dict( 530 {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} 531 ) 532 533 df["date_dt"] = pd.to_datetime(df["date"], unit="ms", cache=True) 534 535 df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] 536 df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] 537 538 expected = DataFrame( 539 [ 540 [1485264372711, "2017-01-24 13:26:12.711", "2017-01-24 13:26:12.711"], 541 [1485265925110, "2017-01-24 13:52:05.110", "2017-01-24 13:52:05.110"], 542 [1540215845888, "2018-10-22 13:44:05.888", "2018-10-22 13:44:05.888"], 543 [1540282121025, "2018-10-23 08:08:41.025", "2018-10-23 08:08:41.025"], 544 ], 545 columns=["date", "date_dt", "date_dt_cp"], 546 ) 547 548 columns = ["date_dt", "date_dt_cp"] 549 expected[columns] = expected[columns].apply(pd.to_datetime) 550 551 tm.assert_frame_equal(df, expected) 552 553 def test_loc_setitem_frame(self): 554 df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")) 555 556 result = df.iloc[0, 0] 557 558 df.loc["a", "A"] = 1 559 result = df.loc["a", "A"] 560 assert result == 1 561 562 result = df.iloc[0, 0] 563 assert result == 1 564 565 df.loc[:, "B":"D"] = 0 566 expected = df.loc[:, "B":"D"] 567 result = df.iloc[:, 1:] 568 tm.assert_frame_equal(result, expected) 569 570 # GH 6254 571 # setting issue 572 df = DataFrame(index=[3, 5, 4], columns=["A"]) 573 df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") 574 expected = DataFrame({"A": Series([1, 2, 3], index=[4, 3, 5])}).reindex( 575 index=[3, 5, 4] 576 ) 577 tm.assert_frame_equal(df, expected) 578 579 # GH 6252 580 # setting with an empty frame 581 keys1 = ["@" + str(i) for i in range(5)] 582 val1 = np.arange(5, dtype="int64") 583 584 keys2 = ["@" + str(i) for i in range(4)] 585 val2 = np.arange(4, dtype="int64") 586 587 index = list(set(keys1).union(keys2)) 588 df = DataFrame(index=index) 589 df["A"] = np.nan 590 df.loc[keys1, "A"] = val1 591 592 df["B"] = np.nan 593 df.loc[keys2, "B"] = val2 594 595 expected = DataFrame( 596 {"A": Series(val1, index=keys1), "B": Series(val2, index=keys2)} 597 ).reindex(index=index) 598 tm.assert_frame_equal(df, expected) 599 600 # GH 8669 601 # invalid coercion of nan -> int 602 df = DataFrame({"A": [1, 2, 3], "B": np.nan}) 603 df.loc[df.B > df.A, "B"] = df.A 604 expected = DataFrame({"A": [1, 2, 3], "B": np.nan}) 605 tm.assert_frame_equal(df, expected) 606 607 # GH 6546 608 # setting with mixed labels 609 df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]}) 610 611 result = df.loc[0, [1, 2]] 612 expected = Series([1, 3], index=[1, 2], dtype=object, name=0) 613 tm.assert_series_equal(result, expected) 614 615 expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]}) 616 df.loc[0, [1, 2]] = [5, 6] 617 tm.assert_frame_equal(df, expected) 618 619 def test_loc_setitem_frame_multiples(self): 620 # multiple setting 621 df = DataFrame( 622 {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)} 623 ) 624 rhs = df.loc[1:2] 625 rhs.index = df.index[0:2] 626 df.loc[0:1] = rhs 627 expected = DataFrame( 628 {"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)} 629 ) 630 tm.assert_frame_equal(df, expected) 631 632 # multiple setting with frame on rhs (with M8) 633 df = DataFrame( 634 { 635 "date": date_range("2000-01-01", "2000-01-5"), 636 "val": Series(range(5), dtype=np.int64), 637 } 638 ) 639 expected = DataFrame( 640 { 641 "date": [ 642 Timestamp("20000101"), 643 Timestamp("20000102"), 644 Timestamp("20000101"), 645 Timestamp("20000102"), 646 Timestamp("20000103"), 647 ], 648 "val": Series([0, 1, 0, 1, 2], dtype=np.int64), 649 } 650 ) 651 rhs = df.loc[0:2] 652 rhs.index = df.index[2:5] 653 df.loc[2:4] = rhs 654 tm.assert_frame_equal(df, expected) 655 656 @pytest.mark.parametrize( 657 "indexer", [["A"], slice(None, "A", None), np.array(["A"])] 658 ) 659 @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) 660 def test_loc_setitem_with_scalar_index(self, indexer, value): 661 # GH #19474 662 # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated 663 # elementwisely, not using "setter('A', ['Z'])". 664 665 df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) 666 df.loc[0, indexer] = value 667 result = df.loc[0, "A"] 668 669 assert is_scalar(result) and result == "Z" 670 671 @pytest.mark.parametrize( 672 "index,box,expected", 673 [ 674 ( 675 ([0, 2], ["A", "B", "C", "D"]), 676 7, 677 DataFrame( 678 [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]], 679 columns=["A", "B", "C", "D"], 680 ), 681 ), 682 ( 683 (1, ["C", "D"]), 684 [7, 8], 685 DataFrame( 686 [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]], 687 columns=["A", "B", "C", "D"], 688 ), 689 ), 690 ( 691 (1, ["A", "B", "C"]), 692 np.array([7, 8, 9], dtype=np.int64), 693 DataFrame( 694 [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"] 695 ), 696 ), 697 ( 698 (slice(1, 3, None), ["B", "C", "D"]), 699 [[7, 8, 9], [10, 11, 12]], 700 DataFrame( 701 [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]], 702 columns=["A", "B", "C", "D"], 703 ), 704 ), 705 ( 706 (slice(1, 3, None), ["C", "A", "D"]), 707 np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64), 708 DataFrame( 709 [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]], 710 columns=["A", "B", "C", "D"], 711 ), 712 ), 713 ( 714 (slice(None, None, None), ["A", "C"]), 715 DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), 716 DataFrame( 717 [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] 718 ), 719 ), 720 ], 721 ) 722 def test_loc_setitem_missing_columns(self, index, box, expected): 723 # GH 29334 724 df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) 725 df.loc[index] = box 726 tm.assert_frame_equal(df, expected) 727 728 def test_loc_coercion(self): 729 730 # 12411 731 df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) 732 expected = df.dtypes 733 734 result = df.iloc[[0]] 735 tm.assert_series_equal(result.dtypes, expected) 736 737 result = df.iloc[[1]] 738 tm.assert_series_equal(result.dtypes, expected) 739 740 # 12045 741 import datetime 742 743 df = DataFrame( 744 {"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]} 745 ) 746 expected = df.dtypes 747 748 result = df.iloc[[0]] 749 tm.assert_series_equal(result.dtypes, expected) 750 751 result = df.iloc[[1]] 752 tm.assert_series_equal(result.dtypes, expected) 753 754 # 11594 755 df = DataFrame({"text": ["some words"] + [None] * 9}) 756 expected = df.dtypes 757 758 result = df.iloc[0:2] 759 tm.assert_series_equal(result.dtypes, expected) 760 761 result = df.iloc[3:] 762 tm.assert_series_equal(result.dtypes, expected) 763 764 def test_setitem_new_key_tz(self): 765 # GH#12862 should not raise on assigning the second value 766 vals = [ 767 pd.to_datetime(42).tz_localize("UTC"), 768 pd.to_datetime(666).tz_localize("UTC"), 769 ] 770 expected = Series(vals, index=["foo", "bar"]) 771 772 ser = Series(dtype=object) 773 ser["foo"] = vals[0] 774 ser["bar"] = vals[1] 775 776 tm.assert_series_equal(ser, expected) 777 778 ser = Series(dtype=object) 779 ser.loc["foo"] = vals[0] 780 ser.loc["bar"] = vals[1] 781 782 tm.assert_series_equal(ser, expected) 783 784 def test_loc_non_unique(self): 785 # GH3659 786 # non-unique indexer with loc slice 787 # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs 788 789 # these are going to raise because the we are non monotonic 790 df = DataFrame( 791 {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] 792 ) 793 msg = "'Cannot get left slice bound for non-unique label: 1'" 794 with pytest.raises(KeyError, match=msg): 795 df.loc[1:] 796 msg = "'Cannot get left slice bound for non-unique label: 0'" 797 with pytest.raises(KeyError, match=msg): 798 df.loc[0:] 799 msg = "'Cannot get left slice bound for non-unique label: 1'" 800 with pytest.raises(KeyError, match=msg): 801 df.loc[1:2] 802 803 # monotonic are ok 804 df = DataFrame( 805 {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] 806 ).sort_index(axis=0) 807 result = df.loc[1:] 808 expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3]) 809 tm.assert_frame_equal(result, expected) 810 811 result = df.loc[0:] 812 tm.assert_frame_equal(result, df) 813 814 result = df.loc[1:2] 815 expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2]) 816 tm.assert_frame_equal(result, expected) 817 818 @pytest.mark.arm_slow 819 def test_loc_non_unique_memory_error(self): 820 821 # GH 4280 822 # non_unique index with a large selection triggers a memory error 823 824 columns = list("ABCDEFG") 825 826 def gen_test(length, l2): 827 return pd.concat( 828 [ 829 DataFrame( 830 np.random.randn(length, len(columns)), 831 index=np.arange(length), 832 columns=columns, 833 ), 834 DataFrame( 835 np.ones((l2, len(columns))), index=[0] * l2, columns=columns 836 ), 837 ] 838 ) 839 840 def gen_expected(df, mask): 841 len_mask = len(mask) 842 return pd.concat( 843 [ 844 df.take([0]), 845 DataFrame( 846 np.ones((len_mask, len(columns))), 847 index=[0] * len_mask, 848 columns=columns, 849 ), 850 df.take(mask[1:]), 851 ] 852 ) 853 854 df = gen_test(900, 100) 855 assert df.index.is_unique is False 856 857 mask = np.arange(100) 858 result = df.loc[mask] 859 expected = gen_expected(df, mask) 860 tm.assert_frame_equal(result, expected) 861 862 df = gen_test(900000, 100000) 863 assert df.index.is_unique is False 864 865 mask = np.arange(100000) 866 result = df.loc[mask] 867 expected = gen_expected(df, mask) 868 tm.assert_frame_equal(result, expected) 869 870 def test_loc_name(self): 871 # GH 3880 872 df = DataFrame([[1, 1], [1, 1]]) 873 df.index.name = "index_name" 874 result = df.iloc[[0, 1]].index.name 875 assert result == "index_name" 876 877 result = df.loc[[0, 1]].index.name 878 assert result == "index_name" 879 880 def test_loc_empty_list_indexer_is_ok(self): 881 882 df = tm.makeCustomDataframe(5, 2) 883 # vertical empty 884 tm.assert_frame_equal( 885 df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True 886 ) 887 # horizontal empty 888 tm.assert_frame_equal( 889 df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True 890 ) 891 # horizontal empty 892 tm.assert_frame_equal( 893 df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True 894 ) 895 896 def test_identity_slice_returns_new_object(self): 897 # GH13873 898 original_df = DataFrame({"a": [1, 2, 3]}) 899 sliced_df = original_df.loc[:] 900 assert sliced_df is not original_df 901 assert original_df[:] is not original_df 902 903 # should be a shallow copy 904 original_df["a"] = [4, 4, 4] 905 assert (sliced_df["a"] == 4).all() 906 907 # These should not return copies 908 assert original_df is original_df.loc[:, :] 909 df = DataFrame(np.random.randn(10, 4)) 910 assert df[0] is df.loc[:, 0] 911 912 # Same tests for Series 913 original_series = Series([1, 2, 3, 4, 5, 6]) 914 sliced_series = original_series.loc[:] 915 assert sliced_series is not original_series 916 assert original_series[:] is not original_series 917 918 original_series[:3] = [7, 8, 9] 919 assert all(sliced_series[:3] == [7, 8, 9]) 920 921 @pytest.mark.xfail(reason="accidental fix reverted - GH37497") 922 def test_loc_copy_vs_view(self): 923 # GH 15631 924 x = DataFrame(zip(range(3), range(3)), columns=["a", "b"]) 925 926 y = x.copy() 927 q = y.loc[:, "a"] 928 q += 2 929 930 tm.assert_frame_equal(x, y) 931 932 z = x.copy() 933 q = z.loc[x.index, "a"] 934 q += 2 935 936 tm.assert_frame_equal(x, z) 937 938 def test_loc_uint64(self): 939 # GH20722 940 # Test whether loc accept uint64 max value as index. 941 s = Series([1, 2], index=[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]) 942 943 result = s.loc[np.iinfo("uint64").max - 1] 944 expected = s.iloc[0] 945 assert result == expected 946 947 result = s.loc[[np.iinfo("uint64").max - 1]] 948 expected = s.iloc[[0]] 949 tm.assert_series_equal(result, expected) 950 951 result = s.loc[[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]] 952 tm.assert_series_equal(result, s) 953 954 def test_loc_setitem_empty_append_expands_rows(self): 955 # GH6173, various appends to an empty dataframe 956 957 data = [1, 2, 3] 958 expected = DataFrame({"x": data, "y": [None] * len(data)}) 959 960 # appends to fit length of data 961 df = DataFrame(columns=["x", "y"]) 962 df.loc[:, "x"] = data 963 tm.assert_frame_equal(df, expected) 964 965 def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): 966 # GH#37932 same as test_loc_setitem_empty_append_expands_rows 967 # but with mixed dtype so we go through take_split_path 968 data = [1, 2, 3] 969 expected = DataFrame({"x": data, "y": [None] * len(data)}) 970 971 df = DataFrame(columns=["x", "y"]) 972 df["x"] = df["x"].astype(np.int64) 973 df.loc[:, "x"] = data 974 tm.assert_frame_equal(df, expected) 975 976 def test_loc_setitem_empty_append_single_value(self): 977 # only appends one value 978 expected = DataFrame({"x": [1.0], "y": [np.nan]}) 979 df = DataFrame(columns=["x", "y"], dtype=float) 980 df.loc[0, "x"] = expected.loc[0, "x"] 981 tm.assert_frame_equal(df, expected) 982 983 def test_loc_setitem_empty_append_raises(self): 984 # GH6173, various appends to an empty dataframe 985 986 data = [1, 2] 987 df = DataFrame(columns=["x", "y"]) 988 df.index = df.index.astype(np.int64) 989 msg = ( 990 r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " 991 r"are in the \[index\]" 992 ) 993 with pytest.raises(KeyError, match=msg): 994 df.loc[[0, 1], "x"] = data 995 996 msg = "|".join( 997 [ 998 "cannot copy sequence with size 2 to array axis with dimension 0", 999 r"could not broadcast input array from shape \(2,\) into shape \(0,\)", 1000 ] 1001 ) 1002 with pytest.raises(ValueError, match=msg): 1003 df.loc[0:2, "x"] = data 1004 1005 def test_indexing_zerodim_np_array(self): 1006 # GH24924 1007 df = DataFrame([[1, 2], [3, 4]]) 1008 result = df.loc[np.array(0)] 1009 s = Series([1, 2], name=0) 1010 tm.assert_series_equal(result, s) 1011 1012 def test_series_indexing_zerodim_np_array(self): 1013 # GH24924 1014 s = Series([1, 2]) 1015 result = s.loc[np.array(0)] 1016 assert result == 1 1017 1018 def test_loc_reverse_assignment(self): 1019 # GH26939 1020 data = [1, 2, 3, 4, 5, 6] + [None] * 4 1021 expected = Series(data, index=range(2010, 2020)) 1022 1023 result = Series(index=range(2010, 2020), dtype=np.float64) 1024 result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1] 1025 1026 tm.assert_series_equal(result, expected) 1027 1028 def test_loc_setitem_str_to_small_float_conversion_type(self): 1029 # GH#20388 1030 np.random.seed(13) 1031 col_data = [str(np.random.random() * 1e-12) for _ in range(5)] 1032 result = DataFrame(col_data, columns=["A"]) 1033 expected = DataFrame(col_data, columns=["A"], dtype=object) 1034 tm.assert_frame_equal(result, expected) 1035 1036 # change the dtype of the elements from object to float one by one 1037 result.loc[result.index, "A"] = [float(x) for x in col_data] 1038 expected = DataFrame(col_data, columns=["A"], dtype=float) 1039 tm.assert_frame_equal(result, expected) 1040 1041 def test_loc_getitem_time_object(self, frame_or_series): 1042 rng = date_range("1/1/2000", "1/5/2000", freq="5min") 1043 mask = (rng.hour == 9) & (rng.minute == 30) 1044 1045 obj = DataFrame(np.random.randn(len(rng), 3), index=rng) 1046 if frame_or_series is Series: 1047 obj = obj[0] 1048 1049 result = obj.loc[time(9, 30)] 1050 exp = obj.loc[mask] 1051 tm.assert_equal(result, exp) 1052 1053 chunk = obj.loc["1/4/2000":] 1054 result = chunk.loc[time(9, 30)] 1055 expected = result[-1:] 1056 1057 # Without resetting the freqs, these are 5 min and 1440 min, respectively 1058 result.index = result.index._with_freq(None) 1059 expected.index = expected.index._with_freq(None) 1060 tm.assert_equal(result, expected) 1061 1062 @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) 1063 @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) 1064 @td.skip_if_no_scipy 1065 def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype): 1066 import scipy.sparse 1067 1068 spmatrix_t = getattr(scipy.sparse, spmatrix_t) 1069 1070 # The bug is triggered by a sparse matrix with purely sparse columns. So the 1071 # recipe below generates a rectangular matrix of dimension (5, 7) where all the 1072 # diagonal cells are ones, meaning the last two columns are purely sparse. 1073 rows, cols = 5, 7 1074 spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) 1075 df = DataFrame.sparse.from_spmatrix(spmatrix) 1076 1077 # regression test for GH#34526 1078 itr_idx = range(2, rows) 1079 result = df.loc[itr_idx].values 1080 expected = spmatrix.toarray()[itr_idx] 1081 tm.assert_numpy_array_equal(result, expected) 1082 1083 # regression test for GH#34540 1084 result = df.loc[itr_idx].dtypes.values 1085 expected = np.full(cols, SparseDtype(dtype, fill_value=0)) 1086 tm.assert_numpy_array_equal(result, expected) 1087 1088 def test_loc_getitem_listlike_all_retains_sparse(self): 1089 df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))}) 1090 result = df.loc[[0, 1]] 1091 tm.assert_frame_equal(result, df) 1092 1093 @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) 1094 def test_loc_getitem_iterable(self, float_frame, key_type): 1095 idx = key_type(["A", "B", "C"]) 1096 result = float_frame.loc[:, idx] 1097 expected = float_frame.loc[:, ["A", "B", "C"]] 1098 tm.assert_frame_equal(result, expected) 1099 1100 def test_loc_getitem_timedelta_0seconds(self): 1101 # GH#10583 1102 df = DataFrame(np.random.normal(size=(10, 4))) 1103 df.index = timedelta_range(start="0s", periods=10, freq="s") 1104 expected = df.loc[Timedelta("0s") :, :] 1105 result = df.loc["0s":, :] 1106 tm.assert_frame_equal(expected, result) 1107 1108 @pytest.mark.parametrize( 1109 "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))] 1110 ) 1111 def test_loc_getitem_uint64_scalar(self, val, expected): 1112 # see GH#19399 1113 df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63]) 1114 result = df.loc[val] 1115 1116 expected.name = val 1117 tm.assert_series_equal(result, expected) 1118 1119 def test_loc_setitem_int_label_with_float64index(self): 1120 # note labels are floats 1121 ser = Series(["a", "b", "c"], index=[0, 0.5, 1]) 1122 tmp = ser.copy() 1123 1124 ser.loc[1] = "zoo" 1125 tmp.iloc[2] = "zoo" 1126 1127 tm.assert_series_equal(ser, tmp) 1128 1129 @pytest.mark.parametrize( 1130 "indexer, expected", 1131 [ 1132 # The test name is a misnomer in the 0 case as df.index[indexer] 1133 # is a scalar. 1134 (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), 1135 (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), 1136 ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]), 1137 ], 1138 ) 1139 def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): 1140 # GH#16637 1141 tdi = to_timedelta(range(10), unit="s") 1142 df = DataFrame({"x": range(10)}, dtype="int64", index=tdi) 1143 1144 df.loc[df.index[indexer], "x"] = 20 1145 1146 expected = DataFrame( 1147 expected, 1148 index=tdi, 1149 columns=["x"], 1150 dtype="int64", 1151 ) 1152 1153 tm.assert_frame_equal(expected, df) 1154 1155 1156class TestLocWithMultiIndex: 1157 @pytest.mark.parametrize( 1158 "keys, expected", 1159 [ 1160 (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]), 1161 (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]), 1162 ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]), 1163 ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]), 1164 ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]), 1165 ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]), 1166 ((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]), 1167 ], 1168 ) 1169 @pytest.mark.parametrize("dim", ["index", "columns"]) 1170 def test_loc_getitem_multilevel_index_order(self, dim, keys, expected): 1171 # GH#22797 1172 # Try to respect order of keys given for MultiIndex.loc 1173 kwargs = {dim: [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]} 1174 df = DataFrame(np.arange(25).reshape(5, 5), **kwargs) 1175 exp_index = MultiIndex.from_arrays(expected) 1176 if dim == "index": 1177 res = df.loc[keys, :] 1178 tm.assert_index_equal(res.index, exp_index) 1179 elif dim == "columns": 1180 res = df.loc[:, keys] 1181 tm.assert_index_equal(res.columns, exp_index) 1182 1183 def test_loc_preserve_names(self, multiindex_year_month_day_dataframe_random_data): 1184 ymd = multiindex_year_month_day_dataframe_random_data 1185 1186 result = ymd.loc[2000] 1187 result2 = ymd["A"].loc[2000] 1188 assert result.index.names == ymd.index.names[1:] 1189 assert result2.index.names == ymd.index.names[1:] 1190 1191 result = ymd.loc[2000, 2] 1192 result2 = ymd["A"].loc[2000, 2] 1193 assert result.index.name == ymd.index.names[2] 1194 assert result2.index.name == ymd.index.names[2] 1195 1196 def test_loc_getitem_multiindex_nonunique_len_zero(self): 1197 # GH#13691 1198 mi = MultiIndex.from_product([[0], [1, 1]]) 1199 ser = Series(0, index=mi) 1200 1201 res = ser.loc[[]] 1202 1203 expected = ser[:0] 1204 tm.assert_series_equal(res, expected) 1205 1206 res2 = ser.loc[ser.iloc[0:0]] 1207 tm.assert_series_equal(res2, expected) 1208 1209 def test_loc_getitem_access_none_value_in_multiindex(self): 1210 # GH#34318: test that you can access a None value using .loc 1211 # through a Multiindex 1212 1213 ser = Series([None], pd.MultiIndex.from_arrays([["Level1"], ["Level2"]])) 1214 result = ser.loc[("Level1", "Level2")] 1215 assert result is None 1216 1217 midx = MultiIndex.from_product([["Level1"], ["Level2_a", "Level2_b"]]) 1218 ser = Series([None] * len(midx), dtype=object, index=midx) 1219 result = ser.loc[("Level1", "Level2_a")] 1220 assert result is None 1221 1222 ser = Series([1] * len(midx), dtype=object, index=midx) 1223 result = ser.loc[("Level1", "Level2_a")] 1224 assert result == 1 1225 1226 def test_loc_setitem_multiindex_slice(self): 1227 # GH 34870 1228 1229 index = pd.MultiIndex.from_tuples( 1230 zip( 1231 ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], 1232 ["one", "two", "one", "two", "one", "two", "one", "two"], 1233 ), 1234 names=["first", "second"], 1235 ) 1236 1237 result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index) 1238 result.loc[("baz", "one"):("foo", "two")] = 100 1239 1240 expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index) 1241 1242 tm.assert_series_equal(result, expected) 1243 1244 def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self): 1245 times = date_range("2000-01-01", freq="10min", periods=100000) 1246 ser = Series(range(100000), times) 1247 result = ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] 1248 tm.assert_series_equal(result, ser) 1249 1250 def test_loc_getitem_sorted_index_level_with_duplicates(self): 1251 # GH#4516 sorting a MultiIndex with duplicates and multiple dtypes 1252 mi = MultiIndex.from_tuples( 1253 [ 1254 ("foo", "bar"), 1255 ("foo", "bar"), 1256 ("bah", "bam"), 1257 ("bah", "bam"), 1258 ("foo", "bar"), 1259 ("bah", "bam"), 1260 ], 1261 names=["A", "B"], 1262 ) 1263 df = DataFrame( 1264 [ 1265 [1.0, 1], 1266 [2.0, 2], 1267 [3.0, 3], 1268 [4.0, 4], 1269 [5.0, 5], 1270 [6.0, 6], 1271 ], 1272 index=mi, 1273 columns=["C", "D"], 1274 ) 1275 df = df.sort_index(level=0) 1276 1277 expected = DataFrame( 1278 [[1.0, 1], [2.0, 2], [5.0, 5]], columns=["C", "D"], index=mi.take([0, 1, 4]) 1279 ) 1280 1281 result = df.loc[("foo", "bar")] 1282 tm.assert_frame_equal(result, expected) 1283 1284 1285class TestLocSetitemWithExpansion: 1286 @pytest.mark.slow 1287 def test_loc_setitem_with_expansion_large_dataframe(self): 1288 # GH#10692 1289 result = DataFrame({"x": range(10 ** 6)}, dtype="int64") 1290 result.loc[len(result)] = len(result) + 1 1291 expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") 1292 tm.assert_frame_equal(result, expected) 1293 1294 def test_loc_setitem_empty_series(self): 1295 # GH#5226 1296 1297 # partially set with an empty object series 1298 ser = Series(dtype=object) 1299 ser.loc[1] = 1 1300 tm.assert_series_equal(ser, Series([1], index=[1])) 1301 ser.loc[3] = 3 1302 tm.assert_series_equal(ser, Series([1, 3], index=[1, 3])) 1303 1304 ser = Series(dtype=object) 1305 ser.loc[1] = 1.0 1306 tm.assert_series_equal(ser, Series([1.0], index=[1])) 1307 ser.loc[3] = 3.0 1308 tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3])) 1309 1310 ser = Series(dtype=object) 1311 ser.loc["foo"] = 1 1312 tm.assert_series_equal(ser, Series([1], index=["foo"])) 1313 ser.loc["bar"] = 3 1314 tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"])) 1315 ser.loc[3] = 4 1316 tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3])) 1317 1318 def test_loc_setitem_incremental_with_dst(self): 1319 # GH#20724 1320 base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific")) 1321 idxs = [base + timedelta(seconds=i * 900) for i in range(16)] 1322 result = Series([0], index=[idxs[0]]) 1323 for ts in idxs: 1324 result.loc[ts] = 1 1325 expected = Series(1, index=idxs) 1326 tm.assert_series_equal(result, expected) 1327 1328 def test_loc_setitem_datetime_keys_cast(self): 1329 # GH#9516 1330 dt1 = Timestamp("20130101 09:00:00") 1331 dt2 = Timestamp("20130101 10:00:00") 1332 1333 for conv in [ 1334 lambda x: x, 1335 lambda x: x.to_datetime64(), 1336 lambda x: x.to_pydatetime(), 1337 lambda x: np.datetime64(x), 1338 ]: 1339 1340 df = DataFrame() 1341 df.loc[conv(dt1), "one"] = 100 1342 df.loc[conv(dt2), "one"] = 200 1343 1344 expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) 1345 tm.assert_frame_equal(df, expected) 1346 1347 def test_loc_setitem_categorical_column_retains_dtype(self, ordered): 1348 # GH16360 1349 result = DataFrame({"A": [1]}) 1350 result.loc[:, "B"] = Categorical(["b"], ordered=ordered) 1351 expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)}) 1352 tm.assert_frame_equal(result, expected) 1353 1354 1355class TestLocCallable: 1356 def test_frame_loc_getitem_callable(self): 1357 # GH#11485 1358 df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) 1359 # iloc cannot use boolean Series (see GH3635) 1360 1361 # return bool indexer 1362 res = df.loc[lambda x: x.A > 2] 1363 tm.assert_frame_equal(res, df.loc[df.A > 2]) 1364 1365 res = df.loc[lambda x: x.A > 2] 1366 tm.assert_frame_equal(res, df.loc[df.A > 2]) 1367 1368 res = df.loc[lambda x: x.A > 2] 1369 tm.assert_frame_equal(res, df.loc[df.A > 2]) 1370 1371 res = df.loc[lambda x: x.A > 2] 1372 tm.assert_frame_equal(res, df.loc[df.A > 2]) 1373 1374 res = df.loc[lambda x: x.B == "b", :] 1375 tm.assert_frame_equal(res, df.loc[df.B == "b", :]) 1376 1377 res = df.loc[lambda x: x.B == "b", :] 1378 tm.assert_frame_equal(res, df.loc[df.B == "b", :]) 1379 1380 res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] 1381 tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) 1382 1383 res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] 1384 tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) 1385 1386 res = df.loc[lambda x: x.A > 2, lambda x: "B"] 1387 tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) 1388 1389 res = df.loc[lambda x: x.A > 2, lambda x: "B"] 1390 tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) 1391 1392 res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] 1393 tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) 1394 1395 res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] 1396 tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) 1397 1398 res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] 1399 tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) 1400 1401 res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] 1402 tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) 1403 1404 # scalar 1405 res = df.loc[lambda x: 1, lambda x: "A"] 1406 assert res == df.loc[1, "A"] 1407 1408 res = df.loc[lambda x: 1, lambda x: "A"] 1409 assert res == df.loc[1, "A"] 1410 1411 def test_frame_loc_getitem_callable_mixture(self): 1412 # GH#11485 1413 df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) 1414 1415 res = df.loc[lambda x: x.A > 2, ["A", "B"]] 1416 tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) 1417 1418 res = df.loc[lambda x: x.A > 2, ["A", "B"]] 1419 tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) 1420 1421 res = df.loc[[2, 3], lambda x: ["A", "B"]] 1422 tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) 1423 1424 res = df.loc[[2, 3], lambda x: ["A", "B"]] 1425 tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) 1426 1427 res = df.loc[3, lambda x: ["A", "B"]] 1428 tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) 1429 1430 res = df.loc[3, lambda x: ["A", "B"]] 1431 tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) 1432 1433 def test_frame_loc_getitem_callable_labels(self): 1434 # GH#11485 1435 df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) 1436 1437 # return label 1438 res = df.loc[lambda x: ["A", "C"]] 1439 tm.assert_frame_equal(res, df.loc[["A", "C"]]) 1440 1441 res = df.loc[lambda x: ["A", "C"]] 1442 tm.assert_frame_equal(res, df.loc[["A", "C"]]) 1443 1444 res = df.loc[lambda x: ["A", "C"], :] 1445 tm.assert_frame_equal(res, df.loc[["A", "C"], :]) 1446 1447 res = df.loc[lambda x: ["A", "C"], lambda x: "X"] 1448 tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) 1449 1450 res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] 1451 tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) 1452 1453 # mixture 1454 res = df.loc[["A", "C"], lambda x: "X"] 1455 tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) 1456 1457 res = df.loc[["A", "C"], lambda x: ["X"]] 1458 tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) 1459 1460 res = df.loc[lambda x: ["A", "C"], "X"] 1461 tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) 1462 1463 res = df.loc[lambda x: ["A", "C"], ["X"]] 1464 tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) 1465 1466 def test_frame_loc_setitem_callable(self): 1467 # GH#11485 1468 df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) 1469 1470 # return label 1471 res = df.copy() 1472 res.loc[lambda x: ["A", "C"]] = -20 1473 exp = df.copy() 1474 exp.loc[["A", "C"]] = -20 1475 tm.assert_frame_equal(res, exp) 1476 1477 res = df.copy() 1478 res.loc[lambda x: ["A", "C"], :] = 20 1479 exp = df.copy() 1480 exp.loc[["A", "C"], :] = 20 1481 tm.assert_frame_equal(res, exp) 1482 1483 res = df.copy() 1484 res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 1485 exp = df.copy() 1486 exp.loc[["A", "C"], "X"] = -1 1487 tm.assert_frame_equal(res, exp) 1488 1489 res = df.copy() 1490 res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] 1491 exp = df.copy() 1492 exp.loc[["A", "C"], ["X"]] = [5, 10] 1493 tm.assert_frame_equal(res, exp) 1494 1495 # mixture 1496 res = df.copy() 1497 res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) 1498 exp = df.copy() 1499 exp.loc[["A", "C"], "X"] = np.array([-1, -2]) 1500 tm.assert_frame_equal(res, exp) 1501 1502 res = df.copy() 1503 res.loc[["A", "C"], lambda x: ["X"]] = 10 1504 exp = df.copy() 1505 exp.loc[["A", "C"], ["X"]] = 10 1506 tm.assert_frame_equal(res, exp) 1507 1508 res = df.copy() 1509 res.loc[lambda x: ["A", "C"], "X"] = -2 1510 exp = df.copy() 1511 exp.loc[["A", "C"], "X"] = -2 1512 tm.assert_frame_equal(res, exp) 1513 1514 res = df.copy() 1515 res.loc[lambda x: ["A", "C"], ["X"]] = -4 1516 exp = df.copy() 1517 exp.loc[["A", "C"], ["X"]] = -4 1518 tm.assert_frame_equal(res, exp) 1519 1520 1521class TestPartialStringSlicing: 1522 def test_loc_getitem_partial_string_slicing_datetimeindex(self): 1523 # GH#35509 1524 df = DataFrame( 1525 {"col1": ["a", "b", "c"], "col2": [1, 2, 3]}, 1526 index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]), 1527 ) 1528 expected = DataFrame( 1529 {"col1": ["a", "c"], "col2": [1, 3]}, 1530 index=to_datetime(["2020-08-01", "2020-08-05"]), 1531 ) 1532 result = df.loc["2020-08"] 1533 tm.assert_frame_equal(result, expected) 1534 1535 def test_loc_getitem_partial_string_slicing_with_periodindex(self): 1536 pi = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") 1537 ser = pi.to_series() 1538 result = ser.loc[:"2017-12"] 1539 expected = ser.iloc[:-1] 1540 1541 tm.assert_series_equal(result, expected) 1542 1543 def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): 1544 ix = timedelta_range(start="1 day", end="2 days", freq="1H") 1545 ser = ix.to_series() 1546 result = ser.loc[:"1 days"] 1547 expected = ser.iloc[:-1] 1548 1549 tm.assert_series_equal(result, expected) 1550 1551 def test_loc_getitem_str_timedeltaindex(self): 1552 # GH#16896 1553 df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days")) 1554 expected = df.iloc[0] 1555 sliced = df.loc["0 days"] 1556 tm.assert_series_equal(sliced, expected) 1557 1558 1559class TestLabelSlicing: 1560 def test_loc_getitem_label_slice_across_dst(self): 1561 # GH#21846 1562 idx = date_range( 1563 "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" 1564 ) 1565 series2 = Series([0, 1, 2, 3, 4], index=idx) 1566 1567 t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min") 1568 t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min") 1569 result = series2.loc[t_1:t_2] 1570 expected = Series([2, 3], index=idx[2:4]) 1571 tm.assert_series_equal(result, expected) 1572 1573 result = series2[t_1] 1574 expected = 2 1575 assert result == expected 1576 1577 def test_loc_getitem_label_slice_period(self): 1578 ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") 1579 ser = ix.to_series() 1580 result = ser.loc[: ix[-2]] 1581 expected = ser.iloc[:-1] 1582 1583 tm.assert_series_equal(result, expected) 1584 1585 def test_loc_getitem_label_slice_timedelta64(self): 1586 ix = timedelta_range(start="1 day", end="2 days", freq="1H") 1587 ser = ix.to_series() 1588 result = ser.loc[: ix[-2]] 1589 expected = ser.iloc[:-1] 1590 1591 tm.assert_series_equal(result, expected) 1592 1593 def test_loc_getitem_slice_floats_inexact(self): 1594 index = [52195.504153, 52196.303147, 52198.369883] 1595 df = DataFrame(np.random.rand(3, 2), index=index) 1596 1597 s1 = df.loc[52195.1:52196.5] 1598 assert len(s1) == 2 1599 1600 s1 = df.loc[52195.1:52196.6] 1601 assert len(s1) == 2 1602 1603 s1 = df.loc[52195.1:52198.9] 1604 assert len(s1) == 3 1605 1606 def test_loc_getitem_float_slice_float64index(self): 1607 ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) 1608 1609 assert len(ser.loc[12.0:]) == 8 1610 assert len(ser.loc[12.5:]) == 7 1611 1612 idx = np.arange(10, 20, dtype=float) 1613 idx[2] = 12.2 1614 ser.index = idx 1615 assert len(ser.loc[12.0:]) == 8 1616 assert len(ser.loc[12.5:]) == 7 1617 1618 @pytest.mark.parametrize( 1619 "start,stop, expected_slice", 1620 [ 1621 [np.timedelta64(0, "ns"), None, slice(0, 11)], 1622 [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)], 1623 [None, np.timedelta64(4, "D"), slice(0, 5)], 1624 ], 1625 ) 1626 def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice): 1627 # GH#20393 1628 ser = Series(range(11), timedelta_range("0 days", "10 days")) 1629 result = ser.loc[slice(start, stop)] 1630 expected = ser.iloc[expected_slice] 1631 tm.assert_series_equal(result, expected) 1632 1633 @pytest.mark.parametrize("start", ["2018", "2020"]) 1634 def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series, start): 1635 obj = frame_or_series( 1636 [1, 2, 3], 1637 index=[Timestamp("2016"), Timestamp("2019"), Timestamp("2017")], 1638 ) 1639 with tm.assert_produces_warning(FutureWarning): 1640 obj.loc[start:"2022"] 1641 1642 @pytest.mark.parametrize("value", [1, 1.5]) 1643 def test_loc_getitem_slice_labels_int_in_object_index(self, frame_or_series, value): 1644 # GH: 26491 1645 obj = frame_or_series(range(4), index=[value, "first", 2, "third"]) 1646 result = obj.loc[value:"third"] 1647 expected = frame_or_series(range(4), index=[value, "first", 2, "third"]) 1648 tm.assert_equal(result, expected) 1649 1650 1651class TestLocBooleanMask: 1652 def test_loc_setitem_bool_mask_timedeltaindex(self): 1653 # GH#14946 1654 df = DataFrame({"x": range(10)}) 1655 df.index = to_timedelta(range(10), unit="s") 1656 conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3] 1657 expected_data = [ 1658 [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], 1659 [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], 1660 [10, 10, 10, 3, 4, 5, 6, 7, 8, 9], 1661 ] 1662 for cond, data in zip(conditions, expected_data): 1663 result = df.copy() 1664 result.loc[cond, "x"] = 10 1665 1666 expected = DataFrame( 1667 data, 1668 index=to_timedelta(range(10), unit="s"), 1669 columns=["x"], 1670 dtype="int64", 1671 ) 1672 tm.assert_frame_equal(expected, result) 1673 1674 def test_loc_setitem_mask_with_datetimeindex_tz(self): 1675 # GH#16889 1676 # support .loc with alignment and tz-aware DatetimeIndex 1677 mask = np.array([True, False, True, False]) 1678 1679 idx = date_range("20010101", periods=4, tz="UTC") 1680 df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") 1681 1682 result = df.copy() 1683 result.loc[mask, :] = df.loc[mask, :] 1684 tm.assert_frame_equal(result, df) 1685 1686 result = df.copy() 1687 result.loc[mask] = df.loc[mask] 1688 tm.assert_frame_equal(result, df) 1689 1690 idx = date_range("20010101", periods=4) 1691 df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") 1692 1693 result = df.copy() 1694 result.loc[mask, :] = df.loc[mask, :] 1695 tm.assert_frame_equal(result, df) 1696 1697 result = df.copy() 1698 result.loc[mask] = df.loc[mask] 1699 tm.assert_frame_equal(result, df) 1700 1701 def test_loc_setitem_mask_and_label_with_datetimeindex(self): 1702 # GH#9478 1703 # a datetimeindex alignment issue with partial setting 1704 df = DataFrame( 1705 np.arange(6.0).reshape(3, 2), 1706 columns=list("AB"), 1707 index=date_range("1/1/2000", periods=3, freq="1H"), 1708 ) 1709 expected = df.copy() 1710 expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] 1711 1712 mask = df.A < 1 1713 df.loc[mask, "C"] = df.loc[mask].index 1714 tm.assert_frame_equal(df, expected) 1715 1716 def test_loc_setitem_mask_td64_series_value(self): 1717 # GH#23462 key list of bools, value is a Series 1718 td1 = Timedelta(0) 1719 td2 = Timedelta(28767471428571405) 1720 df = DataFrame({"col": Series([td1, td2])}) 1721 df_copy = df.copy() 1722 ser = Series([td1]) 1723 1724 expected = df["col"].iloc[1].value 1725 df.loc[[True, False]] = ser 1726 result = df["col"].iloc[1].value 1727 1728 assert expected == result 1729 tm.assert_frame_equal(df, df_copy) 1730 1731 1732class TestLocListlike: 1733 @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list]) 1734 def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box): 1735 # passing a list can include valid categories _or_ NA values 1736 ci = CategoricalIndex(["A", "B", np.nan]) 1737 ser = Series(range(3), index=ci) 1738 1739 result = ser.loc[box(ci)] 1740 tm.assert_series_equal(result, ser) 1741 1742 result = ser[box(ci)] 1743 tm.assert_series_equal(result, ser) 1744 1745 result = ser.to_frame().loc[box(ci)] 1746 tm.assert_frame_equal(result, ser.to_frame()) 1747 1748 ser2 = ser[:-1] 1749 ci2 = ci[1:] 1750 # but if there are no NAs present, this should raise KeyError 1751 msg = ( 1752 r"Passing list-likes to .loc or \[\] with any missing labels is no " 1753 "longer supported. The following labels were missing: " 1754 r"(Categorical)?Index\(\[nan\], .*\). " 1755 "See https" 1756 ) 1757 with pytest.raises(KeyError, match=msg): 1758 ser2.loc[box(ci2)] 1759 1760 with pytest.raises(KeyError, match=msg): 1761 ser2[box(ci2)] 1762 1763 with pytest.raises(KeyError, match=msg): 1764 ser2.to_frame().loc[box(ci2)] 1765 1766 1767def test_series_loc_getitem_label_list_missing_values(): 1768 # gh-11428 1769 key = np.array( 1770 ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" 1771 ) 1772 s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) 1773 with pytest.raises(KeyError, match="with any missing labels"): 1774 s.loc[key] 1775 1776 1777def test_series_getitem_label_list_missing_integer_values(): 1778 # GH: 25927 1779 s = Series( 1780 index=np.array([9730701000001104, 10049011000001109]), 1781 data=np.array([999000011000001104, 999000011000001104]), 1782 ) 1783 with pytest.raises(KeyError, match="with any missing labels"): 1784 s.loc[np.array([9730701000001104, 10047311000001102])] 1785 1786 1787@pytest.mark.parametrize( 1788 "columns, column_key, expected_columns", 1789 [ 1790 ([2011, 2012, 2013], [2011, 2012], [0, 1]), 1791 ([2011, 2012, "All"], [2011, 2012], [0, 1]), 1792 ([2011, 2012, "All"], [2011, "All"], [0, 2]), 1793 ], 1794) 1795def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns): 1796 # gh-14836 1797 df = DataFrame(np.random.rand(3, 3), columns=columns, index=list("ABC")) 1798 expected = df.iloc[:, expected_columns] 1799 result = df.loc[["A", "B", "C"], column_key] 1800 1801 if df.columns.is_object() and all(isinstance(x, int) for x in column_key): 1802 expected.columns = expected.columns.astype(int) 1803 1804 tm.assert_frame_equal(result, expected, check_column_type=True) 1805 1806 1807def test_loc_setitem_float_intindex(): 1808 # GH 8720 1809 rand_data = np.random.randn(8, 4) 1810 result = DataFrame(rand_data) 1811 result.loc[:, 0.5] = np.nan 1812 expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) 1813 expected = DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5]) 1814 tm.assert_frame_equal(result, expected) 1815 1816 result = DataFrame(rand_data) 1817 result.loc[:, 0.5] = np.nan 1818 tm.assert_frame_equal(result, expected) 1819 1820 1821def test_loc_axis_1_slice(): 1822 # GH 10586 1823 cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]] 1824 df = DataFrame( 1825 np.ones((10, 8)), 1826 index=tuple("ABCDEFGHIJ"), 1827 columns=pd.MultiIndex.from_tuples(cols), 1828 ) 1829 result = df.loc(axis=1)[(2014, 9):(2015, 8)] 1830 expected = DataFrame( 1831 np.ones((10, 4)), 1832 index=tuple("ABCDEFGHIJ"), 1833 columns=pd.MultiIndex.from_tuples( 1834 [(2014, 9), (2014, 10), (2015, 7), (2015, 8)] 1835 ), 1836 ) 1837 tm.assert_frame_equal(result, expected) 1838 1839 1840def test_loc_set_dataframe_multiindex(): 1841 # GH 14592 1842 expected = DataFrame( 1843 "a", index=range(2), columns=pd.MultiIndex.from_product([range(2), range(2)]) 1844 ) 1845 result = expected.copy() 1846 result.loc[0, [(0, 1)]] = result.loc[0, [(0, 1)]] 1847 tm.assert_frame_equal(result, expected) 1848 1849 1850def test_loc_mixed_int_float(): 1851 # GH#19456 1852 ser = Series(range(2), pd.Index([1, 2.0], dtype=object)) 1853 1854 result = ser.loc[1] 1855 assert result == 0 1856 1857 1858def test_loc_with_positional_slice_deprecation(): 1859 # GH#31840 1860 ser = Series(range(4), index=["A", "B", "C", "D"]) 1861 1862 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): 1863 ser.loc[:3] = 2 1864 1865 expected = Series([2, 2, 2, 3], index=["A", "B", "C", "D"]) 1866 tm.assert_series_equal(ser, expected) 1867 1868 1869def test_loc_slice_disallows_positional(): 1870 # GH#16121, GH#24612, GH#31810 1871 dti = pd.date_range("2016-01-01", periods=3) 1872 df = DataFrame(np.random.random((3, 2)), index=dti) 1873 1874 ser = df[0] 1875 1876 msg = ( 1877 "cannot do slice indexing on DatetimeIndex with these " 1878 r"indexers \[1\] of type int" 1879 ) 1880 1881 for obj in [df, ser]: 1882 with pytest.raises(TypeError, match=msg): 1883 obj.loc[1:3] 1884 1885 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): 1886 # GH#31840 deprecated incorrect behavior 1887 obj.loc[1:3] = 1 1888 1889 with pytest.raises(TypeError, match=msg): 1890 df.loc[1:3, 1] 1891 1892 with tm.assert_produces_warning(FutureWarning): 1893 # GH#31840 deprecated incorrect behavior 1894 df.loc[1:3, 1] = 2 1895 1896 1897def test_loc_datetimelike_mismatched_dtypes(): 1898 # GH#32650 dont mix and match datetime/timedelta/period dtypes 1899 1900 df = DataFrame( 1901 np.random.randn(5, 3), 1902 columns=["a", "b", "c"], 1903 index=pd.date_range("2012", freq="H", periods=5), 1904 ) 1905 # create dataframe with non-unique DatetimeIndex 1906 df = df.iloc[[0, 2, 2, 3]].copy() 1907 1908 dti = df.index 1909 tdi = pd.TimedeltaIndex(dti.asi8) # matching i8 values 1910 1911 msg = r"None of \[TimedeltaIndex.* are in the \[index\]" 1912 with pytest.raises(KeyError, match=msg): 1913 df.loc[tdi] 1914 1915 with pytest.raises(KeyError, match=msg): 1916 df["a"].loc[tdi] 1917 1918 1919def test_loc_with_period_index_indexer(): 1920 # GH#4125 1921 idx = pd.period_range("2002-01", "2003-12", freq="M") 1922 df = DataFrame(np.random.randn(24, 10), index=idx) 1923 tm.assert_frame_equal(df, df.loc[idx]) 1924 tm.assert_frame_equal(df, df.loc[list(idx)]) 1925 tm.assert_frame_equal(df, df.loc[list(idx)]) 1926 tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) 1927 tm.assert_frame_equal(df, df.loc[list(idx)]) 1928 1929 1930class TestLocSeries: 1931 @pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)]) 1932 def test_loc_uint64(self, val, expected): 1933 # see GH#19399 1934 ser = Series({2 ** 63 - 1: 3, 2 ** 63: 4}) 1935 assert ser.loc[val] == expected 1936 1937 def test_loc_getitem(self, string_series, datetime_series): 1938 inds = string_series.index[[3, 4, 7]] 1939 tm.assert_series_equal(string_series.loc[inds], string_series.reindex(inds)) 1940 tm.assert_series_equal(string_series.iloc[5::2], string_series[5::2]) 1941 1942 # slice with indices 1943 d1, d2 = datetime_series.index[[5, 15]] 1944 result = datetime_series.loc[d1:d2] 1945 expected = datetime_series.truncate(d1, d2) 1946 tm.assert_series_equal(result, expected) 1947 1948 # boolean 1949 mask = string_series > string_series.median() 1950 tm.assert_series_equal(string_series.loc[mask], string_series[mask]) 1951 1952 # ask for index value 1953 assert datetime_series.loc[d1] == datetime_series[d1] 1954 assert datetime_series.loc[d2] == datetime_series[d2] 1955 1956 def test_loc_getitem_not_monotonic(self, datetime_series): 1957 d1, d2 = datetime_series.index[[5, 15]] 1958 1959 ts2 = datetime_series[::2][[1, 2, 0]] 1960 1961 msg = r"Timestamp\('2000-01-10 00:00:00'\)" 1962 with pytest.raises(KeyError, match=msg): 1963 ts2.loc[d1:d2] 1964 with pytest.raises(KeyError, match=msg): 1965 ts2.loc[d1:d2] = 0 1966 1967 def test_loc_getitem_setitem_integer_slice_keyerrors(self): 1968 ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) 1969 1970 # this is OK 1971 cp = ser.copy() 1972 cp.iloc[4:10] = 0 1973 assert (cp.iloc[4:10] == 0).all() 1974 1975 # so is this 1976 cp = ser.copy() 1977 cp.iloc[3:11] = 0 1978 assert (cp.iloc[3:11] == 0).values.all() 1979 1980 result = ser.iloc[2:6] 1981 result2 = ser.loc[3:11] 1982 expected = ser.reindex([4, 6, 8, 10]) 1983 1984 tm.assert_series_equal(result, expected) 1985 tm.assert_series_equal(result2, expected) 1986 1987 # non-monotonic, raise KeyError 1988 s2 = ser.iloc[list(range(5)) + list(range(9, 4, -1))] 1989 with pytest.raises(KeyError, match=r"^3$"): 1990 s2.loc[3:11] 1991 with pytest.raises(KeyError, match=r"^3$"): 1992 s2.loc[3:11] = 0 1993 1994 def test_loc_getitem_iterator(self, string_series): 1995 idx = iter(string_series.index[:10]) 1996 result = string_series.loc[idx] 1997 tm.assert_series_equal(result, string_series[:10]) 1998 1999 def test_loc_setitem_boolean(self, string_series): 2000 mask = string_series > string_series.median() 2001 2002 result = string_series.copy() 2003 result.loc[mask] = 0 2004 expected = string_series 2005 expected[mask] = 0 2006 tm.assert_series_equal(result, expected) 2007 2008 def test_loc_setitem_corner(self, string_series): 2009 inds = list(string_series.index[[5, 8, 12]]) 2010 string_series.loc[inds] = 5 2011 msg = r"\['foo'\] not in index" 2012 with pytest.raises(KeyError, match=msg): 2013 string_series.loc[inds + ["foo"]] = 5 2014 2015 def test_basic_setitem_with_labels(self, datetime_series): 2016 indices = datetime_series.index[[5, 10, 15]] 2017 2018 cp = datetime_series.copy() 2019 exp = datetime_series.copy() 2020 cp[indices] = 0 2021 exp.loc[indices] = 0 2022 tm.assert_series_equal(cp, exp) 2023 2024 cp = datetime_series.copy() 2025 exp = datetime_series.copy() 2026 cp[indices[0] : indices[2]] = 0 2027 exp.loc[indices[0] : indices[2]] = 0 2028 tm.assert_series_equal(cp, exp) 2029 2030 def test_loc_setitem_listlike_of_ints(self): 2031 2032 # integer indexes, be careful 2033 ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) 2034 inds = [0, 4, 6] 2035 arr_inds = np.array([0, 4, 6]) 2036 2037 cp = ser.copy() 2038 exp = ser.copy() 2039 ser[inds] = 0 2040 ser.loc[inds] = 0 2041 tm.assert_series_equal(cp, exp) 2042 2043 cp = ser.copy() 2044 exp = ser.copy() 2045 ser[arr_inds] = 0 2046 ser.loc[arr_inds] = 0 2047 tm.assert_series_equal(cp, exp) 2048 2049 inds_notfound = [0, 4, 5, 6] 2050 arr_inds_notfound = np.array([0, 4, 5, 6]) 2051 msg = r"\[5\] not in index" 2052 with pytest.raises(KeyError, match=msg): 2053 ser[inds_notfound] = 0 2054 with pytest.raises(Exception, match=msg): 2055 ser[arr_inds_notfound] = 0 2056 2057 def test_loc_setitem_dt64tz_values(self): 2058 # GH#12089 2059 ser = Series( 2060 date_range("2011-01-01", periods=3, tz="US/Eastern"), 2061 index=["a", "b", "c"], 2062 ) 2063 s2 = ser.copy() 2064 expected = Timestamp("2011-01-03", tz="US/Eastern") 2065 s2.loc["a"] = expected 2066 result = s2.loc["a"] 2067 assert result == expected 2068 2069 s2 = ser.copy() 2070 s2.iloc[0] = expected 2071 result = s2.iloc[0] 2072 assert result == expected 2073 2074 s2 = ser.copy() 2075 s2["a"] = expected 2076 result = s2["a"] 2077 assert result == expected 2078 2079 @pytest.mark.parametrize("array_fn", [np.array, pd.array, list, tuple]) 2080 @pytest.mark.parametrize("size", [0, 4, 5, 6]) 2081 def test_loc_iloc_setitem_with_listlike(self, size, array_fn): 2082 # GH37748 2083 # testing insertion, in a Series of size N (here 5), of a listlike object 2084 # of size 0, N-1, N, N+1 2085 2086 arr = array_fn([0] * size) 2087 expected = Series([arr, 0, 0, 0, 0], index=list("abcde"), dtype=object) 2088 2089 ser = Series(0, index=list("abcde"), dtype=object) 2090 ser.loc["a"] = arr 2091 tm.assert_series_equal(ser, expected) 2092 2093 ser = Series(0, index=list("abcde"), dtype=object) 2094 ser.iloc[0] = arr 2095 tm.assert_series_equal(ser, expected) 2096