1from collections import defaultdict 2from datetime import datetime, timedelta 3from io import StringIO 4import math 5import operator 6import re 7 8import numpy as np 9import pytest 10 11from pandas._libs.tslib import Timestamp 12from pandas.compat import IS64 13from pandas.compat.numpy import np_datetime64_compat 14from pandas.util._test_decorators import async_mark 15 16import pandas as pd 17from pandas import ( 18 CategoricalIndex, 19 DataFrame, 20 DatetimeIndex, 21 Float64Index, 22 Int64Index, 23 IntervalIndex, 24 PeriodIndex, 25 RangeIndex, 26 Series, 27 TimedeltaIndex, 28 UInt64Index, 29 date_range, 30 isna, 31 period_range, 32) 33import pandas._testing as tm 34from pandas.core.indexes.api import ( 35 Index, 36 MultiIndex, 37 _get_combined_index, 38 ensure_index, 39 ensure_index_from_sequences, 40) 41from pandas.tests.indexes.common import Base 42 43 44class TestIndex(Base): 45 _holder = Index 46 47 def create_index(self) -> Index: 48 return Index(list("abcde")) 49 50 def test_can_hold_identifiers(self): 51 index = self.create_index() 52 key = index[0] 53 assert index._can_hold_identifiers_and_holds_name(key) is True 54 55 @pytest.mark.parametrize("index", ["datetime"], indirect=True) 56 def test_new_axis(self, index): 57 with tm.assert_produces_warning(FutureWarning): 58 # GH#30588 multi-dimensional indexing deprecated 59 new_index = index[None, :] 60 assert new_index.ndim == 2 61 assert isinstance(new_index, np.ndarray) 62 63 def test_constructor_regular(self, index): 64 tm.assert_contains_all(index, index) 65 66 @pytest.mark.parametrize("index", ["string"], indirect=True) 67 def test_constructor_casting(self, index): 68 # casting 69 arr = np.array(index) 70 new_index = Index(arr) 71 tm.assert_contains_all(arr, new_index) 72 tm.assert_index_equal(index, new_index) 73 74 @pytest.mark.parametrize("index", ["string"], indirect=True) 75 def test_constructor_copy(self, index): 76 # copy 77 # index = self.create_index() 78 arr = np.array(index) 79 new_index = 
Index(arr, copy=True, name="name") 80 assert isinstance(new_index, Index) 81 assert new_index.name == "name" 82 tm.assert_numpy_array_equal(arr, new_index.values) 83 arr[0] = "SOMEBIGLONGSTRING" 84 assert new_index[0] != "SOMEBIGLONGSTRING" 85 86 # FIXME: dont leave commented-out 87 # what to do here? 88 # arr = np.array(5.) 89 # pytest.raises(Exception, arr.view, Index) 90 91 @pytest.mark.parametrize("cast_as_obj", [True, False]) 92 @pytest.mark.parametrize( 93 "index", 94 [ 95 date_range( 96 "2015-01-01 10:00", 97 freq="D", 98 periods=3, 99 tz="US/Eastern", 100 name="Green Eggs & Ham", 101 ), # DTI with tz 102 date_range("2015-01-01 10:00", freq="D", periods=3), # DTI no tz 103 pd.timedelta_range("1 days", freq="D", periods=3), # td 104 period_range("2015-01-01", freq="D", periods=3), # period 105 ], 106 ) 107 def test_constructor_from_index_dtlike(self, cast_as_obj, index): 108 if cast_as_obj: 109 result = Index(index.astype(object)) 110 else: 111 result = Index(index) 112 113 tm.assert_index_equal(result, index) 114 115 if isinstance(index, DatetimeIndex): 116 assert result.tz == index.tz 117 if cast_as_obj: 118 # GH#23524 check that Index(dti, dtype=object) does not 119 # incorrectly raise ValueError, and that nanoseconds are not 120 # dropped 121 index += pd.Timedelta(nanoseconds=50) 122 result = Index(index, dtype=object) 123 assert result.dtype == np.object_ 124 assert list(result) == list(index) 125 126 @pytest.mark.parametrize( 127 "index,has_tz", 128 [ 129 ( 130 date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"), 131 True, 132 ), # datetimetz 133 (pd.timedelta_range("1 days", freq="D", periods=3), False), # td 134 (period_range("2015-01-01", freq="D", periods=3), False), # period 135 ], 136 ) 137 def test_constructor_from_series_dtlike(self, index, has_tz): 138 result = Index(Series(index)) 139 tm.assert_index_equal(result, index) 140 141 if has_tz: 142 assert result.tz == index.tz 143 144 def test_constructor_from_series_freq(self): 
145 # GH 6273 146 # create from a series, passing a freq 147 dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] 148 expected = DatetimeIndex(dts, freq="MS") 149 150 s = Series(pd.to_datetime(dts)) 151 result = DatetimeIndex(s, freq="MS") 152 153 tm.assert_index_equal(result, expected) 154 155 def test_constructor_from_frame_series_freq(self): 156 # GH 6273 157 # create from a series, passing a freq 158 dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] 159 expected = DatetimeIndex(dts, freq="MS") 160 161 df = pd.DataFrame(np.random.rand(5, 3)) 162 df["date"] = dts 163 result = DatetimeIndex(df["date"], freq="MS") 164 165 assert df["date"].dtype == object 166 expected.name = "date" 167 tm.assert_index_equal(result, expected) 168 169 expected = Series(dts, name="date") 170 tm.assert_series_equal(df["date"], expected) 171 172 # GH 6274 173 # infer freq of same 174 freq = pd.infer_freq(df["date"]) 175 assert freq == "MS" 176 177 @pytest.mark.parametrize( 178 "array", 179 [ 180 np.arange(5), 181 np.array(["a", "b", "c"]), 182 date_range("2000-01-01", periods=3).values, 183 ], 184 ) 185 def test_constructor_ndarray_like(self, array): 186 # GH 5460#issuecomment-44474502 187 # it should be possible to convert any object that satisfies the numpy 188 # ndarray interface directly into an Index 189 class ArrayLike: 190 def __init__(self, array): 191 self.array = array 192 193 def __array__(self, dtype=None) -> np.ndarray: 194 return self.array 195 196 expected = Index(array) 197 result = Index(ArrayLike(array)) 198 tm.assert_index_equal(result, expected) 199 200 def test_constructor_int_dtype_nan(self): 201 # see gh-15187 202 data = [np.nan] 203 expected = Float64Index(data) 204 result = Index(data, dtype="float") 205 tm.assert_index_equal(result, expected) 206 207 @pytest.mark.parametrize("dtype", ["int64", "uint64"]) 208 def test_constructor_int_dtype_nan_raises(self, dtype): 209 # see gh-15187 210 data = [np.nan] 211 msg = "cannot convert" 
212 with pytest.raises(ValueError, match=msg): 213 Index(data, dtype=dtype) 214 215 def test_constructor_no_pandas_array(self): 216 ser = Series([1, 2, 3]) 217 result = Index(ser.array) 218 expected = Index([1, 2, 3]) 219 tm.assert_index_equal(result, expected) 220 221 @pytest.mark.parametrize( 222 "klass,dtype,na_val", 223 [ 224 (Float64Index, np.float64, np.nan), 225 (DatetimeIndex, "datetime64[ns]", pd.NaT), 226 ], 227 ) 228 def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val): 229 # GH 13467 230 na_list = [na_val, na_val] 231 expected = klass(na_list) 232 assert expected.dtype == dtype 233 234 result = Index(na_list) 235 tm.assert_index_equal(result, expected) 236 237 result = Index(np.array(na_list)) 238 tm.assert_index_equal(result, expected) 239 240 @pytest.mark.parametrize( 241 "vals,dtype", 242 [ 243 ([1, 2, 3, 4, 5], "int"), 244 ([1.1, np.nan, 2.2, 3.0], "float"), 245 (["A", "B", "C", np.nan], "obj"), 246 ], 247 ) 248 def test_constructor_simple_new(self, vals, dtype): 249 index = Index(vals, name=dtype) 250 result = index._simple_new(index.values, dtype) 251 tm.assert_index_equal(result, index) 252 253 @pytest.mark.parametrize( 254 "vals", 255 [ 256 [1, 2, 3], 257 np.array([1, 2, 3]), 258 np.array([1, 2, 3], dtype=int), 259 # below should coerce 260 [1.0, 2.0, 3.0], 261 np.array([1.0, 2.0, 3.0], dtype=float), 262 ], 263 ) 264 def test_constructor_dtypes_to_int64(self, vals): 265 index = Index(vals, dtype=int) 266 assert isinstance(index, Int64Index) 267 268 @pytest.mark.parametrize( 269 "vals", 270 [ 271 [1, 2, 3], 272 [1.0, 2.0, 3.0], 273 np.array([1.0, 2.0, 3.0]), 274 np.array([1, 2, 3], dtype=int), 275 np.array([1.0, 2.0, 3.0], dtype=float), 276 ], 277 ) 278 def test_constructor_dtypes_to_float64(self, vals): 279 index = Index(vals, dtype=float) 280 assert isinstance(index, Float64Index) 281 282 @pytest.mark.parametrize( 283 "vals", 284 [ 285 [1, 2, 3], 286 np.array([1, 2, 3], dtype=int), 287 np.array( 288 [np_datetime64_compat("2011-01-01"), 
np_datetime64_compat("2011-01-02")] 289 ), 290 [datetime(2011, 1, 1), datetime(2011, 1, 2)], 291 ], 292 ) 293 def test_constructor_dtypes_to_categorical(self, vals): 294 index = Index(vals, dtype="category") 295 assert isinstance(index, CategoricalIndex) 296 297 @pytest.mark.parametrize("cast_index", [True, False]) 298 @pytest.mark.parametrize( 299 "vals", 300 [ 301 Index( 302 np.array( 303 [ 304 np_datetime64_compat("2011-01-01"), 305 np_datetime64_compat("2011-01-02"), 306 ] 307 ) 308 ), 309 Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]), 310 ], 311 ) 312 def test_constructor_dtypes_to_datetime(self, cast_index, vals): 313 if cast_index: 314 index = Index(vals, dtype=object) 315 assert isinstance(index, Index) 316 assert index.dtype == object 317 else: 318 index = Index(vals) 319 assert isinstance(index, DatetimeIndex) 320 321 @pytest.mark.parametrize("cast_index", [True, False]) 322 @pytest.mark.parametrize( 323 "vals", 324 [ 325 np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")]), 326 [timedelta(1), timedelta(1)], 327 ], 328 ) 329 def test_constructor_dtypes_to_timedelta(self, cast_index, vals): 330 if cast_index: 331 index = Index(vals, dtype=object) 332 assert isinstance(index, Index) 333 assert index.dtype == object 334 else: 335 index = Index(vals) 336 assert isinstance(index, TimedeltaIndex) 337 338 @pytest.mark.parametrize("attr", ["values", "asi8"]) 339 @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) 340 def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass): 341 # Test constructing with a datetimetz dtype 342 # .values produces numpy datetimes, so these are considered naive 343 # .asi8 produces integers, so these are considered epoch timestamps 344 # ^the above will be true in a later version. Right now we `.view` 345 # the i8 values as NS_DTYPE, effectively treating them as wall times. 
346 index = date_range("2011-01-01", periods=5) 347 arg = getattr(index, attr) 348 index = index.tz_localize(tz_naive_fixture) 349 dtype = index.dtype 350 351 if attr == "asi8": 352 result = DatetimeIndex(arg).tz_localize(tz_naive_fixture) 353 else: 354 result = klass(arg, tz=tz_naive_fixture) 355 tm.assert_index_equal(result, index) 356 357 if attr == "asi8": 358 result = DatetimeIndex(arg).astype(dtype) 359 else: 360 result = klass(arg, dtype=dtype) 361 tm.assert_index_equal(result, index) 362 363 if attr == "asi8": 364 result = DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture) 365 else: 366 result = klass(list(arg), tz=tz_naive_fixture) 367 tm.assert_index_equal(result, index) 368 369 if attr == "asi8": 370 result = DatetimeIndex(list(arg)).astype(dtype) 371 else: 372 result = klass(list(arg), dtype=dtype) 373 tm.assert_index_equal(result, index) 374 375 @pytest.mark.parametrize("attr", ["values", "asi8"]) 376 @pytest.mark.parametrize("klass", [Index, pd.TimedeltaIndex]) 377 def test_constructor_dtypes_timedelta(self, attr, klass): 378 index = pd.timedelta_range("1 days", periods=5) 379 index = index._with_freq(None) # wont be preserved by constructors 380 dtype = index.dtype 381 382 values = getattr(index, attr) 383 384 result = klass(values, dtype=dtype) 385 tm.assert_index_equal(result, index) 386 387 result = klass(list(values), dtype=dtype) 388 tm.assert_index_equal(result, index) 389 390 @pytest.mark.parametrize("value", [[], iter([]), (_ for _ in [])]) 391 @pytest.mark.parametrize( 392 "klass", 393 [ 394 Index, 395 Float64Index, 396 Int64Index, 397 UInt64Index, 398 CategoricalIndex, 399 DatetimeIndex, 400 TimedeltaIndex, 401 ], 402 ) 403 def test_constructor_empty(self, value, klass): 404 empty = klass(value) 405 assert isinstance(empty, klass) 406 assert not len(empty) 407 408 @pytest.mark.parametrize( 409 "empty,klass", 410 [ 411 (PeriodIndex([], freq="B"), PeriodIndex), 412 (PeriodIndex(iter([]), freq="B"), PeriodIndex), 413 (PeriodIndex((_ for _ 
in []), freq="B"), PeriodIndex), 414 (RangeIndex(step=1), RangeIndex), 415 (MultiIndex(levels=[[1, 2], ["blue", "red"]], codes=[[], []]), MultiIndex), 416 ], 417 ) 418 def test_constructor_empty_special(self, empty, klass): 419 assert isinstance(empty, klass) 420 assert not len(empty) 421 422 def test_constructor_overflow_int64(self): 423 # see gh-15832 424 msg = ( 425 "The elements provided in the data cannot " 426 "all be casted to the dtype int64" 427 ) 428 with pytest.raises(OverflowError, match=msg): 429 Index([np.iinfo(np.uint64).max - 1], dtype="int64") 430 431 @pytest.mark.parametrize( 432 "index", 433 [ 434 "datetime", 435 "float", 436 "int", 437 "period", 438 "range", 439 "repeats", 440 "timedelta", 441 "tuples", 442 "uint", 443 ], 444 indirect=True, 445 ) 446 def test_view_with_args(self, index): 447 index.view("i8") 448 449 @pytest.mark.parametrize( 450 "index", 451 [ 452 "unicode", 453 "string", 454 pytest.param("categorical", marks=pytest.mark.xfail(reason="gh-25464")), 455 "bool", 456 "empty", 457 ], 458 indirect=True, 459 ) 460 def test_view_with_args_object_array_raises(self, index): 461 msg = "Cannot change data-type for object array" 462 with pytest.raises(TypeError, match=msg): 463 index.view("i8") 464 465 @pytest.mark.parametrize("index", ["int", "range"], indirect=True) 466 def test_astype(self, index): 467 casted = index.astype("i8") 468 469 # it works! 
470 casted.get_loc(5) 471 472 # pass on name 473 index.name = "foobar" 474 casted = index.astype("i8") 475 assert casted.name == "foobar" 476 477 def test_equals_object(self): 478 # same 479 assert Index(["a", "b", "c"]).equals(Index(["a", "b", "c"])) 480 481 @pytest.mark.parametrize( 482 "comp", [Index(["a", "b"]), Index(["a", "b", "d"]), ["a", "b", "c"]] 483 ) 484 def test_not_equals_object(self, comp): 485 assert not Index(["a", "b", "c"]).equals(comp) 486 487 def test_insert_missing(self, nulls_fixture): 488 # GH 22295 489 # test there is no mangling of NA values 490 expected = Index(["a", nulls_fixture, "b", "c"]) 491 result = Index(list("abc")).insert(1, nulls_fixture) 492 tm.assert_index_equal(result, expected) 493 494 def test_delete_raises(self): 495 index = Index(["a", "b", "c", "d"], name="index") 496 msg = "index 5 is out of bounds for axis 0 with size 4" 497 with pytest.raises(IndexError, match=msg): 498 index.delete(5) 499 500 def test_identical(self): 501 502 # index 503 i1 = Index(["a", "b", "c"]) 504 i2 = Index(["a", "b", "c"]) 505 506 assert i1.identical(i2) 507 508 i1 = i1.rename("foo") 509 assert i1.equals(i2) 510 assert not i1.identical(i2) 511 512 i2 = i2.rename("foo") 513 assert i1.identical(i2) 514 515 i3 = Index([("a", "a"), ("a", "b"), ("b", "a")]) 516 i4 = Index([("a", "a"), ("a", "b"), ("b", "a")], tupleize_cols=False) 517 assert not i3.identical(i4) 518 519 def test_is_(self): 520 ind = Index(range(10)) 521 assert ind.is_(ind) 522 assert ind.is_(ind.view().view().view().view()) 523 assert not ind.is_(Index(range(10))) 524 assert not ind.is_(ind.copy()) 525 assert not ind.is_(ind.copy(deep=False)) 526 assert not ind.is_(ind[:]) 527 assert not ind.is_(np.array(range(10))) 528 529 # quasi-implementation dependent 530 assert ind.is_(ind.view()) 531 ind2 = ind.view() 532 ind2.name = "bob" 533 assert ind.is_(ind2) 534 assert ind2.is_(ind) 535 # doesn't matter if Indices are *actually* views of underlying data, 536 assert not 
ind.is_(Index(ind.values)) 537 arr = np.array(range(1, 11)) 538 ind1 = Index(arr, copy=False) 539 ind2 = Index(arr, copy=False) 540 assert not ind1.is_(ind2) 541 542 @pytest.mark.parametrize("index", ["datetime"], indirect=True) 543 def test_asof(self, index): 544 d = index[0] 545 assert index.asof(d) == d 546 assert isna(index.asof(d - timedelta(1))) 547 548 d = index[-1] 549 assert index.asof(d + timedelta(1)) == d 550 551 d = index[0].to_pydatetime() 552 assert isinstance(index.asof(d), Timestamp) 553 554 def test_asof_datetime_partial(self): 555 index = date_range("2010-01-01", periods=2, freq="m") 556 expected = Timestamp("2010-02-28") 557 result = index.asof("2010-02") 558 assert result == expected 559 assert not isinstance(result, Index) 560 561 def test_nanosecond_index_access(self): 562 s = Series([Timestamp("20130101")]).values.view("i8")[0] 563 r = DatetimeIndex([s + 50 + i for i in range(100)]) 564 x = Series(np.random.randn(100), index=r) 565 566 first_value = x.asof(x.index[0]) 567 568 # this does not yet work, as parsing strings is done via dateutil 569 # assert first_value == x['2013-01-01 00:00:00.000000050+0000'] 570 571 expected_ts = np_datetime64_compat("2013-01-01 00:00:00.000000050+0000", "ns") 572 assert first_value == x[Timestamp(expected_ts)] 573 574 @pytest.mark.parametrize("index", ["string"], indirect=True) 575 def test_booleanindex(self, index): 576 bool_index = np.ones(len(index), dtype=bool) 577 bool_index[5:30:2] = False 578 579 sub_index = index[bool_index] 580 581 for i, val in enumerate(sub_index): 582 assert sub_index.get_loc(val) == i 583 584 sub_index = index[list(bool_index)] 585 for i, val in enumerate(sub_index): 586 assert sub_index.get_loc(val) == i 587 588 def test_fancy(self): 589 index = self.create_index() 590 sl = index[[1, 2, 3]] 591 for i in sl: 592 assert i == sl[sl.get_loc(i)] 593 594 @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) 595 @pytest.mark.parametrize("dtype", [np.int_, 
np.bool_]) 596 def test_empty_fancy(self, index, dtype): 597 empty_arr = np.array([], dtype=dtype) 598 empty_index = type(index)([]) 599 600 assert index[[]].identical(empty_index) 601 assert index[empty_arr].identical(empty_index) 602 603 @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) 604 def test_empty_fancy_raises(self, index): 605 # DatetimeIndex is excluded, because it overrides getitem and should 606 # be tested separately. 607 empty_farr = np.array([], dtype=np.float_) 608 empty_index = type(index)([]) 609 610 assert index[[]].identical(empty_index) 611 # np.ndarray only accepts ndarray of int & bool dtypes, so should Index 612 msg = r"arrays used as indices must be of integer \(or boolean\) type" 613 with pytest.raises(IndexError, match=msg): 614 index[empty_farr] 615 616 @pytest.mark.parametrize("index", ["string"], indirect=True) 617 def test_intersection(self, index, sort): 618 first = index[:20] 619 second = index[:10] 620 intersect = first.intersection(second, sort=sort) 621 if sort is None: 622 tm.assert_index_equal(intersect, second.sort_values()) 623 assert tm.equalContents(intersect, second) 624 625 # Corner cases 626 inter = first.intersection(first, sort=sort) 627 assert inter is first 628 629 @pytest.mark.parametrize( 630 "index2,keeps_name", 631 [ 632 (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name 633 (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names 634 (Index([3, 4, 5, 6, 7]), False), 635 ], 636 ) 637 def test_intersection_name_preservation(self, index2, keeps_name, sort): 638 index1 = Index([1, 2, 3, 4, 5], name="index") 639 expected = Index([3, 4, 5]) 640 result = index1.intersection(index2, sort) 641 642 if keeps_name: 643 expected.name = "index" 644 645 assert result.name == expected.name 646 tm.assert_index_equal(result, expected) 647 648 @pytest.mark.parametrize("index", ["string"], indirect=True) 649 @pytest.mark.parametrize( 650 "first_name,second_name,expected_name", 
651 [("A", "A", "A"), ("A", "B", None), (None, "B", None)], 652 ) 653 def test_intersection_name_preservation2( 654 self, index, first_name, second_name, expected_name, sort 655 ): 656 first = index[5:20] 657 second = index[:10] 658 first.name = first_name 659 second.name = second_name 660 intersect = first.intersection(second, sort=sort) 661 assert intersect.name == expected_name 662 663 def test_chained_union(self, sort): 664 # Chained unions handles names correctly 665 i1 = Index([1, 2], name="i1") 666 i2 = Index([5, 6], name="i2") 667 i3 = Index([3, 4], name="i3") 668 union = i1.union(i2.union(i3, sort=sort), sort=sort) 669 expected = i1.union(i2, sort=sort).union(i3, sort=sort) 670 tm.assert_index_equal(union, expected) 671 672 j1 = Index([1, 2], name="j1") 673 j2 = Index([], name="j2") 674 j3 = Index([], name="j3") 675 union = j1.union(j2.union(j3, sort=sort), sort=sort) 676 expected = j1.union(j2, sort=sort).union(j3, sort=sort) 677 tm.assert_index_equal(union, expected) 678 679 @pytest.mark.parametrize("index", ["string"], indirect=True) 680 def test_union(self, index, sort): 681 first = index[5:20] 682 second = index[:10] 683 everything = index[:20] 684 685 union = first.union(second, sort=sort) 686 if sort is None: 687 tm.assert_index_equal(union, everything.sort_values()) 688 assert tm.equalContents(union, everything) 689 690 @pytest.mark.parametrize("klass", [np.array, Series, list]) 691 @pytest.mark.parametrize("index", ["string"], indirect=True) 692 def test_union_from_iterables(self, index, klass, sort): 693 # GH 10149 694 first = index[5:20] 695 second = index[:10] 696 everything = index[:20] 697 698 case = klass(second.values) 699 result = first.union(case, sort=sort) 700 if sort is None: 701 tm.assert_index_equal(result, everything.sort_values()) 702 assert tm.equalContents(result, everything) 703 704 @pytest.mark.parametrize("index", ["string"], indirect=True) 705 def test_union_identity(self, index, sort): 706 first = index[5:20] 707 708 union = 
first.union(first, sort=sort) 709 # i.e. identity is not preserved when sort is True 710 assert (union is first) is (not sort) 711 712 # This should no longer be the same object, since [] is not consistent, 713 # both objects will be recast to dtype('O') 714 union = first.union([], sort=sort) 715 assert (union is first) is (not sort) 716 717 union = Index([]).union(first, sort=sort) 718 assert (union is first) is (not sort) 719 720 def test_union_dt_as_obj(self, sort): 721 # TODO: Replace with fixturesult 722 index = self.create_index() 723 date_index = date_range("2019-01-01", periods=10) 724 first_cat = index.union(date_index) 725 second_cat = index.union(index) 726 727 appended = np.append(index, date_index.astype("O")) 728 729 assert tm.equalContents(first_cat, appended) 730 assert tm.equalContents(second_cat, index) 731 tm.assert_contains_all(index, first_cat) 732 tm.assert_contains_all(index, second_cat) 733 tm.assert_contains_all(date_index, first_cat) 734 735 def test_map_identity_mapping(self, index): 736 # GH 12766 737 tm.assert_index_equal(index, index.map(lambda x: x)) 738 739 def test_map_with_tuples(self): 740 # GH 12766 741 742 # Test that returning a single tuple from an Index 743 # returns an Index. 744 index = tm.makeIntIndex(3) 745 result = tm.makeIntIndex(3).map(lambda x: (x,)) 746 expected = Index([(i,) for i in index]) 747 tm.assert_index_equal(result, expected) 748 749 # Test that returning a tuple from a map of a single index 750 # returns a MultiIndex object. 751 result = index.map(lambda x: (x, x == 1)) 752 expected = MultiIndex.from_tuples([(i, i == 1) for i in index]) 753 tm.assert_index_equal(result, expected) 754 755 def test_map_with_tuples_mi(self): 756 # Test that returning a single object from a MultiIndex 757 # returns an Index. 
758 first_level = ["foo", "bar", "baz"] 759 multi_index = MultiIndex.from_tuples(zip(first_level, [1, 2, 3])) 760 reduced_index = multi_index.map(lambda x: x[0]) 761 tm.assert_index_equal(reduced_index, Index(first_level)) 762 763 @pytest.mark.parametrize( 764 "attr", ["makeDateIndex", "makePeriodIndex", "makeTimedeltaIndex"] 765 ) 766 def test_map_tseries_indices_return_index(self, attr): 767 index = getattr(tm, attr)(10) 768 expected = Index([1] * 10) 769 result = index.map(lambda x: 1) 770 tm.assert_index_equal(expected, result) 771 772 def test_map_tseries_indices_accsr_return_index(self): 773 date_index = tm.makeDateIndex(24, freq="h", name="hourly") 774 expected = Index(range(24), name="hourly") 775 tm.assert_index_equal(expected, date_index.map(lambda x: x.hour)) 776 777 @pytest.mark.parametrize( 778 "mapper", 779 [ 780 lambda values, index: {i: e for e, i in zip(values, index)}, 781 lambda values, index: Series(values, index), 782 ], 783 ) 784 def test_map_dictlike_simple(self, mapper): 785 # GH 12756 786 expected = Index(["foo", "bar", "baz"]) 787 index = tm.makeIntIndex(3) 788 result = index.map(mapper(expected.values, index)) 789 tm.assert_index_equal(result, expected) 790 791 @pytest.mark.parametrize( 792 "mapper", 793 [ 794 lambda values, index: {i: e for e, i in zip(values, index)}, 795 lambda values, index: Series(values, index), 796 ], 797 ) 798 def test_map_dictlike(self, index, mapper): 799 # GH 12756 800 if isinstance(index, CategoricalIndex): 801 # Tested in test_categorical 802 return 803 elif not index.is_unique: 804 # Cannot map duplicated index 805 return 806 807 if index.empty: 808 # to match proper result coercion for uints 809 expected = Index([]) 810 else: 811 expected = Index(np.arange(len(index), 0, -1)) 812 813 result = index.map(mapper(expected, index)) 814 tm.assert_index_equal(result, expected) 815 816 @pytest.mark.parametrize( 817 "mapper", 818 [Series(["foo", 2.0, "baz"], index=[0, 2, -1]), {0: "foo", 2: 2.0, -1: "baz"}], 819 ) 
820 def test_map_with_non_function_missing_values(self, mapper): 821 # GH 12756 822 expected = Index([2.0, np.nan, "foo"]) 823 result = Index([2, 1, 0]).map(mapper) 824 825 tm.assert_index_equal(expected, result) 826 827 def test_map_na_exclusion(self): 828 index = Index([1.5, np.nan, 3, np.nan, 5]) 829 830 result = index.map(lambda x: x * 2, na_action="ignore") 831 expected = index * 2 832 tm.assert_index_equal(result, expected) 833 834 def test_map_defaultdict(self): 835 index = Index([1, 2, 3]) 836 default_dict = defaultdict(lambda: "blank") 837 default_dict[1] = "stuff" 838 result = index.map(default_dict) 839 expected = Index(["stuff", "blank", "blank"]) 840 tm.assert_index_equal(result, expected) 841 842 @pytest.mark.parametrize("name,expected", [("foo", "foo"), ("bar", None)]) 843 def test_append_empty_preserve_name(self, name, expected): 844 left = Index([], name="foo") 845 right = Index([1, 2, 3], name=name) 846 847 result = left.append(right) 848 assert result.name == expected 849 850 @pytest.mark.parametrize("index", ["string"], indirect=True) 851 @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")]) 852 def test_difference_name_preservation(self, index, second_name, expected, sort): 853 first = index[5:20] 854 second = index[:10] 855 answer = index[10:20] 856 857 first.name = "name" 858 second.name = second_name 859 result = first.difference(second, sort=sort) 860 861 assert tm.equalContents(result, answer) 862 863 if expected is None: 864 assert result.name is None 865 else: 866 assert result.name == expected 867 868 @pytest.mark.parametrize("index", ["string"], indirect=True) 869 def test_difference_empty_arg(self, index, sort): 870 first = index[5:20] 871 first.name = "name" 872 result = first.difference([], sort) 873 874 assert tm.equalContents(result, first) 875 assert result.name == first.name 876 877 @pytest.mark.parametrize("index", ["string"], indirect=True) 878 def test_difference_identity(self, index, sort): 879 
first = index[5:20] 880 first.name = "name" 881 result = first.difference(first, sort) 882 883 assert len(result) == 0 884 assert result.name == first.name 885 886 @pytest.mark.parametrize("index", ["string"], indirect=True) 887 def test_difference_sort(self, index, sort): 888 first = index[5:20] 889 second = index[:10] 890 891 result = first.difference(second, sort) 892 expected = index[10:20] 893 894 if sort is None: 895 expected = expected.sort_values() 896 897 tm.assert_index_equal(result, expected) 898 899 def test_symmetric_difference(self, sort): 900 # smoke 901 index1 = Index([5, 2, 3, 4], name="index1") 902 index2 = Index([2, 3, 4, 1]) 903 result = index1.symmetric_difference(index2, sort=sort) 904 expected = Index([5, 1]) 905 assert tm.equalContents(result, expected) 906 assert result.name is None 907 if sort is None: 908 expected = expected.sort_values() 909 tm.assert_index_equal(result, expected) 910 911 # __xor__ syntax 912 with tm.assert_produces_warning(FutureWarning): 913 expected = index1 ^ index2 914 assert tm.equalContents(result, expected) 915 assert result.name is None 916 917 @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) 918 def test_difference_incomparable(self, opname): 919 a = Index([3, Timestamp("2000"), 1]) 920 b = Index([2, Timestamp("1999"), 1]) 921 op = operator.methodcaller(opname, b) 922 923 # sort=None, the default 924 result = op(a) 925 expected = Index([3, Timestamp("2000"), 2, Timestamp("1999")]) 926 if opname == "difference": 927 expected = expected[:2] 928 tm.assert_index_equal(result, expected) 929 930 # sort=False 931 op = operator.methodcaller(opname, b, sort=False) 932 result = op(a) 933 tm.assert_index_equal(result, expected) 934 935 @pytest.mark.xfail(reason="Not implemented") 936 @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) 937 def test_difference_incomparable_true(self, opname): 938 # TODO decide on True behaviour 939 # # sort=True, raises 940 a = Index([3, 
Timestamp("2000"), 1]) 941 b = Index([2, Timestamp("1999"), 1]) 942 op = operator.methodcaller(opname, b, sort=True) 943 944 with pytest.raises(TypeError, match="Cannot compare"): 945 op(a) 946 947 def test_symmetric_difference_mi(self, sort): 948 index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) 949 index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)]) 950 result = index1.symmetric_difference(index2, sort=sort) 951 expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)]) 952 if sort is None: 953 expected = expected.sort_values() 954 tm.assert_index_equal(result, expected) 955 assert tm.equalContents(result, expected) 956 957 @pytest.mark.parametrize( 958 "index2,expected", 959 [ 960 (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), 961 (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])), 962 ], 963 ) 964 def test_symmetric_difference_missing(self, index2, expected, sort): 965 # GH 13514 change: {nan} - {nan} == {} 966 # (GH 6444, sorting of nans, is no longer an issue) 967 index1 = Index([1, np.nan, 2, 3]) 968 969 result = index1.symmetric_difference(index2, sort=sort) 970 if sort is None: 971 expected = expected.sort_values() 972 tm.assert_index_equal(result, expected) 973 974 def test_symmetric_difference_non_index(self, sort): 975 index1 = Index([1, 2, 3, 4], name="index1") 976 index2 = np.array([2, 3, 4, 5]) 977 expected = Index([1, 5]) 978 result = index1.symmetric_difference(index2, sort=sort) 979 assert tm.equalContents(result, expected) 980 assert result.name == "index1" 981 982 result = index1.symmetric_difference(index2, result_name="new_name", sort=sort) 983 assert tm.equalContents(result, expected) 984 assert result.name == "new_name" 985 986 def test_is_mixed_deprecated(self): 987 # GH#32922 988 index = self.create_index() 989 with tm.assert_produces_warning(FutureWarning): 990 index.is_mixed() 991 992 @pytest.mark.parametrize( 993 "index, expected", 994 [ 995 ("string", False), 996 ("bool", False), 997 
("categorical", False), 998 ("int", True), 999 ("datetime", False), 1000 ("float", True), 1001 ], 1002 indirect=["index"], 1003 ) 1004 def test_is_numeric(self, index, expected): 1005 assert index.is_numeric() is expected 1006 1007 @pytest.mark.parametrize( 1008 "index, expected", 1009 [ 1010 ("string", True), 1011 ("bool", True), 1012 ("categorical", False), 1013 ("int", False), 1014 ("datetime", False), 1015 ("float", False), 1016 ], 1017 indirect=["index"], 1018 ) 1019 def test_is_object(self, index, expected): 1020 assert index.is_object() is expected 1021 1022 @pytest.mark.parametrize( 1023 "index, expected", 1024 [ 1025 ("string", False), 1026 ("bool", False), 1027 ("categorical", False), 1028 ("int", False), 1029 ("datetime", True), 1030 ("float", False), 1031 ], 1032 indirect=["index"], 1033 ) 1034 def test_is_all_dates(self, index, expected): 1035 with tm.assert_produces_warning(FutureWarning): 1036 assert index.is_all_dates is expected 1037 1038 def test_summary(self, index): 1039 self._check_method_works(Index._summary, index) 1040 1041 def test_summary_bug(self): 1042 # GH3869` 1043 ind = Index(["{other}%s", "~:{range}:0"], name="A") 1044 result = ind._summary() 1045 # shouldn't be formatted accidentally. 
1046 assert "~:{range}:0" in result 1047 assert "{other}%s" in result 1048 1049 def test_format_different_scalar_lengths(self): 1050 # GH35439 1051 idx = Index(["aaaaaaaaa", "b"]) 1052 expected = ["aaaaaaaaa", "b"] 1053 assert idx.format() == expected 1054 1055 def test_format_bug(self): 1056 # GH 14626 1057 # windows has different precision on datetime.datetime.now (it doesn't 1058 # include us since the default for Timestamp shows these but Index 1059 # formatting does not we are skipping) 1060 now = datetime.now() 1061 if not str(now).endswith("000"): 1062 index = Index([now]) 1063 formatted = index.format() 1064 expected = [str(index[0])] 1065 assert formatted == expected 1066 1067 Index([]).format() 1068 1069 @pytest.mark.parametrize("vals", [[1, 2.0 + 3.0j, 4.0], ["a", "b", "c"]]) 1070 def test_format_missing(self, vals, nulls_fixture): 1071 # 2845 1072 vals = list(vals) # Copy for each iteration 1073 vals.append(nulls_fixture) 1074 index = Index(vals) 1075 1076 formatted = index.format() 1077 expected = [str(index[0]), str(index[1]), str(index[2]), "NaN"] 1078 1079 assert formatted == expected 1080 assert index[3] is nulls_fixture 1081 1082 def test_format_with_name_time_info(self): 1083 # bug I fixed 12/20/2011 1084 dates = date_range("2011-01-01 04:00:00", periods=10, name="something") 1085 1086 formatted = dates.format(name=True) 1087 assert formatted[0] == "something" 1088 1089 def test_format_datetime_with_time(self): 1090 t = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) 1091 1092 result = t.format() 1093 expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"] 1094 assert len(result) == 2 1095 assert result == expected 1096 1097 @pytest.mark.parametrize("op", ["any", "all"]) 1098 def test_logical_compat(self, op): 1099 index = self.create_index() 1100 assert getattr(index, op)() == getattr(index.values, op)() 1101 1102 def _check_method_works(self, method, index): 1103 method(index) 1104 1105 def test_get_indexer(self): 1106 index1 = 
Index([1, 2, 3, 4, 5]) 1107 index2 = Index([2, 4, 6]) 1108 1109 r1 = index1.get_indexer(index2) 1110 e1 = np.array([1, 3, -1], dtype=np.intp) 1111 tm.assert_almost_equal(r1, e1) 1112 1113 @pytest.mark.parametrize("reverse", [True, False]) 1114 @pytest.mark.parametrize( 1115 "expected,method", 1116 [ 1117 (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"), 1118 (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"), 1119 (np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"), 1120 (np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"), 1121 ], 1122 ) 1123 def test_get_indexer_methods(self, reverse, expected, method): 1124 index1 = Index([1, 2, 3, 4, 5]) 1125 index2 = Index([2, 4, 6]) 1126 1127 if reverse: 1128 index1 = index1[::-1] 1129 expected = expected[::-1] 1130 1131 result = index2.get_indexer(index1, method=method) 1132 tm.assert_almost_equal(result, expected) 1133 1134 def test_get_indexer_invalid(self): 1135 # GH10411 1136 index = Index(np.arange(10)) 1137 1138 with pytest.raises(ValueError, match="tolerance argument"): 1139 index.get_indexer([1, 0], tolerance=1) 1140 1141 with pytest.raises(ValueError, match="limit argument"): 1142 index.get_indexer([1, 0], limit=1) 1143 1144 @pytest.mark.parametrize( 1145 "method, tolerance, indexer, expected", 1146 [ 1147 ("pad", None, [0, 5, 9], [0, 5, 9]), 1148 ("backfill", None, [0, 5, 9], [0, 5, 9]), 1149 ("nearest", None, [0, 5, 9], [0, 5, 9]), 1150 ("pad", 0, [0, 5, 9], [0, 5, 9]), 1151 ("backfill", 0, [0, 5, 9], [0, 5, 9]), 1152 ("nearest", 0, [0, 5, 9], [0, 5, 9]), 1153 ("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]), 1154 ("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]), 1155 ("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]), 1156 ("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]), 1157 ("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]), 1158 ("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]), 1159 ("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), 1160 ("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), 1161 ("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]), 1162 ], 
1163 ) 1164 def test_get_indexer_nearest(self, method, tolerance, indexer, expected): 1165 index = Index(np.arange(10)) 1166 1167 actual = index.get_indexer(indexer, method=method, tolerance=tolerance) 1168 tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) 1169 1170 @pytest.mark.parametrize("listtype", [list, tuple, Series, np.array]) 1171 @pytest.mark.parametrize( 1172 "tolerance, expected", 1173 list( 1174 zip( 1175 [[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]], 1176 [[0, 2, -1], [0, -1, -1], [-1, 2, 9]], 1177 ) 1178 ), 1179 ) 1180 def test_get_indexer_nearest_listlike_tolerance( 1181 self, tolerance, expected, listtype 1182 ): 1183 index = Index(np.arange(10)) 1184 1185 actual = index.get_indexer( 1186 [0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance) 1187 ) 1188 tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) 1189 1190 def test_get_indexer_nearest_error(self): 1191 index = Index(np.arange(10)) 1192 with pytest.raises(ValueError, match="limit argument"): 1193 index.get_indexer([1, 0], method="nearest", limit=1) 1194 1195 with pytest.raises(ValueError, match="tolerance size must match"): 1196 index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3]) 1197 1198 @pytest.mark.parametrize( 1199 "method,expected", 1200 [("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])], 1201 ) 1202 def test_get_indexer_nearest_decreasing(self, method, expected): 1203 index = Index(np.arange(10))[::-1] 1204 1205 actual = index.get_indexer([0, 5, 9], method=method) 1206 tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp)) 1207 1208 actual = index.get_indexer([0.2, 1.8, 8.5], method=method) 1209 tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) 1210 1211 @pytest.mark.parametrize( 1212 "method,expected", 1213 [ 1214 ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)), 1215 ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)), 1216 ], 1217 ) 1218 def 
test_get_indexer_strings(self, method, expected): 1219 index = Index(["b", "c"]) 1220 actual = index.get_indexer(["a", "b", "c", "d"], method=method) 1221 1222 tm.assert_numpy_array_equal(actual, expected) 1223 1224 def test_get_indexer_strings_raises(self): 1225 index = Index(["b", "c"]) 1226 1227 msg = r"unsupported operand type\(s\) for -: 'str' and 'str'" 1228 with pytest.raises(TypeError, match=msg): 1229 index.get_indexer(["a", "b", "c", "d"], method="nearest") 1230 1231 with pytest.raises(TypeError, match=msg): 1232 index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2) 1233 1234 with pytest.raises(TypeError, match=msg): 1235 index.get_indexer( 1236 ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] 1237 ) 1238 1239 @pytest.mark.parametrize( 1240 "idx_class", [Int64Index, RangeIndex, Float64Index, UInt64Index] 1241 ) 1242 @pytest.mark.parametrize("method", ["get_indexer", "get_indexer_non_unique"]) 1243 def test_get_indexer_numeric_index_boolean_target(self, method, idx_class): 1244 # GH 16877 1245 1246 numeric_index = idx_class(RangeIndex(4)) 1247 other = Index([True, False, True]) 1248 1249 result = getattr(numeric_index, method)(other) 1250 expected = np.array([-1, -1, -1], dtype=np.intp) 1251 if method == "get_indexer": 1252 tm.assert_numpy_array_equal(result, expected) 1253 else: 1254 missing = np.arange(3, dtype=np.intp) 1255 tm.assert_numpy_array_equal(result[0], expected) 1256 tm.assert_numpy_array_equal(result[1], missing) 1257 1258 def test_get_indexer_with_NA_values( 1259 self, unique_nulls_fixture, unique_nulls_fixture2 1260 ): 1261 # GH 22332 1262 # check pairwise, that no pair of na values 1263 # is mangled 1264 if unique_nulls_fixture is unique_nulls_fixture2: 1265 return # skip it, values are not unique 1266 arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object) 1267 index = Index(arr, dtype=object) 1268 result = index.get_indexer( 1269 [unique_nulls_fixture, unique_nulls_fixture2, "Unknown"] 1270 ) 
1271 expected = np.array([0, 1, -1], dtype=np.intp) 1272 tm.assert_numpy_array_equal(result, expected) 1273 1274 @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) 1275 def test_get_loc(self, method): 1276 index = Index([0, 1, 2]) 1277 assert index.get_loc(1, method=method) == 1 1278 1279 if method: 1280 assert index.get_loc(1, method=method, tolerance=0) == 1 1281 1282 @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) 1283 def test_get_loc_raises_bad_label(self, method): 1284 index = Index([0, 1, 2]) 1285 if method: 1286 msg = "not supported between" 1287 else: 1288 msg = "invalid key" 1289 1290 with pytest.raises(TypeError, match=msg): 1291 index.get_loc([1, 2], method=method) 1292 1293 @pytest.mark.parametrize( 1294 "method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)] 1295 ) 1296 def test_get_loc_tolerance(self, method, loc): 1297 index = Index([0, 1, 2]) 1298 assert index.get_loc(1.1, method) == loc 1299 assert index.get_loc(1.1, method, tolerance=1) == loc 1300 1301 @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) 1302 def test_get_loc_outside_tolerance_raises(self, method): 1303 index = Index([0, 1, 2]) 1304 with pytest.raises(KeyError, match="1.1"): 1305 index.get_loc(1.1, method, tolerance=0.05) 1306 1307 def test_get_loc_bad_tolerance_raises(self): 1308 index = Index([0, 1, 2]) 1309 with pytest.raises(ValueError, match="must be numeric"): 1310 index.get_loc(1.1, "nearest", tolerance="invalid") 1311 1312 def test_get_loc_tolerance_no_method_raises(self): 1313 index = Index([0, 1, 2]) 1314 with pytest.raises(ValueError, match="tolerance .* valid if"): 1315 index.get_loc(1.1, tolerance=1) 1316 1317 def test_get_loc_raises_missized_tolerance(self): 1318 index = Index([0, 1, 2]) 1319 with pytest.raises(ValueError, match="tolerance size must match"): 1320 index.get_loc(1.1, "nearest", tolerance=[1, 1]) 1321 1322 def test_get_loc_raises_object_nearest(self): 1323 index = Index(["a", "c"]) 1324 
with pytest.raises(TypeError, match="unsupported operand type"): 1325 index.get_loc("a", method="nearest") 1326 1327 def test_get_loc_raises_object_tolerance(self): 1328 index = Index(["a", "c"]) 1329 with pytest.raises(TypeError, match="unsupported operand type"): 1330 index.get_loc("a", method="pad", tolerance="invalid") 1331 1332 @pytest.mark.parametrize("dtype", [int, float]) 1333 def test_slice_locs(self, dtype): 1334 index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) 1335 n = len(index) 1336 1337 assert index.slice_locs(start=2) == (2, n) 1338 assert index.slice_locs(start=3) == (3, n) 1339 assert index.slice_locs(3, 8) == (3, 6) 1340 assert index.slice_locs(5, 10) == (3, n) 1341 assert index.slice_locs(end=8) == (0, 6) 1342 assert index.slice_locs(end=9) == (0, 7) 1343 1344 # reversed 1345 index2 = index[::-1] 1346 assert index2.slice_locs(8, 2) == (2, 6) 1347 assert index2.slice_locs(7, 3) == (2, 5) 1348 1349 @pytest.mark.parametrize("dtype", [int, float]) 1350 def test_slice_float_locs(self, dtype): 1351 index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) 1352 n = len(index) 1353 assert index.slice_locs(5.0, 10.0) == (3, n) 1354 assert index.slice_locs(4.5, 10.5) == (3, 8) 1355 1356 index2 = index[::-1] 1357 assert index2.slice_locs(8.5, 1.5) == (2, 6) 1358 assert index2.slice_locs(10.5, -1) == (0, n) 1359 1360 def test_slice_locs_dup(self): 1361 index = Index(["a", "a", "b", "c", "d", "d"]) 1362 assert index.slice_locs("a", "d") == (0, 6) 1363 assert index.slice_locs(end="d") == (0, 6) 1364 assert index.slice_locs("a", "c") == (0, 4) 1365 assert index.slice_locs("b", "d") == (2, 6) 1366 1367 index2 = index[::-1] 1368 assert index2.slice_locs("d", "a") == (0, 6) 1369 assert index2.slice_locs(end="a") == (0, 6) 1370 assert index2.slice_locs("d", "b") == (0, 4) 1371 assert index2.slice_locs("c", "a") == (2, 6) 1372 1373 @pytest.mark.parametrize("dtype", [int, float]) 1374 def test_slice_locs_dup_numeric(self, dtype): 1375 index = 
Index(np.array([10, 12, 12, 14], dtype=dtype)) 1376 assert index.slice_locs(12, 12) == (1, 3) 1377 assert index.slice_locs(11, 13) == (1, 3) 1378 1379 index2 = index[::-1] 1380 assert index2.slice_locs(12, 12) == (1, 3) 1381 assert index2.slice_locs(13, 11) == (1, 3) 1382 1383 def test_slice_locs_na(self): 1384 index = Index([np.nan, 1, 2]) 1385 assert index.slice_locs(1) == (1, 3) 1386 assert index.slice_locs(np.nan) == (0, 3) 1387 1388 index = Index([0, np.nan, np.nan, 1, 2]) 1389 assert index.slice_locs(np.nan) == (1, 5) 1390 1391 def test_slice_locs_na_raises(self): 1392 index = Index([np.nan, 1, 2]) 1393 with pytest.raises(KeyError, match=""): 1394 index.slice_locs(start=1.5) 1395 1396 with pytest.raises(KeyError, match=""): 1397 index.slice_locs(end=1.5) 1398 1399 @pytest.mark.parametrize( 1400 "in_slice,expected", 1401 [ 1402 # error: Slice index must be an integer or None 1403 (pd.IndexSlice[::-1], "yxdcb"), 1404 (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc] 1405 (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc] 1406 (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc] 1407 (pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc] 1408 (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc] 1409 (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc] 1410 # absent labels 1411 (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc] 1412 (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc] 1413 (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc] 1414 (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc] 1415 (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc] 1416 (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc] 1417 (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc] 1418 (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc] 1419 (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc] 1420 ], 1421 ) 1422 def test_slice_locs_negative_step(self, in_slice, expected): 1423 index = Index(list("bcdxy")) 1424 1425 s_start, s_stop = 
index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) 1426 result = index[s_start : s_stop : in_slice.step] 1427 expected = Index(list(expected)) 1428 tm.assert_index_equal(result, expected) 1429 1430 @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) 1431 def test_drop_by_str_label(self, index): 1432 n = len(index) 1433 drop = index[list(range(5, 10))] 1434 dropped = index.drop(drop) 1435 1436 expected = index[list(range(5)) + list(range(10, n))] 1437 tm.assert_index_equal(dropped, expected) 1438 1439 dropped = index.drop(index[0]) 1440 expected = index[1:] 1441 tm.assert_index_equal(dropped, expected) 1442 1443 @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) 1444 @pytest.mark.parametrize("keys", [["foo", "bar"], ["1", "bar"]]) 1445 def test_drop_by_str_label_raises_missing_keys(self, index, keys): 1446 with pytest.raises(KeyError, match=""): 1447 index.drop(keys) 1448 1449 @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) 1450 def test_drop_by_str_label_errors_ignore(self, index): 1451 n = len(index) 1452 drop = index[list(range(5, 10))] 1453 mixed = drop.tolist() + ["foo"] 1454 dropped = index.drop(mixed, errors="ignore") 1455 1456 expected = index[list(range(5)) + list(range(10, n))] 1457 tm.assert_index_equal(dropped, expected) 1458 1459 dropped = index.drop(["foo", "bar"], errors="ignore") 1460 expected = index[list(range(n))] 1461 tm.assert_index_equal(dropped, expected) 1462 1463 def test_drop_by_numeric_label_loc(self): 1464 # TODO: Parametrize numeric and str tests after self.strIndex fixture 1465 index = Index([1, 2, 3]) 1466 dropped = index.drop(1) 1467 expected = Index([2, 3]) 1468 1469 tm.assert_index_equal(dropped, expected) 1470 1471 def test_drop_by_numeric_label_raises_missing_keys(self): 1472 index = Index([1, 2, 3]) 1473 with pytest.raises(KeyError, match=""): 1474 index.drop([3, 4]) 1475 1476 @pytest.mark.parametrize( 1477 "key,expected", [(4, Index([1, 
2, 3])), ([3, 4, 5], Index([1, 2]))] 1478 ) 1479 def test_drop_by_numeric_label_errors_ignore(self, key, expected): 1480 index = Index([1, 2, 3]) 1481 dropped = index.drop(key, errors="ignore") 1482 1483 tm.assert_index_equal(dropped, expected) 1484 1485 @pytest.mark.parametrize( 1486 "values", 1487 [["a", "b", ("c", "d")], ["a", ("c", "d"), "b"], [("c", "d"), "a", "b"]], 1488 ) 1489 @pytest.mark.parametrize("to_drop", [[("c", "d"), "a"], ["a", ("c", "d")]]) 1490 def test_drop_tuple(self, values, to_drop): 1491 # GH 18304 1492 index = Index(values) 1493 expected = Index(["b"]) 1494 1495 result = index.drop(to_drop) 1496 tm.assert_index_equal(result, expected) 1497 1498 removed = index.drop(to_drop[0]) 1499 for drop_me in to_drop[1], [to_drop[1]]: 1500 result = removed.drop(drop_me) 1501 tm.assert_index_equal(result, expected) 1502 1503 removed = index.drop(to_drop[1]) 1504 msg = fr"\"\[{re.escape(to_drop[1].__repr__())}\] not found in axis\"" 1505 for drop_me in to_drop[1], [to_drop[1]]: 1506 with pytest.raises(KeyError, match=msg): 1507 removed.drop(drop_me) 1508 1509 def test_drop_with_duplicates_in_index(self, index): 1510 # GH38051 1511 if len(index) == 0 or isinstance(index, MultiIndex): 1512 return 1513 if isinstance(index, IntervalIndex) and not IS64: 1514 pytest.skip("Cannot test IntervalIndex with int64 dtype on 32 bit platform") 1515 index = index.unique().repeat(2) 1516 expected = index[2:] 1517 result = index.drop(index[0]) 1518 tm.assert_index_equal(result, expected) 1519 1520 @pytest.mark.parametrize( 1521 "attr", 1522 [ 1523 "is_monotonic_increasing", 1524 "is_monotonic_decreasing", 1525 "_is_strictly_monotonic_increasing", 1526 "_is_strictly_monotonic_decreasing", 1527 ], 1528 ) 1529 def test_is_monotonic_incomparable(self, attr): 1530 index = Index([5, datetime.now(), 7]) 1531 assert not getattr(index, attr) 1532 1533 def test_set_value_deprecated(self): 1534 # GH 28621 1535 idx = self.create_index() 1536 arr = np.array([1, 2, 3]) 1537 with 
tm.assert_produces_warning(FutureWarning): 1538 idx.set_value(arr, idx[1], 80) 1539 assert arr[1] == 80 1540 1541 @pytest.mark.parametrize( 1542 "index", ["string", "int", "datetime", "timedelta"], indirect=True 1543 ) 1544 def test_get_value(self, index): 1545 # TODO: Remove function? GH 19728 1546 values = np.random.randn(100) 1547 value = index[67] 1548 1549 with pytest.raises(AttributeError, match="has no attribute '_values'"): 1550 # Index.get_value requires a Series, not an ndarray 1551 with tm.assert_produces_warning(FutureWarning): 1552 index.get_value(values, value) 1553 1554 with tm.assert_produces_warning(FutureWarning): 1555 result = index.get_value(Series(values, index=values), value) 1556 tm.assert_almost_equal(result, values[67]) 1557 1558 @pytest.mark.parametrize("values", [["foo", "bar", "quux"], {"foo", "bar", "quux"}]) 1559 @pytest.mark.parametrize( 1560 "index,expected", 1561 [ 1562 (Index(["qux", "baz", "foo", "bar"]), np.array([False, False, True, True])), 1563 (Index([]), np.array([], dtype=bool)), # empty 1564 ], 1565 ) 1566 def test_isin(self, values, index, expected): 1567 result = index.isin(values) 1568 tm.assert_numpy_array_equal(result, expected) 1569 1570 def test_isin_nan_common_object(self, nulls_fixture, nulls_fixture2): 1571 # Test cartesian product of null fixtures and ensure that we don't 1572 # mangle the various types (save a corner case with PyPy) 1573 1574 # all nans are the same 1575 if ( 1576 isinstance(nulls_fixture, float) 1577 and isinstance(nulls_fixture2, float) 1578 and math.isnan(nulls_fixture) 1579 and math.isnan(nulls_fixture2) 1580 ): 1581 tm.assert_numpy_array_equal( 1582 Index(["a", nulls_fixture]).isin([nulls_fixture2]), 1583 np.array([False, True]), 1584 ) 1585 1586 elif nulls_fixture is nulls_fixture2: # should preserve NA type 1587 tm.assert_numpy_array_equal( 1588 Index(["a", nulls_fixture]).isin([nulls_fixture2]), 1589 np.array([False, True]), 1590 ) 1591 1592 else: 1593 tm.assert_numpy_array_equal( 1594 
Index(["a", nulls_fixture]).isin([nulls_fixture2]), 1595 np.array([False, False]), 1596 ) 1597 1598 def test_isin_nan_common_float64(self, nulls_fixture): 1599 if nulls_fixture is pd.NaT: 1600 pytest.skip("pd.NaT not compatible with Float64Index") 1601 1602 # Float64Index overrides isin, so must be checked separately 1603 if nulls_fixture is pd.NA: 1604 pytest.xfail("Float64Index cannot contain pd.NA") 1605 1606 tm.assert_numpy_array_equal( 1607 Float64Index([1.0, nulls_fixture]).isin([np.nan]), np.array([False, True]) 1608 ) 1609 1610 # we cannot compare NaT with NaN 1611 tm.assert_numpy_array_equal( 1612 Float64Index([1.0, nulls_fixture]).isin([pd.NaT]), np.array([False, False]) 1613 ) 1614 1615 @pytest.mark.parametrize("level", [0, -1]) 1616 @pytest.mark.parametrize( 1617 "index", 1618 [ 1619 Index(["qux", "baz", "foo", "bar"]), 1620 # Float64Index overrides isin, so must be checked separately 1621 Float64Index([1.0, 2.0, 3.0, 4.0]), 1622 ], 1623 ) 1624 def test_isin_level_kwarg(self, level, index): 1625 values = index.tolist()[-2:] + ["nonexisting"] 1626 1627 expected = np.array([False, False, True, True]) 1628 tm.assert_numpy_array_equal(expected, index.isin(values, level=level)) 1629 1630 index.name = "foobar" 1631 tm.assert_numpy_array_equal(expected, index.isin(values, level="foobar")) 1632 1633 def test_isin_level_kwarg_bad_level_raises(self, index): 1634 for level in [10, index.nlevels, -(index.nlevels + 1)]: 1635 with pytest.raises(IndexError, match="Too many levels"): 1636 index.isin([], level=level) 1637 1638 @pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan]) 1639 def test_isin_level_kwarg_bad_label_raises(self, label, index): 1640 if isinstance(index, MultiIndex): 1641 index = index.rename(["foo", "bar"] + index.names[2:]) 1642 msg = f"'Level {label} not found'" 1643 else: 1644 index = index.rename("foo") 1645 msg = fr"Requested level \({label}\) does not match index name \(foo\)" 1646 with pytest.raises(KeyError, match=msg): 1647 
index.isin([], level=label) 1648 1649 @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) 1650 def test_isin_empty(self, empty): 1651 # see gh-16991 1652 index = Index(["a", "b"]) 1653 expected = np.array([False, False]) 1654 1655 result = index.isin(empty) 1656 tm.assert_numpy_array_equal(expected, result) 1657 1658 @pytest.mark.parametrize( 1659 "values", 1660 [ 1661 [1, 2, 3, 4], 1662 [1.0, 2.0, 3.0, 4.0], 1663 [True, True, True, True], 1664 ["foo", "bar", "baz", "qux"], 1665 date_range("2018-01-01", freq="D", periods=4), 1666 ], 1667 ) 1668 def test_boolean_cmp(self, values): 1669 index = Index(values) 1670 result = index == values 1671 expected = np.array([True, True, True, True], dtype=bool) 1672 1673 tm.assert_numpy_array_equal(result, expected) 1674 1675 @pytest.mark.parametrize("index", ["string"], indirect=True) 1676 @pytest.mark.parametrize("name,level", [(None, 0), ("a", "a")]) 1677 def test_get_level_values(self, index, name, level): 1678 expected = index.copy() 1679 if name: 1680 expected.name = name 1681 1682 result = expected.get_level_values(level) 1683 tm.assert_index_equal(result, expected) 1684 1685 def test_slice_keep_name(self): 1686 index = Index(["a", "b"], name="asdf") 1687 assert index.name == index[1:].name 1688 1689 @pytest.mark.parametrize( 1690 "index", 1691 ["unicode", "string", "datetime", "int", "uint", "float"], 1692 indirect=True, 1693 ) 1694 def test_join_self(self, index, join_type): 1695 joined = index.join(index, how=join_type) 1696 assert index is joined 1697 1698 @pytest.mark.parametrize("method", ["strip", "rstrip", "lstrip"]) 1699 def test_str_attribute(self, method): 1700 # GH9068 1701 index = Index([" jack", "jill ", " jesse ", "frank"]) 1702 expected = Index([getattr(str, method)(x) for x in index.values]) 1703 1704 result = getattr(index.str, method)() 1705 tm.assert_index_equal(result, expected) 1706 1707 @pytest.mark.parametrize( 1708 "index", 1709 [ 1710 Index(range(5)), 1711 
tm.makeDateIndex(10), 1712 MultiIndex.from_tuples([("foo", "1"), ("bar", "3")]), 1713 period_range(start="2000", end="2010", freq="A"), 1714 ], 1715 ) 1716 def test_str_attribute_raises(self, index): 1717 with pytest.raises(AttributeError, match="only use .str accessor"): 1718 index.str.repeat(2) 1719 1720 @pytest.mark.parametrize( 1721 "expand,expected", 1722 [ 1723 (None, Index([["a", "b", "c"], ["d", "e"], ["f"]])), 1724 (False, Index([["a", "b", "c"], ["d", "e"], ["f"]])), 1725 ( 1726 True, 1727 MultiIndex.from_tuples( 1728 [("a", "b", "c"), ("d", "e", np.nan), ("f", np.nan, np.nan)] 1729 ), 1730 ), 1731 ], 1732 ) 1733 def test_str_split(self, expand, expected): 1734 index = Index(["a b c", "d e", "f"]) 1735 if expand is not None: 1736 result = index.str.split(expand=expand) 1737 else: 1738 result = index.str.split() 1739 1740 tm.assert_index_equal(result, expected) 1741 1742 def test_str_bool_return(self): 1743 # test boolean case, should return np.array instead of boolean Index 1744 index = Index(["a1", "a2", "b1", "b2"]) 1745 result = index.str.startswith("a") 1746 expected = np.array([True, True, False, False]) 1747 1748 tm.assert_numpy_array_equal(result, expected) 1749 assert isinstance(result, np.ndarray) 1750 1751 def test_str_bool_series_indexing(self): 1752 index = Index(["a1", "a2", "b1", "b2"]) 1753 s = Series(range(4), index=index) 1754 1755 result = s[s.index.str.startswith("a")] 1756 expected = Series(range(2), index=["a1", "a2"]) 1757 tm.assert_series_equal(result, expected) 1758 1759 @pytest.mark.parametrize( 1760 "index,expected", [(Index(list("abcd")), True), (Index(range(4)), False)] 1761 ) 1762 def test_tab_completion(self, index, expected): 1763 # GH 9910 1764 result = "str" in dir(index) 1765 assert result == expected 1766 1767 def test_indexing_doesnt_change_class(self): 1768 index = Index([1, 2, 3, "a", "b", "c"]) 1769 1770 assert index[1:3].identical(Index([2, 3], dtype=np.object_)) 1771 assert index[[0, 1]].identical(Index([1, 2], 
dtype=np.object_)) 1772 1773 def test_outer_join_sort(self): 1774 left_index = Index(np.random.permutation(15)) 1775 right_index = tm.makeDateIndex(10) 1776 1777 with tm.assert_produces_warning(RuntimeWarning): 1778 result = left_index.join(right_index, how="outer") 1779 1780 # right_index in this case because DatetimeIndex has join precedence 1781 # over Int64Index 1782 with tm.assert_produces_warning(RuntimeWarning): 1783 expected = right_index.astype(object).union(left_index.astype(object)) 1784 1785 tm.assert_index_equal(result, expected) 1786 1787 def test_nan_first_take_datetime(self): 1788 index = Index([pd.NaT, Timestamp("20130101"), Timestamp("20130102")]) 1789 result = index.take([-1, 0, 1]) 1790 expected = Index([index[-1], index[0], index[1]]) 1791 tm.assert_index_equal(result, expected) 1792 1793 def test_take_fill_value(self): 1794 # GH 12631 1795 index = Index(list("ABC"), name="xxx") 1796 result = index.take(np.array([1, 0, -1])) 1797 expected = Index(list("BAC"), name="xxx") 1798 tm.assert_index_equal(result, expected) 1799 1800 # fill_value 1801 result = index.take(np.array([1, 0, -1]), fill_value=True) 1802 expected = Index(["B", "A", np.nan], name="xxx") 1803 tm.assert_index_equal(result, expected) 1804 1805 # allow_fill=False 1806 result = index.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) 1807 expected = Index(["B", "A", "C"], name="xxx") 1808 tm.assert_index_equal(result, expected) 1809 1810 def test_take_fill_value_none_raises(self): 1811 index = Index(list("ABC"), name="xxx") 1812 msg = ( 1813 "When allow_fill=True and fill_value is not None, " 1814 "all indices must be >= -1" 1815 ) 1816 1817 with pytest.raises(ValueError, match=msg): 1818 index.take(np.array([1, 0, -2]), fill_value=True) 1819 with pytest.raises(ValueError, match=msg): 1820 index.take(np.array([1, 0, -5]), fill_value=True) 1821 1822 def test_take_bad_bounds_raises(self): 1823 index = Index(list("ABC"), name="xxx") 1824 with pytest.raises(IndexError, 
match="out of bounds"): 1825 index.take(np.array([1, -5])) 1826 1827 @pytest.mark.parametrize("name", [None, "foobar"]) 1828 @pytest.mark.parametrize( 1829 "labels", 1830 [ 1831 [], 1832 np.array([]), 1833 ["A", "B", "C"], 1834 ["C", "B", "A"], 1835 np.array(["A", "B", "C"]), 1836 np.array(["C", "B", "A"]), 1837 # Must preserve name even if dtype changes 1838 date_range("20130101", periods=3).values, 1839 date_range("20130101", periods=3).tolist(), 1840 ], 1841 ) 1842 def test_reindex_preserves_name_if_target_is_list_or_ndarray(self, name, labels): 1843 # GH6552 1844 index = Index([0, 1, 2]) 1845 index.name = name 1846 assert index.reindex(labels)[0].name == name 1847 1848 @pytest.mark.parametrize("labels", [[], np.array([]), np.array([], dtype=np.int64)]) 1849 def test_reindex_preserves_type_if_target_is_empty_list_or_array(self, labels): 1850 # GH7774 1851 index = Index(list("abc")) 1852 assert index.reindex(labels)[0].dtype.type == np.object_ 1853 1854 @pytest.mark.parametrize( 1855 "labels,dtype", 1856 [ 1857 (pd.Int64Index([]), np.int64), 1858 (Float64Index([]), np.float64), 1859 (DatetimeIndex([]), np.datetime64), 1860 ], 1861 ) 1862 def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self, labels, dtype): 1863 # GH7774 1864 index = Index(list("abc")) 1865 assert index.reindex(labels)[0].dtype.type == dtype 1866 1867 def test_reindex_no_type_preserve_target_empty_mi(self): 1868 index = Index(list("abc")) 1869 result = index.reindex( 1870 MultiIndex([pd.Int64Index([]), Float64Index([])], [[], []]) 1871 )[0] 1872 assert result.levels[0].dtype.type == np.int64 1873 assert result.levels[1].dtype.type == np.float64 1874 1875 def test_groupby(self): 1876 index = Index(range(5)) 1877 result = index.groupby(np.array([1, 1, 2, 2, 2])) 1878 expected = {1: Index([0, 1]), 2: Index([2, 3, 4])} 1879 1880 tm.assert_dict_equal(result, expected) 1881 1882 @pytest.mark.parametrize( 1883 "mi,expected", 1884 [ 1885 (MultiIndex.from_tuples([(1, 2), (4, 5)]), 
np.array([True, True])), 1886 (MultiIndex.from_tuples([(1, 2), (4, 6)]), np.array([True, False])), 1887 ], 1888 ) 1889 def test_equals_op_multiindex(self, mi, expected): 1890 # GH9785 1891 # test comparisons of multiindex 1892 df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) 1893 1894 result = df.index == mi 1895 tm.assert_numpy_array_equal(result, expected) 1896 1897 def test_equals_op_multiindex_identify(self): 1898 df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) 1899 1900 result = df.index == df.index 1901 expected = np.array([True, True]) 1902 tm.assert_numpy_array_equal(result, expected) 1903 1904 @pytest.mark.parametrize( 1905 "index", 1906 [ 1907 MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]), 1908 Index(["foo", "bar", "baz"]), 1909 ], 1910 ) 1911 def test_equals_op_mismatched_multiindex_raises(self, index): 1912 df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) 1913 1914 with pytest.raises(ValueError, match="Lengths must match"): 1915 df.index == index 1916 1917 def test_equals_op_index_vs_mi_same_length(self): 1918 mi = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) 1919 index = Index(["foo", "bar", "baz"]) 1920 1921 result = mi == index 1922 expected = np.array([False, False, False]) 1923 tm.assert_numpy_array_equal(result, expected) 1924 1925 @pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta]) 1926 def test_dt_conversion_preserves_name(self, dt_conv): 1927 # GH 10875 1928 index = Index(["01:02:03", "01:02:04"], name="label") 1929 assert index.name == dt_conv(index).name 1930 1931 def test_cached_properties_not_settable(self): 1932 index = Index([1, 2, 3]) 1933 with pytest.raises(AttributeError, match="Can't set attribute"): 1934 index.is_unique = False 1935 1936 @async_mark() 1937 async def test_tab_complete_warning(self, ip): 1938 # https://github.com/pandas-dev/pandas/issues/16409 1939 pytest.importorskip("IPython", minversion="6.0.0") 1940 from IPython.core.completer 
import provisionalcompleter 1941 1942 code = "import pandas as pd; idx = Index([1, 2])" 1943 await ip.run_code(code) 1944 1945 # GH 31324 newer jedi version raises Deprecation warning; 1946 # appears resolved 2021-02-02 1947 with tm.assert_produces_warning(None): 1948 with provisionalcompleter("ignore"): 1949 list(ip.Completer.completions("idx.", 4)) 1950 1951 def test_contains_method_removed(self, index): 1952 # GH#30103 method removed for all types except IntervalIndex 1953 if isinstance(index, pd.IntervalIndex): 1954 index.contains(1) 1955 else: 1956 msg = f"'{type(index).__name__}' object has no attribute 'contains'" 1957 with pytest.raises(AttributeError, match=msg): 1958 index.contains(1) 1959 1960 def test_sortlevel(self): 1961 index = Index([5, 4, 3, 2, 1]) 1962 with pytest.raises(Exception, match="ascending must be a single bool value or"): 1963 index.sortlevel(ascending="True") 1964 1965 with pytest.raises( 1966 Exception, match="ascending must be a list of bool values of length 1" 1967 ): 1968 index.sortlevel(ascending=[True, True]) 1969 1970 with pytest.raises(Exception, match="ascending must be a bool value"): 1971 index.sortlevel(ascending=["True"]) 1972 1973 expected = Index([1, 2, 3, 4, 5]) 1974 result = index.sortlevel(ascending=[True]) 1975 tm.assert_index_equal(result[0], expected) 1976 1977 expected = Index([1, 2, 3, 4, 5]) 1978 result = index.sortlevel(ascending=True) 1979 tm.assert_index_equal(result[0], expected) 1980 1981 expected = Index([5, 4, 3, 2, 1]) 1982 result = index.sortlevel(ascending=False) 1983 tm.assert_index_equal(result[0], expected) 1984 1985 1986class TestMixedIntIndex(Base): 1987 # Mostly the tests from common.py for which the results differ 1988 # in py2 and py3 because ints and strings are uncomparable in py3 1989 # (GH 13514) 1990 _holder = Index 1991 1992 @pytest.fixture(params=[[0, "a", 1, "b", 2, "c"]], ids=["mixedIndex"]) 1993 def index(self, request): 1994 return Index(request.param) 1995 1996 def create_index(self) 
-> Index: 1997 return Index([0, "a", 1, "b", 2, "c"]) 1998 1999 def test_argsort(self): 2000 index = self.create_index() 2001 with pytest.raises(TypeError, match="'>|<' not supported"): 2002 index.argsort() 2003 2004 def test_numpy_argsort(self): 2005 index = self.create_index() 2006 with pytest.raises(TypeError, match="'>|<' not supported"): 2007 np.argsort(index) 2008 2009 def test_copy_name(self): 2010 # Check that "name" argument passed at initialization is honoured 2011 # GH12309 2012 index = self.create_index() 2013 2014 first = type(index)(index, copy=True, name="mario") 2015 second = type(first)(first, copy=False) 2016 2017 # Even though "copy=False", we want a new object. 2018 assert first is not second 2019 tm.assert_index_equal(first, second) 2020 2021 assert first.name == "mario" 2022 assert second.name == "mario" 2023 2024 s1 = Series(2, index=first) 2025 s2 = Series(3, index=second[:-1]) 2026 2027 s3 = s1 * s2 2028 2029 assert s3.index.name == "mario" 2030 2031 def test_copy_name2(self): 2032 # Check that adding a "name" parameter to the copy is honored 2033 # GH14302 2034 index = Index([1, 2], name="MyName") 2035 index1 = index.copy() 2036 2037 tm.assert_index_equal(index, index1) 2038 2039 index2 = index.copy(name="NewName") 2040 tm.assert_index_equal(index, index2, check_names=False) 2041 assert index.name == "MyName" 2042 assert index2.name == "NewName" 2043 2044 index3 = index.copy(names=["NewName"]) 2045 tm.assert_index_equal(index, index3, check_names=False) 2046 assert index.name == "MyName" 2047 assert index.names == ["MyName"] 2048 assert index3.name == "NewName" 2049 assert index3.names == ["NewName"] 2050 2051 def test_unique_na(self): 2052 idx = Index([2, np.nan, 2, 1], name="my_index") 2053 expected = Index([2, np.nan, 1], name="my_index") 2054 result = idx.unique() 2055 tm.assert_index_equal(result, expected) 2056 2057 def test_logical_compat(self): 2058 index = self.create_index() 2059 assert index.all() == index.values.all() 2060 
assert index.any() == index.values.any() 2061 2062 @pytest.mark.parametrize("how", ["any", "all"]) 2063 @pytest.mark.parametrize("dtype", [None, object, "category"]) 2064 @pytest.mark.parametrize( 2065 "vals,expected", 2066 [ 2067 ([1, 2, 3], [1, 2, 3]), 2068 ([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]), 2069 ([1.0, 2.0, np.nan, 3.0], [1.0, 2.0, 3.0]), 2070 (["A", "B", "C"], ["A", "B", "C"]), 2071 (["A", np.nan, "B", "C"], ["A", "B", "C"]), 2072 ], 2073 ) 2074 def test_dropna(self, how, dtype, vals, expected): 2075 # GH 6194 2076 index = Index(vals, dtype=dtype) 2077 result = index.dropna(how=how) 2078 expected = Index(expected, dtype=dtype) 2079 tm.assert_index_equal(result, expected) 2080 2081 @pytest.mark.parametrize("how", ["any", "all"]) 2082 @pytest.mark.parametrize( 2083 "index,expected", 2084 [ 2085 ( 2086 DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), 2087 DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), 2088 ), 2089 ( 2090 DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", pd.NaT]), 2091 DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), 2092 ), 2093 ( 2094 pd.TimedeltaIndex(["1 days", "2 days", "3 days"]), 2095 pd.TimedeltaIndex(["1 days", "2 days", "3 days"]), 2096 ), 2097 ( 2098 pd.TimedeltaIndex([pd.NaT, "1 days", "2 days", "3 days", pd.NaT]), 2099 pd.TimedeltaIndex(["1 days", "2 days", "3 days"]), 2100 ), 2101 ( 2102 PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), 2103 PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), 2104 ), 2105 ( 2106 PeriodIndex(["2012-02", "2012-04", "NaT", "2012-05"], freq="M"), 2107 PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), 2108 ), 2109 ], 2110 ) 2111 def test_dropna_dt_like(self, how, index, expected): 2112 result = index.dropna(how=how) 2113 tm.assert_index_equal(result, expected) 2114 2115 def test_dropna_invalid_how_raises(self): 2116 msg = "invalid how option: xxx" 2117 with pytest.raises(ValueError, match=msg): 2118 Index([1, 2, 3]).dropna(how="xxx") 2119 2120 
def test_get_combined_index(self): 2121 result = _get_combined_index([]) 2122 expected = Index([]) 2123 tm.assert_index_equal(result, expected) 2124 2125 @pytest.mark.parametrize( 2126 "index", 2127 [ 2128 Index([np.nan]), 2129 Index([np.nan, 1]), 2130 Index([1, 2, np.nan]), 2131 Index(["a", "b", np.nan]), 2132 pd.to_datetime(["NaT"]), 2133 pd.to_datetime(["NaT", "2000-01-01"]), 2134 pd.to_datetime(["2000-01-01", "NaT", "2000-01-02"]), 2135 pd.to_timedelta(["1 day", "NaT"]), 2136 ], 2137 ) 2138 def test_is_monotonic_na(self, index): 2139 assert index.is_monotonic_increasing is False 2140 assert index.is_monotonic_decreasing is False 2141 assert index._is_strictly_monotonic_increasing is False 2142 assert index._is_strictly_monotonic_decreasing is False 2143 2144 @pytest.mark.parametrize("klass", [Series, DataFrame]) 2145 def test_int_name_format(self, klass): 2146 index = Index(["a", "b", "c"], name=0) 2147 result = klass(list(range(3)), index=index) 2148 assert "0" in repr(result) 2149 2150 def test_str_to_bytes_raises(self): 2151 # GH 26447 2152 index = Index([str(x) for x in range(10)]) 2153 msg = "^'str' object cannot be interpreted as an integer$" 2154 with pytest.raises(TypeError, match=msg): 2155 bytes(index) 2156 2157 def test_intersect_str_dates(self): 2158 dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] 2159 2160 index1 = Index(dt_dates, dtype=object) 2161 index2 = Index(["aa"], dtype=object) 2162 result = index2.intersection(index1) 2163 2164 expected = Index([], dtype=object) 2165 tm.assert_index_equal(result, expected) 2166 2167 @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning") 2168 def test_index_with_tuple_bool(self): 2169 # GH34123 2170 # TODO: remove tupleize_cols=False once correct behaviour is restored 2171 # TODO: also this op right now produces FutureWarning from numpy 2172 idx = Index([("a", "b"), ("b", "c"), ("c", "a")], tupleize_cols=False) 2173 result = idx == ("c", "a") 2174 expected = 
class TestIndexUtils:
    # Tests for the index-construction helpers ensure_index and
    # ensure_index_from_sequences.

    @pytest.mark.parametrize(
        "data, names, expected",
        [
            ([[1, 2, 3]], None, Index([1, 2, 3])),
            ([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")),
            (
                [["a", "a"], ["c", "d"]],
                None,
                MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]]),
            ),
            (
                [["a", "a"], ["c", "d"]],
                ["L1", "L2"],
                MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]], names=["L1", "L2"]),
            ),
        ],
    )
    def test_ensure_index_from_sequences(self, data, names, expected):
        # One sequence gives a flat Index, two sequences a MultiIndex; the
        # names are carried over in both cases.
        tm.assert_index_equal(ensure_index_from_sequences(data, names), expected)

    def test_ensure_index_mixed_closed_intervals(self):
        # GH27172
        # Consecutive unit intervals, each with a different ``closed`` side,
        # must come back as a plain object-dtype Index.
        closed_sides = ["left", "right", "neither", "both"]
        intervals = [
            pd.Interval(left, left + 1, closed=closed)
            for left, closed in enumerate(closed_sides)
        ]

        tm.assert_index_equal(ensure_index(intervals), Index(intervals, dtype=object))
pytest.raises(TypeError, match=msg): 2254 Index(np.array(["a", "b"], dtype=object), name="test", fastpath=True) 2255 2256 with pytest.raises(TypeError, match=msg): 2257 pd.Int64Index(np.array([1, 2, 3], dtype="int64"), name="test", fastpath=True) 2258 2259 with pytest.raises(TypeError, match=msg): 2260 RangeIndex(0, 5, 2, name="test", fastpath=True) 2261 2262 with pytest.raises(TypeError, match=msg): 2263 pd.CategoricalIndex(["a", "b", "c"], name="test", fastpath=True) 2264 2265 2266def test_shape_of_invalid_index(): 2267 # Currently, it is possible to create "invalid" index objects backed by 2268 # a multi-dimensional array (see https://github.com/pandas-dev/pandas/issues/27125 2269 # about this). However, as long as this is not solved in general,this test ensures 2270 # that the returned shape is consistent with this underlying array for 2271 # compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775) 2272 idx = Index([0, 1, 2, 3]) 2273 with tm.assert_produces_warning(FutureWarning): 2274 # GH#30588 multi-dimensional indexing deprecated 2275 assert idx[:, None].shape == (4, 1) 2276 2277 2278def test_validate_1d_input(): 2279 # GH#27125 check that we do not have >1-dimensional input 2280 msg = "Index data must be 1-dimensional" 2281 2282 arr = np.arange(8).reshape(2, 2, 2) 2283 with pytest.raises(ValueError, match=msg): 2284 Index(arr) 2285 2286 with pytest.raises(ValueError, match=msg): 2287 Float64Index(arr.astype(np.float64)) 2288 2289 with pytest.raises(ValueError, match=msg): 2290 pd.Int64Index(arr.astype(np.int64)) 2291 2292 with pytest.raises(ValueError, match=msg): 2293 pd.UInt64Index(arr.astype(np.uint64)) 2294 2295 df = pd.DataFrame(arr.reshape(4, 2)) 2296 with pytest.raises(ValueError, match=msg): 2297 Index(df) 2298 2299 # GH#13601 trying to assign a multi-dimensional array to an index is not 2300 # allowed 2301 ser = Series(0, range(4)) 2302 with pytest.raises(ValueError, match=msg): 2303 ser.index = np.array([[2, 3]] * 4) 2304 
2305 2306def test_convert_almost_null_slice(index): 2307 # slice with None at both ends, but not step 2308 2309 key = slice(None, None, "foo") 2310 2311 if isinstance(index, pd.IntervalIndex): 2312 msg = "label-based slicing with step!=1 is not supported for IntervalIndex" 2313 with pytest.raises(ValueError, match=msg): 2314 index._convert_slice_indexer(key, "loc") 2315 else: 2316 msg = "'>=' not supported between instances of 'str' and 'int'" 2317 with pytest.raises(TypeError, match=msg): 2318 index._convert_slice_indexer(key, "loc") 2319 2320 2321dtlike_dtypes = [ 2322 np.dtype("timedelta64[ns]"), 2323 np.dtype("datetime64[ns]"), 2324 pd.DatetimeTZDtype("ns", "Asia/Tokyo"), 2325 pd.PeriodDtype("ns"), 2326] 2327 2328 2329@pytest.mark.parametrize("ldtype", dtlike_dtypes) 2330@pytest.mark.parametrize("rdtype", dtlike_dtypes) 2331def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype): 2332 2333 vals = np.tile(3600 * 10 ** 9 * np.arange(3), 2) 2334 2335 def construct(dtype): 2336 if dtype is dtlike_dtypes[-1]: 2337 # PeriodArray will try to cast ints to strings 2338 return DatetimeIndex(vals).astype(dtype) 2339 return Index(vals, dtype=dtype) 2340 2341 left = construct(ldtype) 2342 right = construct(rdtype) 2343 2344 result = left.get_indexer_non_unique(right) 2345 2346 if ldtype is rdtype: 2347 ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp) 2348 ex2 = np.array([], dtype=np.intp) 2349 tm.assert_numpy_array_equal(result[0], ex1) 2350 tm.assert_numpy_array_equal(result[1], ex2) 2351 2352 else: 2353 no_matches = np.array([-1] * 6, dtype=np.intp) 2354 missing = np.arange(6, dtype=np.intp) 2355 tm.assert_numpy_array_equal(result[0], no_matches) 2356 tm.assert_numpy_array_equal(result[1], missing) 2357