"""
Series.__getitem__ test classes are organized by the type of key passed.
"""
from datetime import date, datetime, time

import numpy as np
import pytest

from pandas._libs.tslibs import conversion, timezones

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    DatetimeIndex,
    Index,
    Series,
    Timestamp,
    date_range,
    period_range,
)
import pandas._testing as tm
from pandas.core.indexing import IndexingError

from pandas.tseries.offsets import BDay


class TestSeriesGetitemScalars:
    """Tests for ``Series.__getitem__`` with a scalar key."""

    def test_getitem_out_of_bounds_indexerror(self, datetime_series):
        """An integer key equal to ``len(ser)`` raises IndexError."""
        # don't segfault, GH#495
        msg = r"index \d+ is out of bounds for axis 0 with size \d+"
        with pytest.raises(IndexError, match=msg):
            datetime_series[len(datetime_series)]

    def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
        """``ser[-1]`` on an empty Series raises KeyError."""
        # GH#917
        # With a RangeIndex, an int key gives a KeyError
        ser = Series([], dtype=object)
        with pytest.raises(KeyError, match="-1"):
            ser[-1]

    def test_getitem_keyerror_with_int64index(self):
        """Missing int and str keys raise KeyError on a duplicated int index."""
        ser = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2])

        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]

        # not monotonic
        ser = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1])

        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]

    def test_getitem_int64(self, datetime_series):
        """An ``np.int64`` key gives the same value as the equal Python int."""
        idx = np.int64(5)
        assert datetime_series[idx] == datetime_series[5]

    # TODO: better name/GH ref?
    def test_getitem_regression(self):
        """Selecting with the list of all index labels returns an equal Series."""
        ser = Series(range(5), index=list(range(5)))
        result = ser[list(range(5))]
        tm.assert_series_equal(result, ser)

    # ------------------------------------------------------------------
    # Series with DatetimeIndex

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        """A tz-aware Timestamp and a localized datetime select the same value."""
        tz = timezones.maybe_get_tz(tzstr)

        index = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="H", tz=tzstr
        )
        ts = Series(index=index, data=index.hour)
        time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr)

        dt = datetime(2012, 12, 24, 17, 0)
        time_datetime = conversion.localize_pydatetime(dt, tz)
        assert ts[time_pandas] == ts[time_datetime]

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        """A date-string key on a tz-aware index matches ``ser[2]``."""
        rng = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.randn(len(rng)), index=rng)

        result = ser["1/3/2000"]
        tm.assert_almost_equal(result, ser[2])

    def test_getitem_time_object(self):
        """A ``datetime.time`` key selects the entries at that time of day."""
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = Series(np.random.randn(len(rng)), index=rng)

        mask = (rng.hour == 9) & (rng.minute == 30)
        result = ts[time(9, 30)]
        expected = ts[mask]
        # drop the freq on the result's index before comparing
        result.index = result.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    # ------------------------------------------------------------------
    # Series with CategoricalIndex

    def test_getitem_scalar_categorical_index(self):
        """A scalar taken from the Categorical index selects the first value."""
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

        ser = Series([1, 2], index=cats)

        expected = ser.iloc[0]
        result = ser[cats[0]]
        assert result == expected


class TestSeriesGetitemSlices:
    """Tests for ``Series.__getitem__`` with slice-like keys."""

    def test_getitem_partial_str_slice_with_datetimeindex(self):
        """The string key "2008" selects every entry dated in 2008."""
        # GH#34860
        arr = date_range("1/1/2008", "1/1/2009")
        ser = arr.to_series()
        result = ser["2008"]

        rng = date_range(start="2008-01-01", end="2008-12-31")
        expected = Series(rng, index=rng)

        tm.assert_series_equal(result, expected)

    def test_getitem_slice_strings_with_datetimeindex(self):
        """String slices on a DatetimeIndex with a duplicated date."""
        idx = DatetimeIndex(
            ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
        )

        ts = Series(np.random.randn(len(idx)), index=idx)

        result = ts["1/2/2000":]
        expected = ts[1:]
        tm.assert_series_equal(result, expected)

        result = ts["1/2/2000":"1/3/2000"]
        expected = ts[1:4]
        tm.assert_series_equal(result, expected)

    def test_getitem_slice_2d(self, datetime_series):
        """``ser[:, np.newaxis]`` warns and matches the 2D values array."""
        # GH#30588 multi-dimensional indexing deprecated

        with tm.assert_produces_warning(FutureWarning):
            # GH#30867 Don't want to support this long-term, but
            # for now ensure that the warning from Index
            # doesn't come through via Series.__getitem__.
            result = datetime_series[:, np.newaxis]
        expected = datetime_series.values[:, np.newaxis]
        tm.assert_almost_equal(result, expected)

    # FutureWarning from NumPy.
    @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
    def test_getitem_median_slice_bug(self):
        """A one-element list holding a slice warns and acts like the slice."""
        index = date_range("20090415", "20090519", freq="2B")
        s = Series(np.random.randn(13), index=index)

        indexer = [slice(6, 7, None)]
        with tm.assert_produces_warning(FutureWarning):
            # GH#31299
            result = s[indexer]
        expected = s[indexer[0]]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "slc, positions",
        [
            [slice(date(2018, 1, 1), None), [0, 1, 2]],
            [slice(date(2019, 1, 2), None), [2]],
            [slice(date(2020, 1, 1), None), []],
            [slice(None, date(2020, 1, 1)), [0, 1, 2]],
            [slice(None, date(2019, 1, 1)), [0]],
        ],
    )
    def test_getitem_slice_date(self, slc, positions):
        """Slices bounded by ``datetime.date`` select the expected positions."""
        # https://github.com/pandas-dev/pandas/issues/31501
        ser = Series(
            [0, 1, 2],
            DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]),
        )
        result = ser[slc]
        expected = ser.take(positions)
        tm.assert_series_equal(result, expected)


class TestSeriesGetitemListLike:
    """Tests for ``Series.__getitem__`` with list-like keys."""

    @pytest.mark.parametrize("box", [list, np.array, Index, pd.Series])
    def test_getitem_no_matches(self, box):
        """A list-like key with no matching labels raises KeyError."""
        # GH#33462 we expect the same behavior for list/ndarray/Index/Series
        ser = Series(["A", "B"])

        key = Series(["C"], dtype=object)
        key = box(key)

        msg = r"None of \[Index\(\['C'\], dtype='object'\)\] are in the \[index\]"
        with pytest.raises(KeyError, match=msg):
            ser[key]

    def test_getitem_intlist_intindex_periodvalues(self):
        """A list of ints selects Period values and keeps the Period dtype."""
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))

        result = ser[[2, 4]]
        exp = Series(
            [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
            index=[2, 4],
            dtype="Period[D]",
        )
        tm.assert_series_equal(result, exp)
        assert result.dtype == "Period[D]"

    @pytest.mark.parametrize("box", [list, np.array, Index])
    def test_getitem_intlist_intervalindex_non_int(self, box):
        """An int list-like key on a datetime IntervalIndex is positional."""
        # GH#33404 fall back to positional since ints are unambiguous
        dti = date_range("2000-01-03", periods=3)._with_freq(None)
        ii = pd.IntervalIndex.from_breaks(dti)
        ser = Series(range(len(ii)), index=ii)

        expected = ser.iloc[:1]
        key = box([0])
        result = ser[key]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("box", [list, np.array, Index])
    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
    def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
        """A missing int key on a numeric MultiIndex level raises KeyError."""
        # GH#33404 do _not_ fall back to positional since ints are ambiguous
        idx = Index(range(4)).astype(dtype)
        dti = date_range("2000-01-03", periods=3)
        mi = pd.MultiIndex.from_product([idx, dti])
        ser = Series(range(len(mi))[::-1], index=mi)

        key = box([5])
        with pytest.raises(KeyError, match="5"):
            ser[key]

    def test_getitem_uint_array_key(self, uint_dtype):
        """An unsigned-int array key with a missing label raises KeyError."""
        # GH #37218
        ser = Series([1, 2, 3])
        key = np.array([4], dtype=uint_dtype)

        with pytest.raises(KeyError, match="4"):
            ser[key]
        with pytest.raises(KeyError, match="4"):
            ser.loc[key]


class TestGetitemBooleanMask:
    """Tests for ``__getitem__`` with boolean-mask keys."""

    def test_getitem_boolean(self, string_series):
        """A list of booleans selects the same rows as the boolean Series."""
        ser = string_series
        mask = ser > ser.median()

        # passing list is OK
        result = ser[list(mask)]
        expected = ser[mask]
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.index, ser.index[mask])

    def test_getitem_boolean_empty(self):
        """Empty boolean/object keys: metadata is kept, misaligned masks raise."""
        ser = Series([], dtype=np.int64)
        ser.index.name = "index_name"
        ser = ser[ser.isna()]
        assert ser.index.name == "index_name"
        assert ser.dtype == np.int64

        # GH#5877
        # indexing with empty series
        ser = Series(["A", "B"])
        expected = Series(dtype=object, index=Index([], dtype="int64"))
        result = ser[Series([], dtype=object)]
        tm.assert_series_equal(result, expected)

        # invalid because of the boolean indexer
        # that's empty or not-aligned
        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        with pytest.raises(IndexingError, match=msg):
            ser[Series([], dtype=bool)]

        with pytest.raises(IndexingError, match=msg):
            ser[Series([True], dtype=bool)]

    def test_getitem_boolean_object(self, string_series):
        """An object-dtype boolean mask behaves like the bool-dtype mask."""
        # using column from DataFrame

        ser = string_series
        mask = ser > ser.median()
        omask = mask.astype(object)

        # getitem
        result = ser[omask]
        expected = ser[mask]
        tm.assert_series_equal(result, expected)

        # setitem
        s2 = ser.copy()
        cop = ser.copy()
        cop[omask] = 5
        s2[mask] = 5
        tm.assert_series_equal(cop, s2)

        # nans raise exception
        omask[5:10] = np.nan
        msg = "Cannot mask with non-boolean array containing NA / NaN values"
        with pytest.raises(ValueError, match=msg):
            ser[omask]
        with pytest.raises(ValueError, match=msg):
            ser[omask] = 5

    def test_getitem_boolean_dt64_copies(self):
        """The array behind a boolean-mask result has no ``base``."""
        # GH#36210
        dti = date_range("2016-01-01", periods=4, tz="US/Pacific")
        key = np.array([True, True, False, False])

        ser = Series(dti._data)

        res = ser[key]
        assert res._values._data.base is None

        # compare with numeric case for reference
        ser2 = Series(range(4))
        res2 = ser2[key]
        assert res2._values.base is None

    def test_getitem_boolean_corner(self, datetime_series):
        """A boolean mask built from a freq-shifted Series raises IndexingError."""
        ts = datetime_series
        mask_shifted = ts.shift(1, freq=BDay()) > ts.median()

        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        with pytest.raises(IndexingError, match=msg):
            ts[mask_shifted]

        with pytest.raises(IndexingError, match=msg):
            ts.loc[mask_shifted]

    def test_getitem_boolean_different_order(self, string_series):
        """A mask whose index is in a different order gives the same selection."""
        ordered = string_series.sort_values()

        sel = string_series[ordered > 0]
        exp = string_series[string_series > 0]
        tm.assert_series_equal(sel, exp)

    def test_getitem_boolean_contiguous_preserve_freq(self):
        """Masking a DatetimeIndex: contiguous vs. non-contiguous True runs."""
        rng = date_range("1/1/2000", "3/1/2000", freq="B")

        mask = np.zeros(len(rng), dtype=bool)
        mask[10:20] = True

        masked = rng[mask]
        expected = rng[10:20]
        assert expected.freq == rng.freq
        tm.assert_index_equal(masked, expected)

        # a second, separate True makes the selection non-contiguous
        mask[22] = True
        masked = rng[mask]
        assert masked.freq is None


class TestGetitemCallable:
    """Tests for ``Series.__getitem__`` with a callable key."""

    def test_getitem_callable(self):
        """Callables returning a label, a label list or a boolean list."""
        # GH#12533
        ser = Series(4, index=list("ABCD"))
        result = ser[lambda x: "A"]
        assert result == ser.loc["A"]

        result = ser[lambda x: ["A", "B"]]
        expected = ser.loc[["A", "B"]]
        tm.assert_series_equal(result, expected)

        result = ser[lambda x: [True, False, True, True]]
        expected = ser.iloc[[0, 2, 3]]
        tm.assert_series_equal(result, expected)


def test_getitem_generator(string_series):
    """Generator and iterator keys of booleans behave like the boolean mask."""
    gen = (x > 0 for x in string_series)
    result = string_series[gen]
    result2 = string_series[iter(string_series > 0)]
    expected = string_series[string_series > 0]
    tm.assert_series_equal(result, expected)
    tm.assert_series_equal(result2, expected)


@pytest.mark.parametrize(
    "series",
    [
        Series([0, 1]),
        Series(date_range("2012-01-01", periods=2)),
        Series(date_range("2012-01-01", periods=2, tz="CET")),
    ],
)
def test_getitem_ndim_deprecated(series):
    """``series[:, None]`` emits a FutureWarning and returns a 2D ndarray."""
    with tm.assert_produces_warning(
        FutureWarning, match="Support for multi-dimensional indexing"
    ):
        result = series[:, None]

    expected = np.asarray(series)[:, None]
    tm.assert_numpy_array_equal(result, expected)


def test_getitem_multilevel_scalar_slice_not_implemented(
    multiindex_year_month_day_dataframe_random_data,
):
    """A (scalar, slice) tuple key on a MultiIndex Series raises TypeError."""
    # not implementing this for now
    df = multiindex_year_month_day_dataframe_random_data
    ser = df["A"]

    msg = r"\(2000, slice\(3, 4, None\)\)"
    with pytest.raises(TypeError, match=msg):
        ser[2000, 3:4]


def test_getitem_dataframe_raises():
    """Indexing a Series with a boolean DataFrame raises TypeError."""
    rng = list(range(10))
    ser = Series(10, index=rng)
    df = DataFrame(rng, index=rng)
    msg = (
        "Indexing a Series with DataFrame is not supported, "
        "use the appropriate DataFrame column"
    )
    with pytest.raises(TypeError, match=msg):
        ser[df > 5]


def test_getitem_assignment_series_aligment():
    """``ser[idx] = Series(...)`` assigns the values without aligning them."""
    # https://github.com/pandas-dev/pandas/issues/37427
    # when assigning through ``ser[idx] = other`` with a Series value, the
    # value is not first aligned: 10, 11, 12 land at positions 2, 4, 9
    ser = Series(range(10))
    idx = np.array([2, 4, 9])
    ser[idx] = Series([10, 11, 12])
    expected = Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12])
    tm.assert_series_equal(ser, expected)


def test_getitem_duplicate_index_mistyped_key_raises_keyerror():
    """A ``None`` key on a float index with duplicates raises KeyError."""
    # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError
    ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
    with pytest.raises(KeyError, match="None"):
        ser[None]

    with pytest.raises(KeyError, match="None"):
        ser.index.get_loc(None)

    with pytest.raises(KeyError, match="None"):
        ser.index._engine.get_loc(None)


def test_getitem_1tuple_slice_without_multiindex():
    """A 1-tuple holding a slice behaves like the bare slice."""
    ser = Series(range(5))
    key = (slice(3),)

    result = ser[key]
    expected = ser[key[0]]
    tm.assert_series_equal(result, expected)