1"""
2Series.__getitem__ test classes are organized by the type of key passed.
3"""
4from datetime import date, datetime, time
5
6import numpy as np
7import pytest
8
9from pandas._libs.tslibs import conversion, timezones
10
11import pandas as pd
12from pandas import (
13    Categorical,
14    DataFrame,
15    DatetimeIndex,
16    Index,
17    Series,
18    Timestamp,
19    date_range,
20    period_range,
21)
22import pandas._testing as tm
23from pandas.core.indexing import IndexingError
24
25from pandas.tseries.offsets import BDay
26
27
28class TestSeriesGetitemScalars:
29    def test_getitem_out_of_bounds_indexerror(self, datetime_series):
30        # don't segfault, GH#495
31        msg = r"index \d+ is out of bounds for axis 0 with size \d+"
32        with pytest.raises(IndexError, match=msg):
33            datetime_series[len(datetime_series)]
34
35    def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
36        # GH#917
37        # With a RangeIndex, an int key gives a KeyError
38        ser = Series([], dtype=object)
39        with pytest.raises(KeyError, match="-1"):
40            ser[-1]
41
42    def test_getitem_keyerror_with_int64index(self):
43        ser = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2])
44
45        with pytest.raises(KeyError, match=r"^5$"):
46            ser[5]
47
48        with pytest.raises(KeyError, match=r"^'c'$"):
49            ser["c"]
50
51        # not monotonic
52        ser = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1])
53
54        with pytest.raises(KeyError, match=r"^5$"):
55            ser[5]
56
57        with pytest.raises(KeyError, match=r"^'c'$"):
58            ser["c"]
59
60    def test_getitem_int64(self, datetime_series):
61        idx = np.int64(5)
62        assert datetime_series[idx] == datetime_series[5]
63
64    # TODO: better name/GH ref?
65    def test_getitem_regression(self):
66        ser = Series(range(5), index=list(range(5)))
67        result = ser[list(range(5))]
68        tm.assert_series_equal(result, ser)
69
70    # ------------------------------------------------------------------
71    # Series with DatetimeIndex
72
73    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
74    def test_getitem_pydatetime_tz(self, tzstr):
75        tz = timezones.maybe_get_tz(tzstr)
76
77        index = date_range(
78            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="H", tz=tzstr
79        )
80        ts = Series(index=index, data=index.hour)
81        time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr)
82
83        dt = datetime(2012, 12, 24, 17, 0)
84        time_datetime = conversion.localize_pydatetime(dt, tz)
85        assert ts[time_pandas] == ts[time_datetime]
86
87    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
88    def test_string_index_alias_tz_aware(self, tz):
89        rng = date_range("1/1/2000", periods=10, tz=tz)
90        ser = Series(np.random.randn(len(rng)), index=rng)
91
92        result = ser["1/3/2000"]
93        tm.assert_almost_equal(result, ser[2])
94
95    def test_getitem_time_object(self):
96        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
97        ts = Series(np.random.randn(len(rng)), index=rng)
98
99        mask = (rng.hour == 9) & (rng.minute == 30)
100        result = ts[time(9, 30)]
101        expected = ts[mask]
102        result.index = result.index._with_freq(None)
103        tm.assert_series_equal(result, expected)
104
105    # ------------------------------------------------------------------
106    # Series with CategoricalIndex
107
108    def test_getitem_scalar_categorical_index(self):
109        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
110
111        ser = Series([1, 2], index=cats)
112
113        expected = ser.iloc[0]
114        result = ser[cats[0]]
115        assert result == expected
116
117
118class TestSeriesGetitemSlices:
119    def test_getitem_partial_str_slice_with_datetimeindex(self):
120        # GH#34860
121        arr = date_range("1/1/2008", "1/1/2009")
122        ser = arr.to_series()
123        result = ser["2008"]
124
125        rng = date_range(start="2008-01-01", end="2008-12-31")
126        expected = Series(rng, index=rng)
127
128        tm.assert_series_equal(result, expected)
129
130    def test_getitem_slice_strings_with_datetimeindex(self):
131        idx = DatetimeIndex(
132            ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
133        )
134
135        ts = Series(np.random.randn(len(idx)), index=idx)
136
137        result = ts["1/2/2000":]
138        expected = ts[1:]
139        tm.assert_series_equal(result, expected)
140
141        result = ts["1/2/2000":"1/3/2000"]
142        expected = ts[1:4]
143        tm.assert_series_equal(result, expected)
144
145    def test_getitem_slice_2d(self, datetime_series):
146        # GH#30588 multi-dimensional indexing deprecated
147
148        with tm.assert_produces_warning(FutureWarning):
149            # GH#30867 Don't want to support this long-term, but
150            # for now ensure that the warning from Index
151            # doesn't comes through via Series.__getitem__.
152            result = datetime_series[:, np.newaxis]
153        expected = datetime_series.values[:, np.newaxis]
154        tm.assert_almost_equal(result, expected)
155
156    # FutureWarning from NumPy.
157    @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
158    def test_getitem_median_slice_bug(self):
159        index = date_range("20090415", "20090519", freq="2B")
160        s = Series(np.random.randn(13), index=index)
161
162        indexer = [slice(6, 7, None)]
163        with tm.assert_produces_warning(FutureWarning):
164            # GH#31299
165            result = s[indexer]
166        expected = s[indexer[0]]
167        tm.assert_series_equal(result, expected)
168
169    @pytest.mark.parametrize(
170        "slc, positions",
171        [
172            [slice(date(2018, 1, 1), None), [0, 1, 2]],
173            [slice(date(2019, 1, 2), None), [2]],
174            [slice(date(2020, 1, 1), None), []],
175            [slice(None, date(2020, 1, 1)), [0, 1, 2]],
176            [slice(None, date(2019, 1, 1)), [0]],
177        ],
178    )
179    def test_getitem_slice_date(self, slc, positions):
180        # https://github.com/pandas-dev/pandas/issues/31501
181        ser = Series(
182            [0, 1, 2],
183            DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]),
184        )
185        result = ser[slc]
186        expected = ser.take(positions)
187        tm.assert_series_equal(result, expected)
188
189
190class TestSeriesGetitemListLike:
191    @pytest.mark.parametrize("box", [list, np.array, Index, pd.Series])
192    def test_getitem_no_matches(self, box):
193        # GH#33462 we expect the same behavior for list/ndarray/Index/Series
194        ser = Series(["A", "B"])
195
196        key = Series(["C"], dtype=object)
197        key = box(key)
198
199        msg = r"None of \[Index\(\['C'\], dtype='object'\)\] are in the \[index\]"
200        with pytest.raises(KeyError, match=msg):
201            ser[key]
202
203    def test_getitem_intlist_intindex_periodvalues(self):
204        ser = Series(period_range("2000-01-01", periods=10, freq="D"))
205
206        result = ser[[2, 4]]
207        exp = Series(
208            [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
209            index=[2, 4],
210            dtype="Period[D]",
211        )
212        tm.assert_series_equal(result, exp)
213        assert result.dtype == "Period[D]"
214
215    @pytest.mark.parametrize("box", [list, np.array, Index])
216    def test_getitem_intlist_intervalindex_non_int(self, box):
217        # GH#33404 fall back to positional since ints are unambiguous
218        dti = date_range("2000-01-03", periods=3)._with_freq(None)
219        ii = pd.IntervalIndex.from_breaks(dti)
220        ser = Series(range(len(ii)), index=ii)
221
222        expected = ser.iloc[:1]
223        key = box([0])
224        result = ser[key]
225        tm.assert_series_equal(result, expected)
226
227    @pytest.mark.parametrize("box", [list, np.array, Index])
228    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
229    def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
230        # GH#33404 do _not_ fall back to positional since ints are ambiguous
231        idx = Index(range(4)).astype(dtype)
232        dti = date_range("2000-01-03", periods=3)
233        mi = pd.MultiIndex.from_product([idx, dti])
234        ser = Series(range(len(mi))[::-1], index=mi)
235
236        key = box([5])
237        with pytest.raises(KeyError, match="5"):
238            ser[key]
239
240    def test_getitem_uint_array_key(self, uint_dtype):
241        # GH #37218
242        ser = Series([1, 2, 3])
243        key = np.array([4], dtype=uint_dtype)
244
245        with pytest.raises(KeyError, match="4"):
246            ser[key]
247        with pytest.raises(KeyError, match="4"):
248            ser.loc[key]
249
250
251class TestGetitemBooleanMask:
252    def test_getitem_boolean(self, string_series):
253        ser = string_series
254        mask = ser > ser.median()
255
256        # passing list is OK
257        result = ser[list(mask)]
258        expected = ser[mask]
259        tm.assert_series_equal(result, expected)
260        tm.assert_index_equal(result.index, ser.index[mask])
261
262    def test_getitem_boolean_empty(self):
263        ser = Series([], dtype=np.int64)
264        ser.index.name = "index_name"
265        ser = ser[ser.isna()]
266        assert ser.index.name == "index_name"
267        assert ser.dtype == np.int64
268
269        # GH#5877
270        # indexing with empty series
271        ser = Series(["A", "B"])
272        expected = Series(dtype=object, index=Index([], dtype="int64"))
273        result = ser[Series([], dtype=object)]
274        tm.assert_series_equal(result, expected)
275
276        # invalid because of the boolean indexer
277        # that's empty or not-aligned
278        msg = (
279            r"Unalignable boolean Series provided as indexer \(index of "
280            r"the boolean Series and of the indexed object do not match"
281        )
282        with pytest.raises(IndexingError, match=msg):
283            ser[Series([], dtype=bool)]
284
285        with pytest.raises(IndexingError, match=msg):
286            ser[Series([True], dtype=bool)]
287
288    def test_getitem_boolean_object(self, string_series):
289        # using column from DataFrame
290
291        ser = string_series
292        mask = ser > ser.median()
293        omask = mask.astype(object)
294
295        # getitem
296        result = ser[omask]
297        expected = ser[mask]
298        tm.assert_series_equal(result, expected)
299
300        # setitem
301        s2 = ser.copy()
302        cop = ser.copy()
303        cop[omask] = 5
304        s2[mask] = 5
305        tm.assert_series_equal(cop, s2)
306
307        # nans raise exception
308        omask[5:10] = np.nan
309        msg = "Cannot mask with non-boolean array containing NA / NaN values"
310        with pytest.raises(ValueError, match=msg):
311            ser[omask]
312        with pytest.raises(ValueError, match=msg):
313            ser[omask] = 5
314
315    def test_getitem_boolean_dt64_copies(self):
316        # GH#36210
317        dti = date_range("2016-01-01", periods=4, tz="US/Pacific")
318        key = np.array([True, True, False, False])
319
320        ser = Series(dti._data)
321
322        res = ser[key]
323        assert res._values._data.base is None
324
325        # compare with numeric case for reference
326        ser2 = Series(range(4))
327        res2 = ser2[key]
328        assert res2._values.base is None
329
330    def test_getitem_boolean_corner(self, datetime_series):
331        ts = datetime_series
332        mask_shifted = ts.shift(1, freq=BDay()) > ts.median()
333
334        msg = (
335            r"Unalignable boolean Series provided as indexer \(index of "
336            r"the boolean Series and of the indexed object do not match"
337        )
338        with pytest.raises(IndexingError, match=msg):
339            ts[mask_shifted]
340
341        with pytest.raises(IndexingError, match=msg):
342            ts.loc[mask_shifted]
343
344    def test_getitem_boolean_different_order(self, string_series):
345        ordered = string_series.sort_values()
346
347        sel = string_series[ordered > 0]
348        exp = string_series[string_series > 0]
349        tm.assert_series_equal(sel, exp)
350
351    def test_getitem_boolean_contiguous_preserve_freq(self):
352        rng = date_range("1/1/2000", "3/1/2000", freq="B")
353
354        mask = np.zeros(len(rng), dtype=bool)
355        mask[10:20] = True
356
357        masked = rng[mask]
358        expected = rng[10:20]
359        assert expected.freq == rng.freq
360        tm.assert_index_equal(masked, expected)
361
362        mask[22] = True
363        masked = rng[mask]
364        assert masked.freq is None
365
366
class TestGetitemCallable:
    """Series.__getitem__ called with a callable key."""

    def test_getitem_callable(self):
        """The callable's return value is used as the key, GH#12533."""
        ser = Series(4, index=list("ABCD"))

        # callable returning a scalar label
        scalar_result = ser[lambda x: "A"]
        assert scalar_result == ser.loc["A"]

        # callable returning a list of labels
        label_result = ser[lambda x: ["A", "B"]]
        tm.assert_series_equal(label_result, ser.loc[["A", "B"]])

        # callable returning a boolean list
        mask_result = ser[lambda x: [True, False, True, True]]
        tm.assert_series_equal(mask_result, ser.iloc[[0, 2, 3]])
381
382
383def test_getitem_generator(string_series):
384    gen = (x > 0 for x in string_series)
385    result = string_series[gen]
386    result2 = string_series[iter(string_series > 0)]
387    expected = string_series[string_series > 0]
388    tm.assert_series_equal(result, expected)
389    tm.assert_series_equal(result2, expected)
390
391
392@pytest.mark.parametrize(
393    "series",
394    [
395        Series([0, 1]),
396        Series(date_range("2012-01-01", periods=2)),
397        Series(date_range("2012-01-01", periods=2, tz="CET")),
398    ],
399)
400def test_getitem_ndim_deprecated(series):
401    with tm.assert_produces_warning(
402        FutureWarning, match="Support for multi-dimensional indexing"
403    ):
404        result = series[:, None]
405
406    expected = np.asarray(series)[:, None]
407    tm.assert_numpy_array_equal(result, expected)
408
409
410def test_getitem_multilevel_scalar_slice_not_implemented(
411    multiindex_year_month_day_dataframe_random_data,
412):
413    # not implementing this for now
414    df = multiindex_year_month_day_dataframe_random_data
415    ser = df["A"]
416
417    msg = r"\(2000, slice\(3, 4, None\)\)"
418    with pytest.raises(TypeError, match=msg):
419        ser[2000, 3:4]
420
421
def test_getitem_dataframe_raises():
    """Using a boolean DataFrame as a Series key raises TypeError."""
    labels = list(range(10))
    ser = Series(10, index=labels)
    frame = DataFrame(labels, index=labels)

    expected_msg = (
        "Indexing a Series with DataFrame is not supported, "
        "use the appropriate DataFrame column"
    )
    frame_mask = frame > 5
    with pytest.raises(TypeError, match=expected_msg):
        ser[frame_mask]
432
433
def test_getitem_assignment_series_aligment():
    """Assigning a Series through an int array key writes its values in order."""
    # https://github.com/pandas-dev/pandas/issues/37427
    # with getitem, when assigning with a Series, it is not first aligned
    target = Series(range(10))
    positions = np.array([2, 4, 9])
    new_values = Series([10, 11, 12])

    target[positions] = new_values

    expected = Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12])
    tm.assert_series_equal(target, expected)
442
443
def test_getitem_duplicate_index_mistyped_key_raises_keyerror():
    """A None key raises KeyError from the Series, its index and the engine."""
    # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError
    ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])

    lookups = (ser.__getitem__, ser.index.get_loc, ser.index._engine.get_loc)
    for lookup in lookups:
        with pytest.raises(KeyError, match="None"):
            lookup(None)
455
456
def test_getitem_1tuple_slice_without_multiindex():
    """A 1-tuple wrapping a slice selects the same rows as the bare slice."""
    ser = Series(range(5))
    first_three = slice(3)

    result = ser[(first_three,)]
    expected = ser[first_three]
    tm.assert_series_equal(result, expected)
464