1import numpy as np
2import pytest
3
4from pandas import DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series
5import pandas._testing as tm
6
7
def gen_obj(klass, index):
    """
    Build a Series or a square DataFrame labelled by ``index``.

    When ``klass`` is ``Series`` the result holds ``0 .. len(index) - 1``;
    otherwise a DataFrame of random normal values is returned that uses
    ``index`` for both its rows and its columns.
    """
    size = len(index)
    if klass is Series:
        return Series(np.arange(size), index=index)

    values = np.random.randn(size, size)
    return DataFrame(values, index=index, columns=index)
16
17
18class TestFloatIndexers:
19    def check(self, result, original, indexer, getitem):
20        """
21        comparator for results
22        we need to take care if we are indexing on a
23        Series or a frame
24        """
25        if isinstance(original, Series):
26            expected = original.iloc[indexer]
27        else:
28            if getitem:
29                expected = original.iloc[:, indexer]
30            else:
31                expected = original.iloc[indexer]
32
33        tm.assert_almost_equal(result, expected)
34
35    @pytest.mark.parametrize(
36        "index_func",
37        [
38            tm.makeStringIndex,
39            tm.makeUnicodeIndex,
40            tm.makeCategoricalIndex,
41            tm.makeDateIndex,
42            tm.makeTimedeltaIndex,
43            tm.makePeriodIndex,
44        ],
45    )
46    def test_scalar_non_numeric(self, index_func, frame_or_series):
47
48        # GH 4892
49        # float_indexers should raise exceptions
50        # on appropriate Index types & accessors
51
52        i = index_func(5)
53        s = gen_obj(frame_or_series, i)
54
55        # getting
56        with pytest.raises(KeyError, match="^3.0$"):
57            s[3.0]
58
59        with pytest.raises(KeyError, match="^3.0$"):
60            s.loc[3.0]
61
62        # contains
63        assert 3.0 not in s
64
65        # setting with an indexer
66        if s.index.inferred_type in ["categorical"]:
67            # Value or Type Error
68            pass
69        elif s.index.inferred_type in ["datetime64", "timedelta64", "period"]:
70
71            # FIXME: dont leave commented-out
72            # these should prob work
73            # and are inconsistent between series/dataframe ATM
74            # for idxr in [lambda x: x]:
75            #    s2 = s.copy()
76            #
77            #    with pytest.raises(TypeError):
78            #        idxr(s2)[3.0] = 0
79            pass
80
81        else:
82
83            s2 = s.copy()
84            s2.loc[3.0] = 10
85            assert s2.index.is_object()
86
87            s2 = s.copy()
88            s2[3.0] = 0
89            assert s2.index.is_object()
90
91    @pytest.mark.parametrize(
92        "index_func",
93        [
94            tm.makeStringIndex,
95            tm.makeUnicodeIndex,
96            tm.makeCategoricalIndex,
97            tm.makeDateIndex,
98            tm.makeTimedeltaIndex,
99            tm.makePeriodIndex,
100        ],
101    )
102    def test_scalar_non_numeric_series_fallback(self, index_func):
103        # fallsback to position selection, series only
104        i = index_func(5)
105        s = Series(np.arange(len(i)), index=i)
106        s[3]
107        with pytest.raises(KeyError, match="^3.0$"):
108            s[3.0]
109
110    def test_scalar_with_mixed(self):
111
112        s2 = Series([1, 2, 3], index=["a", "b", "c"])
113        s3 = Series([1, 2, 3], index=["a", "b", 1.5])
114
115        # lookup in a pure string index with an invalid indexer
116
117        with pytest.raises(KeyError, match="^1.0$"):
118            s2[1.0]
119
120        with pytest.raises(KeyError, match=r"^1\.0$"):
121            s2.loc[1.0]
122
123        result = s2.loc["b"]
124        expected = 2
125        assert result == expected
126
127        # mixed index so we have label
128        # indexing
129        with pytest.raises(KeyError, match="^1.0$"):
130            s3[1.0]
131
132        result = s3[1]
133        expected = 2
134        assert result == expected
135
136        with pytest.raises(KeyError, match=r"^1\.0$"):
137            s3.loc[1.0]
138
139        result = s3.loc[1.5]
140        expected = 3
141        assert result == expected
142
143    @pytest.mark.parametrize(
144        "idxr,getitem", [(lambda x: x.loc, False), (lambda x: x, True)]
145    )
146    @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
147    def test_scalar_integer(self, index_func, frame_or_series, idxr, getitem):
148
149        # test how scalar float indexers work on int indexes
150
151        # integer index
152        i = index_func(5)
153        obj = gen_obj(frame_or_series, i)
154
155        # coerce to equal int
156
157        result = idxr(obj)[3.0]
158        self.check(result, obj, 3, getitem)
159
160        if isinstance(obj, Series):
161
162            def compare(x, y):
163                assert x == y
164
165            expected = 100
166        else:
167            compare = tm.assert_series_equal
168            if getitem:
169                expected = Series(100, index=range(len(obj)), name=3)
170            else:
171                expected = Series(100.0, index=range(len(obj)), name=3)
172
173        s2 = obj.copy()
174        idxr(s2)[3.0] = 100
175
176        result = idxr(s2)[3.0]
177        compare(result, expected)
178
179        result = idxr(s2)[3]
180        compare(result, expected)
181
182    @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
183    def test_scalar_integer_contains_float(self, index_func, frame_or_series):
184        # contains
185        # integer index
186        index = index_func(5)
187        obj = gen_obj(frame_or_series, index)
188
189        # coerce to equal int
190        assert 3.0 in obj
191
192    def test_scalar_float(self, frame_or_series):
193
194        # scalar float indexers work on a float index
195        index = Index(np.arange(5.0))
196        s = gen_obj(frame_or_series, index)
197
198        # assert all operations except for iloc are ok
199        indexer = index[3]
200        for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]:
201
202            # getting
203            result = idxr(s)[indexer]
204            self.check(result, s, 3, getitem)
205
206            # setting
207            s2 = s.copy()
208
209            result = idxr(s2)[indexer]
210            self.check(result, s, 3, getitem)
211
212            # random float is a KeyError
213            with pytest.raises(KeyError, match=r"^3\.5$"):
214                idxr(s)[3.5]
215
216        # contains
217        assert 3.0 in s
218
219        # iloc succeeds with an integer
220        expected = s.iloc[3]
221        s2 = s.copy()
222
223        s2.iloc[3] = expected
224        result = s2.iloc[3]
225        self.check(result, s, 3, False)
226
227    @pytest.mark.parametrize(
228        "index_func",
229        [
230            tm.makeStringIndex,
231            tm.makeUnicodeIndex,
232            tm.makeDateIndex,
233            tm.makeTimedeltaIndex,
234            tm.makePeriodIndex,
235        ],
236    )
237    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
238    def test_slice_non_numeric(self, index_func, idx, frame_or_series):
239
240        # GH 4892
241        # float_indexers should raise exceptions
242        # on appropriate Index types & accessors
243
244        index = index_func(5)
245        s = gen_obj(frame_or_series, index)
246
247        # getitem
248        msg = (
249            "cannot do positional indexing "
250            fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
251            "type float"
252        )
253        with pytest.raises(TypeError, match=msg):
254            s.iloc[idx]
255
256        msg = (
257            "cannot do (slice|positional) indexing "
258            fr"on {type(index).__name__} with these indexers "
259            r"\[(3|4)(\.0)?\] "
260            r"of type (float|int)"
261        )
262        for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]:
263            with pytest.raises(TypeError, match=msg):
264                idxr(s)[idx]
265
266        # setitem
267        msg = "slice indices must be integers or None or have an __index__ method"
268        with pytest.raises(TypeError, match=msg):
269            s.iloc[idx] = 0
270
271        msg = (
272            "cannot do (slice|positional) indexing "
273            fr"on {type(index).__name__} with these indexers "
274            r"\[(3|4)(\.0)?\] "
275            r"of type (float|int)"
276        )
277        for idxr in [lambda x: x.loc, lambda x: x]:
278            with pytest.raises(TypeError, match=msg):
279                idxr(s)[idx] = 0
280
281    def test_slice_integer(self):
282
283        # same as above, but for Integer based indexes
284        # these coerce to a like integer
285        # oob indicates if we are out of bounds
286        # of positional indexing
287        for index, oob in [
288            (Int64Index(range(5)), False),
289            (RangeIndex(5), False),
290            (Int64Index(range(5)) + 10, True),
291        ]:
292
293            # s is an in-range index
294            s = Series(range(5), index=index)
295
296            # getitem
297            for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
298
299                result = s.loc[idx]
300
301                # these are all label indexing
302                # except getitem which is positional
303                # empty
304                if oob:
305                    indexer = slice(0, 0)
306                else:
307                    indexer = slice(3, 5)
308                self.check(result, s, indexer, False)
309
310            # getitem out-of-bounds
311            for idx in [slice(-6, 6), slice(-6.0, 6.0)]:
312
313                result = s.loc[idx]
314
315                # these are all label indexing
316                # except getitem which is positional
317                # empty
318                if oob:
319                    indexer = slice(0, 0)
320                else:
321                    indexer = slice(-6, 6)
322                self.check(result, s, indexer, False)
323
324            # positional indexing
325            msg = (
326                "cannot do slice indexing "
327                fr"on {type(index).__name__} with these indexers \[-6\.0\] of "
328                "type float"
329            )
330            with pytest.raises(TypeError, match=msg):
331                s[slice(-6.0, 6.0)]
332
333            # getitem odd floats
334            for idx, res1 in [
335                (slice(2.5, 4), slice(3, 5)),
336                (slice(2, 3.5), slice(2, 4)),
337                (slice(2.5, 3.5), slice(3, 4)),
338            ]:
339
340                result = s.loc[idx]
341                if oob:
342                    res = slice(0, 0)
343                else:
344                    res = res1
345
346                self.check(result, s, res, False)
347
348                # positional indexing
349                msg = (
350                    "cannot do slice indexing "
351                    fr"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
352                    "type float"
353                )
354                with pytest.raises(TypeError, match=msg):
355                    s[idx]
356
357    @pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
358    def test_integer_positional_indexing(self, idx):
359        """make sure that we are raising on positional indexing
360        w.r.t. an integer index
361        """
362        s = Series(range(2, 6), index=range(2, 6))
363
364        result = s[2:4]
365        expected = s.iloc[2:4]
366        tm.assert_series_equal(result, expected)
367
368        klass = RangeIndex
369        msg = (
370            "cannot do (slice|positional) indexing "
371            fr"on {klass.__name__} with these indexers \[(2|4)\.0\] of "
372            "type float"
373        )
374        with pytest.raises(TypeError, match=msg):
375            s[idx]
376        with pytest.raises(TypeError, match=msg):
377            s.iloc[idx]
378
379    @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
380    def test_slice_integer_frame_getitem(self, index_func):
381
382        # similar to above, but on the getitem dim (of a DataFrame)
383        index = index_func(5)
384
385        s = DataFrame(np.random.randn(5, 2), index=index)
386
387        # getitem
388        for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:
389
390            result = s.loc[idx]
391            indexer = slice(0, 2)
392            self.check(result, s, indexer, False)
393
394            # positional indexing
395            msg = (
396                "cannot do slice indexing "
397                fr"on {type(index).__name__} with these indexers \[(0|1)\.0\] of "
398                "type float"
399            )
400            with pytest.raises(TypeError, match=msg):
401                s[idx]
402
403        # getitem out-of-bounds
404        for idx in [slice(-10, 10), slice(-10.0, 10.0)]:
405
406            result = s.loc[idx]
407            self.check(result, s, slice(-10, 10), True)
408
409        # positional indexing
410        msg = (
411            "cannot do slice indexing "
412            fr"on {type(index).__name__} with these indexers \[-10\.0\] of "
413            "type float"
414        )
415        with pytest.raises(TypeError, match=msg):
416            s[slice(-10.0, 10.0)]
417
418        # getitem odd floats
419        for idx, res in [
420            (slice(0.5, 1), slice(1, 2)),
421            (slice(0, 0.5), slice(0, 1)),
422            (slice(0.5, 1.5), slice(1, 2)),
423        ]:
424
425            result = s.loc[idx]
426            self.check(result, s, res, False)
427
428            # positional indexing
429            msg = (
430                "cannot do slice indexing "
431                fr"on {type(index).__name__} with these indexers \[0\.5\] of "
432                "type float"
433            )
434            with pytest.raises(TypeError, match=msg):
435                s[idx]
436
437    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
438    @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
439    def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func):
440
441        # similar to above, but on the getitem dim (of a DataFrame)
442        index = index_func(5)
443
444        s = DataFrame(np.random.randn(5, 2), index=index)
445
446        # setitem
447        sc = s.copy()
448        sc.loc[idx] = 0
449        result = sc.loc[idx].values.ravel()
450        assert (result == 0).all()
451
452        # positional indexing
453        msg = (
454            "cannot do slice indexing "
455            fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
456            "type float"
457        )
458        with pytest.raises(TypeError, match=msg):
459            s[idx] = 0
460
461        with pytest.raises(TypeError, match=msg):
462            s[idx]
463
464    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
465    def test_slice_float(self, idx, frame_or_series):
466
467        # same as above, but for floats
468        index = Index(np.arange(5.0)) + 0.1
469        s = gen_obj(frame_or_series, index)
470
471        expected = s.iloc[3:4]
472        for idxr in [lambda x: x.loc, lambda x: x]:
473
474            # getitem
475            result = idxr(s)[idx]
476            assert isinstance(result, type(s))
477            tm.assert_equal(result, expected)
478
479            # setitem
480            s2 = s.copy()
481            idxr(s2)[idx] = 0
482            result = idxr(s2)[idx].values.ravel()
483            assert (result == 0).all()
484
485    def test_floating_index_doc_example(self):
486
487        index = Index([1.5, 2, 3, 4.5, 5])
488        s = Series(range(5), index=index)
489        assert s[3] == 2
490        assert s.loc[3] == 2
491        assert s.loc[3] == 2
492        assert s.iloc[3] == 3
493
494    def test_floating_misc(self):
495
496        # related 236
497        # scalar/slicing of a float index
498        s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)
499
500        # label based slicing
501        result1 = s[1.0:3.0]
502        result2 = s.loc[1.0:3.0]
503        result3 = s.loc[1.0:3.0]
504        tm.assert_series_equal(result1, result2)
505        tm.assert_series_equal(result1, result3)
506
507        # exact indexing when found
508        result1 = s[5.0]
509        result2 = s.loc[5.0]
510        result3 = s.loc[5.0]
511        assert result1 == result2
512        assert result1 == result3
513
514        result1 = s[5]
515        result2 = s.loc[5]
516        result3 = s.loc[5]
517        assert result1 == result2
518        assert result1 == result3
519
520        assert s[5.0] == s[5]
521
522        # value not found (and no fallbacking at all)
523
524        # scalar integers
525        with pytest.raises(KeyError, match=r"^4$"):
526            s.loc[4]
527        with pytest.raises(KeyError, match=r"^4$"):
528            s.loc[4]
529        with pytest.raises(KeyError, match=r"^4$"):
530            s[4]
531
532        # fancy floats/integers create the correct entry (as nan)
533        # fancy tests
534        expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
535        for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
536            tm.assert_series_equal(s[fancy_idx], expected)
537            tm.assert_series_equal(s.loc[fancy_idx], expected)
538            tm.assert_series_equal(s.loc[fancy_idx], expected)
539
540        expected = Series([2, 0], index=Index([5, 0], dtype="int64"))
541        for fancy_idx in [[5, 0], np.array([5, 0])]:  # int
542            tm.assert_series_equal(s[fancy_idx], expected)
543            tm.assert_series_equal(s.loc[fancy_idx], expected)
544            tm.assert_series_equal(s.loc[fancy_idx], expected)
545
546        # all should return the same as we are slicing 'the same'
547        result1 = s.loc[2:5]
548        result2 = s.loc[2.0:5.0]
549        result3 = s.loc[2.0:5]
550        result4 = s.loc[2.1:5]
551        tm.assert_series_equal(result1, result2)
552        tm.assert_series_equal(result1, result3)
553        tm.assert_series_equal(result1, result4)
554
555        # previously this did fallback indexing
556        result1 = s[2:5]
557        result2 = s[2.0:5.0]
558        result3 = s[2.0:5]
559        result4 = s[2.1:5]
560        tm.assert_series_equal(result1, result2)
561        tm.assert_series_equal(result1, result3)
562        tm.assert_series_equal(result1, result4)
563
564        result1 = s.loc[2:5]
565        result2 = s.loc[2.0:5.0]
566        result3 = s.loc[2.0:5]
567        result4 = s.loc[2.1:5]
568        tm.assert_series_equal(result1, result2)
569        tm.assert_series_equal(result1, result3)
570        tm.assert_series_equal(result1, result4)
571
572        # combined test
573        result1 = s.loc[2:5]
574        result2 = s.loc[2:5]
575        result3 = s[2:5]
576
577        tm.assert_series_equal(result1, result2)
578        tm.assert_series_equal(result1, result3)
579
580        # list selection
581        result1 = s[[0.0, 5, 10]]
582        result2 = s.loc[[0.0, 5, 10]]
583        result3 = s.loc[[0.0, 5, 10]]
584        result4 = s.iloc[[0, 2, 4]]
585        tm.assert_series_equal(result1, result2)
586        tm.assert_series_equal(result1, result3)
587        tm.assert_series_equal(result1, result4)
588
589        with pytest.raises(KeyError, match="with any missing labels"):
590            s[[1.6, 5, 10]]
591        with pytest.raises(KeyError, match="with any missing labels"):
592            s.loc[[1.6, 5, 10]]
593
594        with pytest.raises(KeyError, match="with any missing labels"):
595            s[[0, 1, 2]]
596        with pytest.raises(KeyError, match="with any missing labels"):
597            s.loc[[0, 1, 2]]
598
599        result1 = s.loc[[2.5, 5]]
600        result2 = s.loc[[2.5, 5]]
601        tm.assert_series_equal(result1, result2)
602        tm.assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0]))
603
604        result1 = s[[2.5]]
605        result2 = s.loc[[2.5]]
606        result3 = s.loc[[2.5]]
607        tm.assert_series_equal(result1, result2)
608        tm.assert_series_equal(result1, result3)
609        tm.assert_series_equal(result1, Series([1], index=[2.5]))
610
611    def test_floating_tuples(self):
612        # see gh-13509
613        s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo")
614
615        result = s[0.0]
616        assert result == (1, 1)
617
618        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
619        s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo")
620
621        result = s[0.0]
622        tm.assert_series_equal(result, expected)
623
624    def test_float64index_slicing_bug(self):
625        # GH 5557, related to slicing a float index
626        ser = {
627            256: 2321.0,
628            1: 78.0,
629            2: 2716.0,
630            3: 0.0,
631            4: 369.0,
632            5: 0.0,
633            6: 269.0,
634            7: 0.0,
635            8: 0.0,
636            9: 0.0,
637            10: 3536.0,
638            11: 0.0,
639            12: 24.0,
640            13: 0.0,
641            14: 931.0,
642            15: 0.0,
643            16: 101.0,
644            17: 78.0,
645            18: 9643.0,
646            19: 0.0,
647            20: 0.0,
648            21: 0.0,
649            22: 63761.0,
650            23: 0.0,
651            24: 446.0,
652            25: 0.0,
653            26: 34773.0,
654            27: 0.0,
655            28: 729.0,
656            29: 78.0,
657            30: 0.0,
658            31: 0.0,
659            32: 3374.0,
660            33: 0.0,
661            34: 1391.0,
662            35: 0.0,
663            36: 361.0,
664            37: 0.0,
665            38: 61808.0,
666            39: 0.0,
667            40: 0.0,
668            41: 0.0,
669            42: 6677.0,
670            43: 0.0,
671            44: 802.0,
672            45: 0.0,
673            46: 2691.0,
674            47: 0.0,
675            48: 3582.0,
676            49: 0.0,
677            50: 734.0,
678            51: 0.0,
679            52: 627.0,
680            53: 70.0,
681            54: 2584.0,
682            55: 0.0,
683            56: 324.0,
684            57: 0.0,
685            58: 605.0,
686            59: 0.0,
687            60: 0.0,
688            61: 0.0,
689            62: 3989.0,
690            63: 10.0,
691            64: 42.0,
692            65: 0.0,
693            66: 904.0,
694            67: 0.0,
695            68: 88.0,
696            69: 70.0,
697            70: 8172.0,
698            71: 0.0,
699            72: 0.0,
700            73: 0.0,
701            74: 64902.0,
702            75: 0.0,
703            76: 347.0,
704            77: 0.0,
705            78: 36605.0,
706            79: 0.0,
707            80: 379.0,
708            81: 70.0,
709            82: 0.0,
710            83: 0.0,
711            84: 3001.0,
712            85: 0.0,
713            86: 1630.0,
714            87: 7.0,
715            88: 364.0,
716            89: 0.0,
717            90: 67404.0,
718            91: 9.0,
719            92: 0.0,
720            93: 0.0,
721            94: 7685.0,
722            95: 0.0,
723            96: 1017.0,
724            97: 0.0,
725            98: 2831.0,
726            99: 0.0,
727            100: 2963.0,
728            101: 0.0,
729            102: 854.0,
730            103: 0.0,
731            104: 0.0,
732            105: 0.0,
733            106: 0.0,
734            107: 0.0,
735            108: 0.0,
736            109: 0.0,
737            110: 0.0,
738            111: 0.0,
739            112: 0.0,
740            113: 0.0,
741            114: 0.0,
742            115: 0.0,
743            116: 0.0,
744            117: 0.0,
745            118: 0.0,
746            119: 0.0,
747            120: 0.0,
748            121: 0.0,
749            122: 0.0,
750            123: 0.0,
751            124: 0.0,
752            125: 0.0,
753            126: 67744.0,
754            127: 22.0,
755            128: 264.0,
756            129: 0.0,
757            260: 197.0,
758            268: 0.0,
759            265: 0.0,
760            269: 0.0,
761            261: 0.0,
762            266: 1198.0,
763            267: 0.0,
764            262: 2629.0,
765            258: 775.0,
766            257: 0.0,
767            263: 0.0,
768            259: 0.0,
769            264: 163.0,
770            250: 10326.0,
771            251: 0.0,
772            252: 1228.0,
773            253: 0.0,
774            254: 2769.0,
775            255: 0.0,
776        }
777
778        # smoke test for the repr
779        s = Series(ser)
780        result = s.value_counts()
781        str(result)
782