1from datetime import date, timedelta
2
3import dateutil
4import numpy as np
5import pytest
6
7import pandas as pd
8from pandas import DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets
9import pandas._testing as tm
10
11randn = np.random.randn
12
13
14class TestDatetimeIndex:
15    def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
16        # GH7774
17        index = date_range("20130101", periods=3, tz="US/Eastern")
18        assert str(index.reindex([])[0].tz) == "US/Eastern"
19        assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern"
20
21    def test_reindex_with_same_tz(self):
22        # GH 32740
23        rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc")
24        rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc")
25        result1, result2 = rng_a.reindex(
26            rng_b, method="nearest", tolerance=timedelta(seconds=20)
27        )
28        expected_list1 = [
29            "2010-01-01 00:00:00",
30            "2010-01-01 01:05:27.272727272",
31            "2010-01-01 02:10:54.545454545",
32            "2010-01-01 03:16:21.818181818",
33            "2010-01-01 04:21:49.090909090",
34            "2010-01-01 05:27:16.363636363",
35            "2010-01-01 06:32:43.636363636",
36            "2010-01-01 07:38:10.909090909",
37            "2010-01-01 08:43:38.181818181",
38            "2010-01-01 09:49:05.454545454",
39            "2010-01-01 10:54:32.727272727",
40            "2010-01-01 12:00:00",
41            "2010-01-01 13:05:27.272727272",
42            "2010-01-01 14:10:54.545454545",
43            "2010-01-01 15:16:21.818181818",
44            "2010-01-01 16:21:49.090909090",
45            "2010-01-01 17:27:16.363636363",
46            "2010-01-01 18:32:43.636363636",
47            "2010-01-01 19:38:10.909090909",
48            "2010-01-01 20:43:38.181818181",
49            "2010-01-01 21:49:05.454545454",
50            "2010-01-01 22:54:32.727272727",
51            "2010-01-02 00:00:00",
52        ]
53        expected1 = DatetimeIndex(
54            expected_list1, dtype="datetime64[ns, UTC]", freq=None
55        )
56        expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp"))
57        tm.assert_index_equal(result1, expected1)
58        tm.assert_numpy_array_equal(result2, expected2)
59
60    def test_time_loc(self):  # GH8667
61        from datetime import time
62
63        from pandas._libs.index import _SIZE_CUTOFF
64
65        ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64)
66        key = time(15, 11, 30)
67        start = key.hour * 3600 + key.minute * 60 + key.second
68        step = 24 * 3600
69
70        for n in ns:
71            idx = date_range("2014-11-26", periods=n, freq="S")
72            ts = pd.Series(np.random.randn(n), index=idx)
73            i = np.arange(start, n, step)
74
75            tm.assert_numpy_array_equal(ts.index.get_loc(key), i, check_dtype=False)
76            tm.assert_series_equal(ts[key], ts.iloc[i])
77
78            left, right = ts.copy(), ts.copy()
79            left[key] *= -10
80            right.iloc[i] *= -10
81            tm.assert_series_equal(left, right)
82
83    def test_time_overflow_for_32bit_machines(self):
84        # GH8943.  On some machines NumPy defaults to np.int32 (for example,
85        # 32-bit Linux machines).  In the function _generate_regular_range
86        # found in tseries/index.py, `periods` gets multiplied by `strides`
87        # (which has value 1e9) and since the max value for np.int32 is ~2e9,
88        # and since those machines won't promote np.int32 to np.int64, we get
89        # overflow.
90        periods = np.int_(1000)
91
92        idx1 = date_range(start="2000", periods=periods, freq="S")
93        assert len(idx1) == periods
94
95        idx2 = date_range(end="2000", periods=periods, freq="S")
96        assert len(idx2) == periods
97
98    def test_nat(self):
99        assert DatetimeIndex([np.nan])[0] is pd.NaT
100
101    def test_week_of_month_frequency(self):
102        # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
103        d1 = date(2002, 9, 1)
104        d2 = date(2013, 10, 27)
105        d3 = date(2012, 9, 30)
106        idx1 = DatetimeIndex([d1, d2])
107        idx2 = DatetimeIndex([d3])
108        result_append = idx1.append(idx2)
109        expected = DatetimeIndex([d1, d2, d3])
110        tm.assert_index_equal(result_append, expected)
111        result_union = idx1.union(idx2)
112        expected = DatetimeIndex([d1, d3, d2])
113        tm.assert_index_equal(result_union, expected)
114
115        # GH 5115
116        result = date_range("2013-1-1", periods=4, freq="WOM-1SAT")
117        dates = ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"]
118        expected = DatetimeIndex(dates, freq="WOM-1SAT")
119        tm.assert_index_equal(result, expected)
120
121    def test_stringified_slice_with_tz(self):
122        # GH#2658
123        start = "2013-01-07"
124        idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
125        df = DataFrame(np.arange(10), index=idx)
126        df["2013-01-14 23:44:34.437768-05:00":]  # no exception here
127
128    def test_append_nondatetimeindex(self):
129        rng = date_range("1/1/2000", periods=10)
130        idx = Index(["a", "b", "c", "d"])
131
132        result = rng.append(idx)
133        assert isinstance(result[0], Timestamp)
134
135    def test_iteration_preserves_tz(self):
136        # see gh-8890
137        index = date_range("2012-01-01", periods=3, freq="H", tz="US/Eastern")
138
139        for i, ts in enumerate(index):
140            result = ts
141            expected = index[i]
142            assert result == expected
143
144        index = date_range(
145            "2012-01-01", periods=3, freq="H", tz=dateutil.tz.tzoffset(None, -28800)
146        )
147
148        for i, ts in enumerate(index):
149            result = ts
150            expected = index[i]
151            assert result._repr_base == expected._repr_base
152            assert result == expected
153
154        # 9100
155        index = DatetimeIndex(
156            ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
157        )
158        for i, ts in enumerate(index):
159            result = ts
160            expected = index[i]
161            assert result._repr_base == expected._repr_base
162            assert result == expected
163
164    @pytest.mark.parametrize("periods", [0, 9999, 10000, 10001])
165    def test_iteration_over_chunksize(self, periods):
166        # GH21012
167
168        index = date_range("2000-01-01 00:00:00", periods=periods, freq="min")
169        num = 0
170        for stamp in index:
171            assert index[num] == stamp
172            num += 1
173        assert num == len(index)
174
175    def test_misc_coverage(self):
176        rng = date_range("1/1/2000", periods=5)
177        result = rng.groupby(rng.day)
178        assert isinstance(list(result.values())[0][0], Timestamp)
179
180    def test_string_index_series_name_converted(self):
181        # #1644
182        df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10))
183
184        result = df.loc["1/3/2000"]
185        assert result.name == df.index[2]
186
187        result = df.T["1/3/2000"]
188        assert result.name == df.index[2]
189
190    def test_argmin_argmax(self):
191        idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
192        assert idx.argmin() == 1
193        assert idx.argmax() == 0
194
195    def test_sort_values(self):
196        idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
197
198        ordered = idx.sort_values()
199        assert ordered.is_monotonic
200
201        ordered = idx.sort_values(ascending=False)
202        assert ordered[::-1].is_monotonic
203
204        ordered, dexer = idx.sort_values(return_indexer=True)
205        assert ordered.is_monotonic
206        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))
207
208        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
209        assert ordered[::-1].is_monotonic
210        tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))
211
212    def test_groupby_function_tuple_1677(self):
213        df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100))
214        monthly_group = df.groupby(lambda x: (x.year, x.month))
215
216        result = monthly_group.mean()
217        assert isinstance(result.index[0], tuple)
218
219    def test_append_numpy_bug_1681(self):
220        # another datetime64 bug
221        dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
222        a = DataFrame()
223        c = DataFrame({"A": "foo", "B": dr}, index=dr)
224
225        result = a.append(c)
226        assert (result["B"] == dr).all()
227
228    def test_isin(self):
229        index = tm.makeDateIndex(4)
230        result = index.isin(index)
231        assert result.all()
232
233        result = index.isin(list(index))
234        assert result.all()
235
236        tm.assert_almost_equal(
237            index.isin([index[2], 5]), np.array([False, False, True, False])
238        )
239
240    def assert_index_parameters(self, index):
241        assert index.freq == "40960N"
242        assert index.inferred_freq == "40960N"
243
244    def test_ns_index(self):
245        nsamples = 400
246        ns = int(1e9 / 24414)
247        dtstart = np.datetime64("2012-09-20T00:00:00")
248
249        dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
250        freq = ns * offsets.Nano()
251        index = DatetimeIndex(dt, freq=freq, name="time")
252        self.assert_index_parameters(index)
253
254        new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
255        self.assert_index_parameters(new_index)
256
257    def test_factorize(self):
258        idx1 = DatetimeIndex(
259            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
260        )
261
262        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
263        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
264
265        arr, idx = idx1.factorize()
266        tm.assert_numpy_array_equal(arr, exp_arr)
267        tm.assert_index_equal(idx, exp_idx)
268        assert idx.freq == exp_idx.freq
269
270        arr, idx = idx1.factorize(sort=True)
271        tm.assert_numpy_array_equal(arr, exp_arr)
272        tm.assert_index_equal(idx, exp_idx)
273        assert idx.freq == exp_idx.freq
274
275        # tz must be preserved
276        idx1 = idx1.tz_localize("Asia/Tokyo")
277        exp_idx = exp_idx.tz_localize("Asia/Tokyo")
278
279        arr, idx = idx1.factorize()
280        tm.assert_numpy_array_equal(arr, exp_arr)
281        tm.assert_index_equal(idx, exp_idx)
282        assert idx.freq == exp_idx.freq
283
284        idx2 = DatetimeIndex(
285            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
286        )
287
288        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
289        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
290        arr, idx = idx2.factorize(sort=True)
291        tm.assert_numpy_array_equal(arr, exp_arr)
292        tm.assert_index_equal(idx, exp_idx)
293        assert idx.freq == exp_idx.freq
294
295        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
296        exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
297        arr, idx = idx2.factorize()
298        tm.assert_numpy_array_equal(arr, exp_arr)
299        tm.assert_index_equal(idx, exp_idx)
300        assert idx.freq == exp_idx.freq
301
302    def test_factorize_preserves_freq(self):
303        # GH#38120 freq should be preserved
304        idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
305        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
306
307        arr, idx = idx3.factorize()
308        tm.assert_numpy_array_equal(arr, exp_arr)
309        tm.assert_index_equal(idx, idx3)
310        assert idx.freq == idx3.freq
311
312        arr, idx = pd.factorize(idx3)
313        tm.assert_numpy_array_equal(arr, exp_arr)
314        tm.assert_index_equal(idx, idx3)
315        assert idx.freq == idx3.freq
316
317    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
318        tz = tz_naive_fixture
319        # GH#13750
320        base = date_range("2016-11-05", freq="H", periods=100, tz=tz)
321        idx = base.repeat(5)
322
323        exp_arr = np.arange(100, dtype=np.intp).repeat(5)
324
325        obj = index_or_series(idx)
326
327        arr, res = obj.factorize()
328        tm.assert_numpy_array_equal(arr, exp_arr)
329        expected = base._with_freq(None)
330        tm.assert_index_equal(res, expected)
331        assert res.freq == expected.freq
332
333    def test_factorize_dst(self, index_or_series):
334        # GH 13750
335        idx = date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
336        obj = index_or_series(idx)
337
338        arr, res = obj.factorize()
339        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
340        tm.assert_index_equal(res, idx)
341        if index_or_series is Index:
342            assert res.freq == idx.freq
343
344        idx = date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
345        obj = index_or_series(idx)
346
347        arr, res = obj.factorize()
348        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
349        tm.assert_index_equal(res, idx)
350        if index_or_series is Index:
351            assert res.freq == idx.freq
352
353    @pytest.mark.parametrize(
354        "arr, expected",
355        [
356            (DatetimeIndex(["2017", "2017"]), DatetimeIndex(["2017"])),
357            (
358                DatetimeIndex(["2017", "2017"], tz="US/Eastern"),
359                DatetimeIndex(["2017"], tz="US/Eastern"),
360            ),
361        ],
362    )
363    def test_unique(self, arr, expected):
364        result = arr.unique()
365        tm.assert_index_equal(result, expected)
366        # GH 21737
367        # Ensure the underlying data is consistent
368        assert result[0] == expected[0]
369
370    def test_asarray_tz_naive(self):
371        # This shouldn't produce a warning.
372        idx = date_range("2000", periods=2)
373        # M8[ns] by default
374        result = np.asarray(idx)
375
376        expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
377        tm.assert_numpy_array_equal(result, expected)
378
379        # optionally, object
380        result = np.asarray(idx, dtype=object)
381
382        expected = np.array([pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02")])
383        tm.assert_numpy_array_equal(result, expected)
384
385    def test_asarray_tz_aware(self):
386        tz = "US/Central"
387        idx = date_range("2000", periods=2, tz=tz)
388        expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
389        result = np.asarray(idx, dtype="datetime64[ns]")
390
391        tm.assert_numpy_array_equal(result, expected)
392
393        # Old behavior with no warning
394        result = np.asarray(idx, dtype="M8[ns]")
395
396        tm.assert_numpy_array_equal(result, expected)
397
398        # Future behavior with no warning
399        expected = np.array(
400            [pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)]
401        )
402        result = np.asarray(idx, dtype=object)
403
404        tm.assert_numpy_array_equal(result, expected)
405
406    def test_to_frame_datetime_tz(self):
407        # GH 25809
408        idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
409        result = idx.to_frame()
410        expected = DataFrame(idx, index=idx)
411        tm.assert_frame_equal(result, expected)
412
413    def test_split_non_utc(self):
414        # GH 14042
415        indices = date_range("2016-01-01 00:00:00+0200", freq="S", periods=10)
416        result = np.split(indices, indices_or_sections=[])[0]
417        expected = indices._with_freq(None)
418        tm.assert_index_equal(result, expected)
419