1from datetime import date, timedelta 2 3import dateutil 4import numpy as np 5import pytest 6 7import pandas as pd 8from pandas import DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets 9import pandas._testing as tm 10 11randn = np.random.randn 12 13 14class TestDatetimeIndex: 15 def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): 16 # GH7774 17 index = date_range("20130101", periods=3, tz="US/Eastern") 18 assert str(index.reindex([])[0].tz) == "US/Eastern" 19 assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern" 20 21 def test_reindex_with_same_tz(self): 22 # GH 32740 23 rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc") 24 rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc") 25 result1, result2 = rng_a.reindex( 26 rng_b, method="nearest", tolerance=timedelta(seconds=20) 27 ) 28 expected_list1 = [ 29 "2010-01-01 00:00:00", 30 "2010-01-01 01:05:27.272727272", 31 "2010-01-01 02:10:54.545454545", 32 "2010-01-01 03:16:21.818181818", 33 "2010-01-01 04:21:49.090909090", 34 "2010-01-01 05:27:16.363636363", 35 "2010-01-01 06:32:43.636363636", 36 "2010-01-01 07:38:10.909090909", 37 "2010-01-01 08:43:38.181818181", 38 "2010-01-01 09:49:05.454545454", 39 "2010-01-01 10:54:32.727272727", 40 "2010-01-01 12:00:00", 41 "2010-01-01 13:05:27.272727272", 42 "2010-01-01 14:10:54.545454545", 43 "2010-01-01 15:16:21.818181818", 44 "2010-01-01 16:21:49.090909090", 45 "2010-01-01 17:27:16.363636363", 46 "2010-01-01 18:32:43.636363636", 47 "2010-01-01 19:38:10.909090909", 48 "2010-01-01 20:43:38.181818181", 49 "2010-01-01 21:49:05.454545454", 50 "2010-01-01 22:54:32.727272727", 51 "2010-01-02 00:00:00", 52 ] 53 expected1 = DatetimeIndex( 54 expected_list1, dtype="datetime64[ns, UTC]", freq=None 55 ) 56 expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp")) 57 tm.assert_index_equal(result1, expected1) 58 tm.assert_numpy_array_equal(result2, expected2) 59 60 def test_time_loc(self): # GH8667 61 from datetime import time 62 63 from pandas._libs.index import _SIZE_CUTOFF 64 65 ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) 66 key = time(15, 11, 30) 67 start = key.hour * 3600 + key.minute * 60 + key.second 68 step = 24 * 3600 69 70 for n in ns: 71 idx = date_range("2014-11-26", periods=n, freq="S") 72 ts = pd.Series(np.random.randn(n), index=idx) 73 i = np.arange(start, n, step) 74 75 tm.assert_numpy_array_equal(ts.index.get_loc(key), i, check_dtype=False) 76 tm.assert_series_equal(ts[key], ts.iloc[i]) 77 78 left, right = ts.copy(), ts.copy() 79 left[key] *= -10 80 right.iloc[i] *= -10 81 tm.assert_series_equal(left, right) 82 83 def test_time_overflow_for_32bit_machines(self): 84 # GH8943. On some machines NumPy defaults to np.int32 (for example, 85 # 32-bit Linux machines). In the function _generate_regular_range 86 # found in tseries/index.py, `periods` gets multiplied by `strides` 87 # (which has value 1e9) and since the max value for np.int32 is ~2e9, 88 # and since those machines won't promote np.int32 to np.int64, we get 89 # overflow. 90 periods = np.int_(1000) 91 92 idx1 = date_range(start="2000", periods=periods, freq="S") 93 assert len(idx1) == periods 94 95 idx2 = date_range(end="2000", periods=periods, freq="S") 96 assert len(idx2) == periods 97 98 def test_nat(self): 99 assert DatetimeIndex([np.nan])[0] is pd.NaT 100 101 def test_week_of_month_frequency(self): 102 # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise 103 d1 = date(2002, 9, 1) 104 d2 = date(2013, 10, 27) 105 d3 = date(2012, 9, 30) 106 idx1 = DatetimeIndex([d1, d2]) 107 idx2 = DatetimeIndex([d3]) 108 result_append = idx1.append(idx2) 109 expected = DatetimeIndex([d1, d2, d3]) 110 tm.assert_index_equal(result_append, expected) 111 result_union = idx1.union(idx2) 112 expected = DatetimeIndex([d1, d3, d2]) 113 tm.assert_index_equal(result_union, expected) 114 115 # GH 5115 116 result = date_range("2013-1-1", periods=4, freq="WOM-1SAT") 117 dates = ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"] 118 expected = DatetimeIndex(dates, freq="WOM-1SAT") 119 tm.assert_index_equal(result, expected) 120 121 def test_stringified_slice_with_tz(self): 122 # GH#2658 123 start = "2013-01-07" 124 idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern") 125 df = DataFrame(np.arange(10), index=idx) 126 df["2013-01-14 23:44:34.437768-05:00":] # no exception here 127 128 def test_append_nondatetimeindex(self): 129 rng = date_range("1/1/2000", periods=10) 130 idx = Index(["a", "b", "c", "d"]) 131 132 result = rng.append(idx) 133 assert isinstance(result[0], Timestamp) 134 135 def test_iteration_preserves_tz(self): 136 # see gh-8890 137 index = date_range("2012-01-01", periods=3, freq="H", tz="US/Eastern") 138 139 for i, ts in enumerate(index): 140 result = ts 141 expected = index[i] 142 assert result == expected 143 144 index = date_range( 145 "2012-01-01", periods=3, freq="H", tz=dateutil.tz.tzoffset(None, -28800) 146 ) 147 148 for i, ts in enumerate(index): 149 result = ts 150 expected = index[i] 151 assert result._repr_base == expected._repr_base 152 assert result == expected 153 154 # 9100 155 index = DatetimeIndex( 156 ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"] 157 ) 158 for i, ts in enumerate(index): 159 result = ts 160 expected = index[i] 161 assert result._repr_base == expected._repr_base 162 assert result == expected 163 164 @pytest.mark.parametrize("periods", [0, 9999, 10000, 10001]) 165 def test_iteration_over_chunksize(self, periods): 166 # GH21012 167 168 index = date_range("2000-01-01 00:00:00", periods=periods, freq="min") 169 num = 0 170 for stamp in index: 171 assert index[num] == stamp 172 num += 1 173 assert num == len(index) 174 175 def test_misc_coverage(self): 176 rng = date_range("1/1/2000", periods=5) 177 result = rng.groupby(rng.day) 178 assert isinstance(list(result.values())[0][0], Timestamp) 179 180 def test_string_index_series_name_converted(self): 181 # #1644 182 df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10)) 183 184 result = df.loc["1/3/2000"] 185 assert result.name == df.index[2] 186 187 result = df.T["1/3/2000"] 188 assert result.name == df.index[2] 189 190 def test_argmin_argmax(self): 191 idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) 192 assert idx.argmin() == 1 193 assert idx.argmax() == 0 194 195 def test_sort_values(self): 196 idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) 197 198 ordered = idx.sort_values() 199 assert ordered.is_monotonic 200 201 ordered = idx.sort_values(ascending=False) 202 assert ordered[::-1].is_monotonic 203 204 ordered, dexer = idx.sort_values(return_indexer=True) 205 assert ordered.is_monotonic 206 tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) 207 208 ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) 209 assert ordered[::-1].is_monotonic 210 tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) 211 212 def test_groupby_function_tuple_1677(self): 213 df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100)) 214 monthly_group = df.groupby(lambda x: (x.year, x.month)) 215 216 result = monthly_group.mean() 217 assert isinstance(result.index[0], tuple) 218 219 def test_append_numpy_bug_1681(self): 220 # another datetime64 bug 221 dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") 222 a = DataFrame() 223 c = DataFrame({"A": "foo", "B": dr}, index=dr) 224 225 result = a.append(c) 226 assert (result["B"] == dr).all() 227 228 def test_isin(self): 229 index = tm.makeDateIndex(4) 230 result = index.isin(index) 231 assert result.all() 232 233 result = index.isin(list(index)) 234 assert result.all() 235 236 tm.assert_almost_equal( 237 index.isin([index[2], 5]), np.array([False, False, True, False]) 238 ) 239 240 def assert_index_parameters(self, index): 241 assert index.freq == "40960N" 242 assert index.inferred_freq == "40960N" 243 244 def test_ns_index(self): 245 nsamples = 400 246 ns = int(1e9 / 24414) 247 dtstart = np.datetime64("2012-09-20T00:00:00") 248 249 dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns") 250 freq = ns * offsets.Nano() 251 index = DatetimeIndex(dt, freq=freq, name="time") 252 self.assert_index_parameters(index) 253 254 new_index = date_range(start=index[0], end=index[-1], freq=index.freq) 255 self.assert_index_parameters(new_index) 256 257 def test_factorize(self): 258 idx1 = DatetimeIndex( 259 ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"] 260 ) 261 262 exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) 263 exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"]) 264 265 arr, idx = idx1.factorize() 266 tm.assert_numpy_array_equal(arr, exp_arr) 267 tm.assert_index_equal(idx, exp_idx) 268 assert idx.freq == exp_idx.freq 269 270 arr, idx = idx1.factorize(sort=True) 271 tm.assert_numpy_array_equal(arr, exp_arr) 272 tm.assert_index_equal(idx, exp_idx) 273 assert idx.freq == exp_idx.freq 274 275 # tz must be preserved 276 idx1 = idx1.tz_localize("Asia/Tokyo") 277 exp_idx = exp_idx.tz_localize("Asia/Tokyo") 278 279 arr, idx = idx1.factorize() 280 tm.assert_numpy_array_equal(arr, exp_arr) 281 tm.assert_index_equal(idx, exp_idx) 282 assert idx.freq == exp_idx.freq 283 284 idx2 = DatetimeIndex( 285 ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"] 286 ) 287 288 exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) 289 exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"]) 290 arr, idx = idx2.factorize(sort=True) 291 tm.assert_numpy_array_equal(arr, exp_arr) 292 tm.assert_index_equal(idx, exp_idx) 293 assert idx.freq == exp_idx.freq 294 295 exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) 296 exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"]) 297 arr, idx = idx2.factorize() 298 tm.assert_numpy_array_equal(arr, exp_arr) 299 tm.assert_index_equal(idx, exp_idx) 300 assert idx.freq == exp_idx.freq 301 302 def test_factorize_preserves_freq(self): 303 # GH#38120 freq should be preserved 304 idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo") 305 exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) 306 307 arr, idx = idx3.factorize() 308 tm.assert_numpy_array_equal(arr, exp_arr) 309 tm.assert_index_equal(idx, idx3) 310 assert idx.freq == idx3.freq 311 312 arr, idx = pd.factorize(idx3) 313 tm.assert_numpy_array_equal(arr, exp_arr) 314 tm.assert_index_equal(idx, idx3) 315 assert idx.freq == idx3.freq 316 317 def test_factorize_tz(self, tz_naive_fixture, index_or_series): 318 tz = tz_naive_fixture 319 # GH#13750 320 base = date_range("2016-11-05", freq="H", periods=100, tz=tz) 321 idx = base.repeat(5) 322 323 exp_arr = np.arange(100, dtype=np.intp).repeat(5) 324 325 obj = index_or_series(idx) 326 327 arr, res = obj.factorize() 328 tm.assert_numpy_array_equal(arr, exp_arr) 329 expected = base._with_freq(None) 330 tm.assert_index_equal(res, expected) 331 assert res.freq == expected.freq 332 333 def test_factorize_dst(self, index_or_series): 334 # GH 13750 335 idx = date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern") 336 obj = index_or_series(idx) 337 338 arr, res = obj.factorize() 339 tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) 340 tm.assert_index_equal(res, idx) 341 if index_or_series is Index: 342 assert res.freq == idx.freq 343 344 idx = date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern") 345 obj = index_or_series(idx) 346 347 arr, res = obj.factorize() 348 tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) 349 tm.assert_index_equal(res, idx) 350 if index_or_series is Index: 351 assert res.freq == idx.freq 352 353 @pytest.mark.parametrize( 354 "arr, expected", 355 [ 356 (DatetimeIndex(["2017", "2017"]), DatetimeIndex(["2017"])), 357 ( 358 DatetimeIndex(["2017", "2017"], tz="US/Eastern"), 359 DatetimeIndex(["2017"], tz="US/Eastern"), 360 ), 361 ], 362 ) 363 def test_unique(self, arr, expected): 364 result = arr.unique() 365 tm.assert_index_equal(result, expected) 366 # GH 21737 367 # Ensure the underlying data is consistent 368 assert result[0] == expected[0] 369 370 def test_asarray_tz_naive(self): 371 # This shouldn't produce a warning. 372 idx = date_range("2000", periods=2) 373 # M8[ns] by default 374 result = np.asarray(idx) 375 376 expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") 377 tm.assert_numpy_array_equal(result, expected) 378 379 # optionally, object 380 result = np.asarray(idx, dtype=object) 381 382 expected = np.array([pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02")]) 383 tm.assert_numpy_array_equal(result, expected) 384 385 def test_asarray_tz_aware(self): 386 tz = "US/Central" 387 idx = date_range("2000", periods=2, tz=tz) 388 expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]") 389 result = np.asarray(idx, dtype="datetime64[ns]") 390 391 tm.assert_numpy_array_equal(result, expected) 392 393 # Old behavior with no warning 394 result = np.asarray(idx, dtype="M8[ns]") 395 396 tm.assert_numpy_array_equal(result, expected) 397 398 # Future behavior with no warning 399 expected = np.array( 400 [pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)] 401 ) 402 result = np.asarray(idx, dtype=object) 403 404 tm.assert_numpy_array_equal(result, expected) 405 406 def test_to_frame_datetime_tz(self): 407 # GH 25809 408 idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC") 409 result = idx.to_frame() 410 expected = DataFrame(idx, index=idx) 411 tm.assert_frame_equal(result, expected) 412 413 def test_split_non_utc(self): 414 # GH 14042 415 indices = date_range("2016-01-01 00:00:00+0200", freq="S", periods=10) 416 result = np.split(indices, indices_or_sections=[])[0] 417 expected = indices._with_freq(None) 418 tm.assert_index_equal(result, expected) 419