import warnings
from datetime import timedelta
from itertools import product

import numpy as np
import pandas as pd
import pytest
from pandas.errors import OutOfBoundsDatetime

from xarray import (
    DataArray,
    Dataset,
    Variable,
    cftime_range,
    coding,
    conventions,
    decode_cf,
)
from xarray.coding.times import (
    _encode_datetime_with_cftime,
    cftime_to_nptime,
    decode_cf_datetime,
    encode_cf_datetime,
    to_timedelta_unboxed,
)
from xarray.coding.variables import SerializationWarning
from xarray.conventions import _update_bounds_attributes, cf_encoder
from xarray.core.common import contains_cftime_datetimes
from xarray.testing import assert_equal, assert_identical

from . import (
    arm_xfail,
    assert_array_equal,
    has_cftime,
    has_cftime_1_4_1,
    requires_cftime,
    requires_cftime_1_4_1,
    requires_dask,
)

# Calendars that cannot be represented with numpy datetime64; decoding them
# must produce cftime objects instead of pandas/numpy timestamps.
_NON_STANDARD_CALENDARS_SET = {
    "noleap",
    "365_day",
    "360_day",
    "julian",
    "all_leap",
    "366_day",
}
# sorted() so pytest parametrize IDs are deterministic across runs.
_ALL_CALENDARS = sorted(
    _NON_STANDARD_CALENDARS_SET.union(coding.times._STANDARD_CALENDARS)
)
_NON_STANDARD_CALENDARS = sorted(_NON_STANDARD_CALENDARS_SET)
_STANDARD_CALENDARS = sorted(coding.times._STANDARD_CALENDARS)

# (num_dates, units) pairs exercised by the decode/encode round-trip test
# below; covers scalars, lists, nested lists, 0-d and 2-d arrays, ints and
# floats, plus deliberately sloppy unit strings.
_CF_DATETIME_NUM_DATES_UNITS = [
    (np.arange(10), "days since 2000-01-01"),
    (np.arange(10).astype("float64"), "days since 2000-01-01"),
    (np.arange(10).astype("float32"), "days since 2000-01-01"),
    (np.arange(10).reshape(2, 5), "days since 2000-01-01"),
    (12300 + np.arange(5), "hours since 1680-01-01 00:00:00"),
    # here we add a couple minor formatting errors to test
    # the robustness of the parsing algorithm.
    (12300 + np.arange(5), "hour since 1680-01-01 00:00:00"),
    (12300 + np.arange(5), "Hour since 1680-01-01 00:00:00"),
    (12300 + np.arange(5), " Hour since 1680-01-01 00:00:00 "),
    (10, "days since 2000-01-01"),
    ([10], "daYs since 2000-01-01"),
    ([[10]], "days since 2000-01-01"),
    ([10, 10], "days since 2000-01-01"),
    (np.array(10), "days since 2000-01-01"),
    (0, "days since 1000-01-01"),
    ([0], "days since 1000-01-01"),
    ([[0]], "days since 1000-01-01"),
    (np.arange(2), "days since 1000-01-01"),
    (np.arange(0, 100000, 20000), "days since 1900-01-01"),
    (np.arange(0, 100000, 20000), "days since 1-01-01"),
    (17093352.0, "hours since 1-1-1 00:00:0.0"),
    ([0.5, 1.5], "hours since 1900-01-01T00:00:00"),
    (0, "milliseconds since 2000-01-01T00:00:00"),
    (0, "microseconds since 2000-01-01T00:00:00"),
    (np.int32(788961600), "seconds since 1981-01-01"),  # GH2002
    (12300 + np.arange(5), "hour since 1680-01-01 00:00:00.500000"),
    (164375, "days since 1850-01-01 00:00:00"),
    (164374.5, "days since 1850-01-01 00:00:00"),
    ([164374.5, 168360.5], "days since 1850-01-01 00:00:00"),
]
# Cross every (num_dates, units) case with every standard calendar.
_CF_DATETIME_TESTS = [
    num_dates_units + (calendar,)
    for num_dates_units, calendar in product(
        _CF_DATETIME_NUM_DATES_UNITS, _STANDARD_CALENDARS
    )
]


def _all_cftime_date_types():
    """Map each supported calendar name to its cftime datetime class."""
    import cftime

    return {
        "noleap": cftime.DatetimeNoLeap,
        "365_day": cftime.DatetimeNoLeap,
        "360_day": cftime.Datetime360Day,
        "julian": cftime.DatetimeJulian,
        "all_leap": cftime.DatetimeAllLeap,
        "366_day": cftime.DatetimeAllLeap,
        "gregorian": cftime.DatetimeGregorian,
        "proleptic_gregorian": cftime.DatetimeProlepticGregorian,
    }


@requires_cftime
@pytest.mark.filterwarnings("ignore:Ambiguous reference date string")
@pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS)
def test_cf_datetime(num_dates, units, calendar) -> None:
    """Round-trip decode/encode of CF numeric dates against cftime.num2date."""
    import cftime

    expected = cftime.num2date(
        num_dates, units, calendar, only_use_cftime_datetimes=True
    )
    # Only fall back to datetime64 comparison when all dates fit inside the
    # pandas.Timestamp-representable range (~1678-2262).
    min_y = np.ravel(np.atleast_1d(expected))[np.nanargmin(num_dates)].year
    max_y = np.ravel(np.atleast_1d(expected))[np.nanargmax(num_dates)].year
    if min_y >= 1678 and max_y < 2262:
        expected = cftime_to_nptime(expected)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unable to decode time axis")
        actual = coding.times.decode_cf_datetime(num_dates, units, calendar)

    abs_diff = np.asarray(abs(actual - expected)).ravel()
    abs_diff = pd.to_timedelta(abs_diff.tolist()).to_numpy()

    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff <= np.timedelta64(1, "s")).all()
    encoded, _, _ = coding.times.encode_cf_datetime(actual, units, calendar)

    assert_array_equal(num_dates, np.around(encoded, 1))
    if hasattr(num_dates, "ndim") and num_dates.ndim == 1 and "1000" not in units:
        # verify that wrapping with a pandas.Index works
        # note that it *does not* currently work to put
        # non-datetime64 compatible dates into a pandas.Index
        encoded, _, _ = coding.times.encode_cf_datetime(
            pd.Index(actual), units, calendar
        )
        assert_array_equal(num_dates, np.around(encoded, 1))


@requires_cftime
def test_decode_cf_datetime_overflow() -> None:
    """Dates outside the Timestamp range decode to cftime objects, not errors."""
    # checks for
    # https://github.com/pydata/pandas/issues/14068
    # https://github.com/pydata/xarray/issues/975
    from cftime import DatetimeGregorian

    datetime = DatetimeGregorian
    units = "days since 2000-01-01 00:00:00"

    # date after 2262 and before 1678
    days = (-117608, 95795)
    expected = (datetime(1677, 12, 31), datetime(2262, 4, 12))

    for i, day in enumerate(days):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unable to decode time axis")
            result = coding.times.decode_cf_datetime(day, units)
        assert result == expected[i]
def test_decode_cf_datetime_non_standard_units() -> None:
    """Units that cftime cannot parse still decode via pandas.Timestamp."""
    expected = pd.date_range(periods=100, start="1970-01-01", freq="h")
    # netCDFs from madis.noaa.gov use this format for their time units
    # they cannot be parsed by cftime, but pd.Timestamp works
    units = "hours since 1-1-1970"
    actual = coding.times.decode_cf_datetime(np.arange(100), units)
    assert_array_equal(actual, expected)


@requires_cftime
def test_decode_cf_datetime_non_iso_strings() -> None:
    """Almost-ISO reference dates that cftime.num2date can still parse."""
    # datetime strings that are _almost_ ISO compliant but not quite,
    # but which cftime.num2date can still parse correctly
    expected = pd.date_range(periods=100, start="2000-01-01", freq="h")
    cases = [
        (np.arange(100), "hours since 2000-01-01 0"),
        (np.arange(100), "hours since 2000-1-1 0"),
        (np.arange(100), "hours since 2000-01-01 0:00"),
    ]
    for num_dates, units in cases:
        actual = coding.times.decode_cf_datetime(num_dates, units)
        abs_diff = abs(actual - expected.values)
        # once we no longer support versions of netCDF4 older than 1.1.5,
        # we could do this check with near microsecond accuracy:
        # https://github.com/Unidata/netcdf4-python/issues/355
        assert (abs_diff <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_decode_standard_calendar_inside_timestamp_range(calendar) -> None:
    """Standard calendars inside the Timestamp range decode to datetime64[ns]."""
    import cftime

    units = "days since 0001-01-01"
    times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H")
    time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar)
    expected = times.values
    expected_dtype = np.dtype("M8[ns]")

    actual = coding.times.decode_cf_datetime(time, units, calendar=calendar)
    assert actual.dtype == expected_dtype
    abs_diff = abs(actual - expected)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
def test_decode_non_standard_calendar_inside_timestamp_range(calendar) -> None:
    """Non-standard calendars decode to object arrays of cftime datetimes."""
    import cftime

    units = "days since 0001-01-01"
    times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H")
    non_standard_time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar)

    expected = cftime.num2date(
        non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True
    )
    expected_dtype = np.dtype("O")

    actual = coding.times.decode_cf_datetime(
        non_standard_time, units, calendar=calendar
    )
    assert actual.dtype == expected_dtype
    abs_diff = abs(actual - expected)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
def test_decode_dates_outside_timestamp_range(calendar) -> None:
    """Dates before 1678 decode to cftime objects for every calendar."""
    from datetime import datetime

    import cftime

    units = "days since 0001-01-01"
    times = [datetime(1, 4, 1, h) for h in range(1, 5)]
    time = cftime.date2num(times, units, calendar=calendar)

    expected = cftime.num2date(
        time, units, calendar=calendar, only_use_cftime_datetimes=True
    )
    expected_date_type = type(expected[0])

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unable to decode time axis")
        actual = coding.times.decode_cf_datetime(time, units, calendar=calendar)
    assert all(isinstance(value, expected_date_type) for value in actual)
    abs_diff = abs(actual - expected)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_decode_standard_calendar_single_element_inside_timestamp_range(
    calendar,
) -> None:
    """Scalar and nested-list inputs with standard calendars yield M8[ns]."""
    units = "days since 0001-01-01"
    for num_time in [735368, [735368], [[735368]]]:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unable to decode time axis")
            actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar)
        assert actual.dtype == np.dtype("M8[ns]")


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
def test_decode_non_standard_calendar_single_element_inside_timestamp_range(
    calendar,
) -> None:
    """Scalar and nested-list inputs with non-standard calendars yield objects."""
    units = "days since 0001-01-01"
    for num_time in [735368, [735368], [[735368]]]:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unable to decode time axis")
            actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar)
        assert actual.dtype == np.dtype("O")


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
def test_decode_single_element_outside_timestamp_range(calendar) -> None:
    """Single out-of-range elements decode to the matching cftime type."""
    import cftime

    units = "days since 0001-01-01"
    for days in [1, 1470376]:
        for num_time in [days, [days], [[days]]]:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", "Unable to decode time axis")
                actual = coding.times.decode_cf_datetime(
                    num_time, units, calendar=calendar
                )

            expected = cftime.num2date(
                days, units, calendar, only_use_cftime_datetimes=True
            )
            assert isinstance(actual.item(), type(expected))
@requires_cftime
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_decode_standard_calendar_multidim_time_inside_timestamp_range(
    calendar,
) -> None:
    """2-d numeric time decodes column-wise to datetime64 for standard calendars."""
    import cftime

    units = "days since 0001-01-01"
    times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D")
    times2 = pd.date_range("2001-05-01", end="2001-05-05", freq="D")
    time1 = cftime.date2num(times1.to_pydatetime(), units, calendar=calendar)
    time2 = cftime.date2num(times2.to_pydatetime(), units, calendar=calendar)
    mdim_time = np.empty((len(time1), 2))
    mdim_time[:, 0] = time1
    mdim_time[:, 1] = time2

    expected1 = times1.values
    expected2 = times2.values

    actual = coding.times.decode_cf_datetime(mdim_time, units, calendar=calendar)
    assert actual.dtype == np.dtype("M8[ns]")

    abs_diff1 = abs(actual[:, 0] - expected1)
    abs_diff2 = abs(actual[:, 1] - expected2)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff1 <= np.timedelta64(1, "s")).all()
    assert (abs_diff2 <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(
    calendar,
) -> None:
    """2-d numeric time decodes column-wise to cftime objects for other calendars."""
    import cftime

    units = "days since 0001-01-01"
    times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D")
    times2 = pd.date_range("2001-05-01", end="2001-05-05", freq="D")
    time1 = cftime.date2num(times1.to_pydatetime(), units, calendar=calendar)
    time2 = cftime.date2num(times2.to_pydatetime(), units, calendar=calendar)
    mdim_time = np.empty((len(time1), 2))
    mdim_time[:, 0] = time1
    mdim_time[:, 1] = time2

    # NOTE: a previous version branched on ``cftime.__name__ == "cftime"``,
    # a leftover from the netcdftime fallback era; the module imported above
    # is always named "cftime", so the else branch was dead code.
    expected1 = cftime.num2date(
        time1, units, calendar, only_use_cftime_datetimes=True
    )
    expected2 = cftime.num2date(
        time2, units, calendar, only_use_cftime_datetimes=True
    )

    expected_dtype = np.dtype("O")

    actual = coding.times.decode_cf_datetime(mdim_time, units, calendar=calendar)

    assert actual.dtype == expected_dtype
    abs_diff1 = abs(actual[:, 0] - expected1)
    abs_diff2 = abs(actual[:, 1] - expected2)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff1 <= np.timedelta64(1, "s")).all()
    assert (abs_diff2 <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
def test_decode_multidim_time_outside_timestamp_range(calendar) -> None:
    """2-d out-of-range time decodes to object arrays for every calendar."""
    from datetime import datetime

    import cftime

    units = "days since 0001-01-01"
    times1 = [datetime(1, 4, day) for day in range(1, 6)]
    times2 = [datetime(1, 5, day) for day in range(1, 6)]
    time1 = cftime.date2num(times1, units, calendar=calendar)
    time2 = cftime.date2num(times2, units, calendar=calendar)
    mdim_time = np.empty((len(time1), 2))
    mdim_time[:, 0] = time1
    mdim_time[:, 1] = time2

    expected1 = cftime.num2date(time1, units, calendar, only_use_cftime_datetimes=True)
    expected2 = cftime.num2date(time2, units, calendar, only_use_cftime_datetimes=True)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unable to decode time axis")
        actual = coding.times.decode_cf_datetime(mdim_time, units, calendar=calendar)

    assert actual.dtype == np.dtype("O")

    abs_diff1 = abs(actual[:, 0] - expected1)
    abs_diff2 = abs(actual[:, 1] - expected2)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff1 <= np.timedelta64(1, "s")).all()
    assert (abs_diff2 <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize(
    ("calendar", "num_time"),
    [("360_day", 720058.0), ("all_leap", 732059.0), ("366_day", 732059.0)],
)
def test_decode_non_standard_calendar_single_element(calendar, num_time) -> None:
    """Single values on calendars without a numpy equivalent decode to objects."""
    import cftime

    units = "days since 0001-01-01"

    actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar)

    expected = np.asarray(
        cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True)
    )
    assert actual.dtype == np.dtype("O")
    assert expected == actual


@requires_cftime
def test_decode_360_day_calendar() -> None:
    """360_day decoding emits no warnings and matches cftime for any year."""
    import cftime

    calendar = "360_day"
    # ensure leap year doesn't matter
    for year in [2010, 2011, 2012, 2013, 2014]:
        units = f"days since {year}-01-01"
        num_times = np.arange(100)

        expected = cftime.num2date(
            num_times, units, calendar, only_use_cftime_datetimes=True
        )

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            actual = coding.times.decode_cf_datetime(
                num_times, units, calendar=calendar
            )
            assert len(w) == 0

        assert actual.dtype == np.dtype("O")
        assert_array_equal(actual, expected)


@requires_cftime
def test_decode_abbreviation() -> None:
    """Test making sure we properly fall back to cftime on abbreviated units."""
    import cftime

    val = np.array([1586628000000.0])
    units = "msecs since 1970-01-01T00:00:00Z"
    actual = coding.times.decode_cf_datetime(val, units)
    expected = coding.times.cftime_to_nptime(cftime.num2date(val, units))
    assert_array_equal(actual, expected)


@arm_xfail
@requires_cftime
@pytest.mark.parametrize(
    ["num_dates", "units", "expected_list"],
    [
        ([np.nan], "days since 2000-01-01", ["NaT"]),
        ([np.nan, 0], "days since 2000-01-01", ["NaT", "2000-01-01T00:00:00Z"]),
        (
            [np.nan, 0, 1],
            "days since 2000-01-01",
            ["NaT", "2000-01-01T00:00:00Z", "2000-01-02T00:00:00Z"],
        ),
    ],
)
def test_cf_datetime_nan(num_dates, units, expected_list) -> None:
    """NaN inputs decode to NaT alongside normally decoded values."""
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "All-NaN")
        actual = coding.times.decode_cf_datetime(num_dates, units)
    # use pandas because numpy will deprecate timezone-aware conversions
    expected = pd.to_datetime(expected_list).to_numpy(dtype="datetime64[ns]")
    assert_array_equal(expected, actual)
@requires_cftime
def test_decoded_cf_datetime_array_2d() -> None:
    """CFDatetimeCoder preserves a 2-d shape when decoding."""
    # regression test for GH1229
    variable = Variable(
        ("x", "y"), np.array([[0, 1], [2, 3]]), {"units": "days since 2000-01-01"}
    )
    result = coding.times.CFDatetimeCoder().decode(variable)
    assert result.dtype == "datetime64[ns]"
    expected = pd.date_range("2000-01-01", periods=4).values.reshape(2, 2)
    assert_array_equal(np.asarray(result), expected)


# pandas frequency alias -> CF encoding unit name.
FREQUENCIES_TO_ENCODING_UNITS = {
    "N": "nanoseconds",
    "U": "microseconds",
    "L": "milliseconds",
    "S": "seconds",
    "T": "minutes",
    "H": "hours",
    "D": "days",
}


@pytest.mark.parametrize(("freq", "units"), FREQUENCIES_TO_ENCODING_UNITS.items())
def test_infer_datetime_units(freq, units) -> None:
    """infer_datetime_units picks the unit matching the data's frequency."""
    dates = pd.date_range("2000", periods=2, freq=freq)
    expected = f"{units} since 2000-01-01 00:00:00"
    assert expected == coding.times.infer_datetime_units(dates)


@pytest.mark.parametrize(
    ["dates", "expected"],
    [
        (
            pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"]),
            "days since 1900-01-01 00:00:00",
        ),
        (pd.to_datetime(["NaT", "1900-01-01"]), "days since 1900-01-01 00:00:00"),
        (pd.to_datetime(["NaT"]), "days since 1970-01-01 00:00:00"),
    ],
)
def test_infer_datetime_units_with_NaT(dates, expected) -> None:
    """NaT entries are ignored when inferring units; all-NaT uses the epoch."""
    assert expected == coding.times.infer_datetime_units(dates)


# (date constructor args, expected inferred units) for cftime inputs.
_CFTIME_DATETIME_UNITS_TESTS = [
    ([(1900, 1, 1), (1900, 1, 1)], "days since 1900-01-01 00:00:00.000000"),
    (
        [(1900, 1, 1), (1900, 1, 2), (1900, 1, 2, 0, 0, 1)],
        "seconds since 1900-01-01 00:00:00.000000",
    ),
    (
        [(1900, 1, 1), (1900, 1, 8), (1900, 1, 16)],
        "days since 1900-01-01 00:00:00.000000",
    ),
]


@requires_cftime
@pytest.mark.parametrize(
    "calendar", _NON_STANDARD_CALENDARS + ["gregorian", "proleptic_gregorian"]
)
@pytest.mark.parametrize(("date_args", "expected"), _CFTIME_DATETIME_UNITS_TESTS)
def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None:
    """Unit inference also works for lists of cftime datetime objects."""
    date_type = _all_cftime_date_types()[calendar]
    dates = [date_type(*args) for args in date_args]
    assert expected == coding.times.infer_datetime_units(dates)


@pytest.mark.parametrize(
    ["timedeltas", "units", "numbers"],
    [
        ("1D", "days", np.int64(1)),
        (["1D", "2D", "3D"], "days", np.array([1, 2, 3], "int64")),
        ("1h", "hours", np.int64(1)),
        ("1ms", "milliseconds", np.int64(1)),
        ("1us", "microseconds", np.int64(1)),
        ("1ns", "nanoseconds", np.int64(1)),
        (["NaT", "0s", "1s"], None, [np.nan, 0, 1]),
        (["30m", "60m"], "hours", [0.5, 1.0]),
        ("NaT", "days", np.nan),
        (["NaT", "NaT"], "days", [np.nan, np.nan]),
    ],
)
def test_cf_timedelta(timedeltas, units, numbers) -> None:
    """Round-trip encode/decode of timedeltas, including NaT handling."""
    if timedeltas == "NaT":
        timedeltas = np.timedelta64("NaT", "ns")
    else:
        timedeltas = to_timedelta_unboxed(timedeltas)
    numbers = np.array(numbers)

    expected = numbers
    actual, _ = coding.times.encode_cf_timedelta(timedeltas, units)
    assert_array_equal(expected, actual)
    assert expected.dtype == actual.dtype

    if units is not None:
        expected = timedeltas
        actual = coding.times.decode_cf_timedelta(numbers, units)
        assert_array_equal(expected, actual)
        assert expected.dtype == actual.dtype

    expected = np.timedelta64("NaT", "ns")
    actual = coding.times.decode_cf_timedelta(np.array(np.nan), "days")
    assert_array_equal(expected, actual)


def test_cf_timedelta_2d() -> None:
    """decode_cf_timedelta preserves a 2-d input shape."""
    units = "days"
    numbers = np.atleast_2d([1, 2, 3])

    timedeltas = np.atleast_2d(to_timedelta_unboxed(["1D", "2D", "3D"]))
    expected = timedeltas

    actual = coding.times.decode_cf_timedelta(numbers, units)
    assert_array_equal(expected, actual)
    assert expected.dtype == actual.dtype  # type: ignore


@pytest.mark.parametrize(
    ["deltas", "expected"],
    [
        (pd.to_timedelta(["1 day", "2 days"]), "days"),
        (pd.to_timedelta(["1h", "1 day 1 hour"]), "hours"),
        (pd.to_timedelta(["1m", "2m", np.nan]), "minutes"),
        (pd.to_timedelta(["1m3s", "1m4s"]), "seconds"),
    ],
)
def test_infer_timedelta_units(deltas, expected) -> None:
    """infer_timedelta_units picks the coarsest exact unit for the data."""
    assert expected == coding.times.infer_timedelta_units(deltas)


@requires_cftime
@pytest.mark.parametrize(
    ["date_args", "expected"],
    [
        ((1, 2, 3, 4, 5, 6), "0001-02-03 04:05:06.000000"),
        ((10, 2, 3, 4, 5, 6), "0010-02-03 04:05:06.000000"),
        ((100, 2, 3, 4, 5, 6), "0100-02-03 04:05:06.000000"),
        ((1000, 2, 3, 4, 5, 6), "1000-02-03 04:05:06.000000"),
    ],
)
def test_format_cftime_datetime(date_args, expected) -> None:
    """Years are zero-padded to 4 digits for every cftime date type."""
    date_types = _all_cftime_date_types()
    for date_type in date_types.values():
        result = coding.times.format_cftime_datetime(date_type(*date_args))
        assert result == expected


@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
def test_decode_cf(calendar) -> None:
    """decode_cf yields M8[ns] for standard calendars, objects otherwise."""
    days = [1.0, 2.0, 3.0]
    # TODO: GH5690 — do we want to allow this type for `coords`?
    da = DataArray(days, coords=[days], dims=["time"], name="test")  # type: ignore
    ds = da.to_dataset()

    for v in ["test", "time"]:
        ds[v].attrs["units"] = "days since 2001-01-01"
        ds[v].attrs["calendar"] = calendar

    if not has_cftime and calendar not in _STANDARD_CALENDARS:
        # without cftime installed, non-standard calendars cannot be decoded
        with pytest.raises(ValueError):
            ds = decode_cf(ds)
    else:
        ds = decode_cf(ds)

        if calendar not in _STANDARD_CALENDARS:
            assert ds.test.dtype == np.dtype("O")
        else:
            assert ds.test.dtype == np.dtype("M8[ns]")
def test_decode_cf_time_bounds() -> None:
    """Bounds variables inherit units/calendar from their parent time variable."""
    da = DataArray(
        np.arange(6, dtype="int64").reshape((3, 2)),
        coords={"time": [1, 2, 3]},
        dims=("time", "nbnd"),
        name="time_bnds",
    )

    attrs = {
        "units": "days since 2001-01",
        "calendar": "standard",
        "bounds": "time_bnds",
    }

    ds = da.to_dataset()
    ds["time"].attrs.update(attrs)
    _update_bounds_attributes(ds.variables)
    # the "bounds" attribute itself must not propagate to the bounds variable
    assert ds.variables["time_bnds"].attrs == {
        "units": "days since 2001-01",
        "calendar": "standard",
    }
    dsc = decode_cf(ds)
    assert dsc.time_bnds.dtype == np.dtype("M8[ns]")
    dsc = decode_cf(ds, decode_times=False)
    assert dsc.time_bnds.dtype == np.dtype("int64")

    # Do not overwrite existing attrs
    ds = da.to_dataset()
    ds["time"].attrs.update(attrs)
    bnd_attr = {"units": "hours since 2001-01", "calendar": "noleap"}
    ds["time_bnds"].attrs.update(bnd_attr)
    _update_bounds_attributes(ds.variables)
    assert ds.variables["time_bnds"].attrs == bnd_attr

    # If bounds variable not available do not complain
    ds = da.to_dataset()
    ds["time"].attrs.update(attrs)
    ds["time"].attrs["bounds"] = "fake_var"
    _update_bounds_attributes(ds.variables)


@requires_cftime
def test_encode_time_bounds() -> None:
    """Bounds are encoded with the parent's units and keep no redundant attrs."""
    time = pd.date_range("2000-01-16", periods=1)
    time_bounds = pd.date_range("2000-01-01", periods=2, freq="MS")
    ds = Dataset(dict(time=time, time_bounds=time_bounds))
    ds.time.attrs = {"bounds": "time_bounds"}
    ds.time.encoding = {"calendar": "noleap", "units": "days since 2000-01-01"}

    expected = {}
    # expected['time'] = Variable(data=np.array([15]), dims=['time'])
    expected["time_bounds"] = Variable(data=np.array([0, 31]), dims=["time_bounds"])

    encoded, _ = cf_encoder(ds.variables, ds.attrs)
    assert_equal(encoded["time_bounds"], expected["time_bounds"])
    assert "calendar" not in encoded["time_bounds"].attrs
    assert "units" not in encoded["time_bounds"].attrs

    # if time_bounds attrs are same as time attrs, it doesn't matter
    ds.time_bounds.encoding = {"calendar": "noleap", "units": "days since 2000-01-01"}
    encoded, _ = cf_encoder({k: ds[k] for k in ds.variables}, ds.attrs)
    assert_equal(encoded["time_bounds"], expected["time_bounds"])
    assert "calendar" not in encoded["time_bounds"].attrs
    assert "units" not in encoded["time_bounds"].attrs

    # for CF-noncompliant case of time_bounds attrs being different from
    # time attrs; preserve them for faithful roundtrip
    ds.time_bounds.encoding = {"calendar": "noleap", "units": "days since 1849-01-01"}
    encoded, _ = cf_encoder({k: ds[k] for k in ds.variables}, ds.attrs)
    with pytest.raises(AssertionError):
        assert_equal(encoded["time_bounds"], expected["time_bounds"])
    assert "calendar" not in encoded["time_bounds"].attrs
    assert encoded["time_bounds"].attrs["units"] == ds.time_bounds.encoding["units"]

    ds.time.encoding = {}
    with pytest.warns(UserWarning):
        cf_encoder(ds.variables, ds.attrs)


@pytest.fixture(params=_ALL_CALENDARS)
def calendar(request):
    """Parametrized fixture cycling through every supported calendar name."""
    return request.param


@pytest.fixture()
def times(calendar):
    """Four hourly cftime datetimes starting 2000-01-01 in ``calendar``."""
    import cftime

    return cftime.num2date(
        np.arange(4),
        units="hours since 2000-01-01",
        calendar=calendar,
        only_use_cftime_datetimes=True,
    )


@pytest.fixture()
def data(times):
    """A (lon, lat, time) DataArray of random values over the ``times`` axis."""
    data = np.random.rand(2, 2, 4)
    lons = np.linspace(0, 11, 2)
    lats = np.linspace(0, 20, 2)
    return DataArray(
        data, coords=[lons, lats, times], dims=["lon", "lat", "time"], name="data"
    )


@pytest.fixture()
def times_3d(times):
    """A 3-d DataArray whose *values* are cftime datetimes."""
    lons = np.linspace(0, 11, 2)
    lats = np.linspace(0, 20, 2)
    times_arr = np.random.choice(times, size=(2, 2, 4))
    return DataArray(
        times_arr, coords=[lons, lats, times], dims=["lon", "lat", "time"], name="data"
    )


@requires_cftime
def test_contains_cftime_datetimes_1d(data) -> None:
    """A 1-d cftime coordinate is detected."""
    assert contains_cftime_datetimes(data.time)


@requires_cftime
@requires_dask
def test_contains_cftime_datetimes_dask_1d(data) -> None:
    """Detection also works on a dask-chunked 1-d coordinate."""
    assert contains_cftime_datetimes(data.time.chunk())


@requires_cftime
def test_contains_cftime_datetimes_3d(times_3d) -> None:
    """A 3-d array of cftime values is detected."""
    assert contains_cftime_datetimes(times_3d)


@requires_cftime
@requires_dask
def test_contains_cftime_datetimes_dask_3d(times_3d) -> None:
    """Detection also works on a dask-chunked 3-d array."""
    assert contains_cftime_datetimes(times_3d.chunk())


@pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])])
def test_contains_cftime_datetimes_non_cftimes(non_cftime_data) -> None:
    """Empty or numeric arrays are not reported as cftime data."""
    assert not contains_cftime_datetimes(non_cftime_data)


@requires_dask
@pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])])
def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data) -> None:
    """Same negative check for dask-backed arrays."""
    assert not contains_cftime_datetimes(non_cftime_data.chunk())


@requires_cftime
@pytest.mark.parametrize("shape", [(24,), (8, 3), (2, 4, 3)])
def test_encode_cf_datetime_overflow(shape) -> None:
    """Encoding dates far from the reference date round-trips exactly."""
    # Test for fix to GH 2272
    dates = pd.date_range("2100", periods=24).values.reshape(shape)
    units = "days since 1800-01-01"
    calendar = "standard"

    num, _, _ = encode_cf_datetime(dates, units, calendar)
    roundtrip = decode_cf_datetime(num, units, calendar)
    np.testing.assert_array_equal(dates, roundtrip)
def test_encode_expected_failures() -> None:
    """Malformed units strings raise informative ValueErrors on encode."""
    dates = pd.date_range("2000", periods=3)
    with pytest.raises(ValueError, match="invalid time units"):
        encode_cf_datetime(dates, units="days after 2000-01-01")
    with pytest.raises(ValueError, match="invalid reference date"):
        encode_cf_datetime(dates, units="days since NO_YEAR")


def test_encode_cf_datetime_pandas_min() -> None:
    """Default encoding of a pandas DatetimeIndex infers sensible units/calendar."""
    # GH 2623
    dates = pd.date_range("2000", periods=3)
    num, units, calendar = encode_cf_datetime(dates)
    expected_num = np.array([0.0, 1.0, 2.0])
    expected_units = "days since 2000-01-01 00:00:00"
    expected_calendar = "proleptic_gregorian"
    np.testing.assert_array_equal(num, expected_num)
    assert units == expected_units
    assert calendar == expected_calendar


@requires_cftime
def test_time_units_with_timezone_roundtrip(calendar) -> None:
    """Units with a UTC offset decode to UTC and round-trip through encode."""
    # Regression test for GH 2649
    expected_units = "days since 2000-01-01T00:00:00-05:00"
    expected_num_dates = np.array([1, 2, 3])
    dates = decode_cf_datetime(expected_num_dates, expected_units, calendar)

    # Check that dates were decoded to UTC; here the hours should all
    # equal 5.
    result_hours = DataArray(dates).dt.hour
    expected_hours = DataArray([5, 5, 5])
    assert_equal(result_hours, expected_hours)

    # Check that the encoded values are accurately roundtripped.
    result_num_dates, result_units, result_calendar = encode_cf_datetime(
        dates, expected_units, calendar
    )

    if calendar in _STANDARD_CALENDARS:
        np.testing.assert_array_equal(result_num_dates, expected_num_dates)
    else:
        # cftime datetime arithmetic is not quite exact.
        np.testing.assert_allclose(result_num_dates, expected_num_dates)

    assert result_units == expected_units
    assert result_calendar == calendar


@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_use_cftime_default_standard_calendar_in_range(calendar) -> None:
    """In-range standard-calendar dates decode via pandas with no warnings."""
    numerical_dates = [0, 1]
    units = "days since 2000-01-01"
    expected = pd.date_range("2000", periods=2)

    # NOTE: pytest.warns(None) is deprecated (removed in pytest 7); record
    # warnings manually to assert that none are emitted.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        result = decode_cf_datetime(numerical_dates, units, calendar)
    np.testing.assert_array_equal(result, expected)
    assert not record


@requires_cftime
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
@pytest.mark.parametrize("units_year", [1500, 2500])
def test_use_cftime_default_standard_calendar_out_of_range(
    calendar, units_year
) -> None:
    """Out-of-range dates fall back to cftime with a SerializationWarning."""
    from cftime import num2date

    numerical_dates = [0, 1]
    units = f"days since {units_year}-01-01"
    expected = num2date(
        numerical_dates, units, calendar, only_use_cftime_datetimes=True
    )

    with pytest.warns(SerializationWarning):
        result = decode_cf_datetime(numerical_dates, units, calendar)
        np.testing.assert_array_equal(result, expected)


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
@pytest.mark.parametrize("units_year", [1500, 2000, 2500])
def test_use_cftime_default_non_standard_calendar(calendar, units_year) -> None:
    """Non-standard calendars decode via cftime silently by default."""
    from cftime import num2date

    numerical_dates = [0, 1]
    units = f"days since {units_year}-01-01"
    expected = num2date(
        numerical_dates, units, calendar, only_use_cftime_datetimes=True
    )

    # NOTE: pytest.warns(None) is deprecated (removed in pytest 7); record
    # warnings manually to assert that none are emitted.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        result = decode_cf_datetime(numerical_dates, units, calendar)
    np.testing.assert_array_equal(result, expected)
    assert not record
936def test_use_cftime_true(calendar, units_year) -> None: 937 from cftime import num2date 938 939 numerical_dates = [0, 1] 940 units = f"days since {units_year}-01-01" 941 expected = num2date( 942 numerical_dates, units, calendar, only_use_cftime_datetimes=True 943 ) 944 945 with pytest.warns(None) as record: 946 result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=True) 947 np.testing.assert_array_equal(result, expected) 948 assert not record 949 950 951@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) 952def test_use_cftime_false_standard_calendar_in_range(calendar) -> None: 953 numerical_dates = [0, 1] 954 units = "days since 2000-01-01" 955 expected = pd.date_range("2000", periods=2) 956 957 with pytest.warns(None) as record: 958 result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) 959 np.testing.assert_array_equal(result, expected) 960 assert not record 961 962 963@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) 964@pytest.mark.parametrize("units_year", [1500, 2500]) 965def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year) -> None: 966 numerical_dates = [0, 1] 967 units = f"days since {units_year}-01-01" 968 with pytest.raises(OutOfBoundsDatetime): 969 decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) 970 971 972@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) 973@pytest.mark.parametrize("units_year", [1500, 2000, 2500]) 974def test_use_cftime_false_non_standard_calendar(calendar, units_year) -> None: 975 numerical_dates = [0, 1] 976 units = f"days since {units_year}-01-01" 977 with pytest.raises(OutOfBoundsDatetime): 978 decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) 979 980 981@requires_cftime 982@pytest.mark.parametrize("calendar", _ALL_CALENDARS) 983def test_decode_ambiguous_time_warns(calendar) -> None: 984 # GH 4422, 4506 985 from cftime import num2date 986 987 # we don't decode non-standard calendards with 
988 # pandas so expect no warning will be emitted 989 is_standard_calendar = calendar in coding.times._STANDARD_CALENDARS 990 991 dates = [1, 2, 3] 992 units = "days since 1-1-1" 993 expected = num2date(dates, units, calendar=calendar, only_use_cftime_datetimes=True) 994 995 exp_warn_type = SerializationWarning if is_standard_calendar else None 996 997 with pytest.warns(exp_warn_type) as record: 998 result = decode_cf_datetime(dates, units, calendar=calendar) 999 1000 if is_standard_calendar: 1001 relevant_warnings = [ 1002 r 1003 for r in record.list 1004 if str(r.message).startswith("Ambiguous reference date string: 1-1-1") 1005 ] 1006 assert len(relevant_warnings) == 1 1007 else: 1008 assert not record 1009 1010 np.testing.assert_array_equal(result, expected) 1011 1012 1013@pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values()) 1014@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) 1015@pytest.mark.parametrize("date_range", [pd.date_range, cftime_range]) 1016def test_encode_cf_datetime_defaults_to_correct_dtype( 1017 encoding_units, freq, date_range 1018) -> None: 1019 if not has_cftime_1_4_1 and date_range == cftime_range: 1020 pytest.skip("Test requires cftime 1.4.1.") 1021 if (freq == "N" or encoding_units == "nanoseconds") and date_range == cftime_range: 1022 pytest.skip("Nanosecond frequency is not valid for cftime dates.") 1023 times = date_range("2000", periods=3, freq=freq) 1024 units = f"{encoding_units} since 2000-01-01" 1025 encoded, _, _ = coding.times.encode_cf_datetime(times, units) 1026 1027 numpy_timeunit = coding.times._netcdf_to_numpy_timeunit(encoding_units) 1028 encoding_units_as_timedelta = np.timedelta64(1, numpy_timeunit) 1029 if pd.to_timedelta(1, freq) >= encoding_units_as_timedelta: 1030 assert encoded.dtype == np.int64 1031 else: 1032 assert encoded.dtype == np.float64 1033 1034 1035@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) 1036def 
test_encode_decode_roundtrip_datetime64(freq) -> None: 1037 # See GH 4045. Prior to GH 4684 this test would fail for frequencies of 1038 # "S", "L", "U", and "N". 1039 initial_time = pd.date_range("1678-01-01", periods=1) 1040 times = initial_time.append(pd.date_range("1968", periods=2, freq=freq)) 1041 variable = Variable(["time"], times) 1042 encoded = conventions.encode_cf_variable(variable) 1043 decoded = conventions.decode_cf_variable("time", encoded) 1044 assert_equal(variable, decoded) 1045 1046 1047@requires_cftime_1_4_1 1048@pytest.mark.parametrize("freq", ["U", "L", "S", "T", "H", "D"]) 1049def test_encode_decode_roundtrip_cftime(freq) -> None: 1050 initial_time = cftime_range("0001", periods=1) 1051 times = initial_time.append( 1052 cftime_range("0001", periods=2, freq=freq) + timedelta(days=291000 * 365) 1053 ) 1054 variable = Variable(["time"], times) 1055 encoded = conventions.encode_cf_variable(variable) 1056 decoded = conventions.decode_cf_variable("time", encoded, use_cftime=True) 1057 assert_equal(variable, decoded) 1058 1059 1060@requires_cftime 1061def test__encode_datetime_with_cftime() -> None: 1062 # See GH 4870. cftime versions > 1.4.0 required us to adapt the 1063 # way _encode_datetime_with_cftime was written. 1064 import cftime 1065 1066 calendar = "gregorian" 1067 times = cftime.num2date([0, 1], "hours since 2000-01-01", calendar) 1068 1069 encoding_units = "days since 2000-01-01" 1070 expected = cftime.date2num(times, encoding_units, calendar) 1071 result = _encode_datetime_with_cftime(times, encoding_units, calendar) 1072 np.testing.assert_equal(result, expected) 1073 1074 1075@pytest.mark.parametrize("calendar", ["gregorian", "Gregorian", "GREGORIAN"]) 1076def test_decode_encode_roundtrip_with_non_lowercase_letters(calendar) -> None: 1077 # See GH 5093. 
1078 times = [0, 1] 1079 units = "days since 2000-01-01" 1080 attrs = {"calendar": calendar, "units": units} 1081 variable = Variable(["time"], times, attrs) 1082 decoded = conventions.decode_cf_variable("time", variable) 1083 encoded = conventions.encode_cf_variable(decoded) 1084 1085 # Previously this would erroneously be an array of cftime.datetime 1086 # objects. We check here that it is decoded properly to np.datetime64. 1087 assert np.issubdtype(decoded.dtype, np.datetime64) 1088 1089 # Use assert_identical to ensure that the calendar attribute maintained its 1090 # original form throughout the roundtripping process, uppercase letters and 1091 # all. 1092 assert_identical(variable, encoded) 1093