import warnings
from datetime import timedelta
from itertools import product

import numpy as np
import pandas as pd
import pytest
from pandas.errors import OutOfBoundsDatetime

from xarray import (
    DataArray,
    Dataset,
    Variable,
    cftime_range,
    coding,
    conventions,
    decode_cf,
)
from xarray.coding.times import (
    _encode_datetime_with_cftime,
    cftime_to_nptime,
    decode_cf_datetime,
    encode_cf_datetime,
    to_timedelta_unboxed,
)
from xarray.coding.variables import SerializationWarning
from xarray.conventions import _update_bounds_attributes, cf_encoder
from xarray.core.common import contains_cftime_datetimes
from xarray.testing import assert_equal, assert_identical

from . import (
    arm_xfail,
    assert_array_equal,
    has_cftime,
    has_cftime_1_4_1,
    requires_cftime,
    requires_cftime_1_4_1,
    requires_dask,
)

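# Calendars that xarray never decodes to np.datetime64: dates in these
# calendars always come back as arrays of cftime objects, in contrast to the
# standard calendars listed in coding.times._STANDARD_CALENDARS.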
_NON_STANDARD_CALENDARS_SET = {
    "noleap",
    "365_day",
    "360_day",
    "julian",
    "all_leap",
    "366_day",
}
_ALL_CALENDARS = sorted(
    _NON_STANDARD_CALENDARS_SET.union(coding.times._STANDARD_CALENDARS)
)
_NON_STANDARD_CALENDARS = sorted(_NON_STANDARD_CALENDARS_SET)
_STANDARD_CALENDARS = sorted(coding.times._STANDARD_CALENDARS)
_CF_DATETIME_NUM_DATES_UNITS = [
    (np.arange(10), "days since 2000-01-01"),
    (np.arange(10).astype("float64"), "days since 2000-01-01"),
    (np.arange(10).astype("float32"), "days since 2000-01-01"),
    (np.arange(10).reshape(2, 5), "days since 2000-01-01"),
    (12300 + np.arange(5), "hours since 1680-01-01 00:00:00"),
    # here we add a couple of minor formatting errors to test
    # the robustness of the parsing algorithm.
    (12300 + np.arange(5), "hour since 1680-01-01  00:00:00"),
    (12300 + np.arange(5), "Hour  since 1680-01-01 00:00:00"),
    (12300 + np.arange(5), " Hour  since  1680-01-01 00:00:00 "),
    (10, "days since 2000-01-01"),
    ([10], "daYs  since 2000-01-01"),
    ([[10]], "days since 2000-01-01"),
    ([10, 10], "days since 2000-01-01"),
    (np.array(10), "days since 2000-01-01"),
    (0, "days since 1000-01-01"),
    ([0], "days since 1000-01-01"),
    ([[0]], "days since 1000-01-01"),
    (np.arange(2), "days since 1000-01-01"),
    (np.arange(0, 100000, 20000), "days since 1900-01-01"),
    (np.arange(0, 100000, 20000), "days since 1-01-01"),
    (17093352.0, "hours since 1-1-1 00:00:0.0"),
    ([0.5, 1.5], "hours since 1900-01-01T00:00:00"),
    (0, "milliseconds since 2000-01-01T00:00:00"),
    (0, "microseconds since 2000-01-01T00:00:00"),
    (np.int32(788961600), "seconds since 1981-01-01"),  # GH2002
    (12300 + np.arange(5), "hour since 1680-01-01 00:00:00.500000"),
    (164375, "days since 1850-01-01 00:00:00"),
    (164374.5, "days since 1850-01-01 00:00:00"),
    ([164374.5, 168360.5], "days since 1850-01-01 00:00:00"),
]
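# Cross every (num_dates, units) case above with each standard calendar so
# that test_cf_datetime exercises the decode/encode roundtrip for all of them.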
_CF_DATETIME_TESTS = [
    num_dates_units + (calendar,)
    for num_dates_units, calendar in product(
        _CF_DATETIME_NUM_DATES_UNITS, _STANDARD_CALENDARS
    )
]


def _all_cftime_date_types():
    import cftime

    return {
        "noleap": cftime.DatetimeNoLeap,
        "365_day": cftime.DatetimeNoLeap,
        "360_day": cftime.Datetime360Day,
        "julian": cftime.DatetimeJulian,
        "all_leap": cftime.DatetimeAllLeap,
        "366_day": cftime.DatetimeAllLeap,
        "gregorian": cftime.DatetimeGregorian,
        "proleptic_gregorian": cftime.DatetimeProlepticGregorian,
    }


@requires_cftime
@pytest.mark.filterwarnings("ignore:Ambiguous reference date string")
@pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS)
def test_cf_datetime(num_dates, units, calendar) -> None:
    import cftime

    expected = cftime.num2date(
        num_dates, units, calendar, only_use_cftime_datetimes=True
    )
    min_y = np.ravel(np.atleast_1d(expected))[np.nanargmin(num_dates)].year
    max_y = np.ravel(np.atleast_1d(expected))[np.nanargmax(num_dates)].year
    if min_y >= 1678 and max_y < 2262:
        expected = cftime_to_nptime(expected)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unable to decode time axis")
        actual = coding.times.decode_cf_datetime(num_dates, units, calendar)

    abs_diff = np.asarray(abs(actual - expected)).ravel()
    abs_diff = pd.to_timedelta(abs_diff.tolist()).to_numpy()

    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff <= np.timedelta64(1, "s")).all()
    encoded, _, _ = coding.times.encode_cf_datetime(actual, units, calendar)

    assert_array_equal(num_dates, np.around(encoded, 1))
    if hasattr(num_dates, "ndim") and num_dates.ndim == 1 and "1000" not in units:
        # verify that wrapping with a pandas.Index works
        # note that it *does not* currently work to put
        # non-datetime64 compatible dates into a pandas.Index
        encoded, _, _ = coding.times.encode_cf_datetime(
            pd.Index(actual), units, calendar
        )
        assert_array_equal(num_dates, np.around(encoded, 1))


@requires_cftime
def test_decode_cf_datetime_overflow() -> None:
    # checks for
    # https://github.com/pydata/pandas/issues/14068
    # https://github.com/pydata/xarray/issues/975
    from cftime import DatetimeGregorian

    datetime = DatetimeGregorian
    units = "days since 2000-01-01 00:00:00"

    # date after 2262 and before 1678
    days = (-117608, 95795)
    expected = (datetime(1677, 12, 31), datetime(2262, 4, 12))

    for i, day in enumerate(days):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unable to decode time axis")
            result = coding.times.decode_cf_datetime(day, units)
        assert result == expected[i]


def test_decode_cf_datetime_non_standard_units() -> None:
    expected = pd.date_range(periods=100, start="1970-01-01", freq="h")
    # netCDFs from madis.noaa.gov use this format for their time units
    # they cannot be parsed by cftime, but pd.Timestamp works
    units = "hours since 1-1-1970"
    actual = coding.times.decode_cf_datetime(np.arange(100), units)
    assert_array_equal(actual, expected)


@requires_cftime
def test_decode_cf_datetime_non_iso_strings() -> None:
    # datetime strings that are _almost_ ISO compliant but not quite,
    # but which cftime.num2date can still parse correctly
    expected = pd.date_range(periods=100, start="2000-01-01", freq="h")
    cases = [
        (np.arange(100), "hours since 2000-01-01 0"),
        (np.arange(100), "hours since 2000-1-1 0"),
        (np.arange(100), "hours since 2000-01-01 0:00"),
    ]
    for num_dates, units in cases:
        actual = coding.times.decode_cf_datetime(num_dates, units)
        abs_diff = abs(actual - expected.values)
        # once we no longer support versions of netCDF4 older than 1.1.5,
        # we could do this check with near microsecond accuracy:
        # https://github.com/Unidata/netcdf4-python/issues/355
        assert (abs_diff <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_decode_standard_calendar_inside_timestamp_range(calendar) -> None:
    import cftime

    units = "days since 0001-01-01"
    times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H")
    time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar)
    expected = times.values
    expected_dtype = np.dtype("M8[ns]")

    actual = coding.times.decode_cf_datetime(time, units, calendar=calendar)
    assert actual.dtype == expected_dtype
    abs_diff = abs(actual - expected)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
def test_decode_non_standard_calendar_inside_timestamp_range(calendar) -> None:
    import cftime

    units = "days since 0001-01-01"
    times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H")
    non_standard_time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar)

    expected = cftime.num2date(
        non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True
    )
    expected_dtype = np.dtype("O")

    actual = coding.times.decode_cf_datetime(
        non_standard_time, units, calendar=calendar
    )
    assert actual.dtype == expected_dtype
    abs_diff = abs(actual - expected)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
def test_decode_dates_outside_timestamp_range(calendar) -> None:
    from datetime import datetime

    import cftime

    units = "days since 0001-01-01"
    times = [datetime(1, 4, 1, h) for h in range(1, 5)]
    time = cftime.date2num(times, units, calendar=calendar)

    expected = cftime.num2date(
        time, units, calendar=calendar, only_use_cftime_datetimes=True
    )
    expected_date_type = type(expected[0])

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unable to decode time axis")
        actual = coding.times.decode_cf_datetime(time, units, calendar=calendar)
    assert all(isinstance(value, expected_date_type) for value in actual)
    abs_diff = abs(actual - expected)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_decode_standard_calendar_single_element_inside_timestamp_range(
    calendar,
) -> None:
    units = "days since 0001-01-01"
    for num_time in [735368, [735368], [[735368]]]:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unable to decode time axis")
            actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar)
        assert actual.dtype == np.dtype("M8[ns]")


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
def test_decode_non_standard_calendar_single_element_inside_timestamp_range(
    calendar,
) -> None:
    units = "days since 0001-01-01"
    for num_time in [735368, [735368], [[735368]]]:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Unable to decode time axis")
            actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar)
        assert actual.dtype == np.dtype("O")


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
def test_decode_single_element_outside_timestamp_range(calendar) -> None:
    import cftime

    units = "days since 0001-01-01"
    for days in [1, 1470376]:
        for num_time in [days, [days], [[days]]]:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", "Unable to decode time axis")
                actual = coding.times.decode_cf_datetime(
                    num_time, units, calendar=calendar
                )

            expected = cftime.num2date(
                days, units, calendar, only_use_cftime_datetimes=True
            )
            assert isinstance(actual.item(), type(expected))


@requires_cftime
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_decode_standard_calendar_multidim_time_inside_timestamp_range(
    calendar,
) -> None:
    import cftime

    units = "days since 0001-01-01"
    times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D")
    times2 = pd.date_range("2001-05-01", end="2001-05-05", freq="D")
    time1 = cftime.date2num(times1.to_pydatetime(), units, calendar=calendar)
    time2 = cftime.date2num(times2.to_pydatetime(), units, calendar=calendar)
    mdim_time = np.empty((len(time1), 2))
    mdim_time[:, 0] = time1
    mdim_time[:, 1] = time2

    expected1 = times1.values
    expected2 = times2.values

    actual = coding.times.decode_cf_datetime(mdim_time, units, calendar=calendar)
    assert actual.dtype == np.dtype("M8[ns]")

    abs_diff1 = abs(actual[:, 0] - expected1)
    abs_diff2 = abs(actual[:, 1] - expected2)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff1 <= np.timedelta64(1, "s")).all()
    assert (abs_diff2 <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(
    calendar,
) -> None:
    import cftime

    units = "days since 0001-01-01"
    times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D")
    times2 = pd.date_range("2001-05-01", end="2001-05-05", freq="D")
    time1 = cftime.date2num(times1.to_pydatetime(), units, calendar=calendar)
    time2 = cftime.date2num(times2.to_pydatetime(), units, calendar=calendar)
    mdim_time = np.empty((len(time1), 2))
    mdim_time[:, 0] = time1
    mdim_time[:, 1] = time2

    if cftime.__name__ == "cftime":
        expected1 = cftime.num2date(
            time1, units, calendar, only_use_cftime_datetimes=True
        )
        expected2 = cftime.num2date(
            time2, units, calendar, only_use_cftime_datetimes=True
        )
    else:
        expected1 = cftime.num2date(time1, units, calendar)
        expected2 = cftime.num2date(time2, units, calendar)

    expected_dtype = np.dtype("O")

    actual = coding.times.decode_cf_datetime(mdim_time, units, calendar=calendar)

    assert actual.dtype == expected_dtype
    abs_diff1 = abs(actual[:, 0] - expected1)
    abs_diff2 = abs(actual[:, 1] - expected2)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff1 <= np.timedelta64(1, "s")).all()
    assert (abs_diff2 <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
def test_decode_multidim_time_outside_timestamp_range(calendar) -> None:
    from datetime import datetime

    import cftime

    units = "days since 0001-01-01"
    times1 = [datetime(1, 4, day) for day in range(1, 6)]
    times2 = [datetime(1, 5, day) for day in range(1, 6)]
    time1 = cftime.date2num(times1, units, calendar=calendar)
    time2 = cftime.date2num(times2, units, calendar=calendar)
    mdim_time = np.empty((len(time1), 2))
    mdim_time[:, 0] = time1
    mdim_time[:, 1] = time2

    expected1 = cftime.num2date(time1, units, calendar, only_use_cftime_datetimes=True)
    expected2 = cftime.num2date(time2, units, calendar, only_use_cftime_datetimes=True)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Unable to decode time axis")
        actual = coding.times.decode_cf_datetime(mdim_time, units, calendar=calendar)

    assert actual.dtype == np.dtype("O")

    abs_diff1 = abs(actual[:, 0] - expected1)
    abs_diff2 = abs(actual[:, 1] - expected2)
    # once we no longer support versions of netCDF4 older than 1.1.5,
    # we could do this check with near microsecond accuracy:
    # https://github.com/Unidata/netcdf4-python/issues/355
    assert (abs_diff1 <= np.timedelta64(1, "s")).all()
    assert (abs_diff2 <= np.timedelta64(1, "s")).all()


@requires_cftime
@pytest.mark.parametrize(
    ("calendar", "num_time"),
    [("360_day", 720058.0), ("all_leap", 732059.0), ("366_day", 732059.0)],
)
def test_decode_non_standard_calendar_single_element(calendar, num_time) -> None:
    import cftime

    units = "days since 0001-01-01"

    actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar)

    expected = np.asarray(
        cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True)
    )
    assert actual.dtype == np.dtype("O")
    assert expected == actual


@requires_cftime
def test_decode_360_day_calendar() -> None:
    import cftime

    calendar = "360_day"
    # ensure leap year doesn't matter
    for year in [2010, 2011, 2012, 2013, 2014]:
        units = f"days since {year}-01-01"
        num_times = np.arange(100)

        expected = cftime.num2date(
            num_times, units, calendar, only_use_cftime_datetimes=True
        )

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            actual = coding.times.decode_cf_datetime(
                num_times, units, calendar=calendar
            )
            assert len(w) == 0

        assert actual.dtype == np.dtype("O")
        assert_array_equal(actual, expected)


@requires_cftime
def test_decode_abbreviation() -> None:
    """Test making sure we properly fall back to cftime on abbreviated units."""
    import cftime

    val = np.array([1586628000000.0])
    units = "msecs since 1970-01-01T00:00:00Z"
    actual = coding.times.decode_cf_datetime(val, units)
    expected = coding.times.cftime_to_nptime(cftime.num2date(val, units))
    assert_array_equal(actual, expected)


@arm_xfail
@requires_cftime
@pytest.mark.parametrize(
    ["num_dates", "units", "expected_list"],
    [
        ([np.nan], "days since 2000-01-01", ["NaT"]),
        ([np.nan, 0], "days since 2000-01-01", ["NaT", "2000-01-01T00:00:00Z"]),
        (
            [np.nan, 0, 1],
            "days since 2000-01-01",
            ["NaT", "2000-01-01T00:00:00Z", "2000-01-02T00:00:00Z"],
        ),
    ],
)
def test_cf_datetime_nan(num_dates, units, expected_list) -> None:
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "All-NaN")
        actual = coding.times.decode_cf_datetime(num_dates, units)
    # use pandas because numpy will deprecate timezone-aware conversions
    expected = pd.to_datetime(expected_list).to_numpy(dtype="datetime64[ns]")
    assert_array_equal(expected, actual)


@requires_cftime
def test_decoded_cf_datetime_array_2d() -> None:
    # regression test for GH1229
    variable = Variable(
        ("x", "y"), np.array([[0, 1], [2, 3]]), {"units": "days since 2000-01-01"}
    )
    result = coding.times.CFDatetimeCoder().decode(variable)
    assert result.dtype == "datetime64[ns]"
    expected = pd.date_range("2000-01-01", periods=4).values.reshape(2, 2)
    assert_array_equal(np.asarray(result), expected)


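# Mapping from pandas frequency aliases to the CF time units expected when
# inferring encoding units for dates generated at that frequency.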
FREQUENCIES_TO_ENCODING_UNITS = {
    "N": "nanoseconds",
    "U": "microseconds",
    "L": "milliseconds",
    "S": "seconds",
    "T": "minutes",
    "H": "hours",
    "D": "days",
}


@pytest.mark.parametrize(("freq", "units"), FREQUENCIES_TO_ENCODING_UNITS.items())
def test_infer_datetime_units(freq, units) -> None:
    dates = pd.date_range("2000", periods=2, freq=freq)
    expected = f"{units} since 2000-01-01 00:00:00"
    assert expected == coding.times.infer_datetime_units(dates)


@pytest.mark.parametrize(
    ["dates", "expected"],
    [
        (
            pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"]),
            "days since 1900-01-01 00:00:00",
        ),
        (pd.to_datetime(["NaT", "1900-01-01"]), "days since 1900-01-01 00:00:00"),
        (pd.to_datetime(["NaT"]), "days since 1970-01-01 00:00:00"),
    ],
)
def test_infer_datetime_units_with_NaT(dates, expected) -> None:
    assert expected == coding.times.infer_datetime_units(dates)


_CFTIME_DATETIME_UNITS_TESTS = [
    ([(1900, 1, 1), (1900, 1, 1)], "days since 1900-01-01 00:00:00.000000"),
    (
        [(1900, 1, 1), (1900, 1, 2), (1900, 1, 2, 0, 0, 1)],
        "seconds since 1900-01-01 00:00:00.000000",
    ),
    (
        [(1900, 1, 1), (1900, 1, 8), (1900, 1, 16)],
        "days since 1900-01-01 00:00:00.000000",
    ),
]


@requires_cftime
@pytest.mark.parametrize(
    "calendar", _NON_STANDARD_CALENDARS + ["gregorian", "proleptic_gregorian"]
)
@pytest.mark.parametrize(("date_args", "expected"), _CFTIME_DATETIME_UNITS_TESTS)
def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None:
    date_type = _all_cftime_date_types()[calendar]
    dates = [date_type(*args) for args in date_args]
    assert expected == coding.times.infer_datetime_units(dates)


@pytest.mark.parametrize(
    ["timedeltas", "units", "numbers"],
    [
        ("1D", "days", np.int64(1)),
        (["1D", "2D", "3D"], "days", np.array([1, 2, 3], "int64")),
        ("1h", "hours", np.int64(1)),
        ("1ms", "milliseconds", np.int64(1)),
        ("1us", "microseconds", np.int64(1)),
        ("1ns", "nanoseconds", np.int64(1)),
        (["NaT", "0s", "1s"], None, [np.nan, 0, 1]),
        (["30m", "60m"], "hours", [0.5, 1.0]),
        ("NaT", "days", np.nan),
        (["NaT", "NaT"], "days", [np.nan, np.nan]),
    ],
)
def test_cf_timedelta(timedeltas, units, numbers) -> None:
    if timedeltas == "NaT":
        timedeltas = np.timedelta64("NaT", "ns")
    else:
        timedeltas = to_timedelta_unboxed(timedeltas)
    numbers = np.array(numbers)

    expected = numbers
    actual, _ = coding.times.encode_cf_timedelta(timedeltas, units)
    assert_array_equal(expected, actual)
    assert expected.dtype == actual.dtype

    if units is not None:
        expected = timedeltas
        actual = coding.times.decode_cf_timedelta(numbers, units)
        assert_array_equal(expected, actual)
        assert expected.dtype == actual.dtype

    expected = np.timedelta64("NaT", "ns")
    actual = coding.times.decode_cf_timedelta(np.array(np.nan), "days")
    assert_array_equal(expected, actual)


def test_cf_timedelta_2d() -> None:
    units = "days"
    numbers = np.atleast_2d([1, 2, 3])

    timedeltas = np.atleast_2d(to_timedelta_unboxed(["1D", "2D", "3D"]))
    expected = timedeltas

    actual = coding.times.decode_cf_timedelta(numbers, units)
    assert_array_equal(expected, actual)
    assert expected.dtype == actual.dtype  # type: ignore


@pytest.mark.parametrize(
    ["deltas", "expected"],
    [
        (pd.to_timedelta(["1 day", "2 days"]), "days"),
        (pd.to_timedelta(["1h", "1 day 1 hour"]), "hours"),
        (pd.to_timedelta(["1m", "2m", np.nan]), "minutes"),
        (pd.to_timedelta(["1m3s", "1m4s"]), "seconds"),
    ],
)
def test_infer_timedelta_units(deltas, expected) -> None:
    assert expected == coding.times.infer_timedelta_units(deltas)


@requires_cftime
@pytest.mark.parametrize(
    ["date_args", "expected"],
    [
        ((1, 2, 3, 4, 5, 6), "0001-02-03 04:05:06.000000"),
        ((10, 2, 3, 4, 5, 6), "0010-02-03 04:05:06.000000"),
        ((100, 2, 3, 4, 5, 6), "0100-02-03 04:05:06.000000"),
        ((1000, 2, 3, 4, 5, 6), "1000-02-03 04:05:06.000000"),
    ],
)
def test_format_cftime_datetime(date_args, expected) -> None:
    date_types = _all_cftime_date_types()
    for date_type in date_types.values():
        result = coding.times.format_cftime_datetime(date_type(*date_args))
        assert result == expected


@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
def test_decode_cf(calendar) -> None:
    days = [1.0, 2.0, 3.0]
    # TODO: GH5690 — do we want to allow this type for `coords`?
    da = DataArray(days, coords=[days], dims=["time"], name="test")  # type: ignore
    ds = da.to_dataset()

    for v in ["test", "time"]:
        ds[v].attrs["units"] = "days since 2001-01-01"
        ds[v].attrs["calendar"] = calendar

    if not has_cftime and calendar not in _STANDARD_CALENDARS:
        with pytest.raises(ValueError):
            ds = decode_cf(ds)
    else:
        ds = decode_cf(ds)

        if calendar not in _STANDARD_CALENDARS:
            assert ds.test.dtype == np.dtype("O")
        else:
            assert ds.test.dtype == np.dtype("M8[ns]")


def test_decode_cf_time_bounds() -> None:

    da = DataArray(
        np.arange(6, dtype="int64").reshape((3, 2)),
        coords={"time": [1, 2, 3]},
        dims=("time", "nbnd"),
        name="time_bnds",
    )

    attrs = {
        "units": "days since 2001-01",
        "calendar": "standard",
        "bounds": "time_bnds",
    }

    ds = da.to_dataset()
    ds["time"].attrs.update(attrs)
    _update_bounds_attributes(ds.variables)
    assert ds.variables["time_bnds"].attrs == {
        "units": "days since 2001-01",
        "calendar": "standard",
    }
    dsc = decode_cf(ds)
    assert dsc.time_bnds.dtype == np.dtype("M8[ns]")
    dsc = decode_cf(ds, decode_times=False)
    assert dsc.time_bnds.dtype == np.dtype("int64")

    # Do not overwrite existing attrs
    ds = da.to_dataset()
    ds["time"].attrs.update(attrs)
    bnd_attr = {"units": "hours since 2001-01", "calendar": "noleap"}
    ds["time_bnds"].attrs.update(bnd_attr)
    _update_bounds_attributes(ds.variables)
    assert ds.variables["time_bnds"].attrs == bnd_attr

    # If bounds variable not available do not complain
    ds = da.to_dataset()
    ds["time"].attrs.update(attrs)
    ds["time"].attrs["bounds"] = "fake_var"
    _update_bounds_attributes(ds.variables)


@requires_cftime
def test_encode_time_bounds() -> None:

    time = pd.date_range("2000-01-16", periods=1)
    time_bounds = pd.date_range("2000-01-01", periods=2, freq="MS")
    ds = Dataset(dict(time=time, time_bounds=time_bounds))
    ds.time.attrs = {"bounds": "time_bounds"}
    ds.time.encoding = {"calendar": "noleap", "units": "days since 2000-01-01"}

    expected = {}
    # expected['time'] = Variable(data=np.array([15]), dims=['time'])
    expected["time_bounds"] = Variable(data=np.array([0, 31]), dims=["time_bounds"])

    encoded, _ = cf_encoder(ds.variables, ds.attrs)
    assert_equal(encoded["time_bounds"], expected["time_bounds"])
    assert "calendar" not in encoded["time_bounds"].attrs
    assert "units" not in encoded["time_bounds"].attrs

    # if time_bounds attrs are same as time attrs, it doesn't matter
    ds.time_bounds.encoding = {"calendar": "noleap", "units": "days since 2000-01-01"}
    encoded, _ = cf_encoder({k: ds[k] for k in ds.variables}, ds.attrs)
    assert_equal(encoded["time_bounds"], expected["time_bounds"])
    assert "calendar" not in encoded["time_bounds"].attrs
    assert "units" not in encoded["time_bounds"].attrs

    # for CF-noncompliant case of time_bounds attrs being different from
    # time attrs; preserve them for faithful roundtrip
    ds.time_bounds.encoding = {"calendar": "noleap", "units": "days since 1849-01-01"}
    encoded, _ = cf_encoder({k: ds[k] for k in ds.variables}, ds.attrs)
    with pytest.raises(AssertionError):
        assert_equal(encoded["time_bounds"], expected["time_bounds"])
    assert "calendar" not in encoded["time_bounds"].attrs
    assert encoded["time_bounds"].attrs["units"] == ds.time_bounds.encoding["units"]

    ds.time.encoding = {}
    with pytest.warns(UserWarning):
        cf_encoder(ds.variables, ds.attrs)


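# Fixtures for the tests below: a calendar name (parametrized over all
# calendars), a small array of cftime dates in that calendar, and 1D/3D
# DataArrays built from those dates.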
@pytest.fixture(params=_ALL_CALENDARS)
def calendar(request):
    return request.param


@pytest.fixture()
def times(calendar):
    import cftime

    return cftime.num2date(
        np.arange(4),
        units="hours since 2000-01-01",
        calendar=calendar,
        only_use_cftime_datetimes=True,
    )


@pytest.fixture()
def data(times):
    data = np.random.rand(2, 2, 4)
    lons = np.linspace(0, 11, 2)
    lats = np.linspace(0, 20, 2)
    return DataArray(
        data, coords=[lons, lats, times], dims=["lon", "lat", "time"], name="data"
    )


@pytest.fixture()
def times_3d(times):
    lons = np.linspace(0, 11, 2)
    lats = np.linspace(0, 20, 2)
    times_arr = np.random.choice(times, size=(2, 2, 4))
    return DataArray(
        times_arr, coords=[lons, lats, times], dims=["lon", "lat", "time"], name="data"
    )


@requires_cftime
def test_contains_cftime_datetimes_1d(data) -> None:
    assert contains_cftime_datetimes(data.time)


@requires_cftime
@requires_dask
def test_contains_cftime_datetimes_dask_1d(data) -> None:
    assert contains_cftime_datetimes(data.time.chunk())


@requires_cftime
def test_contains_cftime_datetimes_3d(times_3d) -> None:
    assert contains_cftime_datetimes(times_3d)


@requires_cftime
@requires_dask
def test_contains_cftime_datetimes_dask_3d(times_3d) -> None:
    assert contains_cftime_datetimes(times_3d.chunk())


@pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])])
def test_contains_cftime_datetimes_non_cftimes(non_cftime_data) -> None:
    assert not contains_cftime_datetimes(non_cftime_data)


@requires_dask
@pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])])
def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data) -> None:
    assert not contains_cftime_datetimes(non_cftime_data.chunk())


@requires_cftime
@pytest.mark.parametrize("shape", [(24,), (8, 3), (2, 4, 3)])
def test_encode_cf_datetime_overflow(shape) -> None:
    # Test for fix to GH 2272
    dates = pd.date_range("2100", periods=24).values.reshape(shape)
    units = "days since 1800-01-01"
    calendar = "standard"

    num, _, _ = encode_cf_datetime(dates, units, calendar)
    roundtrip = decode_cf_datetime(num, units, calendar)
    np.testing.assert_array_equal(dates, roundtrip)


def test_encode_expected_failures() -> None:

    dates = pd.date_range("2000", periods=3)
    with pytest.raises(ValueError, match="invalid time units"):
        encode_cf_datetime(dates, units="days after 2000-01-01")
    with pytest.raises(ValueError, match="invalid reference date"):
        encode_cf_datetime(dates, units="days since NO_YEAR")


def test_encode_cf_datetime_pandas_min() -> None:
    # GH 2623
    dates = pd.date_range("2000", periods=3)
    num, units, calendar = encode_cf_datetime(dates)
    expected_num = np.array([0.0, 1.0, 2.0])
    expected_units = "days since 2000-01-01 00:00:00"
    expected_calendar = "proleptic_gregorian"
    np.testing.assert_array_equal(num, expected_num)
    assert units == expected_units
    assert calendar == expected_calendar


@requires_cftime
def test_time_units_with_timezone_roundtrip(calendar) -> None:
    # Regression test for GH 2649
    expected_units = "days since 2000-01-01T00:00:00-05:00"
    expected_num_dates = np.array([1, 2, 3])
    dates = decode_cf_datetime(expected_num_dates, expected_units, calendar)

    # Check that dates were decoded to UTC; here the hours should all
    # equal 5.
    result_hours = DataArray(dates).dt.hour
    expected_hours = DataArray([5, 5, 5])
    assert_equal(result_hours, expected_hours)

    # Check that the encoded values are accurately roundtripped.
    result_num_dates, result_units, result_calendar = encode_cf_datetime(
        dates, expected_units, calendar
    )

    if calendar in _STANDARD_CALENDARS:
        np.testing.assert_array_equal(result_num_dates, expected_num_dates)
    else:
        # cftime datetime arithmetic is not quite exact.
        np.testing.assert_allclose(result_num_dates, expected_num_dates)

    assert result_units == expected_units
    assert result_calendar == calendar


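# The use_cftime tests below exercise the decoding defaults: standard-calendar
# dates within the pandas.Timestamp range decode to datetime64[ns] without a
# warning, out-of-range standard-calendar dates fall back to cftime objects
# with a SerializationWarning, and non-standard calendars always decode to
# cftime objects.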
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_use_cftime_default_standard_calendar_in_range(calendar) -> None:
    numerical_dates = [0, 1]
    units = "days since 2000-01-01"
    expected = pd.date_range("2000", periods=2)

    with pytest.warns(None) as record:
        result = decode_cf_datetime(numerical_dates, units, calendar)
        np.testing.assert_array_equal(result, expected)
        assert not record


@requires_cftime
@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
@pytest.mark.parametrize("units_year", [1500, 2500])
def test_use_cftime_default_standard_calendar_out_of_range(
    calendar, units_year
) -> None:
    from cftime import num2date

    numerical_dates = [0, 1]
    units = f"days since {units_year}-01-01"
    expected = num2date(
        numerical_dates, units, calendar, only_use_cftime_datetimes=True
    )

    with pytest.warns(SerializationWarning):
        result = decode_cf_datetime(numerical_dates, units, calendar)
        np.testing.assert_array_equal(result, expected)


@requires_cftime
@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
@pytest.mark.parametrize("units_year", [1500, 2000, 2500])
def test_use_cftime_default_non_standard_calendar(calendar, units_year) -> None:
    from cftime import num2date

    numerical_dates = [0, 1]
    units = f"days since {units_year}-01-01"
    expected = num2date(
        numerical_dates, units, calendar, only_use_cftime_datetimes=True
    )

    with pytest.warns(None) as record:
        result = decode_cf_datetime(numerical_dates, units, calendar)
        np.testing.assert_array_equal(result, expected)
        assert not record


@requires_cftime
@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
@pytest.mark.parametrize("units_year", [1500, 2000, 2500])
def test_use_cftime_true(calendar, units_year) -> None:
    from cftime import num2date

    numerical_dates = [0, 1]
    units = f"days since {units_year}-01-01"
    expected = num2date(
        numerical_dates, units, calendar, only_use_cftime_datetimes=True
    )

    with pytest.warns(None) as record:
        result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=True)
        np.testing.assert_array_equal(result, expected)
        assert not record


@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
def test_use_cftime_false_standard_calendar_in_range(calendar) -> None:
    numerical_dates = [0, 1]
    units = "days since 2000-01-01"
    expected = pd.date_range("2000", periods=2)

    with pytest.warns(None) as record:
        result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False)
        np.testing.assert_array_equal(result, expected)
        assert not record


@pytest.mark.parametrize("calendar", _STANDARD_CALENDARS)
@pytest.mark.parametrize("units_year", [1500, 2500])
def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year) -> None:
    numerical_dates = [0, 1]
    units = f"days since {units_year}-01-01"
    with pytest.raises(OutOfBoundsDatetime):
        decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False)


@pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS)
@pytest.mark.parametrize("units_year", [1500, 2000, 2500])
def test_use_cftime_false_non_standard_calendar(calendar, units_year) -> None:
    numerical_dates = [0, 1]
    units = f"days since {units_year}-01-01"
    with pytest.raises(OutOfBoundsDatetime):
        decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False)


@requires_cftime
@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
def test_decode_ambiguous_time_warns(calendar) -> None:
    # GH 4422, 4506
    from cftime import num2date

    # we don't decode non-standard calendars with pandas,
    # so we expect no warning to be emitted
    is_standard_calendar = calendar in coding.times._STANDARD_CALENDARS

    dates = [1, 2, 3]
    units = "days since 1-1-1"
    expected = num2date(dates, units, calendar=calendar, only_use_cftime_datetimes=True)

    exp_warn_type = SerializationWarning if is_standard_calendar else None

    with pytest.warns(exp_warn_type) as record:
        result = decode_cf_datetime(dates, units, calendar=calendar)

    if is_standard_calendar:
        relevant_warnings = [
            r
            for r in record.list
            if str(r.message).startswith("Ambiguous reference date string: 1-1-1")
        ]
        assert len(relevant_warnings) == 1
    else:
        assert not record

    np.testing.assert_array_equal(result, expected)


@pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values())
@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys())
@pytest.mark.parametrize("date_range", [pd.date_range, cftime_range])
def test_encode_cf_datetime_defaults_to_correct_dtype(
    encoding_units, freq, date_range
) -> None:
    if not has_cftime_1_4_1 and date_range == cftime_range:
        pytest.skip("Test requires cftime 1.4.1.")
    if (freq == "N" or encoding_units == "nanoseconds") and date_range == cftime_range:
        pytest.skip("Nanosecond frequency is not valid for cftime dates.")
    times = date_range("2000", periods=3, freq=freq)
    units = f"{encoding_units} since 2000-01-01"
    encoded, _, _ = coding.times.encode_cf_datetime(times, units)

    numpy_timeunit = coding.times._netcdf_to_numpy_timeunit(encoding_units)
    encoding_units_as_timedelta = np.timedelta64(1, numpy_timeunit)
    if pd.to_timedelta(1, freq) >= encoding_units_as_timedelta:
        assert encoded.dtype == np.int64
    else:
        assert encoded.dtype == np.float64


@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys())
def test_encode_decode_roundtrip_datetime64(freq) -> None:
    # See GH 4045. Prior to GH 4684 this test would fail for frequencies of
    # "S", "L", "U", and "N".
    initial_time = pd.date_range("1678-01-01", periods=1)
    times = initial_time.append(pd.date_range("1968", periods=2, freq=freq))
    variable = Variable(["time"], times)
    encoded = conventions.encode_cf_variable(variable)
    decoded = conventions.decode_cf_variable("time", encoded)
    assert_equal(variable, decoded)


@requires_cftime_1_4_1
@pytest.mark.parametrize("freq", ["U", "L", "S", "T", "H", "D"])
def test_encode_decode_roundtrip_cftime(freq) -> None:
    initial_time = cftime_range("0001", periods=1)
    times = initial_time.append(
        cftime_range("0001", periods=2, freq=freq) + timedelta(days=291000 * 365)
    )
    variable = Variable(["time"], times)
    encoded = conventions.encode_cf_variable(variable)
    decoded = conventions.decode_cf_variable("time", encoded, use_cftime=True)
    assert_equal(variable, decoded)


@requires_cftime
def test__encode_datetime_with_cftime() -> None:
    # See GH 4870. cftime versions > 1.4.0 required us to adapt the
    # way _encode_datetime_with_cftime was written.
    import cftime

    calendar = "gregorian"
    times = cftime.num2date([0, 1], "hours since 2000-01-01", calendar)

    encoding_units = "days since 2000-01-01"
    expected = cftime.date2num(times, encoding_units, calendar)
    result = _encode_datetime_with_cftime(times, encoding_units, calendar)
    np.testing.assert_equal(result, expected)


@pytest.mark.parametrize("calendar", ["gregorian", "Gregorian", "GREGORIAN"])
def test_decode_encode_roundtrip_with_non_lowercase_letters(calendar) -> None:
    # See GH 5093.
    times = [0, 1]
    units = "days since 2000-01-01"
    attrs = {"calendar": calendar, "units": units}
    variable = Variable(["time"], times, attrs)
    decoded = conventions.decode_cf_variable("time", variable)
    encoded = conventions.encode_cf_variable(decoded)

    # Previously this would erroneously be an array of cftime.datetime
    # objects.  We check here that it is decoded properly to np.datetime64.
    assert np.issubdtype(decoded.dtype, np.datetime64)

    # Use assert_identical to ensure that the calendar attribute maintained its
    # original form throughout the roundtripping process, uppercase letters and
    # all.
    assert_identical(variable, encoded)