import warnings

import numpy as np
import pandas as pd

from .common import (
    _contains_datetime_like_objects,
    is_np_datetime_like,
    is_np_timedelta_like,
)
from .npcompat import DTypeLike
from .pycompat import is_duck_dask_array


def _season_from_months(months):
    """Compute season (DJF, MAM, JJA, SON) from month ordinal.

    Parameters
    ----------
    months : array-like
        Month numbers (January=1, ..., December=12). Missing months (NaN,
        which is what pandas reports for NaT) are allowed.

    Returns
    -------
    seasons : np.ndarray or numpy string scalar
        Season label for each month; missing months are labelled ``"nan"``.
    """
    # TODO: Move "season" accessor upstream into pandas
    # The extra trailing entry is the label used for missing months.
    seasons = np.array(["DJF", "MAM", "JJA", "SON", "nan"])
    months = np.asarray(months)

    # NaN months would otherwise trigger "invalid value" floating point
    # warnings in the integer-style arithmetic below.
    with np.errstate(invalid="ignore"):
        idx = (months // 3) % 4

    # A NaN cannot be used as an index: redirect it to the "nan" label and
    # make sure the index array is of integer type.
    idx = np.where(np.isnan(idx), 4, idx).astype(int)
    return seasons[idx]


def _access_through_cftimeindex(values, name):
    """Coerce an array of datetime-like values to a CFTimeIndex
    and access requested datetime component.

    Parameters
    ----------
    values : array-like
        Array of datetime-like values; it is flattened before building the
        index and the result is reshaped back to ``values.shape``.
    name : str
        Name of the datetime component to access.

    Raises
    ------
    AttributeError
        If ``name`` is ``"date"``, which is not available through a
        CFTimeIndex.
    """
    from ..coding.cftimeindex import CFTimeIndex

    values_as_cftimeindex = CFTimeIndex(values.ravel())
    if name == "season":
        # "season" is not an index attribute; derive it from the months.
        months = values_as_cftimeindex.month
        field_values = _season_from_months(months)
    elif name == "date":
        raise AttributeError(
            "'CFTimeIndex' object has no attribute `date`. Consider using the floor method instead, for instance: `.time.dt.floor('D')`."
        )
    else:
        field_values = getattr(values_as_cftimeindex, name)
    return field_values.reshape(values.shape)


def _access_through_series(values, name):
    """Coerce an array of datetime-like values to a pandas Series and
    access requested datetime component.

    Parameters
    ----------
    values : array-like
        Array of datetime-like values; it is flattened before building the
        Series.
    name : str
        Name of the datetime component to access through ``Series.dt``.

    Returns
    -------
    field_values : np.ndarray
        Array of shape ``values.shape``, except for ``"isocalendar"`` where
        the shape is ``(3, *values.shape)``.
    """
    values_as_series = pd.Series(values.ravel())
    if name == "season":
        # "season" is not a pandas field; derive it from the months.
        months = values_as_series.dt.month.values
        field_values = _season_from_months(months)
    elif name == "isocalendar":
        # isocalendar returns iso- year, week, and weekday -> reshape
        field_values = np.array(values_as_series.dt.isocalendar(), dtype=np.int64)
        return field_values.T.reshape(3, *values.shape)
    else:
        field_values = getattr(values_as_series.dt, name).values
    return field_values.reshape(values.shape)


def _get_date_field(values, name, dtype):
    """Indirectly access pandas' libts.get_date_field by wrapping data
    as a Series and calling through `.dt` attribute.

    Parameters
    ----------
    values : np.ndarray or dask.array-like
        Array-like container of datetime-like values
    name : str
        Name of datetime field to access
    dtype : dtype-like
        dtype for output date field values. Only used to declare the output
        dtype when ``values`` is a dask array.

    Returns
    -------
    datetime_fields : same type as values
        Array-like of datetime fields accessed for each element in values

    """
    if is_np_datetime_like(values.dtype):
        access_method = _access_through_series
    else:
        access_method = _access_through_cftimeindex

    if is_duck_dask_array(values):
        from dask.array import map_blocks

        new_axis = chunks = None
        # isocalendar adds an axis
        if name == "isocalendar":
            # NOTE(review): ``chunksize`` describes a single block shape;
            # confirm this is adequate for unevenly chunked inputs.
            chunks = (3,) + values.chunksize
            new_axis = 0

        return map_blocks(
            access_method, values, name, dtype=dtype, new_axis=new_axis, chunks=chunks
        )
    else:
        return access_method(values, name)


def _round_through_series_or_index(values, name, freq):
    """Coerce an array of datetime-like values to a pandas Series or xarray
    CFTimeIndex and apply requested rounding.

    Parameters
    ----------
    values : array-like
        Array of datetime-like values; it is flattened before rounding and
        the result is reshaped back to ``values.shape``.
    name : str
        Name of the rounding method to call.
    freq : str
        Frequency string passed to the rounding method as ``freq``.
    """
    from ..coding.cftimeindex import CFTimeIndex

    if is_np_datetime_like(values.dtype):
        values_as_series = pd.Series(values.ravel())
        method = getattr(values_as_series.dt, name)
    else:
        values_as_cftimeindex = CFTimeIndex(values.ravel())
        method = getattr(values_as_cftimeindex, name)

    field_values = method(freq=freq).values

    return field_values.reshape(values.shape)


def _round_field(values, name, freq):
    """Indirectly access rounding functions by wrapping data
    as a Series or CFTimeIndex

    Parameters
    ----------
    values : np.ndarray or dask.array-like
        Array-like container of datetime-like values
    name : {"ceil", "floor", "round"}
        Name of rounding function
    freq : str
        a freq string indicating the rounding resolution

    Returns
    -------
    rounded timestamps : same type as values
        Array-like of rounded values, one for each element in values

    """
    if is_duck_dask_array(values):
        from dask.array import map_blocks

        # Rounding keeps the element type, so numpy datetime-like input
        # yields output of the same dtype. Declaring a bare ``np.datetime64``
        # here would drop the unit and mislabel timedelta64 input, which
        # reaches this function through the same accessor methods.
        if is_np_datetime_like(values.dtype):
            dtype = values.dtype
        else:
            dtype = np.dtype("O")
        return map_blocks(
            _round_through_series_or_index, values, name, freq=freq, dtype=dtype
        )
    else:
        return _round_through_series_or_index(values, name, freq)


def _strftime_through_cftimeindex(values, date_format):
    """Coerce an array of cftime-like values to a CFTimeIndex
    and apply string formatting
    """
    from ..coding.cftimeindex import CFTimeIndex

    values_as_cftimeindex = CFTimeIndex(values.ravel())

    field_values = values_as_cftimeindex.strftime(date_format)
    return field_values.values.reshape(values.shape)


def _strftime_through_series(values, date_format):
    """Coerce an array of datetime-like values to a pandas Series and
    apply string formatting
    """
    values_as_series = pd.Series(values.ravel())
    strs = values_as_series.dt.strftime(date_format)
    return strs.values.reshape(values.shape)


def _strftime(values, date_format):
    """Format each element of ``values`` as a string using ``date_format``.

    Numpy datetime-like arrays are formatted through a pandas Series, all
    other arrays through a CFTimeIndex. Dask arrays are processed block by
    block.
    """
    if is_np_datetime_like(values.dtype):
        access_method = _strftime_through_series
    else:
        access_method = _strftime_through_cftimeindex
    if is_duck_dask_array(values):
        from dask.array import map_blocks

        return map_blocks(access_method, values, date_format)
    else:
        return access_method(values, date_format)


class Properties:
    """Shared base of the ``.dt`` accessors.

    Wraps an array-like object and provides the field-accessor factory and
    the rounding methods used by the concrete accessors.
    """

    def __init__(self, obj):
        self._obj = obj

    @staticmethod
    def _tslib_field_accessor(
        name: str, docstring: str = None, dtype: DTypeLike = None
    ):
        """Build a read-only property returning the field ``name``.

        Parameters
        ----------
        name : str
            Name of the datetime field; also used as the name of the result.
        docstring : str, optional
            Docstring attached to the generated property.
        dtype : dtype-like, optional
            dtype of the field values. If omitted, the dtype of the wrapped
            object is used.
        """

        def f(self, dtype=dtype):
            if dtype is None:
                dtype = self._obj.dtype
            obj_type = type(self._obj)
            result = _get_date_field(self._obj.data, name, dtype)
            return obj_type(
                result, name=name, coords=self._obj.coords, dims=self._obj.dims
            )

        f.__name__ = name
        f.__doc__ = docstring
        return property(f)

    def _tslib_round_accessor(self, name, freq):
        """Apply the rounding method ``name`` at resolution ``freq`` and wrap
        the result in a new object of the same type as the wrapped one.
        """
        obj_type = type(self._obj)
        result = _round_field(self._obj.data, name, freq)
        return obj_type(result, name=name, coords=self._obj.coords, dims=self._obj.dims)

    def floor(self, freq):
        """
        Round timestamps downward to specified frequency resolution.

        Parameters
        ----------
        freq : str
            a freq string indicating the rounding resolution e.g. "D" for daily resolution

        Returns
        -------
        floor-ed timestamps : same type as values
            Array-like of floor-ed values, one for each element in values
        """

        return self._tslib_round_accessor("floor", freq)

    def ceil(self, freq):
        """
        Round timestamps upward to specified frequency resolution.

        Parameters
        ----------
        freq : str
            a freq string indicating the rounding resolution e.g. "D" for daily resolution

        Returns
        -------
        ceil-ed timestamps : same type as values
            Array-like of ceil-ed values, one for each element in values
        """
        return self._tslib_round_accessor("ceil", freq)

    def round(self, freq):
        """
        Round timestamps to specified frequency resolution.

        Parameters
        ----------
        freq : str
            a freq string indicating the rounding resolution e.g. "D" for daily resolution

        Returns
        -------
        rounded timestamps : same type as values
            Array-like of rounded values, one for each element in values
        """
        return self._tslib_round_accessor("round", freq)


class DatetimeAccessor(Properties):
    """Access datetime fields for DataArrays with datetime-like dtypes.

    Fields can be accessed through the `.dt` attribute
    for applicable DataArrays.

    Examples
    --------
    >>> dates = pd.date_range(start="2000/01/01", freq="D", periods=10)
    >>> ts = xr.DataArray(dates, dims=("time"))
    >>> ts
    <xarray.DataArray (time: 10)>
    array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000',
           '2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000',
           '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000',
           '2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000',
           '2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'],
          dtype='datetime64[ns]')
    Coordinates:
      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
    >>> ts.dt  # doctest: +ELLIPSIS
    <xarray.core.accessor_dt.DatetimeAccessor object at 0x...>
    >>> ts.dt.dayofyear
    <xarray.DataArray 'dayofyear' (time: 10)>
    array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
    Coordinates:
      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
    >>> ts.dt.quarter
    <xarray.DataArray 'quarter' (time: 10)>
    array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    Coordinates:
      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10

    """

    def strftime(self, date_format):
        """
        Return an array of formatted strings specified by date_format, which
        supports the same string format as the python standard library. Details
        of the string format can be found in `python string format doc
        <https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior>`__

        Parameters
        ----------
        date_format : str
            date format string (e.g. "%Y-%m-%d")

        Returns
        -------
        formatted strings : same type as values
            Array-like of strings formatted for each element in values

        Examples
        --------
        >>> import datetime
        >>> rng = xr.Dataset({"time": datetime.datetime(2000, 1, 1)})
        >>> rng["time"].dt.strftime("%B %d, %Y, %r")
        <xarray.DataArray 'strftime' ()>
        array('January 01, 2000, 12:00:00 AM', dtype=object)
        """
        obj_type = type(self._obj)

        result = _strftime(self._obj.data, date_format)

        return obj_type(
            result, name="strftime", coords=self._obj.coords, dims=self._obj.dims
        )

    def isocalendar(self):
        """Dataset containing ISO year, week number, and weekday.

        Raises
        ------
        AttributeError
            If the wrapped data does not have a numpy datetime-like dtype.

        Notes
        -----
        The iso year and weekday differ from the nominal year and weekday.
        """

        from .dataset import Dataset

        if not is_np_datetime_like(self._obj.data.dtype):
            raise AttributeError("'CFTimeIndex' object has no attribute 'isocalendar'")

        # ``values`` carries an extra leading axis of length 3:
        # (year, week, weekday).
        values = _get_date_field(self._obj.data, "isocalendar", np.int64)

        obj_type = type(self._obj)
        data_vars = {}
        for i, name in enumerate(["year", "week", "weekday"]):
            data_vars[name] = obj_type(
                values[i], name=name, coords=self._obj.coords, dims=self._obj.dims
            )

        return Dataset(data_vars)

    year = Properties._tslib_field_accessor(
        "year", "The year of the datetime", np.int64
    )
    month = Properties._tslib_field_accessor(
        "month", "The month as January=1, December=12", np.int64
    )
    day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64)
    hour = Properties._tslib_field_accessor(
        "hour", "The hours of the datetime", np.int64
    )
    minute = Properties._tslib_field_accessor(
        "minute", "The minutes of the datetime", np.int64
    )
    second = Properties._tslib_field_accessor(
        "second", "The seconds of the datetime", np.int64
    )
    microsecond = Properties._tslib_field_accessor(
        "microsecond", "The microseconds of the datetime", np.int64
    )
    nanosecond = Properties._tslib_field_accessor(
        "nanosecond", "The nanoseconds of the datetime", np.int64
    )

    @property
    def weekofyear(self):
        """The week ordinal of the year (deprecated, see ``isocalendar``)."""

        warnings.warn(
            "dt.weekofyear and dt.week have been deprecated. Please use "
            "dt.isocalendar().week instead.",
            FutureWarning,
        )

        weekofyear = self.isocalendar().week

        return weekofyear

    week = weekofyear
    dayofweek = Properties._tslib_field_accessor(
        "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
    )
    weekday = dayofweek

    weekday_name = Properties._tslib_field_accessor(
        "weekday_name", "The name of day in a week", object
    )

    dayofyear = Properties._tslib_field_accessor(
        "dayofyear", "The ordinal day of the year", np.int64
    )
    # The dtype must be given explicitly: without it the accessor falls back
    # to the (datetime) dtype of the wrapped array, which is not the dtype of
    # the integer quarter values.
    quarter = Properties._tslib_field_accessor(
        "quarter", "The quarter of the date", np.int64
    )
    days_in_month = Properties._tslib_field_accessor(
        "days_in_month", "The number of days in the month", np.int64
    )
    daysinmonth = days_in_month

    season = Properties._tslib_field_accessor("season", "Season of the year", object)

    time = Properties._tslib_field_accessor(
        "time", "Timestamps corresponding to datetimes", object
    )

    date = Properties._tslib_field_accessor(
        "date", "Date corresponding to datetimes", object
    )

    is_month_start = Properties._tslib_field_accessor(
        "is_month_start",
        "Indicates whether the date is the first day of the month.",
        bool,
    )
    is_month_end = Properties._tslib_field_accessor(
        "is_month_end", "Indicates whether the date is the last day of the month.", bool
    )
    is_quarter_start = Properties._tslib_field_accessor(
        "is_quarter_start",
        "Indicator for whether the date is the first day of a quarter.",
        bool,
    )
    is_quarter_end = Properties._tslib_field_accessor(
        "is_quarter_end",
        "Indicator for whether the date is the last day of a quarter.",
        bool,
    )
    is_year_start = Properties._tslib_field_accessor(
        "is_year_start", "Indicate whether the date is the first day of a year.", bool
    )
    is_year_end = Properties._tslib_field_accessor(
        "is_year_end", "Indicate whether the date is the last day of the year.", bool
    )
    is_leap_year = Properties._tslib_field_accessor(
        "is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool
    )


class TimedeltaAccessor(Properties):
    """Access Timedelta fields for DataArrays with Timedelta-like dtypes.

    Fields can be accessed through the `.dt` attribute for applicable DataArrays.

    Examples
    --------
    >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20)
    >>> ts = xr.DataArray(dates, dims=("time"))
    >>> ts
    <xarray.DataArray (time: 20)>
    array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000,
           172800000000000, 194400000000000, 216000000000000, 237600000000000,
           259200000000000, 280800000000000, 302400000000000, 324000000000000,
           345600000000000, 367200000000000, 388800000000000, 410400000000000,
           432000000000000, 453600000000000, 475200000000000, 496800000000000],
          dtype='timedelta64[ns]')
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt  # doctest: +ELLIPSIS
    <xarray.core.accessor_dt.TimedeltaAccessor object at 0x...>
    >>> ts.dt.days
    <xarray.DataArray 'days' (time: 20)>
    array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt.microseconds
    <xarray.DataArray 'microseconds' (time: 20)>
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    >>> ts.dt.seconds
    <xarray.DataArray 'seconds' (time: 20)>
    array([    0, 21600, 43200, 64800,     0, 21600, 43200, 64800,     0,
           21600, 43200, 64800,     0, 21600, 43200, 64800,     0, 21600,
           43200, 64800])
    Coordinates:
      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
    """

    days = Properties._tslib_field_accessor(
        "days", "Number of days for each element.", np.int64
    )
    seconds = Properties._tslib_field_accessor(
        "seconds",
        "Number of seconds (>= 0 and less than 1 day) for each element.",
        np.int64,
    )
    microseconds = Properties._tslib_field_accessor(
        "microseconds",
        "Number of microseconds (>= 0 and less than 1 second) for each element.",
        np.int64,
    )
    nanoseconds = Properties._tslib_field_accessor(
        "nanoseconds",
        "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.",
        np.int64,
    )


class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor):
    """Entry point that dispatches to the accessor matching the dtype.

    Constructing this class returns a ``TimedeltaAccessor`` for
    timedelta-like data and a ``DatetimeAccessor`` otherwise.

    Raises
    ------
    TypeError
        If the object does not contain datetime-like values.
    """

    def __new__(cls, obj):
        # CombinedDatetimelikeAccessor isn't really instantiated. Instead
        # we need to choose which parent (datetime or timedelta) is
        # appropriate. Since we're checking the dtypes anyway, we'll just
        # do all the validation here.
        if not _contains_datetime_like_objects(obj):
            raise TypeError(
                "'.dt' accessor only available for "
                "DataArray with datetime64 timedelta64 dtype or "
                "for arrays containing cftime datetime "
                "objects."
            )

        if is_np_timedelta_like(obj.dtype):
            return TimedeltaAccessor(obj)
        else:
            return DatetimeAccessor(obj)