import functools
import itertools
import warnings
from typing import Any, Callable, Dict

import numpy as np

from . import dtypes, duck_array_ops, utils
from .arithmetic import CoarsenArithmetic
from .options import OPTIONS, _get_keep_attrs
from .pycompat import is_duck_dask_array
from .utils import either_dict_or_kwargs

try:
    import bottleneck
except ImportError:
    # use numpy methods instead
    bottleneck = None


_ROLLING_REDUCE_DOCSTRING_TEMPLATE = """\
Reduce this object's data windows by applying `{name}` along its dimension.

Parameters
----------
keep_attrs : bool, default: None
    If True, the attributes (``attrs``) will be copied from the original
    object to the new one. If False, the new object will be returned
    without attributes. If None uses the global default.
**kwargs : dict
    Additional keyword arguments passed on to `{name}`.

Returns
-------
reduced : same type as caller
    New object with `{name}` applied along its rolling dimension.
"""


class Rolling:
    """An object that implements the moving window pattern.

    See Also
    --------
    xarray.Dataset.groupby
    xarray.DataArray.groupby
    xarray.Dataset.rolling
    xarray.DataArray.rolling
    """

    __slots__ = ("obj", "window", "min_periods", "center", "dim")
    _attributes = ("window", "min_periods", "center", "dim")

    def __init__(self, obj, windows, min_periods=None, center=False):
        """
        Moving window object.

        Parameters
        ----------
        obj : Dataset or DataArray
            Object to window.
        windows : mapping of hashable to int
            A mapping from the name of the dimension to create the rolling
            window along (e.g. `time`) to the size of the moving window.
        min_periods : int, default: None
            Minimum number of observations in window required to have a value
            (otherwise result is NA). The default, None, is equivalent to
            setting min_periods equal to the size of the window.
        center : bool or mapping of hashable to bool, default: False
            Set the labels at the center of the window.

        Returns
        -------
        rolling : type of input argument

        Raises
        ------
        ValueError
            If any window size is not positive, or if ``min_periods`` is
            given and is not positive.
        """
        self.dim, self.window = [], []
        for d, w in windows.items():
            self.dim.append(d)
            if w <= 0:
                raise ValueError("window must be > 0")
            self.window.append(w)

        # ``center`` is stored as one flag per rolled dimension.
        self.center = self._mapping_to_list(center, default=False)
        self.obj = obj

        # attributes
        if min_periods is not None and min_periods <= 0:
            raise ValueError("min_periods must be greater than zero or None")

        # By default every position of the (possibly n-d) window must be valid.
        self.min_periods = np.prod(self.window) if min_periods is None else min_periods

    def __repr__(self):
        """provide a nice str repr of our rolling object"""

        attrs = [
            "{k}->{v}{c}".format(k=k, v=w, c="(center)" if c else "")
            for k, w, c in zip(self.dim, self.window, self.center)
        ]
        return "{klass} [{attrs}]".format(
            klass=self.__class__.__name__, attrs=",".join(attrs)
        )

    def __len__(self):
        """Return the product of the sizes of the rolled dimensions."""
        # ``self.dim`` is a list, so it cannot be used directly as a key into
        # ``obj.sizes``; look up each dimension separately.
        return int(np.prod([self.obj.sizes[d] for d in self.dim]))

    def _reduce_method(  # type: ignore[misc]
        name: str, fillna, rolling_agg_func: Callable = None
    ) -> Callable:
        """Constructs reduction methods built on a numpy reduction function (e.g. sum),
        a bottleneck reduction function (e.g. move_sum), or a Rolling reduction (_mean)."""
        if rolling_agg_func:
            array_agg_func = None
        else:
            array_agg_func = getattr(duck_array_ops, name)

        # Resolves to None when bottleneck is not installed (``bottleneck`` is
        # None) or when it has no ``move_<name>`` function.
        bottleneck_move_func = getattr(bottleneck, "move_" + name, None)

        def method(self, keep_attrs=None, **kwargs):

            keep_attrs = self._get_keep_attrs(keep_attrs)

            return self._numpy_or_bottleneck_reduce(
                array_agg_func,
                bottleneck_move_func,
                rolling_agg_func,
                keep_attrs=keep_attrs,
                fillna=fillna,
                **kwargs,
            )

        method.__name__ = name
        method.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(name=name)
        return method

    def _mean(self, keep_attrs, **kwargs):
        # Rolling mean expressed as rolling sum divided by rolling count.
        # The docstring is attached from the shared template just below.
        result = self.sum(keep_attrs=False, **kwargs) / self.count(keep_attrs=False)
        if keep_attrs:
            result.attrs = self.obj.attrs
        return result

    _mean.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(name="mean")

    argmax = _reduce_method("argmax", dtypes.NINF)
    argmin = _reduce_method("argmin", dtypes.INF)
    max = _reduce_method("max", dtypes.NINF)
    min = _reduce_method("min", dtypes.INF)
    prod = _reduce_method("prod", 1)
    sum = _reduce_method("sum", 0)
    mean = _reduce_method("mean", None, _mean)
    std = _reduce_method("std", None)
    var = _reduce_method("var", None)
    median = _reduce_method("median", None)

    def count(self, keep_attrs=None):
        # Docstring is attached from the shared template just below.
        keep_attrs = self._get_keep_attrs(keep_attrs)
        rolling_count = self._counts(keep_attrs=keep_attrs)
        # Windows with fewer than ``min_periods`` valid entries are masked.
        enough_periods = rolling_count >= self.min_periods
        return rolling_count.where(enough_periods)

    count.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(name="count")

    def _mapping_to_list(
        self, arg, default=None, allow_default=True, allow_allsame=True
    ):
        """Expand ``arg`` into a list with one entry per rolled dimension.

        Parameters
        ----------
        arg : mapping or scalar
            Either a mapping keyed by dimension name or a single value.
        default : optional
            Value used for dimensions missing from a mapping ``arg`` when
            ``allow_default`` is True.
        allow_default : bool, default: True
            If False, a mapping ``arg`` must contain every rolled dimension.
        allow_allsame : bool, default: True
            If True, a non-mapping ``arg`` is repeated for every dimension.

        Returns
        -------
        list
            Values ordered like ``self.dim``.

        Raises
        ------
        KeyError
            If ``allow_default`` is False and a dimension is missing from
            the mapping.
        ValueError
            If ``arg`` is not a mapping, ``allow_allsame`` is False and more
            than one dimension is rolled.
        """
        if utils.is_dict_like(arg):
            if allow_default:
                return [arg.get(d, default) for d in self.dim]
            for d in self.dim:
                if d not in arg:
                    raise KeyError(f"argument has no key {d}.")
            return [arg[d] for d in self.dim]
        elif allow_allsame:  # for single argument
            return [arg] * len(self.dim)
        elif len(self.dim) == 1:
            return [arg]
        else:
            raise ValueError(
                "Mapping argument is necessary for {}d-rolling.".format(len(self.dim))
            )

    def _get_keep_attrs(self, keep_attrs):
        """Resolve ``keep_attrs=None`` to the global default (True if unset)."""
        if keep_attrs is None:
            keep_attrs = _get_keep_attrs(default=True)

        return keep_attrs


class DataArrayRolling(Rolling):
    __slots__ = ("window_labels",)

    def __init__(self, obj, windows, min_periods=None, center=False):
        """
        Moving window object for DataArray.
        You should use DataArray.rolling() method to construct this object
        instead of the class constructor.

        Parameters
        ----------
        obj : DataArray
            Object to window.
        windows : mapping of hashable to int
            A mapping from the name of the dimension to create the rolling
            window along (e.g. `time`) to the size of the moving window.
        min_periods : int, default: None
            Minimum number of observations in window required to have a value
            (otherwise result is NA). The default, None, is equivalent to
            setting min_periods equal to the size of the window.
        center : bool or mapping of hashable to bool, default: False
            Set the labels at the center of the window.

        Returns
        -------
        rolling : type of input argument

        See Also
        --------
        xarray.DataArray.rolling
        xarray.DataArray.groupby
        xarray.Dataset.rolling
        xarray.Dataset.groupby
        """
        super().__init__(obj, windows, min_periods=min_periods, center=center)

        # TODO legacy attribute
        self.window_labels = self.obj[self.dim[0]]

    def __iter__(self):
        """Yield ``(label, window)`` pairs along the single rolled dimension.

        Each window is masked wherever it holds fewer than ``min_periods``
        valid values. The ``center`` setting is honoured so that the windows
        agree with those produced by ``construct``.

        Raises
        ------
        ValueError
            If more than one dimension is rolled.
        """
        if len(self.dim) > 1:
            raise ValueError("__iter__ is only supported for 1d-rolling")

        dim0 = self.dim[0]
        window0 = int(self.window[0])
        # For a centered window the stop index runs ahead of the label by
        # about half a window; otherwise the window ends at the label itself.
        offset = (window0 + 1) // 2 if self.center[0] else 1
        stops = np.arange(offset, len(self.window_labels) + offset)
        starts = stops - window0
        # Windows that would begin before the array are clipped to index 0.
        starts[: window0 - offset] = 0

        for (label, start, stop) in zip(self.window_labels, starts, stops):
            window = self.obj.isel(**{dim0: slice(start, stop)})

            counts = window.count(dim=dim0)
            window = window.where(counts >= self.min_periods)

            yield (label, window)

    def construct(
        self,
        window_dim=None,
        stride=1,
        fill_value=dtypes.NA,
        keep_attrs=None,
        **window_dim_kwargs,
    ):
        """
        Convert this rolling object to xr.DataArray,
        where the window dimension is stacked as a new dimension

        Parameters
        ----------
        window_dim : str or mapping, optional
            A mapping from dimension name to the new window dimension names.
            Just a string can be used for 1d-rolling.
        stride : int or mapping of int, default: 1
            Size of stride for the rolling window.
        fill_value : default: dtypes.NA
            Filling value to match the dimension size.
        keep_attrs : bool, default: None
            If True, the attributes (``attrs``) will be copied from the original
            object to the new one. If False, the new object will be returned
            without attributes. If None uses the global default.
        **window_dim_kwargs : {dim: new_name, ...}, optional
            The keyword arguments form of ``window_dim``.

        Returns
        -------
        DataArray that is a view of the original array. The returned array is
        not writeable.

        Examples
        --------
        >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b"))

        >>> rolling = da.rolling(b=3)
        >>> rolling.construct("window_dim")
        <xarray.DataArray (a: 2, b: 4, window_dim: 3)>
        array([[[nan, nan,  0.],
                [nan,  0.,  1.],
                [ 0.,  1.,  2.],
                [ 1.,  2.,  3.]],
        <BLANKLINE>
               [[nan, nan,  4.],
                [nan,  4.,  5.],
                [ 4.,  5.,  6.],
                [ 5.,  6.,  7.]]])
        Dimensions without coordinates: a, b, window_dim

        >>> rolling = da.rolling(b=3, center=True)
        >>> rolling.construct("window_dim")
        <xarray.DataArray (a: 2, b: 4, window_dim: 3)>
        array([[[nan,  0.,  1.],
                [ 0.,  1.,  2.],
                [ 1.,  2.,  3.],
                [ 2.,  3., nan]],
        <BLANKLINE>
               [[nan,  4.,  5.],
                [ 4.,  5.,  6.],
                [ 5.,  6.,  7.],
                [ 6.,  7., nan]]])
        Dimensions without coordinates: a, b, window_dim

        """

        return self._construct(
            self.obj,
            window_dim=window_dim,
            stride=stride,
            fill_value=fill_value,
            keep_attrs=keep_attrs,
            **window_dim_kwargs,
        )

    def _construct(
        self,
        obj,
        window_dim=None,
        stride=1,
        fill_value=dtypes.NA,
        keep_attrs=None,
        **window_dim_kwargs,
    ):
        """Implementation of ``construct`` operating on an explicit ``obj``.

        Taking ``obj`` as an argument lets ``reduce`` pass in a copy of the
        data with missing values already filled. The remaining parameters
        are those of ``construct``.

        Raises
        ------
        ValueError
            If neither ``window_dim`` nor ``window_dim_kwargs`` is given.
        """
        from .dataarray import DataArray

        keep_attrs = self._get_keep_attrs(keep_attrs)

        if window_dim is None:
            if len(window_dim_kwargs) == 0:
                raise ValueError(
                    "Either window_dim or window_dim_kwargs need to be specified."
                )
            window_dim = {d: window_dim_kwargs[d] for d in self.dim}

        window_dim = self._mapping_to_list(
            window_dim, allow_default=False, allow_allsame=False
        )
        stride = self._mapping_to_list(stride, default=1)

        window = obj.variable.rolling_window(
            self.dim, self.window, window_dim, self.center, fill_value=fill_value
        )

        attrs = obj.attrs if keep_attrs else {}

        result = DataArray(
            window,
            dims=obj.dims + tuple(window_dim),
            coords=obj.coords,
            attrs=attrs,
            name=obj.name,
        )
        return result.isel(
            **{d: slice(None, None, s) for d, s in zip(self.dim, stride)}
        )

    def reduce(self, func, keep_attrs=None, **kwargs):
        """Reduce the items in this group by applying `func` along some
        dimension(s).

        Parameters
        ----------
        func : callable
            Function which can be called in the form
            `func(x, **kwargs)` to return the result of collapsing an
            np.ndarray over the rolling dimension.
        keep_attrs : bool, default: None
            If True, the attributes (``attrs``) will be copied from the original
            object to the new one. If False, the new object will be returned
            without attributes. If None uses the global default.
        **kwargs : dict
            Additional keyword arguments passed on to `func`.

        Returns
        -------
        reduced : DataArray
            Array with summarized data.

        Examples
        --------
        >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b"))
        >>> rolling = da.rolling(b=3)
        >>> rolling.construct("window_dim")
        <xarray.DataArray (a: 2, b: 4, window_dim: 3)>
        array([[[nan, nan,  0.],
                [nan,  0.,  1.],
                [ 0.,  1.,  2.],
                [ 1.,  2.,  3.]],
        <BLANKLINE>
               [[nan, nan,  4.],
                [nan,  4.,  5.],
                [ 4.,  5.,  6.],
                [ 5.,  6.,  7.]]])
        Dimensions without coordinates: a, b, window_dim

        >>> rolling.reduce(np.sum)
        <xarray.DataArray (a: 2, b: 4)>
        array([[nan, nan,  3.,  6.],
               [nan, nan, 15., 18.]])
        Dimensions without coordinates: a, b

        >>> rolling = da.rolling(b=3, min_periods=1)
        >>> rolling.reduce(np.nansum)
        <xarray.DataArray (a: 2, b: 4)>
        array([[ 0.,  1.,  3.,  6.],
               [ 4.,  9., 15., 18.]])
        Dimensions without coordinates: a, b
        """

        keep_attrs = self._get_keep_attrs(keep_attrs)

        # Temporary names for the window dimensions that cannot clash with
        # the existing dimensions of the object.
        rolling_dim = {
            d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}")
            for d in self.dim
        }

        # save memory with reductions GH4325
        fillna = kwargs.pop("fillna", dtypes.NA)
        if fillna is not dtypes.NA:
            obj = self.obj.fillna(fillna)
        else:
            obj = self.obj
        windows = self._construct(
            obj, rolling_dim, keep_attrs=keep_attrs, fill_value=fillna
        )

        result = windows.reduce(
            func, dim=list(rolling_dim.values()), keep_attrs=keep_attrs, **kwargs
        )

        # Find valid windows based on count.
        counts = self._counts(keep_attrs=False)
        return result.where(counts >= self.min_periods)

    def _counts(self, keep_attrs):
        """Number of non-nan entries in each rolling window."""

        rolling_dim = {
            d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}")
            for d in self.dim
        }
        # We use False as the fill_value instead of np.nan, since boolean
        # array is faster to be reduced than object array.
        # The use of skipna==False is also faster since it does not need to
        # copy the strided array.
        counts = (
            self.obj.notnull(keep_attrs=keep_attrs)
            .rolling(
                center={d: self.center[i] for i, d in enumerate(self.dim)},
                **{d: w for d, w in zip(self.dim, self.window)},
            )
            .construct(rolling_dim, fill_value=False, keep_attrs=keep_attrs)
            .sum(dim=list(rolling_dim.values()), skipna=False, keep_attrs=keep_attrs)
        )
        return counts

    def _bottleneck_reduce(self, func, keep_attrs, **kwargs):
        """Apply a bottleneck ``move_*`` function along the rolled dimension.

        Only the first rolled dimension is used. ``**kwargs`` is accepted
        but is not forwarded to ``func``.

        Raises
        ------
        AssertionError
            If the data is a dask array; callers are expected to route dask
            arrays elsewhere.
        """
        from .dataarray import DataArray

        # bottleneck doesn't allow min_count to be 0, although it should
        # work the same as if min_count = 1
        # Note bottleneck only works with 1d-rolling.
        if self.min_periods is not None and self.min_periods == 0:
            min_count = 1
        else:
            min_count = self.min_periods

        axis = self.obj.get_axis_num(self.dim[0])

        padded = self.obj.variable
        if self.center[0]:
            if is_duck_dask_array(padded.data):
                # workaround to make the padded chunk size larger than
                # self.window - 1
                shift = -(self.window[0] + 1) // 2
                offset = (self.window[0] - 1) // 2
                valid = (slice(None),) * axis + (
                    slice(offset, offset + self.obj.shape[axis]),
                )
            else:
                shift = (-self.window[0] // 2) + 1
                valid = (slice(None),) * axis + (slice(-shift, None),)
            # Pad at the end so the trailing-window result can be shifted
            # back to centered labels by slicing with ``valid`` below.
            padded = padded.pad({self.dim[0]: (0, -shift)}, mode="constant")

        if is_duck_dask_array(padded.data):
            raise AssertionError("should not be reachable")
        else:
            values = func(
                padded.data, window=self.window[0], min_count=min_count, axis=axis
            )

        if self.center[0]:
            values = values[valid]

        attrs = self.obj.attrs if keep_attrs else {}

        return DataArray(values, self.obj.coords, attrs=attrs, name=self.obj.name)

    def _numpy_or_bottleneck_reduce(
        self,
        array_agg_func,
        bottleneck_move_func,
        rolling_agg_func,
        keep_attrs,
        fillna,
        **kwargs,
    ):
        """Dispatch a named reduction to the most suitable implementation.

        The bottleneck path is taken when it is enabled in ``OPTIONS``, a
        ``move_*`` function exists, the data is not a dask array and a single
        dimension is rolled. Otherwise ``rolling_agg_func`` is used if given,
        and ``reduce`` with ``array_agg_func`` is the final fallback.
        """
        if "dim" in kwargs:
            warnings.warn(
                f"Reductions are applied along the rolling dimension(s) "
                f"'{self.dim}'. Passing the 'dim' kwarg to reduction "
                f"operations has no effect.",
                DeprecationWarning,
                stacklevel=3,
            )
            del kwargs["dim"]

        if (
            OPTIONS["use_bottleneck"]
            and bottleneck_move_func is not None
            and not is_duck_dask_array(self.obj.data)
            and len(self.dim) == 1
        ):
            # TODO: renable bottleneck with dask after the issues
            # underlying https://github.com/pydata/xarray/issues/2940 are
            # fixed.
            return self._bottleneck_reduce(
                bottleneck_move_func, keep_attrs=keep_attrs, **kwargs
            )
        if rolling_agg_func:
            return rolling_agg_func(self, keep_attrs=self._get_keep_attrs(keep_attrs))
        if fillna is not None:
            # Translate the INF / NINF sentinels into a concrete value that
            # is representable in the array's dtype.
            if fillna is dtypes.INF:
                fillna = dtypes.get_pos_infinity(self.obj.dtype, max_for_int=True)
            elif fillna is dtypes.NINF:
                fillna = dtypes.get_neg_infinity(self.obj.dtype, min_for_int=True)
            kwargs.setdefault("skipna", False)
            kwargs.setdefault("fillna", fillna)

        return self.reduce(array_agg_func, keep_attrs=keep_attrs, **kwargs)


class DatasetRolling(Rolling):
    __slots__ = ("rollings",)

    def __init__(self, obj, windows, min_periods=None, center=False):
        """
        Moving window object for Dataset.
        You should use Dataset.rolling() method to construct this object
        instead of the class constructor.

        Parameters
        ----------
        obj : Dataset
            Object to window.
        windows : mapping of hashable to int
            A mapping from the name of the dimension to create the rolling
            window along (e.g. `time`) to the size of the moving window.
        min_periods : int, default: None
            Minimum number of observations in window required to have a value
            (otherwise result is NA). The default, None, is equivalent to
            setting min_periods equal to the size of the window.
        center : bool or mapping of hashable to bool, default: False
            Set the labels at the center of the window.

        Returns
        -------
        rolling : type of input argument

        Raises
        ------
        KeyError
            If any rolled dimension is not a dimension of ``obj``.

        See Also
        --------
        xarray.Dataset.rolling
        xarray.DataArray.rolling
        xarray.Dataset.groupby
        xarray.DataArray.groupby
        """
        super().__init__(obj, windows, min_periods, center)
        if any(d not in self.obj.dims for d in self.dim):
            raise KeyError(self.dim)
        # Keep each Rolling object as a dictionary
        self.rollings = {}
        for key, da in self.obj.data_vars.items():
            # keeps rollings only for the dataset depending on self.dim
            dims, center = [], {}
            for i, d in enumerate(self.dim):
                if d in da.dims:
                    dims.append(d)
                    center[d] = self.center[i]

            if dims:
                w = {d: windows[d] for d in dims}
                self.rollings[key] = DataArrayRolling(da, w, min_periods, center)

    def _dataset_implementation(self, func, keep_attrs, **kwargs):
        """Apply ``func`` to every per-variable rolling object.

        Variables that do not depend on any rolled dimension are copied
        unchanged, with their attributes dropped when ``keep_attrs`` is False.
        """
        from .dataset import Dataset

        keep_attrs = self._get_keep_attrs(keep_attrs)

        reduced = {}
        for key, da in self.obj.data_vars.items():
            if any(d in da.dims for d in self.dim):
                reduced[key] = func(self.rollings[key], keep_attrs=keep_attrs, **kwargs)
            else:
                reduced[key] = self.obj[key].copy()
                # we need to delete the attrs of the copied DataArray
                if not keep_attrs:
                    reduced[key].attrs = {}

        attrs = self.obj.attrs if keep_attrs else {}
        return Dataset(reduced, coords=self.obj.coords, attrs=attrs)

    def reduce(self, func, keep_attrs=None, **kwargs):
        """Reduce the items in this group by applying `func` along some
        dimension(s).

        Parameters
        ----------
        func : callable
            Function which can be called in the form
            `func(x, **kwargs)` to return the result of collapsing an
            np.ndarray over the rolling dimension.
        keep_attrs : bool, default: None
            If True, the attributes (``attrs``) will be copied from the original
            object to the new one. If False, the new object will be returned
            without attributes. If None uses the global default.
        **kwargs : dict
            Additional keyword arguments passed on to `func`.

        Returns
        -------
        reduced : Dataset
            Dataset with summarized data.
        """
        return self._dataset_implementation(
            functools.partial(DataArrayRolling.reduce, func=func),
            keep_attrs=keep_attrs,
            **kwargs,
        )

    def _counts(self, keep_attrs):
        """Number of non-nan entries in each rolling window, per variable."""
        return self._dataset_implementation(
            DataArrayRolling._counts, keep_attrs=keep_attrs
        )

    def _numpy_or_bottleneck_reduce(
        self,
        array_agg_func,
        bottleneck_move_func,
        rolling_agg_func,
        keep_attrs,
        **kwargs,
    ):
        """Run the DataArray-level reduction dispatch on every variable.

        ``fillna`` is not named here; it travels inside ``**kwargs`` to
        ``DataArrayRolling._numpy_or_bottleneck_reduce``.
        """
        return self._dataset_implementation(
            functools.partial(
                DataArrayRolling._numpy_or_bottleneck_reduce,
                array_agg_func=array_agg_func,
                bottleneck_move_func=bottleneck_move_func,
                rolling_agg_func=rolling_agg_func,
            ),
            keep_attrs=keep_attrs,
            **kwargs,
        )

    def construct(
        self,
        window_dim=None,
        stride=1,
        fill_value=dtypes.NA,
        keep_attrs=None,
        **window_dim_kwargs,
    ):
        """
        Convert this rolling object to xr.Dataset,
        where the window dimension is stacked as a new dimension

        Parameters
        ----------
        window_dim : str or mapping, optional
            A mapping from dimension name to the new window dimension names.
            Just a string can be used for 1d-rolling.
        stride : int or mapping of int, default: 1
            Size of stride for the rolling window.
        fill_value : Any, default: dtypes.NA
            Filling value to match the dimension size.
        keep_attrs : bool, default: None
            If True, the attributes (``attrs``) will be copied from the original
            object to the new one. If False, the new object will be returned
            without attributes. If None uses the global default.
        **window_dim_kwargs : {dim: new_name, ...}, optional
            The keyword arguments form of ``window_dim``.

        Returns
        -------
        Dataset with variables converted from rolling object.

        Raises
        ------
        ValueError
            If neither ``window_dim`` nor ``window_dim_kwargs`` is given.
        """

        from .dataset import Dataset

        keep_attrs = self._get_keep_attrs(keep_attrs)

        if window_dim is None:
            if len(window_dim_kwargs) == 0:
                raise ValueError(
                    "Either window_dim or window_dim_kwargs need to be specified."
                )
            window_dim = {d: window_dim_kwargs[d] for d in self.dim}

        window_dim = self._mapping_to_list(
            window_dim, allow_default=False, allow_allsame=False
        )
        strides = self._mapping_to_list(stride, default=1)

        dataset = {}
        for key, da in self.obj.data_vars.items():
            # keeps rollings only for the dataset depending on self.dim
            dims = [d for d in self.dim if d in da.dims]
            if dims:
                wi = {d: window_dim[i] for i, d in enumerate(self.dim) if d in da.dims}
                st = {d: strides[i] for i, d in enumerate(self.dim) if d in da.dims}

                dataset[key] = self.rollings[key].construct(
                    window_dim=wi,
                    fill_value=fill_value,
                    stride=st,
                    keep_attrs=keep_attrs,
                )
            else:
                dataset[key] = da.copy()

            # as the DataArrays can be copied we need to delete the attrs
            if not keep_attrs:
                dataset[key].attrs = {}

        # Each variable above has already been strided by
        # DataArrayRolling.construct. Stride the coordinates once to match,
        # instead of striding the assembled Dataset (which would apply the
        # stride to the variables a second time).
        coords = self.obj.isel(
            {d: slice(None, None, s) for d, s in zip(self.dim, strides)}
        ).coords

        attrs = self.obj.attrs if keep_attrs else {}

        return Dataset(dataset, coords=coords, attrs=attrs)


class Coarsen(CoarsenArithmetic):
    """An object that implements the coarsen.

    See Also
    --------
    Dataset.coarsen
    DataArray.coarsen
    """

    __slots__ = (
        "obj",
        "boundary",
        "coord_func",
        "windows",
        "side",
        "trim_excess",
    )
    _attributes = ("windows", "side", "trim_excess")

    def __init__(self, obj, windows, boundary, side, coord_func):
        """
        Moving window object.

        Parameters
        ----------
        obj : Dataset or DataArray
            Object to window.
        windows : mapping of hashable to int
            A mapping from the name of the dimension to coarsen along
            (e.g. `time`) to the size of the coarsening window.
        boundary : 'exact' | 'trim' | 'pad'
            If 'exact', a ValueError will be raised if dimension size is not a
            multiple of window size. If 'trim', the excess indexes are trimmed.
            If 'pad', NA will be padded.
        side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
        coord_func : function or mapping from coordinate name to func.
            Coordinates without an entry are reduced with
            ``duck_array_ops.mean``.

        Returns
        -------
        coarsen

        Raises
        ------
        ValueError
            If a dimension in ``windows`` is not a dimension of ``obj``.
        """
        self.obj = obj
        self.windows = windows
        self.side = side
        self.boundary = boundary

        absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
        if absent_dims:
            raise ValueError(
                f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
            )
        if utils.is_dict_like(coord_func):
            # Work on a copy so the defaults filled in below do not leak into
            # the mapping object owned by the caller.
            coord_func = dict(coord_func)
        else:
            coord_func = {d: coord_func for d in self.obj.dims}
        for c in self.obj.coords:
            if c not in coord_func:
                coord_func[c] = duck_array_ops.mean
        self.coord_func = coord_func

    def _get_keep_attrs(self, keep_attrs):
        """Resolve ``keep_attrs=None`` to the global default (True if unset)."""
        if keep_attrs is None:
            keep_attrs = _get_keep_attrs(default=True)

        return keep_attrs

    def __repr__(self):
        """provide a nice str repr of our coarsen object"""

        # ``getattr`` with a default tolerates slots that were never assigned.
        attrs = [
            "{k}->{v}".format(k=k, v=getattr(self, k))
            for k in self._attributes
            if getattr(self, k, None) is not None
        ]
        return "{klass} [{attrs}]".format(
            klass=self.__class__.__name__, attrs=",".join(attrs)
        )

    def construct(
        self,
        window_dim=None,
        keep_attrs=None,
        **window_dim_kwargs,
    ):
        """
        Convert this Coarsen object to a DataArray or Dataset,
        where the coarsening dimension is split or reshaped to two
        new dimensions.

        Parameters
        ----------
        window_dim: mapping
            A mapping from existing dimension name to new dimension names.
            The size of the second dimension will be the length of the
            coarsening window.
        keep_attrs: bool, optional
            Preserve attributes if True
        **window_dim_kwargs : {dim: new_name, ...}
            The keyword arguments form of ``window_dim``.

        Returns
        -------
        Dataset or DataArray with reshaped dimensions

        Raises
        ------
        ValueError
            If no window dimensions are given, if an entry does not provide
            exactly two new dimension names, or if the keys of ``window_dim``
            do not match the coarsened dimensions.

        Examples
        --------
        >>> da = xr.DataArray(np.arange(24), dims="time")
        >>> da.coarsen(time=12).construct(time=("year", "month"))
        <xarray.DataArray (year: 2, month: 12)>
        array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
               [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])
        Dimensions without coordinates: year, month

        See Also
        --------
        DataArrayRolling.construct
        DatasetRolling.construct
        """

        from .dataarray import DataArray
        from .dataset import Dataset

        window_dim = either_dict_or_kwargs(
            window_dim, window_dim_kwargs, "Coarsen.construct"
        )
        if not window_dim:
            raise ValueError(
                "Either window_dim or window_dim_kwargs need to be specified."
            )

        bad_new_dims = tuple(
            win
            for win, dims in window_dim.items()
            if len(dims) != 2 or isinstance(dims, str)
        )
        if bad_new_dims:
            raise ValueError(
                f"Please provide exactly two dimension names for the following coarsening dimensions: {bad_new_dims}"
            )

        if keep_attrs is None:
            keep_attrs = _get_keep_attrs(default=True)

        # Coarsened dimensions that have no entry in ``window_dim``.
        missing_dims = set(self.windows) - set(window_dim)
        if missing_dims:
            raise ValueError(
                f"'window_dim' must contain entries for all dimensions to coarsen. Missing {missing_dims}"
            )
        # Entries of ``window_dim`` that are not being coarsened.
        extra_windows = set(window_dim) - set(self.windows)
        if extra_windows:
            raise ValueError(
                f"'window_dim' includes dimensions that will not be coarsened: {extra_windows}"
            )

        reshaped = Dataset()
        if isinstance(self.obj, DataArray):
            obj = self.obj._to_temp_dataset()
        else:
            obj = self.obj

        reshaped.attrs = obj.attrs if keep_attrs else {}

        for key, var in obj.variables.items():
            # Replace each coarsened dimension with its pair of new names.
            reshaped_dims = tuple(
                itertools.chain(*[window_dim.get(dim, [dim]) for dim in list(var.dims)])
            )
            if reshaped_dims != var.dims:
                windows = {w: self.windows[w] for w in window_dim if w in var.dims}
                reshaped_var, _ = var.coarsen_reshape(windows, self.boundary, self.side)
                attrs = var.attrs if keep_attrs else {}
                reshaped[key] = (reshaped_dims, reshaped_var, attrs)
            else:
                reshaped[key] = var

        # Every coordinate of the original object stays a coordinate,
        # including non-dimension coordinates that are not keys of
        # ``window_dim``.
        should_be_coords = set(self.obj.coords)
        result = reshaped.set_coords(should_be_coords)
        if isinstance(self.obj, DataArray):
            return self.obj._from_temp_dataset(result)
        else:
            return result


class DataArrayCoarsen(Coarsen):
    __slots__ = ()

    _reduce_extra_args_docstring = """"""

    @classmethod
    def _reduce_method(
        cls, func: Callable, include_skipna: bool = False, numeric_only: bool = False
    ):
        """
        Return a wrapped function for injecting reduction methods.
        see ops.inject_reduce_methods
        """
        # NOTE(review): this dict is shadowed by ``wrapped_func``'s own
        # ``**kwargs`` and is therefore never forwarded to ``func``.
        kwargs: Dict[str, Any] = {}
        if include_skipna:
            kwargs["skipna"] = None

        def wrapped_func(self, keep_attrs: bool = None, **kwargs):
            from .dataarray import DataArray

            keep_attrs = self._get_keep_attrs(keep_attrs)

            reduced = self.obj.variable.coarsen(
                self.windows, func, self.boundary, self.side, keep_attrs, **kwargs
            )
            coords = {}
            for c, v in self.obj.coords.items():
                if c == self.obj.name:
                    coords[c] = reduced
                else:
                    # Only coordinates along a coarsened dimension are
                    # reduced; the others are passed through unchanged.
                    if any(d in self.windows for d in v.dims):
                        coords[c] = v.variable.coarsen(
                            self.windows,
                            self.coord_func[c],
                            self.boundary,
                            self.side,
                            keep_attrs,
                            **kwargs,
                        )
                    else:
                        coords[c] = v
            return DataArray(
                reduced, dims=self.obj.dims, coords=coords, name=self.obj.name
            )

        return wrapped_func

    def reduce(self, func: Callable, keep_attrs: bool = None, **kwargs):
        """Reduce the items in this group by applying `func` along some
        dimension(s).

        Parameters
        ----------
        func : callable
            Function which can be called in the form `func(x, axis, **kwargs)`
            to return the result of collapsing an np.ndarray over the coarsening
            dimensions.  It must be possible to provide the `axis` argument
            with a tuple of integers.
        keep_attrs : bool, default: None
            If True, the attributes (``attrs``) will be copied from the original
            object to the new one. If False, the new object will be returned
            without attributes. If None uses the global default.
        **kwargs : dict
            Additional keyword arguments passed on to `func`.

        Returns
        -------
        reduced : DataArray
            Array with summarized data.

        Examples
        --------
        >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b"))
        >>> coarsen = da.coarsen(b=2)
        >>> coarsen.reduce(np.sum)
        <xarray.DataArray (a: 2, b: 2)>
        array([[ 1,  5],
               [ 9, 13]])
        Dimensions without coordinates: a, b
        """
        wrapped_func = self._reduce_method(func)
        return wrapped_func(self, keep_attrs=keep_attrs, **kwargs)


class DatasetCoarsen(Coarsen):
    __slots__ = ()

    _reduce_extra_args_docstring = """"""

    @classmethod
    def _reduce_method(
        cls, func: Callable, include_skipna: bool = False, numeric_only: bool = False
    ):
        """
        Return a wrapped function for injecting reduction methods.
        see ops.inject_reduce_methods
        """
        # NOTE(review): this dict is shadowed by ``wrapped_func``'s own
        # ``**kwargs`` and is therefore never forwarded to ``func``.
        kwargs: Dict[str, Any] = {}
        if include_skipna:
            kwargs["skipna"] = None

        def wrapped_func(self, keep_attrs: bool = None, **kwargs):
            from .dataset import Dataset

            keep_attrs = self._get_keep_attrs(keep_attrs)

            if keep_attrs:
                attrs = self.obj.attrs
            else:
                attrs = {}

            reduced = {}
            for key, da in self.obj.data_vars.items():
                reduced[key] = da.variable.coarsen(
                    self.windows,
                    func,
                    self.boundary,
                    self.side,
                    keep_attrs=keep_attrs,
                    **kwargs,
                )

            coords = {}
            for c, v in self.obj.coords.items():
                # variable.coarsen returns variables not containing the window dims
                # unchanged (maybe removes attrs)
                coords[c] = v.variable.coarsen(
                    self.windows,
                    self.coord_func[c],
                    self.boundary,
                    self.side,
                    keep_attrs=keep_attrs,
                    **kwargs,
                )

            return Dataset(reduced, coords=coords, attrs=attrs)

        return wrapped_func

    def reduce(self, func: Callable, keep_attrs=None, **kwargs):
        """Reduce the items in this group by applying `func` along some
        dimension(s).

        Parameters
        ----------
        func : callable
            Function which can be called in the form `func(x, axis, **kwargs)`
            to return the result of collapsing an np.ndarray over the coarsening
            dimensions.  It must be possible to provide the `axis` argument with
            a tuple of integers.
        keep_attrs : bool, default: None
            If True, the attributes (``attrs``) will be copied from the original
            object to the new one. If False, the new object will be returned
            without attributes. If None uses the global default.
        **kwargs : dict
            Additional keyword arguments passed on to `func`.

        Returns
        -------
        reduced : Dataset
            Arrays with summarized data.
        """
        wrapped_func = self._reduce_method(func)
        return wrapped_func(self, keep_attrs=keep_attrs, **kwargs)