1from __future__ import annotations 2 3import datetime 4import warnings 5from typing import ( 6 TYPE_CHECKING, 7 Any, 8 Callable, 9 Dict, 10 Hashable, 11 Iterable, 12 List, 13 Mapping, 14 Optional, 15 Sequence, 16 Tuple, 17 Union, 18 cast, 19) 20 21import numpy as np 22import pandas as pd 23 24from ..plot.plot import _PlotMethods 25from ..plot.utils import _get_units_from_attrs 26from . import ( 27 computation, 28 dtypes, 29 groupby, 30 indexing, 31 ops, 32 pdcompat, 33 resample, 34 rolling, 35 utils, 36 weighted, 37) 38from .accessor_dt import CombinedDatetimelikeAccessor 39from .accessor_str import StringAccessor 40from .alignment import ( 41 _broadcast_helper, 42 _get_broadcast_dims_map_common_coords, 43 align, 44 reindex_like_indexers, 45) 46from .arithmetic import DataArrayArithmetic 47from .common import AbstractArray, DataWithCoords, get_chunksizes 48from .computation import unify_chunks 49from .coordinates import ( 50 DataArrayCoordinates, 51 assert_coordinate_consistent, 52 remap_label_indexers, 53) 54from .dataset import Dataset, split_indexes 55from .formatting import format_item 56from .indexes import Index, Indexes, default_indexes, propagate_indexes 57from .indexing import is_fancy_indexer 58from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords 59from .options import OPTIONS, _get_keep_attrs 60from .utils import ( 61 Default, 62 HybridMappingProxy, 63 ReprObject, 64 _default, 65 either_dict_or_kwargs, 66) 67from .variable import ( 68 IndexVariable, 69 Variable, 70 as_compatible_data, 71 as_variable, 72 assert_unique_multiindex_level_names, 73) 74 75if TYPE_CHECKING: 76 try: 77 from dask.delayed import Delayed 78 except ImportError: 79 Delayed = None 80 try: 81 from cdms2 import Variable as cdms2_Variable 82 except ImportError: 83 cdms2_Variable = None 84 try: 85 from iris.cube import Cube as iris_Cube 86 except ImportError: 87 iris_Cube = None 88 89 from .types import T_DataArray, T_Xarray 90 91 92def _infer_coords_and_dims( 93 
shape, coords, dims 94) -> "Tuple[Dict[Any, Variable], Tuple[Hashable, ...]]": 95 """All the logic for creating a new DataArray""" 96 97 if ( 98 coords is not None 99 and not utils.is_dict_like(coords) 100 and len(coords) != len(shape) 101 ): 102 raise ValueError( 103 f"coords is not dict-like, but it has {len(coords)} items, " 104 f"which does not match the {len(shape)} dimensions of the " 105 "data" 106 ) 107 108 if isinstance(dims, str): 109 dims = (dims,) 110 111 if dims is None: 112 dims = [f"dim_{n}" for n in range(len(shape))] 113 if coords is not None and len(coords) == len(shape): 114 # try to infer dimensions from coords 115 if utils.is_dict_like(coords): 116 dims = list(coords.keys()) 117 else: 118 for n, (dim, coord) in enumerate(zip(dims, coords)): 119 coord = as_variable(coord, name=dims[n]).to_index_variable() 120 dims[n] = coord.name 121 dims = tuple(dims) 122 elif len(dims) != len(shape): 123 raise ValueError( 124 "different number of dimensions on data " 125 f"and dims: {len(shape)} vs {len(dims)}" 126 ) 127 else: 128 for d in dims: 129 if not isinstance(d, str): 130 raise TypeError(f"dimension {d} is not a string") 131 132 new_coords: Dict[Any, Variable] = {} 133 134 if utils.is_dict_like(coords): 135 for k, v in coords.items(): 136 new_coords[k] = as_variable(v, name=k) 137 elif coords is not None: 138 for dim, coord in zip(dims, coords): 139 var = as_variable(coord, name=dim) 140 var.dims = (dim,) 141 new_coords[dim] = var.to_index_variable() 142 143 sizes = dict(zip(dims, shape)) 144 for k, v in new_coords.items(): 145 if any(d not in dims for d in v.dims): 146 raise ValueError( 147 f"coordinate {k} has dimensions {v.dims}, but these " 148 "are not a subset of the DataArray " 149 f"dimensions {dims}" 150 ) 151 152 for d, s in zip(v.dims, v.shape): 153 if s != sizes[d]: 154 raise ValueError( 155 f"conflicting sizes for dimension {d!r}: " 156 f"length {sizes[d]} on the data but length {s} on " 157 f"coordinate {k!r}" 158 ) 159 160 if k in sizes 
and v.shape != (sizes[k],): 161 raise ValueError( 162 f"coordinate {k!r} is a DataArray dimension, but " 163 f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} " 164 "matching the dimension size" 165 ) 166 167 assert_unique_multiindex_level_names(new_coords) 168 169 return new_coords, dims 170 171 172def _check_data_shape(data, coords, dims): 173 if data is dtypes.NA: 174 data = np.nan 175 if coords is not None and utils.is_scalar(data, include_0d=False): 176 if utils.is_dict_like(coords): 177 if dims is None: 178 return data 179 else: 180 data_shape = tuple( 181 as_variable(coords[k], k).size if k in coords.keys() else 1 182 for k in dims 183 ) 184 else: 185 data_shape = tuple(as_variable(coord, "foo").size for coord in coords) 186 data = np.full(data_shape, data) 187 return data 188 189 190class _LocIndexer: 191 __slots__ = ("data_array",) 192 193 def __init__(self, data_array: "DataArray"): 194 self.data_array = data_array 195 196 def __getitem__(self, key) -> "DataArray": 197 if not utils.is_dict_like(key): 198 # expand the indexer so we can handle Ellipsis 199 labels = indexing.expanded_indexer(key, self.data_array.ndim) 200 key = dict(zip(self.data_array.dims, labels)) 201 return self.data_array.sel(key) 202 203 def __setitem__(self, key, value) -> None: 204 if not utils.is_dict_like(key): 205 # expand the indexer so we can handle Ellipsis 206 labels = indexing.expanded_indexer(key, self.data_array.ndim) 207 key = dict(zip(self.data_array.dims, labels)) 208 209 pos_indexers, _ = remap_label_indexers(self.data_array, key) 210 self.data_array[pos_indexers] = value 211 212 213# Used as the key corresponding to a DataArray's variable when converting 214# arbitrary DataArray objects to datasets 215_THIS_ARRAY = ReprObject("<this-array>") 216 217 218class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): 219 """N-dimensional array with labeled coordinates and dimensions. 
220 221 DataArray provides a wrapper around numpy ndarrays that uses 222 labeled dimensions and coordinates to support metadata aware 223 operations. The API is similar to that for the pandas Series or 224 DataFrame, but DataArray objects can have any number of dimensions, 225 and their contents have fixed data types. 226 227 Additional features over raw numpy arrays: 228 229 - Apply operations over dimensions by name: ``x.sum('time')``. 230 - Select or assign values by integer location (like numpy): 231 ``x[:10]`` or by label (like pandas): ``x.loc['2014-01-01']`` or 232 ``x.sel(time='2014-01-01')``. 233 - Mathematical operations (e.g., ``x - y``) vectorize across 234 multiple dimensions (known in numpy as "broadcasting") based on 235 dimension names, regardless of their original order. 236 - Keep track of arbitrary metadata in the form of a Python 237 dictionary: ``x.attrs`` 238 - Convert to a pandas Series: ``x.to_series()``. 239 240 Getting items from or doing mathematical operations with a 241 DataArray always returns another DataArray. 242 243 Parameters 244 ---------- 245 data : array_like 246 Values for this array. Must be an ``numpy.ndarray``, ndarray 247 like, or castable to an ``ndarray``. If a self-described xarray 248 or pandas object, attempts are made to use this array's 249 metadata to fill in other unspecified arguments. A view of the 250 array's data is used instead of a copy if possible. 251 coords : sequence or dict of array_like, optional 252 Coordinates (tick labels) to use for indexing along each 253 dimension. 
The following notations are accepted: 254 255 - mapping {dimension name: array-like} 256 - sequence of tuples that are valid arguments for 257 ``xarray.Variable()`` 258 - (dims, data) 259 - (dims, data, attrs) 260 - (dims, data, attrs, encoding) 261 262 Additionally, it is possible to define a coord whose name 263 does not match the dimension name, or a coord based on multiple 264 dimensions, with one of the following notations: 265 266 - mapping {coord name: DataArray} 267 - mapping {coord name: Variable} 268 - mapping {coord name: (dimension name, array-like)} 269 - mapping {coord name: (tuple of dimension names, array-like)} 270 271 dims : hashable or sequence of hashable, optional 272 Name(s) of the data dimension(s). Must be either a hashable 273 (only for 1D data) or a sequence of hashables with length equal 274 to the number of dimensions. If this argument is omitted, 275 dimension names are taken from ``coords`` (if possible) and 276 otherwise default to ``['dim_0', ... 'dim_n']``. 277 name : str or None, optional 278 Name of this array. 279 attrs : dict_like or None, optional 280 Attributes to assign to the new instance. By default, an empty 281 attribute dictionary is initialized. 282 283 Examples 284 -------- 285 Create data: 286 287 >>> np.random.seed(0) 288 >>> temperature = 15 + 8 * np.random.randn(2, 2, 3) 289 >>> lon = [[-99.83, -99.32], [-99.79, -99.23]] 290 >>> lat = [[42.25, 42.21], [42.63, 42.59]] 291 >>> time = pd.date_range("2014-09-06", periods=3) 292 >>> reference_time = pd.Timestamp("2014-09-05") 293 294 Initialize a dataarray with multiple dimensions: 295 296 >>> da = xr.DataArray( 297 ... data=temperature, 298 ... dims=["x", "y", "time"], 299 ... coords=dict( 300 ... lon=(["x", "y"], lon), 301 ... lat=(["x", "y"], lat), 302 ... time=time, 303 ... reference_time=reference_time, 304 ... ), 305 ... attrs=dict( 306 ... description="Ambient temperature.", 307 ... units="degC", 308 ... ), 309 ... 
) 310 >>> da 311 <xarray.DataArray (x: 2, y: 2, time: 3)> 312 array([[[29.11241877, 18.20125767, 22.82990387], 313 [32.92714559, 29.94046392, 7.18177696]], 314 <BLANKLINE> 315 [[22.60070734, 13.78914233, 14.17424919], 316 [18.28478802, 16.15234857, 26.63418806]]]) 317 Coordinates: 318 lon (x, y) float64 -99.83 -99.32 -99.79 -99.23 319 lat (x, y) float64 42.25 42.21 42.63 42.59 320 * time (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08 321 reference_time datetime64[ns] 2014-09-05 322 Dimensions without coordinates: x, y 323 Attributes: 324 description: Ambient temperature. 325 units: degC 326 327 Find out where the coldest temperature was: 328 329 >>> da.isel(da.argmin(...)) 330 <xarray.DataArray ()> 331 array(7.18177696) 332 Coordinates: 333 lon float64 -99.32 334 lat float64 42.21 335 time datetime64[ns] 2014-09-08 336 reference_time datetime64[ns] 2014-09-05 337 Attributes: 338 description: Ambient temperature. 339 units: degC 340 """ 341 342 _cache: Dict[str, Any] 343 _coords: Dict[Any, Variable] 344 _close: Optional[Callable[[], None]] 345 _indexes: Optional[Dict[Hashable, Index]] 346 _name: Optional[Hashable] 347 _variable: Variable 348 349 __slots__ = ( 350 "_cache", 351 "_coords", 352 "_close", 353 "_indexes", 354 "_name", 355 "_variable", 356 "__weakref__", 357 ) 358 359 _groupby_cls = groupby.DataArrayGroupBy 360 _rolling_cls = rolling.DataArrayRolling 361 _coarsen_cls = rolling.DataArrayCoarsen 362 _resample_cls = resample.DataArrayResample 363 _weighted_cls = weighted.DataArrayWeighted 364 365 dt = utils.UncachedAccessor(CombinedDatetimelikeAccessor) 366 367 def __init__( 368 self, 369 data: Any = dtypes.NA, 370 coords: Union[Sequence[Tuple], Mapping[Any, Any], None] = None, 371 dims: Union[Hashable, Sequence[Hashable], None] = None, 372 name: Hashable = None, 373 attrs: Mapping = None, 374 # internal parameters 375 indexes: Dict[Hashable, pd.Index] = None, 376 fastpath: bool = False, 377 ): 378 if fastpath: 379 variable = data 380 assert dims is 
None 381 assert attrs is None 382 else: 383 # try to fill in arguments from data if they weren't supplied 384 if coords is None: 385 386 if isinstance(data, DataArray): 387 coords = data.coords 388 elif isinstance(data, pd.Series): 389 coords = [data.index] 390 elif isinstance(data, pd.DataFrame): 391 coords = [data.index, data.columns] 392 elif isinstance(data, (pd.Index, IndexVariable)): 393 coords = [data] 394 elif isinstance(data, pdcompat.Panel): 395 coords = [data.items, data.major_axis, data.minor_axis] 396 397 if dims is None: 398 dims = getattr(data, "dims", getattr(coords, "dims", None)) 399 if name is None: 400 name = getattr(data, "name", None) 401 if attrs is None and not isinstance(data, PANDAS_TYPES): 402 attrs = getattr(data, "attrs", None) 403 404 data = _check_data_shape(data, coords, dims) 405 data = as_compatible_data(data) 406 coords, dims = _infer_coords_and_dims(data.shape, coords, dims) 407 variable = Variable(dims, data, attrs, fastpath=True) 408 indexes = dict( 409 _extract_indexes_from_coords(coords) 410 ) # needed for to_dataset 411 412 # These fully describe a DataArray 413 self._variable = variable 414 assert isinstance(coords, dict) 415 self._coords = coords 416 self._name = name 417 418 # TODO(shoyer): document this argument, once it becomes part of the 419 # public interface. 
420 self._indexes = indexes 421 422 self._close = None 423 424 def _replace( 425 self: T_DataArray, 426 variable: Variable = None, 427 coords=None, 428 name: Union[Hashable, None, Default] = _default, 429 indexes=None, 430 ) -> T_DataArray: 431 if variable is None: 432 variable = self.variable 433 if coords is None: 434 coords = self._coords 435 if name is _default: 436 name = self.name 437 return type(self)(variable, coords, name=name, fastpath=True, indexes=indexes) 438 439 def _replace_maybe_drop_dims( 440 self, variable: Variable, name: Union[Hashable, None, Default] = _default 441 ) -> "DataArray": 442 if variable.dims == self.dims and variable.shape == self.shape: 443 coords = self._coords.copy() 444 indexes = self._indexes 445 elif variable.dims == self.dims: 446 # Shape has changed (e.g. from reduce(..., keepdims=True) 447 new_sizes = dict(zip(self.dims, variable.shape)) 448 coords = { 449 k: v 450 for k, v in self._coords.items() 451 if v.shape == tuple(new_sizes[d] for d in v.dims) 452 } 453 changed_dims = [ 454 k for k in variable.dims if variable.sizes[k] != self.sizes[k] 455 ] 456 indexes = propagate_indexes(self._indexes, exclude=changed_dims) 457 else: 458 allowed_dims = set(variable.dims) 459 coords = { 460 k: v for k, v in self._coords.items() if set(v.dims) <= allowed_dims 461 } 462 indexes = propagate_indexes( 463 self._indexes, exclude=(set(self.dims) - allowed_dims) 464 ) 465 return self._replace(variable, coords, name, indexes=indexes) 466 467 def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> "DataArray": 468 if not len(indexes): 469 return self 470 coords = self._coords.copy() 471 for name, idx in indexes.items(): 472 coords[name] = IndexVariable(name, idx.to_pandas_index()) 473 obj = self._replace(coords=coords) 474 475 # switch from dimension to level names, if necessary 476 dim_names: Dict[Any, str] = {} 477 for dim, idx in indexes.items(): 478 pd_idx = idx.to_pandas_index() 479 if not isinstance(idx, pd.MultiIndex) and 
pd_idx.name != dim: 480 dim_names[dim] = idx.name 481 if dim_names: 482 obj = obj.rename(dim_names) 483 return obj 484 485 def _to_temp_dataset(self) -> Dataset: 486 return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) 487 488 def _from_temp_dataset( 489 self, dataset: Dataset, name: Union[Hashable, None, Default] = _default 490 ) -> "DataArray": 491 variable = dataset._variables.pop(_THIS_ARRAY) 492 coords = dataset._variables 493 indexes = dataset._indexes 494 return self._replace(variable, coords, name, indexes=indexes) 495 496 def _to_dataset_split(self, dim: Hashable) -> Dataset: 497 """splits dataarray along dimension 'dim'""" 498 499 def subset(dim, label): 500 array = self.loc[{dim: label}] 501 array.attrs = {} 502 return as_variable(array) 503 504 variables = {label: subset(dim, label) for label in self.get_index(dim)} 505 variables.update({k: v for k, v in self._coords.items() if k != dim}) 506 indexes = propagate_indexes(self._indexes, exclude=dim) 507 coord_names = set(self._coords) - {dim} 508 dataset = Dataset._construct_direct( 509 variables, coord_names, indexes=indexes, attrs=self.attrs 510 ) 511 return dataset 512 513 def _to_dataset_whole( 514 self, name: Hashable = None, shallow_copy: bool = True 515 ) -> Dataset: 516 if name is None: 517 name = self.name 518 if name is None: 519 raise ValueError( 520 "unable to convert unnamed DataArray to a " 521 "Dataset without providing an explicit name" 522 ) 523 if name in self.coords: 524 raise ValueError( 525 "cannot create a Dataset from a DataArray with " 526 "the same name as one of its coordinates" 527 ) 528 # use private APIs for speed: this is called by _to_temp_dataset(), 529 # which is used in the guts of a lot of operations (e.g., reindex) 530 variables = self._coords.copy() 531 variables[name] = self.variable 532 if shallow_copy: 533 for k in variables: 534 variables[k] = variables[k].copy(deep=False) 535 indexes = self._indexes 536 537 coord_names = set(self._coords) 538 return 
Dataset._construct_direct(variables, coord_names, indexes=indexes) 539 540 def to_dataset( 541 self, 542 dim: Hashable = None, 543 *, 544 name: Hashable = None, 545 promote_attrs: bool = False, 546 ) -> Dataset: 547 """Convert a DataArray to a Dataset. 548 549 Parameters 550 ---------- 551 dim : hashable, optional 552 Name of the dimension on this array along which to split this array 553 into separate variables. If not provided, this array is converted 554 into a Dataset of one variable. 555 name : hashable, optional 556 Name to substitute for this array's name. Only valid if ``dim`` is 557 not provided. 558 promote_attrs : bool, default: False 559 Set to True to shallow copy attrs of DataArray to returned Dataset. 560 561 Returns 562 ------- 563 dataset : Dataset 564 """ 565 if dim is not None and dim not in self.dims: 566 raise TypeError( 567 f"{dim} is not a dim. If supplying a ``name``, pass as a kwarg." 568 ) 569 570 if dim is not None: 571 if name is not None: 572 raise TypeError("cannot supply both dim and name arguments") 573 result = self._to_dataset_split(dim) 574 else: 575 result = self._to_dataset_whole(name) 576 577 if promote_attrs: 578 result.attrs = dict(self.attrs) 579 580 return result 581 582 @property 583 def name(self) -> Optional[Hashable]: 584 """The name of this array.""" 585 return self._name 586 587 @name.setter 588 def name(self, value: Optional[Hashable]) -> None: 589 self._name = value 590 591 @property 592 def variable(self) -> Variable: 593 """Low level interface to the Variable object for this DataArray.""" 594 return self._variable 595 596 @property 597 def dtype(self) -> np.dtype: 598 return self.variable.dtype 599 600 @property 601 def shape(self) -> Tuple[int, ...]: 602 return self.variable.shape 603 604 @property 605 def size(self) -> int: 606 return self.variable.size 607 608 @property 609 def nbytes(self) -> int: 610 return self.variable.nbytes 611 612 @property 613 def ndim(self) -> int: 614 return self.variable.ndim 615 616 
def __len__(self) -> int: 617 return len(self.variable) 618 619 @property 620 def data(self) -> Any: 621 """ 622 The DataArray's data as an array. The underlying array type 623 (e.g. dask, sparse, pint) is preserved. 624 625 See Also 626 -------- 627 DataArray.to_numpy 628 DataArray.as_numpy 629 DataArray.values 630 """ 631 return self.variable.data 632 633 @data.setter 634 def data(self, value: Any) -> None: 635 self.variable.data = value 636 637 @property 638 def values(self) -> np.ndarray: 639 """ 640 The array's data as a numpy.ndarray. 641 642 If the array's data is not a numpy.ndarray this will attempt to convert 643 it naively using np.array(), which will raise an error if the array 644 type does not support coercion like this (e.g. cupy). 645 """ 646 return self.variable.values 647 648 @values.setter 649 def values(self, value: Any) -> None: 650 self.variable.values = value 651 652 def to_numpy(self) -> np.ndarray: 653 """ 654 Coerces wrapped data to numpy and returns a numpy.ndarray. 655 656 See Also 657 -------- 658 DataArray.as_numpy : Same but returns the surrounding DataArray instead. 659 Dataset.as_numpy 660 DataArray.values 661 DataArray.data 662 """ 663 return self.variable.to_numpy() 664 665 def as_numpy(self: T_DataArray) -> T_DataArray: 666 """ 667 Coerces wrapped data and coordinates into numpy arrays, returning a DataArray. 668 669 See Also 670 -------- 671 DataArray.to_numpy : Same but returns only the data as a numpy.ndarray object. 672 Dataset.as_numpy : Converts all variables in a Dataset. 673 DataArray.values 674 DataArray.data 675 """ 676 coords = {k: v.as_numpy() for k, v in self._coords.items()} 677 return self._replace(self.variable.as_numpy(), coords, indexes=self._indexes) 678 679 @property 680 def _in_memory(self) -> bool: 681 return self.variable._in_memory 682 683 def to_index(self) -> pd.Index: 684 """Convert this variable to a pandas.Index. Only possible for 1D 685 arrays. 
686 """ 687 return self.variable.to_index() 688 689 @property 690 def dims(self) -> Tuple[Hashable, ...]: 691 """Tuple of dimension names associated with this array. 692 693 Note that the type of this property is inconsistent with 694 `Dataset.dims`. See `Dataset.sizes` and `DataArray.sizes` for 695 consistently named properties. 696 """ 697 return self.variable.dims 698 699 @dims.setter 700 def dims(self, value): 701 raise AttributeError( 702 "you cannot assign dims on a DataArray. Use " 703 ".rename() or .swap_dims() instead." 704 ) 705 706 def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: 707 if utils.is_dict_like(key): 708 return key 709 key = indexing.expanded_indexer(key, self.ndim) 710 return dict(zip(self.dims, key)) 711 712 @property 713 def _level_coords(self) -> Dict[Hashable, Hashable]: 714 """Return a mapping of all MultiIndex levels and their corresponding 715 coordinate name. 716 """ 717 level_coords: Dict[Hashable, Hashable] = {} 718 719 for cname, var in self._coords.items(): 720 if var.ndim == 1 and isinstance(var, IndexVariable): 721 level_names = var.level_names 722 if level_names is not None: 723 (dim,) = var.dims 724 level_coords.update({lname: dim for lname in level_names}) 725 return level_coords 726 727 def _getitem_coord(self, key): 728 from .dataset import _get_virtual_variable 729 730 try: 731 var = self._coords[key] 732 except KeyError: 733 dim_sizes = dict(zip(self.dims, self.shape)) 734 _, key, var = _get_virtual_variable( 735 self._coords, key, self._level_coords, dim_sizes 736 ) 737 738 return self._replace_maybe_drop_dims(var, name=key) 739 740 def __getitem__(self, key: Any) -> "DataArray": 741 if isinstance(key, str): 742 return self._getitem_coord(key) 743 else: 744 # xarray-style array indexing 745 return self.isel(indexers=self._item_key_to_dict(key)) 746 747 def __setitem__(self, key: Any, value: Any) -> None: 748 if isinstance(key, str): 749 self.coords[key] = value 750 else: 751 # Coordinates in key, value 
and self[key] should be consistent. 752 # TODO Coordinate consistency in key is checked here, but it 753 # causes unnecessary indexing. It should be optimized. 754 obj = self[key] 755 if isinstance(value, DataArray): 756 assert_coordinate_consistent(value, obj.coords.variables) 757 # DataArray key -> Variable key 758 key = { 759 k: v.variable if isinstance(v, DataArray) else v 760 for k, v in self._item_key_to_dict(key).items() 761 } 762 self.variable[key] = value 763 764 def __delitem__(self, key: Any) -> None: 765 del self.coords[key] 766 767 @property 768 def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]: 769 """Places to look-up items for attribute-style access""" 770 yield from self._item_sources 771 yield self.attrs 772 773 @property 774 def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]: 775 """Places to look-up items for key-completion""" 776 yield HybridMappingProxy(keys=self._coords, mapping=self.coords) 777 778 # virtual coordinates 779 # uses empty dict -- everything here can already be found in self.coords. 
780 yield HybridMappingProxy(keys=self.dims, mapping={}) 781 yield HybridMappingProxy(keys=self._level_coords, mapping={}) 782 783 def __contains__(self, key: Any) -> bool: 784 return key in self.data 785 786 @property 787 def loc(self) -> _LocIndexer: 788 """Attribute for location based indexing like pandas.""" 789 return _LocIndexer(self) 790 791 @property 792 # Key type needs to be `Any` because of mypy#4167 793 def attrs(self) -> Dict[Any, Any]: 794 """Dictionary storing arbitrary metadata with this array.""" 795 return self.variable.attrs 796 797 @attrs.setter 798 def attrs(self, value: Mapping[Any, Any]) -> None: 799 # Disable type checking to work around mypy bug - see mypy#4167 800 self.variable.attrs = value # type: ignore[assignment] 801 802 @property 803 def encoding(self) -> Dict[Hashable, Any]: 804 """Dictionary of format-specific settings for how this array should be 805 serialized.""" 806 return self.variable.encoding 807 808 @encoding.setter 809 def encoding(self, value: Mapping[Any, Any]) -> None: 810 self.variable.encoding = value 811 812 @property 813 def indexes(self) -> Indexes: 814 """Mapping of pandas.Index objects used for label based indexing. 815 816 Raises an error if this Dataset has indexes that cannot be coerced 817 to pandas.Index objects. 
818 819 See Also 820 -------- 821 DataArray.xindexes 822 823 """ 824 return Indexes({k: idx.to_pandas_index() for k, idx in self.xindexes.items()}) 825 826 @property 827 def xindexes(self) -> Indexes: 828 """Mapping of xarray Index objects used for label based indexing.""" 829 if self._indexes is None: 830 self._indexes = default_indexes(self._coords, self.dims) 831 return Indexes(self._indexes) 832 833 @property 834 def coords(self) -> DataArrayCoordinates: 835 """Dictionary-like container of coordinate arrays.""" 836 return DataArrayCoordinates(self) 837 838 def reset_coords( 839 self, 840 names: Union[Iterable[Hashable], Hashable, None] = None, 841 drop: bool = False, 842 ) -> Union[None, "DataArray", Dataset]: 843 """Given names of coordinates, reset them to become variables. 844 845 Parameters 846 ---------- 847 names : hashable or iterable of hashable, optional 848 Name(s) of non-index coordinates in this dataset to reset into 849 variables. By default, all non-index coordinates are reset. 850 drop : bool, optional 851 If True, remove coordinates instead of converting them into 852 variables. 
853 854 Returns 855 ------- 856 Dataset, or DataArray if ``drop == True`` 857 """ 858 if names is None: 859 names = set(self.coords) - set(self.dims) 860 dataset = self.coords.to_dataset().reset_coords(names, drop) 861 if drop: 862 return self._replace(coords=dataset._variables) 863 if self.name is None: 864 raise ValueError( 865 "cannot reset_coords with drop=False on an unnamed DataArrray" 866 ) 867 dataset[self.name] = self.variable 868 return dataset 869 870 def __dask_tokenize__(self): 871 from dask.base import normalize_token 872 873 return normalize_token((type(self), self._variable, self._coords, self._name)) 874 875 def __dask_graph__(self): 876 return self._to_temp_dataset().__dask_graph__() 877 878 def __dask_keys__(self): 879 return self._to_temp_dataset().__dask_keys__() 880 881 def __dask_layers__(self): 882 return self._to_temp_dataset().__dask_layers__() 883 884 @property 885 def __dask_optimize__(self): 886 return self._to_temp_dataset().__dask_optimize__ 887 888 @property 889 def __dask_scheduler__(self): 890 return self._to_temp_dataset().__dask_scheduler__ 891 892 def __dask_postcompute__(self): 893 func, args = self._to_temp_dataset().__dask_postcompute__() 894 return self._dask_finalize, (self.name, func) + args 895 896 def __dask_postpersist__(self): 897 func, args = self._to_temp_dataset().__dask_postpersist__() 898 return self._dask_finalize, (self.name, func) + args 899 900 @staticmethod 901 def _dask_finalize(results, name, func, *args, **kwargs): 902 ds = func(results, *args, **kwargs) 903 variable = ds._variables.pop(_THIS_ARRAY) 904 coords = ds._variables 905 return DataArray(variable, coords, name=name, fastpath=True) 906 907 def load(self, **kwargs) -> "DataArray": 908 """Manually trigger loading of this array's data from disk or a 909 remote source into memory and return this array. 
910 911 Normally, it should not be necessary to call this method in user code, 912 because all xarray functions should either work on deferred data or 913 load data automatically. However, this method can be necessary when 914 working with many file objects on disk. 915 916 Parameters 917 ---------- 918 **kwargs : dict 919 Additional keyword arguments passed on to ``dask.compute``. 920 921 See Also 922 -------- 923 dask.compute 924 """ 925 ds = self._to_temp_dataset().load(**kwargs) 926 new = self._from_temp_dataset(ds) 927 self._variable = new._variable 928 self._coords = new._coords 929 return self 930 931 def compute(self, **kwargs) -> "DataArray": 932 """Manually trigger loading of this array's data from disk or a 933 remote source into memory and return a new array. The original is 934 left unaltered. 935 936 Normally, it should not be necessary to call this method in user code, 937 because all xarray functions should either work on deferred data or 938 load data automatically. However, this method can be necessary when 939 working with many file objects on disk. 940 941 Parameters 942 ---------- 943 **kwargs : dict 944 Additional keyword arguments passed on to ``dask.compute``. 945 946 See Also 947 -------- 948 dask.compute 949 """ 950 new = self.copy(deep=False) 951 return new.load(**kwargs) 952 953 def persist(self, **kwargs) -> "DataArray": 954 """Trigger computation in constituent dask arrays 955 956 This keeps them as dask arrays but encourages them to keep data in 957 memory. This is particularly useful when on a distributed machine. 958 When on a single machine consider using ``.compute()`` instead. 959 960 Parameters 961 ---------- 962 **kwargs : dict 963 Additional keyword arguments passed on to ``dask.persist``. 
964 965 See Also 966 -------- 967 dask.persist 968 """ 969 ds = self._to_temp_dataset().persist(**kwargs) 970 return self._from_temp_dataset(ds) 971 972 def copy(self: T_DataArray, deep: bool = True, data: Any = None) -> T_DataArray: 973 """Returns a copy of this array. 974 975 If `deep=True`, a deep copy is made of the data array. 976 Otherwise, a shallow copy is made, and the returned data array's 977 values are a new view of this data array's values. 978 979 Use `data` to create a new object with the same structure as 980 original but entirely new data. 981 982 Parameters 983 ---------- 984 deep : bool, optional 985 Whether the data array and its coordinates are loaded into memory 986 and copied onto the new object. Default is True. 987 data : array_like, optional 988 Data to use in the new object. Must have same shape as original. 989 When `data` is used, `deep` is ignored for all data variables, 990 and only used for coords. 991 992 Returns 993 ------- 994 object : DataArray 995 New object with dimensions, attributes, coordinates, name, 996 encoding, and optionally data copied from original. 997 998 Examples 999 -------- 1000 Shallow versus deep copy 1001 1002 >>> array = xr.DataArray([1, 2, 3], dims="x", coords={"x": ["a", "b", "c"]}) 1003 >>> array.copy() 1004 <xarray.DataArray (x: 3)> 1005 array([1, 2, 3]) 1006 Coordinates: 1007 * x (x) <U1 'a' 'b' 'c' 1008 >>> array_0 = array.copy(deep=False) 1009 >>> array_0[0] = 7 1010 >>> array_0 1011 <xarray.DataArray (x: 3)> 1012 array([7, 2, 3]) 1013 Coordinates: 1014 * x (x) <U1 'a' 'b' 'c' 1015 >>> array 1016 <xarray.DataArray (x: 3)> 1017 array([7, 2, 3]) 1018 Coordinates: 1019 * x (x) <U1 'a' 'b' 'c' 1020 1021 Changing the data using the ``data`` argument maintains the 1022 structure of the original object, but with the new data. Original 1023 object is unaffected. 
1024 1025 >>> array.copy(data=[0.1, 0.2, 0.3]) 1026 <xarray.DataArray (x: 3)> 1027 array([0.1, 0.2, 0.3]) 1028 Coordinates: 1029 * x (x) <U1 'a' 'b' 'c' 1030 >>> array 1031 <xarray.DataArray (x: 3)> 1032 array([7, 2, 3]) 1033 Coordinates: 1034 * x (x) <U1 'a' 'b' 'c' 1035 1036 See Also 1037 -------- 1038 pandas.DataFrame.copy 1039 """ 1040 variable = self.variable.copy(deep=deep, data=data) 1041 coords = {k: v.copy(deep=deep) for k, v in self._coords.items()} 1042 if self._indexes is None: 1043 indexes = self._indexes 1044 else: 1045 indexes = {k: v.copy(deep=deep) for k, v in self._indexes.items()} 1046 return self._replace(variable, coords, indexes=indexes) 1047 1048 def __copy__(self) -> "DataArray": 1049 return self.copy(deep=False) 1050 1051 def __deepcopy__(self, memo=None) -> "DataArray": 1052 # memo does nothing but is required for compatibility with 1053 # copy.deepcopy 1054 return self.copy(deep=True) 1055 1056 # mutable objects should not be hashable 1057 # https://github.com/python/mypy/issues/4266 1058 __hash__ = None # type: ignore[assignment] 1059 1060 @property 1061 def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: 1062 """ 1063 Tuple of block lengths for this dataarray's data, in order of dimensions, or None if 1064 the underlying data is not a dask array. 1065 1066 See Also 1067 -------- 1068 DataArray.chunk 1069 DataArray.chunksizes 1070 xarray.unify_chunks 1071 """ 1072 return self.variable.chunks 1073 1074 @property 1075 def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: 1076 """ 1077 Mapping from dimension names to block lengths for this dataarray's data, or None if 1078 the underlying data is not a dask array. 1079 Cannot be modified directly, but can be modified by calling .chunk(). 1080 1081 Differs from DataArray.chunks because it returns a mapping of dimensions to chunk shapes 1082 instead of a tuple of chunk shapes. 
def chunk(
    self,
    chunks: Union[
        int,
        Tuple[int, ...],
        Tuple[Tuple[int, ...], ...],
        Mapping[Any, Union[None, int, Tuple[int, ...]]],
    ] = {},  # {} even though it's technically unsafe, is being used intentionally here (#4667)
    name_prefix: str = "xarray-",
    token: str = None,
    lock: bool = False,
) -> "DataArray":
    """Coerce this array's data into a dask array with the given chunks.

    If this variable is a non-dask array, it will be converted to a dask
    array. If it's a dask array, it will be rechunked to the given chunk
    sizes.

    If chunks are not provided for one or more dimensions, chunk sizes
    along those dimensions will not be updated; non-dask arrays will be
    converted into dask arrays with a single block.

    Parameters
    ----------
    chunks : int, tuple of int or mapping of hashable to int, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``.
    name_prefix : str, optional
        Prefix for the name of the new dask array.
    token : str, optional
        Token uniquely identifying this array.
    lock : optional
        Passed on to :py:func:`dask.array.from_array`, if the array is not
        already a dask array.

    Returns
    -------
    chunked : xarray.DataArray
    """
    if isinstance(chunks, (tuple, list)):
        # A positional spec is matched to this array's dimensions in order.
        chunks = {dim: size for dim, size in zip(self.dims, chunks)}

    dask_options = dict(name_prefix=name_prefix, token=token, lock=lock)
    chunked = self._to_temp_dataset().chunk(chunks, **dask_options)
    return self._from_temp_dataset(chunked)
1170 1171 See Also 1172 -------- 1173 Dataset.isel 1174 DataArray.sel 1175 1176 Examples 1177 -------- 1178 >>> da = xr.DataArray(np.arange(25).reshape(5, 5), dims=("x", "y")) 1179 >>> da 1180 <xarray.DataArray (x: 5, y: 5)> 1181 array([[ 0, 1, 2, 3, 4], 1182 [ 5, 6, 7, 8, 9], 1183 [10, 11, 12, 13, 14], 1184 [15, 16, 17, 18, 19], 1185 [20, 21, 22, 23, 24]]) 1186 Dimensions without coordinates: x, y 1187 1188 >>> tgt_x = xr.DataArray(np.arange(0, 5), dims="points") 1189 >>> tgt_y = xr.DataArray(np.arange(0, 5), dims="points") 1190 >>> da = da.isel(x=tgt_x, y=tgt_y) 1191 >>> da 1192 <xarray.DataArray (points: 5)> 1193 array([ 0, 6, 12, 18, 24]) 1194 Dimensions without coordinates: points 1195 """ 1196 1197 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") 1198 1199 if any(is_fancy_indexer(idx) for idx in indexers.values()): 1200 ds = self._to_temp_dataset()._isel_fancy( 1201 indexers, drop=drop, missing_dims=missing_dims 1202 ) 1203 return self._from_temp_dataset(ds) 1204 1205 # Much faster algorithm for when all indexers are ints, slices, one-dimensional 1206 # lists, or zero or one-dimensional np.ndarray's 1207 1208 variable = self._variable.isel(indexers, missing_dims=missing_dims) 1209 1210 coords = {} 1211 for coord_name, coord_value in self._coords.items(): 1212 coord_indexers = { 1213 k: v for k, v in indexers.items() if k in coord_value.dims 1214 } 1215 if coord_indexers: 1216 coord_value = coord_value.isel(coord_indexers) 1217 if drop and coord_value.ndim == 0: 1218 continue 1219 coords[coord_name] = coord_value 1220 1221 return self._replace(variable=variable, coords=coords) 1222 1223 def sel( 1224 self, 1225 indexers: Mapping[Any, Any] = None, 1226 method: str = None, 1227 tolerance=None, 1228 drop: bool = False, 1229 **indexers_kwargs: Any, 1230 ) -> "DataArray": 1231 """Return a new DataArray whose data is given by selecting index 1232 labels along the specified dimension(s). 
1233 1234 In contrast to `DataArray.isel`, indexers for this method should use 1235 labels instead of integers. 1236 1237 Under the hood, this method is powered by using pandas's powerful Index 1238 objects. This makes label based indexing essentially just as fast as 1239 using integer indexing. 1240 1241 It also means this method uses pandas's (well documented) logic for 1242 indexing. This means you can use string shortcuts for datetime indexes 1243 (e.g., '2000-01' to select all values in January 2000). It also means 1244 that slices are treated as inclusive of both the start and stop values, 1245 unlike normal Python indexing. 1246 1247 .. warning:: 1248 1249 Do not try to assign values when using any of the indexing methods 1250 ``isel`` or ``sel``:: 1251 1252 da = xr.DataArray([0, 1, 2, 3], dims=['x']) 1253 # DO NOT do this 1254 da.isel(x=[0, 1, 2])[1] = -1 1255 1256 Assigning values with the chained indexing using ``.sel`` or 1257 ``.isel`` fails silently. 1258 1259 Parameters 1260 ---------- 1261 indexers : dict, optional 1262 A dict with keys matching dimensions and values given 1263 by scalars, slices or arrays of tick labels. For dimensions with 1264 multi-index, the indexer may also be a dict-like object with keys 1265 matching index level names. 1266 If DataArrays are passed as indexers, xarray-style indexing will be 1267 carried out. See :ref:`indexing` for the details. 1268 One of indexers or indexers_kwargs must be provided. 1269 method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional 1270 Method to use for inexact matches: 1271 1272 * None (default): only exact matches 1273 * pad / ffill: propagate last valid index value forward 1274 * backfill / bfill: propagate next valid index value backward 1275 * nearest: use nearest valid index value 1276 tolerance : optional 1277 Maximum distance between original and new labels for inexact 1278 matches. 
The values of the index at the matching locations must 1279 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 1280 drop : bool, optional 1281 If ``drop=True``, drop coordinates variables in `indexers` instead 1282 of making them scalar. 1283 **indexers_kwargs : {dim: indexer, ...}, optional 1284 The keyword arguments form of ``indexers``. 1285 One of indexers or indexers_kwargs must be provided. 1286 1287 Returns 1288 ------- 1289 obj : DataArray 1290 A new DataArray with the same contents as this DataArray, except the 1291 data and each dimension is indexed by the appropriate indexers. 1292 If indexer DataArrays have coordinates that do not conflict with 1293 this object, then these coordinates will be attached. 1294 In general, each array's data will be a view of the array's data 1295 in this DataArray, unless vectorized indexing was triggered by using 1296 an array indexer, in which case the data will be a copy. 1297 1298 See Also 1299 -------- 1300 Dataset.sel 1301 DataArray.isel 1302 1303 Examples 1304 -------- 1305 >>> da = xr.DataArray( 1306 ... np.arange(25).reshape(5, 5), 1307 ... coords={"x": np.arange(5), "y": np.arange(5)}, 1308 ... dims=("x", "y"), 1309 ... 
def head(
    self,
    indexers: Union[Mapping[Any, int], int] = None,
    **indexers_kwargs: Any,
) -> "DataArray":
    """Return a new DataArray whose data is given by the first `n`
    values along the specified dimension(s). Default `n` = 5

    See Also
    --------
    Dataset.head
    DataArray.tail
    DataArray.thin
    """
    # The work is delegated to the Dataset method of the same name.
    truncated = self._to_temp_dataset().head(indexers, **indexers_kwargs)
    return self._from_temp_dataset(truncated)
def thin(
    self,
    indexers: Union[Mapping[Any, int], int] = None,
    **indexers_kwargs: Any,
) -> "DataArray":
    """Return a new DataArray whose data is given by each `n` value
    along the specified dimension(s).

    See Also
    --------
    Dataset.thin
    DataArray.head
    DataArray.tail
    """
    # The work is delegated to the Dataset method of the same name.
    thinned = self._to_temp_dataset().thin(indexers, **indexers_kwargs)
    return self._from_temp_dataset(thinned)
def reindex_like(
    self,
    other: Union["DataArray", Dataset],
    method: str = None,
    tolerance=None,
    copy: bool = True,
    fill_value=dtypes.NA,
) -> "DataArray":
    """Conform this object onto the indexes of another object, filling in
    missing values with ``fill_value``. The default fill value is NaN.

    Parameters
    ----------
    other : Dataset or DataArray
        Object with an 'indexes' attribute giving a mapping from dimension
        names to pandas.Index objects, which provides coordinates upon
        which to index the variables in this dataset. The indexes on this
        other object need not be the same as the indexes on this
        dataset. Any mis-matched index values will be filled in with
        NaN, and any mis-matched dimension names will simply be ignored.
    method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional
        Method to use for filling index values from other not found on this
        data array:

        * None (default): don't fill gaps
        * pad / ffill: propagate last valid index value forward
        * backfill / bfill: propagate next valid index value backward
        * nearest: use nearest valid index value
    tolerance : optional
        Maximum distance between original and new labels for inexact
        matches. The values of the index at the matching locations must
        satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
    copy : bool, optional
        If ``copy=True``, data in the return value is always copied. If
        ``copy=False`` and reindexing is unnecessary, or can be performed
        with only slice operations, then the output may share memory with
        the input. In either case, a new xarray object is always returned.
    fill_value : scalar or dict-like, optional
        Value to use for newly missing values. If a dict-like, maps
        variable names (including coordinates) to fill values. Use this
        data array's name to refer to the data array's values.

    Returns
    -------
    reindexed : DataArray
        Another data array, with this array's data but coordinates from
        the other object.

    See Also
    --------
    DataArray.reindex
    align
    """
    # Derive the indexers from ``other`` and let ``reindex`` do the work.
    target_indexers = reindex_like_indexers(self, other)
    reindex_options = dict(
        method=method,
        tolerance=tolerance,
        copy=copy,
        fill_value=fill_value,
    )
    return self.reindex(indexers=target_indexers, **reindex_options)
1539 1540 Parameters 1541 ---------- 1542 indexers : dict, optional 1543 Dictionary with keys given by dimension names and values given by 1544 arrays of coordinates tick labels. Any mis-matched coordinate 1545 values will be filled in with NaN, and any mis-matched dimension 1546 names will simply be ignored. 1547 One of indexers or indexers_kwargs must be provided. 1548 copy : bool, optional 1549 If ``copy=True``, data in the return value is always copied. If 1550 ``copy=False`` and reindexing is unnecessary, or can be performed 1551 with only slice operations, then the output may share memory with 1552 the input. In either case, a new xarray object is always returned. 1553 method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional 1554 Method to use for filling index values in ``indexers`` not found on 1555 this data array: 1556 1557 * None (default): don't fill gaps 1558 * pad / ffill: propagate last valid index value forward 1559 * backfill / bfill: propagate next valid index value backward 1560 * nearest: use nearest valid index value 1561 tolerance : optional 1562 Maximum distance between original and new labels for inexact 1563 matches. The values of the index at the matching locations must 1564 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 1565 fill_value : scalar or dict-like, optional 1566 Value to use for newly missing values. If a dict-like, maps 1567 variable names (including coordinates) to fill values. Use this 1568 data array's name to refer to the data array's values. 1569 **indexers_kwargs : {dim: indexer, ...}, optional 1570 The keyword arguments form of ``indexers``. 1571 One of indexers or indexers_kwargs must be provided. 1572 1573 Returns 1574 ------- 1575 reindexed : DataArray 1576 Another dataset array, with this array's data but replaced 1577 coordinates. 1578 1579 Examples 1580 -------- 1581 Reverse latitude: 1582 1583 >>> da = xr.DataArray( 1584 ... np.arange(4), 1585 ... 
def interp(
    self,
    coords: Mapping[Any, Any] = None,
    method: str = "linear",
    assume_sorted: bool = False,
    kwargs: Mapping[str, Any] = None,
    **coords_kwargs: Any,
) -> "DataArray":
    """Multidimensional interpolation of variables.

    Parameters
    ----------
    coords : dict, optional
        Mapping from dimension names to the new coordinates.
        A new coordinate can be a scalar, array-like or DataArray.
        If DataArrays are passed as new coordinates, their dimensions are
        used for the broadcasting. Missing values are skipped.
    method : str, default: "linear"
        The method used to interpolate. Choose from

        - {"linear", "nearest"} for multidimensional array,
        - {"linear", "nearest", "zero", "slinear", "quadratic", "cubic"} for 1-dimensional array.
    assume_sorted : bool, optional
        If False, values of x can be in any order and they are sorted
        first. If True, x has to be an array of monotonically increasing
        values.
    kwargs : dict
        Additional keyword arguments passed to scipy's interpolator. Valid
        options and their behavior depend on if 1-dimensional or
        multi-dimensional interpolation is used.
    **coords_kwargs : {dim: coordinate, ...}, optional
        The keyword arguments form of ``coords``.
        One of coords or coords_kwargs must be provided.

    Returns
    -------
    interpolated : DataArray
        New dataarray on the new coordinates.

    Raises
    ------
    TypeError
        If the dtype kind of this array is not one of ``"u"``, ``"i"``,
        ``"f"`` or ``"c"``.

    Notes
    -----
    scipy is required.

    See Also
    --------
    scipy.interpolate.interp1d
    scipy.interpolate.interpn

    Examples
    --------
    >>> da = xr.DataArray(
    ...     data=[[1, 4, 2, 9], [2, 7, 6, np.nan], [6, np.nan, 5, 8]],
    ...     dims=("x", "y"),
    ...     coords={"x": [0, 1, 2], "y": [10, 12, 14, 16]},
    ... )
    >>> da
    <xarray.DataArray (x: 3, y: 4)>
    array([[ 1.,  4.,  2.,  9.],
           [ 2.,  7.,  6., nan],
           [ 6., nan,  5.,  8.]])
    Coordinates:
      * x        (x) int64 0 1 2
      * y        (y) int64 10 12 14 16

    1D linear interpolation (the default):

    >>> da.interp(x=[0, 0.75, 1.25, 1.75])
    <xarray.DataArray (x: 4, y: 4)>
    array([[1.  , 4.  , 2.  ,  nan],
           [1.75, 6.25, 5.  ,  nan],
           [3.  ,  nan, 5.75,  nan],
           [5.  ,  nan, 5.25,  nan]])
    Coordinates:
      * y        (y) int64 10 12 14 16
      * x        (x) float64 0.0 0.75 1.25 1.75

    1D nearest interpolation:

    >>> da.interp(x=[0, 0.75, 1.25, 1.75], method="nearest")
    <xarray.DataArray (x: 4, y: 4)>
    array([[ 1.,  4.,  2.,  9.],
           [ 2.,  7.,  6., nan],
           [ 2.,  7.,  6., nan],
           [ 6., nan,  5.,  8.]])
    Coordinates:
      * y        (y) int64 10 12 14 16
      * x        (x) float64 0.0 0.75 1.25 1.75

    1D linear extrapolation:

    >>> da.interp(
    ...     x=[1, 1.5, 2.5, 3.5],
    ...     method="linear",
    ...     kwargs={"fill_value": "extrapolate"},
    ... )
    <xarray.DataArray (x: 4, y: 4)>
    array([[ 2. ,  7. ,  6. ,  nan],
           [ 4. ,  nan,  5.5,  nan],
           [ 8. ,  nan,  4.5,  nan],
           [12. ,  nan,  3.5,  nan]])
    Coordinates:
      * y        (y) int64 10 12 14 16
      * x        (x) float64 1.0 1.5 2.5 3.5

    2D linear interpolation:

    >>> da.interp(x=[0, 0.75, 1.25, 1.75], y=[11, 13, 15], method="linear")
    <xarray.DataArray (x: 4, y: 3)>
    array([[2.5  , 3.   ,   nan],
           [4.   , 5.625,   nan],
           [  nan,   nan,   nan],
           [  nan,   nan,   nan]])
    Coordinates:
      * x        (x) float64 0.0 0.75 1.25 1.75
      * y        (y) int64 11 13 15
    """
    # Reject non-numeric data up front, before any Dataset is built.
    if self.dtype.kind not in "uifc":
        raise TypeError(
            f"interp only works for a numeric type array. Given {self.dtype}."
        )

    interpolated = self._to_temp_dataset().interp(
        coords,
        method=method,
        kwargs=kwargs,
        assume_sorted=assume_sorted,
        **coords_kwargs,
    )
    return self._from_temp_dataset(interpolated)
def rename(
    self,
    new_name_or_name_dict: Union[Hashable, Mapping[Any, Hashable]] = None,
    **names: Hashable,
) -> "DataArray":
    """Return a new DataArray with renamed coordinates or a new name.

    Parameters
    ----------
    new_name_or_name_dict : str or dict-like, optional
        If the argument is dict-like, it is used as a mapping from old
        names to new names for coordinates. Otherwise, the argument is
        used as the new name for this array.
    **names : hashable, optional
        The keyword arguments form of a mapping from old names to
        new names for coordinates.
        One of new_name_or_name_dict or names must be provided.

    Returns
    -------
    renamed : DataArray
        Renamed array or array with renamed coordinates.

    See Also
    --------
    Dataset.rename
    DataArray.swap_dims
    """
    # No keyword names and no dict-like argument: only the array's own
    # name is replaced.
    if not (names or utils.is_dict_like(new_name_or_name_dict)):
        return self._replace(name=cast(Hashable, new_name_or_name_dict))

    positional_mapping = cast(Mapping[Hashable, Hashable], new_name_or_name_dict)
    name_dict = either_dict_or_kwargs(positional_mapping, names, "rename")
    renamed = self._to_temp_dataset().rename(name_dict)
    return self._from_temp_dataset(renamed)
def expand_dims(
    self,
    dim: Union[None, Hashable, Sequence[Hashable], Mapping[Any, Any]] = None,
    axis=None,
    **dim_kwargs: Any,
) -> "DataArray":
    """Return a new object with an additional axis (or axes) inserted at
    the corresponding position in the array shape. The new object is a
    view into the underlying array, not a copy.

    If dim is already a scalar coordinate, it will be promoted to a 1D
    coordinate consisting of a single value.

    Parameters
    ----------
    dim : hashable, sequence of hashable, dict, or None, optional
        Dimensions to include on the new variable.
        If provided as str or sequence of str, then dimensions are inserted
        with length 1. If provided as a dict, then the keys are the new
        dimensions and the values are either integers (giving the length of
        the new dimensions) or sequence/ndarray (giving the coordinates of
        the new dimensions).
    axis : int, list of int or tuple of int, or None, default: None
        Axis position(s) where new axis is to be inserted (position(s) on
        the result array). If a list (or tuple) of integers is passed,
        multiple axes are inserted. In this case, dim arguments should be
        same length list. If axis=None is passed, all the axes will be
        inserted to the start of the result array.
    **dim_kwargs : int or sequence or ndarray
        The keywords are arbitrary dimensions being inserted and the values
        are either the lengths of the new dims (if int is given), or their
        coordinates. Note, this is an alternative to passing a dict to the
        dim kwarg and will only be used if dim is None.

    Returns
    -------
    expanded : same type as caller
        This object, but with an additional dimension(s).

    Raises
    ------
    TypeError
        If ``dim`` is an int.
    ValueError
        If ``dim`` is a non-string sequence containing duplicate values.
    """
    if isinstance(dim, int):
        raise TypeError("dim should be hashable or sequence/mapping of hashables")

    # Normalise every accepted spelling of ``dim`` to a mapping (or None).
    given_as_sequence = isinstance(dim, Sequence) and not isinstance(dim, str)
    if given_as_sequence:
        if len(dim) != len(set(dim)):
            raise ValueError("dims should not contain duplicate values.")
        # Each listed dimension is inserted with length 1.
        dim = dict.fromkeys(dim, 1)
    elif not (dim is None or isinstance(dim, Mapping)):
        # A single hashable names one new dimension of length 1.
        dim = {cast(Hashable, dim): 1}

    dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims")
    expanded = self._to_temp_dataset().expand_dims(dim, axis)
    return self._from_temp_dataset(expanded)
def reset_index(
    self,
    dims_or_levels: Union[Hashable, Sequence[Hashable]],
    drop: bool = False,
) -> "DataArray":
    """Reset the specified index(es) or multi-index level(s).

    Parameters
    ----------
    dims_or_levels : hashable or sequence of hashable
        Name(s) of the dimension(s) and/or multi-index level(s) that will
        be reset.
    drop : bool, optional
        If True, remove the specified indexes and/or multi-index levels
        instead of extracting them as new coordinates (default: False).

    Returns
    -------
    obj : DataArray
        Another dataarray, with this dataarray's data but replaced
        coordinates.

    See Also
    --------
    DataArray.set_index
    """
    # Only the first element of the returned pair is used here.
    split_result = split_indexes(
        dims_or_levels,
        self._coords,
        set(),
        self._level_coords,
        drop=drop,
    )
    new_coords, _ = split_result
    return self._replace(coords=new_coords)
2049 2050 Parameters 2051 ---------- 2052 dim_order : optional 2053 Mapping from names matching dimensions and values given 2054 by lists representing new level orders. Every given dimension 2055 must have a multi-index. 2056 **dim_order_kwargs : optional 2057 The keyword arguments form of ``dim_order``. 2058 One of dim_order or dim_order_kwargs must be provided. 2059 2060 Returns 2061 ------- 2062 obj : DataArray 2063 Another dataarray, with this dataarray's data but replaced 2064 coordinates. 2065 """ 2066 dim_order = either_dict_or_kwargs(dim_order, dim_order_kwargs, "reorder_levels") 2067 replace_coords = {} 2068 for dim, order in dim_order.items(): 2069 coord = self._coords[dim] 2070 index = coord.to_index() 2071 if not isinstance(index, pd.MultiIndex): 2072 raise ValueError(f"coordinate {dim!r} has no MultiIndex") 2073 replace_coords[dim] = IndexVariable(coord.dims, index.reorder_levels(order)) 2074 coords = self._coords.copy() 2075 coords.update(replace_coords) 2076 return self._replace(coords=coords) 2077 2078 def stack( 2079 self, 2080 dimensions: Mapping[Any, Sequence[Hashable]] = None, 2081 **dimensions_kwargs: Sequence[Hashable], 2082 ) -> "DataArray": 2083 """ 2084 Stack any number of existing dimensions into a single new dimension. 2085 2086 New dimensions will be added at the end, and the corresponding 2087 coordinate variables will be combined into a MultiIndex. 2088 2089 Parameters 2090 ---------- 2091 dimensions : mapping of hashable to sequence of hashable 2092 Mapping of the form `new_name=(dim1, dim2, ...)`. 2093 Names of new dimensions, and the existing dimensions that they 2094 replace. An ellipsis (`...`) will be replaced by all unlisted dimensions. 2095 Passing a list containing an ellipsis (`stacked_dim=[...]`) will stack over 2096 all dimensions. 2097 **dimensions_kwargs 2098 The keyword arguments form of ``dimensions``. 2099 One of dimensions or dimensions_kwargs must be provided. 
2100 2101 Returns 2102 ------- 2103 stacked : DataArray 2104 DataArray with stacked data. 2105 2106 Examples 2107 -------- 2108 >>> arr = xr.DataArray( 2109 ... np.arange(6).reshape(2, 3), 2110 ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], 2111 ... ) 2112 >>> arr 2113 <xarray.DataArray (x: 2, y: 3)> 2114 array([[0, 1, 2], 2115 [3, 4, 5]]) 2116 Coordinates: 2117 * x (x) <U1 'a' 'b' 2118 * y (y) int64 0 1 2 2119 >>> stacked = arr.stack(z=("x", "y")) 2120 >>> stacked.indexes["z"] 2121 MultiIndex([('a', 0), 2122 ('a', 1), 2123 ('a', 2), 2124 ('b', 0), 2125 ('b', 1), 2126 ('b', 2)], 2127 names=['x', 'y']) 2128 2129 See Also 2130 -------- 2131 DataArray.unstack 2132 """ 2133 ds = self._to_temp_dataset().stack(dimensions, **dimensions_kwargs) 2134 return self._from_temp_dataset(ds) 2135 2136 def unstack( 2137 self, 2138 dim: Union[Hashable, Sequence[Hashable], None] = None, 2139 fill_value: Any = dtypes.NA, 2140 sparse: bool = False, 2141 ) -> "DataArray": 2142 """ 2143 Unstack existing dimensions corresponding to MultiIndexes into 2144 multiple new dimensions. 2145 2146 New dimensions will be added at the end. 2147 2148 Parameters 2149 ---------- 2150 dim : hashable or sequence of hashable, optional 2151 Dimension(s) over which to unstack. By default unstacks all 2152 MultiIndexes. 2153 fill_value : scalar or dict-like, default: nan 2154 value to be filled. If a dict-like, maps variable names to 2155 fill values. Use the data array's name to refer to its 2156 name. If not provided or if the dict-like does not contain 2157 all variables, the dtype's NA value will be used. 2158 sparse : bool, default: False 2159 use sparse-array if True 2160 2161 Returns 2162 ------- 2163 unstacked : DataArray 2164 Array with unstacked data. 2165 2166 Examples 2167 -------- 2168 >>> arr = xr.DataArray( 2169 ... np.arange(6).reshape(2, 3), 2170 ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], 2171 ... 
def to_unstacked_dataset(self, dim, level=0):
    """Unstack DataArray expanding to Dataset along a given level of a
    stacked coordinate.

    This is the inverse operation of Dataset.to_stacked_array.

    Parameters
    ----------
    dim : str
        Name of existing dimension to unstack
    level : int or str
        The MultiIndex level to expand to a dataset along. Can either be
        the integer index of the level or its name.

    Returns
    -------
    unstacked: Dataset

    Raises
    ------
    ValueError
        If the index of ``dim`` is not a ``pandas.MultiIndex``.

    Examples
    --------
    >>> arr = xr.DataArray(
    ...     np.arange(6).reshape(2, 3),
    ...     coords=[("x", ["a", "b"]), ("y", [0, 1, 2])],
    ... )
    >>> data = xr.Dataset({"a": arr, "b": arr.isel(y=0)})
    >>> data
    <xarray.Dataset>
    Dimensions:  (x: 2, y: 3)
    Coordinates:
      * x        (x) <U1 'a' 'b'
      * y        (y) int64 0 1 2
    Data variables:
        a        (x, y) int64 0 1 2 3 4 5
        b        (x) int64 0 3
    >>> stacked = data.to_stacked_array("z", ["x"])
    >>> stacked.indexes["z"]
    MultiIndex([('a', 0.0),
                ('a', 1.0),
                ('a', 2.0),
                ('b', nan)],
               names=['variable', 'y'])
    >>> roundtripped = stacked.to_unstacked_dataset(dim="z")
    >>> data.identical(roundtripped)
    True

    See Also
    --------
    Dataset.to_stacked_array
    """

    # TODO: benbovy - flexible indexes: update when MultIndex has its own
    # class inheriting from xarray.Index
    stacked_index = self.xindexes[dim].to_pandas_index()
    if not isinstance(stacked_index, pd.MultiIndex):
        raise ValueError(f"'{dim}' is not a stacked coordinate")

    level_position = stacked_index._get_level_number(level)
    level_labels = stacked_index.levels[level_position]
    level_name = stacked_index.names[level_position]

    # One data variable per label of the chosen level: select it, then
    # squeeze away the length-one dimensions left behind.
    return Dataset(
        {
            label: self.sel({level_name: label}, drop=True).squeeze(drop=True)
            for label in level_labels
        }
    )
@property
def T(self) -> "DataArray":
    """Shorthand for ``self.transpose()`` called without arguments."""
    return self.transpose()


def drop_vars(
    self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise"
) -> "DataArray":
    """Returns an array with dropped variables.

    The work is delegated to ``Dataset.drop_vars`` on a temporary dataset.

    Parameters
    ----------
    names : hashable or iterable of hashable
        Name(s) of variables to drop.
    errors : {"raise", "ignore"}, optional
        If 'raise' (default), raises a ValueError error if any of the variable
        passed are not in the dataset. If 'ignore', any given names that are in the
        DataArray are dropped and no error is raised.

    Returns
    -------
    dropped : DataArray
        New DataArray built from this one with the variables removed.
    """
    ds = self._to_temp_dataset().drop_vars(names, errors=errors)
    return self._from_temp_dataset(ds)


def drop(
    self,
    labels: Mapping = None,
    dim: Hashable = None,
    *,
    errors: str = "raise",
    **labels_kwargs,
) -> "DataArray":
    """Backward compatible method based on `drop_vars` and `drop_sel`

    Using either `drop_vars` or `drop_sel` is encouraged

    Parameters
    ----------
    labels : optional
        Forwarded unchanged to ``Dataset.drop``.
    dim : hashable, optional
        Forwarded unchanged to ``Dataset.drop``.
    errors : {"raise", "ignore"}, optional
        Forwarded unchanged to ``Dataset.drop``.
    **labels_kwargs
        Keyword form of the labels, forwarded to ``Dataset.drop``.

    Returns
    -------
    dropped : DataArray

    See Also
    --------
    DataArray.drop_vars
    DataArray.drop_sel
    """
    # ``labels_kwargs`` must be passed along as well: accepting them in the
    # signature but not forwarding them silently discarded the request.
    ds = self._to_temp_dataset().drop(labels, dim, errors=errors, **labels_kwargs)
    return self._from_temp_dataset(ds)


def drop_sel(
    self,
    labels: Mapping[Any, Any] = None,
    *,
    errors: str = "raise",
    **labels_kwargs,
) -> "DataArray":
    """Drop index labels from this DataArray.

    Parameters
    ----------
    labels : mapping of hashable to Any
        Index labels to drop
    errors : {"raise", "ignore"}, optional
        If 'raise' (default), raises a ValueError error if
        any of the index labels passed are not
        in the dataset. If 'ignore', any given labels that are in the
        dataset are dropped and no error is raised.
    **labels_kwargs : {dim: label, ...}, optional
        The keyword arguments form of ``dim`` and ``labels``

    Returns
    -------
    dropped : DataArray
    """
    # Merge the positional mapping and the keyword form only when one of
    # them is actually in use; anything else is passed through untouched.
    if labels_kwargs or isinstance(labels, dict):
        labels = either_dict_or_kwargs(labels, labels_kwargs, "drop")

    ds = self._to_temp_dataset().drop_sel(labels, errors=errors)
    return self._from_temp_dataset(ds)
def dropna(
    self, dim: Hashable, how: str = "any", thresh: int = None
) -> "DataArray":
    """Returns a new array with dropped labels for missing values along
    the provided dimension.

    Parameters
    ----------
    dim : hashable
        Dimension along which to drop missing values. Dropping along
        multiple dimensions simultaneously is not yet supported.
    how : {"any", "all"}, optional
        * any : if any NA values are present, drop that label
        * all : if all values are NA, drop that label
    thresh : int, default: None
        If supplied, require this many non-NA values.

    Returns
    -------
    DataArray
    """
    # Delegate to the Dataset implementation through a temporary dataset.
    temp_dataset = self._to_temp_dataset()
    trimmed = temp_dataset.dropna(dim, how=how, thresh=thresh)
    return self._from_temp_dataset(trimmed)


def fillna(self, value: Any) -> "DataArray":
    """Fill missing values in this object.

    This operation follows the normal broadcasting and alignment rules that
    xarray uses for binary arithmetic, except the result is aligned to this
    object (``join='left'``) instead of aligned to the intersection of
    index coordinates (``join='inner'``).

    Parameters
    ----------
    value : scalar, ndarray or DataArray
        Used to fill all matching missing values in this array. If the
        argument is a DataArray, it is first aligned with (reindexed to)
        this array.

    Returns
    -------
    DataArray

    Raises
    ------
    TypeError
        If ``value`` is dict-like.
    """
    # Guard clause: dict-like fill values are rejected up front.
    if not utils.is_dict_like(value):
        return ops.fillna(self, value)
    raise TypeError(
        "cannot provide fill value as a dictionary with "
        "fillna on a DataArray"
    )
To only interpolate over gaps less than a given length, 2506 see ``max_gap``. 2507 max_gap : int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default: None 2508 Maximum size of gap, a continuous sequence of NaNs, that will be filled. 2509 Use None for no limit. When interpolating along a datetime64 dimension 2510 and ``use_coordinate=True``, ``max_gap`` can be one of the following: 2511 2512 - a string that is valid input for pandas.to_timedelta 2513 - a :py:class:`numpy.timedelta64` object 2514 - a :py:class:`pandas.Timedelta` object 2515 - a :py:class:`datetime.timedelta` object 2516 2517 Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled 2518 dimensions has not been implemented yet. Gap length is defined as the difference 2519 between coordinate values at the first data point after a gap and the last value 2520 before a gap. For gaps at the beginning (end), gap length is defined as the difference 2521 between coordinate values at the first (last) valid data point and the first (last) NaN. 2522 For example, consider:: 2523 2524 <xarray.DataArray (x: 9)> 2525 array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) 2526 Coordinates: 2527 * x (x) int64 0 1 2 3 4 5 6 7 8 2528 2529 The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively 2530 keep_attrs : bool, default: True 2531 If True, the dataarray's attributes (`attrs`) will be copied from 2532 the original object to the new one. If False, the new 2533 object will be returned without attributes. 2534 **kwargs : dict, optional 2535 parameters passed verbatim to the underlying interpolation function 2536 2537 Returns 2538 ------- 2539 interpolated: DataArray 2540 Filled in DataArray. 2541 2542 See Also 2543 -------- 2544 numpy.interp 2545 scipy.interpolate 2546 2547 Examples 2548 -------- 2549 >>> da = xr.DataArray( 2550 ... [np.nan, 2, 3, np.nan, 0], dims="x", coords={"x": [0, 1, 2, 3, 4]} 2551 ... 
def ffill(self, dim: Hashable, limit: int = None) -> "DataArray":
    """Fill NaN values by propagating values forward

    *Requires bottleneck.*

    Parameters
    ----------
    dim : hashable
        Specifies the dimension along which to propagate values when
        filling.
    limit : int, default: None
        The maximum number of consecutive NaN values to forward fill. In
        other words, if there is a gap with more than this number of
        consecutive NaNs, it will only be partially filled. Must be greater
        than 0 or None for no limit. Must be None or greater than or equal
        to axis length if filling along chunked axes (dimensions).

    Returns
    -------
    DataArray
    """
    # Imported at call time, under an alias so it does not shadow this
    # method's own name.
    from .missing import ffill as _forward_fill

    return _forward_fill(self, dim, limit=limit)
def combine_first(self, other: "DataArray") -> "DataArray":
    """Combine two DataArray objects, with union of coordinates.

    This operation follows the normal broadcasting and alignment rules of
    ``join='outer'``. Default to non-null values of array calling the
    method. Use np.nan to fill in vacant cells after alignment.

    Parameters
    ----------
    other : DataArray
        Used to fill all matching missing values in this array.

    Returns
    -------
    DataArray
    """
    combined = ops.fillna(self, other, join="outer")
    return combined


def reduce(
    self,
    func: Callable[..., Any],
    dim: Union[None, Hashable, Sequence[Hashable]] = None,
    axis: Union[None, int, Sequence[int]] = None,
    keep_attrs: bool = None,
    keepdims: bool = False,
    **kwargs: Any,
) -> "DataArray":
    """Reduce this array by applying `func` along some dimension(s).

    Parameters
    ----------
    func : callable
        Function which can be called in the form
        `f(x, axis=axis, **kwargs)` to return the result of reducing an
        np.ndarray over an integer valued axis.
    dim : hashable or sequence of hashable, optional
        Dimension(s) over which to apply `func`.
    axis : int or sequence of int, optional
        Axis(es) over which to repeatedly apply `func`. Only one of the
        'dim' and 'axis' arguments can be supplied. If neither are
        supplied, then the reduction is calculated over the flattened array
        (by calling `f(x)` without an axis argument).
    keep_attrs : bool, optional
        If True, the variable's attributes (`attrs`) will be copied from
        the original object to the new one. If False (default), the new
        object will be returned without attributes.
    keepdims : bool, default: False
        If True, the dimensions which are reduced are left in the result
        as dimensions of size one. Coordinates that use these dimensions
        are removed.
    **kwargs : dict
        Additional keyword arguments passed on to `func`.

    Returns
    -------
    reduced : DataArray
        DataArray with this object's array replaced with an array with
        summarized data and the indicated dimension(s) removed.
    """
    # The reduction itself happens on the wrapped variable; the helper then
    # rebuilds a DataArray around the reduced variable.
    reduced_variable = self.variable.reduce(
        func, dim, axis, keep_attrs, keepdims, **kwargs
    )
    return self._replace_maybe_drop_dims(reduced_variable)
def to_dataframe(
    self, name: Hashable = None, dim_order: List[Hashable] = None
) -> pd.DataFrame:
    """Convert this array and its coordinates into a tidy pandas.DataFrame.

    The DataFrame is indexed by the Cartesian product of index coordinates
    (in the form of a :py:class:`pandas.MultiIndex`).

    Other coordinates are included as columns in the DataFrame.

    Parameters
    ----------
    name
        Name to give to this array (required if unnamed).
    dim_order
        Hierarchical dimension order for the resulting dataframe.
        Array content is transposed to this order and then written out as flat
        vectors in contiguous order, so the last dimension in this list
        will be contiguous in the resulting DataFrame. This has a major
        influence on which operations are efficient on the resulting
        dataframe.

        If provided, must include all dimensions of this DataArray. By default,
        dimensions are sorted according to the DataArray dimensions order.

    Returns
    -------
    result
        DataArray as a pandas DataFrame.

    Raises
    ------
    ValueError
        If no name is available (neither ``name`` nor ``self.name``), or if
        the array is zero-dimensional.
    """
    if name is None:
        name = self.name
    if name is None:
        raise ValueError(
            "cannot convert an unnamed DataArray to a "
            "DataFrame: use the ``name`` parameter"
        )
    if self.ndim == 0:
        raise ValueError("cannot convert a scalar to a DataFrame")

    # By using a unique name, we can convert a DataArray into a DataFrame
    # even if it shares a name with one of its coordinates.
    # I would normally use unique_name = object() but that results in a
    # dataframe with columns in the wrong order, for reasons I have not
    # been able to debug (possibly a pandas bug?).
    unique_name = "__unique_name_identifier_z98xfz98xugfg73ho__"
    ds = self._to_dataset_whole(name=unique_name)

    if dim_order is None:
        ordered_dims = dict(zip(self.dims, self.shape))
    else:
        ordered_dims = ds._normalize_dim_order(dim_order=dim_order)

    df = ds._to_dataframe(ordered_dims)
    # Swap the placeholder column label back to the requested name.
    df.columns = [name if c == unique_name else c for c in df.columns]
    return df


def to_series(self) -> pd.Series:
    """Convert this array into a pandas.Series.

    The Series is indexed by the Cartesian product of index coordinates
    (in the form of a :py:class:`pandas.MultiIndex`).
    """
    index = self.coords.to_index()
    # Flatten the values so they line up one-to-one with the index entries.
    return pd.Series(self.values.reshape(-1), index=index, name=self.name)


def to_masked_array(self, copy: bool = True) -> np.ma.MaskedArray:
    """Convert this array into a numpy.ma.MaskedArray

    Parameters
    ----------
    copy : bool, default: True
        If True make a copy of the array in the result. If False,
        a MaskedArray view of DataArray.values is returned.

    Returns
    -------
    result : MaskedArray
        Masked where missing values occur, as reported by
        :py:func:`pandas.isnull` (e.g. NaN). Infinite values are not
        treated as missing and stay unmasked.
    """
    values = self.to_numpy()  # only compute lazy arrays once
    isnull = pd.isnull(values)
    return np.ma.MaskedArray(data=values, mask=isnull, copy=copy)
def to_dict(self, data: bool = True) -> dict:
    """
    Convert this xarray.DataArray into a dictionary following xarray
    naming conventions.

    Converts all variables and attributes to native Python objects.
    Useful for converting to json. To avoid datetime incompatibility
    use decode_times=False kwarg in xarray.open_dataset.

    Parameters
    ----------
    data : bool, optional
        Whether to include the actual data in the dictionary. When set to
        False, returns just the schema.

    Returns
    -------
    dict
        The dictionary produced by the wrapped variable's ``to_dict``,
        extended with a ``"coords"`` entry (one sub-dictionary per
        coordinate) and a ``"name"`` entry.

    See Also
    --------
    DataArray.from_dict
    """
    d = self.variable.to_dict(data=data)
    d.update({"coords": {}, "name": self.name})
    for k in self.coords:
        d["coords"][k] = self.coords[k].variable.to_dict(data=data)
    return d


@classmethod
def from_dict(cls, d: dict) -> "DataArray":
    """
    Convert a dictionary into an xarray.DataArray

    Input dict can take several forms:

    .. code:: python

        d = {"dims": ("t"), "data": x}

        d = {
            "coords": {"t": {"dims": "t", "data": t, "attrs": {"units": "s"}}},
            "attrs": {"title": "air temperature"},
            "dims": "t",
            "data": x,
            "name": "a",
        }

    where "t" is the name of the dimension, "a" is the name of the array,
    and x and t are lists, numpy.arrays, or pandas objects.

    Parameters
    ----------
    d : dict
        Mapping with a minimum structure of {"dims": [...], "data": [...]}

    Returns
    -------
    obj : xarray.DataArray

    Raises
    ------
    ValueError
        If ``d`` has no ``"data"`` key, or if a coordinate entry lacks its
        ``"dims"`` or ``"data"`` key.

    See Also
    --------
    DataArray.to_dict
    Dataset.from_dict
    """
    coords = None
    if "coords" in d:
        try:
            coords = {
                k: (v["dims"], v["data"], v.get("attrs"))
                for k, v in d["coords"].items()
            }
        except KeyError as e:
            # Report the missing key as a ValueError, keeping the original
            # KeyError attached as the explicit cause.
            raise ValueError(
                "cannot convert dict when coords are missing the key "
                "'{dims_data}'".format(dims_data=str(e.args[0]))
            ) from e
    try:
        data = d["data"]
    except KeyError as e:
        raise ValueError("cannot convert dict without the key 'data'") from e
    else:
        obj = cls(data, coords, d.get("dims"), d.get("name"), d.get("attrs"))
    return obj
def to_cdms2(self) -> "cdms2_Variable":
    """Convert this array into a cdms2.Variable"""
    # Aliased so the converter does not shadow this method's own name.
    from ..convert import to_cdms2 as _convert

    return _convert(self)


@classmethod
def from_cdms2(cls, variable: "cdms2_Variable") -> "DataArray":
    """Convert a cdms2.Variable into an xarray.DataArray"""
    from ..convert import from_cdms2 as _convert

    return _convert(variable)


def to_iris(self) -> "iris_Cube":
    """Convert this array into a iris.cube.Cube"""
    from ..convert import to_iris as _convert

    return _convert(self)


@classmethod
def from_iris(cls, cube: "iris_Cube") -> "DataArray":
    """Convert a iris.cube.Cube into an xarray.DataArray"""
    from ..convert import from_iris as _convert

    return _convert(cube)


def _all_compat(self, other: "DataArray", compat_str: str) -> bool:
    """Helper function for equals, broadcast_equals, and identical

    ``compat_str`` names the comparison method looked up on the wrapped
    variables; it is applied to every coordinate pair and then to the two
    arrays themselves.
    """

    def variables_match(left, right):
        comparison = getattr(left.variable, compat_str)
        return comparison(right.variable)

    coords_match = utils.dict_equiv(
        self.coords, other.coords, compat=variables_match
    )
    # Short-circuits: the arrays are only compared when the coords agree.
    return coords_match and variables_match(self, other)


def broadcast_equals(self, other: "DataArray") -> bool:
    """Two DataArrays are broadcast equal if they are equal after
    broadcasting them against each other such that they have the same
    dimensions.

    See Also
    --------
    DataArray.equals
    DataArray.identical
    """
    try:
        verdict = self._all_compat(other, "broadcast_equals")
    except (TypeError, AttributeError):
        # Objects that cannot be compared this way count as not equal.
        return False
    return verdict


def equals(self, other: "DataArray") -> bool:
    """True if two DataArrays have the same dimensions, coordinates and
    values; otherwise False.

    DataArrays can still be equal (like pandas objects) if they have NaN
    values in the same locations.

    This method is necessary because `v1 == v2` for ``DataArray``
    does element-wise comparisons (like numpy.ndarrays).

    See Also
    --------
    DataArray.broadcast_equals
    DataArray.identical
    """
    try:
        verdict = self._all_compat(other, "equals")
    except (TypeError, AttributeError):
        return False
    return verdict


def identical(self, other: "DataArray") -> bool:
    """Like equals, but also checks the array name and attributes, and
    attributes on all coordinates.

    See Also
    --------
    DataArray.broadcast_equals
    DataArray.equals
    """
    try:
        verdict = self.name == other.name and self._all_compat(other, "identical")
    except (TypeError, AttributeError):
        return False
    return verdict


def _result_name(self, other: Any = None) -> Optional[Hashable]:
    """Pick the name for the result of an operation involving ``other``.

    The own name is kept when ``other`` has no ``name`` attribute or its
    name compares equal; otherwise the result is unnamed (``None``).
    """
    # use the same naming heuristics as pandas:
    # https://github.com/ContinuumIO/blaze/issues/458#issuecomment-51936356
    other_name = getattr(other, "name", _default)
    if other_name is _default:
        return self.name
    return self.name if other_name == self.name else None


def __array_wrap__(self, obj, context=None) -> "DataArray":
    """Wrap ``obj`` via the variable's ``__array_wrap__`` and return a
    DataArray built around the wrapped variable."""
    wrapped_variable = self.variable.__array_wrap__(obj, context)
    return self._replace(wrapped_variable)


def __matmul__(self, obj):
    """Implement ``self @ obj`` by delegating to ``self.dot``."""
    return self.dot(obj)
def _unary_op(self, f: Callable, *args, **kwargs):
    """Apply the unary function ``f`` to the underlying data and wrap the result.

    ``keep_attrs`` is popped from ``kwargs``; when it is None the value is
    taken from ``_get_keep_attrs(default=True)``. The remaining positional
    and keyword arguments are forwarded to ``f`` after the data.

    While ``f`` runs, "All-NaN (slice|axis) encountered" warnings and
    "Mean of empty slice" RuntimeWarnings are filtered out and numpy's
    error state is set to ignore everything.
    """
    keep_attrs = kwargs.pop("keep_attrs", None)
    if keep_attrs is None:
        keep_attrs = _get_keep_attrs(default=True)

    # (message pattern, warning category) pairs silenced during the call;
    # ``Warning`` is the default category of ``warnings.filterwarnings``.
    silenced = (
        (r"All-NaN (slice|axis) encountered", Warning),
        (r"Mean of empty slice", RuntimeWarning),
    )
    with warnings.catch_warnings():
        for pattern, category in silenced:
            warnings.filterwarnings("ignore", pattern, category=category)
        with np.errstate(all="ignore"):
            raw = f(self.variable.data, *args, **kwargs)
            wrapped = self.__array_wrap__(raw)
        if keep_attrs:
            wrapped.attrs = self.attrs
        return wrapped

def _binary_op(
    self,
    other,
    f: Callable,
    reflexive: bool = False,
):
    """Apply the binary function ``f`` to this array and ``other``.

    Returns ``NotImplemented`` for ``Dataset`` and ``GroupBy`` operands.
    A ``DataArray`` operand is first aligned with ``self`` using the
    ``OPTIONS["arithmetic_join"]`` join type. With ``reflexive=True`` the
    operands are passed to ``f`` in swapped order.
    """
    if isinstance(other, (Dataset, groupby.GroupBy)):
        return NotImplemented
    if isinstance(other, DataArray):
        join = OPTIONS["arithmetic_join"]
        self, other = align(self, other, join=join, copy=False)

    # Non-xarray operands have no variable/coords: use them as-is.
    rhs_variable = getattr(other, "variable", other)
    rhs_coords = getattr(other, "coords", None)

    if reflexive:
        operands = (rhs_variable, self.variable)
    else:
        operands = (self.variable, rhs_variable)
    variable = f(*operands)

    coords, indexes = self.coords._merge_raw(rhs_coords, reflexive)
    name = self._result_name(other)
    return self._replace(variable, coords, name, indexes=indexes)
def _title_for_slice(self, truncate: int = 50) -> str:
    """Build a plot title from the coordinates that hold a single value.

    Each coordinate of size 1 contributes an entry of the form
    ``"<name> = <value><units>"``; the entries are joined with ``", "``.

    Parameters
    ----------
    truncate : int, default: 50
        Maximum number of characters for the title. A longer title is cut
        to ``truncate - 3`` characters and ``"..."`` is appended.

    Returns
    -------
    title : string
        Can be used for plot titles

    """
    entries = [
        f"{dim} = {format_item(coord.values)}{_get_units_from_attrs(coord)}"
        for dim, coord in self.coords.items()
        if coord.size == 1
    ]
    title = ", ".join(entries)
    if len(title) <= truncate:
        return title
    return title[: (truncate - 3)] + "..."
def shift(
    self,
    shifts: Mapping[Any, int] = None,
    fill_value: Any = dtypes.NA,
    **shifts_kwargs: int,
) -> "DataArray":
    """Shift the data of this DataArray by an offset along one or more
    dimensions.

    Coordinates stay in place; only the data is moved. This matches the
    behavior of ``shift`` in pandas.

    Positions that receive values from beyond the array bounds are filled
    according to `fill_value`. For periodic offsets use `roll` instead.

    Parameters
    ----------
    shifts : mapping of hashable to int, optional
        Integer offset to shift along each of the given dimensions.
        Positive offsets shift to the right; negative offsets shift to the
        left.
    fill_value : scalar, optional
        Value to use for newly missing values
    **shifts_kwargs
        The keyword arguments form of ``shifts``.
        One of shifts or shifts_kwargs must be provided.

    Returns
    -------
    shifted : DataArray
        DataArray with the same coordinates and attributes but shifted
        data.

    See Also
    --------
    roll

    Examples
    --------
    >>> arr = xr.DataArray([5, 6, 7], dims="x")
    >>> arr.shift(x=1)
    <xarray.DataArray (x: 3)>
    array([nan,  5.,  6.])
    Dimensions without coordinates: x
    """
    # The shifting itself is implemented by the underlying Variable.
    return self._replace(
        variable=self.variable.shift(
            shifts=shifts, fill_value=fill_value, **shifts_kwargs
        )
    )
@property
def real(self) -> "DataArray":
    """This array with its variable replaced by the variable's ``real``."""
    real_part = self.variable.real
    return self._replace(real_part)

@property
def imag(self) -> "DataArray":
    """This array with its variable replaced by the variable's ``imag``."""
    imag_part = self.variable.imag
    return self._replace(imag_part)
def sortby(
    self,
    variables: Union[Hashable, "DataArray", Sequence[Union[Hashable, "DataArray"]]],
    ascending: bool = True,
) -> "DataArray":
    """Sort object by labels or values (along an axis).

    The dataarray is sorted either along the specified dimensions or by
    the values of 1-D dataarrays sharing a dimension with the calling
    object.

    Dataarrays given as input variables are aligned (via left-join) to the
    calling object before sorting by cell values. NaNs are sorted to the
    end, following Numpy convention.

    When several sorts along the same dimension are given, numpy's lexsort
    is performed along that dimension:
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.lexsort.html
    and the FIRST key in the sequence is used as the primary sort key,
    followed by the 2nd key, etc.

    Parameters
    ----------
    variables : hashable, DataArray, or sequence of hashable or DataArray
        1D DataArray objects or name(s) of 1D variable(s) in
        coords whose values are used to sort this array.
    ascending : bool, optional
        Whether to sort by ascending or descending order.

    Returns
    -------
    sorted : DataArray
        A new dataarray where all the specified dims are sorted by dim
        labels.

    See Also
    --------
    Dataset.sortby
    numpy.sort
    pandas.sort_values
    pandas.sort_index

    Examples
    --------
    >>> da = xr.DataArray(
    ...     np.random.rand(5),
    ...     coords=[pd.date_range("1/1/2000", periods=5)],
    ...     dims="time",
    ... )
    >>> da
    <xarray.DataArray (time: 5)>
    array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ])
    Coordinates:
      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-05

    >>> da.sortby(da)
    <xarray.DataArray (time: 5)>
    array([0.4236548 , 0.54488318, 0.5488135 , 0.60276338, 0.71518937])
    Coordinates:
      * time     (time) datetime64[ns] 2000-01-05 2000-01-04 ... 2000-01-02
    """
    # Delegate to the Dataset implementation through a temporary dataset.
    temp_dataset = self._to_temp_dataset()
    sorted_dataset = temp_dataset.sortby(variables, ascending=ascending)
    return self._from_temp_dataset(sorted_dataset)
def rank(
    self, dim: Hashable, pct: bool = False, keep_attrs: bool = None
) -> "DataArray":
    """Ranks the data.

    Tied values all receive the average of the ranks they would otherwise
    have been assigned. Ranks begin at 1, not 0. If pct, computes
    percentage ranks.

    NaNs in the input array are returned as NaNs.

    The `bottleneck` library is required.

    Parameters
    ----------
    dim : hashable
        Dimension over which to compute rank.
    pct : bool, optional
        If True, compute percentage ranks, otherwise compute integer ranks.
    keep_attrs : bool, optional
        If True, the dataset's attributes (`attrs`) will be copied from
        the original object to the new one. If False (default), the new
        object will be returned without attributes.

    Returns
    -------
    ranked : DataArray
        DataArray with the same coordinates and dtype 'float64'.

    Examples
    --------
    >>> arr = xr.DataArray([5, 6, 7], dims="x")
    >>> arr.rank("x")
    <xarray.DataArray (x: 3)>
    array([1., 2., 3.])
    Dimensions without coordinates: x
    """
    # Delegate to the Dataset implementation through a temporary dataset.
    temp_dataset = self._to_temp_dataset()
    ranked = temp_dataset.rank(dim, pct=pct, keep_attrs=keep_attrs)
    return self._from_temp_dataset(ranked)
def integrate(
    self,
    coord: Union[Hashable, Sequence[Hashable]] = None,
    datetime_unit: str = None,
) -> "DataArray":
    """Integrate along the given coordinate using the trapezoidal rule.

    .. note::
        This feature is limited to simple cartesian geometry, i.e. coord
        must be one dimensional.

    Parameters
    ----------
    coord : hashable, or sequence of hashable
        Coordinate(s) used for the integration.
    datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \
                    'ps', 'fs', 'as'}, optional
        Specify the unit if a datetime coordinate is used.

    Returns
    -------
    integrated : DataArray

    See also
    --------
    Dataset.integrate
    numpy.trapz : corresponding numpy function

    Examples
    --------

    >>> da = xr.DataArray(
    ...     np.arange(12).reshape(4, 3),
    ...     dims=["x", "y"],
    ...     coords={"x": [0, 0.1, 1.1, 1.2]},
    ... )
    >>> da
    <xarray.DataArray (x: 4, y: 3)>
    array([[ 0,  1,  2],
           [ 3,  4,  5],
           [ 6,  7,  8],
           [ 9, 10, 11]])
    Coordinates:
      * x        (x) float64 0.0 0.1 1.1 1.2
    Dimensions without coordinates: y
    >>>
    >>> da.integrate("x")
    <xarray.DataArray (y: 3)>
    array([5.4, 6.6, 7.8])
    Dimensions without coordinates: y
    """
    # Delegate to the Dataset implementation through a temporary dataset.
    temp_dataset = self._to_temp_dataset()
    integrated = temp_dataset.integrate(coord, datetime_unit)
    return self._from_temp_dataset(integrated)
def unify_chunks(self) -> "DataArray":
    """Unify chunk size along all chunked dimensions of this DataArray.

    Returns
    -------
    DataArray with consistent chunk sizes for all dask-array variables

    See Also
    --------
    dask.array.core.unify_chunks
    """
    # The module-level ``unify_chunks`` returns a sequence; this object's
    # result is its first element.
    unified = unify_chunks(self)
    return unified[0]
3772 template : DataArray or Dataset, optional 3773 xarray object representing the final result after compute is called. If not provided, 3774 the function will be first run on mocked-up data, that looks like this object but 3775 has sizes 0, to determine properties of the returned object such as dtype, 3776 variable names, attributes, new dimensions and new indexes (if any). 3777 ``template`` must be provided if the function changes the size of existing dimensions. 3778 When provided, ``attrs`` on variables in `template` are copied over to the result. Any 3779 ``attrs`` set by ``func`` will be ignored. 3780 3781 Returns 3782 ------- 3783 A single DataArray or Dataset with dask backend, reassembled from the outputs of the 3784 function. 3785 3786 Notes 3787 ----- 3788 This function is designed for when ``func`` needs to manipulate a whole xarray object 3789 subset to each block. Each block is loaded into memory. In the more common case where 3790 ``func`` can work on numpy arrays, it is recommended to use ``apply_ufunc``. 3791 3792 If none of the variables in this object is backed by dask arrays, calling this function is 3793 equivalent to calling ``func(obj, *args, **kwargs)``. 3794 3795 See Also 3796 -------- 3797 dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks 3798 xarray.DataArray.map_blocks 3799 3800 Examples 3801 -------- 3802 Calculate an anomaly from climatology using ``.groupby()``. Using 3803 ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, 3804 its indices, and its methods like ``.groupby()``. 3805 3806 >>> def calculate_anomaly(da, groupby_type="time.month"): 3807 ... gb = da.groupby(groupby_type) 3808 ... clim = gb.mean(dim="time") 3809 ... return gb - clim 3810 ... 3811 >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") 3812 >>> month = xr.DataArray(time.month, coords={"time": time}, dims=["time"]) 3813 >>> np.random.seed(123) 3814 >>> array = xr.DataArray( 3815 ... 
def polyfit(
    self,
    dim: Hashable,
    deg: int,
    skipna: bool = None,
    rcond: float = None,
    w: Union[Hashable, Any] = None,
    full: bool = False,
    cov: Union[bool, str] = False,
) -> "Dataset":
    """
    Least squares polynomial fit.

    This replicates the behaviour of `numpy.polyfit` but differs by skipping
    invalid values when `skipna = True`.

    Parameters
    ----------
    dim : hashable
        Coordinate along which to fit the polynomials.
    deg : int
        Degree of the fitting polynomial.
    skipna : bool, optional
        If True, removes all invalid values before fitting each 1D slices of the array.
        Default is True if data is stored in a dask.array or if there is any
        invalid values, False otherwise.
    rcond : float, optional
        Relative condition number to the fit.
    w : hashable or array-like, optional
        Weights to apply to the y-coordinate of the sample points.
        Can be an array-like object or the name of a coordinate in the dataset.
    full : bool, optional
        Whether to return the residuals, matrix rank and singular values in addition
        to the coefficients.
    cov : bool or str, optional
        Whether to return the covariance matrix in addition to the coefficients.
        The matrix is not scaled if `cov='unscaled'`.

    Returns
    -------
    polyfit_results : Dataset
        A single dataset which contains:

        polyfit_coefficients
            The coefficients of the best fit.
        polyfit_residuals
            The residuals of the least-square computation (only included if `full=True`).
            When the matrix rank is deficient, np.nan is returned.
        [dim]_matrix_rank
            The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
        [dim]_singular_value
            The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
        polyfit_covariance
            The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)

    See Also
    --------
    numpy.polyfit
    numpy.polyval
    xarray.polyval
    """
    # The fit is implemented on Dataset; the resulting Dataset is returned
    # as-is (it is not converted back to a DataArray).
    return self._to_temp_dataset().polyfit(
        dim, deg, skipna=skipna, rcond=rcond, w=w, full=full, cov=cov
    )
3944 'edge' 3945 Pads with the edge values of array. 3946 'linear_ramp' 3947 Pads with the linear ramp between end_value and the 3948 array edge value. 3949 'maximum' 3950 Pads with the maximum value of all or part of the 3951 vector along each axis. 3952 'mean' 3953 Pads with the mean value of all or part of the 3954 vector along each axis. 3955 'median' 3956 Pads with the median value of all or part of the 3957 vector along each axis. 3958 'minimum' 3959 Pads with the minimum value of all or part of the 3960 vector along each axis. 3961 'reflect' 3962 Pads with the reflection of the vector mirrored on 3963 the first and last values of the vector along each 3964 axis. 3965 'symmetric' 3966 Pads with the reflection of the vector mirrored 3967 along the edge of the array. 3968 'wrap' 3969 Pads with the wrap of the vector along the axis. 3970 The first values are used to pad the end and the 3971 end values are used to pad the beginning. 3972 stat_length : int, tuple or mapping of hashable to tuple, default: None 3973 Used in 'maximum', 'mean', 'median', and 'minimum'. Number of 3974 values at edge of each axis used to calculate the statistic value. 3975 {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)} unique 3976 statistic lengths along each dimension. 3977 ((before, after),) yields same before and after statistic lengths 3978 for each dimension. 3979 (stat_length,) or int is a shortcut for before = after = statistic 3980 length for all axes. 3981 Default is ``None``, to use the entire axis. 3982 constant_values : scalar, tuple or mapping of hashable to tuple, default: 0 3983 Used in 'constant'. The values to set the padded values for each 3984 axis. 3985 ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique 3986 pad constants along each dimension. 3987 ``((before, after),)`` yields same before and after constants for each 3988 dimension. 
3989 ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for 3990 all dimensions. 3991 Default is 0. 3992 end_values : scalar, tuple or mapping of hashable to tuple, default: 0 3993 Used in 'linear_ramp'. The values used for the ending value of the 3994 linear_ramp and that will form the edge of the padded array. 3995 ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique 3996 end values along each dimension. 3997 ``((before, after),)`` yields same before and after end values for each 3998 axis. 3999 ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for 4000 all axes. 4001 Default is 0. 4002 reflect_type : {"even", "odd"}, optional 4003 Used in "reflect", and "symmetric". The "even" style is the 4004 default with an unaltered reflection around the edge value. For 4005 the "odd" style, the extended part of the array is created by 4006 subtracting the reflected values from two times the edge value. 4007 **pad_width_kwargs 4008 The keyword arguments form of ``pad_width``. 4009 One of ``pad_width`` or ``pad_width_kwargs`` must be provided. 4010 4011 Returns 4012 ------- 4013 padded : DataArray 4014 DataArray with the padded coordinates and data. 4015 4016 See Also 4017 -------- 4018 DataArray.shift, DataArray.roll, DataArray.bfill, DataArray.ffill, numpy.pad, dask.array.pad 4019 4020 Notes 4021 ----- 4022 For ``mode="constant"`` and ``constant_values=None``, integer types will be 4023 promoted to ``float`` and padded with ``np.nan``. 4024 4025 Examples 4026 -------- 4027 >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) 4028 >>> arr.pad(x=(1, 2), constant_values=0) 4029 <xarray.DataArray (x: 6)> 4030 array([0, 5, 6, 7, 0, 0]) 4031 Coordinates: 4032 * x (x) float64 nan 0.0 1.0 2.0 nan nan 4033 4034 >>> da = xr.DataArray( 4035 ... [[0, 1, 2, 3], [10, 11, 12, 13]], 4036 ... dims=["x", "y"], 4037 ... coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, 4038 ... 
def idxmin(
    self,
    dim: Hashable = None,
    skipna: bool = None,
    fill_value: Any = dtypes.NA,
    keep_attrs: bool = None,
) -> "DataArray":
    """Find the coordinate label at which the minimum occurs along a dimension.

    The result is a new `DataArray`, named after the dimension, whose values
    are the coordinate labels along that dimension at which the minimum
    values are found.

    Unlike :py:meth:`~DataArray.argmin`, which gives the integer index, this
    method gives the coordinate label.

    Parameters
    ----------
    dim : hashable, optional
        Dimension along which `idxmin` is applied. May be omitted for 1D
        arrays, but must be given for arrays with 2 or more dimensions.
    skipna : bool or None, default: None
        If True, missing values (as marked by NaN) are skipped. By default,
        missing values are only skipped for ``float``, ``complex``, and
        ``object`` dtypes; other dtypes either have no sentinel missing
        value (``int``) or ``skipna=True`` has not been implemented
        (``datetime64`` or ``timedelta64``).
    fill_value : Any, default: NaN
        Value used where every value along the dimension is null. NaN by
        default. The fill value and the result are automatically converted
        to a compatible dtype if possible. Ignored if ``skipna`` is False.
    keep_attrs : bool, default: False
        If True, the attributes (``attrs``) are copied from the original
        object to the new one. If False (default), the new object is
        returned without attributes.

    Returns
    -------
    reduced : DataArray
        New `DataArray` object with `idxmin` applied to its data and the
        indicated dimension removed.

    See Also
    --------
    Dataset.idxmin, DataArray.idxmax, DataArray.min, DataArray.argmin

    Examples
    --------
    >>> array = xr.DataArray(
    ...     [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]}
    ... )
    >>> array.min()
    <xarray.DataArray ()>
    array(-2)
    >>> array.argmin()
    <xarray.DataArray ()>
    array(4)
    >>> array.idxmin()
    <xarray.DataArray 'x' ()>
    array('e', dtype='<U1')

    >>> array = xr.DataArray(
    ...     [
    ...         [2.0, 1.0, 2.0, 0.0, -2.0],
    ...         [-4.0, np.NaN, 2.0, np.NaN, -2.0],
    ...         [np.NaN, np.NaN, 1.0, np.NaN, np.NaN],
    ...     ],
    ...     dims=["y", "x"],
    ...     coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2},
    ... )
    >>> array.min(dim="x")
    <xarray.DataArray (y: 3)>
    array([-2., -4.,  1.])
    Coordinates:
      * y        (y) int64 -1 0 1
    >>> array.argmin(dim="x")
    <xarray.DataArray (y: 3)>
    array([4, 0, 2])
    Coordinates:
      * y        (y) int64 -1 0 1
    >>> array.idxmin(dim="x")
    <xarray.DataArray 'x' (y: 3)>
    array([16.,  0.,  4.])
    Coordinates:
      * y        (y) int64 -1 0 1
    """

    def _index_of_minimum(candidate, *args, **kwargs):
        # Forward everything untouched to the object's own ``argmin``.
        return candidate.argmin(*args, **kwargs)

    return computation._calc_idxminmax(
        array=self,
        func=_index_of_minimum,
        dim=dim,
        skipna=skipna,
        fill_value=fill_value,
        keep_attrs=keep_attrs,
    )
def idxmax(
    self,
    dim: Hashable = None,
    skipna: bool = None,
    fill_value: Any = dtypes.NA,
    keep_attrs: bool = None,
) -> "DataArray":
    """Find the coordinate label at which the maximum occurs along a dimension.

    The result is a new `DataArray`, named after the dimension, whose values
    are the coordinate labels along that dimension at which the maximum
    values are found.

    Unlike :py:meth:`~DataArray.argmax`, which gives the integer index, this
    method gives the coordinate label.

    Parameters
    ----------
    dim : hashable, optional
        Dimension along which `idxmax` is applied. May be omitted for 1D
        arrays, but must be given for arrays with 2 or more dimensions.
    skipna : bool or None, default: None
        If True, missing values (as marked by NaN) are skipped. By default,
        missing values are only skipped for ``float``, ``complex``, and
        ``object`` dtypes; other dtypes either have no sentinel missing
        value (``int``) or ``skipna=True`` has not been implemented
        (``datetime64`` or ``timedelta64``).
    fill_value : Any, default: NaN
        Value used where every value along the dimension is null. NaN by
        default. The fill value and the result are automatically converted
        to a compatible dtype if possible. Ignored if ``skipna`` is False.
    keep_attrs : bool, default: False
        If True, the attributes (``attrs``) are copied from the original
        object to the new one. If False (default), the new object is
        returned without attributes.

    Returns
    -------
    reduced : DataArray
        New `DataArray` object with `idxmax` applied to its data and the
        indicated dimension removed.

    See Also
    --------
    Dataset.idxmax, DataArray.idxmin, DataArray.max, DataArray.argmax

    Examples
    --------
    >>> array = xr.DataArray(
    ...     [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]}
    ... )
    >>> array.max()
    <xarray.DataArray ()>
    array(2)
    >>> array.argmax()
    <xarray.DataArray ()>
    array(1)
    >>> array.idxmax()
    <xarray.DataArray 'x' ()>
    array('b', dtype='<U1')

    >>> array = xr.DataArray(
    ...     [
    ...         [2.0, 1.0, 2.0, 0.0, -2.0],
    ...         [-4.0, np.NaN, 2.0, np.NaN, -2.0],
    ...         [np.NaN, np.NaN, 1.0, np.NaN, np.NaN],
    ...     ],
    ...     dims=["y", "x"],
    ...     coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2},
    ... )
    >>> array.max(dim="x")
    <xarray.DataArray (y: 3)>
    array([2., 2., 1.])
    Coordinates:
      * y        (y) int64 -1 0 1
    >>> array.argmax(dim="x")
    <xarray.DataArray (y: 3)>
    array([0, 2, 2])
    Coordinates:
      * y        (y) int64 -1 0 1
    >>> array.idxmax(dim="x")
    <xarray.DataArray 'x' (y: 3)>
    array([0., 4., 4.])
    Coordinates:
      * y        (y) int64 -1 0 1
    """

    def _index_of_maximum(candidate, *args, **kwargs):
        # Forward everything untouched to the object's own ``argmax``.
        return candidate.argmax(*args, **kwargs)

    return computation._calc_idxminmax(
        array=self,
        func=_index_of_maximum,
        dim=dim,
        skipna=skipna,
        fill_value=fill_value,
        keep_attrs=keep_attrs,
    )
def argmin(
    self,
    dim: Union[Hashable, Sequence[Hashable]] = None,
    axis: int = None,
    keep_attrs: bool = None,
    skipna: bool = None,
) -> Union["DataArray", Dict[Hashable, "DataArray"]]:
    """Locate the minimum of the DataArray over one or more dimensions.

    When 'dim' is a sequence, the result is a dict of DataArrays that can be
    handed straight to isel(). When 'dim' is a single str, the result is a
    DataArray with dtype int.

    Where several minima exist, the indices of the first one found are
    returned.

    Parameters
    ----------
    dim : hashable, sequence of hashable or ..., optional
        The dimensions over which the minimum is searched. By default the
        search covers all dimensions and, for backward compatibility,
        currently returns an int; this is deprecated and will in future
        return a dict with indices for all dimensions. Pass '...' to get a
        dict with all dimensions now.
    axis : int, optional
        Axis over which to apply `argmin`. Only one of the 'dim' and 'axis'
        arguments can be supplied.
    keep_attrs : bool, optional
        If True, the attributes (`attrs`) are copied from the original
        object to the new one. If False (default), the new object is
        returned without attributes.
    skipna : bool, optional
        If True, missing values (as marked by NaN) are skipped. By default,
        missing values are only skipped for float dtypes; other dtypes
        either have no sentinel missing value (int) or skipna=True has not
        been implemented (object, datetime64 or timedelta64).

    Returns
    -------
    result : DataArray or dict of DataArray

    See Also
    --------
    Variable.argmin, DataArray.idxmin

    Examples
    --------
    >>> array = xr.DataArray([0, 2, -1, 3], dims="x")
    >>> array.min()
    <xarray.DataArray ()>
    array(-1)
    >>> array.argmin()
    <xarray.DataArray ()>
    array(2)
    >>> array.argmin(...)
    {'x': <xarray.DataArray ()>
    array(2)}
    >>> array.isel(array.argmin(...))
    <xarray.DataArray ()>
    array(-1)

    >>> array = xr.DataArray(
    ...     [[[3, 2, 1], [3, 1, 2], [2, 1, 3]], [[1, 3, 2], [2, -5, 1], [2, 3, 1]]],
    ...     dims=("x", "y", "z"),
    ... )
    >>> array.min(dim="x")
    <xarray.DataArray (y: 3, z: 3)>
    array([[ 1,  2,  1],
           [ 2, -5,  1],
           [ 2,  1,  1]])
    Dimensions without coordinates: y, z
    >>> array.argmin(dim="x")
    <xarray.DataArray (y: 3, z: 3)>
    array([[1, 0, 0],
           [1, 1, 1],
           [0, 0, 1]])
    Dimensions without coordinates: y, z
    >>> array.argmin(dim=["x"])
    {'x': <xarray.DataArray (y: 3, z: 3)>
    array([[1, 0, 0],
           [1, 1, 1],
           [0, 0, 1]])
    Dimensions without coordinates: y, z}
    >>> array.min(dim=("x", "z"))
    <xarray.DataArray (y: 3)>
    array([ 1, -5,  1])
    Dimensions without coordinates: y
    >>> array.argmin(dim=["x", "z"])
    {'x': <xarray.DataArray (y: 3)>
    array([0, 1, 0])
    Dimensions without coordinates: y, 'z': <xarray.DataArray (y: 3)>
    array([2, 1, 1])
    Dimensions without coordinates: y}
    >>> array.isel(array.argmin(dim=["x", "z"]))
    <xarray.DataArray (y: 3)>
    array([ 1, -5,  1])
    Dimensions without coordinates: y
    """
    located = self.variable.argmin(dim, axis, keep_attrs, skipna)

    # A single (non-dict) result is wrapped directly.
    if not isinstance(located, dict):
        return self._replace_maybe_drop_dims(located)

    # One entry per requested dimension: wrap each of them.
    return {
        dim_name: self._replace_maybe_drop_dims(positions)
        for dim_name, positions in located.items()
    }
def argmax(
    self,
    dim: Union[Hashable, Sequence[Hashable]] = None,
    axis: int = None,
    keep_attrs: bool = None,
    skipna: bool = None,
) -> Union["DataArray", Dict[Hashable, "DataArray"]]:
    """Locate the maximum of the DataArray over one or more dimensions.

    When 'dim' is a sequence, the result is a dict of DataArrays that can be
    handed straight to isel(). When 'dim' is a single str, the result is a
    DataArray with dtype int.

    Where several maxima exist, the indices of the first one found are
    returned.

    Parameters
    ----------
    dim : hashable, sequence of hashable or ..., optional
        The dimensions over which the maximum is searched. By default the
        search covers all dimensions and, for backward compatibility,
        currently returns an int; this is deprecated and will in future
        return a dict with indices for all dimensions. Pass '...' to get a
        dict with all dimensions now.
    axis : int, optional
        Axis over which to apply `argmax`. Only one of the 'dim' and 'axis'
        arguments can be supplied.
    keep_attrs : bool, optional
        If True, the attributes (`attrs`) are copied from the original
        object to the new one. If False (default), the new object is
        returned without attributes.
    skipna : bool, optional
        If True, missing values (as marked by NaN) are skipped. By default,
        missing values are only skipped for float dtypes; other dtypes
        either have no sentinel missing value (int) or skipna=True has not
        been implemented (object, datetime64 or timedelta64).

    Returns
    -------
    result : DataArray or dict of DataArray

    See Also
    --------
    Variable.argmax, DataArray.idxmax

    Examples
    --------
    >>> array = xr.DataArray([0, 2, -1, 3], dims="x")
    >>> array.max()
    <xarray.DataArray ()>
    array(3)
    >>> array.argmax()
    <xarray.DataArray ()>
    array(3)
    >>> array.argmax(...)
    {'x': <xarray.DataArray ()>
    array(3)}
    >>> array.isel(array.argmax(...))
    <xarray.DataArray ()>
    array(3)

    >>> array = xr.DataArray(
    ...     [[[3, 2, 1], [3, 1, 2], [2, 1, 3]], [[1, 3, 2], [2, 5, 1], [2, 3, 1]]],
    ...     dims=("x", "y", "z"),
    ... )
    >>> array.max(dim="x")
    <xarray.DataArray (y: 3, z: 3)>
    array([[3, 3, 2],
           [3, 5, 2],
           [2, 3, 3]])
    Dimensions without coordinates: y, z
    >>> array.argmax(dim="x")
    <xarray.DataArray (y: 3, z: 3)>
    array([[0, 1, 1],
           [0, 1, 0],
           [0, 1, 0]])
    Dimensions without coordinates: y, z
    >>> array.argmax(dim=["x"])
    {'x': <xarray.DataArray (y: 3, z: 3)>
    array([[0, 1, 1],
           [0, 1, 0],
           [0, 1, 0]])
    Dimensions without coordinates: y, z}
    >>> array.max(dim=("x", "z"))
    <xarray.DataArray (y: 3)>
    array([3, 5, 3])
    Dimensions without coordinates: y
    >>> array.argmax(dim=["x", "z"])
    {'x': <xarray.DataArray (y: 3)>
    array([0, 1, 0])
    Dimensions without coordinates: y, 'z': <xarray.DataArray (y: 3)>
    array([0, 1, 2])
    Dimensions without coordinates: y}
    >>> array.isel(array.argmax(dim=["x", "z"]))
    <xarray.DataArray (y: 3)>
    array([3, 5, 3])
    Dimensions without coordinates: y
    """
    located = self.variable.argmax(dim, axis, keep_attrs, skipna)

    # A single (non-dict) result is wrapped directly.
    if not isinstance(located, dict):
        return self._replace_maybe_drop_dims(located)

    # One entry per requested dimension: wrap each of them.
    return {
        dim_name: self._replace_maybe_drop_dims(positions)
        for dim_name, positions in located.items()
    }
def query(
    self,
    queries: Mapping[Any, Any] = None,
    parser: str = "pandas",
    engine: str = None,
    missing_dims: str = "raise",
    **queries_kwargs: Any,
) -> "DataArray":
    """Select along dimension(s) using string expressions.

    Returns a new data array indexed along the specified dimension(s),
    where each indexer is a string holding a Python expression that is
    evaluated against the values in the array.

    Parameters
    ----------
    queries : dict, optional
        A dict whose keys match dimensions and whose values are strings
        containing Python expressions. The expressions are evaluated using
        the pandas eval() function; they can contain any valid Python
        expressions but cannot contain any Python statements.
    parser : {"pandas", "python"}, default: "pandas"
        The parser used to construct the syntax tree from the expression.
        The default of 'pandas' parses code slightly different than
        standard Python. Alternatively, the 'python' parser retains strict
        Python semantics.
    engine : {"python", "numexpr", None}, default: None
        The engine used to evaluate the expression. Supported engines are:
        - None: tries to use numexpr, falls back to python
        - "numexpr": evaluates expressions using numexpr
        - "python": performs operations as if you had eval'd in top level python
    missing_dims : {"raise", "warn", "ignore"}, default: "raise"
        What to do if dimensions that should be selected from are not
        present:
        - "raise": raise an exception
        - "warn": raise a warning, and ignore the missing dimensions
        - "ignore": ignore the missing dimensions
    **queries_kwargs : {dim: query, ...}, optional
        The keyword arguments form of ``queries``.
        One of queries or queries_kwargs must be provided.

    Returns
    -------
    obj : DataArray
        A new DataArray with the same contents as this array, indexed by
        the results of the appropriate queries.

    See Also
    --------
    DataArray.isel
    Dataset.query
    pandas.eval

    Examples
    --------
    >>> da = xr.DataArray(np.arange(0, 5, 1), dims="x", name="a")
    >>> da
    <xarray.DataArray 'a' (x: 5)>
    array([0, 1, 2, 3, 4])
    Dimensions without coordinates: x
    >>> da.query(x="a > 2")
    <xarray.DataArray 'a' (x: 2)>
    array([3, 4])
    Dimensions without coordinates: x
    """
    # Run the query on a dataset built from this array, then pull the
    # variable carrying this array's name back out of the result.
    selected = self._to_dataset_whole(shallow_copy=True).query(
        queries=queries,
        parser=parser,
        engine=engine,
        missing_dims=missing_dims,
        **queries_kwargs,
    )
    return selected[self.name]
def curvefit(
    self,
    coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]],
    func: Callable[..., Any],
    reduce_dims: Union[Hashable, Iterable[Hashable]] = None,
    skipna: bool = True,
    p0: Dict[str, Any] = None,
    bounds: Dict[str, Any] = None,
    param_names: Sequence[str] = None,
    kwargs: Dict[str, Any] = None,
):
    """
    Fit an arbitrary function to the data by curve-fitting optimization.

    Wraps `scipy.optimize.curve_fit` with `apply_ufunc`.

    Parameters
    ----------
    coords : hashable, DataArray, or sequence of DataArray or hashable
        Independent coordinate(s) over which the fit is performed. Must
        share at least one dimension with the calling object. For
        multi-dimensional functions, give `coords` as a sequence in the
        same order as the arguments of `func`. To fit along existing
        dimensions of the calling object, `coords` can also be a str or a
        sequence of strs.
    func : callable
        User specified function of the form `f(x, *params)` returning a
        numpy array of length `len(x)`. `params` are the fittable
        parameters optimized by scipy curve_fit. `x` can also be a sequence
        containing multiple coordinates, e.g. `f((x0, x1), *params)`.
    reduce_dims : hashable or sequence of hashable
        Additional dimension(s) over which to aggregate while fitting. For
        example, calling
        `ds.curvefit(coords='time', reduce_dims=['lat', 'lon'], ...)`
        aggregates all lat and lon points and fits the specified function
        along the time dimension.
    skipna : bool, optional
        Whether to skip missing values when fitting. Default is True.
    p0 : dict-like, optional
        Optional dictionary of parameter names to initial guesses passed to
        the `curve_fit` `p0` arg. If none or only some parameters are
        passed, the rest are assigned initial values following the default
        scipy behavior.
    bounds : dict-like, optional
        Optional dictionary of parameter names to bounding values passed to
        the `curve_fit` `bounds` arg. If none or only some parameters are
        passed, the rest are unbounded following the default scipy
        behavior.
    param_names : sequence of hashable, optional
        Sequence of names for the fittable parameters of `func`. If not
        supplied, this is determined automatically from the arguments of
        `func`. `param_names` should be supplied manually when fitting a
        function that takes a variable number of parameters.
    kwargs : dict, optional
        Additional keyword arguments to be passed to scipy curve_fit.

    Returns
    -------
    curvefit_results : Dataset
        A single dataset which contains:

        [var]_curvefit_coefficients
            The coefficients of the best fit.
        [var]_curvefit_covariance
            The covariance matrix of the coefficient estimates.

    See Also
    --------
    DataArray.polyfit
    scipy.optimize.curve_fit
    """
    # Everything except the two positional arguments is forwarded by name.
    fit_options = dict(
        reduce_dims=reduce_dims,
        skipna=skipna,
        p0=p0,
        bounds=bounds,
        param_names=param_names,
        kwargs=kwargs,
    )
    as_dataset = self._to_temp_dataset()
    return as_dataset.curvefit(coords, func, **fit_options)
def drop_duplicates(
    self,
    dim: Hashable,
    keep: Union[
        str,
        bool,
    ] = "first",
):
    """Return a new DataArray with duplicate dimension values removed.

    Parameters
    ----------
    dim : dimension label
        Dimension whose index is checked for duplicated values.
    keep : {"first", "last", False}, default: "first"
        Determines which duplicates (if any) to keep.
        - ``"first"`` : Drop duplicates except for the first occurrence.
        - ``"last"`` : Drop duplicates except for the last occurrence.
        - False : Drop all duplicates.

    Returns
    -------
    DataArray

    Raises
    ------
    ValueError
        If ``dim`` is not one of this array's dimensions.
    """
    if dim not in self.dims:
        raise ValueError(f"'{dim}' not found in dimensions")

    # Flag the duplicated index entries, then select everything else.
    is_duplicate = self.get_index(dim).duplicated(keep=keep)
    return self.isel({dim: ~is_duplicate})

# this needs to be at the end, or mypy will confuse with `str`
# https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
str = utils.UncachedAccessor(StringAccessor)