1import warnings 2from copy import copy, deepcopy 3from datetime import datetime, timedelta 4from textwrap import dedent 5 6import numpy as np 7import pandas as pd 8import pytest 9import pytz 10 11from xarray import Coordinate, DataArray, Dataset, IndexVariable, Variable, set_options 12from xarray.core import dtypes, duck_array_ops, indexing 13from xarray.core.common import full_like, ones_like, zeros_like 14from xarray.core.indexing import ( 15 BasicIndexer, 16 CopyOnWriteArray, 17 DaskIndexingAdapter, 18 LazilyIndexedArray, 19 MemoryCachedArray, 20 NumpyIndexingAdapter, 21 OuterIndexer, 22 PandasIndexingAdapter, 23 VectorizedIndexer, 24) 25from xarray.core.pycompat import dask_array_type 26from xarray.core.utils import NDArrayMixin 27from xarray.core.variable import as_compatible_data, as_variable 28from xarray.tests import requires_bottleneck 29 30from . import ( 31 assert_allclose, 32 assert_array_equal, 33 assert_equal, 34 assert_identical, 35 raise_if_dask_computes, 36 requires_cupy, 37 requires_dask, 38 requires_pint, 39 requires_sparse, 40 source_ndarray, 41) 42 43_PAD_XR_NP_ARGS = [ 44 [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], 45 [{"x": 1}, ((1, 1), (0, 0), (0, 0))], 46 [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], 47 [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], 48 [{"x": (3, 1), "z": 2}, ((3, 1), (0, 0), (2, 2))], 49] 50 51 52@pytest.fixture 53def var(): 54 return Variable(dims=list("xyz"), data=np.random.rand(3, 4, 5)) 55 56 57class VariableSubclassobjects: 58 def test_properties(self): 59 data = 0.5 * np.arange(10) 60 v = self.cls(["time"], data, {"foo": "bar"}) 61 assert v.dims == ("time",) 62 assert_array_equal(v.values, data) 63 assert v.dtype == float 64 assert v.shape == (10,) 65 assert v.size == 10 66 assert v.sizes == {"time": 10} 67 assert v.nbytes == 80 68 assert v.ndim == 1 69 assert len(v) == 10 70 assert v.attrs == {"foo": "bar"} 71 72 def test_attrs(self): 73 v = self.cls(["time"], 0.5 * np.arange(10)) 74 assert v.attrs == {} 75 attrs = {"foo": "bar"} 76 v.attrs = attrs 77 assert v.attrs == attrs 78 assert isinstance(v.attrs, dict) 79 v.attrs["foo"] = "baz" 80 assert v.attrs["foo"] == "baz" 81 82 def test_getitem_dict(self): 83 v = self.cls(["x"], np.random.randn(5)) 84 actual = v[{"x": 0}] 85 expected = v[0] 86 assert_identical(expected, actual) 87 88 def test_getitem_1d(self): 89 data = np.array([0, 1, 2]) 90 v = self.cls(["x"], data) 91 92 v_new = v[dict(x=[0, 1])] 93 assert v_new.dims == ("x",) 94 assert_array_equal(v_new, data[[0, 1]]) 95 96 v_new = v[dict(x=slice(None))] 97 assert v_new.dims == ("x",) 98 assert_array_equal(v_new, data) 99 100 v_new = v[dict(x=Variable("a", [0, 1]))] 101 assert v_new.dims == ("a",) 102 assert_array_equal(v_new, data[[0, 1]]) 103 104 v_new = v[dict(x=1)] 105 assert v_new.dims == () 106 assert_array_equal(v_new, data[1]) 107 108 # tuple argument 109 v_new = v[slice(None)] 110 assert v_new.dims == ("x",) 111 assert_array_equal(v_new, data) 112 113 def test_getitem_1d_fancy(self): 114 v = self.cls(["x"], [0, 1, 2]) 115 # 1d-variable should be indexable by multi-dimensional Variable 116 ind = Variable(("a", "b"), [[0, 1], [0, 1]]) 117 v_new = v[ind] 118 assert v_new.dims == ("a", "b") 119 expected = np.array(v._data)[([0, 1], [0, 1]), ...] 120 assert_array_equal(v_new, expected) 121 122 # boolean indexing 123 ind = Variable(("x",), [True, False, True]) 124 v_new = v[ind] 125 assert_identical(v[[0, 2]], v_new) 126 v_new = v[[True, False, True]] 127 assert_identical(v[[0, 2]], v_new) 128 129 with pytest.raises(IndexError, match=r"Boolean indexer should"): 130 ind = Variable(("a",), [True, False, True]) 131 v[ind] 132 133 def test_getitem_with_mask(self): 134 v = self.cls(["x"], [0, 1, 2]) 135 assert_identical(v._getitem_with_mask(-1), Variable((), np.nan)) 136 assert_identical( 137 v._getitem_with_mask([0, -1, 1]), self.cls(["x"], [0, np.nan, 1]) 138 ) 139 assert_identical(v._getitem_with_mask(slice(2)), self.cls(["x"], [0, 1])) 140 assert_identical( 141 v._getitem_with_mask([0, -1, 1], fill_value=-99), 142 self.cls(["x"], [0, -99, 1]), 143 ) 144 145 def test_getitem_with_mask_size_zero(self): 146 v = self.cls(["x"], []) 147 assert_identical(v._getitem_with_mask(-1), Variable((), np.nan)) 148 assert_identical( 149 v._getitem_with_mask([-1, -1, -1]), 150 self.cls(["x"], [np.nan, np.nan, np.nan]), 151 ) 152 153 def test_getitem_with_mask_nd_indexer(self): 154 v = self.cls(["x"], [0, 1, 2]) 155 indexer = Variable(("x", "y"), [[0, -1], [-1, 2]]) 156 assert_identical(v._getitem_with_mask(indexer, fill_value=-1), indexer) 157 158 def _assertIndexedLikeNDArray(self, variable, expected_value0, expected_dtype=None): 159 """Given a 1-dimensional variable, verify that the variable is indexed 160 like a numpy.ndarray. 161 """ 162 assert variable[0].shape == () 163 assert variable[0].ndim == 0 164 assert variable[0].size == 1 165 # test identity 166 assert variable.equals(variable.copy()) 167 assert variable.identical(variable.copy()) 168 # check value is equal for both ndarray and Variable 169 with warnings.catch_warnings(): 170 warnings.filterwarnings("ignore", "In the future, 'NAT == x'") 171 np.testing.assert_equal(variable.values[0], expected_value0) 172 np.testing.assert_equal(variable[0].values, expected_value0) 173 # check type or dtype is consistent for both ndarray and Variable 174 if expected_dtype is None: 175 # check output type instead of array dtype 176 assert type(variable.values[0]) == type(expected_value0) 177 assert type(variable[0].values) == type(expected_value0) 178 elif expected_dtype is not False: 179 assert variable.values[0].dtype == expected_dtype 180 assert variable[0].values.dtype == expected_dtype 181 182 def test_index_0d_int(self): 183 for value, dtype in [(0, np.int_), (np.int32(0), np.int32)]: 184 x = self.cls(["x"], [value]) 185 self._assertIndexedLikeNDArray(x, value, dtype) 186 187 def test_index_0d_float(self): 188 for value, dtype in [(0.5, np.float_), (np.float32(0.5), np.float32)]: 189 x = self.cls(["x"], [value]) 190 self._assertIndexedLikeNDArray(x, value, dtype) 191 192 def test_index_0d_string(self): 193 value = "foo" 194 dtype = np.dtype("U3") 195 x = self.cls(["x"], [value]) 196 self._assertIndexedLikeNDArray(x, value, dtype) 197 198 def test_index_0d_datetime(self): 199 d = datetime(2000, 1, 1) 200 x = self.cls(["x"], [d]) 201 self._assertIndexedLikeNDArray(x, np.datetime64(d)) 202 203 x = self.cls(["x"], [np.datetime64(d)]) 204 self._assertIndexedLikeNDArray(x, np.datetime64(d), "datetime64[ns]") 205 206 x = self.cls(["x"], pd.DatetimeIndex([d])) 207 self._assertIndexedLikeNDArray(x, np.datetime64(d), "datetime64[ns]") 208 209 def test_index_0d_timedelta64(self): 210 td = timedelta(hours=1) 211 212 x = self.cls(["x"], [np.timedelta64(td)]) 213 self._assertIndexedLikeNDArray(x, np.timedelta64(td), "timedelta64[ns]") 214 215 x = self.cls(["x"], pd.to_timedelta([td])) 216 self._assertIndexedLikeNDArray(x, np.timedelta64(td), "timedelta64[ns]") 217 218 def test_index_0d_not_a_time(self): 219 d = np.datetime64("NaT", "ns") 220 x = self.cls(["x"], [d]) 221 self._assertIndexedLikeNDArray(x, d) 222 223 def test_index_0d_object(self): 224 class HashableItemWrapper: 225 def __init__(self, item): 226 self.item = item 227 228 def __eq__(self, other): 229 return self.item == other.item 230 231 def __hash__(self): 232 return hash(self.item) 233 234 def __repr__(self): 235 return "{}(item={!r})".format(type(self).__name__, self.item) 236 237 item = HashableItemWrapper((1, 2, 3)) 238 x = self.cls("x", [item]) 239 self._assertIndexedLikeNDArray(x, item, expected_dtype=False) 240 241 def test_0d_object_array_with_list(self): 242 listarray = np.empty((1,), dtype=object) 243 listarray[0] = [1, 2, 3] 244 x = self.cls("x", listarray) 245 assert_array_equal(x.data, listarray) 246 assert_array_equal(x[0].data, listarray.squeeze()) 247 assert_array_equal(x.squeeze().data, listarray.squeeze()) 248 249 def test_index_and_concat_datetime(self): 250 # regression test for #125 251 date_range = pd.date_range("2011-09-01", periods=10) 252 for dates in [date_range, date_range.values, date_range.to_pydatetime()]: 253 expected = self.cls("t", dates) 254 for times in [ 255 [expected[i] for i in range(10)], 256 [expected[i : (i + 1)] for i in range(10)], 257 [expected[[i]] for i in range(10)], 258 ]: 259 actual = Variable.concat(times, "t") 260 assert expected.dtype == actual.dtype 261 assert_array_equal(expected, actual) 262 263 def test_0d_time_data(self): 264 # regression test for #105 265 x = self.cls("time", pd.date_range("2000-01-01", periods=5)) 266 expected = np.datetime64("2000-01-01", "ns") 267 assert x[0].values == expected 268 269 def test_datetime64_conversion(self): 270 times = pd.date_range("2000-01-01", periods=3) 271 for values, preserve_source in [ 272 (times, True), 273 (times.values, True), 274 (times.values.astype("datetime64[s]"), False), 275 (times.to_pydatetime(), False), 276 ]: 277 v = self.cls(["t"], values) 278 assert v.dtype == np.dtype("datetime64[ns]") 279 assert_array_equal(v.values, times.values) 280 assert v.values.dtype == np.dtype("datetime64[ns]") 281 same_source = source_ndarray(v.values) is source_ndarray(values) 282 assert preserve_source == same_source 283 284 def test_timedelta64_conversion(self): 285 times = pd.timedelta_range(start=0, periods=3) 286 for values, preserve_source in [ 287 (times, True), 288 (times.values, True), 289 (times.values.astype("timedelta64[s]"), False), 290 (times.to_pytimedelta(), False), 291 ]: 292 v = self.cls(["t"], values) 293 assert v.dtype == np.dtype("timedelta64[ns]") 294 assert_array_equal(v.values, times.values) 295 assert v.values.dtype == np.dtype("timedelta64[ns]") 296 same_source = source_ndarray(v.values) is source_ndarray(values) 297 assert preserve_source == same_source 298 299 def test_object_conversion(self): 300 data = np.arange(5).astype(str).astype(object) 301 actual = self.cls("x", data) 302 assert actual.dtype == data.dtype 303 304 def test_datetime64_valid_range(self): 305 data = np.datetime64("1250-01-01", "us") 306 pderror = pd.errors.OutOfBoundsDatetime 307 with pytest.raises(pderror, match=r"Out of bounds nanosecond"): 308 self.cls(["t"], [data]) 309 310 @pytest.mark.xfail(reason="pandas issue 36615") 311 def test_timedelta64_valid_range(self): 312 data = np.timedelta64("200000", "D") 313 pderror = pd.errors.OutOfBoundsTimedelta 314 with pytest.raises(pderror, match=r"Out of bounds nanosecond"): 315 self.cls(["t"], [data]) 316 317 def test_pandas_data(self): 318 v = self.cls(["x"], pd.Series([0, 1, 2], index=[3, 2, 1])) 319 assert_identical(v, v[[0, 1, 2]]) 320 v = self.cls(["x"], pd.Index([0, 1, 2])) 321 assert v[0].values == v.values[0] 322 323 def test_pandas_period_index(self): 324 v = self.cls(["x"], pd.period_range(start="2000", periods=20, freq="B")) 325 v = v.load() # for dask-based Variable 326 assert v[0] == pd.Period("2000", freq="B") 327 assert "Period('2000-01-03', 'B')" in repr(v) 328 329 def test_1d_math(self): 330 x = 1.0 * np.arange(5) 331 y = np.ones(5) 332 333 # should we need `.to_base_variable()`? 334 # probably a break that `+v` changes type? 335 v = self.cls(["x"], x) 336 base_v = v.to_base_variable() 337 # unary ops 338 assert_identical(base_v, +v) 339 assert_identical(base_v, abs(v)) 340 assert_array_equal((-v).values, -x) 341 # binary ops with numbers 342 assert_identical(base_v, v + 0) 343 assert_identical(base_v, 0 + v) 344 assert_identical(base_v, v * 1) 345 # binary ops with numpy arrays 346 assert_array_equal((v * x).values, x ** 2) 347 assert_array_equal((x * v).values, x ** 2) 348 assert_array_equal(v - y, v - 1) 349 assert_array_equal(y - v, 1 - v) 350 # verify attributes are dropped 351 v2 = self.cls(["x"], x, {"units": "meters"}) 352 with set_options(keep_attrs=False): 353 assert_identical(base_v, +v2) 354 # binary ops with all variables 355 assert_array_equal(v + v, 2 * v) 356 w = self.cls(["x"], y, {"foo": "bar"}) 357 assert_identical(v + w, self.cls(["x"], x + y).to_base_variable()) 358 assert_array_equal((v * w).values, x * y) 359 360 # something complicated 361 assert_array_equal((v ** 2 * w - 1 + x).values, x ** 2 * y - 1 + x) 362 # make sure dtype is preserved (for Index objects) 363 assert float == (+v).dtype 364 assert float == (+v).values.dtype 365 assert float == (0 + v).dtype 366 assert float == (0 + v).values.dtype 367 # check types of returned data 368 assert isinstance(+v, Variable) 369 assert not isinstance(+v, IndexVariable) 370 assert isinstance(0 + v, Variable) 371 assert not isinstance(0 + v, IndexVariable) 372 373 def test_1d_reduce(self): 374 x = np.arange(5) 375 v = self.cls(["x"], x) 376 actual = v.sum() 377 expected = Variable((), 10) 378 assert_identical(expected, actual) 379 assert type(actual) is Variable 380 381 def test_array_interface(self): 382 x = np.arange(5) 383 v = self.cls(["x"], x) 384 assert_array_equal(np.asarray(v), x) 385 # test patched in methods 386 assert_array_equal(v.astype(float), x.astype(float)) 387 # think this is a break, that argsort changes the type 388 assert_identical(v.argsort(), v.to_base_variable()) 389 assert_identical(v.clip(2, 3), self.cls("x", x.clip(2, 3)).to_base_variable()) 390 # test ufuncs 391 assert_identical(np.sin(v), self.cls(["x"], np.sin(x)).to_base_variable()) 392 assert isinstance(np.sin(v), Variable) 393 assert not isinstance(np.sin(v), IndexVariable) 394 395 def example_1d_objects(self): 396 for data in [ 397 range(3), 398 0.5 * np.arange(3), 399 0.5 * np.arange(3, dtype=np.float32), 400 pd.date_range("2000-01-01", periods=3), 401 np.array(["a", "b", "c"], dtype=object), 402 ]: 403 yield (self.cls("x", data), data) 404 405 def test___array__(self): 406 for v, data in self.example_1d_objects(): 407 assert_array_equal(v.values, np.asarray(data)) 408 assert_array_equal(np.asarray(v), np.asarray(data)) 409 assert v[0].values == np.asarray(data)[0] 410 assert np.asarray(v[0]) == np.asarray(data)[0] 411 412 def test_equals_all_dtypes(self): 413 for v, _ in self.example_1d_objects(): 414 v2 = v.copy() 415 assert v.equals(v2) 416 assert v.identical(v2) 417 assert v.no_conflicts(v2) 418 assert v[0].equals(v2[0]) 419 assert v[0].identical(v2[0]) 420 assert v[0].no_conflicts(v2[0]) 421 assert v[:2].equals(v2[:2]) 422 assert v[:2].identical(v2[:2]) 423 assert v[:2].no_conflicts(v2[:2]) 424 425 def test_eq_all_dtypes(self): 426 # ensure that we don't choke on comparisons for which numpy returns 427 # scalars 428 expected = Variable("x", 3 * [False]) 429 for v, _ in self.example_1d_objects(): 430 actual = "z" == v 431 assert_identical(expected, actual) 432 actual = ~("z" != v) 433 assert_identical(expected, actual) 434 435 def test_encoding_preserved(self): 436 expected = self.cls("x", range(3), {"foo": 1}, {"bar": 2}) 437 for actual in [ 438 expected.T, 439 expected[...], 440 expected.squeeze(), 441 expected.isel(x=slice(None)), 442 expected.set_dims({"x": 3}), 443 expected.copy(deep=True), 444 expected.copy(deep=False), 445 ]: 446 447 assert_identical(expected.to_base_variable(), actual.to_base_variable()) 448 assert expected.encoding == actual.encoding 449 450 def test_concat(self): 451 x = np.arange(5) 452 y = np.arange(5, 10) 453 v = self.cls(["a"], x) 454 w = self.cls(["a"], y) 455 assert_identical( 456 Variable(["b", "a"], np.array([x, y])), Variable.concat([v, w], "b") 457 ) 458 assert_identical( 459 Variable(["b", "a"], np.array([x, y])), Variable.concat((v, w), "b") 460 ) 461 assert_identical( 462 Variable(["b", "a"], np.array([x, y])), Variable.concat((v, w), "b") 463 ) 464 with pytest.raises(ValueError, match=r"Variable has dimensions"): 465 Variable.concat([v, Variable(["c"], y)], "b") 466 # test indexers 467 actual = Variable.concat( 468 [v, w], positions=[np.arange(0, 10, 2), np.arange(1, 10, 2)], dim="a" 469 ) 470 expected = Variable("a", np.array([x, y]).ravel(order="F")) 471 assert_identical(expected, actual) 472 # test concatenating along a dimension 473 v = Variable(["time", "x"], np.random.random((10, 8))) 474 assert_identical(v, Variable.concat([v[:5], v[5:]], "time")) 475 assert_identical(v, Variable.concat([v[:5], v[5:6], v[6:]], "time")) 476 assert_identical(v, Variable.concat([v[:1], v[1:]], "time")) 477 # test dimension order 478 assert_identical(v, Variable.concat([v[:, :5], v[:, 5:]], "x")) 479 with pytest.raises(ValueError, match=r"all input arrays must have"): 480 Variable.concat([v[:, 0], v[:, 1:]], "x") 481 482 def test_concat_attrs(self): 483 # always keep attrs from first variable 484 v = self.cls("a", np.arange(5), {"foo": "bar"}) 485 w = self.cls("a", np.ones(5)) 486 expected = self.cls( 487 "a", np.concatenate([np.arange(5), np.ones(5)]) 488 ).to_base_variable() 489 expected.attrs["foo"] = "bar" 490 assert_identical(expected, Variable.concat([v, w], "a")) 491 492 def test_concat_fixed_len_str(self): 493 # regression test for #217 494 for kind in ["S", "U"]: 495 x = self.cls("animal", np.array(["horse"], dtype=kind)) 496 y = self.cls("animal", np.array(["aardvark"], dtype=kind)) 497 actual = Variable.concat([x, y], "animal") 498 expected = Variable("animal", np.array(["horse", "aardvark"], dtype=kind)) 499 assert_equal(expected, actual) 500 501 def test_concat_number_strings(self): 502 # regression test for #305 503 a = self.cls("x", ["0", "1", "2"]) 504 b = self.cls("x", ["3", "4"]) 505 actual = Variable.concat([a, b], dim="x") 506 expected = Variable("x", np.arange(5).astype(str)) 507 assert_identical(expected, actual) 508 assert actual.dtype.kind == expected.dtype.kind 509 510 def test_concat_mixed_dtypes(self): 511 a = self.cls("x", [0, 1]) 512 b = self.cls("x", ["two"]) 513 actual = Variable.concat([a, b], dim="x") 514 expected = Variable("x", np.array([0, 1, "two"], dtype=object)) 515 assert_identical(expected, actual) 516 assert actual.dtype == object 517 518 @pytest.mark.parametrize("deep", [True, False]) 519 @pytest.mark.parametrize("astype", [float, int, str]) 520 def test_copy(self, deep, astype): 521 v = self.cls("x", (0.5 * np.arange(10)).astype(astype), {"foo": "bar"}) 522 w = v.copy(deep=deep) 523 assert type(v) is type(w) 524 assert_identical(v, w) 525 assert v.dtype == w.dtype 526 if self.cls is Variable: 527 if deep: 528 assert source_ndarray(v.values) is not source_ndarray(w.values) 529 else: 530 assert source_ndarray(v.values) is source_ndarray(w.values) 531 assert_identical(v, copy(v)) 532 533 def test_copy_index(self): 534 midx = pd.MultiIndex.from_product( 535 [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three") 536 ) 537 v = self.cls("x", midx) 538 for deep in [True, False]: 539 w = v.copy(deep=deep) 540 assert isinstance(w._data, PandasIndexingAdapter) 541 assert isinstance(w.to_index(), pd.MultiIndex) 542 assert_array_equal(v._data.array, w._data.array) 543 544 def test_copy_with_data(self): 545 orig = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) 546 new_data = np.array([[2.5, 5.0], [7.1, 43]]) 547 actual = orig.copy(data=new_data) 548 expected = orig.copy() 549 expected.data = new_data 550 assert_identical(expected, actual) 551 552 def test_copy_with_data_errors(self): 553 orig = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) 554 new_data = [2.5, 5.0] 555 with pytest.raises(ValueError, match=r"must match shape of object"): 556 orig.copy(data=new_data) 557 558 def test_copy_index_with_data(self): 559 orig = IndexVariable("x", np.arange(5)) 560 new_data = np.arange(5, 10) 561 actual = orig.copy(data=new_data) 562 expected = IndexVariable("x", np.arange(5, 10)) 563 assert_identical(expected, actual) 564 565 def test_copy_index_with_data_errors(self): 566 orig = IndexVariable("x", np.arange(5)) 567 new_data = np.arange(5, 20) 568 with pytest.raises(ValueError, match=r"must match shape of object"): 569 orig.copy(data=new_data) 570 with pytest.raises(ValueError, match=r"Cannot assign to the .data"): 571 orig.data = new_data 572 with pytest.raises(ValueError, match=r"Cannot assign to the .values"): 573 orig.values = new_data 574 575 def test_replace(self): 576 var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) 577 result = var._replace() 578 assert_identical(result, var) 579 580 new_data = np.arange(4).reshape(2, 2) 581 result = var._replace(data=new_data) 582 assert_array_equal(result.data, new_data) 583 584 def test_real_and_imag(self): 585 v = self.cls("x", np.arange(3) - 1j * np.arange(3), {"foo": "bar"}) 586 expected_re = self.cls("x", np.arange(3), {"foo": "bar"}) 587 assert_identical(v.real, expected_re) 588 589 expected_im = self.cls("x", -np.arange(3), {"foo": "bar"}) 590 assert_identical(v.imag, expected_im) 591 592 expected_abs = self.cls("x", np.sqrt(2 * np.arange(3) ** 2)).to_base_variable() 593 assert_allclose(abs(v), expected_abs) 594 595 def test_aggregate_complex(self): 596 # should skip NaNs 597 v = self.cls("x", [1, 2j, np.nan]) 598 expected = Variable((), 0.5 + 1j) 599 assert_allclose(v.mean(), expected) 600 601 def test_pandas_cateogrical_dtype(self): 602 data = pd.Categorical(np.arange(10, dtype="int64")) 603 v = self.cls("x", data) 604 print(v) # should not error 605 assert v.dtype == "int64" 606 607 def test_pandas_datetime64_with_tz(self): 608 data = pd.date_range( 609 start="2000-01-01", 610 tz=pytz.timezone("America/New_York"), 611 periods=10, 612 freq="1h", 613 ) 614 v = self.cls("x", data) 615 print(v) # should not error 616 if "America/New_York" in str(data.dtype): 617 # pandas is new enough that it has datetime64 with timezone dtype 618 assert v.dtype == "object" 619 620 def test_multiindex(self): 621 idx = pd.MultiIndex.from_product([list("abc"), [0, 1]]) 622 v = self.cls("x", idx) 623 assert_identical(Variable((), ("a", 0)), v[0]) 624 assert_identical(v, v[:]) 625 626 def test_load(self): 627 array = self.cls("x", np.arange(5)) 628 orig_data = array._data 629 copied = array.copy(deep=True) 630 if array.chunks is None: 631 array.load() 632 assert type(array._data) is type(orig_data) 633 assert type(copied._data) is type(orig_data) 634 assert_identical(array, copied) 635 636 def test_getitem_advanced(self): 637 v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]]) 638 v_data = v.compute().data 639 640 # orthogonal indexing 641 v_new = v[([0, 1], [1, 0])] 642 assert v_new.dims == ("x", "y") 643 assert_array_equal(v_new, v_data[[0, 1]][:, [1, 0]]) 644 645 v_new = v[[0, 1]] 646 assert v_new.dims == ("x", "y") 647 assert_array_equal(v_new, v_data[[0, 1]]) 648 649 # with mixed arguments 650 ind = Variable(["a"], [0, 1]) 651 v_new = v[dict(x=[0, 1], y=ind)] 652 assert v_new.dims == ("x", "a") 653 assert_array_equal(v_new, v_data[[0, 1]][:, [0, 1]]) 654 655 # boolean indexing 656 v_new = v[dict(x=[True, False], y=[False, True, False])] 657 assert v_new.dims == ("x", "y") 658 assert_array_equal(v_new, v_data[0][1]) 659 660 # with scalar variable 661 ind = Variable((), 2) 662 v_new = v[dict(y=ind)] 663 expected = v[dict(y=2)] 664 assert_array_equal(v_new, expected) 665 666 # with boolean variable with wrong shape 667 ind = np.array([True, False]) 668 with pytest.raises(IndexError, match=r"Boolean array size 2 is "): 669 v[Variable(("a", "b"), [[0, 1]]), ind] 670 671 # boolean indexing with different dimension 672 ind = Variable(["a"], [True, False, False]) 673 with pytest.raises(IndexError, match=r"Boolean indexer should be"): 674 v[dict(y=ind)] 675 676 def test_getitem_uint_1d(self): 677 # regression test for #1405 678 v = self.cls(["x"], [0, 1, 2]) 679 v_data = v.compute().data 680 681 v_new = v[np.array([0])] 682 assert_array_equal(v_new, v_data[0]) 683 v_new = v[np.array([0], dtype="uint64")] 684 assert_array_equal(v_new, v_data[0]) 685 686 def test_getitem_uint(self): 687 # regression test for #1405 688 v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]]) 689 v_data = v.compute().data 690 691 v_new = v[np.array([0])] 692 assert_array_equal(v_new, v_data[[0], :]) 693 v_new = v[np.array([0], dtype="uint64")] 694 assert_array_equal(v_new, v_data[[0], :]) 695 696 v_new = v[np.uint64(0)] 697 assert_array_equal(v_new, v_data[0, :]) 698 699 def test_getitem_0d_array(self): 700 # make sure 0d-np.array can be used as an indexer 701 v = self.cls(["x"], [0, 1, 2]) 702 v_data = v.compute().data 703 704 v_new = v[np.array([0])[0]] 705 assert_array_equal(v_new, v_data[0]) 706 707 v_new = v[np.array(0)] 708 assert_array_equal(v_new, v_data[0]) 709 710 v_new = v[Variable((), np.array(0))] 711 assert_array_equal(v_new, v_data[0]) 712 713 def test_getitem_fancy(self): 714 v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]]) 715 v_data = v.compute().data 716 717 ind = Variable(["a", "b"], [[0, 1, 1], [1, 1, 0]]) 718 v_new = v[ind] 719 assert v_new.dims == ("a", "b", "y") 720 assert_array_equal(v_new, v_data[[[0, 1, 1], [1, 1, 0]], :]) 721 722 # It would be ok if indexed with the multi-dimensional array including 723 # the same name 724 ind = Variable(["x", "b"], [[0, 1, 1], [1, 1, 0]]) 725 v_new = v[ind] 726 assert v_new.dims == ("x", "b", "y") 727 assert_array_equal(v_new, v_data[[[0, 1, 1], [1, 1, 0]], :]) 728 729 ind = Variable(["a", "b"], [[0, 1, 2], [2, 1, 0]]) 730 v_new = v[dict(y=ind)] 731 assert v_new.dims == ("x", "a", "b") 732 assert_array_equal(v_new, v_data[:, ([0, 1, 2], [2, 1, 0])]) 733 734 ind = Variable(["a", "b"], [[0, 0], [1, 1]]) 735 v_new = v[dict(x=[1, 0], y=ind)] 736 assert v_new.dims == ("x", "a", "b") 737 assert_array_equal(v_new, v_data[[1, 0]][:, ind]) 738 739 # along diagonal 740 ind = Variable(["a"], [0, 1]) 741 v_new = v[ind, ind] 742 assert v_new.dims == ("a",) 743 assert_array_equal(v_new, v_data[[0, 1], [0, 1]]) 744 745 # with integer 746 ind = Variable(["a", "b"], [[0, 0], [1, 1]]) 747 v_new = v[dict(x=0, y=ind)] 748 assert v_new.dims == ("a", "b") 749 assert_array_equal(v_new[0], v_data[0][[0, 0]]) 750 assert_array_equal(v_new[1], v_data[0][[1, 1]]) 751 752 # with slice 753 ind = Variable(["a", "b"], [[0, 0], [1, 1]]) 754 v_new = v[dict(x=slice(None), y=ind)] 755 assert v_new.dims == ("x", "a", "b") 756 assert_array_equal(v_new, v_data[:, [[0, 0], [1, 1]]]) 757 758 ind = Variable(["a", "b"], [[0, 0], [1, 1]]) 759 v_new = v[dict(x=ind, y=slice(None))] 760 assert v_new.dims == ("a", "b", "y") 761 assert_array_equal(v_new, v_data[[[0, 0], [1, 1]], :]) 762 763 ind = Variable(["a", "b"], [[0, 0], [1, 1]]) 764 v_new = v[dict(x=ind, y=slice(None, 1))] 765 assert v_new.dims == ("a", "b", "y") 766 assert_array_equal(v_new, v_data[[[0, 0], [1, 1]], slice(None, 1)]) 767 768 # slice matches explicit dimension 769 ind = Variable(["y"], [0, 1]) 770 v_new = v[ind, :2] 771 assert v_new.dims == ("y",) 772 assert_array_equal(v_new, v_data[[0, 1], [0, 1]]) 773 774 # with multiple slices 775 v = self.cls(["x", "y", "z"], [[[1, 2, 3], [4, 5, 6]]]) 776 ind = Variable(["a", "b"], [[0]]) 777 v_new = v[ind, :, :] 778 expected = Variable(["a", "b", "y", "z"], v.data[np.newaxis, ...]) 779 assert_identical(v_new, expected) 780 781 v = Variable(["w", "x", "y", "z"], [[[[1, 2, 3], [4, 5, 6]]]]) 782 ind = Variable(["y"], [0]) 783 v_new = v[ind, :, 1:2, 2] 784 expected = Variable(["y", "x"], [[6]]) 785 assert_identical(v_new, expected) 786 787 # slice and vector mixed indexing resulting in the same dimension 788 v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5)) 789 ind = Variable(["x"], [0, 1, 2]) 790 v_new = v[:, ind] 791 expected = Variable(("x", "z"), np.zeros((3, 5))) 792 expected[0] = v.data[0, 0] 793 expected[1] = v.data[1, 1] 794 expected[2] = v.data[2, 2] 795 assert_identical(v_new, expected) 796 797 v_new = v[:, ind.data] 798 assert v_new.shape == (3, 3, 5) 799 800 def test_getitem_error(self): 801 v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]]) 802 803 with pytest.raises(IndexError, match=r"labeled multi-"): 804 v[[[0, 1], [1, 2]]] 805 806 ind_x = Variable(["a"], [0, 1, 1]) 807 ind_y = Variable(["a"], [0, 1]) 808 with pytest.raises(IndexError, match=r"Dimensions of indexers "): 809 v[ind_x, ind_y] 810 811 ind = Variable(["a", "b"], [[True, False], [False, True]]) 812 with pytest.raises(IndexError, match=r"2-dimensional boolean"): 813 v[dict(x=ind)] 814 815 v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5)) 816 ind = Variable(["x"], [0, 1]) 817 with pytest.raises(IndexError, match=r"Dimensions of indexers mis"): 818 v[:, ind] 819 820 @pytest.mark.parametrize( 821 "mode", 822 [ 823 "mean", 824 pytest.param( 825 "median", 826 marks=pytest.mark.xfail(reason="median is not implemented by Dask"), 827 ), 828 pytest.param( 829 "reflect", marks=pytest.mark.xfail(reason="dask.array.pad bug") 830 ), 831 "edge", 832 pytest.param( 833 "linear_ramp", 834 marks=pytest.mark.xfail( 835 reason="pint bug: https://github.com/hgrecco/pint/issues/1026" 836 ), 837 ), 838 "maximum", 839 "minimum", 840 "symmetric", 841 "wrap", 842 ], 843 ) 844 @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) 845 @pytest.mark.filterwarnings( 846 r"ignore:dask.array.pad.+? converts integers to floats." 847 ) 848 def test_pad(self, mode, xr_arg, np_arg): 849 data = np.arange(4 * 3 * 2).reshape(4, 3, 2) 850 v = self.cls(["x", "y", "z"], data) 851 852 actual = v.pad(mode=mode, **xr_arg) 853 expected = np.pad(data, np_arg, mode=mode) 854 855 assert_array_equal(actual, expected) 856 assert isinstance(actual._data, type(v._data)) 857 858 @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) 859 def test_pad_constant_values(self, xr_arg, np_arg): 860 data = np.arange(4 * 3 * 2).reshape(4, 3, 2) 861 v = self.cls(["x", "y", "z"], data) 862 863 actual = v.pad(**xr_arg) 864 expected = np.pad( 865 np.array(v.data.astype(float)), 866 np_arg, 867 mode="constant", 868 constant_values=np.nan, 869 ) 870 assert_array_equal(actual, expected) 871 assert isinstance(actual._data, type(v._data)) 872 873 # for the boolean array, we pad False 874 data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) 875 v = self.cls(["x", "y", "z"], data) 876 877 actual = v.pad(mode="constant", constant_values=False, **xr_arg) 878 expected = np.pad( 879 np.array(v.data), np_arg, mode="constant", constant_values=False 880 ) 881 assert_array_equal(actual, expected) 882 883 @pytest.mark.parametrize("d, w", (("x", 3), ("y", 5))) 884 def test_rolling_window(self, d, w): 885 # Just a working test. See test_nputils for the algorithm validation 886 v = self.cls(["x", "y", "z"], np.arange(40 * 30 * 2).reshape(40, 30, 2)) 887 v_rolling = v.rolling_window(d, w, d + "_window") 888 assert v_rolling.dims == ("x", "y", "z", d + "_window") 889 assert v_rolling.shape == v.shape + (w,) 890 891 v_rolling = v.rolling_window(d, w, d + "_window", center=True) 892 assert v_rolling.dims == ("x", "y", "z", d + "_window") 893 assert v_rolling.shape == v.shape + (w,) 894 895 # dask and numpy result should be the same 896 v_loaded = v.load().rolling_window(d, w, d + "_window", center=True) 897 assert_array_equal(v_rolling, v_loaded) 898 899 # numpy backend should not be over-written 900 if isinstance(v._data, np.ndarray): 901 with pytest.raises(ValueError): 902 v_loaded[0] = 1.0 903 904 def test_rolling_1d(self): 905 x = self.cls("x", np.array([1, 2, 3, 4], dtype=float)) 906 907 kwargs = dict(dim="x", window=3, window_dim="xw") 908 actual = x.rolling_window(**kwargs, center=True, fill_value=np.nan) 909 expected = Variable( 910 ("x", "xw"), 911 np.array( 912 [[np.nan, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, np.nan]], dtype=float 913 ), 914 ) 915 assert_equal(actual, expected) 916 917 actual = x.rolling_window(**kwargs, center=False, fill_value=0.0) 918 expected = self.cls( 919 ("x", "xw"), 920 np.array([[0, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], dtype=float), 921 ) 922 assert_equal(actual, expected) 923 924 x = self.cls(("y", "x"), np.stack([x, x * 1.1])) 925 actual = x.rolling_window(**kwargs, center=False, fill_value=0.0) 926 expected = self.cls( 927 ("y", "x", "xw"), np.stack([expected.data, expected.data * 1.1], axis=0) 928 ) 929 assert_equal(actual, expected) 930 931 @pytest.mark.parametrize("center", [[True, True], [False, False]]) 932 @pytest.mark.parametrize("dims", [("x", "y"), ("y", "z"), ("z", "x")]) 933 def test_nd_rolling(self, center, dims): 934 x = self.cls( 935 ("x", "y", "z"), 936 np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float), 937 ) 938 window = [3, 3] 939 actual = x.rolling_window( 940 dim=dims, 941 window=window, 942 window_dim=[f"{k}w" for k in dims], 943 center=center, 944 fill_value=np.nan, 945 ) 946 expected = x 947 for dim, win, cent in zip(dims, window, center): 948 expected = expected.rolling_window( 949 dim=dim, 950 window=win, 951 window_dim=f"{dim}w", 952 center=cent, 953 fill_value=np.nan, 954 ) 955 assert_equal(actual, expected) 956 957 @pytest.mark.parametrize( 958 ("dim, window, window_dim, center"), 959 [ 960 ("x", [3, 3], "x_w", True), 961 ("x", 3, ("x_w", "x_w"), True), 962 ("x", 3, "x_w", [True, True]), 963 ], 964 ) 965 def test_rolling_window_errors(self, dim, window, window_dim, center): 966 x = self.cls( 967 ("x", "y", "z"), 968 np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float), 969 ) 970 with pytest.raises(ValueError): 971 x.rolling_window( 972 dim=dim, 973 window=window, 974 window_dim=window_dim, 975 center=center, 976 ) 977 978 979class TestVariable(VariableSubclassobjects): 980 cls = staticmethod(Variable) 981 982 @pytest.fixture(autouse=True) 983 def setup(self): 984 self.d = np.random.random((10, 3)).astype(np.float64) 985 986 def test_data_and_values(self): 987 v = Variable(["time", "x"], self.d) 988 assert_array_equal(v.data, self.d) 989 assert_array_equal(v.values, self.d) 990 assert source_ndarray(v.values) is self.d 991 with pytest.raises(ValueError): 992 # wrong size 993 v.values = np.random.random(5) 994 d2 = np.random.random((10, 3)) 995 v.values = d2 996 assert source_ndarray(v.values) is d2 997 d3 = np.random.random((10, 3)) 998 v.data = d3 999 assert source_ndarray(v.data) is d3 1000 1001 def test_numpy_same_methods(self): 1002 v = Variable([], np.float32(0.0)) 1003 assert v.item() == 0 1004 assert type(v.item()) is float 1005 1006 v = IndexVariable("x", np.arange(5)) 1007 assert 2 == v.searchsorted(2) 1008 1009 def test_datetime64_conversion_scalar(self): 1010 expected = np.datetime64("2000-01-01", "ns") 1011 for values in [ 1012 np.datetime64("2000-01-01"), 1013 pd.Timestamp("2000-01-01T00"), 1014 datetime(2000, 1, 1), 1015 ]: 1016 v = Variable([], values) 1017 assert v.dtype == np.dtype("datetime64[ns]") 1018 assert v.values == expected 1019 assert v.values.dtype == np.dtype("datetime64[ns]") 1020 1021 def test_timedelta64_conversion_scalar(self): 1022 expected = np.timedelta64(24 * 60 * 60 * 10 ** 9, "ns") 1023 for values in [ 1024 np.timedelta64(1, "D"), 1025 pd.Timedelta("1 day"), 1026 timedelta(days=1), 1027 ]: 1028 v = Variable([], values) 1029 assert v.dtype == np.dtype("timedelta64[ns]") 1030 assert v.values == expected 1031 assert v.values.dtype == np.dtype("timedelta64[ns]") 1032 1033 def test_0d_str(self): 1034 v = Variable([], "foo") 1035 assert v.dtype == np.dtype("U3") 1036 assert v.values == "foo" 1037 1038 v = Variable([], np.string_("foo")) 1039 assert v.dtype == np.dtype("S3") 1040 assert v.values == bytes("foo", "ascii") 1041 1042 def test_0d_datetime(self): 1043 v = Variable([], pd.Timestamp("2000-01-01")) 1044 assert v.dtype == np.dtype("datetime64[ns]") 1045 assert v.values == np.datetime64("2000-01-01", "ns") 1046 1047 def test_0d_timedelta(self): 1048 for td in [pd.to_timedelta("1s"), np.timedelta64(1, "s")]: 1049 v = Variable([], td) 1050 assert v.dtype == np.dtype("timedelta64[ns]") 1051 assert v.values == np.timedelta64(10 ** 9, "ns") 1052 1053 def test_equals_and_identical(self): 1054 d = np.random.rand(10, 3) 1055 d[0, 0] = np.nan 1056 v1 = Variable(("dim1", "dim2"), data=d, attrs={"att1": 3, "att2": [1, 2, 3]}) 1057 v2 = Variable(("dim1", "dim2"), data=d, attrs={"att1": 3, "att2": [1, 2, 3]}) 1058 assert v1.equals(v2) 1059 assert v1.identical(v2) 1060 1061 v3 = Variable(("dim1", "dim3"), data=d) 1062 assert not v1.equals(v3) 1063 1064 v4 = Variable(("dim1", "dim2"), data=d) 1065 assert v1.equals(v4) 1066 assert not v1.identical(v4) 1067 1068 v5 = deepcopy(v1) 1069 v5.values[:] = np.random.rand(10, 3) 1070 assert not v1.equals(v5) 1071 1072 assert not v1.equals(None) 1073 assert not v1.equals(d) 1074 1075 assert not v1.identical(None) 1076 assert not v1.identical(d) 1077 1078 def test_broadcast_equals(self): 1079 v1 = Variable((), np.nan) 1080 v2 = Variable(("x"), [np.nan, np.nan]) 1081 assert v1.broadcast_equals(v2) 1082 assert not v1.equals(v2) 1083 assert not v1.identical(v2) 1084 1085 v3 = Variable(("x"), [np.nan]) 1086 assert v1.broadcast_equals(v3) 1087 assert not v1.equals(v3) 1088 assert not v1.identical(v3) 1089 1090 assert not v1.broadcast_equals(None) 1091 1092 v4 = Variable(("x"), [np.nan] * 3) 1093 assert not v2.broadcast_equals(v4) 1094 1095 def test_no_conflicts(self): 1096 v1 = Variable(("x"), [1, 2, np.nan, np.nan]) 1097 v2 = Variable(("x"), [np.nan, 2, 3, np.nan]) 1098 assert v1.no_conflicts(v2) 1099 assert not v1.equals(v2) 1100 assert not v1.broadcast_equals(v2) 1101 assert not v1.identical(v2) 1102 1103 assert not v1.no_conflicts(None) 1104 1105 v3 = Variable(("y"), [np.nan, 2, 3, np.nan]) 1106 assert not v3.no_conflicts(v1) 1107 1108 d = np.array([1, 2, np.nan, np.nan]) 1109 assert not v1.no_conflicts(d) 1110 assert not v2.no_conflicts(d) 1111 1112 v4 = Variable(("w", "x"), [d]) 1113 assert v1.no_conflicts(v4) 1114 1115 def test_as_variable(self): 1116 data = np.arange(10) 1117 expected = Variable("x", data) 1118 expected_extra = Variable( 1119 "x", data, attrs={"myattr": "val"}, encoding={"scale_factor": 1} 1120 ) 1121 1122 assert_identical(expected, as_variable(expected)) 1123 1124 ds = Dataset({"x": expected}) 1125 var = as_variable(ds["x"]).to_base_variable() 1126 assert_identical(expected, var) 1127 assert not isinstance(ds["x"], Variable) 1128 assert isinstance(as_variable(ds["x"]), Variable) 1129 1130 xarray_tuple = ( 1131 expected_extra.dims, 1132 expected_extra.values, 1133 expected_extra.attrs, 1134 expected_extra.encoding, 1135 ) 1136 assert_identical(expected_extra, as_variable(xarray_tuple)) 1137 1138 with pytest.raises(TypeError, match=r"tuple of form"): 1139 as_variable(tuple(data)) 1140 with pytest.raises(ValueError, match=r"tuple of form"): # GH1016 1141 as_variable(("five", "six", "seven")) 1142 with pytest.raises(TypeError, match=r"without an explicit list of dimensions"): 1143 as_variable(data) 1144 1145 actual = as_variable(data, name="x") 1146 assert_identical(expected.to_index_variable(), actual) 1147 1148 actual = as_variable(0) 1149 expected = Variable([], 0) 1150 assert_identical(expected, actual) 1151 1152 data = np.arange(9).reshape((3, 3)) 1153 expected = Variable(("x", "y"), data) 1154 with pytest.raises(ValueError, match=r"without explicit dimension names"): 1155 as_variable(data, name="x") 1156 with pytest.raises(ValueError, match=r"has more than 1-dimension"): 1157 as_variable(expected, name="x") 1158 1159 # test datetime, timedelta conversion 1160 dt = np.array([datetime(1999, 1, 1) + timedelta(days=x) for x in range(10)]) 1161 assert as_variable(dt, "time").dtype.kind == "M" 1162 td = np.array([timedelta(days=x) for x in range(10)]) 1163 assert as_variable(td, "time").dtype.kind == "m" 1164 1165 with pytest.raises(TypeError): 1166 as_variable(("x", DataArray([]))) 1167 1168 def test_repr(self): 1169 v = Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"}) 1170 expected = dedent( 1171 """ 1172 <xarray.Variable (time: 2, x: 3)> 1173 array([[1, 2, 3], 1174 [4, 5, 6]]) 1175 Attributes: 1176 foo: bar 1177 """ 1178 ).strip() 1179 assert expected == repr(v) 1180 1181 def test_repr_lazy_data(self): 1182 v = Variable("x", LazilyIndexedArray(np.arange(2e5))) 1183 assert "200000 values with dtype" in repr(v) 1184 assert isinstance(v._data, LazilyIndexedArray) 1185 1186 def test_detect_indexer_type(self): 1187 """Tests indexer type was correctly detected.""" 1188 data = np.random.random((10, 11)) 1189 v = Variable(["x", "y"], data) 1190 1191 _, ind, _ = v._broadcast_indexes((0, 1)) 1192 assert type(ind) == indexing.BasicIndexer 1193 1194 _, ind, _ = v._broadcast_indexes((0, slice(0, 8, 2))) 1195 assert type(ind) == indexing.BasicIndexer 1196 1197 _, ind, _ = v._broadcast_indexes((0, [0, 1])) 1198 assert type(ind) == indexing.OuterIndexer 1199 1200 _, ind, _ = v._broadcast_indexes(([0, 1], 1)) 1201 assert type(ind) == indexing.OuterIndexer 1202 1203 _, ind, _ = v._broadcast_indexes(([0, 1], [1, 2])) 1204 assert type(ind) == indexing.OuterIndexer 1205 1206 _, ind, _ = v._broadcast_indexes(([0, 1], slice(0, 8, 2))) 1207 assert type(ind) == indexing.OuterIndexer 1208 1209 vind = Variable(("a",), [0, 1]) 1210 _, ind, _ = v._broadcast_indexes((vind, slice(0, 8, 2))) 1211 assert type(ind) == indexing.OuterIndexer 1212 1213 vind = Variable(("y",), [0, 1]) 1214 _, ind, _ = v._broadcast_indexes((vind, 3)) 1215 assert type(ind) == indexing.OuterIndexer 1216 1217 vind = Variable(("a",), [0, 1]) 1218 _, ind, _ = v._broadcast_indexes((vind, vind)) 1219 assert type(ind) == indexing.VectorizedIndexer 1220 1221 vind = Variable(("a", "b"), [[0, 2], [1, 3]]) 1222 _, ind, _ = v._broadcast_indexes((vind, 3)) 1223 assert type(ind) == indexing.VectorizedIndexer 1224 1225 def test_indexer_type(self): 1226 # GH:issue:1688. Wrong indexer type induces NotImplementedError 1227 data = np.random.random((10, 11)) 1228 v = Variable(["x", "y"], data) 1229 1230 def assert_indexer_type(key, object_type): 1231 dims, index_tuple, new_order = v._broadcast_indexes(key) 1232 assert isinstance(index_tuple, object_type) 1233 1234 # should return BasicIndexer 1235 assert_indexer_type((0, 1), BasicIndexer) 1236 assert_indexer_type((0, slice(None, None)), BasicIndexer) 1237 assert_indexer_type((Variable([], 3), slice(None, None)), BasicIndexer) 1238 assert_indexer_type((Variable([], 3), (Variable([], 6))), BasicIndexer) 1239 1240 # should return OuterIndexer 1241 assert_indexer_type(([0, 1], 1), OuterIndexer) 1242 assert_indexer_type(([0, 1], [1, 2]), OuterIndexer) 1243 assert_indexer_type((Variable(("x"), [0, 1]), 1), OuterIndexer) 1244 assert_indexer_type((Variable(("x"), [0, 1]), slice(None, None)), OuterIndexer) 1245 assert_indexer_type( 1246 (Variable(("x"), [0, 1]), Variable(("y"), [0, 1])), OuterIndexer 1247 ) 1248 1249 # should return VectorizedIndexer 1250 assert_indexer_type((Variable(("y"), [0, 1]), [0, 1]), VectorizedIndexer) 1251 assert_indexer_type( 1252 (Variable(("z"), [0, 1]), Variable(("z"), [0, 1])), VectorizedIndexer 1253 ) 1254 assert_indexer_type( 1255 ( 1256 Variable(("a", "b"), [[0, 1], [1, 2]]), 1257 Variable(("a", "b"), [[0, 1], [1, 2]]), 1258 ), 1259 VectorizedIndexer, 1260 ) 1261 1262 def test_items(self): 1263 data = np.random.random((10, 11)) 1264 v = Variable(["x", "y"], data) 1265 # test slicing 1266 assert_identical(v, v[:]) 1267 assert_identical(v, v[...]) 1268 assert_identical(Variable(["y"], data[0]), v[0]) 1269 assert_identical(Variable(["x"], data[:, 0]), v[:, 0]) 1270 assert_identical(Variable(["x", "y"], data[:3, :2]), v[:3, :2]) 1271 # test array indexing 1272 x = Variable(["x"], np.arange(10)) 1273 y = Variable(["y"], np.arange(11)) 1274 assert_identical(v, v[x.values]) 1275 assert_identical(v, v[x]) 1276 assert_identical(v[:3], v[x < 3]) 1277 assert_identical(v[:, 3:], v[:, y >= 3]) 1278 assert_identical(v[:3, 3:], v[x < 3, y >= 3]) 1279 assert_identical(v[:3, :2], v[x[:3], y[:2]]) 1280 assert_identical(v[:3, :2], v[range(3), range(2)]) 1281 # test iteration 1282 for n, item in enumerate(v): 1283 assert_identical(Variable(["y"], data[n]), item) 1284 with pytest.raises(TypeError, match=r"iteration over a 0-d"): 1285 iter(Variable([], 0)) 1286 # test setting 1287 v.values[:] = 0 1288 assert np.all(v.values == 0) 1289 # test orthogonal setting 1290 v[range(10), range(11)] = 1 1291 assert_array_equal(v.values, np.ones((10, 11))) 1292 1293 def test_getitem_basic(self): 1294 v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]]) 1295 1296 # int argument 1297 v_new = v[0] 1298 assert v_new.dims == ("y",) 1299 assert_array_equal(v_new, v._data[0]) 1300 1301 # slice argument 1302 v_new = v[:2] 1303 assert v_new.dims == ("x", "y") 1304 assert_array_equal(v_new, v._data[:2]) 1305 1306 # list arguments 1307 v_new = v[[0]] 1308 assert v_new.dims == ("x", "y") 1309 assert_array_equal(v_new, v._data[[0]]) 1310 1311 v_new = v[[]] 1312 assert v_new.dims == ("x", "y") 1313 assert_array_equal(v_new, v._data[[]]) 1314 1315 # dict arguments 1316 v_new = v[dict(x=0)] 1317 assert v_new.dims == ("y",) 1318 assert_array_equal(v_new, v._data[0]) 1319 1320 v_new = v[dict(x=0, y=slice(None))] 1321 assert v_new.dims == ("y",) 1322 assert_array_equal(v_new, v._data[0]) 1323 1324 v_new = v[dict(x=0, y=1)] 1325 assert v_new.dims == () 1326 assert_array_equal(v_new, v._data[0, 1]) 1327 1328 v_new = v[dict(y=1)] 1329 assert v_new.dims == ("x",) 1330 assert_array_equal(v_new, v._data[:, 1]) 1331 1332 # tuple argument 1333 v_new = v[(slice(None), 1)] 1334 assert v_new.dims == ("x",) 1335 assert_array_equal(v_new, v._data[:, 1]) 1336 1337 # test that we obtain a modifiable view when taking a 0d slice 1338 v_new = v[0, 0] 1339 v_new[...] += 99 1340 assert_array_equal(v_new, v._data[0, 0]) 1341 1342 def test_getitem_with_mask_2d_input(self): 1343 v = Variable(("x", "y"), [[0, 1, 2], [3, 4, 5]]) 1344 assert_identical( 1345 v._getitem_with_mask(([-1, 0], [1, -1])), 1346 Variable(("x", "y"), [[np.nan, np.nan], [1, np.nan]]), 1347 ) 1348 assert_identical(v._getitem_with_mask((slice(2), [0, 1, 2])), v) 1349 1350 def test_isel(self): 1351 v = Variable(["time", "x"], self.d) 1352 assert_identical(v.isel(time=slice(None)), v) 1353 assert_identical(v.isel(time=0), v[0]) 1354 assert_identical(v.isel(time=slice(0, 3)), v[:3]) 1355 assert_identical(v.isel(x=0), v[:, 0]) 1356 assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]]) 1357 assert_identical(v.isel(time=[]), v[[]]) 1358 with pytest.raises( 1359 ValueError, 1360 match=r"Dimensions {'not_a_dim'} do not exist. Expected one or more of " 1361 r"\('time', 'x'\)", 1362 ): 1363 v.isel(not_a_dim=0) 1364 with pytest.warns( 1365 UserWarning, 1366 match=r"Dimensions {'not_a_dim'} do not exist. Expected one or more of " 1367 r"\('time', 'x'\)", 1368 ): 1369 v.isel(not_a_dim=0, missing_dims="warn") 1370 assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore")) 1371 1372 def test_index_0d_numpy_string(self): 1373 # regression test to verify our work around for indexing 0d strings 1374 v = Variable([], np.string_("asdf")) 1375 assert_identical(v[()], v) 1376 1377 v = Variable([], np.unicode_("asdf")) 1378 assert_identical(v[()], v) 1379 1380 def test_indexing_0d_unicode(self): 1381 # regression test for GH568 1382 actual = Variable(("x"), ["tmax"])[0][()] 1383 expected = Variable((), "tmax") 1384 assert_identical(actual, expected) 1385 1386 @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0]) 1387 def test_shift(self, fill_value): 1388 v = Variable("x", [1, 2, 3, 4, 5]) 1389 1390 assert_identical(v, v.shift(x=0)) 1391 assert v is not v.shift(x=0) 1392 1393 expected = Variable("x", [np.nan, np.nan, 1, 2, 3]) 1394 assert_identical(expected, v.shift(x=2)) 1395 1396 if fill_value == dtypes.NA: 1397 # if we supply the default, we expect the missing value for a 1398 # float array 1399 fill_value_exp = np.nan 1400 else: 1401 fill_value_exp = fill_value 1402 1403 expected = Variable("x", [fill_value_exp, 1, 2, 3, 4]) 1404 assert_identical(expected, v.shift(x=1, fill_value=fill_value)) 1405 1406 expected = Variable("x", [2, 3, 4, 5, fill_value_exp]) 1407 assert_identical(expected, v.shift(x=-1, fill_value=fill_value)) 1408 1409 expected = Variable("x", [fill_value_exp] * 5) 1410 assert_identical(expected, v.shift(x=5, fill_value=fill_value)) 1411 assert_identical(expected, v.shift(x=6, fill_value=fill_value)) 1412 1413 with pytest.raises(ValueError, match=r"dimension"): 1414 v.shift(z=0) 1415 1416 v = Variable("x", [1, 2, 3, 4, 5], {"foo": "bar"}) 1417 assert_identical(v, v.shift(x=0)) 1418 1419 expected = Variable("x", [fill_value_exp, 1, 2, 3, 4], {"foo": "bar"}) 1420 assert_identical(expected, v.shift(x=1, fill_value=fill_value)) 1421 1422 def test_shift2d(self): 1423 v = Variable(("x", "y"), [[1, 2], [3, 4]]) 1424 expected = Variable(("x", "y"), [[np.nan, np.nan], [np.nan, 1]]) 1425 assert_identical(expected, v.shift(x=1, y=1)) 1426 1427 def test_roll(self): 1428 v = Variable("x", [1, 2, 3, 4, 5]) 1429 1430 assert_identical(v, v.roll(x=0)) 1431 assert v is not v.roll(x=0) 1432 1433 expected = Variable("x", [5, 1, 2, 3, 4]) 1434 assert_identical(expected, v.roll(x=1)) 1435 assert_identical(expected, v.roll(x=-4)) 1436 assert_identical(expected, v.roll(x=6)) 1437 1438 expected = Variable("x", [4, 5, 1, 2, 3]) 1439 assert_identical(expected, v.roll(x=2)) 1440 assert_identical(expected, v.roll(x=-3)) 1441 1442 with pytest.raises(ValueError, match=r"dimension"): 1443 v.roll(z=0) 1444 1445 def test_roll_consistency(self): 1446 v = Variable(("x", "y"), np.random.randn(5, 6)) 1447 1448 for axis, dim in [(0, "x"), (1, "y")]: 1449 for shift in [-3, 0, 1, 7, 11]: 1450 expected = np.roll(v.values, shift, axis=axis) 1451 actual = v.roll(**{dim: shift}).values 1452 assert_array_equal(expected, actual) 1453 1454 def test_transpose(self): 1455 v = Variable(["time", "x"], self.d) 1456 v2 = Variable(["x", "time"], self.d.T) 1457 assert_identical(v, v2.transpose()) 1458 assert_identical(v.transpose(), v.T) 1459 x = np.random.randn(2, 3, 4, 5) 1460 w = Variable(["a", "b", "c", "d"], x) 1461 w2 = Variable(["d", "b", "c", "a"], np.einsum("abcd->dbca", x)) 1462 assert w2.shape == (5, 3, 4, 2) 1463 assert_identical(w2, w.transpose("d", "b", "c", "a")) 1464 assert_identical(w2, w.transpose("d", ..., "a")) 1465 assert_identical(w2, w.transpose("d", "b", "c", ...)) 1466 assert_identical(w2, w.transpose(..., "b", "c", "a")) 1467 assert_identical(w, w2.transpose("a", "b", "c", "d")) 1468 w3 = Variable(["b", "c", "d", "a"], np.einsum("abcd->bcda", x)) 1469 assert_identical(w, w3.transpose("a", "b", "c", "d")) 1470 1471 # test missing dimension, raise error 1472 with pytest.raises(ValueError): 1473 v.transpose(..., "not_a_dim") 1474 1475 # test missing dimension, ignore error 1476 actual = v.transpose(..., "not_a_dim", missing_dims="ignore") 1477 expected_ell = v.transpose(...) 1478 assert_identical(expected_ell, actual) 1479 1480 # test missing dimension, raise warning 1481 with pytest.warns(UserWarning): 1482 v.transpose(..., "not_a_dim", missing_dims="warn") 1483 assert_identical(expected_ell, actual) 1484 1485 def test_transpose_0d(self): 1486 for value in [ 1487 3.5, 1488 ("a", 1), 1489 np.datetime64("2000-01-01"), 1490 np.timedelta64(1, "h"), 1491 None, 1492 object(), 1493 ]: 1494 variable = Variable([], value) 1495 actual = variable.transpose() 1496 assert_identical(actual, variable) 1497 1498 def test_squeeze(self): 1499 v = Variable(["x", "y"], [[1]]) 1500 assert_identical(Variable([], 1), v.squeeze()) 1501 assert_identical(Variable(["y"], [1]), v.squeeze("x")) 1502 assert_identical(Variable(["y"], [1]), v.squeeze(["x"])) 1503 assert_identical(Variable(["x"], [1]), v.squeeze("y")) 1504 assert_identical(Variable([], 1), v.squeeze(["x", "y"])) 1505 1506 v = Variable(["x", "y"], [[1, 2]]) 1507 assert_identical(Variable(["y"], [1, 2]), v.squeeze()) 1508 assert_identical(Variable(["y"], [1, 2]), v.squeeze("x")) 1509 with pytest.raises(ValueError, match=r"cannot select a dimension"): 1510 v.squeeze("y") 1511 1512 def test_get_axis_num(self): 1513 v = Variable(["x", "y", "z"], np.random.randn(2, 3, 4)) 1514 assert v.get_axis_num("x") == 0 1515 assert v.get_axis_num(["x"]) == (0,) 1516 assert v.get_axis_num(["x", "y"]) == (0, 1) 1517 assert v.get_axis_num(["z", "y", "x"]) == (2, 1, 0) 1518 with pytest.raises(ValueError, match=r"not found in array dim"): 1519 v.get_axis_num("foobar") 1520 1521 def test_set_dims(self): 1522 v = Variable(["x"], [0, 1]) 1523 actual = v.set_dims(["x", "y"]) 1524 expected = Variable(["x", "y"], [[0], [1]]) 1525 assert_identical(actual, expected) 1526 1527 actual = v.set_dims(["y", "x"]) 1528 assert_identical(actual, expected.T) 1529 1530 actual = v.set_dims({"x": 2, "y": 2}) 1531 expected = Variable(["x", "y"], [[0, 0], [1, 1]]) 1532 assert_identical(actual, expected) 1533 1534 v = Variable(["foo"], [0, 1]) 1535 actual = v.set_dims("foo") 1536 expected = v 1537 assert_identical(actual, expected) 1538 1539 with pytest.raises(ValueError, match=r"must be a superset"): 1540 v.set_dims(["z"]) 1541 1542 def test_set_dims_object_dtype(self): 1543 v = Variable([], ("a", 1)) 1544 actual = v.set_dims(("x",), (3,)) 1545 exp_values = np.empty((3,), dtype=object) 1546 for i in range(3): 1547 exp_values[i] = ("a", 1) 1548 expected = Variable(["x"], exp_values) 1549 assert_identical(actual, expected) 1550 1551 def test_stack(self): 1552 v = Variable(["x", "y"], [[0, 1], [2, 3]], {"foo": "bar"}) 1553 actual = v.stack(z=("x", "y")) 1554 expected = Variable("z", [0, 1, 2, 3], v.attrs) 1555 assert_identical(actual, expected) 1556 1557 actual = v.stack(z=("x",)) 1558 expected = Variable(("y", "z"), v.data.T, v.attrs) 1559 assert_identical(actual, expected) 1560 1561 actual = v.stack(z=()) 1562 assert_identical(actual, v) 1563 1564 actual = v.stack(X=("x",), Y=("y",)).transpose("X", "Y") 1565 expected = Variable(("X", "Y"), v.data, v.attrs) 1566 assert_identical(actual, expected) 1567 1568 def test_stack_errors(self): 1569 v = Variable(["x", "y"], [[0, 1], [2, 3]], {"foo": "bar"}) 1570 1571 with pytest.raises(ValueError, match=r"invalid existing dim"): 1572 v.stack(z=("x1",)) 1573 with pytest.raises(ValueError, match=r"cannot create a new dim"): 1574 v.stack(x=("x",)) 1575 1576 def test_unstack(self): 1577 v = Variable("z", [0, 1, 2, 3], {"foo": "bar"}) 1578 actual = v.unstack(z={"x": 2, "y": 2}) 1579 expected = Variable(("x", "y"), [[0, 1], [2, 3]], v.attrs) 1580 assert_identical(actual, expected) 1581 1582 actual = v.unstack(z={"x": 4, "y": 1}) 1583 expected = Variable(("x", "y"), [[0], [1], [2], [3]], v.attrs) 1584 assert_identical(actual, expected) 1585 1586 actual = v.unstack(z={"x": 4}) 1587 expected = Variable("x", [0, 1, 2, 3], v.attrs) 1588 assert_identical(actual, expected) 1589 1590 def test_unstack_errors(self): 1591 v = Variable("z", [0, 1, 2, 3]) 1592 with pytest.raises(ValueError, match=r"invalid existing dim"): 1593 v.unstack(foo={"x": 4}) 1594 with pytest.raises(ValueError, match=r"cannot create a new dim"): 1595 v.stack(z=("z",)) 1596 with pytest.raises(ValueError, match=r"the product of the new dim"): 1597 v.unstack(z={"x": 5}) 1598 1599 def test_unstack_2d(self): 1600 v = Variable(["x", "y"], [[0, 1], [2, 3]]) 1601 actual = v.unstack(y={"z": 2}) 1602 expected = Variable(["x", "z"], v.data) 1603 assert_identical(actual, expected) 1604 1605 actual = v.unstack(x={"z": 2}) 1606 expected = Variable(["y", "z"], v.data.T) 1607 assert_identical(actual, expected) 1608 1609 def test_stack_unstack_consistency(self): 1610 v = Variable(["x", "y"], [[0, 1], [2, 3]]) 1611 actual = v.stack(z=("x", "y")).unstack(z={"x": 2, "y": 2}) 1612 assert_identical(actual, v) 1613 1614 def test_broadcasting_math(self): 1615 x = np.random.randn(2, 3) 1616 v = Variable(["a", "b"], x) 1617 # 1d to 2d broadcasting 1618 assert_identical(v * v, Variable(["a", "b"], np.einsum("ab,ab->ab", x, x))) 1619 assert_identical(v * v[0], Variable(["a", "b"], np.einsum("ab,b->ab", x, x[0]))) 1620 assert_identical(v[0] * v, Variable(["b", "a"], np.einsum("b,ab->ba", x[0], x))) 1621 assert_identical( 1622 v[0] * v[:, 0], Variable(["b", "a"], np.einsum("b,a->ba", x[0], x[:, 0])) 1623 ) 1624 # higher dim broadcasting 1625 y = np.random.randn(3, 4, 5) 1626 w = Variable(["b", "c", "d"], y) 1627 assert_identical( 1628 v * w, Variable(["a", "b", "c", "d"], np.einsum("ab,bcd->abcd", x, y)) 1629 ) 1630 assert_identical( 1631 w * v, Variable(["b", "c", "d", "a"], np.einsum("bcd,ab->bcda", y, x)) 1632 ) 1633 assert_identical( 1634 v * w[0], Variable(["a", "b", "c", "d"], np.einsum("ab,cd->abcd", x, y[0])) 1635 ) 1636 1637 def test_broadcasting_failures(self): 1638 a = Variable(["x"], np.arange(10)) 1639 b = Variable(["x"], np.arange(5)) 1640 c = Variable(["x", "x"], np.arange(100).reshape(10, 10)) 1641 with pytest.raises(ValueError, match=r"mismatched lengths"): 1642 a + b 1643 with pytest.raises(ValueError, match=r"duplicate dimensions"): 1644 a + c 1645 1646 def test_inplace_math(self): 1647 x = np.arange(5) 1648 v = Variable(["x"], x) 1649 v2 = v 1650 v2 += 1 1651 assert v is v2 1652 # since we provided an ndarray for data, it is also modified in-place 1653 assert source_ndarray(v.values) is x 1654 assert_array_equal(v.values, np.arange(5) + 1) 1655 1656 with pytest.raises(ValueError, match=r"dimensions cannot change"): 1657 v += Variable("y", np.arange(5)) 1658 1659 def test_reduce(self): 1660 v = Variable(["x", "y"], self.d, {"ignored": "attributes"}) 1661 assert_identical(v.reduce(np.std, "x"), Variable(["y"], self.d.std(axis=0))) 1662 assert_identical(v.reduce(np.std, axis=0), v.reduce(np.std, dim="x")) 1663 assert_identical( 1664 v.reduce(np.std, ["y", "x"]), Variable([], self.d.std(axis=(0, 1))) 1665 ) 1666 assert_identical(v.reduce(np.std), Variable([], self.d.std())) 1667 assert_identical( 1668 v.reduce(np.mean, "x").reduce(np.std, "y"), 1669 Variable([], self.d.mean(axis=0).std()), 1670 ) 1671 assert_allclose(v.mean("x"), v.reduce(np.mean, "x")) 1672 1673 with pytest.raises(ValueError, match=r"cannot supply both"): 1674 v.mean(dim="x", axis=0) 1675 1676 @requires_bottleneck 1677 def test_reduce_use_bottleneck(self, monkeypatch): 1678 def raise_if_called(*args, **kwargs): 1679 raise RuntimeError("should not have been called") 1680 1681 import bottleneck as bn 1682 1683 monkeypatch.setattr(bn, "nanmin", raise_if_called) 1684 1685 v = Variable("x", [0.0, np.nan, 1.0]) 1686 with pytest.raises(RuntimeError, match="should not have been called"): 1687 with set_options(use_bottleneck=True): 1688 v.min() 1689 1690 with set_options(use_bottleneck=False): 1691 v.min() 1692 1693 @pytest.mark.parametrize("skipna", [True, False]) 1694 @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) 1695 @pytest.mark.parametrize( 1696 "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) 1697 ) 1698 def test_quantile(self, q, axis, dim, skipna): 1699 v = Variable(["x", "y"], self.d) 1700 actual = v.quantile(q, dim=dim, skipna=skipna) 1701 _percentile_func = np.nanpercentile if skipna else np.percentile 1702 expected = _percentile_func(self.d, np.array(q) * 100, axis=axis) 1703 np.testing.assert_allclose(actual.values, expected) 1704 1705 @requires_dask 1706 @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) 1707 @pytest.mark.parametrize("axis, dim", [[1, "y"], [[1], ["y"]]]) 1708 def test_quantile_dask(self, q, axis, dim): 1709 v = Variable(["x", "y"], self.d).chunk({"x": 2}) 1710 actual = v.quantile(q, dim=dim) 1711 assert isinstance(actual.data, dask_array_type) 1712 expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) 1713 np.testing.assert_allclose(actual.values, expected) 1714 1715 @requires_dask 1716 def test_quantile_chunked_dim_error(self): 1717 v = Variable(["x", "y"], self.d).chunk({"x": 2}) 1718 1719 # this checks for ValueError in dask.array.apply_gufunc 1720 with pytest.raises(ValueError, match=r"consists of multiple chunks"): 1721 v.quantile(0.5, dim="x") 1722 1723 @pytest.mark.parametrize("q", [-0.1, 1.1, [2], [0.25, 2]]) 1724 def test_quantile_out_of_bounds(self, q): 1725 v = Variable(["x", "y"], self.d) 1726 1727 # escape special characters 1728 with pytest.raises( 1729 ValueError, match=r"Quantiles must be in the range \[0, 1\]" 1730 ): 1731 v.quantile(q, dim="x") 1732 1733 @requires_dask 1734 @requires_bottleneck 1735 def test_rank_dask_raises(self): 1736 v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0]).chunk(2) 1737 with pytest.raises(TypeError, match=r"arrays stored as dask"): 1738 v.rank("x") 1739 1740 def test_rank_use_bottleneck(self): 1741 v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0]) 1742 with set_options(use_bottleneck=False): 1743 with pytest.raises(RuntimeError): 1744 v.rank("x") 1745 1746 @requires_bottleneck 1747 def test_rank(self): 1748 import bottleneck as bn 1749 1750 # floats 1751 v = Variable(["x", "y"], [[3, 4, np.nan, 1]]) 1752 expect_0 = bn.nanrankdata(v.data, axis=0) 1753 expect_1 = bn.nanrankdata(v.data, axis=1) 1754 np.testing.assert_allclose(v.rank("x").values, expect_0) 1755 np.testing.assert_allclose(v.rank("y").values, expect_1) 1756 # int 1757 v = Variable(["x"], [3, 2, 1]) 1758 expect = bn.rankdata(v.data, axis=0) 1759 np.testing.assert_allclose(v.rank("x").values, expect) 1760 # str 1761 v = Variable(["x"], ["c", "b", "a"]) 1762 expect = bn.rankdata(v.data, axis=0) 1763 np.testing.assert_allclose(v.rank("x").values, expect) 1764 # pct 1765 v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0]) 1766 v_expect = Variable(["x"], [0.75, 0.25, np.nan, 0.5, 1.0]) 1767 assert_equal(v.rank("x", pct=True), v_expect) 1768 # invalid dim 1769 with pytest.raises(ValueError, match=r"not found"): 1770 v.rank("y") 1771 1772 def test_big_endian_reduce(self): 1773 # regression test for GH489 1774 data = np.ones(5, dtype=">f4") 1775 v = Variable(["x"], data) 1776 expected = Variable([], 5) 1777 assert_identical(expected, v.sum()) 1778 1779 def test_reduce_funcs(self): 1780 v = Variable("x", np.array([1, np.nan, 2, 3])) 1781 assert_identical(v.mean(), Variable([], 2)) 1782 assert_identical(v.mean(skipna=True), Variable([], 2)) 1783 assert_identical(v.mean(skipna=False), Variable([], np.nan)) 1784 assert_identical(np.mean(v), Variable([], 2)) 1785 1786 assert_identical(v.prod(), Variable([], 6)) 1787 assert_identical(v.cumsum(axis=0), Variable("x", np.array([1, 1, 3, 6]))) 1788 assert_identical(v.cumprod(axis=0), Variable("x", np.array([1, 1, 2, 6]))) 1789 assert_identical(v.var(), Variable([], 2.0 / 3)) 1790 assert_identical(v.median(), Variable([], 2)) 1791 1792 v = Variable("x", [True, False, False]) 1793 assert_identical(v.any(), Variable([], True)) 1794 assert_identical(v.all(dim="x"), Variable([], False)) 1795 1796 v = Variable("t", pd.date_range("2000-01-01", periods=3)) 1797 assert v.argmax(skipna=True, dim="t") == 2 1798 1799 assert_identical(v.max(), Variable([], pd.Timestamp("2000-01-03"))) 1800 1801 def test_reduce_keepdims(self): 1802 v = Variable(["x", "y"], self.d) 1803 1804 assert_identical( 1805 v.mean(keepdims=True), Variable(v.dims, np.mean(self.d, keepdims=True)) 1806 ) 1807 assert_identical( 1808 v.mean(dim="x", keepdims=True), 1809 Variable(v.dims, np.mean(self.d, axis=0, keepdims=True)), 1810 ) 1811 assert_identical( 1812 v.mean(dim="y", keepdims=True), 1813 Variable(v.dims, np.mean(self.d, axis=1, keepdims=True)), 1814 ) 1815 assert_identical( 1816 v.mean(dim=["y", "x"], keepdims=True), 1817 Variable(v.dims, np.mean(self.d, axis=(1, 0), keepdims=True)), 1818 ) 1819 1820 v = Variable([], 1.0) 1821 assert_identical( 1822 v.mean(keepdims=True), Variable([], np.mean(v.data, keepdims=True)) 1823 ) 1824 1825 @requires_dask 1826 def test_reduce_keepdims_dask(self): 1827 import dask.array 1828 1829 v = Variable(["x", "y"], self.d).chunk() 1830 1831 actual = v.mean(keepdims=True) 1832 assert isinstance(actual.data, dask.array.Array) 1833 1834 expected = Variable(v.dims, np.mean(self.d, keepdims=True)) 1835 assert_identical(actual, expected) 1836 1837 actual = v.mean(dim="y", keepdims=True) 1838 assert isinstance(actual.data, dask.array.Array) 1839 1840 expected = Variable(v.dims, np.mean(self.d, axis=1, keepdims=True)) 1841 assert_identical(actual, expected) 1842 1843 def test_reduce_keep_attrs(self): 1844 _attrs = {"units": "test", "long_name": "testing"} 1845 1846 v = Variable(["x", "y"], self.d, _attrs) 1847 1848 # Test dropped attrs 1849 vm = v.mean() 1850 assert len(vm.attrs) == 0 1851 assert vm.attrs == {} 1852 1853 # Test kept attrs 1854 vm = v.mean(keep_attrs=True) 1855 assert len(vm.attrs) == len(_attrs) 1856 assert vm.attrs == _attrs 1857 1858 def test_binary_ops_keep_attrs(self): 1859 _attrs = {"units": "test", "long_name": "testing"} 1860 a = Variable(["x", "y"], np.random.randn(3, 3), _attrs) 1861 b = Variable(["x", "y"], np.random.randn(3, 3), _attrs) 1862 # Test dropped attrs 1863 d = a - b # just one operation 1864 assert d.attrs == {} 1865 # Test kept attrs 1866 with set_options(keep_attrs=True): 1867 d = a - b 1868 assert d.attrs == _attrs 1869 1870 def test_count(self): 1871 expected = Variable([], 3) 1872 actual = Variable(["x"], [1, 2, 3, np.nan]).count() 1873 assert_identical(expected, actual) 1874 1875 v = Variable(["x"], np.array(["1", "2", "3", np.nan], dtype=object)) 1876 actual = v.count() 1877 assert_identical(expected, actual) 1878 1879 actual = Variable(["x"], [True, False, True]).count() 1880 assert_identical(expected, actual) 1881 assert actual.dtype == int 1882 1883 expected = Variable(["x"], [2, 3]) 1884 actual = Variable(["x", "y"], [[1, 0, np.nan], [1, 1, 1]]).count("y") 1885 assert_identical(expected, actual) 1886 1887 def test_setitem(self): 1888 v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]]) 1889 v[0, 1] = 1 1890 assert v[0, 1] == 1 1891 1892 v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]]) 1893 v[dict(x=[0, 1])] = 1 1894 assert_array_equal(v[[0, 1]], np.ones_like(v[[0, 1]])) 1895 1896 # boolean indexing 1897 v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]]) 1898 v[dict(x=[True, False])] = 1 1899 1900 assert_array_equal(v[0], np.ones_like(v[0])) 1901 v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]]) 1902 v[dict(x=[True, False], y=[False, True, False])] = 1 1903 assert v[0, 1] == 1 1904 1905 def test_setitem_fancy(self): 1906 # assignment which should work as np.ndarray does 1907 def assert_assigned_2d(array, key_x, key_y, values): 1908 expected = array.copy() 1909 expected[key_x, key_y] = values 1910 v = Variable(["x", "y"], array) 1911 v[dict(x=key_x, y=key_y)] = values 1912 assert_array_equal(expected, v) 1913 1914 # 1d vectorized indexing 1915 assert_assigned_2d( 1916 np.random.randn(4, 3), 1917 key_x=Variable(["a"], [0, 1]), 1918 key_y=Variable(["a"], [0, 1]), 1919 values=0, 1920 ) 1921 assert_assigned_2d( 1922 np.random.randn(4, 3), 1923 key_x=Variable(["a"], [0, 1]), 1924 key_y=Variable(["a"], [0, 1]), 1925 values=Variable((), 0), 1926 ) 1927 assert_assigned_2d( 1928 np.random.randn(4, 3), 1929 key_x=Variable(["a"], [0, 1]), 1930 key_y=Variable(["a"], [0, 1]), 1931 values=Variable(("a"), [3, 2]), 1932 ) 1933 assert_assigned_2d( 1934 np.random.randn(4, 3), 1935 key_x=slice(None), 1936 key_y=Variable(["a"], [0, 1]), 1937 values=Variable(("a"), [3, 2]), 1938 ) 1939 1940 # 2d-vectorized indexing 1941 assert_assigned_2d( 1942 np.random.randn(4, 3), 1943 key_x=Variable(["a", "b"], [[0, 1]]), 1944 key_y=Variable(["a", "b"], [[1, 0]]), 1945 values=0, 1946 ) 1947 assert_assigned_2d( 1948 np.random.randn(4, 3), 1949 key_x=Variable(["a", "b"], [[0, 1]]), 1950 key_y=Variable(["a", "b"], [[1, 0]]), 1951 values=[0], 1952 ) 1953 assert_assigned_2d( 1954 np.random.randn(5, 4), 1955 key_x=Variable(["a", "b"], [[0, 1], [2, 3]]), 1956 key_y=Variable(["a", "b"], [[1, 0], [3, 3]]), 1957 values=[2, 3], 1958 ) 1959 1960 # vindex with slice 1961 v = Variable(["x", "y", "z"], np.ones((4, 3, 2))) 1962 ind = Variable(["a"], [0, 1]) 1963 v[dict(x=ind, z=ind)] = 0 1964 expected = Variable(["x", "y", "z"], np.ones((4, 3, 2))) 1965 expected[0, :, 0] = 0 1966 expected[1, :, 1] = 0 1967 assert_identical(expected, v) 1968 1969 # dimension broadcast 1970 v = Variable(["x", "y"], np.ones((3, 2))) 1971 ind = Variable(["a", "b"], [[0, 1]]) 1972 v[ind, :] = 0 1973 expected = Variable(["x", "y"], [[0, 0], [0, 0], [1, 1]]) 1974 assert_identical(expected, v) 1975 1976 with pytest.raises(ValueError, match=r"shape mismatch"): 1977 v[ind, ind] = np.zeros((1, 2, 1)) 1978 1979 v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]]) 1980 ind = Variable(["a"], [0, 1]) 1981 v[dict(x=ind)] = Variable(["a", "y"], np.ones((2, 3), dtype=int) * 10) 1982 assert_array_equal(v[0], np.ones_like(v[0]) * 10) 1983 assert_array_equal(v[1], np.ones_like(v[1]) * 10) 1984 assert v.dims == ("x", "y") # dimension should not change 1985 1986 # increment 1987 v = Variable(["x", "y"], np.arange(6).reshape(3, 2)) 1988 ind = Variable(["a"], [0, 1]) 1989 v[dict(x=ind)] += 1 1990 expected = Variable(["x", "y"], [[1, 2], [3, 4], [4, 5]]) 1991 assert_identical(v, expected) 1992 1993 ind = Variable(["a"], [0, 0]) 1994 v[dict(x=ind)] += 1 1995 expected = Variable(["x", "y"], [[2, 3], [3, 4], [4, 5]]) 1996 assert_identical(v, expected) 1997 1998 def test_coarsen(self): 1999 v = self.cls(["x"], [0, 1, 2, 3, 4]) 2000 actual = v.coarsen({"x": 2}, boundary="pad", func="mean") 2001 expected = self.cls(["x"], [0.5, 2.5, 4]) 2002 assert_identical(actual, expected) 2003 2004 actual = v.coarsen({"x": 2}, func="mean", boundary="pad", side="right") 2005 expected = self.cls(["x"], [0, 1.5, 3.5]) 2006 assert_identical(actual, expected) 2007 2008 actual = v.coarsen({"x": 2}, func=np.mean, side="right", boundary="trim") 2009 expected = self.cls(["x"], [1.5, 3.5]) 2010 assert_identical(actual, expected) 2011 2012 # working test 2013 v = self.cls(["x", "y", "z"], np.arange(40 * 30 * 2).reshape(40, 30, 2)) 2014 for windows, func, side, boundary in [ 2015 ({"x": 2}, np.mean, "left", "trim"), 2016 ({"x": 2}, np.median, {"x": "left"}, "pad"), 2017 ({"x": 2, "y": 3}, np.max, "left", {"x": "pad", "y": "trim"}), 2018 ]: 2019 v.coarsen(windows, func, boundary, side) 2020 2021 def test_coarsen_2d(self): 2022 # 2d-mean should be the same with the successive 1d-mean 2023 v = self.cls(["x", "y"], np.arange(6 * 12).reshape(6, 12)) 2024 actual = v.coarsen({"x": 3, "y": 4}, func="mean") 2025 expected = v.coarsen({"x": 3}, func="mean").coarsen({"y": 4}, func="mean") 2026 assert_equal(actual, expected) 2027 2028 v = self.cls(["x", "y"], np.arange(7 * 12).reshape(7, 12)) 2029 actual = v.coarsen({"x": 3, "y": 4}, func="mean", boundary="trim") 2030 expected = v.coarsen({"x": 3}, func="mean", boundary="trim").coarsen( 2031 {"y": 4}, func="mean", boundary="trim" 2032 ) 2033 assert_equal(actual, expected) 2034 2035 # if there is nan, the two should be different 2036 v = self.cls(["x", "y"], 1.0 * np.arange(6 * 12).reshape(6, 12)) 2037 v[2, 4] = np.nan 2038 v[3, 5] = np.nan 2039 actual = v.coarsen({"x": 3, "y": 4}, func="mean", boundary="trim") 2040 expected = ( 2041 v.coarsen({"x": 3}, func="sum", boundary="trim").coarsen( 2042 {"y": 4}, func="sum", boundary="trim" 2043 ) 2044 / 12 2045 ) 2046 assert not actual.equals(expected) 2047 # adjusting the nan count 2048 expected[0, 1] *= 12 / 11 2049 expected[1, 1] *= 12 / 11 2050 assert_allclose(actual, expected) 2051 2052 v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4)) 2053 actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact") 2054 expected = self.cls(("x", "y"), 4 * np.ones((2, 2))) 2055 assert_equal(actual, expected) 2056 2057 v[0, 0] = np.nan 2058 v[-1, -1] = np.nan 2059 expected[0, 0] = 3 2060 expected[-1, -1] = 3 2061 actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact") 2062 assert_equal(actual, expected) 2063 2064 actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False) 2065 expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]]) 2066 assert_equal(actual, expected) 2067 2068 actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True) 2069 expected = self.cls(("x", "y"), [[10, 18], [42, 35]]) 2070 assert_equal(actual, expected) 2071 2072 # perhaps @pytest.mark.parametrize("operation", [f for f in duck_array_ops]) 2073 def test_coarsen_keep_attrs(self, operation="mean"): 2074 _attrs = {"units": "test", "long_name": "testing"} 2075 2076 test_func = getattr(duck_array_ops, operation, None) 2077 2078 # Test dropped attrs 2079 with set_options(keep_attrs=False): 2080 new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen( 2081 windows={"coord": 1}, func=test_func, boundary="exact", side="left" 2082 ) 2083 assert new.attrs == {} 2084 2085 # Test kept attrs 2086 with set_options(keep_attrs=True): 2087 new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen( 2088 windows={"coord": 1}, 2089 func=test_func, 2090 boundary="exact", 2091 side="left", 2092 ) 2093 assert new.attrs == _attrs 2094 2095 2096@requires_dask 2097class TestVariableWithDask(VariableSubclassobjects): 2098 cls = staticmethod(lambda *args: Variable(*args).chunk()) 2099 2100 @pytest.mark.xfail 2101 def test_0d_object_array_with_list(self): 2102 super().test_0d_object_array_with_list() 2103 2104 @pytest.mark.xfail 2105 def test_array_interface(self): 2106 # dask array does not have `argsort` 2107 super().test_array_interface() 2108 2109 @pytest.mark.xfail 2110 def test_copy_index(self): 2111 super().test_copy_index() 2112 2113 @pytest.mark.xfail 2114 def test_eq_all_dtypes(self): 2115 super().test_eq_all_dtypes() 2116 2117 def test_getitem_fancy(self): 2118 super().test_getitem_fancy() 2119 2120 def test_getitem_1d_fancy(self): 2121 super().test_getitem_1d_fancy() 2122 2123 def test_getitem_with_mask_nd_indexer(self): 2124 import dask.array as da 2125 2126 v = Variable(["x"], da.arange(3, chunks=3)) 2127 indexer = Variable(("x", "y"), [[0, -1], [-1, 2]]) 2128 assert_identical( 2129 v._getitem_with_mask(indexer, fill_value=-1), 2130 self.cls(("x", "y"), [[0, -1], [-1, 2]]), 2131 ) 2132 2133 @pytest.mark.parametrize("dim", ["x", "y"]) 2134 @pytest.mark.parametrize("window", [3, 8, 11]) 2135 @pytest.mark.parametrize("center", [True, False]) 2136 def test_dask_rolling(self, dim, window, center): 2137 import dask 2138 import dask.array as da 2139 2140 dask.config.set(scheduler="single-threaded") 2141 2142 x = Variable(("x", "y"), np.array(np.random.randn(100, 40), dtype=float)) 2143 dx = Variable(("x", "y"), da.from_array(x, chunks=[(6, 30, 30, 20, 14), 8])) 2144 2145 expected = x.rolling_window( 2146 dim, window, "window", center=center, fill_value=np.nan 2147 ) 2148 with raise_if_dask_computes(): 2149 actual = dx.rolling_window( 2150 dim, window, "window", center=center, fill_value=np.nan 2151 ) 2152 assert isinstance(actual.data, da.Array) 2153 assert actual.shape == expected.shape 2154 assert_equal(actual, expected) 2155 2156 2157@requires_sparse 2158class TestVariableWithSparse: 2159 # TODO inherit VariableSubclassobjects to cover more tests 2160 2161 def test_as_sparse(self): 2162 data = np.arange(12).reshape(3, 4) 2163 var = Variable(("x", "y"), data)._as_sparse(fill_value=-1) 2164 actual = var._to_dense() 2165 assert_identical(var, actual) 2166 2167 2168class TestIndexVariable(VariableSubclassobjects): 2169 cls = staticmethod(IndexVariable) 2170 2171 def test_init(self): 2172 with pytest.raises(ValueError, match=r"must be 1-dimensional"): 2173 IndexVariable((), 0) 2174 2175 def test_to_index(self): 2176 data = 0.5 * np.arange(10) 2177 v = IndexVariable(["time"], data, {"foo": "bar"}) 2178 assert pd.Index(data, name="time").identical(v.to_index()) 2179 2180 def test_multiindex_default_level_names(self): 2181 midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]]) 2182 v = IndexVariable(["x"], midx, {"foo": "bar"}) 2183 assert v.to_index().names == ("x_level_0", "x_level_1") 2184 2185 def test_data(self): 2186 x = IndexVariable("x", np.arange(3.0)) 2187 assert isinstance(x._data, PandasIndexingAdapter) 2188 assert isinstance(x.data, np.ndarray) 2189 assert float == x.dtype 2190 assert_array_equal(np.arange(3), x) 2191 assert float == x.values.dtype 2192 with pytest.raises(TypeError, match=r"cannot be modified"): 2193 x[:] = 0 2194 2195 def test_name(self): 2196 coord = IndexVariable("x", [10.0]) 2197 assert coord.name == "x" 2198 2199 with pytest.raises(AttributeError): 2200 coord.name = "y" 2201 2202 def test_level_names(self): 2203 midx = pd.MultiIndex.from_product( 2204 [["a", "b"], [1, 2]], names=["level_1", "level_2"] 2205 ) 2206 x = IndexVariable("x", midx) 2207 assert x.level_names == midx.names 2208 2209 assert IndexVariable("y", [10.0]).level_names is None 2210 2211 def test_get_level_variable(self): 2212 midx = pd.MultiIndex.from_product( 2213 [["a", "b"], [1, 2]], names=["level_1", "level_2"] 2214 ) 2215 x = IndexVariable("x", midx) 2216 level_1 = IndexVariable("x", midx.get_level_values("level_1")) 2217 assert_identical(x.get_level_variable("level_1"), level_1) 2218 2219 with pytest.raises(ValueError, match=r"has no MultiIndex"): 2220 IndexVariable("y", [10.0]).get_level_variable("level") 2221 2222 def test_concat_periods(self): 2223 periods = pd.period_range("2000-01-01", periods=10) 2224 coords = [IndexVariable("t", periods[:5]), IndexVariable("t", periods[5:])] 2225 expected = IndexVariable("t", periods) 2226 actual = IndexVariable.concat(coords, dim="t") 2227 assert_identical(actual, expected) 2228 assert isinstance(actual.to_index(), pd.PeriodIndex) 2229 2230 positions = [list(range(5)), list(range(5, 10))] 2231 actual = IndexVariable.concat(coords, dim="t", positions=positions) 2232 assert_identical(actual, expected) 2233 assert isinstance(actual.to_index(), pd.PeriodIndex) 2234 2235 def test_concat_multiindex(self): 2236 idx = pd.MultiIndex.from_product([[0, 1, 2], ["a", "b"]]) 2237 coords = [IndexVariable("x", idx[:2]), IndexVariable("x", idx[2:])] 2238 expected = IndexVariable("x", idx) 2239 actual = IndexVariable.concat(coords, dim="x") 2240 assert_identical(actual, expected) 2241 assert isinstance(actual.to_index(), pd.MultiIndex) 2242 2243 @pytest.mark.parametrize("dtype", [str, bytes]) 2244 def test_concat_str_dtype(self, dtype): 2245 2246 a = IndexVariable("x", np.array(["a"], dtype=dtype)) 2247 b = IndexVariable("x", np.array(["b"], dtype=dtype)) 2248 expected = IndexVariable("x", np.array(["a", "b"], dtype=dtype)) 2249 2250 actual = IndexVariable.concat([a, b]) 2251 assert actual.identical(expected) 2252 assert np.issubdtype(actual.dtype, dtype) 2253 2254 def test_coordinate_alias(self): 2255 with pytest.warns(Warning, match="deprecated"): 2256 x = Coordinate("x", [1, 2, 3]) 2257 assert isinstance(x, IndexVariable) 2258 2259 def test_datetime64(self): 2260 # GH:1932 Make sure indexing keeps precision 2261 t = np.array([1518418799999986560, 1518418799999996560], dtype="datetime64[ns]") 2262 v = IndexVariable("t", t) 2263 assert v[0].data == t[0] 2264 2265 # These tests make use of multi-dimensional variables, which are not valid 2266 # IndexVariable objects: 2267 @pytest.mark.skip 2268 def test_getitem_error(self): 2269 super().test_getitem_error() 2270 2271 @pytest.mark.skip 2272 def test_getitem_advanced(self): 2273 super().test_getitem_advanced() 2274 2275 @pytest.mark.skip 2276 def test_getitem_fancy(self): 2277 super().test_getitem_fancy() 2278 2279 @pytest.mark.skip 2280 def test_getitem_uint(self): 2281 super().test_getitem_fancy() 2282 2283 @pytest.mark.skip 2284 @pytest.mark.parametrize( 2285 "mode", 2286 [ 2287 "mean", 2288 "median", 2289 "reflect", 2290 "edge", 2291 "linear_ramp", 2292 "maximum", 2293 "minimum", 2294 "symmetric", 2295 "wrap", 2296 ], 2297 ) 2298 @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) 2299 def test_pad(self, mode, xr_arg, np_arg): 2300 super().test_pad(mode, xr_arg, np_arg) 2301 2302 @pytest.mark.skip 2303 def test_pad_constant_values(self, xr_arg, np_arg): 2304 super().test_pad_constant_values(xr_arg, np_arg) 2305 2306 @pytest.mark.skip 2307 def test_rolling_window(self): 2308 super().test_rolling_window() 2309 2310 @pytest.mark.skip 2311 def test_rolling_1d(self): 2312 super().test_rolling_1d() 2313 2314 @pytest.mark.skip 2315 def test_nd_rolling(self): 2316 super().test_nd_rolling() 2317 2318 @pytest.mark.skip 2319 def test_rolling_window_errors(self): 2320 super().test_rolling_window_errors() 2321 2322 @pytest.mark.skip 2323 def test_coarsen_2d(self): 2324 super().test_coarsen_2d() 2325 2326 2327class TestAsCompatibleData: 2328 def test_unchanged_types(self): 2329 types = (np.asarray, PandasIndexingAdapter, LazilyIndexedArray) 2330 for t in types: 2331 for data in [ 2332 np.arange(3), 2333 pd.date_range("2000-01-01", periods=3), 2334 pd.date_range("2000-01-01", periods=3).values, 2335 ]: 2336 x = t(data) 2337 assert source_ndarray(x) is source_ndarray(as_compatible_data(x)) 2338 2339 def test_converted_types(self): 2340 for input_array in [[[0, 1, 2]], pd.DataFrame([[0, 1, 2]])]: 2341 actual = as_compatible_data(input_array) 2342 assert_array_equal(np.asarray(input_array), actual) 2343 assert np.ndarray == type(actual) 2344 assert np.asarray(input_array).dtype == actual.dtype 2345 2346 def test_masked_array(self): 2347 original = np.ma.MaskedArray(np.arange(5)) 2348 expected = np.arange(5) 2349 actual = as_compatible_data(original) 2350 assert_array_equal(expected, actual) 2351 assert np.dtype(int) == actual.dtype 2352 2353 original = np.ma.MaskedArray(np.arange(5), mask=4 * [False] + [True]) 2354 expected = np.arange(5.0) 2355 expected[-1] = np.nan 2356 actual = as_compatible_data(original) 2357 assert_array_equal(expected, actual) 2358 assert np.dtype(float) == actual.dtype 2359 2360 def test_datetime(self): 2361 expected = np.datetime64("2000-01-01") 2362 actual = as_compatible_data(expected) 2363 assert expected == actual 2364 assert np.ndarray == type(actual) 2365 assert np.dtype("datetime64[ns]") == actual.dtype 2366 2367 expected = np.array([np.datetime64("2000-01-01")]) 2368 actual = as_compatible_data(expected) 2369 assert np.asarray(expected) == actual 2370 assert np.ndarray == type(actual) 2371 assert np.dtype("datetime64[ns]") == actual.dtype 2372 2373 expected = np.array([np.datetime64("2000-01-01", "ns")]) 2374 actual = as_compatible_data(expected) 2375 assert np.asarray(expected) == actual 2376 assert np.ndarray == type(actual) 2377 assert np.dtype("datetime64[ns]") == actual.dtype 2378 assert expected is source_ndarray(np.asarray(actual)) 2379 2380 expected = np.datetime64("2000-01-01", "ns") 2381 actual = as_compatible_data(datetime(2000, 1, 1)) 2382 assert np.asarray(expected) == actual 2383 assert np.ndarray == type(actual) 2384 assert np.dtype("datetime64[ns]") == actual.dtype 2385 2386 def test_full_like(self): 2387 # For more thorough tests, see test_variable.py 2388 orig = Variable( 2389 dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"} 2390 ) 2391 2392 expect = orig.copy(deep=True) 2393 expect.values = [[2.0, 2.0], [2.0, 2.0]] 2394 assert_identical(expect, full_like(orig, 2)) 2395 2396 # override dtype 2397 expect.values = [[True, True], [True, True]] 2398 assert expect.dtype == bool 2399 assert_identical(expect, full_like(orig, True, dtype=bool)) 2400 2401 # raise error on non-scalar fill_value 2402 with pytest.raises(ValueError, match=r"must be scalar"): 2403 full_like(orig, [1.0, 2.0]) 2404 2405 with pytest.raises(ValueError, match="'dtype' cannot be dict-like"): 2406 full_like(orig, True, dtype={"x": bool}) 2407 2408 @requires_dask 2409 def test_full_like_dask(self): 2410 orig = Variable( 2411 dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"} 2412 ).chunk(((1, 1), (2,))) 2413 2414 def check(actual, expect_dtype, expect_values): 2415 assert actual.dtype == expect_dtype 2416 assert actual.shape == orig.shape 2417 assert actual.dims == orig.dims 2418 assert actual.attrs == orig.attrs 2419 assert actual.chunks == orig.chunks 2420 assert_array_equal(actual.values, expect_values) 2421 2422 check(full_like(orig, 2), orig.dtype, np.full_like(orig.values, 2)) 2423 # override dtype 2424 check( 2425 full_like(orig, True, dtype=bool), 2426 bool, 2427 np.full_like(orig.values, True, dtype=bool), 2428 ) 2429 2430 # Check that there's no array stored inside dask 2431 # (e.g. we didn't create a numpy array and then we chunked it!) 2432 dsk = full_like(orig, 1).data.dask 2433 for v in dsk.values(): 2434 if isinstance(v, tuple): 2435 for vi in v: 2436 assert not isinstance(vi, np.ndarray) 2437 else: 2438 assert not isinstance(v, np.ndarray) 2439 2440 def test_zeros_like(self): 2441 orig = Variable( 2442 dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"} 2443 ) 2444 assert_identical(zeros_like(orig), full_like(orig, 0)) 2445 assert_identical(zeros_like(orig, dtype=int), full_like(orig, 0, dtype=int)) 2446 2447 def test_ones_like(self): 2448 orig = Variable( 2449 dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"} 2450 ) 2451 assert_identical(ones_like(orig), full_like(orig, 1)) 2452 assert_identical(ones_like(orig, dtype=int), full_like(orig, 1, dtype=int)) 2453 2454 def test_unsupported_type(self): 2455 # Non indexable type 2456 class CustomArray(NDArrayMixin): 2457 def __init__(self, array): 2458 self.array = array 2459 2460 class CustomIndexable(CustomArray, indexing.ExplicitlyIndexed): 2461 pass 2462 2463 # Type with data stored in values attribute 2464 class CustomWithValuesAttr: 2465 def __init__(self, array): 2466 self.values = array 2467 2468 array = CustomArray(np.arange(3)) 2469 orig = Variable(dims=("x"), data=array, attrs={"foo": "bar"}) 2470 assert isinstance(orig._data, np.ndarray) # should not be CustomArray 2471 2472 array = CustomIndexable(np.arange(3)) 2473 orig = Variable(dims=("x"), data=array, attrs={"foo": "bar"}) 2474 assert isinstance(orig._data, CustomIndexable) 2475 2476 array = CustomWithValuesAttr(np.arange(3)) 2477 orig = Variable(dims=(), data=array) 2478 assert isinstance(orig._data.item(), CustomWithValuesAttr) 2479 2480 2481def test_raise_no_warning_for_nan_in_binary_ops(): 2482 with pytest.warns(None) as record: 2483 Variable("x", [1, 2, np.NaN]) > 0 2484 assert len(record) == 0 2485 2486 2487class TestBackendIndexing: 2488 """Make sure all the array wrappers can be indexed.""" 2489 2490 @pytest.fixture(autouse=True) 2491 def setUp(self): 2492 self.d = np.random.random((10, 3)).astype(np.float64) 2493 2494 def check_orthogonal_indexing(self, v): 2495 assert np.allclose(v.isel(x=[8, 3], y=[2, 1]), self.d[[8, 3]][:, [2, 1]]) 2496 2497 def check_vectorized_indexing(self, v): 2498 ind_x = Variable("z", [0, 2]) 2499 ind_y = Variable("z", [2, 1]) 2500 assert np.allclose(v.isel(x=ind_x, y=ind_y), self.d[ind_x, ind_y]) 2501 2502 def test_NumpyIndexingAdapter(self): 2503 v = Variable(dims=("x", "y"), data=NumpyIndexingAdapter(self.d)) 2504 self.check_orthogonal_indexing(v) 2505 self.check_vectorized_indexing(v) 2506 # could not doubly wrapping 2507 with pytest.raises(TypeError, match=r"NumpyIndexingAdapter only wraps "): 2508 v = Variable( 2509 dims=("x", "y"), data=NumpyIndexingAdapter(NumpyIndexingAdapter(self.d)) 2510 ) 2511 2512 def test_LazilyIndexedArray(self): 2513 v = Variable(dims=("x", "y"), data=LazilyIndexedArray(self.d)) 2514 self.check_orthogonal_indexing(v) 2515 self.check_vectorized_indexing(v) 2516 # doubly wrapping 2517 v = Variable( 2518 dims=("x", "y"), 2519 data=LazilyIndexedArray(LazilyIndexedArray(self.d)), 2520 ) 2521 self.check_orthogonal_indexing(v) 2522 # hierarchical wrapping 2523 v = Variable( 2524 dims=("x", "y"), data=LazilyIndexedArray(NumpyIndexingAdapter(self.d)) 2525 ) 2526 self.check_orthogonal_indexing(v) 2527 2528 def test_CopyOnWriteArray(self): 2529 v = Variable(dims=("x", "y"), data=CopyOnWriteArray(self.d)) 2530 self.check_orthogonal_indexing(v) 2531 self.check_vectorized_indexing(v) 2532 # doubly wrapping 2533 v = Variable(dims=("x", "y"), data=CopyOnWriteArray(LazilyIndexedArray(self.d))) 2534 self.check_orthogonal_indexing(v) 2535 self.check_vectorized_indexing(v) 2536 2537 def test_MemoryCachedArray(self): 2538 v = Variable(dims=("x", "y"), data=MemoryCachedArray(self.d)) 2539 self.check_orthogonal_indexing(v) 2540 self.check_vectorized_indexing(v) 2541 # doubly wrapping 2542 v = Variable(dims=("x", "y"), data=CopyOnWriteArray(MemoryCachedArray(self.d))) 2543 self.check_orthogonal_indexing(v) 2544 self.check_vectorized_indexing(v) 2545 2546 @requires_dask 2547 def test_DaskIndexingAdapter(self): 2548 import dask.array as da 2549 2550 da = da.asarray(self.d) 2551 v = Variable(dims=("x", "y"), data=DaskIndexingAdapter(da)) 2552 self.check_orthogonal_indexing(v) 2553 self.check_vectorized_indexing(v) 2554 # doubly wrapping 2555 v = Variable(dims=("x", "y"), data=CopyOnWriteArray(DaskIndexingAdapter(da))) 2556 self.check_orthogonal_indexing(v) 2557 self.check_vectorized_indexing(v) 2558 2559 2560def test_clip(var): 2561 # Copied from test_dataarray (would there be a way to combine the tests?) 2562 result = var.clip(min=0.5) 2563 assert result.min(...) >= 0.5 2564 2565 result = var.clip(max=0.5) 2566 assert result.max(...) <= 0.5 2567 2568 result = var.clip(min=0.25, max=0.75) 2569 assert result.min(...) >= 0.25 2570 assert result.max(...) <= 0.75 2571 2572 result = var.clip(min=var.mean("x"), max=var.mean("z")) 2573 assert result.dims == var.dims 2574 assert_array_equal( 2575 result.data, 2576 np.clip( 2577 var.data, 2578 var.mean("x").data[np.newaxis, :, :], 2579 var.mean("z").data[:, :, np.newaxis], 2580 ), 2581 ) 2582 2583 2584@pytest.mark.parametrize("Var", [Variable, IndexVariable]) 2585class TestNumpyCoercion: 2586 def test_from_numpy(self, Var): 2587 v = Var("x", [1, 2, 3]) 2588 2589 assert_identical(v.as_numpy(), v) 2590 np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) 2591 2592 @requires_dask 2593 def test_from_dask(self, Var): 2594 v = Var("x", [1, 2, 3]) 2595 v_chunked = v.chunk(1) 2596 2597 assert_identical(v_chunked.as_numpy(), v.compute()) 2598 np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) 2599 2600 @requires_pint 2601 def test_from_pint(self, Var): 2602 from pint import Quantity 2603 2604 arr = np.array([1, 2, 3]) 2605 v = Var("x", Quantity(arr, units="m")) 2606 2607 assert_identical(v.as_numpy(), Var("x", arr)) 2608 np.testing.assert_equal(v.to_numpy(), arr) 2609 2610 @requires_sparse 2611 def test_from_sparse(self, Var): 2612 if Var is IndexVariable: 2613 pytest.skip("Can't have 2D IndexVariables") 2614 2615 import sparse 2616 2617 arr = np.diagflat([1, 2, 3]) 2618 sparr = sparse.COO(coords=[[0, 1, 2], [0, 1, 2]], data=[1, 2, 3]) 2619 v = Variable(["x", "y"], sparr) 2620 2621 assert_identical(v.as_numpy(), Variable(["x", "y"], arr)) 2622 np.testing.assert_equal(v.to_numpy(), arr) 2623 2624 @requires_cupy 2625 def test_from_cupy(self, Var): 2626 import cupy as cp 2627 2628 arr = np.array([1, 2, 3]) 2629 v = Var("x", cp.array(arr)) 2630 2631 assert_identical(v.as_numpy(), Var("x", arr)) 2632 np.testing.assert_equal(v.to_numpy(), arr) 2633 2634 @requires_dask 2635 @requires_pint 2636 def test_from_pint_wrapping_dask(self, Var): 2637 import dask 2638 from pint import Quantity 2639 2640 arr = np.array([1, 2, 3]) 2641 d = dask.array.from_array(np.array([1, 2, 3])) 2642 v = Var("x", Quantity(d, units="m")) 2643 2644 result = v.as_numpy() 2645 assert_identical(result, Var("x", arr)) 2646 np.testing.assert_equal(v.to_numpy(), arr) 2647