1import contextlib 2import copy 3import pathlib 4import xml.etree.ElementTree 5from unittest import mock 6 7import pytest 8 9np = pytest.importorskip("numpy") 10 11import operator 12import os 13import time 14import warnings 15from io import StringIO 16from operator import add, sub 17from threading import Lock 18 19from numpy import nancumprod, nancumsum 20from tlz import concat, countby, merge 21from tlz.curried import identity 22 23import dask 24import dask.array as da 25from dask.array.core import ( 26 Array, 27 BlockView, 28 blockdims_from_blockshape, 29 broadcast_chunks, 30 broadcast_shapes, 31 broadcast_to, 32 common_blockdim, 33 concatenate, 34 concatenate3, 35 concatenate_axes, 36 dotmany, 37 from_array, 38 from_delayed, 39 from_func, 40 getem, 41 getter, 42 normalize_chunks, 43 optimize, 44 stack, 45 store, 46) 47from dask.array.utils import assert_eq, same_keys 48from dask.base import compute_as_if_collection, tokenize 49from dask.blockwise import broadcast_dimensions 50from dask.blockwise import make_blockwise_graph as top 51from dask.blockwise import optimize_blockwise 52from dask.delayed import Delayed, delayed 53from dask.utils import apply, key_split, tmpdir, tmpfile 54from dask.utils_test import dec, inc 55 56from ..chunk import getitem 57from .test_dispatch import EncapsulateNDArray 58 59 60def test_getem(): 61 sol = { 62 ("X", 0, 0): (getter, "X", (slice(0, 2), slice(0, 3))), 63 ("X", 1, 0): (getter, "X", (slice(2, 4), slice(0, 3))), 64 ("X", 1, 1): (getter, "X", (slice(2, 4), slice(3, 6))), 65 ("X", 0, 1): (getter, "X", (slice(0, 2), slice(3, 6))), 66 } 67 assert getem("X", (2, 3), shape=(4, 6)) == sol 68 69 70def test_top(): 71 assert top(inc, "z", "ij", "x", "ij", numblocks={"x": (2, 2)}) == { 72 ("z", 0, 0): (inc, ("x", 0, 0)), 73 ("z", 0, 1): (inc, ("x", 0, 1)), 74 ("z", 1, 0): (inc, ("x", 1, 0)), 75 ("z", 1, 1): (inc, ("x", 1, 1)), 76 } 77 78 assert top( 79 add, "z", "ij", "x", "ij", "y", "ij", numblocks={"x": (2, 2), "y": (2, 2)} 80 ) == { 81 ("z", 0, 0): (add, ("x", 0, 0), ("y", 0, 0)), 82 ("z", 0, 1): (add, ("x", 0, 1), ("y", 0, 1)), 83 ("z", 1, 0): (add, ("x", 1, 0), ("y", 1, 0)), 84 ("z", 1, 1): (add, ("x", 1, 1), ("y", 1, 1)), 85 } 86 87 assert top( 88 dotmany, "z", "ik", "x", "ij", "y", "jk", numblocks={"x": (2, 2), "y": (2, 2)} 89 ) == { 90 ("z", 0, 0): (dotmany, [("x", 0, 0), ("x", 0, 1)], [("y", 0, 0), ("y", 1, 0)]), 91 ("z", 0, 1): (dotmany, [("x", 0, 0), ("x", 0, 1)], [("y", 0, 1), ("y", 1, 1)]), 92 ("z", 1, 0): (dotmany, [("x", 1, 0), ("x", 1, 1)], [("y", 0, 0), ("y", 1, 0)]), 93 ("z", 1, 1): (dotmany, [("x", 1, 0), ("x", 1, 1)], [("y", 0, 1), ("y", 1, 1)]), 94 } 95 96 assert top(identity, "z", "", "x", "ij", numblocks={"x": (2, 2)}) == { 97 ("z",): (identity, [[("x", 0, 0), ("x", 0, 1)], [("x", 1, 0), ("x", 1, 1)]]) 98 } 99 100 101def test_top_with_kwargs(): 102 assert top(add, "z", "i", "x", "i", numblocks={"x": (2, 0)}, b=100) == { 103 ("z", 0): (apply, add, [("x", 0)], {"b": 100}), 104 ("z", 1): (apply, add, [("x", 1)], {"b": 100}), 105 } 106 107 108def test_top_supports_broadcasting_rules(): 109 assert top( 110 add, "z", "ij", "x", "ij", "y", "ij", numblocks={"x": (1, 2), "y": (2, 1)} 111 ) == { 112 ("z", 0, 0): (add, ("x", 0, 0), ("y", 0, 0)), 113 ("z", 0, 1): (add, ("x", 0, 1), ("y", 0, 0)), 114 ("z", 1, 0): (add, ("x", 0, 0), ("y", 1, 0)), 115 ("z", 1, 1): (add, ("x", 0, 1), ("y", 1, 0)), 116 } 117 118 119def test_top_literals(): 120 assert top(add, "z", "ij", "x", "ij", 123, None, numblocks={"x": (2, 2)}) == { 121 ("z", 0, 0): (add, ("x", 0, 0), 123), 122 ("z", 0, 1): (add, ("x", 0, 1), 123), 123 ("z", 1, 0): (add, ("x", 1, 0), 123), 124 ("z", 1, 1): (add, ("x", 1, 1), 123), 125 } 126 127 128def test_blockwise_literals(): 129 x = da.ones((10, 10), chunks=(5, 5)) 130 z = da.blockwise(add, "ij", x, "ij", 100, None, dtype=x.dtype) 131 assert_eq(z, x + 100) 132 133 z = da.blockwise( 134 lambda x, y, z: x * y + z, "ij", 2, None, x, "ij", 100, None, dtype=x.dtype 135 ) 136 assert_eq(z, 2 * x + 100) 137 138 z = da.blockwise(getitem, "ij", x, "ij", slice(None), None, dtype=x.dtype) 139 assert_eq(z, x) 140 141 142def test_blockwise_1_in_shape_I(): 143 def test_f(a, b): 144 assert 1 in b.shape 145 146 p, k, N = 7, 2, 5 147 da.blockwise( 148 test_f, 149 "x", 150 da.zeros((2 * p, 9, k * N), chunks=(p, 3, k)), 151 "xzt", 152 da.zeros((2 * p, 9, 1), chunks=(p, 3, -1)), 153 "xzt", 154 concatenate=True, 155 dtype=float, 156 ).compute() 157 158 159def test_blockwise_1_in_shape_II(): 160 def test_f(a, b): 161 assert 1 in b.shape 162 163 p, k, N = 7, 2, 5 164 da.blockwise( 165 test_f, 166 "x", 167 da.zeros((2 * p, 9, k * N, 8), chunks=(p, 9, k, 4)), 168 "xztu", 169 da.zeros((2 * p, 9, 1, 8), chunks=(p, 9, -1, 4)), 170 "xztu", 171 concatenate=True, 172 dtype=float, 173 ).compute() 174 175 176def test_blockwise_1_in_shape_III(): 177 def test_f(a, b): 178 assert 1 in b.shape 179 180 k, N = 2, 5 181 da.blockwise( 182 test_f, 183 "x", 184 da.zeros((k * N, 9, 8), chunks=(k, 3, 4)), 185 "xtu", 186 da.zeros((1, 9, 8), chunks=(-1, 3, 4)), 187 "xtu", 188 concatenate=True, 189 dtype=float, 190 ).compute() 191 192 193def test_concatenate3_on_scalars(): 194 assert_eq(concatenate3([1, 2]), np.array([1, 2])) 195 196 197def test_chunked_dot_product(): 198 x = np.arange(400).reshape((20, 20)) 199 o = np.ones((20, 20)) 200 201 d = {"x": x, "o": o} 202 203 getx = getem("x", (5, 5), shape=(20, 20)) 204 geto = getem("o", (5, 5), shape=(20, 20)) 205 206 result = top( 207 dotmany, "out", "ik", "x", "ij", "o", "jk", numblocks={"x": (4, 4), "o": (4, 4)} 208 ) 209 210 dsk = merge(d, getx, geto, result) 211 out = dask.get(dsk, [[("out", i, j) for j in range(4)] for i in range(4)]) 212 213 assert_eq(np.dot(x, o), concatenate3(out)) 214 215 216def test_chunked_transpose_plus_one(): 217 x = np.arange(400).reshape((20, 20)) 218 219 d = {"x": x} 220 221 getx = getem("x", (5, 5), shape=(20, 20)) 222 223 f = lambda x: x.T + 1 224 comp = top(f, "out", "ij", "x", "ji", numblocks={"x": (4, 4)}) 225 226 dsk = merge(d, getx, comp) 227 out = dask.get(dsk, [[("out", i, j) for j in range(4)] for i in range(4)]) 228 229 assert_eq(concatenate3(out), x.T + 1) 230 231 232def test_broadcast_dimensions_works_with_singleton_dimensions(): 233 argpairs = [("x", "i")] 234 numblocks = {"x": ((1,),)} 235 assert broadcast_dimensions(argpairs, numblocks) == {"i": (1,)} 236 237 238def test_broadcast_dimensions(): 239 argpairs = [("x", "ij"), ("y", "ij")] 240 d = {"x": ("Hello", 1), "y": (1, (2, 3))} 241 assert broadcast_dimensions(argpairs, d) == {"i": "Hello", "j": (2, 3)} 242 243 244def test_Array(): 245 shape = (1000, 1000) 246 chunks = (100, 100) 247 name = "x" 248 dsk = merge({name: "some-array"}, getem(name, chunks, shape=shape)) 249 a = Array(dsk, name, chunks, shape=shape, dtype="f8") 250 251 assert a.numblocks == (10, 10) 252 253 assert a.__dask_keys__() == [[("x", i, j) for j in range(10)] for i in range(10)] 254 255 assert a.chunks == ((100,) * 10, (100,) * 10) 256 257 assert a.shape == shape 258 259 assert len(a) == shape[0] 260 261 with pytest.raises(ValueError): 262 Array(dsk, name, chunks, shape=shape) 263 with pytest.raises(TypeError): 264 Array(dsk, name, chunks, shape=shape, dtype="f8", meta=np.empty(0, 0)) 265 266 267def test_uneven_chunks(): 268 a = Array({}, "x", chunks=(3, 3), shape=(10, 10), dtype="f8") 269 assert a.chunks == ((3, 3, 3, 1), (3, 3, 3, 1)) 270 271 272def test_numblocks_suppoorts_singleton_block_dims(): 273 shape = (100, 10) 274 chunks = (10, 10) 275 name = "x" 276 dsk = merge({name: "some-array"}, getem(name, shape=shape, chunks=chunks)) 277 a = Array(dsk, name, chunks, shape=shape, dtype="f8") 278 279 assert set(concat(a.__dask_keys__())) == {("x", i, 0) for i in range(10)} 280 281 282def test_keys(): 283 dsk = {("x", i, j): () for i in range(5) for j in range(6)} 284 dx = Array(dsk, "x", chunks=(10, 10), shape=(50, 60), dtype="f8") 285 assert dx.__dask_keys__() == [[(dx.name, i, j) for j in range(6)] for i in range(5)] 286 # Cache works 287 assert dx.__dask_keys__() is dx.__dask_keys__() 288 # Test mutating names clears key cache 289 dx.dask = {("y", i, j): () for i in range(5) for j in range(6)} 290 dx._name = "y" 291 new_keys = [[(dx.name, i, j) for j in range(6)] for i in range(5)] 292 assert dx.__dask_keys__() == new_keys 293 assert np.array_equal(dx._key_array, np.array(new_keys, dtype="object")) 294 d = Array({}, "x", (), shape=(), dtype="f8") 295 assert d.__dask_keys__() == [("x",)] 296 297 298def test_Array_computation(): 299 a = Array({("x", 0, 0): np.eye(3)}, "x", shape=(3, 3), chunks=(3, 3), dtype="f8") 300 assert_eq(np.array(a), np.eye(3)) 301 assert isinstance(a.compute(), np.ndarray) 302 assert float(a[0, 0]) == 1 303 304 305def test_Array_numpy_gufunc_call__array_ufunc__01(): 306 x = da.random.normal(size=(3, 10, 10), chunks=(2, 10, 10)) 307 nx = x.compute() 308 ny = np.linalg._umath_linalg.inv(nx) 309 y = np.linalg._umath_linalg.inv(x) 310 assert_eq(ny, y) 311 312 313def test_Array_numpy_gufunc_call__array_ufunc__02(): 314 x = da.random.normal(size=(3, 10, 10), chunks=(2, 10, 10)) 315 nx = x.compute() 316 nw, nv = np.linalg._umath_linalg.eig(nx) 317 w, v = np.linalg._umath_linalg.eig(x) 318 assert_eq(nw, w) 319 assert_eq(nv, v) 320 321 322def test_stack(): 323 a, b, c = ( 324 Array( 325 getem(name, chunks=(2, 3), shape=(4, 6)), 326 name, 327 chunks=(2, 3), 328 dtype="f8", 329 shape=(4, 6), 330 ) 331 for name in "ABC" 332 ) 333 334 s = stack([a, b, c], axis=0) 335 336 colon = slice(None, None, None) 337 338 assert s.shape == (3, 4, 6) 339 assert s.chunks == ((1, 1, 1), (2, 2), (3, 3)) 340 assert s.chunksize == (1, 2, 3) 341 assert s.dask[(s.name, 0, 1, 0)] == (getitem, ("A", 1, 0), (None, colon, colon)) 342 assert s.dask[(s.name, 2, 1, 0)] == (getitem, ("C", 1, 0), (None, colon, colon)) 343 assert same_keys(s, stack([a, b, c], axis=0)) 344 345 s2 = stack([a, b, c], axis=1) 346 assert s2.shape == (4, 3, 6) 347 assert s2.chunks == ((2, 2), (1, 1, 1), (3, 3)) 348 assert s2.chunksize == (2, 1, 3) 349 assert s2.dask[(s2.name, 0, 1, 0)] == (getitem, ("B", 0, 0), (colon, None, colon)) 350 assert s2.dask[(s2.name, 1, 1, 0)] == (getitem, ("B", 1, 0), (colon, None, colon)) 351 assert same_keys(s2, stack([a, b, c], axis=1)) 352 353 s2 = stack([a, b, c], axis=2) 354 assert s2.shape == (4, 6, 3) 355 assert s2.chunks == ((2, 2), (3, 3), (1, 1, 1)) 356 assert s2.chunksize == (2, 3, 1) 357 assert s2.dask[(s2.name, 0, 1, 0)] == (getitem, ("A", 0, 1), (colon, colon, None)) 358 assert s2.dask[(s2.name, 1, 1, 2)] == (getitem, ("C", 1, 1), (colon, colon, None)) 359 assert same_keys(s2, stack([a, b, c], axis=2)) 360 361 pytest.raises(ValueError, lambda: stack([])) 362 pytest.raises(ValueError, lambda: stack([a, b, c], axis=3)) 363 364 assert set(b.dask.keys()).issubset(s2.dask.keys()) 365 366 assert stack([a, b, c], axis=-1).chunks == stack([a, b, c], axis=2).chunks 367 368 369def test_stack_zero_size(): 370 x = np.empty((2, 0, 3)) 371 y = da.from_array(x, chunks=1) 372 373 result_np = np.concatenate([x, x]) 374 result_da = da.concatenate([y, y]) 375 376 assert_eq(result_np, result_da) 377 378 379def test_short_stack(): 380 x = np.array([1]) 381 d = da.from_array(x, chunks=(1,)) 382 s = da.stack([d]) 383 assert s.shape == (1, 1) 384 chunks = compute_as_if_collection(Array, s.dask, s.__dask_keys__()) 385 assert chunks[0][0].shape == (1, 1) 386 387 388def test_stack_scalars(): 389 d = da.arange(4, chunks=2) 390 391 s = da.stack([d.mean(), d.sum()]) 392 393 assert s.compute().tolist() == [np.arange(4).mean(), np.arange(4).sum()] 394 395 396def test_stack_promote_type(): 397 i = np.arange(10, dtype="i4") 398 f = np.arange(10, dtype="f4") 399 di = da.from_array(i, chunks=5) 400 df = da.from_array(f, chunks=5) 401 res = da.stack([di, df]) 402 assert_eq(res, np.stack([i, f])) 403 404 405def test_stack_rechunk(): 406 x = da.random.random(10, chunks=5) 407 y = da.random.random(10, chunks=4) 408 409 z = da.stack([x, y], axis=0) 410 assert z.shape == (2, 10) 411 assert z.chunks == ((1, 1), (4, 1, 3, 2)) 412 413 assert_eq(z, np.stack([x.compute(), y.compute()], axis=0)) 414 415 416def test_stack_unknown_chunksizes(): 417 dd = pytest.importorskip("dask.dataframe") 418 pd = pytest.importorskip("pandas") 419 420 a_df = pd.DataFrame({"x": np.arange(12)}) 421 b_df = pd.DataFrame({"y": np.arange(12) * 10}) 422 423 a_ddf = dd.from_pandas(a_df, sort=False, npartitions=3) 424 b_ddf = dd.from_pandas(b_df, sort=False, npartitions=3) 425 426 a_x = a_ddf.values 427 b_x = b_ddf.values 428 429 assert np.isnan(a_x.shape[0]) 430 assert np.isnan(b_x.shape[0]) 431 432 with pytest.raises(ValueError) as exc_info: 433 da.stack([a_x, b_x], axis=0) 434 435 assert "shape" in str(exc_info.value) 436 assert "nan" in str(exc_info.value) 437 438 c_x = da.stack([a_x, b_x], axis=0, allow_unknown_chunksizes=True) 439 440 assert_eq(c_x, np.stack([a_df.values, b_df.values], axis=0)) 441 442 with pytest.raises(ValueError) as exc_info: 443 da.stack([a_x, b_x], axis=1) 444 445 assert "shape" in str(exc_info.value) 446 assert "nan" in str(exc_info.value) 447 448 c_x = da.stack([a_x, b_x], axis=1, allow_unknown_chunksizes=True) 449 450 assert_eq(c_x, np.stack([a_df.values, b_df.values], axis=1)) 451 452 m_df = pd.DataFrame({"m": np.arange(12) * 100}) 453 n_df = pd.DataFrame({"n": np.arange(12) * 1000}) 454 455 m_ddf = dd.from_pandas(m_df, sort=False, npartitions=3) 456 n_ddf = dd.from_pandas(n_df, sort=False, npartitions=3) 457 458 m_x = m_ddf.values 459 n_x = n_ddf.values 460 461 assert np.isnan(m_x.shape[0]) 462 assert np.isnan(n_x.shape[0]) 463 464 with pytest.raises(ValueError) as exc_info: 465 da.stack([[a_x, b_x], [m_x, n_x]]) 466 467 assert "shape" in str(exc_info.value) 468 assert "nan" in str(exc_info.value) 469 470 c_x = da.stack([[a_x, b_x], [m_x, n_x]], allow_unknown_chunksizes=True) 471 472 assert_eq(c_x, np.stack([[a_df.values, b_df.values], [m_df.values, n_df.values]])) 473 474 475def test_concatenate(): 476 a, b, c = ( 477 Array( 478 getem(name, chunks=(2, 3), shape=(4, 6)), 479 name, 480 chunks=(2, 3), 481 dtype="f8", 482 shape=(4, 6), 483 ) 484 for name in "ABC" 485 ) 486 487 x = concatenate([a, b, c], axis=0) 488 489 assert x.shape == (12, 6) 490 assert x.chunks == ((2, 2, 2, 2, 2, 2), (3, 3)) 491 assert x.dask[(x.name, 0, 1)] == ("A", 0, 1) 492 assert x.dask[(x.name, 5, 0)] == ("C", 1, 0) 493 assert same_keys(x, concatenate([a, b, c], axis=0)) 494 495 y = concatenate([a, b, c], axis=1) 496 497 assert y.shape == (4, 18) 498 assert y.chunks == ((2, 2), (3, 3, 3, 3, 3, 3)) 499 assert y.dask[(y.name, 1, 0)] == ("A", 1, 0) 500 assert y.dask[(y.name, 1, 5)] == ("C", 1, 1) 501 assert same_keys(y, concatenate([a, b, c], axis=1)) 502 503 assert set(b.dask.keys()).issubset(y.dask.keys()) 504 505 z = concatenate([a], axis=0) 506 507 assert z.shape == a.shape 508 assert z.chunks == a.chunks 509 assert z.dask == a.dask 510 assert z is a 511 512 assert ( 513 concatenate([a, b, c], axis=-1).chunks == concatenate([a, b, c], axis=1).chunks 514 ) 515 516 pytest.raises(ValueError, lambda: concatenate([])) 517 pytest.raises(ValueError, lambda: concatenate([a, b, c], axis=2)) 518 519 520@pytest.mark.parametrize( 521 "dtypes", [((">f8", ">f8"), "float64"), (("<f4", "<f8"), "float64")] 522) 523def test_concatenate_types(dtypes): 524 dts_in, dt_out = dtypes 525 arrs = [np.zeros(4, dtype=dt) for dt in dts_in] 526 darrs = [from_array(arr, chunks=(2,)) for arr in arrs] 527 528 x = concatenate(darrs, axis=0) 529 assert x.dtype == dt_out 530 531 532def test_concatenate_unknown_axes(): 533 dd = pytest.importorskip("dask.dataframe") 534 pd = pytest.importorskip("pandas") 535 536 a_df = pd.DataFrame({"x": np.arange(12)}) 537 b_df = pd.DataFrame({"y": np.arange(12) * 10}) 538 539 a_ddf = dd.from_pandas(a_df, sort=False, npartitions=3) 540 b_ddf = dd.from_pandas(b_df, sort=False, npartitions=3) 541 542 a_x = a_ddf.values 543 b_x = b_ddf.values 544 545 assert np.isnan(a_x.shape[0]) 546 assert np.isnan(b_x.shape[0]) 547 548 da.concatenate([a_x, b_x], axis=0) # works fine 549 550 with pytest.raises(ValueError) as exc_info: 551 da.concatenate([a_x, b_x], axis=1) # unknown chunks 552 553 assert "nan" in str(exc_info.value) 554 assert "allow_unknown_chunksize" in str(exc_info.value) 555 556 c_x = da.concatenate( 557 [a_x, b_x], axis=1, allow_unknown_chunksizes=True 558 ) # unknown chunks 559 560 assert_eq(c_x, np.concatenate([a_df.values, b_df.values], axis=1)) 561 562 563def test_concatenate_rechunk(): 564 x = da.random.random((6, 6), chunks=(3, 3)) 565 y = da.random.random((6, 6), chunks=(2, 2)) 566 567 z = da.concatenate([x, y], axis=0) 568 assert z.shape == (12, 6) 569 assert z.chunks == ((3, 3, 2, 2, 2), (2, 1, 1, 2)) 570 assert_eq(z, np.concatenate([x.compute(), y.compute()], axis=0)) 571 572 z = da.concatenate([x, y], axis=1) 573 assert z.shape == (6, 12) 574 assert z.chunks == ((2, 1, 1, 2), (3, 3, 2, 2, 2)) 575 assert_eq(z, np.concatenate([x.compute(), y.compute()], axis=1)) 576 577 578def test_concatenate_fixlen_strings(): 579 x = np.array(["a", "b", "c"]) 580 y = np.array(["aa", "bb", "cc"]) 581 582 a = da.from_array(x, chunks=(2,)) 583 b = da.from_array(y, chunks=(2,)) 584 585 assert_eq(np.concatenate([x, y]), da.concatenate([a, b])) 586 587 588def test_concatenate_zero_size(): 589 590 x = np.random.random(10) 591 y = da.from_array(x, chunks=3) 592 result_np = np.concatenate([x, x[:0]]) 593 result_da = da.concatenate([y, y[:0]]) 594 assert_eq(result_np, result_da) 595 assert result_da is y 596 597 # dtype of a size 0 arrays can affect the output dtype 598 result_np = np.concatenate([np.zeros(0, dtype=float), np.zeros(1, dtype=int)]) 599 result_da = da.concatenate([da.zeros(0, dtype=float), da.zeros(1, dtype=int)]) 600 601 assert_eq(result_np, result_da) 602 603 # All empty arrays case 604 result_np = np.concatenate([np.zeros(0), np.zeros(0)]) 605 result_da = da.concatenate([da.zeros(0), da.zeros(0)]) 606 607 assert_eq(result_np, result_da) 608 609 610def test_block_simple_row_wise(): 611 a1 = np.ones((2, 2)) 612 a2 = 2 * a1 613 614 d1 = da.asarray(a1) 615 d2 = da.asarray(a2) 616 617 expected = np.block([a1, a2]) 618 result = da.block([d1, d2]) 619 620 assert_eq(expected, result) 621 622 expected = np.block([a1, a2[:, :0]]) 623 result = da.block([d1, d2[:, :0]]) 624 625 assert result is d1 626 assert_eq(expected, result) 627 628 629def test_block_simple_column_wise(): 630 a1 = np.ones((2, 2)) 631 a2 = 2 * a1 632 633 d1 = da.asarray(a1) 634 d2 = da.asarray(a2) 635 636 expected = np.block([[a1], [a2]]) 637 result = da.block([[d1], [d2]]) 638 639 assert_eq(expected, result) 640 641 642def test_block_with_1d_arrays_row_wise(): 643 # # # 1-D vectors are treated as row arrays 644 a1 = np.array([1, 2, 3]) 645 a2 = np.array([2, 3, 4]) 646 647 d1 = da.asarray(a1) 648 d2 = da.asarray(a2) 649 650 expected = np.block([a1, a2]) 651 result = da.block([d1, d2]) 652 653 assert_eq(expected, result) 654 655 expected = np.block([a1, a2[:0]]) 656 result = da.block([d1, d2[:0]]) 657 658 assert result is d1 659 assert_eq(expected, result) 660 661 662def test_block_with_1d_arrays_multiple_rows(): 663 a1 = np.array([1, 2, 3]) 664 a2 = np.array([2, 3, 4]) 665 666 d1 = da.asarray(a1) 667 d2 = da.asarray(a2) 668 669 expected = np.block([[a1, a2], [a1, a2]]) 670 result = da.block([[d1, d2], [d1, d2]]) 671 672 assert_eq(expected, result) 673 674 675def test_block_with_1d_arrays_column_wise(): 676 # # # 1-D vectors are treated as row arrays 677 a1 = np.array([1, 2, 3]) 678 a2 = np.array([2, 3, 4]) 679 680 d1 = da.asarray(a1) 681 d2 = da.asarray(a2) 682 683 expected = np.block([[a1], [a2]]) 684 result = da.block([[d1], [d2]]) 685 686 assert_eq(expected, result) 687 688 689def test_block_mixed_1d_and_2d(): 690 a1 = np.ones((2, 2)) 691 a2 = np.array([2, 2]) 692 693 d1 = da.asarray(a1) 694 d2 = da.asarray(a2) 695 696 expected = np.block([[d1], [d2]]) 697 result = da.block([[a1], [a2]]) 698 699 assert_eq(expected, result) 700 701 702def test_block_complicated(): 703 # a bit more complicated 704 a1 = np.array([[1, 1, 1]]) 705 a2 = np.array([[2, 2, 2]]) 706 a3 = np.array([[3, 3, 3, 3, 3, 3]]) 707 a4 = np.array([4, 4, 4, 4, 4, 4]) 708 a5 = np.array(5) 709 a6 = np.array([6, 6, 6, 6, 6]) 710 a7 = np.zeros((2, 6)) 711 712 d1 = da.asarray(a1) 713 d2 = da.asarray(a2) 714 d3 = da.asarray(a3) 715 d4 = da.asarray(a4) 716 d5 = da.asarray(a5) 717 d6 = da.asarray(a6) 718 d7 = da.asarray(a7) 719 720 expected = np.block([[a1, a2], [a3], [a4], [a5, a6], [a7]]) 721 result = da.block([[d1, d2], [d3], [d4], [d5, d6], [d7]]) 722 723 assert_eq(expected, result) 724 725 726def test_block_nested(): 727 a1 = np.array([1, 1, 1]) 728 a2 = np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]]) 729 a3 = np.array([3, 3, 3]) 730 a4 = np.array([4, 4, 4]) 731 a5 = np.array(5) 732 a6 = np.array([6, 6, 6, 6, 6]) 733 a7 = np.zeros((2, 6)) 734 735 d1 = da.asarray(a1) 736 d2 = da.asarray(a2) 737 d3 = da.asarray(a3) 738 d4 = da.asarray(a4) 739 d5 = da.asarray(a5) 740 d6 = da.asarray(a6) 741 d7 = da.asarray(a7) 742 743 expected = np.block([[np.block([[a1], [a3], [a4]]), a2], [a5, a6], [a7]]) 744 result = da.block([[da.block([[d1], [d3], [d4]]), d2], [d5, d6], [d7]]) 745 746 assert_eq(expected, result) 747 748 749def test_block_3d(): 750 a000 = np.ones((2, 2, 2), int) * 1 751 752 a100 = np.ones((3, 2, 2), int) * 2 753 a010 = np.ones((2, 3, 2), int) * 3 754 a001 = np.ones((2, 2, 3), int) * 4 755 756 a011 = np.ones((2, 3, 3), int) * 5 757 a101 = np.ones((3, 2, 3), int) * 6 758 a110 = np.ones((3, 3, 2), int) * 7 759 760 a111 = np.ones((3, 3, 3), int) * 8 761 762 d000 = da.asarray(a000) 763 764 d100 = da.asarray(a100) 765 d010 = da.asarray(a010) 766 d001 = da.asarray(a001) 767 768 d011 = da.asarray(a011) 769 d101 = da.asarray(a101) 770 d110 = da.asarray(a110) 771 772 d111 = da.asarray(a111) 773 774 expected = np.block([[[a000, a001], [a010, a011]], [[a100, a101], [a110, a111]]]) 775 result = da.block([[[d000, d001], [d010, d011]], [[d100, d101], [d110, d111]]]) 776 777 assert_eq(expected, result) 778 779 expected = np.block( 780 [ 781 [[a000, a001[:, :, :0]], [a010[:, :0, :], a011[:, :0, :0]]], 782 [[a100[:0, :, :], a101[:0, :, :0]], [a110[:0, :0, :], a111[:0, :0, :0]]], 783 ] 784 ) 785 result = da.block( 786 [ 787 [[d000, d001[:, :, :0]], [d010[:, :0, :], d011[:, :0, :0]]], 788 [[d100[:0, :, :], d101[:0, :, :0]], [d110[:0, :0, :], d111[:0, :0, :0]]], 789 ] 790 ) 791 792 assert result is d000 793 assert_eq(expected, result) 794 795 796def test_block_with_mismatched_shape(): 797 a = np.array([0, 0]) 798 b = np.eye(2) 799 800 for arrays in [[a, b], [b, a]]: 801 with pytest.raises(ValueError): 802 da.block(arrays) 803 804 805def test_block_no_lists(): 806 assert_eq(da.block(1), np.block(1)) 807 assert_eq(da.block(np.eye(3)), np.block(np.eye(3))) 808 809 810def test_block_invalid_nesting(): 811 for arrays in [ 812 [1, [2]], 813 [1, []], 814 [[1], 2], 815 [[], 2], 816 [[[1], [2]], [[3, 4]], [5]], # missing brackets 817 ]: 818 with pytest.raises(ValueError) as e: 819 da.block(arrays) 820 e.match(r"depths are mismatched") 821 822 823def test_block_empty_lists(): 824 for arrays in [[], [[]], [[1], []]]: 825 with pytest.raises(ValueError) as e: 826 da.block(arrays) 827 e.match(r"empty") 828 829 830def test_block_tuple(): 831 for arrays in [([1, 2], [3, 4]), [(1, 2), (3, 4)]]: 832 with pytest.raises(TypeError) as e: 833 da.block(arrays) 834 e.match(r"tuple") 835 836 837def test_broadcast_shapes(): 838 with warnings.catch_warnings(record=True) as record: 839 assert () == broadcast_shapes() 840 assert (2, 5) == broadcast_shapes((2, 5)) 841 assert (0, 5) == broadcast_shapes((0, 1), (1, 5)) 842 assert np.allclose( 843 (2, np.nan), broadcast_shapes((1, np.nan), (2, 1)), equal_nan=True 844 ) 845 assert np.allclose( 846 (2, np.nan), broadcast_shapes((2, 1), (1, np.nan)), equal_nan=True 847 ) 848 assert (3, 4, 5) == broadcast_shapes((3, 4, 5), (4, 1), ()) 849 assert (3, 4) == broadcast_shapes((3, 1), (1, 4), (4,)) 850 assert (5, 6, 7, 3, 4) == broadcast_shapes((3, 1), (), (5, 6, 7, 1, 4)) 851 852 assert not record 853 854 pytest.raises(ValueError, lambda: broadcast_shapes((3,), (3, 4))) 855 pytest.raises(ValueError, lambda: broadcast_shapes((2, 3), (2, 3, 1))) 856 pytest.raises(ValueError, lambda: broadcast_shapes((2, 3), (1, np.nan))) 857 858 859def test_elemwise_on_scalars(): 860 x = np.arange(10, dtype=np.int64) 861 a = from_array(x, chunks=(5,)) 862 assert len(a.__dask_keys__()) == 2 863 assert_eq(a.sum() ** 2, x.sum() ** 2) 864 865 y = np.arange(10, dtype=np.int32) 866 b = from_array(y, chunks=(5,)) 867 result = a.sum() * b 868 # Dask 0-d arrays do not behave like numpy scalars for type promotion 869 assert result.dtype == np.int64 870 assert result.compute().dtype == np.int64 871 assert (x.sum() * y).dtype == np.int32 872 assert_eq((x.sum() * y).astype(np.int64), result) 873 874 875def test_elemwise_with_ndarrays(): 876 x = np.arange(3) 877 y = np.arange(12).reshape(4, 3) 878 a = from_array(x, chunks=(3,)) 879 b = from_array(y, chunks=(2, 3)) 880 881 assert_eq(x + a, 2 * x) 882 assert_eq(a + x, 2 * x) 883 884 assert_eq(x + b, x + y) 885 assert_eq(b + x, x + y) 886 assert_eq(a + y, x + y) 887 assert_eq(y + a, x + y) 888 # Error on shape mismatch 889 pytest.raises(ValueError, lambda: a + y.T) 890 pytest.raises(ValueError, lambda: a + np.arange(2)) 891 892 893def test_elemwise_differently_chunked(): 894 x = np.arange(3) 895 y = np.arange(12).reshape(4, 3) 896 a = from_array(x, chunks=(3,)) 897 b = from_array(y, chunks=(2, 2)) 898 899 assert_eq(a + b, x + y) 900 assert_eq(b + a, x + y) 901 902 903def test_elemwise_dtype(): 904 values = [ 905 da.from_array(np.ones(5, np.float32), chunks=3), 906 da.from_array(np.ones(5, np.int16), chunks=3), 907 da.from_array(np.ones(5, np.int64), chunks=3), 908 da.from_array(np.ones((), np.float64), chunks=()) * 1e200, 909 np.ones(5, np.float32), 910 1, 911 1.0, 912 1e200, 913 np.int64(1), 914 np.ones((), np.int64), 915 ] 916 for x in values: 917 for y in values: 918 assert da.maximum(x, y).dtype == da.result_type(x, y) 919 920 921def test_operators(): 922 x = np.arange(10) 923 y = np.arange(10).reshape((10, 1)) 924 a = from_array(x, chunks=(5,)) 925 b = from_array(y, chunks=(5, 1)) 926 927 c = a + 1 928 assert_eq(c, x + 1) 929 930 c = a + b 931 assert_eq(c, x + x.reshape((10, 1))) 932 933 expr = (3 / a * b) ** 2 > 5 934 with pytest.warns(None): # ZeroDivisionWarning 935 assert_eq(expr, (3 / x * y) ** 2 > 5) 936 937 with pytest.warns(None): # OverflowWarning 938 c = da.exp(a) 939 assert_eq(c, np.exp(x)) 940 941 assert_eq(abs(-a), a) 942 assert_eq(a, +x) 943 944 945def test_operator_dtype_promotion(): 946 x = np.arange(10, dtype=np.float32) 947 y = np.array([1]) 948 a = from_array(x, chunks=(5,)) 949 950 assert_eq(x + 1, a + 1) # still float32 951 assert_eq(x + 1e50, a + 1e50) # now float64 952 assert_eq(x + y, a + y) # also float64 953 954 955def test_field_access(): 956 x = np.array([(1, 1.0), (2, 2.0)], dtype=[("a", "i4"), ("b", "f4")]) 957 y = from_array(x, chunks=(1,)) 958 assert_eq(y["a"], x["a"]) 959 assert_eq(y[["b", "a"]], x[["b", "a"]]) 960 assert same_keys(y[["b", "a"]], y[["b", "a"]]) 961 962 963def test_field_access_with_shape(): 964 dtype = [("col1", ("f4", (3, 2))), ("col2", ("f4", 3))] 965 data = np.ones((100, 50), dtype=dtype) 966 x = da.from_array(data, 10) 967 assert_eq(x["col1"], data["col1"]) 968 assert_eq(x[["col1"]], data[["col1"]]) 969 assert_eq(x["col2"], data["col2"]) 970 assert_eq(x[["col1", "col2"]], data[["col1", "col2"]]) 971 972 973def test_matmul(): 974 x = np.random.random((5, 5)) 975 y = np.random.random((5, 2)) 976 a = from_array(x, chunks=(1, 5)) 977 b = from_array(y, chunks=(5, 1)) 978 assert_eq(operator.matmul(a, b), a.dot(b)) 979 assert_eq(operator.matmul(a, b), operator.matmul(x, y)) 980 assert_eq(operator.matmul(a, y), operator.matmul(x, b)) 981 list_vec = list(range(1, 6)) 982 assert_eq(operator.matmul(list_vec, b), operator.matmul(list_vec, y)) 983 assert_eq(operator.matmul(x, list_vec), operator.matmul(a, list_vec)) 984 z = np.random.random((5, 5, 5)) 985 c = from_array(z, chunks=(1, 5, 1)) 986 assert_eq(operator.matmul(a, z), operator.matmul(x, c)) 987 assert_eq(operator.matmul(z, a), operator.matmul(c, x)) 988 989 990def test_matmul_array_ufunc(): 991 # regression test for https://github.com/dask/dask/issues/4353 992 x = np.random.random((5, 5)) 993 y = np.random.random((5, 2)) 994 a = from_array(x, chunks=(1, 5)) 995 b = from_array(y, chunks=(5, 1)) 996 result = b.__array_ufunc__(np.matmul, "__call__", a, b) 997 assert_eq(result, x.dot(y)) 998 999 1000def test_T(): 1001 x = np.arange(400).reshape((20, 20)) 1002 a = from_array(x, chunks=(5, 5)) 1003 1004 assert_eq(x.T, a.T) 1005 1006 1007def test_broadcast_to(): 1008 x = np.random.randint(10, size=(5, 1, 6)) 1009 a = from_array(x, chunks=(3, 1, 3)) 1010 1011 for shape in [a.shape, (5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: 1012 xb = np.broadcast_to(x, shape) 1013 ab = broadcast_to(a, shape) 1014 1015 assert_eq(xb, ab) 1016 1017 if a.shape == ab.shape: 1018 assert a is ab 1019 1020 pytest.raises(ValueError, lambda: broadcast_to(a, (2, 1, 6))) 1021 pytest.raises(ValueError, lambda: broadcast_to(a, (3,))) 1022 1023 1024def test_broadcast_to_array(): 1025 x = np.random.randint(10, size=(5, 1, 6)) 1026 1027 for shape in [(5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: 1028 a = np.broadcast_to(x, shape) 1029 d = broadcast_to(x, shape) 1030 1031 assert_eq(a, d) 1032 1033 1034def test_broadcast_to_scalar(): 1035 x = 5 1036 1037 for shape in [tuple(), (0,), (2, 3), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: 1038 a = np.broadcast_to(x, shape) 1039 d = broadcast_to(x, shape) 1040 1041 assert_eq(a, d) 1042 1043 1044def test_broadcast_to_chunks(): 1045 x = np.random.randint(10, size=(5, 1, 6)) 1046 a = from_array(x, chunks=(3, 1, 3)) 1047 1048 for shape, chunks, expected_chunks in [ 1049 ((5, 3, 6), (3, -1, 3), ((3, 2), (3,), (3, 3))), 1050 ((5, 3, 6), (3, 1, 3), ((3, 2), (1, 1, 1), (3, 3))), 1051 ((2, 5, 3, 6), (1, 3, 1, 3), ((1, 1), (3, 2), (1, 1, 1), (3, 3))), 1052 ]: 1053 xb = np.broadcast_to(x, shape) 1054 ab = broadcast_to(a, shape, chunks=chunks) 1055 assert_eq(xb, ab) 1056 assert ab.chunks == expected_chunks 1057 1058 with pytest.raises(ValueError): 1059 broadcast_to(a, a.shape, chunks=((2, 3), (1,), (3, 3))) 1060 with pytest.raises(ValueError): 1061 broadcast_to(a, a.shape, chunks=((3, 2), (3,), (3, 3))) 1062 with pytest.raises(ValueError): 1063 broadcast_to(a, (5, 2, 6), chunks=((3, 2), (3,), (3, 3))) 1064 1065 1066def test_broadcast_arrays(): 1067 assert np.broadcast_arrays() == da.broadcast_arrays() 1068 1069 a = np.arange(4) 1070 d_a = da.from_array(a, chunks=tuple(s // 2 for s in a.shape)) 1071 1072 a_0 = np.arange(4)[None, :] 1073 a_1 = np.arange(4)[:, None] 1074 1075 d_a_0 = d_a[None, :] 1076 d_a_1 = d_a[:, None] 1077 1078 a_r = np.broadcast_arrays(a_0, a_1) 1079 d_r = da.broadcast_arrays(d_a_0, d_a_1) 1080 1081 assert isinstance(d_r, list) 1082 assert len(a_r) == len(d_r) 1083 1084 for e_a_r, e_d_r in zip(a_r, d_r): 1085 assert_eq(e_a_r, e_d_r) 1086 1087 1088def test_broadcast_arrays_uneven_chunks(): 1089 x = da.ones(30, chunks=(3,)) 1090 y = da.ones(30, chunks=(5,)) 1091 z = np.broadcast_arrays(x, y) 1092 1093 assert_eq(z, z) 1094 1095 x = da.ones((1, 30), chunks=(1, 3)) 1096 y = da.ones(30, chunks=(5,)) 1097 z = np.broadcast_arrays(x, y) 1098 1099 assert_eq(z, z) 1100 1101 1102@pytest.mark.parametrize( 1103 "u_shape, v_shape", 1104 [ 1105 [tuple(), (2, 3)], 1106 [(1,), (2, 3)], 1107 [(1, 1), (2, 3)], 1108 [(0, 3), (1, 3)], 1109 [(2, 0), (2, 1)], 1110 [(1, 0), (2, 1)], 1111 [(0, 1), (1, 3)], 1112 ], 1113) 1114def test_broadcast_operator(u_shape, v_shape): 1115 u = np.random.random(u_shape) 1116 v = np.random.random(v_shape) 1117 1118 d_u = from_array(u, chunks=1) 1119 d_v = from_array(v, chunks=1) 1120 1121 w = u * v 1122 d_w = d_u * d_v 1123 1124 assert_eq(w, d_w) 1125 1126 1127@pytest.mark.parametrize( 1128 "original_shape,new_shape,chunks", 1129 [ 1130 ((10,), (10,), (3, 3, 4)), 1131 ((10,), (10, 1, 1), 5), 1132 ((10,), (1, 10), 5), 1133 ((24,), (2, 3, 4), 12), 1134 ((1, 24), (2, 3, 4), 12), 1135 ((2, 3, 4), (24,), (1, 3, 4)), 1136 ((2, 3, 4), (24,), 4), 1137 ((2, 3, 4), (24, 1), 4), 1138 ((2, 3, 4), (1, 24), 4), 1139 ((4, 4, 1), (4, 4), 2), 1140 ((4, 4), (4, 4, 1), 2), 1141 ((1, 4, 4), (4, 4), 2), 1142 ((1, 4, 4), (4, 4, 1), 2), 1143 ((1, 4, 4), (1, 1, 4, 4), 2), 1144 ((4, 4), (1, 4, 4, 1), 2), 1145 ((4, 4), (1, 4, 4), 2), 1146 ((2, 3), (2, 3), (1, 2)), 1147 ((2, 3), (3, 2), 3), 1148 ((4, 2, 3), (4, 6), 4), 1149 ((3, 4, 5, 6), (3, 4, 5, 6), (2, 3, 4, 5)), 1150 ((), (1,), 1), 1151 ((1,), (), 1), 1152 ((24,), (3, 8), 24), 1153 ((24,), (4, 6), 6), 1154 ((24,), (4, 3, 2), 6), 1155 ((24,), (4, 6, 1), 6), 1156 ((24,), (4, 6), (6, 12, 6)), 1157 ((64, 4), (8, 8, 4), (16, 2)), 1158 ((4, 64), (4, 8, 4, 2), (2, 16)), 1159 ((4, 8, 4, 2), (2, 1, 2, 32, 2), (2, 4, 2, 2)), 1160 ((4, 1, 4), (4, 4), (2, 1, 2)), 1161 ((0, 10), (0, 5, 2), (5, 5)), 1162 ((5, 0, 2), (0, 10), (5, 2, 2)), 1163 ((0,), (2, 0, 2), (4,)), 1164 ((2, 0, 2), (0,), (4, 4, 4)), 1165 ], 1166) 1167def test_reshape(original_shape, new_shape, chunks): 1168 x = np.random.randint(10, size=original_shape) 1169 a = from_array(x, chunks=chunks) 1170 1171 xr = x.reshape(new_shape) 1172 ar = a.reshape(new_shape) 1173 1174 if a.shape == new_shape: 1175 assert a is ar 1176 1177 assert_eq(xr, ar) 1178 1179 1180def test_reshape_exceptions(): 1181 x = np.random.randint(10, size=(5,)) 1182 a = from_array(x, chunks=(2,)) 1183 with pytest.raises(ValueError): 1184 da.reshape(a, (100,)) 1185 1186 1187def test_reshape_splat(): 1188 x = da.ones((5, 5), chunks=(2, 2)) 1189 assert_eq(x.reshape((25,)), x.reshape(25)) 1190 1191 1192def test_reshape_fails_for_dask_only(): 1193 cases = [((3, 4), (4, 3), 2)] 1194 for original_shape, new_shape, chunks in cases: 1195 x = np.random.randint(10, size=original_shape) 1196 a = from_array(x, chunks=chunks) 1197 assert x.reshape(new_shape).shape == new_shape 1198 with pytest.raises(ValueError): 1199 da.reshape(a, new_shape) 1200 1201 1202def test_reshape_unknown_dimensions(): 1203 for original_shape in [(24,), (2, 12), (2, 3, 4)]: 1204 for new_shape in [(-1,), (2, -1), (-1, 3, 4)]: 1205 x = np.random.randint(10, size=original_shape) 1206 a = from_array(x, 24) 1207 assert_eq(x.reshape(new_shape), a.reshape(new_shape)) 1208 1209 pytest.raises(ValueError, lambda: da.reshape(a, (-1, -1))) 1210 1211 1212def test_full(): 1213 d = da.full((3, 4), 2, chunks=((2, 1), (2, 2))) 1214 assert d.chunks == ((2, 1), (2, 2)) 1215 assert_eq(d, np.full((3, 4), 2)) 1216 1217 1218def test_map_blocks(): 1219 x = np.arange(400).reshape((20, 20)) 1220 d = from_array(x, chunks=(7, 7)) 1221 1222 e = d.map_blocks(inc, dtype=d.dtype) 1223 1224 assert d.chunks == e.chunks 1225 assert_eq(e, x + 1) 1226 1227 e = d.map_blocks(inc, name="increment") 1228 assert e.name.startswith("increment-") 1229 1230 assert d.map_blocks(inc, name="foo").name != d.map_blocks(dec, name="foo").name 1231 1232 d = from_array(x, chunks=(10, 10)) 1233 e = d.map_blocks(lambda x: x[::2, ::2], chunks=(5, 5), dtype=d.dtype) 1234 1235 assert e.chunks == ((5, 5), (5, 5)) 1236 assert_eq(e, x[::2, ::2]) 1237 1238 d = from_array(x, chunks=(8, 8)) 1239 e = d.map_blocks( 1240 lambda x: x[::2, ::2], chunks=((4, 4, 2), (4, 4, 2)), dtype=d.dtype 1241 ) 1242 1243 assert_eq(e, x[::2, ::2]) 1244 1245 1246def test_map_blocks2(): 1247 x = np.arange(10, dtype="i8") 1248 d = from_array(x, chunks=(2,)) 1249 1250 def func(block, block_id=None, c=0): 1251 return np.ones_like(block) * sum(block_id) + c 1252 1253 out = d.map_blocks(func, dtype="i8") 1254 expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype="i8") 1255 1256 assert_eq(out, expected) 1257 assert same_keys(d.map_blocks(func, dtype="i8"), out) 1258 1259 out = d.map_blocks(func, dtype="i8", c=1) 1260 expected = expected + 1 1261 1262 assert_eq(out, expected) 1263 assert same_keys(d.map_blocks(func, dtype="i8", c=1), out) 1264 1265 1266def test_map_blocks_block_info(): 1267 x = da.arange(50, chunks=10) 1268 1269 def func(a, b, c, block_info=None): 1270 for idx in [0, 2, None]: # positions in args 1271 assert block_info[idx]["shape"] == (50,) 1272 assert block_info[idx]["num-chunks"] == (5,) 1273 start, stop = block_info[idx]["array-location"][0] 1274 assert stop - start == 10 1275 assert 0 <= start <= 40 1276 assert 10 <= stop <= 50 1277 1278 assert 0 <= block_info[idx]["chunk-location"][0] <= 4 1279 assert block_info[None]["chunk-shape"] == (10,) 1280 assert block_info[None]["dtype"] == x.dtype 1281 1282 return a + b + c 1283 1284 z = da.map_blocks(func, x, 100, x + 1, dtype=x.dtype) 1285 assert_eq(z, x + x + 1 + 100) 1286 1287 1288def test_map_blocks_block_info_with_new_axis(): 1289 # https://github.com/dask/dask/issues/4298 1290 values = da.from_array(np.array(["a", "a", "b", "c"]), 2) 1291 1292 def func(x, block_info=None): 1293 assert block_info.keys() == {0, None} 1294 assert block_info[0]["shape"] == (4,) 1295 assert block_info[0]["num-chunks"] == (2,) 1296 assert block_info[None]["shape"] == (4, 3) 1297 assert block_info[None]["num-chunks"] == (2, 1) 1298 assert block_info[None]["chunk-shape"] == (2, 3) 1299 assert block_info[None]["dtype"] == np.dtype("f8") 1300 1301 assert block_info[0]["chunk-location"] in {(0,), (1,)} 1302 1303 if block_info[0]["chunk-location"] == (0,): 1304 assert block_info[0]["array-location"] == [(0, 2)] 1305 assert block_info[None]["chunk-location"] == (0, 0) 1306 assert block_info[None]["array-location"] == [(0, 2), (0, 3)] 1307 elif block_info[0]["chunk-location"] == (1,): 1308 assert block_info[0]["array-location"] == [(2, 4)] 1309 assert block_info[None]["chunk-location"] == (1, 0) 1310 assert block_info[None]["array-location"] == [(2, 4), (0, 3)] 1311 1312 return np.ones((len(x), 3)) 1313 1314 z = values.map_blocks(func, chunks=((2, 2), 3), new_axis=1, dtype="f8") 1315 assert_eq(z, np.ones((4, 3), dtype="f8")) 1316 1317 1318def test_map_blocks_block_info_with_drop_axis(): 1319 # https://github.com/dask/dask/issues/4584 1320 values = da.from_array( 1321 np.array( 1322 [[1, 2, 4], [8, 16, 32], [64, 128, 256], [1024, 2048, 4096]], dtype="u4" 1323 ), 1324 (2, 1), 1325 ) 1326 1327 def func(x, block_info=None): 1328 assert block_info.keys() == {0, None} 1329 assert block_info[0]["shape"] == (4, 3) 1330 # drop_axis concatenates along the dropped dimension, hence not (2, 3) 1331 assert block_info[0]["num-chunks"] == (2, 1) 1332 assert block_info[None]["shape"] == (4,) 1333 assert block_info[None]["num-chunks"] == (2,) 1334 assert block_info[None]["chunk-shape"] == (2,) 1335 assert block_info[None]["dtype"] == np.dtype("u4") 1336 1337 assert block_info[0]["chunk-location"] in {(0, 0), (1, 0)} 1338 1339 if block_info[0]["chunk-location"] == (0, 0): 1340 assert block_info[0]["array-location"] == [(0, 2), (0, 3)] 1341 assert block_info[None]["chunk-location"] == (0,) 1342 assert block_info[None]["array-location"] == [(0, 2)] 1343 elif block_info[0]["chunk-location"] == (1, 0): 1344 assert block_info[0]["array-location"] == [(2, 4), (0, 3)] 1345 assert block_info[None]["chunk-location"] == (1,) 1346 assert block_info[None]["array-location"] == [(2, 4)] 1347 1348 return np.sum(x, axis=1, dtype="u4") 1349 1350 z = values.map_blocks(func, drop_axis=1, dtype="u4") 1351 assert_eq(z, np.array([7, 56, 448, 7168], dtype="u4")) 1352 1353 1354def test_map_blocks_block_info_with_broadcast(): 1355 expected0 = [ 1356 { 1357 "shape": (3, 4), 1358 "num-chunks": (1, 2), 1359 "array-location": [(0, 3), (0, 2)], 1360 "chunk-location": (0, 0), 1361 }, 1362 { 1363 "shape": (3, 4), 1364 "num-chunks": (1, 2), 1365 "array-location": [(0, 3), (2, 4)], 1366 "chunk-location": (0, 1), 1367 }, 1368 ] 1369 expected1 = [ 1370 { 1371 "shape": (6, 2), 1372 "num-chunks": (2, 1), 1373 "array-location": [(0, 3), (0, 2)], 1374 "chunk-location": (0, 0), 1375 }, 1376 { 1377 "shape": (6, 2), 1378 "num-chunks": (2, 1), 1379 "array-location": [(3, 6), (0, 2)], 1380 "chunk-location": (1, 0), 1381 }, 1382 ] 1383 expected2 = [ 1384 { 1385 "shape": (4,), 1386 "num-chunks": (2,), 1387 "array-location": [(0, 2)], 1388 "chunk-location": (0,), 1389 }, 1390 { 1391 "shape": (4,), 1392 "num-chunks": (2,), 1393 "array-location": [(2, 4)], 1394 "chunk-location": (1,), 1395 }, 1396 ] 1397 expected = [ 1398 { 1399 0: expected0[0], 1400 1: expected1[0], 1401 2: expected2[0], 1402 None: { 1403 "shape": (6, 4), 1404 "num-chunks": (2, 2), 1405 "dtype": np.float_, 1406 "chunk-shape": (3, 2), 1407 "array-location": [(0, 3), (0, 2)], 1408 "chunk-location": (0, 0), 1409 }, 1410 }, 1411 { 1412 0: expected0[1], 1413 1: expected1[0], 1414 2: expected2[1], 1415 None: { 1416 "shape": (6, 4), 1417 "num-chunks": (2, 2), 1418 "dtype": np.float_, 1419 "chunk-shape": (3, 2), 1420 "array-location": [(0, 3), (2, 4)], 1421 "chunk-location": (0, 1), 1422 }, 1423 }, 1424 { 1425 0: expected0[0], 1426 1: expected1[1], 1427 2: expected2[0], 1428 None: { 1429 "shape": (6, 4), 1430 "num-chunks": (2, 2), 1431 "dtype": np.float_, 1432 "chunk-shape": (3, 2), 1433 "array-location": [(3, 6), (0, 2)], 1434 "chunk-location": (1, 0), 1435 }, 1436 }, 1437 { 1438 0: expected0[1], 1439 1: expected1[1], 1440 2: expected2[1], 1441 None: { 1442 "shape": (6, 4), 1443 "num-chunks": (2, 2), 1444 "dtype": np.float_, 1445 "chunk-shape": (3, 2), 1446 "array-location": [(3, 6), (2, 4)], 1447 "chunk-location": (1, 1), 1448 }, 1449 }, 1450 ] 1451 1452 def func(x, y, z, block_info=None): 1453 for info in expected: 1454 if block_info[None]["chunk-location"] == info[None]["chunk-location"]: 1455 assert block_info == info 1456 break 1457 else: 1458 assert False 1459 return x + y + z 1460 1461 a = da.ones((3, 4), chunks=(3, 2)) 1462 b = da.ones((6, 2), chunks=(3, 2)) 1463 c = da.ones((4,), chunks=(2,)) 1464 d = da.map_blocks(func, a, b, c, chunks=((3, 3), (2, 2)), dtype=a.dtype) 1465 assert d.chunks == ((3, 3), (2, 2)) 1466 assert_eq(d, 3 * np.ones((6, 4))) 1467 1468 1469def test_map_blocks_with_constants(): 1470 d = da.arange(10, chunks=3) 1471 e = d.map_blocks(add, 100, dtype=d.dtype) 1472 1473 assert_eq(e, np.arange(10) + 100) 1474 1475 assert_eq(da.map_blocks(sub, d, 10, dtype=d.dtype), np.arange(10) - 10) 1476 assert_eq(da.map_blocks(sub, 10, d, dtype=d.dtype), 10 - np.arange(10)) 1477 1478 1479def test_map_blocks_with_kwargs(): 1480 d = da.arange(10, chunks=5) 1481 1482 result = d.map_blocks(np.max, axis=0, keepdims=True, dtype=d.dtype, chunks=(1,)) 1483 1484 assert_eq(result, np.array([4, 9])) 1485 1486 1487def test_map_blocks_infer_chunks_broadcast(): 1488 dx = da.from_array([[1, 2, 3, 4]], chunks=((1,), (2, 2))) 1489 dy = da.from_array([[10, 20], [30, 40]], chunks=((1, 1), (2,))) 1490 result = da.map_blocks(lambda x, y: x + y, dx, dy) 1491 assert result.chunks == ((1, 1), (2, 2)) 1492 assert_eq(result, np.array([[11, 22, 13, 24], [31, 42, 33, 44]])) 1493 1494 1495def test_map_blocks_with_chunks(): 1496 dx = da.ones((5, 3), chunks=(2, 2)) 1497 dy = da.ones((5, 3), chunks=(2, 2)) 1498 dz = da.map_blocks(np.add, dx, dy, chunks=dx.chunks) 1499 assert_eq(dz, np.ones((5, 3)) * 2) 1500 1501 1502def test_map_blocks_dtype_inference(): 1503 x = np.arange(50).reshape((5, 10)) 1504 y = np.arange(10) 1505 dx = da.from_array(x, chunks=5) 1506 dy = da.from_array(y, chunks=5) 1507 1508 def foo(x, *args, **kwargs): 1509 cast = kwargs.pop("cast", "i8") 1510 return (x + sum(args)).astype(cast) 1511 1512 assert_eq(dx.map_blocks(foo, dy, 1), foo(dx, dy, 1)) 1513 assert_eq(dx.map_blocks(foo, dy, 1, cast="f8"), foo(dx, dy, 1, cast="f8")) 1514 assert_eq( 1515 dx.map_blocks(foo, dy, 1, cast="f8", dtype="f8"), 1516 foo(dx, dy, 1, cast="f8", dtype="f8"), 1517 ) 1518 1519 def foo(x): 1520 raise RuntimeError("Woops") 1521 1522 with pytest.raises(ValueError) as e: 1523 dx.map_blocks(foo) 1524 msg = str(e.value) 1525 assert "dtype" in msg 1526 1527 1528def test_map_blocks_infer_newaxis(): 1529 x = da.ones((5, 3), chunks=(2, 2)) 1530 y = da.map_blocks(lambda x: x[None], x, chunks=((1,), (2, 2, 1), (2, 1))) 1531 assert_eq(y, da.ones((1, 5, 3))) 1532 1533 1534def test_map_blocks_no_array_args(): 1535 def func(dtype, block_info=None): 1536 loc = block_info[None]["array-location"] 1537 return np.arange(loc[0][0], loc[0][1], dtype=dtype) 1538 1539 x = da.map_blocks(func, np.float32, chunks=((5, 3),), dtype=np.float32) 1540 assert x.chunks == ((5, 3),) 1541 assert_eq(x, np.arange(8, dtype=np.float32)) 1542 1543 1544@pytest.mark.parametrize("func", [lambda x, y: x + y, lambda x, y, block_info: x + y]) 1545def test_map_blocks_optimize_blockwise(func): 1546 # Check that map_blocks layers can merge with elementwise layers 1547 base = [da.full((1,), i, chunks=1) for i in range(4)] 1548 a = base[0] + base[1] 1549 b = da.map_blocks(func, a, base[2], dtype=np.int8) 1550 c = b + base[3] 1551 dsk = c.__dask_graph__() 1552 optimized = optimize_blockwise(dsk) 1553 1554 # Everything should be fused into a single layer. 1555 # If the lambda includes block_info, there will be two layers. 1556 assert len(optimized.layers) == len(dsk.layers) - 6 1557 1558 1559def test_repr(): 1560 d = da.ones((4, 4), chunks=(2, 2)) 1561 assert key_split(d.name) in repr(d) 1562 assert str(d.shape) in repr(d) 1563 assert str(d.dtype) in repr(d) 1564 d = da.ones((4000, 4), chunks=(4, 2)) 1565 assert len(str(d)) < 1000 1566 1567 1568def test_repr_meta(): 1569 d = da.ones((4, 4), chunks=(2, 2)) 1570 assert "chunktype=numpy.ndarray" in repr(d) 1571 1572 # Test non-numpy meta 1573 sparse = pytest.importorskip("sparse") 1574 s = d.map_blocks(sparse.COO) 1575 assert "chunktype=sparse.COO" in repr(s) 1576 1577 1578def test_repr_html_array_highlevelgraph(): 1579 pytest.importorskip("jinja2") 1580 x = da.ones((9, 9), chunks=(3, 3)).T[0:4, 0:4] 1581 hg = x.dask 1582 assert xml.etree.ElementTree.fromstring(hg._repr_html_()) is not None 1583 for layer in hg.layers.values(): 1584 assert xml.etree.ElementTree.fromstring(layer._repr_html_()) is not None 1585 1586 1587def test_slicing_with_ellipsis(): 1588 x = np.arange(256).reshape((4, 4, 4, 4)) 1589 d = da.from_array(x, chunks=((2, 2, 2, 2))) 1590 1591 assert_eq(d[..., 1], x[..., 1]) 1592 assert_eq(d[0, ..., 1], x[0, ..., 1]) 1593 1594 1595def test_slicing_with_ndarray(): 1596 x = np.arange(64).reshape((8, 8)) 1597 d = da.from_array(x, chunks=((4, 4))) 1598 1599 assert_eq(d[np.arange(8)], x) 1600 assert_eq(d[np.ones(8, dtype=bool)], x) 1601 assert_eq(d[np.array([1])], x[[1]]) 1602 assert_eq(d[np.array([True, False, True] + [False] * 5)], x[[0, 2]]) 1603 1604 1605def test_slicing_flexible_type(): 1606 a = np.array([["a", "b"], ["c", "d"]]) 1607 b = da.from_array(a, 2) 1608 1609 assert_eq(a[:, 0], b[:, 0]) 1610 1611 1612def test_slicing_with_object_dtype(): 1613 # https://github.com/dask/dask/issues/6892 1614 d = da.from_array(np.array(["a", "b"], dtype=object), chunks=(1,)) 1615 assert d.dtype == d[(0,)].dtype 1616 1617 1618def test_dtype(): 1619 d = da.ones((4, 4), chunks=(2, 2)) 1620 1621 assert d.dtype == d.compute().dtype 1622 assert (d * 1.0).dtype == (d + 1.0).compute().dtype 1623 assert d.sum().dtype == d.sum().compute().dtype # no shape 1624 1625 1626def test_blockdims_from_blockshape(): 1627 assert blockdims_from_blockshape((10, 10), (4, 3)) == ((4, 4, 2), (3, 3, 3, 1)) 1628 pytest.raises(TypeError, lambda: blockdims_from_blockshape((10,), None)) 1629 assert blockdims_from_blockshape((1e2, 3), [1e1, 3]) == ((10,) * 10, (3,)) 1630 assert blockdims_from_blockshape((np.int8(10),), (5,)) == ((5, 5),) 1631 1632 1633def test_coerce(): 1634 d0 = da.from_array(np.array(1), chunks=(1,)) 1635 d1 = da.from_array(np.array([1]), chunks=(1,)) 1636 with dask.config.set(scheduler="sync"): 1637 for d in d0, d1: 1638 assert bool(d) is True 1639 assert int(d) == 1 1640 assert float(d) == 1.0 1641 assert complex(d) == complex(1) 1642 1643 a2 = np.arange(2) 1644 d2 = da.from_array(a2, chunks=(2,)) 1645 for func in (int, float, complex): 1646 pytest.raises(TypeError, lambda: func(d2)) 1647 1648 1649def test_bool(): 1650 arr = np.arange(100).reshape((10, 10)) 1651 darr = da.from_array(arr, chunks=(10, 10)) 1652 with pytest.raises(ValueError): 1653 bool(darr) 1654 bool(darr == darr) 1655 1656 1657def test_store_kwargs(): 1658 d = da.ones((10, 10), chunks=(2, 2)) 1659 a = d + 1 1660 1661 called = [False] 1662 1663 def get_func(*args, **kwargs): 1664 assert kwargs.pop("foo") == "test kwarg" 1665 r = dask.get(*args, **kwargs) 1666 called[0] = True 1667 return r 1668 1669 called[0] = False 1670 at = np.zeros(shape=(10, 10)) 1671 store([a], [at], scheduler=get_func, foo="test kwarg") 1672 assert called[0] 1673 1674 called[0] = False 1675 at = np.zeros(shape=(10, 10)) 1676 a.store(at, scheduler=get_func, foo="test kwarg") 1677 assert called[0] 1678 1679 called[0] = False 1680 at = np.zeros(shape=(10, 10)) 1681 store([a], [at], scheduler=get_func, return_stored=True, foo="test kwarg") 1682 assert called[0] 1683 1684 1685def test_store_delayed_target(): 1686 from dask.delayed import delayed 1687 1688 d = da.ones((4, 4), chunks=(2, 2)) 1689 a, b = d + 1, d + 2 1690 1691 # empty buffers to be used as targets 1692 targs = {} 1693 1694 def make_target(key): 1695 a = np.empty((4, 4)) 1696 targs[key] = a 1697 return a 1698 1699 # delayed calls to these targets 1700 atd = delayed(make_target)("at") 1701 btd = delayed(make_target)("bt") 1702 1703 # test not keeping result 1704 st = store([a, b], [atd, btd]) 1705 1706 at = targs["at"] 1707 bt = targs["bt"] 1708 1709 assert st is None 1710 assert_eq(at, a) 1711 assert_eq(bt, b) 1712 1713 # test keeping result 1714 for st_compute in [False, True]: 1715 targs.clear() 1716 1717 st = store([a, b], [atd, btd], return_stored=True, compute=st_compute) 1718 if st_compute: 1719 assert all(not any(dask.core.get_deps(e.dask)[0].values()) for e in st) 1720 1721 st = dask.compute(*st) 1722 1723 at = targs["at"] 1724 bt = targs["bt"] 1725 1726 assert st is not None 1727 assert isinstance(st, tuple) 1728 assert all([isinstance(v, np.ndarray) for v in st]) 1729 assert_eq(at, a) 1730 assert_eq(bt, b) 1731 assert_eq(st[0], a) 1732 assert_eq(st[1], b) 1733 1734 pytest.raises(ValueError, lambda: store([a], [at, bt])) 1735 pytest.raises(ValueError, lambda: store(at, at)) 1736 pytest.raises(ValueError, lambda: store([at, bt], [at, bt])) 1737 1738 1739def test_store(): 1740 d = da.ones((4, 4), chunks=(2, 2)) 1741 a, b = d + 1, d + 2 1742 1743 at = np.empty(shape=(4, 4)) 1744 bt = np.empty(shape=(4, 4)) 1745 1746 st = store([a, b], [at, bt]) 1747 assert st is None 1748 assert (at == 2).all() 1749 assert (bt == 3).all() 1750 1751 pytest.raises(ValueError, lambda: store([a], [at, bt])) 1752 pytest.raises(ValueError, lambda: store(at, at)) 1753 pytest.raises(ValueError, lambda: store([at, bt], [at, bt])) 1754 1755 1756def test_store_regions(): 1757 d = da.ones((4, 4, 4), dtype=int, chunks=(2, 2, 2)) 1758 a, b = d + 1, d + 2 1759 a = a[:, 1:, :].astype(float) 1760 1761 region = (slice(None, None, 2), slice(None), [1, 2, 4, 5]) 1762 1763 # Single region: 1764 at = np.zeros(shape=(8, 3, 6)) 1765 bt = np.zeros(shape=(8, 4, 6)) 1766 v = store([a, b], [at, bt], regions=region, compute=False) 1767 assert isinstance(v, Delayed) 1768 assert (at == 0).all() and (bt[region] == 0).all() 1769 assert all([ev is None for ev in v.compute()]) 1770 assert (at[region] == 2).all() and (bt[region] == 3).all() 1771 assert not (bt == 3).all() and not (bt == 0).all() 1772 assert not (at == 2).all() and not (at == 0).all() 1773 1774 # Multiple regions: 1775 at = np.zeros(shape=(8, 3, 6)) 1776 bt = np.zeros(shape=(8, 4, 6)) 1777 v = store([a, b], [at, bt], regions=[region, region], compute=False) 1778 assert isinstance(v, Delayed) 1779 assert (at == 0).all() and (bt[region] == 0).all() 1780 assert all([ev is None for ev in v.compute()]) 1781 assert (at[region] == 2).all() and (bt[region] == 3).all() 1782 assert not (bt == 3).all() and not (bt == 0).all() 1783 assert not (at == 2).all() and not (at == 0).all() 1784 1785 # Single region (keep result): 1786 for st_compute in [False, True]: 1787 at = np.zeros(shape=(8, 3, 6)) 1788 bt = np.zeros(shape=(8, 4, 6)) 1789 v = store( 1790 [a, b], [at, bt], regions=region, compute=st_compute, return_stored=True 1791 ) 1792 assert isinstance(v, tuple) 1793 assert all([isinstance(e, da.Array) for e in v]) 1794 if st_compute: 1795 assert all(not any(dask.core.get_deps(e.dask)[0].values()) for e in v) 1796 else: 1797 assert (at == 0).all() and (bt[region] == 0).all() 1798 1799 ar, br = v 1800 assert ar.dtype == a.dtype 1801 assert br.dtype == b.dtype 1802 assert ar.shape == a.shape 1803 assert br.shape == b.shape 1804 assert ar.chunks == a.chunks 1805 assert br.chunks == b.chunks 1806 1807 ar, br = da.compute(ar, br) 1808 assert (at[region] == 2).all() and (bt[region] == 3).all() 1809 assert not (bt == 3).all() and not (bt == 0).all() 1810 assert not (at == 2).all() and not (at == 0).all() 1811 assert (br == 3).all() 1812 assert (ar == 2).all() 1813 1814 # Multiple regions (keep result): 1815 for st_compute in [False, True]: 1816 at = np.zeros(shape=(8, 3, 6)) 1817 bt = np.zeros(shape=(8, 4, 6)) 1818 v = store( 1819 [a, b], 1820 [at, bt], 1821 regions=[region, region], 1822 compute=st_compute, 1823 return_stored=True, 1824 ) 1825 assert isinstance(v, tuple) 1826 assert all([isinstance(e, da.Array) for e in v]) 1827 if st_compute: 1828 assert all(not any(dask.core.get_deps(e.dask)[0].values()) for e in v) 1829 else: 1830 assert (at == 0).all() and (bt[region] == 0).all() 1831 1832 ar, br = v 1833 assert ar.dtype == a.dtype 1834 assert br.dtype == b.dtype 1835 assert ar.shape == a.shape 1836 assert br.shape == b.shape 1837 assert ar.chunks == a.chunks 1838 assert br.chunks == b.chunks 1839 1840 ar, br = da.compute(ar, br) 1841 assert (at[region] == 2).all() and (bt[region] == 3).all() 1842 assert not (bt == 3).all() and not (bt == 0).all() 1843 assert not (at == 2).all() and not (at == 0).all() 1844 assert (br == 3).all() 1845 assert (ar == 2).all() 1846 1847 1848def test_store_compute_false(): 1849 d = da.ones((4, 4), chunks=(2, 2)) 1850 a, b = d + 1, d + 2 1851 1852 at = np.zeros(shape=(4, 4)) 1853 bt = np.zeros(shape=(4, 4)) 1854 1855 v = store([a, b], [at, bt], compute=False) 1856 assert isinstance(v, Delayed) 1857 1858 # You need a well-formed HighLevelgraph for e.g. dask.graph_manipulation.bind 1859 for layer in v.__dask_layers__(): 1860 assert layer in v.dask.layers 1861 1862 assert (at == 0).all() and (bt == 0).all() 1863 assert all([ev is None for ev in v.compute()]) 1864 assert (at == 2).all() and (bt == 3).all() 1865 1866 at = np.zeros(shape=(4, 4)) 1867 bt = np.zeros(shape=(4, 4)) 1868 1869 dat, dbt = store([a, b], [at, bt], compute=False, return_stored=True) 1870 assert isinstance(dat, Array) and isinstance(dbt, Array) 1871 assert (at == 0).all() and (bt == 0).all() 1872 assert (dat.compute() == at).all() and (dbt.compute() == bt).all() 1873 assert (at == 2).all() and (bt == 3).all() 1874 1875 1876def test_store_nocompute_regions(): 1877 x = da.ones(10, chunks=1) 1878 y = np.zeros((2, 10)) 1879 d1 = da.store(x, y, regions=(0,), compute=False) 1880 d2 = da.store(x, y, regions=(1,), compute=False) 1881 assert d1.key != d2.key 1882 1883 1884class ThreadSafetyError(Exception): 1885 pass 1886 1887 1888class NonthreadSafeStore: 1889 def __init__(self): 1890 self.in_use = False 1891 1892 def __setitem__(self, key, value): 1893 if self.in_use: 1894 raise ThreadSafetyError() 1895 self.in_use = True 1896 time.sleep(0.001) 1897 self.in_use = False 1898 1899 1900class ThreadSafeStore: 1901 def __init__(self): 1902 self.concurrent_uses = 0 1903 self.max_concurrent_uses = 0 1904 1905 def __setitem__(self, key, value): 1906 self.concurrent_uses += 1 1907 self.max_concurrent_uses = max(self.concurrent_uses, self.max_concurrent_uses) 1908 time.sleep(0.01) 1909 self.concurrent_uses -= 1 1910 1911 1912class CounterLock: 1913 def __init__(self, *args, **kwargs): 1914 self.lock = Lock(*args, **kwargs) 1915 1916 self.acquire_count = 0 1917 self.release_count = 0 1918 1919 def acquire(self, *args, **kwargs): 1920 self.acquire_count += 1 1921 return self.lock.acquire(*args, **kwargs) 1922 1923 def release(self, *args, **kwargs): 1924 self.release_count += 1 1925 return self.lock.release(*args, **kwargs) 1926 1927 1928def test_store_locks(): 1929 _Lock = type(Lock()) 1930 d = da.ones((10, 10), chunks=(2, 2)) 1931 a, b = d + 1, d + 2 1932 1933 at = np.zeros(shape=(10, 10)) 1934 bt = np.zeros(shape=(10, 10)) 1935 1936 lock = Lock() 1937 v = store([a, b], [at, bt], compute=False, lock=lock) 1938 assert isinstance(v, Delayed) 1939 dsk = v.dask 1940 locks = {vv for v in dsk.values() for vv in v if isinstance(vv, _Lock)} 1941 assert locks == {lock} 1942 1943 # Ensure same lock applies over multiple stores 1944 at = NonthreadSafeStore() 1945 v = store([a, b], [at, at], lock=lock, scheduler="threads", num_workers=10) 1946 assert v is None 1947 1948 # Don't assume thread safety by default 1949 at = NonthreadSafeStore() 1950 assert store(a, at, scheduler="threads", num_workers=10) is None 1951 assert a.store(at, scheduler="threads", num_workers=10) is None 1952 1953 # Ensure locks can be removed 1954 at = ThreadSafeStore() 1955 for i in range(10): 1956 st = a.store(at, lock=False, scheduler="threads", num_workers=10) 1957 assert st is None 1958 if at.max_concurrent_uses > 1: 1959 break 1960 if i == 9: 1961 assert False 1962 1963 # Verify number of lock calls 1964 nchunks = np.sum([np.prod([len(c) for c in e.chunks]) for e in [a, b]]) 1965 for c in (False, True): 1966 at = np.zeros(shape=(10, 10)) 1967 bt = np.zeros(shape=(10, 10)) 1968 lock = CounterLock() 1969 1970 v = store([a, b], [at, bt], lock=lock, compute=c, return_stored=True) 1971 assert all(isinstance(e, Array) for e in v) 1972 1973 da.compute(v) 1974 1975 # When `return_stored=True` and `compute=False`, 1976 # the lock should be acquired only once for store and load steps 1977 # as they are fused together into one step. 1978 assert lock.acquire_count == lock.release_count 1979 if c: 1980 assert lock.acquire_count == 2 * nchunks 1981 else: 1982 assert lock.acquire_count == nchunks 1983 1984 1985def test_store_method_return(): 1986 d = da.ones((10, 10), chunks=(2, 2)) 1987 a = d + 1 1988 1989 for compute in [False, True]: 1990 for return_stored in [False, True]: 1991 at = np.zeros(shape=(10, 10)) 1992 r = a.store( 1993 at, scheduler="threads", compute=compute, return_stored=return_stored 1994 ) 1995 1996 if return_stored: 1997 assert isinstance(r, Array) 1998 elif compute: 1999 assert r is None 2000 else: 2001 assert isinstance(r, Delayed) 2002 2003 2004@pytest.mark.xfail(reason="can't lock with multiprocessing") 2005def test_store_multiprocessing_lock(): 2006 d = da.ones((10, 10), chunks=(2, 2)) 2007 a = d + 1 2008 2009 at = np.zeros(shape=(10, 10)) 2010 st = a.store(at, scheduler="processes", num_workers=10) 2011 assert st is None 2012 2013 2014@pytest.mark.parametrize("return_stored", [False, True]) 2015@pytest.mark.parametrize("delayed_target", [False, True]) 2016def test_store_deterministic_keys(return_stored, delayed_target): 2017 a = da.ones((10, 10), chunks=(2, 2)) 2018 at = np.zeros(shape=(10, 10)) 2019 if delayed_target: 2020 at = delayed(at) 2021 st1 = a.store(at, return_stored=return_stored, compute=False) 2022 st2 = a.store(at, return_stored=return_stored, compute=False) 2023 assert st1.dask.keys() == st2.dask.keys() 2024 2025 2026def test_to_hdf5(): 2027 h5py = pytest.importorskip("h5py") 2028 x = da.ones((4, 4), chunks=(2, 2)) 2029 y = da.ones(4, chunks=2, dtype="i4") 2030 2031 with tmpfile(".hdf5") as fn: 2032 x.to_hdf5(fn, "/x") 2033 with h5py.File(fn, mode="r+") as f: 2034 d = f["/x"] 2035 2036 assert_eq(d[:], x) 2037 assert d.chunks == (2, 2) 2038 2039 with tmpfile(".hdf5") as fn: 2040 x.to_hdf5(fn, "/x", chunks=None) 2041 with h5py.File(fn, mode="r+") as f: 2042 d = f["/x"] 2043 2044 assert_eq(d[:], x) 2045 assert d.chunks is None 2046 2047 with tmpfile(".hdf5") as fn: 2048 x.to_hdf5(fn, "/x", chunks=(1, 1)) 2049 with h5py.File(fn, mode="r+") as f: 2050 d = f["/x"] 2051 2052 assert_eq(d[:], x) 2053 assert d.chunks == (1, 1) 2054 2055 with tmpfile(".hdf5") as fn: 2056 da.to_hdf5(fn, {"/x": x, "/y": y}) 2057 2058 with h5py.File(fn, mode="r+") as f: 2059 assert_eq(f["/x"][:], x) 2060 assert f["/x"].chunks == (2, 2) 2061 assert_eq(f["/y"][:], y) 2062 assert f["/y"].chunks == (2,) 2063 2064 2065def test_to_dask_dataframe(): 2066 dd = pytest.importorskip("dask.dataframe") 2067 a = da.ones((4,), chunks=(2,)) 2068 d = a.to_dask_dataframe() 2069 assert isinstance(d, dd.Series) 2070 2071 a = da.ones((4, 4), chunks=(2, 2)) 2072 d = a.to_dask_dataframe() 2073 assert isinstance(d, dd.DataFrame) 2074 2075 2076def test_np_array_with_zero_dimensions(): 2077 d = da.ones((4, 4), chunks=(2, 2)) 2078 assert_eq(np.array(d.sum()), np.array(d.compute().sum())) 2079 2080 2081def test_dtype_complex(): 2082 x = np.arange(24).reshape((4, 6)).astype("f4") 2083 y = np.arange(24).reshape((4, 6)).astype("i8") 2084 z = np.arange(24).reshape((4, 6)).astype("i2") 2085 2086 a = da.from_array(x, chunks=(2, 3)) 2087 b = da.from_array(y, chunks=(2, 3)) 2088 c = da.from_array(z, chunks=(2, 3)) 2089 2090 def assert_eq(a, b): 2091 return isinstance(a, np.dtype) and isinstance(b, np.dtype) and str(a) == str(b) 2092 2093 assert_eq(a.dtype, x.dtype) 2094 assert_eq(b.dtype, y.dtype) 2095 2096 assert_eq((a + 1).dtype, (x + 1).dtype) 2097 assert_eq((a + b).dtype, (x + y).dtype) 2098 assert_eq(a.T.dtype, x.T.dtype) 2099 assert_eq(a[:3].dtype, x[:3].dtype) 2100 assert_eq((a.dot(b.T)).dtype, (x.dot(y.T)).dtype) 2101 2102 assert_eq(stack([a, b]).dtype, np.vstack([x, y]).dtype) 2103 assert_eq(concatenate([a, b]).dtype, np.concatenate([x, y]).dtype) 2104 2105 assert_eq(b.std().dtype, y.std().dtype) 2106 assert_eq(c.sum().dtype, z.sum().dtype) 2107 assert_eq(a.min().dtype, a.min().dtype) 2108 assert_eq(b.std().dtype, b.std().dtype) 2109 assert_eq(a.argmin(axis=0).dtype, a.argmin(axis=0).dtype) 2110 2111 assert_eq(da.sin(c).dtype, np.sin(z).dtype) 2112 assert_eq(da.exp(b).dtype, np.exp(y).dtype) 2113 assert_eq(da.floor(a).dtype, np.floor(x).dtype) 2114 assert_eq(da.isnan(b).dtype, np.isnan(y).dtype) 2115 with contextlib.suppress(ImportError): 2116 assert da.isnull(b).dtype == "bool" 2117 assert da.notnull(b).dtype == "bool" 2118 2119 x = np.array([("a", 1)], dtype=[("text", "S1"), ("numbers", "i4")]) 2120 d = da.from_array(x, chunks=(1,)) 2121 2122 assert_eq(d["text"].dtype, x["text"].dtype) 2123 assert_eq(d[["numbers", "text"]].dtype, x[["numbers", "text"]].dtype) 2124 2125 2126def test_astype(): 2127 x = np.ones((5, 5), dtype="f8") 2128 d = da.from_array(x, chunks=(2, 2)) 2129 2130 assert d.astype("i8").dtype == "i8" 2131 assert_eq(d.astype("i8"), x.astype("i8")) 2132 assert same_keys(d.astype("i8"), d.astype("i8")) 2133 2134 with pytest.raises(TypeError): 2135 d.astype("i8", casting="safe") 2136 2137 with pytest.raises(TypeError): 2138 d.astype("i8", not_a_real_kwarg="foo") 2139 2140 # smoketest with kwargs 2141 assert_eq(d.astype("i8", copy=False), x.astype("i8", copy=False)) 2142 2143 # Check it's a noop 2144 assert d.astype("f8") is d 2145 2146 2147def test_arithmetic(): 2148 x = np.arange(5).astype("f4") + 2 2149 y = np.arange(5).astype("i8") + 2 2150 z = np.arange(5).astype("i4") + 2 2151 a = da.from_array(x, chunks=(2,)) 2152 b = da.from_array(y, chunks=(2,)) 2153 c = da.from_array(z, chunks=(2,)) 2154 assert_eq(a + b, x + y) 2155 assert_eq(a * b, x * y) 2156 assert_eq(a - b, x - y) 2157 assert_eq(a / b, x / y) 2158 assert_eq(b & b, y & y) 2159 assert_eq(b | b, y | y) 2160 assert_eq(b ^ b, y ^ y) 2161 assert_eq(a // b, x // y) 2162 assert_eq(a ** b, x ** y) 2163 assert_eq(a % b, x % y) 2164 assert_eq(a > b, x > y) 2165 assert_eq(a < b, x < y) 2166 assert_eq(a >= b, x >= y) 2167 assert_eq(a <= b, x <= y) 2168 assert_eq(a == b, x == y) 2169 assert_eq(a != b, x != y) 2170 2171 assert_eq(a + 2, x + 2) 2172 assert_eq(a * 2, x * 2) 2173 assert_eq(a - 2, x - 2) 2174 assert_eq(a / 2, x / 2) 2175 assert_eq(b & True, y & True) 2176 assert_eq(b | True, y | True) 2177 assert_eq(b ^ True, y ^ True) 2178 assert_eq(a // 2, x // 2) 2179 assert_eq(a ** 2, x ** 2) 2180 assert_eq(a % 2, x % 2) 2181 assert_eq(a > 2, x > 2) 2182 assert_eq(a < 2, x < 2) 2183 assert_eq(a >= 2, x >= 2) 2184 assert_eq(a <= 2, x <= 2) 2185 assert_eq(a == 2, x == 2) 2186 assert_eq(a != 2, x != 2) 2187 2188 assert_eq(2 + b, 2 + y) 2189 assert_eq(2 * b, 2 * y) 2190 assert_eq(2 - b, 2 - y) 2191 assert_eq(2 / b, 2 / y) 2192 assert_eq(True & b, True & y) 2193 assert_eq(True | b, True | y) 2194 assert_eq(True ^ b, True ^ y) 2195 assert_eq(2 // b, 2 // y) 2196 assert_eq(2 ** b, 2 ** y) 2197 assert_eq(2 % b, 2 % y) 2198 assert_eq(2 > b, 2 > y) 2199 assert_eq(2 < b, 2 < y) 2200 assert_eq(2 >= b, 2 >= y) 2201 assert_eq(2 <= b, 2 <= y) 2202 assert_eq(2 == b, 2 == y) 2203 assert_eq(2 != b, 2 != y) 2204 2205 assert_eq(-a, -x) 2206 assert_eq(abs(a), abs(x)) 2207 assert_eq(~(a == b), ~(x == y)) 2208 assert_eq(~(a == b), ~(x == y)) 2209 2210 assert_eq(da.logaddexp(a, b), np.logaddexp(x, y)) 2211 assert_eq(da.logaddexp2(a, b), np.logaddexp2(x, y)) 2212 with pytest.warns(None): # Overflow warning 2213 assert_eq(da.exp(b), np.exp(y)) 2214 assert_eq(da.log(a), np.log(x)) 2215 assert_eq(da.log10(a), np.log10(x)) 2216 assert_eq(da.log1p(a), np.log1p(x)) 2217 with pytest.warns(None): # Overflow warning 2218 assert_eq(da.expm1(b), np.expm1(y)) 2219 assert_eq(da.sqrt(a), np.sqrt(x)) 2220 assert_eq(da.square(a), np.square(x)) 2221 2222 assert_eq(da.sin(a), np.sin(x)) 2223 assert_eq(da.cos(b), np.cos(y)) 2224 assert_eq(da.tan(a), np.tan(x)) 2225 assert_eq(da.arcsin(b / 10), np.arcsin(y / 10)) 2226 assert_eq(da.arccos(b / 10), np.arccos(y / 10)) 2227 assert_eq(da.arctan(b / 10), np.arctan(y / 10)) 2228 assert_eq(da.arctan2(b * 10, a), np.arctan2(y * 10, x)) 2229 assert_eq(da.hypot(b, a), np.hypot(y, x)) 2230 assert_eq(da.sinh(a), np.sinh(x)) 2231 with pytest.warns(None): # Overflow warning 2232 assert_eq(da.cosh(b), np.cosh(y)) 2233 assert_eq(da.tanh(a), np.tanh(x)) 2234 assert_eq(da.arcsinh(b * 10), np.arcsinh(y * 10)) 2235 assert_eq(da.arccosh(b * 10), np.arccosh(y * 10)) 2236 assert_eq(da.arctanh(b / 10), np.arctanh(y / 10)) 2237 assert_eq(da.deg2rad(a), np.deg2rad(x)) 2238 assert_eq(da.rad2deg(a), np.rad2deg(x)) 2239 2240 assert_eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4)) 2241 assert_eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4)) 2242 assert_eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4)) 2243 assert_eq(da.logical_not(a < 1), np.logical_not(x < 1)) 2244 assert_eq(da.maximum(a, 5 - a), np.maximum(a, 5 - a)) 2245 assert_eq(da.minimum(a, 5 - a), np.minimum(a, 5 - a)) 2246 assert_eq(da.fmax(a, 5 - a), np.fmax(a, 5 - a)) 2247 assert_eq(da.fmin(a, 5 - a), np.fmin(a, 5 - a)) 2248 2249 assert_eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y)) 2250 assert_eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y)) 2251 assert_eq(da.isfinite(a), np.isfinite(x)) 2252 assert_eq(da.isinf(a), np.isinf(x)) 2253 assert_eq(da.isnan(a), np.isnan(x)) 2254 assert_eq(da.signbit(a - 3), np.signbit(x - 3)) 2255 assert_eq(da.copysign(a - 3, b), np.copysign(x - 3, y)) 2256 assert_eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y)) 2257 with pytest.warns(None): # overflow warning 2258 assert_eq(da.ldexp(c, c), np.ldexp(z, z)) 2259 assert_eq(da.fmod(a * 12, b), np.fmod(x * 12, y)) 2260 assert_eq(da.floor(a * 0.5), np.floor(x * 0.5)) 2261 assert_eq(da.ceil(a), np.ceil(x)) 2262 assert_eq(da.trunc(a / 2), np.trunc(x / 2)) 2263 2264 assert_eq(da.degrees(b), np.degrees(y)) 2265 assert_eq(da.radians(a), np.radians(x)) 2266 2267 assert_eq(da.rint(a + 0.3), np.rint(x + 0.3)) 2268 assert_eq(da.fix(a - 2.5), np.fix(x - 2.5)) 2269 2270 assert_eq(da.angle(a + 1j), np.angle(x + 1j)) 2271 assert_eq(da.real(a + 1j), np.real(x + 1j)) 2272 assert_eq((a + 1j).real, np.real(x + 1j)) 2273 assert_eq(da.imag(a + 1j), np.imag(x + 1j)) 2274 assert_eq((a + 1j).imag, np.imag(x + 1j)) 2275 assert_eq(da.conj(a + 1j * b), np.conj(x + 1j * y)) 2276 assert_eq((a + 1j * b).conj(), (x + 1j * y).conj()) 2277 2278 assert_eq(da.clip(b, 1, 4), np.clip(y, 1, 4)) 2279 assert_eq(b.clip(1, 4), y.clip(1, 4)) 2280 assert_eq(da.fabs(b), np.fabs(y)) 2281 assert_eq(da.sign(b - 2), np.sign(y - 2)) 2282 assert_eq(da.absolute(b - 2), np.absolute(y - 2)) 2283 assert_eq(da.absolute(b - 2 + 1j), np.absolute(y - 2 + 1j)) 2284 2285 l1, l2 = da.frexp(a) 2286 r1, r2 = np.frexp(x) 2287 assert_eq(l1, r1) 2288 assert_eq(l2, r2) 2289 2290 l1, l2 = da.modf(a) 2291 r1, r2 = np.modf(x) 2292 assert_eq(l1, r1) 2293 assert_eq(l2, r2) 2294 2295 assert_eq(da.around(a, -1), np.around(x, -1)) 2296 2297 2298def test_elemwise_consistent_names(): 2299 a = da.from_array(np.arange(5, dtype="f4"), chunks=(2,)) 2300 b = da.from_array(np.arange(5, dtype="f4"), chunks=(2,)) 2301 assert same_keys(a + b, a + b) 2302 assert same_keys(a + 2, a + 2) 2303 assert same_keys(da.exp(a), da.exp(a)) 2304 assert same_keys(da.exp(a, dtype="f8"), da.exp(a, dtype="f8")) 2305 assert same_keys(da.maximum(a, b), da.maximum(a, b)) 2306 2307 2308def test_optimize(): 2309 x = np.arange(5).astype("f4") 2310 a = da.from_array(x, chunks=(2,)) 2311 expr = a[1:4] + 1 2312 result = optimize(expr.dask, expr.__dask_keys__()) 2313 assert isinstance(result, dict) 2314 assert all(key in result for key in expr.__dask_keys__()) 2315 2316 2317def test_slicing_with_non_ndarrays(): 2318 class ARangeSlice: 2319 dtype = np.dtype("i8") 2320 ndim = 1 2321 2322 def __init__(self, start, stop): 2323 self.start = start 2324 self.stop = stop 2325 2326 def __array__(self): 2327 return np.arange(self.start, self.stop) 2328 2329 class ARangeSlicable: 2330 dtype = np.dtype("i8") 2331 ndim = 1 2332 2333 def __init__(self, n): 2334 self.n = n 2335 2336 @property 2337 def shape(self): 2338 return (self.n,) 2339 2340 def __getitem__(self, key): 2341 return ARangeSlice(key[0].start, key[0].stop) 2342 2343 x = da.from_array(ARangeSlicable(10), chunks=(4,)) 2344 2345 assert_eq((x + 1).sum(), (np.arange(10, dtype=x.dtype) + 1).sum()) 2346 2347 2348@pytest.mark.filterwarnings("ignore:the matrix subclass") 2349def test_getter(): 2350 assert type(getter(np.matrix([[1]]), 0)) is np.ndarray 2351 assert type(getter(np.matrix([[1]]), 0, asarray=False)) is np.matrix 2352 assert_eq(getter([1, 2, 3, 4, 5], slice(1, 4)), np.array([2, 3, 4])) 2353 2354 assert_eq(getter(np.arange(5), (None, slice(None, None))), np.arange(5)[None, :]) 2355 2356 2357def test_size(): 2358 x = da.ones((10, 2), chunks=(3, 1)) 2359 assert x.size == np.array(x).size 2360 assert isinstance(x.size, int) 2361 2362 2363def test_nbytes(): 2364 x = da.ones((10, 2), chunks=(3, 1)) 2365 assert x.nbytes == np.array(x).nbytes 2366 2367 2368def test_itemsize(): 2369 x = da.ones((10, 2), chunks=(3, 1)) 2370 assert x.itemsize == 8 2371 2372 2373def test_Array_normalizes_dtype(): 2374 x = da.ones((3,), chunks=(1,), dtype=int) 2375 assert isinstance(x.dtype, np.dtype) 2376 2377 2378def test_from_array_with_lock(): 2379 x = np.arange(10) 2380 d = da.from_array(x, chunks=5, lock=True) 2381 2382 tasks = [v for k, v in d.dask.items() if k[0] == d.name] 2383 2384 assert hasattr(tasks[0][4], "acquire") 2385 assert len({task[4] for task in tasks}) == 1 2386 2387 assert_eq(d, x) 2388 2389 lock = Lock() 2390 e = da.from_array(x, chunks=5, lock=lock) 2391 f = da.from_array(x, chunks=5, lock=lock) 2392 2393 assert_eq(e + f, x + x) 2394 2395 2396class MyArray: 2397 def __init__(self, x): 2398 self.x = x 2399 self.dtype = x.dtype 2400 self.shape = x.shape 2401 self.ndim = len(x.shape) 2402 2403 def __getitem__(self, i): 2404 return self.x[i] 2405 2406 2407@pytest.mark.parametrize( 2408 "x,chunks", 2409 [ 2410 (np.arange(25).reshape((5, 5)), (5, 5)), 2411 (np.arange(25).reshape((5, 5)), -1), 2412 (np.array([[1]]), 1), 2413 (np.array(1), 1), 2414 ], 2415) 2416def test_from_array_tasks_always_call_getter(x, chunks): 2417 dx = da.from_array(MyArray(x), chunks=chunks, asarray=False) 2418 assert_eq(x, dx) 2419 2420 2421def test_from_array_ndarray_onechunk(): 2422 """ndarray with a single chunk produces a minimal single key dict""" 2423 x = np.array([[1, 2], [3, 4]]) 2424 dx = da.from_array(x, chunks=-1) 2425 assert_eq(x, dx) 2426 assert len(dx.dask) == 1 2427 assert dx.dask[dx.name, 0, 0] is x 2428 2429 2430def test_from_array_ndarray_getitem(): 2431 """For ndarray, don't use getter / getter_nofancy; use the cleaner 2432 operator.getitem""" 2433 x = np.array([[1, 2], [3, 4]]) 2434 dx = da.from_array(x, chunks=(1, 2)) 2435 assert_eq(x, dx) 2436 assert (dx.dask[dx.name, 0, 0] == np.array([[1, 2]])).all() 2437 2438 2439@pytest.mark.parametrize("x", [[1, 2], (1, 2), memoryview(b"abc")]) 2440def test_from_array_list(x): 2441 """Lists, tuples, and memoryviews are automatically converted to ndarray""" 2442 dx = da.from_array(x, chunks=-1) 2443 assert_eq(np.array(x), dx) 2444 assert isinstance(dx.dask[dx.name, 0], np.ndarray) 2445 2446 dx = da.from_array(x, chunks=1) 2447 assert_eq(np.array(x), dx) 2448 assert dx.dask[dx.name, 0][0] == x[0] 2449 2450 2451# On MacOS Python 3.9, the order of the np.ScalarType tuple randomly changes across 2452# interpreter restarts, thus causing pytest-xdist failures; setting PYTHONHASHSEED does 2453# not help 2454@pytest.mark.parametrize( 2455 "type_", sorted((t for t in np.ScalarType if t is not memoryview), key=str) 2456) 2457def test_from_array_scalar(type_): 2458 """Python and numpy scalars are automatically converted to ndarray""" 2459 if type_ == np.datetime64: 2460 x = np.datetime64("2000-01-01") 2461 else: 2462 x = type_(1) 2463 2464 dx = da.from_array(x, chunks=-1) 2465 assert_eq(np.array(x), dx) 2466 assert isinstance( 2467 dx.dask[ 2468 dx.name, 2469 ], 2470 np.ndarray, 2471 ) 2472 2473 2474@pytest.mark.parametrize("asarray,cls", [(True, np.ndarray), (False, np.matrix)]) 2475@pytest.mark.filterwarnings("ignore:the matrix subclass") 2476def test_from_array_no_asarray(asarray, cls): 2477 def assert_chunks_are_of_type(x): 2478 chunks = compute_as_if_collection(Array, x.dask, x.__dask_keys__()) 2479 for c in concat(chunks): 2480 assert type(c) is cls 2481 2482 x = np.matrix(np.arange(100).reshape((10, 10))) 2483 dx = da.from_array(x, chunks=(5, 5), asarray=asarray) 2484 assert_chunks_are_of_type(dx) 2485 assert_chunks_are_of_type(dx[0:5]) 2486 assert_chunks_are_of_type(dx[0:5][:, 0]) 2487 2488 2489def test_from_array_getitem(): 2490 x = np.arange(10) 2491 2492 def my_getitem(x, ind): 2493 return x[ind] 2494 2495 y = da.from_array(x, chunks=(5,), getitem=my_getitem) 2496 2497 for k, v in y.dask.items(): 2498 if isinstance(v, tuple): 2499 assert v[0] is my_getitem 2500 2501 assert_eq(x, y) 2502 2503 2504def test_from_array_minus_one(): 2505 x = np.arange(10) 2506 y = da.from_array(x, -1) 2507 assert y.chunks == ((10,),) 2508 assert_eq(x, y) 2509 2510 2511def test_from_array_copy(): 2512 # Regression test for https://github.com/dask/dask/issues/3751 2513 x = np.arange(10) 2514 y = da.from_array(x, -1) 2515 assert y.npartitions == 1 2516 y_c = y.copy() 2517 assert y is not y_c 2518 assert y.compute() is not y_c.compute() 2519 2520 2521def test_from_array_dask_array(): 2522 x = np.array([[1, 2], [3, 4]]) 2523 dx = da.from_array(x, chunks=(1, 2)) 2524 with pytest.raises(ValueError): 2525 da.from_array(dx) 2526 2527 2528def test_from_array_dask_collection_warns(): 2529 class CustomCollection(np.ndarray): 2530 def __dask_graph__(self): 2531 return {"bar": 1} 2532 2533 x = CustomCollection([1, 2, 3]) 2534 with pytest.warns(UserWarning): 2535 da.from_array(x) 2536 2537 # Ensure da.array warns too 2538 with pytest.warns(UserWarning): 2539 da.array(x) 2540 2541 2542def test_from_array_inline(): 2543 class MyArray(np.ndarray): 2544 pass 2545 2546 a = np.array([1, 2, 3]).view(MyArray) 2547 dsk = dict(da.from_array(a, name="my-array").dask) 2548 assert dsk["my-array"] is a 2549 2550 dsk = dict(da.from_array(a, name="my-array", inline_array=True).dask) 2551 assert "my-array" not in dsk 2552 assert a is dsk[("my-array", 0)][1] 2553 2554 2555@pytest.mark.parametrize("asarray", [da.asarray, da.asanyarray]) 2556def test_asarray(asarray): 2557 assert_eq(asarray([1, 2, 3]), np.asarray([1, 2, 3])) 2558 2559 x = asarray([1, 2, 3]) 2560 assert asarray(x) is x 2561 2562 y = [x[0], 2, x[2]] 2563 assert_eq(asarray(y), x) 2564 2565 2566@pytest.mark.parametrize("asarray", [da.asarray, da.asanyarray]) 2567def test_asarray_dask_dataframe(asarray): 2568 # https://github.com/dask/dask/issues/3885 2569 dd = pytest.importorskip("dask.dataframe") 2570 import pandas as pd 2571 2572 s = dd.from_pandas(pd.Series([1, 2, 3, 4]), 2) 2573 result = asarray(s) 2574 expected = s.values 2575 assert_eq(result, expected) 2576 2577 df = s.to_frame(name="s") 2578 result = asarray(df) 2579 expected = df.values 2580 assert_eq(result, expected) 2581 2582 2583@pytest.mark.parametrize("asarray", [da.asarray, da.asanyarray]) 2584def test_asarray_h5py(asarray): 2585 h5py = pytest.importorskip("h5py") 2586 2587 with tmpfile(".hdf5") as fn: 2588 with h5py.File(fn, mode="a") as f: 2589 d = f.create_dataset("/x", shape=(2, 2), dtype=float) 2590 x = asarray(d) 2591 assert d in x.dask.values() 2592 assert not any(isinstance(v, np.ndarray) for v in x.dask.values()) 2593 2594 2595def test_asarray_chunks(): 2596 with dask.config.set({"array.chunk-size": "100 B"}): 2597 x = np.ones(1000) 2598 d = da.asarray(x) 2599 assert d.npartitions > 1 2600 2601 2602@pytest.mark.filterwarnings("ignore:the matrix subclass") 2603def test_asanyarray(): 2604 x = np.matrix([1, 2, 3]) 2605 dx = da.asanyarray(x) 2606 assert dx.numblocks == (1, 1) 2607 chunks = compute_as_if_collection(Array, dx.dask, dx.__dask_keys__()) 2608 assert isinstance(chunks[0][0], np.matrix) 2609 assert da.asanyarray(dx) is dx 2610 2611 2612def test_asanyarray_dataframe(): 2613 pd = pytest.importorskip("pandas") 2614 dd = pytest.importorskip("dask.dataframe") 2615 2616 df = pd.DataFrame({"x": [1, 2, 3]}) 2617 ddf = dd.from_pandas(df, npartitions=2) 2618 2619 x = np.asanyarray(df) 2620 dx = da.asanyarray(ddf) 2621 assert isinstance(dx, da.Array) 2622 2623 assert_eq(x, dx) 2624 2625 x = np.asanyarray(df.x) 2626 dx = da.asanyarray(ddf.x) 2627 assert isinstance(dx, da.Array) 2628 2629 assert_eq(x, dx) 2630 2631 2632def test_asanyarray_datetime64(): 2633 x = np.array(["2000-01-01"], dtype="datetime64") 2634 dx = da.asanyarray(x) 2635 assert isinstance(dx, da.Array) 2636 assert_eq(x, dx) 2637 2638 2639def test_from_func(): 2640 x = np.arange(10) 2641 f = lambda n: n * x 2642 d = from_func(f, (10,), x.dtype, kwargs={"n": 2}) 2643 2644 assert d.shape == x.shape 2645 assert d.dtype == x.dtype 2646 assert_eq(d, 2 * x) 2647 assert same_keys(d, from_func(f, (10,), x.dtype, kwargs={"n": 2})) 2648 2649 2650def test_concatenate3_2(): 2651 x = np.array([1, 2]) 2652 assert_eq(concatenate3([x, x, x]), np.array([1, 2, 1, 2, 1, 2])) 2653 2654 x = np.array([[1, 2]]) 2655 assert ( 2656 concatenate3([[x, x, x], [x, x, x]]) 2657 == np.array([[1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2]]) 2658 ).all() 2659 2660 assert ( 2661 concatenate3([[x, x], [x, x], [x, x]]) 2662 == np.array([[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]]) 2663 ).all() 2664 2665 x = np.arange(12).reshape((2, 2, 3)) 2666 assert_eq( 2667 concatenate3([[[x, x, x], [x, x, x]], [[x, x, x], [x, x, x]]]), 2668 np.array( 2669 [ 2670 [ 2671 [0, 1, 2, 0, 1, 2, 0, 1, 2], 2672 [3, 4, 5, 3, 4, 5, 3, 4, 5], 2673 [0, 1, 2, 0, 1, 2, 0, 1, 2], 2674 [3, 4, 5, 3, 4, 5, 3, 4, 5], 2675 ], 2676 [ 2677 [6, 7, 8, 6, 7, 8, 6, 7, 8], 2678 [9, 10, 11, 9, 10, 11, 9, 10, 11], 2679 [6, 7, 8, 6, 7, 8, 6, 7, 8], 2680 [9, 10, 11, 9, 10, 11, 9, 10, 11], 2681 ], 2682 [ 2683 [0, 1, 2, 0, 1, 2, 0, 1, 2], 2684 [3, 4, 5, 3, 4, 5, 3, 4, 5], 2685 [0, 1, 2, 0, 1, 2, 0, 1, 2], 2686 [3, 4, 5, 3, 4, 5, 3, 4, 5], 2687 ], 2688 [ 2689 [6, 7, 8, 6, 7, 8, 6, 7, 8], 2690 [9, 10, 11, 9, 10, 11, 9, 10, 11], 2691 [6, 7, 8, 6, 7, 8, 6, 7, 8], 2692 [9, 10, 11, 9, 10, 11, 9, 10, 11], 2693 ], 2694 ] 2695 ), 2696 ) 2697 2698 2699@pytest.mark.parametrize("one_d", [True, False]) 2700@mock.patch.object(da.core, "_concatenate2", wraps=da.core._concatenate2) 2701def test_concatenate3_nep18_dispatching(mock_concatenate2, one_d): 2702 x = EncapsulateNDArray(np.arange(10)) 2703 concat = [x, x] if one_d else [[x[None]], [x[None]]] 2704 result = concatenate3(concat) 2705 assert type(result) is type(x) 2706 mock_concatenate2.assert_called() 2707 mock_concatenate2.reset_mock() 2708 2709 # When all the inputs are supported by plain `np.concatenate`, we should take the concatenate3 2710 # fastpath of allocating the full array up front and writing blocks into it. 2711 concat = [x.arr, x.arr] if one_d else [[x.arr[None]], [x.arr[None]]] 2712 plain_np_result = concatenate3(concat) 2713 mock_concatenate2.assert_not_called() 2714 assert type(plain_np_result) is np.ndarray 2715 2716 2717def test_map_blocks3(): 2718 x = np.arange(10) 2719 y = np.arange(10) * 2 2720 2721 d = da.from_array(x, chunks=5) 2722 e = da.from_array(y, chunks=5) 2723 2724 assert_eq( 2725 da.core.map_blocks(lambda a, b: a + 2 * b, d, e, dtype=d.dtype), x + 2 * y 2726 ) 2727 2728 z = np.arange(100).reshape((10, 10)) 2729 f = da.from_array(z, chunks=5) 2730 2731 func = lambda a, b: a + 2 * b 2732 res = da.core.map_blocks(func, d, f, dtype=d.dtype) 2733 assert_eq(res, x + 2 * z) 2734 assert same_keys(da.core.map_blocks(func, d, f, dtype=d.dtype), res) 2735 2736 assert_eq(da.map_blocks(func, f, d, dtype=d.dtype), z + 2 * x) 2737 2738 2739def test_from_array_with_missing_chunks(): 2740 x = np.random.randn(2, 4, 3) 2741 d = da.from_array(x, chunks=(None, 2, None)) 2742 assert d.chunks == da.from_array(x, chunks=(2, 2, 3)).chunks 2743 2744 2745def test_normalize_chunks(): 2746 assert normalize_chunks(3, (4, 6)) == ((3, 1), (3, 3)) 2747 assert normalize_chunks(((3, 3), (8,)), (6, 8)) == ((3, 3), (8,)) 2748 assert normalize_chunks((4, 5), (9,)) == ((4, 5),) 2749 assert normalize_chunks((4, 5), (9, 9)) == ((4, 4, 1), (5, 4)) 2750 assert normalize_chunks(-1, (5, 5)) == ((5,), (5,)) 2751 assert normalize_chunks((3, -1), (5, 5)) == ((3, 2), (5,)) 2752 assert normalize_chunks((3, None), (5, 5)) == ((3, 2), (5,)) 2753 assert normalize_chunks({0: 3}, (5, 5)) == ((3, 2), (5,)) 2754 assert normalize_chunks([[2, 2], [3, 3]]) == ((2, 2), (3, 3)) 2755 assert normalize_chunks(10, (30, 5)) == ((10, 10, 10), (5,)) 2756 assert normalize_chunks((), (0, 0)) == ((0,), (0,)) 2757 assert normalize_chunks(-1, (0, 3)) == ((0,), (3,)) 2758 assert normalize_chunks("auto", shape=(20,), limit=5, dtype="uint8") == ( 2759 (5, 5, 5, 5), 2760 ) 2761 assert normalize_chunks(("auto", None), (5, 5), dtype=int) == ((5,), (5,)) 2762 2763 with pytest.raises(ValueError): 2764 normalize_chunks(((10,),), (11,)) 2765 with pytest.raises(ValueError): 2766 normalize_chunks(((5,), (5,)), (5,)) 2767 2768 2769def test_align_chunks_to_previous_chunks(): 2770 chunks = normalize_chunks( 2771 "auto", shape=(2000,), previous_chunks=(512,), limit="600 B", dtype=np.uint8 2772 ) 2773 assert chunks == ((512, 512, 512, 2000 - 512 * 3),) 2774 2775 chunks = normalize_chunks( 2776 "auto", shape=(2000,), previous_chunks=(128,), limit="600 B", dtype=np.uint8 2777 ) 2778 assert chunks == ((512, 512, 512, 2000 - 512 * 3),) 2779 2780 chunks = normalize_chunks( 2781 "auto", shape=(2000,), previous_chunks=(512,), limit="1200 B", dtype=np.uint8 2782 ) 2783 assert chunks == ((1024, 2000 - 1024),) 2784 2785 chunks = normalize_chunks( 2786 "auto", 2787 shape=(3, 10211, 10376), 2788 previous_chunks=(1, 512, 512), 2789 limit="1MiB", 2790 dtype=np.float32, 2791 ) 2792 assert chunks[0] == (1, 1, 1) 2793 assert all(c % 512 == 0 for c in chunks[1][:-1]) 2794 assert all(c % 512 == 0 for c in chunks[2][:-1]) 2795 2796 2797def test_raise_on_no_chunks(): 2798 x = da.ones(6, chunks=3) 2799 try: 2800 Array(x.dask, x.name, chunks=None, dtype=x.dtype, shape=None) 2801 assert False 2802 except ValueError as e: 2803 assert "dask" in str(e) 2804 assert ".org" in str(e) 2805 2806 2807def test_chunks_is_immutable(): 2808 x = da.ones(6, chunks=3) 2809 try: 2810 x.chunks = 2 2811 assert False 2812 except TypeError as e: 2813 assert "rechunk(2)" in str(e) 2814 2815 2816def test_raise_on_bad_kwargs(): 2817 x = da.ones(5, chunks=3) 2818 try: 2819 da.minimum(x, foo=None) 2820 except TypeError as e: 2821 assert "minimum" in str(e) 2822 assert "foo" in str(e) 2823 2824 2825def test_long_slice(): 2826 x = np.arange(10000) 2827 d = da.from_array(x, chunks=1) 2828 2829 assert_eq(d[8000:8200], x[8000:8200]) 2830 2831 2832def test_h5py_newaxis(): 2833 h5py = pytest.importorskip("h5py") 2834 2835 with tmpfile("h5") as fn: 2836 with h5py.File(fn, mode="a") as f: 2837 x = f.create_dataset("/x", shape=(10, 10), dtype="f8") 2838 d = da.from_array(x, chunks=(5, 5)) 2839 assert d[None, :, :].compute(scheduler="sync").shape == (1, 10, 10) 2840 assert d[:, None, :].compute(scheduler="sync").shape == (10, 1, 10) 2841 assert d[:, :, None].compute(scheduler="sync").shape == (10, 10, 1) 2842 assert same_keys(d[:, :, None], d[:, :, None]) 2843 2844 2845def test_ellipsis_slicing(): 2846 assert_eq(da.ones(4, chunks=2)[...], np.ones(4)) 2847 2848 2849def test_point_slicing(): 2850 x = np.arange(56).reshape((7, 8)) 2851 d = da.from_array(x, chunks=(3, 4)) 2852 2853 result = d.vindex[[1, 2, 5, 5], [3, 1, 6, 1]] 2854 assert_eq(result, x[[1, 2, 5, 5], [3, 1, 6, 1]]) 2855 2856 result = d.vindex[[0, 1, 6, 0], [0, 1, 0, 7]] 2857 assert_eq(result, x[[0, 1, 6, 0], [0, 1, 0, 7]]) 2858 assert same_keys(result, d.vindex[[0, 1, 6, 0], [0, 1, 0, 7]]) 2859 2860 2861def test_point_slicing_with_full_slice(): 2862 from dask.array.core import _get_axis, _vindex_transpose 2863 2864 x = np.arange(4 * 5 * 6 * 7).reshape((4, 5, 6, 7)) 2865 d = da.from_array(x, chunks=(2, 3, 3, 4)) 2866 2867 inds = [ 2868 [[1, 2, 3], None, [3, 2, 1], [5, 3, 4]], 2869 [[1, 2, 3], None, [4, 3, 2], None], 2870 [[1, 2, 3], [3, 2, 1]], 2871 [[1, 2, 3], [3, 2, 1], [3, 2, 1], [5, 3, 4]], 2872 [[], [], [], None], 2873 [np.array([1, 2, 3]), None, np.array([4, 3, 2]), None], 2874 [None, None, [1, 2, 3], [4, 3, 2]], 2875 [None, [0, 2, 3], None, [0, 3, 2]], 2876 ] 2877 2878 for ind in inds: 2879 slc = [ 2880 i if isinstance(i, (np.ndarray, list)) else slice(None, None) for i in ind 2881 ] 2882 result = d.vindex[tuple(slc)] 2883 2884 # Rotate the expected result accordingly 2885 axis = _get_axis(ind) 2886 expected = _vindex_transpose(x[tuple(slc)], axis) 2887 2888 assert_eq(result, expected) 2889 2890 # Always have the first axis be the length of the points 2891 k = len(next(i for i in ind if isinstance(i, (np.ndarray, list)))) 2892 assert result.shape[0] == k 2893 2894 2895def test_slice_with_floats(): 2896 d = da.ones((5,), chunks=(3,)) 2897 with pytest.raises(IndexError): 2898 d[1.5] 2899 with pytest.raises(IndexError): 2900 d[0:1.5] 2901 with pytest.raises(IndexError): 2902 d[[1, 1.5]] 2903 2904 2905def test_slice_with_integer_types(): 2906 x = np.arange(10) 2907 dx = da.from_array(x, chunks=5) 2908 inds = np.array([0, 3, 6], dtype="u8") 2909 assert_eq(dx[inds], x[inds]) 2910 assert_eq(dx[inds.astype("u4")], x[inds.astype("u4")]) 2911 2912 inds = np.array([0, 3, 6], dtype=np.int64) 2913 assert_eq(dx[inds], x[inds]) 2914 assert_eq(dx[inds.astype("u4")], x[inds.astype("u4")]) 2915 2916 2917def test_index_with_integer_types(): 2918 x = np.arange(10) 2919 dx = da.from_array(x, chunks=5) 2920 inds = int(3) 2921 assert_eq(dx[inds], x[inds]) 2922 2923 inds = np.int64(3) 2924 assert_eq(dx[inds], x[inds]) 2925 2926 2927def test_vindex_basic(): 2928 x = np.arange(56).reshape((7, 8)) 2929 d = da.from_array(x, chunks=(3, 4)) 2930 2931 # cases where basic and advanced indexing coincide 2932 result = d.vindex[0] 2933 assert_eq(result, x[0]) 2934 2935 result = d.vindex[0, 1] 2936 assert_eq(result, x[0, 1]) 2937 2938 result = d.vindex[[0, 1], ::-1] # slices last 2939 assert_eq(result, x[:2, ::-1]) 2940 2941 2942def test_vindex_nd(): 2943 x = np.arange(56).reshape((7, 8)) 2944 d = da.from_array(x, chunks=(3, 4)) 2945 2946 result = d.vindex[[[0, 1], [6, 0]], [[0, 1], [0, 7]]] 2947 assert_eq(result, x[[[0, 1], [6, 0]], [[0, 1], [0, 7]]]) 2948 2949 result = d.vindex[np.arange(7)[:, None], np.arange(8)[None, :]] 2950 assert_eq(result, x) 2951 2952 result = d.vindex[np.arange(7)[None, :], np.arange(8)[:, None]] 2953 assert_eq(result, x.T) 2954 2955 2956def test_vindex_negative(): 2957 x = np.arange(10) 2958 d = da.from_array(x, chunks=(5, 5)) 2959 2960 result = d.vindex[np.array([0, -1])] 2961 assert_eq(result, x[np.array([0, -1])]) 2962 2963 2964def test_vindex_errors(): 2965 d = da.ones((5, 5, 5), chunks=(3, 3, 3)) 2966 pytest.raises(IndexError, lambda: d.vindex[np.newaxis]) 2967 pytest.raises(IndexError, lambda: d.vindex[[1, 2], [1, 2, 3]]) 2968 pytest.raises(IndexError, lambda: d.vindex[[True] * 5]) 2969 pytest.raises(IndexError, lambda: d.vindex[[0], [5]]) 2970 pytest.raises(IndexError, lambda: d.vindex[[0], [-6]]) 2971 2972 2973def test_vindex_merge(): 2974 from dask.array.core import _vindex_merge 2975 2976 locations = [1], [2, 0] 2977 values = [np.array([[1, 2, 3]]), np.array([[10, 20, 30], [40, 50, 60]])] 2978 2979 assert ( 2980 _vindex_merge(locations, values) 2981 == np.array([[40, 50, 60], [1, 2, 3], [10, 20, 30]]) 2982 ).all() 2983 2984 2985def test_vindex_identity(): 2986 rng = da.random.RandomState(42) 2987 a, b = 10, 20 2988 2989 x = rng.random(a, chunks=a // 2) 2990 assert x is x.vindex[:] 2991 assert x is x.vindex[:a] 2992 pytest.raises(IndexError, lambda: x.vindex[: a - 1]) 2993 pytest.raises(IndexError, lambda: x.vindex[1:]) 2994 pytest.raises(IndexError, lambda: x.vindex[0:a:2]) 2995 2996 x = rng.random((a, b), chunks=(a // 2, b // 2)) 2997 assert x is x.vindex[:, :] 2998 assert x is x.vindex[:a, :b] 2999 pytest.raises(IndexError, lambda: x.vindex[:, : b - 1]) 3000 pytest.raises(IndexError, lambda: x.vindex[:, 1:]) 3001 pytest.raises(IndexError, lambda: x.vindex[:, 0:b:2]) 3002 3003 3004def test_empty_array(): 3005 assert_eq(np.arange(0), da.arange(0, chunks=5)) 3006 3007 3008def test_memmap(): 3009 with tmpfile("npy") as fn_1: 3010 with tmpfile("npy") as fn_2: 3011 try: 3012 x = da.arange(100, chunks=15) 3013 target = np.memmap(fn_1, shape=x.shape, mode="w+", dtype=x.dtype) 3014 3015 x.store(target) 3016 3017 assert_eq(target, x, check_type=False) 3018 3019 np.save(fn_2, target) 3020 3021 assert_eq(np.load(fn_2, mmap_mode="r"), x, check_type=False) 3022 finally: 3023 target._mmap.close() 3024 3025 3026def test_to_npy_stack(): 3027 x = np.arange(5 * 10 * 10).reshape((5, 10, 10)) 3028 d = da.from_array(x, chunks=(2, 4, 4)) 3029 3030 with tmpdir() as dirname: 3031 stackdir = os.path.join(dirname, "test") 3032 da.to_npy_stack(stackdir, d, axis=0) 3033 assert os.path.exists(os.path.join(stackdir, "0.npy")) 3034 assert (np.load(os.path.join(stackdir, "1.npy")) == x[2:4]).all() 3035 3036 e = da.from_npy_stack(stackdir) 3037 assert_eq(d, e) 3038 3039 3040def test_view(): 3041 x = np.arange(56).reshape((7, 8)) 3042 d = da.from_array(x, chunks=(2, 3)) 3043 3044 assert_eq(x.view(), d.view()) 3045 assert_eq(x.view("i4"), d.view("i4")) 3046 assert_eq(x.view("i2"), d.view("i2")) 3047 assert all(isinstance(s, int) for s in d.shape) 3048 3049 x = np.arange(8, dtype="i1") 3050 d = da.from_array(x, chunks=(4,)) 3051 assert_eq(x.view("i4"), d.view("i4")) 3052 3053 with pytest.raises(ValueError): 3054 x = np.arange(8, dtype="i1") 3055 d = da.from_array(x, chunks=(3,)) 3056 d.view("i4") 3057 3058 with pytest.raises(ValueError): 3059 d.view("i4", order="asdf") 3060 3061 3062def test_view_fortran(): 3063 x = np.asfortranarray(np.arange(64).reshape((8, 8))) 3064 d = da.from_array(x, chunks=(2, 3)) 3065 assert_eq(x.T.view("i4").T, d.view("i4", order="F")) 3066 assert_eq(x.T.view("i2").T, d.view("i2", order="F")) 3067 3068 3069def test_h5py_tokenize(): 3070 h5py = pytest.importorskip("h5py") 3071 with tmpfile("hdf5") as fn1: 3072 with tmpfile("hdf5") as fn2: 3073 f = h5py.File(fn1, mode="a") 3074 g = h5py.File(fn2, mode="a") 3075 3076 f["x"] = np.arange(10).astype(float) 3077 g["x"] = np.ones(10).astype(float) 3078 3079 x1 = f["x"] 3080 x2 = g["x"] 3081 3082 assert tokenize(x1) != tokenize(x2) 3083 3084 3085def test_map_blocks_with_changed_dimension(): 3086 x = np.arange(56).reshape((7, 8)) 3087 d = da.from_array(x, chunks=(7, 4)) 3088 3089 e = d.map_blocks(lambda b: b.sum(axis=0), chunks=(4,), drop_axis=0, dtype=d.dtype) 3090 assert e.chunks == ((4, 4),) 3091 assert_eq(e, x.sum(axis=0)) 3092 3093 # Provided chunks have wrong shape 3094 with pytest.raises(ValueError): 3095 d.map_blocks(lambda b: b.sum(axis=0), chunks=(), drop_axis=0) 3096 3097 with pytest.raises(ValueError): 3098 d.map_blocks(lambda b: b.sum(axis=0), chunks=((4, 4, 4),), drop_axis=0) 3099 3100 with pytest.raises(ValueError): 3101 d.map_blocks(lambda b: b.sum(axis=1), chunks=((3, 4),), drop_axis=1) 3102 3103 d = da.from_array(x, chunks=(4, 8)) 3104 e = d.map_blocks(lambda b: b.sum(axis=1), drop_axis=1, dtype=d.dtype) 3105 assert e.chunks == ((4, 3),) 3106 assert_eq(e, x.sum(axis=1)) 3107 3108 x = np.arange(64).reshape((8, 8)) 3109 d = da.from_array(x, chunks=(4, 4)) 3110 e = d.map_blocks( 3111 lambda b: b[None, :, :, None], 3112 chunks=(1, 4, 4, 1), 3113 new_axis=[0, 3], 3114 dtype=d.dtype, 3115 ) 3116 assert e.chunks == ((1,), (4, 4), (4, 4), (1,)) 3117 assert_eq(e, x[None, :, :, None]) 3118 3119 e = d.map_blocks(lambda b: b[None, :, :, None], new_axis=[0, 3], dtype=d.dtype) 3120 assert e.chunks == ((1,), (4, 4), (4, 4), (1,)) 3121 assert_eq(e, x[None, :, :, None]) 3122 3123 # Adding axis with a gap 3124 with pytest.raises(ValueError): 3125 d.map_blocks(lambda b: b, new_axis=(3, 4)) 3126 3127 # Both new_axis and drop_axis 3128 d = da.from_array(x, chunks=(8, 4)) 3129 e = d.map_blocks( 3130 lambda b: b.sum(axis=0)[:, None, None], 3131 drop_axis=0, 3132 new_axis=(1, 2), 3133 dtype=d.dtype, 3134 ) 3135 assert e.chunks == ((4, 4), (1,), (1,)) 3136 assert_eq(e, x.sum(axis=0)[:, None, None]) 3137 3138 d = da.from_array(x, chunks=(4, 8)) 3139 e = d.map_blocks( 3140 lambda b: b.sum(axis=1)[:, None, None], 3141 drop_axis=1, 3142 new_axis=(1, 2), 3143 dtype=d.dtype, 3144 ) 3145 assert e.chunks == ((4, 4), (1,), (1,)) 3146 assert_eq(e, x.sum(axis=1)[:, None, None]) 3147 3148 3149def test_map_blocks_with_negative_drop_axis(): 3150 x = np.arange(56).reshape((7, 8)) 3151 d = da.from_array(x, chunks=(7, 4)) 3152 3153 for drop_axis in [0, -2]: 3154 # test with equivalent positive and negative drop_axis 3155 e = d.map_blocks( 3156 lambda b: b.sum(axis=0), chunks=(4,), drop_axis=drop_axis, dtype=d.dtype 3157 ) 3158 assert e.chunks == ((4, 4),) 3159 assert_eq(e, x.sum(axis=0)) 3160 3161 3162def test_map_blocks_with_invalid_drop_axis(): 3163 x = np.arange(56).reshape((7, 8)) 3164 d = da.from_array(x, chunks=(7, 4)) 3165 3166 for drop_axis in [x.ndim, -x.ndim - 1]: 3167 with pytest.raises(ValueError): 3168 d.map_blocks( 3169 lambda b: b.sum(axis=0), chunks=(4,), drop_axis=drop_axis, dtype=d.dtype 3170 ) 3171 3172 3173def test_map_blocks_with_changed_dimension_and_broadcast_chunks(): 3174 # https://github.com/dask/dask/issues/4299 3175 a = da.from_array([1, 2, 3], 3) 3176 b = da.from_array(np.array([0, 1, 2, 0, 1, 2]), chunks=3) 3177 result = da.map_blocks(operator.add, a, b, chunks=b.chunks) 3178 expected = da.from_array(np.array([1, 3, 5, 1, 3, 5]), chunks=3) 3179 assert_eq(result, expected) 3180 3181 3182def test_broadcast_chunks(): 3183 assert broadcast_chunks() == () 3184 3185 assert broadcast_chunks(((2, 3),)) == ((2, 3),) 3186 3187 assert broadcast_chunks(((5, 5),), ((5, 5),)) == ((5, 5),) 3188 3189 a = ((10, 10, 10), (5, 5)) 3190 b = ((5, 5),) 3191 assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5)) 3192 assert broadcast_chunks(b, a) == ((10, 10, 10), (5, 5)) 3193 3194 a = ((10, 10, 10), (5, 5)) 3195 b = ((1,), (5, 5)) 3196 assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5)) 3197 3198 a = ((10, 10, 10), (5, 5)) 3199 b = ((3, 3), (5, 5)) 3200 with pytest.raises(ValueError): 3201 broadcast_chunks(a, b) 3202 3203 a = ((1,), (5, 5)) 3204 b = ((1,), (5, 5)) 3205 assert broadcast_chunks(a, b) == a 3206 3207 a = ((1,), (np.nan, np.nan, np.nan)) 3208 b = ((3, 3), (1,)) 3209 r = broadcast_chunks(a, b) 3210 assert r[0] == b[0] and np.allclose(r[1], a[1], equal_nan=True) 3211 3212 a = ((3, 3), (1,)) 3213 b = ((1,), (np.nan, np.nan, np.nan)) 3214 r = broadcast_chunks(a, b) 3215 assert r[0] == a[0] and np.allclose(r[1], b[1], equal_nan=True) 3216 3217 a = ((3, 3), (5, 5)) 3218 b = ((1,), (np.nan, np.nan, np.nan)) 3219 with pytest.raises(ValueError): 3220 broadcast_chunks(a, b) 3221 3222 3223def test_chunks_error(): 3224 x = np.ones((10, 10)) 3225 with pytest.raises(ValueError): 3226 da.from_array(x, chunks=(5,)) 3227 3228 3229def test_array_compute_forward_kwargs(): 3230 x = da.arange(10, chunks=2).sum() 3231 x.compute(bogus_keyword=10) 3232 3233 3234def test_dont_fuse_outputs(): 3235 dsk = {("x", 0): np.array([1, 2]), ("x", 1): (inc, ("x", 0))} 3236 3237 a = da.Array(dsk, "x", chunks=(2,), shape=(4,), dtype=np.array([1]).dtype) 3238 assert_eq(a, np.array([1, 2, 2, 3], dtype=a.dtype)) 3239 3240 3241def test_dont_dealias_outputs(): 3242 dsk = { 3243 ("x", 0, 0): np.ones((2, 2)), 3244 ("x", 0, 1): np.ones((2, 2)), 3245 ("x", 1, 0): np.ones((2, 2)), 3246 ("x", 1, 1): ("x", 0, 0), 3247 } 3248 3249 a = da.Array(dsk, "x", chunks=(2, 2), shape=(4, 4), dtype=np.ones(1).dtype) 3250 assert_eq(a, np.ones((4, 4))) 3251 3252 3253def test_timedelta_op(): 3254 x = np.array([np.timedelta64(10, "h")]) 3255 y = np.timedelta64(1, "h") 3256 a = da.from_array(x, chunks=(1,)) / y 3257 assert a.compute() == x / y 3258 3259 3260def test_to_delayed(): 3261 x = da.random.random((4, 4), chunks=(2, 2)) 3262 y = x + 10 3263 3264 [[a, b], [c, d]] = y.to_delayed() 3265 assert_eq(a.compute(), y[:2, :2]) 3266 3267 s = 2 3268 x = da.from_array(np.array(s), chunks=0) 3269 a = x.to_delayed()[tuple()] 3270 assert a.compute() == s 3271 3272 3273def test_to_delayed_optimize_graph(): 3274 x = da.ones((4, 4), chunks=(2, 2)) 3275 y = x[1:][1:][1:][:, 1:][:, 1:][:, 1:] 3276 3277 # optimizations 3278 d = y.to_delayed().flatten().tolist()[0] 3279 assert len([k for k in d.dask if k[0].startswith("getitem")]) == 1 3280 3281 # no optimizations 3282 d2 = y.to_delayed(optimize_graph=False).flatten().tolist()[0] 3283 assert dict(d2.dask) == dict(y.dask) 3284 3285 assert (d.compute() == d2.compute()).all() 3286 3287 3288def test_cumulative(): 3289 x = da.arange(20, chunks=5) 3290 assert_eq(x.cumsum(axis=0), np.arange(20).cumsum()) 3291 assert_eq(x.cumprod(axis=0), np.arange(20).cumprod()) 3292 3293 assert_eq(da.nancumsum(x, axis=0), nancumsum(np.arange(20))) 3294 assert_eq(da.nancumprod(x, axis=0), nancumprod(np.arange(20))) 3295 3296 a = np.random.random(20) 3297 rs = np.random.RandomState(0) 3298 a[rs.rand(*a.shape) < 0.5] = np.nan 3299 x = da.from_array(a, chunks=5) 3300 assert_eq(da.nancumsum(x, axis=0), nancumsum(a)) 3301 assert_eq(da.nancumprod(x, axis=0), nancumprod(a)) 3302 3303 a = np.random.random((20, 24)) 3304 x = da.from_array(a, chunks=(6, 5)) 3305 assert_eq(x.cumsum(axis=0), a.cumsum(axis=0)) 3306 assert_eq(x.cumsum(axis=1), a.cumsum(axis=1)) 3307 assert_eq(x.cumprod(axis=0), a.cumprod(axis=0)) 3308 assert_eq(x.cumprod(axis=1), a.cumprod(axis=1)) 3309 3310 assert_eq(da.nancumsum(x, axis=0), nancumsum(a, axis=0)) 3311 assert_eq(da.nancumsum(x, axis=1), nancumsum(a, axis=1)) 3312 assert_eq(da.nancumprod(x, axis=0), nancumprod(a, axis=0)) 3313 assert_eq(da.nancumprod(x, axis=1), nancumprod(a, axis=1)) 3314 3315 a = np.random.random((20, 24)) 3316 rs = np.random.RandomState(0) 3317 a[rs.rand(*a.shape) < 0.5] = np.nan 3318 x = da.from_array(a, chunks=(6, 5)) 3319 assert_eq(da.nancumsum(x, axis=0), nancumsum(a, axis=0)) 3320 assert_eq(da.nancumsum(x, axis=1), nancumsum(a, axis=1)) 3321 assert_eq(da.nancumprod(x, axis=0), nancumprod(a, axis=0)) 3322 assert_eq(da.nancumprod(x, axis=1), nancumprod(a, axis=1)) 3323 3324 a = np.random.random((20, 24, 13)) 3325 x = da.from_array(a, chunks=(6, 5, 4)) 3326 for axis in [0, 1, 2, -1, -2, -3]: 3327 assert_eq(x.cumsum(axis=axis), a.cumsum(axis=axis)) 3328 assert_eq(x.cumprod(axis=axis), a.cumprod(axis=axis)) 3329 3330 assert_eq(da.nancumsum(x, axis=axis), nancumsum(a, axis=axis)) 3331 assert_eq(da.nancumprod(x, axis=axis), nancumprod(a, axis=axis)) 3332 3333 a = np.random.random((20, 24, 13)) 3334 rs = np.random.RandomState(0) 3335 a[rs.rand(*a.shape) < 0.5] = np.nan 3336 x = da.from_array(a, chunks=(6, 5, 4)) 3337 for axis in [0, 1, 2, -1, -2, -3]: 3338 assert_eq(da.nancumsum(x, axis=axis), nancumsum(a, axis=axis)) 3339 assert_eq(da.nancumprod(x, axis=axis), nancumprod(a, axis=axis)) 3340 3341 with pytest.raises(ValueError): 3342 x.cumsum(axis=3) 3343 3344 with pytest.raises(ValueError): 3345 x.cumsum(axis=-4) 3346 3347 3348def test_from_delayed(): 3349 v = delayed(np.ones)((5, 3)) 3350 x = from_delayed(v, shape=(5, 3), dtype=np.ones(0).dtype) 3351 assert isinstance(x, Array) 3352 assert_eq(x, np.ones((5, 3))) 3353 3354 3355def test_from_delayed_meta(): 3356 v = delayed(np.ones)((5, 3)) 3357 x = from_delayed(v, shape=(5, 3), meta=np.ones(0)) 3358 assert isinstance(x, Array) 3359 assert isinstance(x._meta, np.ndarray) 3360 3361 3362def test_A_property(): 3363 x = da.ones(5, chunks=(2,)) 3364 assert x.A is x 3365 3366 3367def test_copy_mutate(): 3368 x = da.arange(5, chunks=(2,)) 3369 y = x.copy() 3370 memo = {} 3371 y2 = copy.deepcopy(x, memo=memo) 3372 x[x % 2 == 0] = -1 3373 3374 xx = np.arange(5) 3375 xx[xx % 2 == 0] = -1 3376 assert_eq(x, xx) 3377 3378 assert_eq(y, np.arange(5)) 3379 assert_eq(y2, np.arange(5)) 3380 assert memo[id(x)] is y2 3381 3382 3383def test_npartitions(): 3384 assert da.ones(5, chunks=(2,)).npartitions == 3 3385 assert da.ones((5, 5), chunks=(2, 3)).npartitions == 6 3386 3387 3388def test_astype_gh1151(): 3389 a = np.arange(5).astype(np.int32) 3390 b = da.from_array(a, (1,)) 3391 assert_eq(a.astype(np.int16), b.astype(np.int16)) 3392 3393 3394def test_elemwise_name(): 3395 assert (da.ones(5, chunks=2) + 1).name.startswith("add-") 3396 3397 3398def test_map_blocks_name(): 3399 assert da.ones(5, chunks=2).map_blocks(inc).name.startswith("inc-") 3400 3401 3402def test_from_array_names(): 3403 pytest.importorskip("distributed") 3404 3405 x = np.ones(10) 3406 d = da.from_array(x, chunks=2) 3407 3408 names = countby(key_split, d.dask) 3409 assert set(names.values()) == {5} 3410 3411 3412@pytest.mark.parametrize( 3413 "array", 3414 [ 3415 da.arange(100, chunks=25), 3416 da.ones((10, 10), chunks=25), 3417 ], 3418) 3419def test_array_picklable(array): 3420 from pickle import dumps, loads 3421 3422 a2 = loads(dumps(array)) 3423 assert_eq(array, a2) 3424 3425 3426def test_from_array_raises_on_bad_chunks(): 3427 x = np.ones(10) 3428 3429 with pytest.raises(ValueError): 3430 da.from_array(x, chunks=(5, 5, 5)) 3431 3432 # with pytest.raises(ValueError): 3433 # da.from_array(x, chunks=100) 3434 3435 with pytest.raises(ValueError): 3436 da.from_array(x, chunks=((5, 5, 5),)) 3437 3438 3439def test_concatenate_axes(): 3440 x = np.ones((2, 2, 2)) 3441 3442 assert_eq(concatenate_axes([x, x], axes=[0]), np.ones((4, 2, 2))) 3443 assert_eq(concatenate_axes([x, x, x], axes=[0]), np.ones((6, 2, 2))) 3444 assert_eq(concatenate_axes([x, x], axes=[1]), np.ones((2, 4, 2))) 3445 assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 1]), np.ones((4, 4, 2))) 3446 assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 2]), np.ones((4, 2, 4))) 3447 assert_eq(concatenate_axes([[x, x, x], [x, x, x]], axes=[1, 2]), np.ones((2, 4, 6))) 3448 3449 with pytest.raises(ValueError): 3450 concatenate_axes( 3451 [[x, x], [x, x]], axes=[0] 3452 ) # not all nested lists accounted for 3453 with pytest.raises(ValueError): 3454 concatenate_axes([x, x], axes=[0, 1, 2, 3]) # too many axes 3455 3456 3457def test_blockwise_concatenate(): 3458 x = da.ones((4, 4, 4), chunks=(2, 2, 2)) 3459 y = da.ones((4, 4), chunks=(2, 2)) 3460 3461 def f(a, b): 3462 assert isinstance(a, np.ndarray) 3463 assert isinstance(b, np.ndarray) 3464 3465 assert a.shape == (2, 4, 4) 3466 assert b.shape == (4, 4) 3467 3468 return (a + b).sum(axis=(1, 2)) 3469 3470 z = da.blockwise(f, "i", x, "ijk", y, "jk", concatenate=True, dtype=x.dtype) 3471 assert_eq(z, np.ones(4) * 32) 3472 3473 z = da.blockwise(add, "ij", y, "ij", y, "ij", concatenate=True, dtype=x.dtype) 3474 assert_eq(z, np.ones((4, 4)) * 2) 3475 3476 def f(a, b, c): 3477 assert isinstance(a, np.ndarray) 3478 assert isinstance(b, np.ndarray) 3479 assert isinstance(c, np.ndarray) 3480 3481 assert a.shape == (4, 2, 4) 3482 assert b.shape == (4, 4) 3483 assert c.shape == (4, 2) 3484 3485 return np.ones(2) 3486 3487 z = da.blockwise( 3488 f, "j", x, "ijk", y, "ki", y, "ij", concatenate=True, dtype=x.dtype 3489 ) 3490 assert_eq(z, np.ones(4), check_shape=False) 3491 3492 3493def test_common_blockdim(): 3494 assert common_blockdim([(5,), (5,)]) == (5,) 3495 assert common_blockdim([(5,), (2, 3)]) == (2, 3) 3496 assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5) 3497 assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5) 3498 assert common_blockdim([(5, 2, 3), (2, 3, 5)]) == (2, 3, 2, 3) 3499 3500 assert common_blockdim([(1, 2), (2, 1)]) == (1, 1, 1) 3501 assert common_blockdim([(1, 2, 2), (2, 1, 2), (2, 2, 1)]) == (1, 1, 1, 1, 1) 3502 3503 3504def test_uneven_chunks_that_fit_neatly(): 3505 x = da.arange(10, chunks=((5, 5),)) 3506 y = da.ones(10, chunks=((5, 2, 3),)) 3507 3508 assert_eq(x + y, np.arange(10) + np.ones(10)) 3509 3510 z = x + y 3511 assert z.chunks == ((5, 2, 3),) 3512 3513 3514def test_elemwise_uneven_chunks(): 3515 x = da.arange(10, chunks=((4, 6),)) 3516 y = da.ones(10, chunks=((6, 4),)) 3517 3518 assert_eq(x + y, np.arange(10) + np.ones(10)) 3519 3520 z = x + y 3521 assert z.chunks == ((4, 2, 4),) 3522 3523 x = da.random.random((10, 10), chunks=((4, 6), (5, 2, 3))) 3524 y = da.random.random((4, 10, 10), chunks=((2, 2), (6, 4), (2, 3, 5))) 3525 3526 z = x + y 3527 assert_eq(x + y, x.compute() + y.compute()) 3528 assert z.chunks == ((2, 2), (4, 2, 4), (2, 3, 2, 3)) 3529 3530 3531def test_uneven_chunks_blockwise(): 3532 x = da.random.random((10, 10), chunks=((2, 3, 2, 3), (5, 5))) 3533 y = da.random.random((10, 10), chunks=((4, 4, 2), (4, 2, 4))) 3534 z = da.blockwise(np.dot, "ik", x, "ij", y, "jk", dtype=x.dtype, concatenate=True) 3535 assert z.chunks == (x.chunks[0], y.chunks[1]) 3536 3537 assert_eq(z, x.compute().dot(y)) 3538 3539 3540def test_warn_bad_rechunking(): 3541 x = da.ones((20, 20), chunks=(20, 1)) 3542 y = da.ones((20, 20), chunks=(1, 20)) 3543 3544 with pytest.warns(da.core.PerformanceWarning, match="factor of 20"): 3545 x + y 3546 3547 3548def test_concatenate_stack_dont_warn(): 3549 with warnings.catch_warnings(record=True) as record: 3550 da.concatenate([da.ones(2, chunks=1)] * 62) 3551 assert not record 3552 3553 with warnings.catch_warnings(record=True) as record: 3554 da.stack([da.ones(2, chunks=1)] * 62) 3555 assert not record 3556 3557 3558def test_map_blocks_delayed(): 3559 x = da.ones((10, 10), chunks=(5, 5)) 3560 y = np.ones((5, 5)) 3561 3562 z = x.map_blocks(add, y, dtype=x.dtype) 3563 3564 yy = delayed(y) 3565 zz = x.map_blocks(add, yy, dtype=x.dtype) 3566 3567 assert_eq(z, zz) 3568 3569 assert yy.key in zz.dask 3570 3571 3572def test_no_chunks(): 3573 X = np.arange(11) 3574 dsk = {("x", 0): np.arange(5), ("x", 1): np.arange(5, 11)} 3575 x = Array(dsk, "x", ((np.nan, np.nan),), np.arange(1).dtype) 3576 assert_eq(x + 1, X + 1) 3577 assert_eq(x.sum(), X.sum()) 3578 assert_eq((x + 1).std(), (X + 1).std()) 3579 assert_eq((x + x).std(), (X + X).std()) 3580 assert_eq((x + x).std(keepdims=True), (X + X).std(keepdims=True)) 3581 3582 3583def test_no_chunks_2d(): 3584 X = np.arange(24).reshape((4, 6)) 3585 x = da.from_array(X, chunks=(2, 2)) 3586 x._chunks = ((np.nan, np.nan), (np.nan, np.nan, np.nan)) 3587 3588 with pytest.warns(None): # zero division warning 3589 assert_eq(da.log(x), np.log(X)) 3590 assert_eq(x.T, X.T) 3591 assert_eq(x.sum(axis=0, keepdims=True), X.sum(axis=0, keepdims=True)) 3592 assert_eq(x.sum(axis=1, keepdims=True), X.sum(axis=1, keepdims=True)) 3593 assert_eq(x.dot(x.T + 1), X.dot(X.T + 1)) 3594 3595 3596def test_no_chunks_yes_chunks(): 3597 X = np.arange(24).reshape((4, 6)) 3598 x = da.from_array(X, chunks=(2, 2)) 3599 x._chunks = ((2, 2), (np.nan, np.nan, np.nan)) 3600 3601 assert (x + 1).chunks == ((2, 2), (np.nan, np.nan, np.nan)) 3602 assert (x.T).chunks == ((np.nan, np.nan, np.nan), (2, 2)) 3603 assert (x.dot(x.T)).chunks == ((2, 2), (2, 2)) 3604 3605 3606def test_raise_informative_errors_no_chunks(): 3607 X = np.arange(10) 3608 a = da.from_array(X, chunks=(5, 5)) 3609 a._chunks = ((np.nan, np.nan),) 3610 3611 b = da.from_array(X, chunks=(4, 4, 2)) 3612 b._chunks = ((np.nan, np.nan, np.nan),) 3613 3614 for op in [ 3615 lambda: a + b, 3616 lambda: a[1], 3617 lambda: a[::2], 3618 lambda: a[-5], 3619 lambda: a.rechunk(3), 3620 lambda: a.reshape(2, 5), 3621 ]: 3622 with pytest.raises(ValueError) as e: 3623 op() 3624 if "chunk" not in str(e.value) or "unknown" not in str(e.value): 3625 op() 3626 3627 3628def test_no_chunks_slicing_2d(): 3629 X = np.arange(24).reshape((4, 6)) 3630 x = da.from_array(X, chunks=(2, 2)) 3631 x._chunks = ((2, 2), (np.nan, np.nan, np.nan)) 3632 3633 assert_eq(x[0], X[0]) 3634 3635 for op in [lambda: x[:, 4], lambda: x[:, ::2], lambda: x[0, 2:4]]: 3636 with pytest.raises(ValueError, match="chunk sizes are unknown"): 3637 op() 3638 3639 3640def test_index_array_with_array_1d(): 3641 x = np.arange(10) 3642 dx = da.from_array(x, chunks=(5,)) 3643 dx._chunks = ((np.nan, np.nan),) 3644 3645 assert_eq(x[x > 6], dx[dx > 6]) 3646 assert_eq(x[x % 2 == 0], dx[dx % 2 == 0]) 3647 3648 dy = da.ones(11, chunks=(3,)) 3649 3650 with pytest.raises(ValueError): 3651 dx[dy > 5] 3652 3653 3654def test_index_array_with_array_2d(): 3655 x = np.arange(24).reshape((4, 6)) 3656 dx = da.from_array(x, chunks=(2, 2)) 3657 3658 assert_eq(x[x > 6], dx[dx > 6]) 3659 assert_eq(x[x % 2 == 0], dx[dx % 2 == 0]) 3660 3661 # Test with unknown chunks 3662 dx._chunks = ((2, 2), (np.nan, np.nan, np.nan)) 3663 3664 with pytest.warns(UserWarning, match="different ordering") as record: 3665 assert sorted(x[x % 2 == 0].tolist()) == sorted( 3666 dx[dx % 2 == 0].compute().tolist() 3667 ) 3668 assert sorted(x[x > 6].tolist()) == sorted(dx[dx > 6].compute().tolist()) 3669 3670 assert len(record) == 2 3671 3672 3673@pytest.mark.xfail(reason="Chunking does not align well") 3674def test_index_array_with_array_3d_2d(): 3675 x = np.arange(4 ** 3).reshape((4, 4, 4)) 3676 dx = da.from_array(x, chunks=(2, 2, 2)) 3677 3678 ind = np.random.random((4, 4)) > 0.5 3679 ind = np.arange(4 ** 2).reshape((4, 4)) % 2 == 0 3680 dind = da.from_array(ind, (2, 2)) 3681 3682 assert_eq(x[ind], dx[dind]) 3683 assert_eq(x[:, ind], dx[:, dind]) 3684 3685 3686def test_setitem_1d(): 3687 x = np.arange(10) 3688 dx = da.from_array(x.copy(), chunks=(5,)) 3689 3690 x[x > 6] = -1 3691 x[x % 2 == 0] = -2 3692 3693 dx[dx > 6] = -1 3694 dx[dx % 2 == 0] = -2 3695 3696 assert_eq(x, dx) 3697 3698 3699def test_setitem_2d(): 3700 x = np.arange(24).reshape((4, 6)) 3701 dx = da.from_array(x.copy(), chunks=(2, 2)) 3702 3703 x[x > 6] = -1 3704 x[x % 2 == 0] = -2 3705 3706 dx[dx > 6] = -1 3707 dx[dx % 2 == 0] = -2 3708 3709 assert_eq(x, dx) 3710 3711 3712def test_setitem_extended_API_0d(): 3713 # 0-d array 3714 x = np.array(9) 3715 dx = da.from_array(9) 3716 3717 x[()] = -1 3718 dx[()] = -1 3719 assert_eq(x, dx.compute()) 3720 3721 x[...] = -11 3722 dx[...] = -11 3723 assert_eq(x, dx.compute()) 3724 3725 3726def test_setitem_extended_API_1d(): 3727 # 1-d array 3728 x = np.arange(10) 3729 dx = da.from_array(x.copy(), chunks=(4, 6)) 3730 3731 x[2:8:2] = -1 3732 dx[2:8:2] = -1 3733 assert_eq(x, dx.compute()) 3734 3735 x[...] = -11 3736 dx[...] = -11 3737 assert_eq(x, dx.compute()) 3738 3739 3740@pytest.mark.parametrize( 3741 "index, value", 3742 [ 3743 [Ellipsis, -1], 3744 [(slice(None, None, 2), slice(None, None, -1)), -1], 3745 [slice(1, None, 2), -1], 3746 [[4, 3, 1], -1], 3747 [(Ellipsis, 4), -1], 3748 [5, -1], 3749 [(slice(None), 2), range(6)], 3750 [3, range(10)], 3751 [(slice(None), [3, 5, 6]), [-30, -31, -32]], 3752 [([-1, 0, 1], 2), [-30, -31, -32]], 3753 [(slice(None, 2), slice(None, 3)), [-50, -51, -52]], 3754 [(slice(None), [6, 1, 3]), [-60, -61, -62]], 3755 [(slice(1, 3), slice(1, 4)), [[-70, -71, -72]]], 3756 [(slice(None), [9, 8, 8]), [-80, -81, 91]], 3757 [([True, False, False, False, True, False], 2), -1], 3758 [(3, [True, True, False, True, True, False, True, False, True, True]), -1], 3759 [(np.array([False, False, True, True, False, False]), slice(5, 7)), -1], 3760 [ 3761 ( 3762 4, 3763 da.from_array( 3764 [False, False, True, True, False, False, True, False, False, True] 3765 ), 3766 ), 3767 -1, 3768 ], 3769 ], 3770) 3771def test_setitem_extended_API_2d(index, value): 3772 # 2-d array 3773 x = np.ma.arange(60).reshape((6, 10)) 3774 dx = da.from_array(x, chunks=(2, 3)) 3775 dx[index] = value 3776 x[index] = value 3777 assert_eq(x, dx.compute()) 3778 3779 3780def test_setitem_extended_API_2d_rhs_func_of_lhs(): 3781 # Cases: 3782 # * RHS and/or indices are a function of the LHS 3783 # * Indices have unknown chunk sizes 3784 # * RHS has extra leading size 1 dimensions compared to LHS 3785 x = np.arange(60).reshape((6, 10)) 3786 chunks = (2, 3) 3787 3788 dx = da.from_array(x, chunks=chunks) 3789 dx[2:4, dx[0] > 3] = -5 3790 x[2:4, x[0] > 3] = -5 3791 assert_eq(x, dx.compute()) 3792 3793 dx = da.from_array(x, chunks=chunks) 3794 dx[2, dx[0] < -2] = -7 3795 x[2, x[0] < -2] = -7 3796 assert_eq(x, dx.compute()) 3797 3798 dx = da.from_array(x, chunks=chunks) 3799 dx[dx % 2 == 0] = -8 3800 x[x % 2 == 0] = -8 3801 assert_eq(x, dx.compute()) 3802 3803 dx = da.from_array(x, chunks=chunks) 3804 dx[dx % 2 == 0] = -8 3805 x[x % 2 == 0] = -8 3806 assert_eq(x, dx.compute()) 3807 3808 dx = da.from_array(x, chunks=chunks) 3809 dx[3:5, 5:1:-2] = -dx[:2, 4:1:-2] 3810 x[3:5, 5:1:-2] = -x[:2, 4:1:-2] 3811 assert_eq(x, dx.compute()) 3812 3813 dx = da.from_array(x, chunks=chunks) 3814 dx[0, 1:3] = -dx[0, 4:2:-1] 3815 x[0, 1:3] = -x[0, 4:2:-1] 3816 assert_eq(x, dx.compute()) 3817 3818 dx = da.from_array(x, chunks=chunks) 3819 dx[...] = dx 3820 x[...] = x 3821 assert_eq(x, dx.compute()) 3822 3823 dx = da.from_array(x, chunks=chunks) 3824 dx[...] = dx[...] 3825 x[...] = x[...] 3826 assert_eq(x, dx.compute()) 3827 3828 dx = da.from_array(x, chunks=chunks) 3829 dx[0] = dx[-1] 3830 x[0] = x[-1] 3831 assert_eq(x, dx.compute()) 3832 3833 dx = da.from_array(x, chunks=chunks) 3834 dx[0, :] = dx[-2, :] 3835 x[0, :] = x[-2, :] 3836 assert_eq(x, dx.compute()) 3837 3838 dx = da.from_array(x, chunks=chunks) 3839 dx[:, 1] = dx[:, -3] 3840 x[:, 1] = x[:, -3] 3841 assert_eq(x, dx.compute()) 3842 3843 index = da.from_array([0, 2], chunks=(2,)) 3844 dx = da.from_array(x, chunks=chunks) 3845 dx[index, 8] = [99, 88] 3846 x[[0, 2], 8] = [99, 88] 3847 assert_eq(x, dx.compute()) 3848 3849 dx = da.from_array(x, chunks=chunks) 3850 dx[:, index] = dx[:, :2] 3851 x[:, [0, 2]] = x[:, :2] 3852 assert_eq(x, dx.compute()) 3853 3854 index = da.where(da.arange(3, chunks=(1,)) < 2)[0] 3855 dx = da.from_array(x, chunks=chunks) 3856 dx[index, 7] = [-23, -33] 3857 x[index.compute(), 7] = [-23, -33] 3858 assert_eq(x, dx.compute()) 3859 3860 index = da.where(da.arange(3, chunks=(1,)) < 2)[0] 3861 dx = da.from_array(x, chunks=chunks) 3862 dx[(index,)] = -34 3863 x[(index.compute(),)] = -34 3864 assert_eq(x, dx.compute()) 3865 3866 index = index - 4 3867 dx = da.from_array(x, chunks=chunks) 3868 dx[index, 7] = [-43, -53] 3869 x[index.compute(), 7] = [-43, -53] 3870 assert_eq(x, dx.compute()) 3871 3872 index = da.from_array([0, -1], chunks=(1,)) 3873 x[[0, -1]] = 9999 3874 dx[(index,)] = 9999 3875 assert_eq(x, dx.compute()) 3876 3877 dx = da.from_array(x, chunks=(-1, -1)) 3878 dx[...] = da.from_array(x, chunks=chunks) 3879 assert_eq(x, dx.compute()) 3880 3881 # RHS has extra leading size 1 dimensions compared to LHS 3882 dx = da.from_array(x.copy(), chunks=(2, 3)) 3883 v = x.reshape((1, 1) + x.shape) 3884 x[...] = v 3885 dx[...] = v 3886 assert_eq(x, dx.compute()) 3887 3888 index = da.where(da.arange(3, chunks=(1,)) < 2)[0] 3889 v = -np.arange(12).reshape(1, 1, 6, 2) 3890 x[:, [0, 1]] = v 3891 dx[:, index] = v 3892 assert_eq(x, dx.compute()) 3893 3894 3895@pytest.mark.parametrize( 3896 "index, value", 3897 [ 3898 [(1, slice(1, 7, 2)), np.ma.masked], 3899 [(slice(1, 5, 2), [7, 5]), np.ma.masked_all((2, 2))], 3900 ], 3901) 3902def test_setitem_extended_API_2d_mask(index, value): 3903 x = np.ma.arange(60).reshape((6, 10)) 3904 dx = da.from_array(x.data, chunks=(2, 3)) 3905 dx[index] = value 3906 x[index] = value 3907 dx = dx.persist() 3908 assert_eq(x, dx.compute()) 3909 assert_eq(x.mask, da.ma.getmaskarray(dx).compute()) 3910 3911 3912def test_setitem_on_read_only_blocks(): 3913 # Outputs of broadcast_trick-style functions contain read-only 3914 # arrays 3915 dx = da.empty((4, 6), dtype=float, chunks=(2, 2)) 3916 dx[0] = 99 3917 3918 assert_eq(dx[0, 0], 99.0) 3919 3920 dx[0:2] = 88 3921 3922 assert_eq(dx[0, 0], 88.0) 3923 3924 3925def test_setitem_errs(): 3926 x = da.ones((4, 4), chunks=(2, 2)) 3927 3928 with pytest.raises(ValueError): 3929 x[x > 1] = x 3930 3931 # Shape mismatch 3932 with pytest.raises(ValueError): 3933 x[[True, True, False, False], 0] = [2, 3, 4] 3934 3935 with pytest.raises(ValueError): 3936 x[[True, True, True, False], 0] = [2, 3] 3937 3938 x = da.ones((4, 4), chunks=(2, 2)) 3939 with pytest.raises(ValueError): 3940 x[0, da.from_array([True, False, False, True])] = [2, 3, 4] 3941 3942 x = da.ones((4, 4), chunks=(2, 2)) 3943 with pytest.raises(ValueError): 3944 x[0, da.from_array([True, True, False, False])] = [2, 3, 4] 3945 3946 x = da.ones((4, 4), chunks=(2, 2)) 3947 with pytest.raises(ValueError): 3948 x[da.from_array([True, True, True, False]), 0] = [2, 3] 3949 3950 x = da.ones((4, 4), chunks=(2, 2)) 3951 3952 # Too many indices 3953 with pytest.raises(IndexError): 3954 x[:, :, :] = 2 3955 3956 # 2-d boolean indexing a single dimension 3957 with pytest.raises(IndexError): 3958 x[[[True, True, False, False]], 0] = 5 3959 3960 # Too many/not enough booleans 3961 with pytest.raises(IndexError): 3962 x[[True, True, False]] = 5 3963 3964 with pytest.raises(IndexError): 3965 x[[False, True, True, True, False]] = 5 3966 3967 # 2-d indexing a single dimension 3968 with pytest.raises(IndexError): 3969 x[[[1, 2, 3]], 0] = 5 3970 3971 # Multiple 1-d boolean/integer arrays 3972 with pytest.raises(NotImplementedError): 3973 x[[1, 2], [2, 3]] = 6 3974 3975 with pytest.raises(NotImplementedError): 3976 x[[True, True, False, False], [2, 3]] = 5 3977 3978 with pytest.raises(NotImplementedError): 3979 x[[True, True, False, False], [False, True, False, False]] = 7 3980 3981 # scalar boolean indexing 3982 with pytest.raises(NotImplementedError): 3983 x[True] = 5 3984 3985 with pytest.raises(NotImplementedError): 3986 x[np.array(True)] = 5 3987 3988 with pytest.raises(NotImplementedError): 3989 x[0, da.from_array(True)] = 5 3990 3991 # Scalar arrays 3992 y = da.from_array(np.array(1)) 3993 with pytest.raises(IndexError): 3994 y[:] = 2 3995 3996 # RHS has non-brodacastable extra leading dimensions 3997 x = np.arange(12).reshape((3, 4)) 3998 dx = da.from_array(x, chunks=(2, 2)) 3999 with pytest.raises(ValueError): 4000 dx[...] = np.arange(24).reshape((2, 1, 3, 4)) 4001 4002 # RHS doesn't have chunks set 4003 dx = da.unique(da.random.random([10])) 4004 with pytest.raises(ValueError, match="Arrays chunk sizes are unknown"): 4005 dx[0] = 0 4006 4007 4008def test_zero_slice_dtypes(): 4009 x = da.arange(5, chunks=1) 4010 y = x[[]] 4011 assert y.dtype == x.dtype 4012 assert y.shape == (0,) 4013 assert_eq(x[[]], np.arange(5)[[]]) 4014 4015 4016def test_zero_sized_array_rechunk(): 4017 x = da.arange(5, chunks=1)[:0] 4018 y = da.blockwise(identity, "i", x, "i", dtype=x.dtype) 4019 assert_eq(x, y) 4020 4021 4022def test_blockwise_zero_shape(): 4023 da.blockwise( 4024 lambda x: x, 4025 "i", 4026 da.arange(10, chunks=10), 4027 "i", 4028 da.from_array(np.ones((0, 2)), ((0,), 2)), 4029 "ab", 4030 da.from_array(np.ones((0,)), ((0,),)), 4031 "a", 4032 dtype="float64", 4033 ) 4034 4035 4036def test_blockwise_zero_shape_new_axes(): 4037 da.blockwise( 4038 lambda x: np.ones(42), 4039 "i", 4040 da.from_array(np.ones((0, 2)), ((0,), 2)), 4041 "ab", 4042 da.from_array(np.ones((0,)), ((0,),)), 4043 "a", 4044 dtype="float64", 4045 new_axes={"i": 42}, 4046 ) 4047 4048 4049def test_broadcast_against_zero_shape(): 4050 assert_eq(da.arange(1, chunks=1)[:0] + 0, np.arange(1)[:0] + 0) 4051 assert_eq(da.arange(1, chunks=1)[:0] + 0.1, np.arange(1)[:0] + 0.1) 4052 assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + 0, np.ones((5, 5))[:0] + 0) 4053 assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + 0.1, np.ones((5, 5))[:0] + 0.1) 4054 assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + 0, np.ones((5, 5))[:, :0] + 0) 4055 assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + 0.1, np.ones((5, 5))[:, :0] + 0.1) 4056 4057 4058def test_from_array_name(): 4059 x = np.array([1, 2, 3, 4, 5]) 4060 chunks = x.shape 4061 # Default is tokenize the array 4062 dx = da.from_array(x, chunks=chunks) 4063 hashed_name = dx.name 4064 assert da.from_array(x, chunks=chunks).name == hashed_name 4065 # Specify name directly 4066 assert da.from_array(x, chunks=chunks, name="x").name == "x" 4067 # False gives a random name 4068 dx2 = da.from_array(x, chunks=chunks, name=False) 4069 dx3 = da.from_array(x, chunks=chunks, name=False) 4070 assert dx2.name != hashed_name 4071 assert dx3.name != hashed_name 4072 assert dx2.name != dx3.name 4073 4074 4075def test_concatenate_errs(): 4076 with pytest.raises(ValueError, match=r"Shapes.*\(2, 1\)"): 4077 da.concatenate( 4078 [da.zeros((2, 1), chunks=(2, 1)), da.zeros((2, 3), chunks=(2, 3))] 4079 ) 4080 4081 with pytest.raises(ValueError): 4082 da.concatenate( 4083 [da.zeros((1, 2), chunks=(1, 2)), da.zeros((3, 2), chunks=(3, 2))], axis=1 4084 ) 4085 4086 4087def test_stack_errs(): 4088 with pytest.raises(ValueError) as e: 4089 da.stack([da.zeros((2,), chunks=2)] * 10 + [da.zeros((3,), chunks=3)] * 10) 4090 4091 assert ( 4092 str(e.value) 4093 == "Stacked arrays must have the same shape. The first array had shape (2,), while array 11 has shape (3,)." 4094 ) 4095 assert len(str(e.value)) < 105 4096 4097 4098def test_blockwise_with_numpy_arrays(): 4099 x = np.ones(10) 4100 y = da.ones(10, chunks=(5,)) 4101 4102 assert_eq(x + y, x + x) 4103 4104 s = da.sum(x) 4105 assert any(x is v for v in s.dask.values()) 4106 4107 4108@pytest.mark.parametrize("chunks", (100, 6)) 4109@pytest.mark.parametrize("other", [[0, 0, 1], [2, 1, 3], (0, 0, 1)]) 4110def test_elemwise_with_lists(chunks, other): 4111 x = np.arange(12).reshape((4, 3)) 4112 d = da.arange(12, chunks=chunks).reshape((4, 3)) 4113 4114 x2 = np.vstack([x[:, 0], x[:, 1], x[:, 2]]).T 4115 d2 = da.vstack([d[:, 0], d[:, 1], d[:, 2]]).T 4116 4117 assert_eq(x2, d2) 4118 4119 x3 = x2 * other 4120 d3 = d2 * other 4121 4122 assert_eq(x3, d3) 4123 4124 4125def test_constructor_plugin(): 4126 L = [] 4127 L2 = [] 4128 with dask.config.set(array_plugins=[L.append, L2.append]): 4129 x = da.ones(10, chunks=5) 4130 y = x + 1 4131 4132 assert L == L2 == [x, y] 4133 4134 with dask.config.set(array_plugins=[lambda x: x.compute()]): 4135 x = da.ones(10, chunks=5) 4136 y = x + 1 4137 4138 assert isinstance(y, np.ndarray) 4139 assert len(L) == 2 4140 4141 4142def test_no_warnings_on_metadata(): 4143 x = da.ones(5, chunks=3) 4144 with warnings.catch_warnings(record=True) as record: 4145 da.arccos(x) 4146 4147 assert not record 4148 4149 4150def test_delayed_array_key_hygeine(): 4151 a = da.zeros((1,), chunks=(1,)) 4152 d = delayed(identity)(a) 4153 b = da.from_delayed(d, shape=a.shape, dtype=a.dtype) 4154 assert_eq(a, b) 4155 4156 4157def test_empty_chunks_in_array_len(): 4158 x = da.ones((), chunks=()) 4159 with pytest.raises(TypeError) as exc_info: 4160 len(x) 4161 4162 err_msg = "len() of unsized object" 4163 assert err_msg in str(exc_info.value) 4164 4165 4166@pytest.mark.parametrize("dtype", [None, [("a", "f4"), ("b", object)]]) 4167def test_meta(dtype): 4168 a = da.zeros((1,), chunks=(1,)) 4169 assert a._meta.dtype == a.dtype 4170 assert isinstance(a._meta, np.ndarray) 4171 assert a.nbytes < 1000 4172 4173 4174@pytest.mark.parametrize( 4175 "shape,limit,expected", 4176 [ 4177 (100, 10, (10,) * 10), 4178 (20, 10, (10, 10)), 4179 (20, 5, (5, 5, 5, 5)), 4180 (24, 5, (4, 4, 4, 4, 4, 4)), # common factor is close, use it 4181 (23, 5, (5, 5, 5, 5, 3)), # relatively prime, don't use 1s 4182 (1000, 167, (125,) * 8), # find close value 4183 ], 4184) 4185def test_normalize_chunks_auto_1d(shape, limit, expected): 4186 result = normalize_chunks("auto", (shape,), limit=limit, dtype=np.uint8) 4187 assert result == (expected,) 4188 4189 4190@pytest.mark.parametrize( 4191 "shape,chunks,limit,expected", 4192 [ 4193 ((20, 20), ("auto", 2), 20, ((10, 10), (2,) * 10)), 4194 ( 4195 (20, 20), 4196 ("auto", (2, 2, 2, 2, 2, 5, 5)), 4197 20, 4198 ((4, 4, 4, 4, 4), (2, 2, 2, 2, 2, 5, 5)), 4199 ), 4200 ((1, 20), "auto", 10, ((1,), (10, 10))), 4201 ], 4202) 4203def test_normalize_chunks_auto_2d(shape, chunks, limit, expected): 4204 result = normalize_chunks(chunks, shape, limit=limit, dtype="uint8") 4205 assert result == expected 4206 4207 4208def test_normalize_chunks_auto_3d(): 4209 result = normalize_chunks( 4210 ("auto", "auto", 2), (20, 20, 20), limit=200, dtype="uint8" 4211 ) 4212 expected = ((10, 10), (10, 10), (2,) * 10) 4213 assert result == expected 4214 4215 result = normalize_chunks("auto", (20, 20, 20), limit=8, dtype="uint8") 4216 expected = ((2,) * 10,) * 3 4217 assert result == expected 4218 4219 4220def test_constructors_chunks_dict(): 4221 x = da.ones((20, 20), chunks={0: 10, 1: 5}) 4222 assert x.chunks == ((10, 10), (5, 5, 5, 5)) 4223 4224 x = da.ones((20, 20), chunks={0: 10, 1: "auto"}) 4225 assert x.chunks == ((10, 10), (20,)) 4226 4227 4228def test_from_array_chunks_dict(): 4229 with dask.config.set({"array.chunk-size": "128kiB"}): 4230 x = np.empty((100, 100, 100)) 4231 y = da.from_array(x, chunks={0: 10, 1: -1, 2: "auto"}) 4232 z = da.from_array(x, chunks=(10, 100, 10)) 4233 assert y.chunks == z.chunks 4234 4235 4236@pytest.mark.parametrize("dtype", [object, [("a", object), ("b", int)]]) 4237def test_normalize_chunks_object_dtype(dtype): 4238 x = np.array(["a", "abc"], dtype=object) 4239 with pytest.raises(NotImplementedError): 4240 da.from_array(x, chunks="auto") 4241 4242 4243def test_normalize_chunks_tuples_of_tuples(): 4244 result = normalize_chunks(((2, 3, 5), "auto"), (10, 10), limit=10, dtype=np.uint8) 4245 expected = ((2, 3, 5), (2, 2, 2, 2, 2)) 4246 assert result == expected 4247 4248 4249def test_normalize_chunks_nan(): 4250 with pytest.raises(ValueError) as info: 4251 normalize_chunks("auto", (np.nan,), limit=10, dtype=np.uint8) 4252 assert "auto" in str(info.value) 4253 with pytest.raises(ValueError) as info: 4254 normalize_chunks(((np.nan, np.nan), "auto"), (10, 10), limit=10, dtype=np.uint8) 4255 assert "auto" in str(info.value) 4256 4257 4258def test_pandas_from_dask_array(): 4259 pd = pytest.importorskip("pandas") 4260 from dask.dataframe._compat import PANDAS_GT_130, PANDAS_GT_131 4261 4262 a = da.ones((12,), chunks=4) 4263 s = pd.Series(a, index=range(12)) 4264 4265 if PANDAS_GT_130 and not PANDAS_GT_131: 4266 # https://github.com/pandas-dev/pandas/issues/38645 4267 assert s.dtype != a.dtype 4268 else: 4269 assert s.dtype == a.dtype 4270 assert_eq(s.values, a) 4271 4272 4273def test_from_zarr_unique_name(): 4274 zarr = pytest.importorskip("zarr") 4275 a = zarr.array([1, 2, 3]) 4276 b = zarr.array([4, 5, 6]) 4277 4278 assert da.from_zarr(a).name != da.from_zarr(b).name 4279 4280 4281def test_from_zarr_name(): 4282 zarr = pytest.importorskip("zarr") 4283 a = zarr.array([1, 2, 3]) 4284 assert da.from_zarr(a, name="foo").name == "foo" 4285 4286 4287def test_zarr_roundtrip(): 4288 pytest.importorskip("zarr") 4289 with tmpdir() as d: 4290 a = da.zeros((3, 3), chunks=(1, 1)) 4291 a.to_zarr(d) 4292 a2 = da.from_zarr(d) 4293 assert_eq(a, a2) 4294 assert a2.chunks == a.chunks 4295 4296 4297def test_zarr_roundtrip_with_path_like(): 4298 pytest.importorskip("zarr") 4299 with tmpdir() as d: 4300 path = pathlib.Path(d) 4301 a = da.zeros((3, 3), chunks=(1, 1)) 4302 a.to_zarr(path) 4303 a2 = da.from_zarr(path) 4304 assert_eq(a, a2) 4305 assert a2.chunks == a.chunks 4306 4307 4308@pytest.mark.parametrize("compute", [False, True]) 4309def test_zarr_return_stored(compute): 4310 pytest.importorskip("zarr") 4311 with tmpdir() as d: 4312 a = da.zeros((3, 3), chunks=(1, 1)) 4313 a2 = a.to_zarr(d, compute=compute, return_stored=True) 4314 assert isinstance(a2, Array) 4315 assert_eq(a, a2, check_graph=False) 4316 assert a2.chunks == a.chunks 4317 4318 4319def test_zarr_inline_array(): 4320 zarr = pytest.importorskip("zarr") 4321 a = zarr.array([1, 2, 3]) 4322 dsk = dict(da.from_zarr(a, inline_array=True).dask) 4323 assert len(dsk) == 1 4324 assert a in list(dsk.values())[0] 4325 4326 4327def test_zarr_existing_array(): 4328 zarr = pytest.importorskip("zarr") 4329 c = (1, 1) 4330 a = da.ones((3, 3), chunks=c) 4331 z = zarr.zeros_like(a, chunks=c) 4332 a.to_zarr(z) 4333 a2 = da.from_zarr(z) 4334 assert_eq(a, a2) 4335 assert a2.chunks == a.chunks 4336 4337 4338def test_to_zarr_unknown_chunks_raises(): 4339 pytest.importorskip("zarr") 4340 a = da.random.random((10,), chunks=(3,)) 4341 a = a[a > 0.5] 4342 with pytest.raises(ValueError, match="unknown chunk sizes"): 4343 a.to_zarr({}) 4344 4345 4346def test_read_zarr_chunks(): 4347 pytest.importorskip("zarr") 4348 a = da.zeros((9,), chunks=(3,)) 4349 with tmpdir() as d: 4350 a.to_zarr(d) 4351 arr = da.from_zarr(d, chunks=(5,)) 4352 assert arr.chunks == ((5, 4),) 4353 4354 4355def test_zarr_pass_mapper(): 4356 pytest.importorskip("zarr") 4357 import zarr.storage 4358 4359 with tmpdir() as d: 4360 mapper = zarr.storage.DirectoryStore(d) 4361 a = da.zeros((3, 3), chunks=(1, 1)) 4362 a.to_zarr(mapper) 4363 a2 = da.from_zarr(mapper) 4364 assert_eq(a, a2) 4365 assert a2.chunks == a.chunks 4366 4367 4368def test_zarr_group(): 4369 zarr = pytest.importorskip("zarr") 4370 with tmpdir() as d: 4371 a = da.zeros((3, 3), chunks=(1, 1)) 4372 a.to_zarr(d, component="test") 4373 with pytest.raises((OSError, ValueError)): 4374 a.to_zarr(d, component="test", overwrite=False) 4375 a.to_zarr(d, component="test", overwrite=True) 4376 4377 # second time is fine, group exists 4378 a.to_zarr(d, component="test2", overwrite=False) 4379 a.to_zarr(d, component="nested/test", overwrite=False) 4380 group = zarr.open_group(d, mode="r") 4381 assert list(group) == ["nested", "test", "test2"] 4382 assert "test" in group["nested"] 4383 4384 a2 = da.from_zarr(d, component="test") 4385 assert_eq(a, a2) 4386 assert a2.chunks == a.chunks 4387 4388 4389@pytest.mark.parametrize( 4390 "data", 4391 [ 4392 [(), True], 4393 [((1,),), True], 4394 [((1, 1, 1),), True], 4395 [((1,), (1,)), True], 4396 [((2, 2, 1),), True], 4397 [((2, 2, 3),), False], 4398 [((1, 1, 1), (2, 2, 3)), False], 4399 [((1, 2, 1),), False], 4400 ], 4401) 4402def test_regular_chunks(data): 4403 chunkset, expected = data 4404 assert da.core._check_regular_chunks(chunkset) == expected 4405 4406 4407def test_zarr_nocompute(): 4408 pytest.importorskip("zarr") 4409 with tmpdir() as d: 4410 a = da.zeros((3, 3), chunks=(1, 1)) 4411 out = a.to_zarr(d, compute=False) 4412 assert isinstance(out, Delayed) 4413 dask.compute(out) 4414 a2 = da.from_zarr(d) 4415 assert_eq(a, a2) 4416 assert a2.chunks == a.chunks 4417 4418 4419def test_tiledb_roundtrip(): 4420 tiledb = pytest.importorskip("tiledb") 4421 # 1) load with default chunking 4422 # 2) load from existing tiledb.DenseArray 4423 # 3) write to existing tiledb.DenseArray 4424 a = da.random.random((3, 3)) 4425 with tmpdir() as uri: 4426 da.to_tiledb(a, uri) 4427 tdb = da.from_tiledb(uri) 4428 4429 assert_eq(a, tdb) 4430 assert a.chunks == tdb.chunks 4431 4432 # from tiledb.array 4433 with tiledb.open(uri) as t: 4434 tdb2 = da.from_tiledb(t) 4435 assert_eq(a, tdb2) 4436 4437 with tmpdir() as uri2: 4438 with tiledb.empty_like(uri2, a) as t: 4439 a.to_tiledb(t) 4440 assert_eq(da.from_tiledb(uri2), a) 4441 4442 # specific chunking 4443 with tmpdir() as uri: 4444 a = da.random.random((3, 3), chunks=(1, 1)) 4445 a.to_tiledb(uri) 4446 tdb = da.from_tiledb(uri) 4447 4448 assert_eq(a, tdb) 4449 assert a.chunks == tdb.chunks 4450 4451 4452def test_tiledb_multiattr(): 4453 tiledb = pytest.importorskip("tiledb") 4454 dom = tiledb.Domain( 4455 tiledb.Dim("x", (0, 1000), tile=100), tiledb.Dim("y", (0, 1000), tile=100) 4456 ) 4457 schema = tiledb.ArraySchema( 4458 attrs=(tiledb.Attr("attr1"), tiledb.Attr("attr2")), domain=dom 4459 ) 4460 4461 with tmpdir() as uri: 4462 tiledb.DenseArray.create(uri, schema) 4463 tdb = tiledb.DenseArray(uri, "w") 4464 4465 ar1 = np.random.randn(*tdb.schema.shape) 4466 ar2 = np.random.randn(*tdb.schema.shape) 4467 4468 tdb[:] = {"attr1": ar1, "attr2": ar2} 4469 tdb = tiledb.DenseArray(uri, "r") 4470 4471 # basic round-trip from dask.array 4472 d = da.from_tiledb(uri, attribute="attr2") 4473 assert_eq(d, ar2) 4474 4475 # smoke-test computation directly on the TileDB view 4476 d = da.from_tiledb(uri, attribute="attr2") 4477 assert_eq(np.mean(ar2), d.mean().compute(scheduler="threads")) 4478 4479 4480def test_blockview(): 4481 x = da.arange(10, chunks=2) 4482 blockview = BlockView(x) 4483 assert x.blocks == blockview 4484 assert isinstance(blockview[0], da.Array) 4485 4486 assert_eq(blockview[0], x[:2]) 4487 assert_eq(blockview[-1], x[-2:]) 4488 assert_eq(blockview[:3], x[:6]) 4489 assert_eq(blockview[[0, 1, 2]], x[:6]) 4490 assert_eq(blockview[[3, 0, 2]], np.array([6, 7, 0, 1, 4, 5])) 4491 assert_eq(blockview.shape, tuple(map(len, x.chunks))) 4492 assert_eq(blockview.size, np.prod(blockview.shape)) 4493 assert_eq( 4494 blockview.ravel(), [blockview[idx] for idx in np.ndindex(blockview.shape)] 4495 ) 4496 4497 x = da.random.random((20, 20), chunks=(4, 5)) 4498 blockview = BlockView(x) 4499 assert_eq(blockview[0], x[:4]) 4500 assert_eq(blockview[0, :3], x[:4, :15]) 4501 assert_eq(blockview[:, :3], x[:, :15]) 4502 assert_eq(blockview.shape, tuple(map(len, x.chunks))) 4503 assert_eq(blockview.size, np.prod(blockview.shape)) 4504 assert_eq( 4505 blockview.ravel(), [blockview[idx] for idx in np.ndindex(blockview.shape)] 4506 ) 4507 4508 x = da.ones((40, 40, 40), chunks=(10, 10, 10)) 4509 blockview = BlockView(x) 4510 assert_eq(blockview[0, :, 0], np.ones((10, 40, 10))) 4511 assert_eq(blockview.shape, tuple(map(len, x.chunks))) 4512 assert_eq(blockview.size, np.prod(blockview.shape)) 4513 assert_eq( 4514 blockview.ravel(), [blockview[idx] for idx in np.ndindex(blockview.shape)] 4515 ) 4516 4517 x = da.ones((2, 2), chunks=1) 4518 with pytest.raises(ValueError): 4519 blockview[[0, 1], [0, 1]] 4520 with pytest.raises(ValueError): 4521 blockview[np.array([0, 1]), [0, 1]] 4522 with pytest.raises(ValueError) as info: 4523 blockview[np.array([0, 1]), np.array([0, 1])] 4524 assert "list" in str(info.value) 4525 with pytest.raises(ValueError) as info: 4526 blockview[None, :, :] 4527 assert "newaxis" in str(info.value) and "not supported" in str(info.value) 4528 with pytest.raises(IndexError) as info: 4529 blockview[100, 100] 4530 4531 4532def test_blocks_indexer(): 4533 x = da.arange(10, chunks=2) 4534 4535 assert isinstance(x.blocks[0], da.Array) 4536 4537 assert_eq(x.blocks[0], x[:2]) 4538 assert_eq(x.blocks[-1], x[-2:]) 4539 assert_eq(x.blocks[:3], x[:6]) 4540 assert_eq(x.blocks[[0, 1, 2]], x[:6]) 4541 assert_eq(x.blocks[[3, 0, 2]], np.array([6, 7, 0, 1, 4, 5])) 4542 4543 x = da.random.random((20, 20), chunks=(4, 5)) 4544 assert_eq(x.blocks[0], x[:4]) 4545 assert_eq(x.blocks[0, :3], x[:4, :15]) 4546 assert_eq(x.blocks[:, :3], x[:, :15]) 4547 4548 x = da.ones((40, 40, 40), chunks=(10, 10, 10)) 4549 assert_eq(x.blocks[0, :, 0], np.ones((10, 40, 10))) 4550 4551 x = da.ones((2, 2), chunks=1) 4552 with pytest.raises(ValueError): 4553 x.blocks[[0, 1], [0, 1]] 4554 with pytest.raises(ValueError): 4555 x.blocks[np.array([0, 1]), [0, 1]] 4556 with pytest.raises(ValueError) as info: 4557 x.blocks[np.array([0, 1]), np.array([0, 1])] 4558 assert "list" in str(info.value) 4559 with pytest.raises(ValueError) as info: 4560 x.blocks[None, :, :] 4561 assert "newaxis" in str(info.value) and "not supported" in str(info.value) 4562 with pytest.raises(IndexError) as info: 4563 x.blocks[100, 100] 4564 4565 4566def test_partitions_indexer(): 4567 # .partitions is an alias of .blocks for dask arrays 4568 x = da.arange(10, chunks=2) 4569 4570 assert isinstance(x.partitions[0], da.Array) 4571 4572 assert_eq(x.partitions[0], x[:2]) 4573 assert_eq(x.partitions[-1], x[-2:]) 4574 assert_eq(x.partitions[:3], x[:6]) 4575 assert_eq(x.partitions[[0, 1, 2]], x[:6]) 4576 assert_eq(x.partitions[[3, 0, 2]], np.array([6, 7, 0, 1, 4, 5])) 4577 4578 x = da.random.random((20, 20), chunks=(4, 5)) 4579 assert_eq(x.partitions[0], x[:4]) 4580 assert_eq(x.partitions[0, :3], x[:4, :15]) 4581 assert_eq(x.partitions[:, :3], x[:, :15]) 4582 4583 x = da.ones((40, 40, 40), chunks=(10, 10, 10)) 4584 assert_eq(x.partitions[0, :, 0], np.ones((10, 40, 10))) 4585 4586 x = da.ones((2, 2), chunks=1) 4587 with pytest.raises(ValueError): 4588 x.partitions[[0, 1], [0, 1]] 4589 with pytest.raises(ValueError): 4590 x.partitions[np.array([0, 1]), [0, 1]] 4591 with pytest.raises(ValueError) as info: 4592 x.partitions[np.array([0, 1]), np.array([0, 1])] 4593 assert "list" in str(info.value) 4594 with pytest.raises(ValueError) as info: 4595 x.partitions[None, :, :] 4596 assert "newaxis" in str(info.value) and "not supported" in str(info.value) 4597 with pytest.raises(IndexError) as info: 4598 x.partitions[100, 100] 4599 4600 4601@pytest.mark.filterwarnings("ignore:the matrix subclass:PendingDeprecationWarning") 4602def test_dask_array_holds_scipy_sparse_containers(): 4603 pytest.importorskip("scipy.sparse") 4604 import scipy.sparse 4605 4606 x = da.random.random((1000, 10), chunks=(100, 10)) 4607 x[x < 0.9] = 0 4608 xx = x.compute() 4609 y = x.map_blocks(scipy.sparse.csr_matrix) 4610 4611 vs = y.to_delayed().flatten().tolist() 4612 values = dask.compute(*vs, scheduler="single-threaded") 4613 assert all(isinstance(v, scipy.sparse.csr_matrix) for v in values) 4614 4615 yy = y.compute(scheduler="single-threaded") 4616 assert isinstance(yy, scipy.sparse.spmatrix) 4617 assert (yy == xx).all() 4618 4619 z = x.T.map_blocks(scipy.sparse.csr_matrix) 4620 zz = z.compute(scheduler="single-threaded") 4621 assert isinstance(zz, scipy.sparse.spmatrix) 4622 assert (zz == xx.T).all() 4623 4624 4625@pytest.mark.parametrize("axis", [0, 1]) 4626def test_scipy_sparse_concatenate(axis): 4627 pytest.importorskip("scipy.sparse") 4628 import scipy.sparse 4629 4630 rs = da.random.RandomState(RandomState=np.random.RandomState) 4631 4632 xs = [] 4633 ys = [] 4634 for i in range(2): 4635 x = rs.random((1000, 10), chunks=(100, 10)) 4636 x[x < 0.9] = 0 4637 xs.append(x) 4638 ys.append(x.map_blocks(scipy.sparse.csr_matrix)) 4639 4640 z = da.concatenate(ys, axis=axis) 4641 z = z.compute() 4642 4643 if axis == 0: 4644 sp_concatenate = scipy.sparse.vstack 4645 elif axis == 1: 4646 sp_concatenate = scipy.sparse.hstack 4647 z_expected = sp_concatenate([scipy.sparse.csr_matrix(e.compute()) for e in xs]) 4648 4649 assert (z != z_expected).nnz == 0 4650 4651 4652def test_3851(): 4653 with warnings.catch_warnings() as record: 4654 Y = da.random.random((10, 10), chunks="auto") 4655 da.argmax(Y, axis=0).compute() 4656 4657 assert not record 4658 4659 4660def test_3925(): 4661 x = da.from_array(np.array(["a", "b", "c"], dtype=object), chunks=-1) 4662 assert (x[0] == x[0]).compute(scheduler="sync") 4663 4664 4665def test_map_blocks_large_inputs_delayed(): 4666 a = da.ones(10, chunks=(5,)) 4667 b = np.ones(1000000) 4668 4669 c = a.map_blocks(add, b) 4670 assert any(b is v for v in c.dask.values()) 4671 assert repr(dict(c.dask)).count(repr(b)[:10]) == 1 # only one occurrence 4672 4673 d = a.map_blocks(lambda x, y: x + y.sum(), y=b) 4674 assert_eq(d, d) 4675 assert any(b is v for v in d.dask.values()) 4676 assert repr(dict(c.dask)).count(repr(b)[:10]) == 1 # only one occurrence 4677 4678 4679def test_blockwise_large_inputs_delayed(): 4680 a = da.ones(10, chunks=(5,)) 4681 b = np.ones(1000000) 4682 4683 c = da.blockwise(add, "i", a, "i", b, None, dtype=a.dtype) 4684 assert any(b is v for v in c.dask.values()) 4685 assert repr(dict(c.dask)).count(repr(b)[:10]) == 1 # only one occurrence 4686 4687 d = da.blockwise(lambda x, y: x + y, "i", a, "i", y=b, dtype=a.dtype) 4688 assert any(b is v for v in d.dask.values()) 4689 assert repr(dict(c.dask)).count(repr(b)[:10]) == 1 # only one occurrence 4690 4691 4692def test_slice_reversed(): 4693 x = da.ones(10, chunks=-1) 4694 y = x[6:3] 4695 4696 assert_eq(y, np.ones(0)) 4697 4698 4699def test_map_blocks_chunks(): 4700 x = da.arange(400, chunks=(100,)) 4701 y = da.arange(40, chunks=(10,)) 4702 4703 def func(a, b): 4704 return np.array([a.max(), b.max()]) 4705 4706 assert_eq( 4707 da.map_blocks(func, x, y, chunks=(2,), dtype=x.dtype), 4708 np.array([99, 9, 199, 19, 299, 29, 399, 39]), 4709 ) 4710 4711 4712def test_nbytes_auto(): 4713 chunks = normalize_chunks("800B", shape=(500,), dtype="float64") 4714 assert chunks == ((100, 100, 100, 100, 100),) 4715 chunks = normalize_chunks("200B", shape=(10, 10), dtype="float64") 4716 assert chunks == ((5, 5), (5, 5)) 4717 chunks = normalize_chunks((5, "200B"), shape=(10, 10), dtype="float64") 4718 assert chunks == ((5, 5), (5, 5)) 4719 chunks = normalize_chunks("33B", shape=(10, 10), dtype="float64") 4720 assert chunks == ((2, 2, 2, 2, 2), (2, 2, 2, 2, 2)) 4721 chunks = normalize_chunks("1800B", shape=(10, 20, 30), dtype="float64") 4722 assert chunks == ((5, 5), (5, 5, 5, 5), (6, 6, 6, 6, 6)) 4723 4724 with pytest.raises(ValueError): 4725 normalize_chunks("10B", shape=(10,), limit=20, dtype="float64") 4726 with pytest.raises(ValueError): 4727 normalize_chunks("100B", shape=(10, 10), limit=20, dtype="float64") 4728 with pytest.raises(ValueError): 4729 normalize_chunks(("100B", "10B"), shape=(10, 10), dtype="float64") 4730 with pytest.raises(ValueError): 4731 normalize_chunks(("10B", "10B"), shape=(10, 10), limit=20, dtype="float64") 4732 4733 4734def test_auto_chunks_h5py(): 4735 h5py = pytest.importorskip("h5py") 4736 4737 with tmpfile(".hdf5") as fn: 4738 with h5py.File(fn, mode="a") as f: 4739 d = f.create_dataset( 4740 "/x", shape=(1000, 1000), chunks=(32, 64), dtype="float64" 4741 ) 4742 d[:] = 1 4743 4744 with h5py.File(fn, mode="a") as f: 4745 d = f["x"] 4746 with dask.config.set({"array.chunk-size": "1 MiB"}): 4747 x = da.from_array(d) 4748 assert isinstance(x._meta, np.ndarray) 4749 assert x.chunks == ((256, 256, 256, 232), (512, 488)) 4750 4751 4752def test_no_warnings_from_blockwise(): 4753 with pytest.warns(None) as record: 4754 x = da.ones((3, 10, 10), chunks=(3, 2, 2)) 4755 da.map_blocks(lambda y: np.mean(y, axis=0), x, dtype=x.dtype, drop_axis=0) 4756 assert not record 4757 4758 with pytest.warns(None) as record: 4759 x = da.ones((15, 15), chunks=(5, 5)) 4760 (x.dot(x.T + 1) - x.mean(axis=0)).std() 4761 assert not record 4762 4763 with pytest.warns(None) as record: 4764 x = da.ones((1,), chunks=(1,)) 4765 1 / x[0] 4766 assert not record 4767 4768 4769def test_from_array_meta(): 4770 sparse = pytest.importorskip("sparse") 4771 x = np.ones(10) 4772 meta = sparse.COO.from_numpy(x) 4773 y = da.from_array(x, meta=meta) 4774 assert isinstance(y._meta, sparse.COO) 4775 4776 4777def test_compute_chunk_sizes(): 4778 x = da.from_array(np.linspace(-1, 1, num=50), chunks=10) 4779 y = x[x < 0] 4780 assert np.isnan(y.shape[0]) 4781 assert y.chunks == ((np.nan,) * 5,) 4782 4783 z = y.compute_chunk_sizes() 4784 assert y is z 4785 assert z.chunks == ((10, 10, 5, 0, 0),) 4786 assert len(z) == 25 4787 4788 # check that dtype of chunk dimensions is `int` 4789 assert isinstance(z.chunks[0][0], int) 4790 4791 4792def test_compute_chunk_sizes_2d_array(): 4793 X = np.linspace(-1, 1, num=9 * 4).reshape(9, 4) 4794 X = da.from_array(X, chunks=(3, 4)) 4795 idx = X.sum(axis=1) > 0 4796 Y = X[idx] 4797 4798 # This is very similar to the DataFrame->Array conversion 4799 assert np.isnan(Y.shape[0]) and Y.shape[1] == 4 4800 assert Y.chunks == ((np.nan, np.nan, np.nan), (4,)) 4801 4802 Z = Y.compute_chunk_sizes() 4803 assert Y is Z 4804 assert Z.chunks == ((0, 1, 3), (4,)) 4805 assert Z.shape == (4, 4) 4806 4807 4808def test_compute_chunk_sizes_3d_array(N=8): 4809 X = np.linspace(-1, 2, num=8 * 8 * 8).reshape(8, 8, 8) 4810 X = da.from_array(X, chunks=(4, 4, 4)) 4811 idx = X.sum(axis=0).sum(axis=0) > 0 4812 Y = X[idx] 4813 idx = X.sum(axis=1).sum(axis=1) < 0 4814 Y = Y[:, idx] 4815 idx = X.sum(axis=2).sum(axis=1) > 0.1 4816 Y = Y[:, :, idx] 4817 4818 # Checking to make sure shapes are different on outputs 4819 assert Y.compute().shape == (8, 3, 5) 4820 assert X.compute().shape == (8, 8, 8) 4821 4822 assert Y.chunks == ((np.nan, np.nan),) * 3 4823 assert all(np.isnan(s) for s in Y.shape) 4824 Z = Y.compute_chunk_sizes() 4825 assert Z is Y 4826 assert Z.shape == (8, 3, 5) 4827 assert Z.chunks == ((4, 4), (3, 0), (1, 4)) 4828 4829 4830def _known(num=50): 4831 return da.from_array(np.linspace(-1, 1, num=num), chunks=10) 4832 4833 4834@pytest.fixture() 4835def unknown(): 4836 x = _known() 4837 y = x[x < 0] 4838 assert y.chunks == ((np.nan,) * 5,) 4839 return y 4840 4841 4842def test_compute_chunk_sizes_warning_fixes_rechunk(unknown): 4843 y = unknown 4844 with pytest.raises(ValueError, match="compute_chunk_sizes"): 4845 y.rechunk("auto") 4846 y.compute_chunk_sizes() 4847 y.rechunk("auto") 4848 4849 4850def test_compute_chunk_sizes_warning_fixes_to_zarr(unknown): 4851 pytest.importorskip("zarr") 4852 y = unknown 4853 with pytest.raises(ValueError, match="compute_chunk_sizes"): 4854 with StringIO() as f: 4855 y.to_zarr(f) 4856 y.compute_chunk_sizes() 4857 4858 with pytest.raises(ValueError, match="irregular chunking"): 4859 with StringIO() as f: 4860 y.to_zarr(f) 4861 4862 4863def test_compute_chunk_sizes_warning_fixes_to_svg(unknown): 4864 y = unknown 4865 with pytest.raises(NotImplementedError, match="compute_chunk_sizes"): 4866 y.to_svg() 4867 y.compute_chunk_sizes() 4868 y.to_svg() 4869 4870 4871def test_compute_chunk_sizes_warning_fixes_concatenate(): 4872 x = _known(num=100).reshape(10, 10) 4873 idx = x.sum(axis=0) > 0 4874 y1 = x[idx] 4875 y2 = x[idx] 4876 with pytest.raises(ValueError, match="compute_chunk_sizes"): 4877 da.concatenate((y1, y2), axis=1) 4878 y1.compute_chunk_sizes() 4879 y2.compute_chunk_sizes() 4880 da.concatenate((y1, y2), axis=1) 4881 4882 4883def test_compute_chunk_sizes_warning_fixes_reduction(unknown): 4884 y = unknown 4885 with pytest.raises(ValueError, match="compute_chunk_sizes"): 4886 da.argmin(y) 4887 y.compute_chunk_sizes() 4888 da.argmin(y) 4889 4890 4891def test_compute_chunk_sizes_warning_fixes_reshape(unknown): 4892 y = unknown 4893 with pytest.raises(ValueError, match="compute_chunk_sizes"): 4894 da.reshape(y, (5, 5)) 4895 y.compute_chunk_sizes() 4896 da.reshape(y, (5, 5)) 4897 4898 4899def test_compute_chunk_sizes_warning_fixes_slicing(): 4900 x = _known(num=100).reshape(10, 10) 4901 y = x[x.sum(axis=0) < 0] 4902 with pytest.raises(ValueError, match="compute_chunk_sizes"): 4903 y[:3, :] 4904 y.compute_chunk_sizes() 4905 y[:3, :] 4906 4907 4908def test_rechunk_auto(): 4909 x = da.ones(10, chunks=(1,)) 4910 y = x.rechunk() 4911 4912 assert y.npartitions == 1 4913 4914 4915def test_chunk_assignment_invalidates_cached_properties(): 4916 x = da.ones((4,), chunks=(1,)) 4917 y = x.copy() 4918 # change chunks directly, which should change all of the tested properties 4919 y._chunks = ((2, 2), (0, 0, 0, 0)) 4920 assert not x.ndim == y.ndim 4921 assert not x.shape == y.shape 4922 assert not x.size == y.size 4923 assert not x.numblocks == y.numblocks 4924 assert not x.npartitions == y.npartitions 4925 assert not x.__dask_keys__() == y.__dask_keys__() 4926 assert not np.array_equal(x._key_array, y._key_array) 4927 4928 4929def test_map_blocks_series(): 4930 pd = pytest.importorskip("pandas") 4931 import dask.dataframe as dd 4932 from dask.dataframe.utils import assert_eq as dd_assert_eq 4933 4934 x = da.ones(10, chunks=(5,)) 4935 s = x.map_blocks(pd.Series) 4936 assert isinstance(s, dd.Series) 4937 assert s.npartitions == x.npartitions 4938 4939 dd_assert_eq(s, s) 4940 4941 4942@pytest.mark.xfail(reason="need to remove singleton index dimension") 4943def test_map_blocks_dataframe(): 4944 pd = pytest.importorskip("pandas") 4945 import dask.dataframe as dd 4946 from dask.dataframe.utils import assert_eq as dd_assert_eq 4947 4948 x = da.ones((10, 2), chunks=(5, 2)) 4949 s = x.map_blocks(pd.DataFrame) 4950 assert isinstance(s, dd.DataFrame) 4951 assert s.npartitions == x.npartitions 4952 dd_assert_eq(s, s) 4953 4954 4955def test_dask_layers(): 4956 a = da.ones(1) 4957 assert a.dask.layers.keys() == {a.name} 4958 assert a.dask.dependencies == {a.name: set()} 4959 assert a.__dask_layers__() == (a.name,) 4960 b = a + 1 4961 assert b.dask.layers.keys() == {a.name, b.name} 4962 assert b.dask.dependencies == {a.name: set(), b.name: {a.name}} 4963 assert b.__dask_layers__() == (b.name,) 4964