1import contextlib
2import copy
3import pathlib
4import xml.etree.ElementTree
5from unittest import mock
6
7import pytest
8
9np = pytest.importorskip("numpy")
10
11import operator
12import os
13import time
14import warnings
15from io import StringIO
16from operator import add, sub
17from threading import Lock
18
19from numpy import nancumprod, nancumsum
20from tlz import concat, countby, merge
21from tlz.curried import identity
22
23import dask
24import dask.array as da
25from dask.array.core import (
26    Array,
27    BlockView,
28    blockdims_from_blockshape,
29    broadcast_chunks,
30    broadcast_shapes,
31    broadcast_to,
32    common_blockdim,
33    concatenate,
34    concatenate3,
35    concatenate_axes,
36    dotmany,
37    from_array,
38    from_delayed,
39    from_func,
40    getem,
41    getter,
42    normalize_chunks,
43    optimize,
44    stack,
45    store,
46)
47from dask.array.utils import assert_eq, same_keys
48from dask.base import compute_as_if_collection, tokenize
49from dask.blockwise import broadcast_dimensions
50from dask.blockwise import make_blockwise_graph as top
51from dask.blockwise import optimize_blockwise
52from dask.delayed import Delayed, delayed
53from dask.utils import apply, key_split, tmpdir, tmpfile
54from dask.utils_test import dec, inc
55
56from ..chunk import getitem
57from .test_dispatch import EncapsulateNDArray
58
59
60def test_getem():
61    sol = {
62        ("X", 0, 0): (getter, "X", (slice(0, 2), slice(0, 3))),
63        ("X", 1, 0): (getter, "X", (slice(2, 4), slice(0, 3))),
64        ("X", 1, 1): (getter, "X", (slice(2, 4), slice(3, 6))),
65        ("X", 0, 1): (getter, "X", (slice(0, 2), slice(3, 6))),
66    }
67    assert getem("X", (2, 3), shape=(4, 6)) == sol
68
69
70def test_top():
71    assert top(inc, "z", "ij", "x", "ij", numblocks={"x": (2, 2)}) == {
72        ("z", 0, 0): (inc, ("x", 0, 0)),
73        ("z", 0, 1): (inc, ("x", 0, 1)),
74        ("z", 1, 0): (inc, ("x", 1, 0)),
75        ("z", 1, 1): (inc, ("x", 1, 1)),
76    }
77
78    assert top(
79        add, "z", "ij", "x", "ij", "y", "ij", numblocks={"x": (2, 2), "y": (2, 2)}
80    ) == {
81        ("z", 0, 0): (add, ("x", 0, 0), ("y", 0, 0)),
82        ("z", 0, 1): (add, ("x", 0, 1), ("y", 0, 1)),
83        ("z", 1, 0): (add, ("x", 1, 0), ("y", 1, 0)),
84        ("z", 1, 1): (add, ("x", 1, 1), ("y", 1, 1)),
85    }
86
87    assert top(
88        dotmany, "z", "ik", "x", "ij", "y", "jk", numblocks={"x": (2, 2), "y": (2, 2)}
89    ) == {
90        ("z", 0, 0): (dotmany, [("x", 0, 0), ("x", 0, 1)], [("y", 0, 0), ("y", 1, 0)]),
91        ("z", 0, 1): (dotmany, [("x", 0, 0), ("x", 0, 1)], [("y", 0, 1), ("y", 1, 1)]),
92        ("z", 1, 0): (dotmany, [("x", 1, 0), ("x", 1, 1)], [("y", 0, 0), ("y", 1, 0)]),
93        ("z", 1, 1): (dotmany, [("x", 1, 0), ("x", 1, 1)], [("y", 0, 1), ("y", 1, 1)]),
94    }
95
96    assert top(identity, "z", "", "x", "ij", numblocks={"x": (2, 2)}) == {
97        ("z",): (identity, [[("x", 0, 0), ("x", 0, 1)], [("x", 1, 0), ("x", 1, 1)]])
98    }
99
100
101def test_top_with_kwargs():
102    assert top(add, "z", "i", "x", "i", numblocks={"x": (2, 0)}, b=100) == {
103        ("z", 0): (apply, add, [("x", 0)], {"b": 100}),
104        ("z", 1): (apply, add, [("x", 1)], {"b": 100}),
105    }
106
107
108def test_top_supports_broadcasting_rules():
109    assert top(
110        add, "z", "ij", "x", "ij", "y", "ij", numblocks={"x": (1, 2), "y": (2, 1)}
111    ) == {
112        ("z", 0, 0): (add, ("x", 0, 0), ("y", 0, 0)),
113        ("z", 0, 1): (add, ("x", 0, 1), ("y", 0, 0)),
114        ("z", 1, 0): (add, ("x", 0, 0), ("y", 1, 0)),
115        ("z", 1, 1): (add, ("x", 0, 1), ("y", 1, 0)),
116    }
117
118
119def test_top_literals():
120    assert top(add, "z", "ij", "x", "ij", 123, None, numblocks={"x": (2, 2)}) == {
121        ("z", 0, 0): (add, ("x", 0, 0), 123),
122        ("z", 0, 1): (add, ("x", 0, 1), 123),
123        ("z", 1, 0): (add, ("x", 1, 0), 123),
124        ("z", 1, 1): (add, ("x", 1, 1), 123),
125    }
126
127
128def test_blockwise_literals():
129    x = da.ones((10, 10), chunks=(5, 5))
130    z = da.blockwise(add, "ij", x, "ij", 100, None, dtype=x.dtype)
131    assert_eq(z, x + 100)
132
133    z = da.blockwise(
134        lambda x, y, z: x * y + z, "ij", 2, None, x, "ij", 100, None, dtype=x.dtype
135    )
136    assert_eq(z, 2 * x + 100)
137
138    z = da.blockwise(getitem, "ij", x, "ij", slice(None), None, dtype=x.dtype)
139    assert_eq(z, x)
140
141
142def test_blockwise_1_in_shape_I():
143    def test_f(a, b):
144        assert 1 in b.shape
145
146    p, k, N = 7, 2, 5
147    da.blockwise(
148        test_f,
149        "x",
150        da.zeros((2 * p, 9, k * N), chunks=(p, 3, k)),
151        "xzt",
152        da.zeros((2 * p, 9, 1), chunks=(p, 3, -1)),
153        "xzt",
154        concatenate=True,
155        dtype=float,
156    ).compute()
157
158
159def test_blockwise_1_in_shape_II():
160    def test_f(a, b):
161        assert 1 in b.shape
162
163    p, k, N = 7, 2, 5
164    da.blockwise(
165        test_f,
166        "x",
167        da.zeros((2 * p, 9, k * N, 8), chunks=(p, 9, k, 4)),
168        "xztu",
169        da.zeros((2 * p, 9, 1, 8), chunks=(p, 9, -1, 4)),
170        "xztu",
171        concatenate=True,
172        dtype=float,
173    ).compute()
174
175
176def test_blockwise_1_in_shape_III():
177    def test_f(a, b):
178        assert 1 in b.shape
179
180    k, N = 2, 5
181    da.blockwise(
182        test_f,
183        "x",
184        da.zeros((k * N, 9, 8), chunks=(k, 3, 4)),
185        "xtu",
186        da.zeros((1, 9, 8), chunks=(-1, 3, 4)),
187        "xtu",
188        concatenate=True,
189        dtype=float,
190    ).compute()
191
192
193def test_concatenate3_on_scalars():
194    assert_eq(concatenate3([1, 2]), np.array([1, 2]))
195
196
197def test_chunked_dot_product():
198    x = np.arange(400).reshape((20, 20))
199    o = np.ones((20, 20))
200
201    d = {"x": x, "o": o}
202
203    getx = getem("x", (5, 5), shape=(20, 20))
204    geto = getem("o", (5, 5), shape=(20, 20))
205
206    result = top(
207        dotmany, "out", "ik", "x", "ij", "o", "jk", numblocks={"x": (4, 4), "o": (4, 4)}
208    )
209
210    dsk = merge(d, getx, geto, result)
211    out = dask.get(dsk, [[("out", i, j) for j in range(4)] for i in range(4)])
212
213    assert_eq(np.dot(x, o), concatenate3(out))
214
215
216def test_chunked_transpose_plus_one():
217    x = np.arange(400).reshape((20, 20))
218
219    d = {"x": x}
220
221    getx = getem("x", (5, 5), shape=(20, 20))
222
223    f = lambda x: x.T + 1
224    comp = top(f, "out", "ij", "x", "ji", numblocks={"x": (4, 4)})
225
226    dsk = merge(d, getx, comp)
227    out = dask.get(dsk, [[("out", i, j) for j in range(4)] for i in range(4)])
228
229    assert_eq(concatenate3(out), x.T + 1)
230
231
232def test_broadcast_dimensions_works_with_singleton_dimensions():
233    argpairs = [("x", "i")]
234    numblocks = {"x": ((1,),)}
235    assert broadcast_dimensions(argpairs, numblocks) == {"i": (1,)}
236
237
238def test_broadcast_dimensions():
239    argpairs = [("x", "ij"), ("y", "ij")]
240    d = {"x": ("Hello", 1), "y": (1, (2, 3))}
241    assert broadcast_dimensions(argpairs, d) == {"i": "Hello", "j": (2, 3)}
242
243
244def test_Array():
245    shape = (1000, 1000)
246    chunks = (100, 100)
247    name = "x"
248    dsk = merge({name: "some-array"}, getem(name, chunks, shape=shape))
249    a = Array(dsk, name, chunks, shape=shape, dtype="f8")
250
251    assert a.numblocks == (10, 10)
252
253    assert a.__dask_keys__() == [[("x", i, j) for j in range(10)] for i in range(10)]
254
255    assert a.chunks == ((100,) * 10, (100,) * 10)
256
257    assert a.shape == shape
258
259    assert len(a) == shape[0]
260
261    with pytest.raises(ValueError):
262        Array(dsk, name, chunks, shape=shape)
263    with pytest.raises(TypeError):
264        Array(dsk, name, chunks, shape=shape, dtype="f8", meta=np.empty(0, 0))
265
266
267def test_uneven_chunks():
268    a = Array({}, "x", chunks=(3, 3), shape=(10, 10), dtype="f8")
269    assert a.chunks == ((3, 3, 3, 1), (3, 3, 3, 1))
270
271
272def test_numblocks_suppoorts_singleton_block_dims():
273    shape = (100, 10)
274    chunks = (10, 10)
275    name = "x"
276    dsk = merge({name: "some-array"}, getem(name, shape=shape, chunks=chunks))
277    a = Array(dsk, name, chunks, shape=shape, dtype="f8")
278
279    assert set(concat(a.__dask_keys__())) == {("x", i, 0) for i in range(10)}
280
281
282def test_keys():
283    dsk = {("x", i, j): () for i in range(5) for j in range(6)}
284    dx = Array(dsk, "x", chunks=(10, 10), shape=(50, 60), dtype="f8")
285    assert dx.__dask_keys__() == [[(dx.name, i, j) for j in range(6)] for i in range(5)]
286    # Cache works
287    assert dx.__dask_keys__() is dx.__dask_keys__()
288    # Test mutating names clears key cache
289    dx.dask = {("y", i, j): () for i in range(5) for j in range(6)}
290    dx._name = "y"
291    new_keys = [[(dx.name, i, j) for j in range(6)] for i in range(5)]
292    assert dx.__dask_keys__() == new_keys
293    assert np.array_equal(dx._key_array, np.array(new_keys, dtype="object"))
294    d = Array({}, "x", (), shape=(), dtype="f8")
295    assert d.__dask_keys__() == [("x",)]
296
297
298def test_Array_computation():
299    a = Array({("x", 0, 0): np.eye(3)}, "x", shape=(3, 3), chunks=(3, 3), dtype="f8")
300    assert_eq(np.array(a), np.eye(3))
301    assert isinstance(a.compute(), np.ndarray)
302    assert float(a[0, 0]) == 1
303
304
305def test_Array_numpy_gufunc_call__array_ufunc__01():
306    x = da.random.normal(size=(3, 10, 10), chunks=(2, 10, 10))
307    nx = x.compute()
308    ny = np.linalg._umath_linalg.inv(nx)
309    y = np.linalg._umath_linalg.inv(x)
310    assert_eq(ny, y)
311
312
313def test_Array_numpy_gufunc_call__array_ufunc__02():
314    x = da.random.normal(size=(3, 10, 10), chunks=(2, 10, 10))
315    nx = x.compute()
316    nw, nv = np.linalg._umath_linalg.eig(nx)
317    w, v = np.linalg._umath_linalg.eig(x)
318    assert_eq(nw, w)
319    assert_eq(nv, v)
320
321
322def test_stack():
323    a, b, c = (
324        Array(
325            getem(name, chunks=(2, 3), shape=(4, 6)),
326            name,
327            chunks=(2, 3),
328            dtype="f8",
329            shape=(4, 6),
330        )
331        for name in "ABC"
332    )
333
334    s = stack([a, b, c], axis=0)
335
336    colon = slice(None, None, None)
337
338    assert s.shape == (3, 4, 6)
339    assert s.chunks == ((1, 1, 1), (2, 2), (3, 3))
340    assert s.chunksize == (1, 2, 3)
341    assert s.dask[(s.name, 0, 1, 0)] == (getitem, ("A", 1, 0), (None, colon, colon))
342    assert s.dask[(s.name, 2, 1, 0)] == (getitem, ("C", 1, 0), (None, colon, colon))
343    assert same_keys(s, stack([a, b, c], axis=0))
344
345    s2 = stack([a, b, c], axis=1)
346    assert s2.shape == (4, 3, 6)
347    assert s2.chunks == ((2, 2), (1, 1, 1), (3, 3))
348    assert s2.chunksize == (2, 1, 3)
349    assert s2.dask[(s2.name, 0, 1, 0)] == (getitem, ("B", 0, 0), (colon, None, colon))
350    assert s2.dask[(s2.name, 1, 1, 0)] == (getitem, ("B", 1, 0), (colon, None, colon))
351    assert same_keys(s2, stack([a, b, c], axis=1))
352
353    s2 = stack([a, b, c], axis=2)
354    assert s2.shape == (4, 6, 3)
355    assert s2.chunks == ((2, 2), (3, 3), (1, 1, 1))
356    assert s2.chunksize == (2, 3, 1)
357    assert s2.dask[(s2.name, 0, 1, 0)] == (getitem, ("A", 0, 1), (colon, colon, None))
358    assert s2.dask[(s2.name, 1, 1, 2)] == (getitem, ("C", 1, 1), (colon, colon, None))
359    assert same_keys(s2, stack([a, b, c], axis=2))
360
361    pytest.raises(ValueError, lambda: stack([]))
362    pytest.raises(ValueError, lambda: stack([a, b, c], axis=3))
363
364    assert set(b.dask.keys()).issubset(s2.dask.keys())
365
366    assert stack([a, b, c], axis=-1).chunks == stack([a, b, c], axis=2).chunks
367
368
369def test_stack_zero_size():
370    x = np.empty((2, 0, 3))
371    y = da.from_array(x, chunks=1)
372
373    result_np = np.concatenate([x, x])
374    result_da = da.concatenate([y, y])
375
376    assert_eq(result_np, result_da)
377
378
379def test_short_stack():
380    x = np.array([1])
381    d = da.from_array(x, chunks=(1,))
382    s = da.stack([d])
383    assert s.shape == (1, 1)
384    chunks = compute_as_if_collection(Array, s.dask, s.__dask_keys__())
385    assert chunks[0][0].shape == (1, 1)
386
387
388def test_stack_scalars():
389    d = da.arange(4, chunks=2)
390
391    s = da.stack([d.mean(), d.sum()])
392
393    assert s.compute().tolist() == [np.arange(4).mean(), np.arange(4).sum()]
394
395
396def test_stack_promote_type():
397    i = np.arange(10, dtype="i4")
398    f = np.arange(10, dtype="f4")
399    di = da.from_array(i, chunks=5)
400    df = da.from_array(f, chunks=5)
401    res = da.stack([di, df])
402    assert_eq(res, np.stack([i, f]))
403
404
405def test_stack_rechunk():
406    x = da.random.random(10, chunks=5)
407    y = da.random.random(10, chunks=4)
408
409    z = da.stack([x, y], axis=0)
410    assert z.shape == (2, 10)
411    assert z.chunks == ((1, 1), (4, 1, 3, 2))
412
413    assert_eq(z, np.stack([x.compute(), y.compute()], axis=0))
414
415
416def test_stack_unknown_chunksizes():
417    dd = pytest.importorskip("dask.dataframe")
418    pd = pytest.importorskip("pandas")
419
420    a_df = pd.DataFrame({"x": np.arange(12)})
421    b_df = pd.DataFrame({"y": np.arange(12) * 10})
422
423    a_ddf = dd.from_pandas(a_df, sort=False, npartitions=3)
424    b_ddf = dd.from_pandas(b_df, sort=False, npartitions=3)
425
426    a_x = a_ddf.values
427    b_x = b_ddf.values
428
429    assert np.isnan(a_x.shape[0])
430    assert np.isnan(b_x.shape[0])
431
432    with pytest.raises(ValueError) as exc_info:
433        da.stack([a_x, b_x], axis=0)
434
435    assert "shape" in str(exc_info.value)
436    assert "nan" in str(exc_info.value)
437
438    c_x = da.stack([a_x, b_x], axis=0, allow_unknown_chunksizes=True)
439
440    assert_eq(c_x, np.stack([a_df.values, b_df.values], axis=0))
441
442    with pytest.raises(ValueError) as exc_info:
443        da.stack([a_x, b_x], axis=1)
444
445    assert "shape" in str(exc_info.value)
446    assert "nan" in str(exc_info.value)
447
448    c_x = da.stack([a_x, b_x], axis=1, allow_unknown_chunksizes=True)
449
450    assert_eq(c_x, np.stack([a_df.values, b_df.values], axis=1))
451
452    m_df = pd.DataFrame({"m": np.arange(12) * 100})
453    n_df = pd.DataFrame({"n": np.arange(12) * 1000})
454
455    m_ddf = dd.from_pandas(m_df, sort=False, npartitions=3)
456    n_ddf = dd.from_pandas(n_df, sort=False, npartitions=3)
457
458    m_x = m_ddf.values
459    n_x = n_ddf.values
460
461    assert np.isnan(m_x.shape[0])
462    assert np.isnan(n_x.shape[0])
463
464    with pytest.raises(ValueError) as exc_info:
465        da.stack([[a_x, b_x], [m_x, n_x]])
466
467    assert "shape" in str(exc_info.value)
468    assert "nan" in str(exc_info.value)
469
470    c_x = da.stack([[a_x, b_x], [m_x, n_x]], allow_unknown_chunksizes=True)
471
472    assert_eq(c_x, np.stack([[a_df.values, b_df.values], [m_df.values, n_df.values]]))
473
474
475def test_concatenate():
476    a, b, c = (
477        Array(
478            getem(name, chunks=(2, 3), shape=(4, 6)),
479            name,
480            chunks=(2, 3),
481            dtype="f8",
482            shape=(4, 6),
483        )
484        for name in "ABC"
485    )
486
487    x = concatenate([a, b, c], axis=0)
488
489    assert x.shape == (12, 6)
490    assert x.chunks == ((2, 2, 2, 2, 2, 2), (3, 3))
491    assert x.dask[(x.name, 0, 1)] == ("A", 0, 1)
492    assert x.dask[(x.name, 5, 0)] == ("C", 1, 0)
493    assert same_keys(x, concatenate([a, b, c], axis=0))
494
495    y = concatenate([a, b, c], axis=1)
496
497    assert y.shape == (4, 18)
498    assert y.chunks == ((2, 2), (3, 3, 3, 3, 3, 3))
499    assert y.dask[(y.name, 1, 0)] == ("A", 1, 0)
500    assert y.dask[(y.name, 1, 5)] == ("C", 1, 1)
501    assert same_keys(y, concatenate([a, b, c], axis=1))
502
503    assert set(b.dask.keys()).issubset(y.dask.keys())
504
505    z = concatenate([a], axis=0)
506
507    assert z.shape == a.shape
508    assert z.chunks == a.chunks
509    assert z.dask == a.dask
510    assert z is a
511
512    assert (
513        concatenate([a, b, c], axis=-1).chunks == concatenate([a, b, c], axis=1).chunks
514    )
515
516    pytest.raises(ValueError, lambda: concatenate([]))
517    pytest.raises(ValueError, lambda: concatenate([a, b, c], axis=2))
518
519
520@pytest.mark.parametrize(
521    "dtypes", [((">f8", ">f8"), "float64"), (("<f4", "<f8"), "float64")]
522)
523def test_concatenate_types(dtypes):
524    dts_in, dt_out = dtypes
525    arrs = [np.zeros(4, dtype=dt) for dt in dts_in]
526    darrs = [from_array(arr, chunks=(2,)) for arr in arrs]
527
528    x = concatenate(darrs, axis=0)
529    assert x.dtype == dt_out
530
531
532def test_concatenate_unknown_axes():
533    dd = pytest.importorskip("dask.dataframe")
534    pd = pytest.importorskip("pandas")
535
536    a_df = pd.DataFrame({"x": np.arange(12)})
537    b_df = pd.DataFrame({"y": np.arange(12) * 10})
538
539    a_ddf = dd.from_pandas(a_df, sort=False, npartitions=3)
540    b_ddf = dd.from_pandas(b_df, sort=False, npartitions=3)
541
542    a_x = a_ddf.values
543    b_x = b_ddf.values
544
545    assert np.isnan(a_x.shape[0])
546    assert np.isnan(b_x.shape[0])
547
548    da.concatenate([a_x, b_x], axis=0)  # works fine
549
550    with pytest.raises(ValueError) as exc_info:
551        da.concatenate([a_x, b_x], axis=1)  # unknown chunks
552
553    assert "nan" in str(exc_info.value)
554    assert "allow_unknown_chunksize" in str(exc_info.value)
555
556    c_x = da.concatenate(
557        [a_x, b_x], axis=1, allow_unknown_chunksizes=True
558    )  # unknown chunks
559
560    assert_eq(c_x, np.concatenate([a_df.values, b_df.values], axis=1))
561
562
563def test_concatenate_rechunk():
564    x = da.random.random((6, 6), chunks=(3, 3))
565    y = da.random.random((6, 6), chunks=(2, 2))
566
567    z = da.concatenate([x, y], axis=0)
568    assert z.shape == (12, 6)
569    assert z.chunks == ((3, 3, 2, 2, 2), (2, 1, 1, 2))
570    assert_eq(z, np.concatenate([x.compute(), y.compute()], axis=0))
571
572    z = da.concatenate([x, y], axis=1)
573    assert z.shape == (6, 12)
574    assert z.chunks == ((2, 1, 1, 2), (3, 3, 2, 2, 2))
575    assert_eq(z, np.concatenate([x.compute(), y.compute()], axis=1))
576
577
578def test_concatenate_fixlen_strings():
579    x = np.array(["a", "b", "c"])
580    y = np.array(["aa", "bb", "cc"])
581
582    a = da.from_array(x, chunks=(2,))
583    b = da.from_array(y, chunks=(2,))
584
585    assert_eq(np.concatenate([x, y]), da.concatenate([a, b]))
586
587
588def test_concatenate_zero_size():
589
590    x = np.random.random(10)
591    y = da.from_array(x, chunks=3)
592    result_np = np.concatenate([x, x[:0]])
593    result_da = da.concatenate([y, y[:0]])
594    assert_eq(result_np, result_da)
595    assert result_da is y
596
597    # dtype of a size 0 arrays can affect the output dtype
598    result_np = np.concatenate([np.zeros(0, dtype=float), np.zeros(1, dtype=int)])
599    result_da = da.concatenate([da.zeros(0, dtype=float), da.zeros(1, dtype=int)])
600
601    assert_eq(result_np, result_da)
602
603    # All empty arrays case
604    result_np = np.concatenate([np.zeros(0), np.zeros(0)])
605    result_da = da.concatenate([da.zeros(0), da.zeros(0)])
606
607    assert_eq(result_np, result_da)
608
609
610def test_block_simple_row_wise():
611    a1 = np.ones((2, 2))
612    a2 = 2 * a1
613
614    d1 = da.asarray(a1)
615    d2 = da.asarray(a2)
616
617    expected = np.block([a1, a2])
618    result = da.block([d1, d2])
619
620    assert_eq(expected, result)
621
622    expected = np.block([a1, a2[:, :0]])
623    result = da.block([d1, d2[:, :0]])
624
625    assert result is d1
626    assert_eq(expected, result)
627
628
629def test_block_simple_column_wise():
630    a1 = np.ones((2, 2))
631    a2 = 2 * a1
632
633    d1 = da.asarray(a1)
634    d2 = da.asarray(a2)
635
636    expected = np.block([[a1], [a2]])
637    result = da.block([[d1], [d2]])
638
639    assert_eq(expected, result)
640
641
642def test_block_with_1d_arrays_row_wise():
643    # # # 1-D vectors are treated as row arrays
644    a1 = np.array([1, 2, 3])
645    a2 = np.array([2, 3, 4])
646
647    d1 = da.asarray(a1)
648    d2 = da.asarray(a2)
649
650    expected = np.block([a1, a2])
651    result = da.block([d1, d2])
652
653    assert_eq(expected, result)
654
655    expected = np.block([a1, a2[:0]])
656    result = da.block([d1, d2[:0]])
657
658    assert result is d1
659    assert_eq(expected, result)
660
661
662def test_block_with_1d_arrays_multiple_rows():
663    a1 = np.array([1, 2, 3])
664    a2 = np.array([2, 3, 4])
665
666    d1 = da.asarray(a1)
667    d2 = da.asarray(a2)
668
669    expected = np.block([[a1, a2], [a1, a2]])
670    result = da.block([[d1, d2], [d1, d2]])
671
672    assert_eq(expected, result)
673
674
675def test_block_with_1d_arrays_column_wise():
676    # # # 1-D vectors are treated as row arrays
677    a1 = np.array([1, 2, 3])
678    a2 = np.array([2, 3, 4])
679
680    d1 = da.asarray(a1)
681    d2 = da.asarray(a2)
682
683    expected = np.block([[a1], [a2]])
684    result = da.block([[d1], [d2]])
685
686    assert_eq(expected, result)
687
688
689def test_block_mixed_1d_and_2d():
690    a1 = np.ones((2, 2))
691    a2 = np.array([2, 2])
692
693    d1 = da.asarray(a1)
694    d2 = da.asarray(a2)
695
696    expected = np.block([[d1], [d2]])
697    result = da.block([[a1], [a2]])
698
699    assert_eq(expected, result)
700
701
702def test_block_complicated():
703    # a bit more complicated
704    a1 = np.array([[1, 1, 1]])
705    a2 = np.array([[2, 2, 2]])
706    a3 = np.array([[3, 3, 3, 3, 3, 3]])
707    a4 = np.array([4, 4, 4, 4, 4, 4])
708    a5 = np.array(5)
709    a6 = np.array([6, 6, 6, 6, 6])
710    a7 = np.zeros((2, 6))
711
712    d1 = da.asarray(a1)
713    d2 = da.asarray(a2)
714    d3 = da.asarray(a3)
715    d4 = da.asarray(a4)
716    d5 = da.asarray(a5)
717    d6 = da.asarray(a6)
718    d7 = da.asarray(a7)
719
720    expected = np.block([[a1, a2], [a3], [a4], [a5, a6], [a7]])
721    result = da.block([[d1, d2], [d3], [d4], [d5, d6], [d7]])
722
723    assert_eq(expected, result)
724
725
726def test_block_nested():
727    a1 = np.array([1, 1, 1])
728    a2 = np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]])
729    a3 = np.array([3, 3, 3])
730    a4 = np.array([4, 4, 4])
731    a5 = np.array(5)
732    a6 = np.array([6, 6, 6, 6, 6])
733    a7 = np.zeros((2, 6))
734
735    d1 = da.asarray(a1)
736    d2 = da.asarray(a2)
737    d3 = da.asarray(a3)
738    d4 = da.asarray(a4)
739    d5 = da.asarray(a5)
740    d6 = da.asarray(a6)
741    d7 = da.asarray(a7)
742
743    expected = np.block([[np.block([[a1], [a3], [a4]]), a2], [a5, a6], [a7]])
744    result = da.block([[da.block([[d1], [d3], [d4]]), d2], [d5, d6], [d7]])
745
746    assert_eq(expected, result)
747
748
749def test_block_3d():
750    a000 = np.ones((2, 2, 2), int) * 1
751
752    a100 = np.ones((3, 2, 2), int) * 2
753    a010 = np.ones((2, 3, 2), int) * 3
754    a001 = np.ones((2, 2, 3), int) * 4
755
756    a011 = np.ones((2, 3, 3), int) * 5
757    a101 = np.ones((3, 2, 3), int) * 6
758    a110 = np.ones((3, 3, 2), int) * 7
759
760    a111 = np.ones((3, 3, 3), int) * 8
761
762    d000 = da.asarray(a000)
763
764    d100 = da.asarray(a100)
765    d010 = da.asarray(a010)
766    d001 = da.asarray(a001)
767
768    d011 = da.asarray(a011)
769    d101 = da.asarray(a101)
770    d110 = da.asarray(a110)
771
772    d111 = da.asarray(a111)
773
774    expected = np.block([[[a000, a001], [a010, a011]], [[a100, a101], [a110, a111]]])
775    result = da.block([[[d000, d001], [d010, d011]], [[d100, d101], [d110, d111]]])
776
777    assert_eq(expected, result)
778
779    expected = np.block(
780        [
781            [[a000, a001[:, :, :0]], [a010[:, :0, :], a011[:, :0, :0]]],
782            [[a100[:0, :, :], a101[:0, :, :0]], [a110[:0, :0, :], a111[:0, :0, :0]]],
783        ]
784    )
785    result = da.block(
786        [
787            [[d000, d001[:, :, :0]], [d010[:, :0, :], d011[:, :0, :0]]],
788            [[d100[:0, :, :], d101[:0, :, :0]], [d110[:0, :0, :], d111[:0, :0, :0]]],
789        ]
790    )
791
792    assert result is d000
793    assert_eq(expected, result)
794
795
796def test_block_with_mismatched_shape():
797    a = np.array([0, 0])
798    b = np.eye(2)
799
800    for arrays in [[a, b], [b, a]]:
801        with pytest.raises(ValueError):
802            da.block(arrays)
803
804
805def test_block_no_lists():
806    assert_eq(da.block(1), np.block(1))
807    assert_eq(da.block(np.eye(3)), np.block(np.eye(3)))
808
809
810def test_block_invalid_nesting():
811    for arrays in [
812        [1, [2]],
813        [1, []],
814        [[1], 2],
815        [[], 2],
816        [[[1], [2]], [[3, 4]], [5]],  # missing brackets
817    ]:
818        with pytest.raises(ValueError) as e:
819            da.block(arrays)
820        e.match(r"depths are mismatched")
821
822
823def test_block_empty_lists():
824    for arrays in [[], [[]], [[1], []]]:
825        with pytest.raises(ValueError) as e:
826            da.block(arrays)
827        e.match(r"empty")
828
829
830def test_block_tuple():
831    for arrays in [([1, 2], [3, 4]), [(1, 2), (3, 4)]]:
832        with pytest.raises(TypeError) as e:
833            da.block(arrays)
834        e.match(r"tuple")
835
836
837def test_broadcast_shapes():
838    with warnings.catch_warnings(record=True) as record:
839        assert () == broadcast_shapes()
840        assert (2, 5) == broadcast_shapes((2, 5))
841        assert (0, 5) == broadcast_shapes((0, 1), (1, 5))
842        assert np.allclose(
843            (2, np.nan), broadcast_shapes((1, np.nan), (2, 1)), equal_nan=True
844        )
845        assert np.allclose(
846            (2, np.nan), broadcast_shapes((2, 1), (1, np.nan)), equal_nan=True
847        )
848        assert (3, 4, 5) == broadcast_shapes((3, 4, 5), (4, 1), ())
849        assert (3, 4) == broadcast_shapes((3, 1), (1, 4), (4,))
850        assert (5, 6, 7, 3, 4) == broadcast_shapes((3, 1), (), (5, 6, 7, 1, 4))
851
852    assert not record
853
854    pytest.raises(ValueError, lambda: broadcast_shapes((3,), (3, 4)))
855    pytest.raises(ValueError, lambda: broadcast_shapes((2, 3), (2, 3, 1)))
856    pytest.raises(ValueError, lambda: broadcast_shapes((2, 3), (1, np.nan)))
857
858
859def test_elemwise_on_scalars():
860    x = np.arange(10, dtype=np.int64)
861    a = from_array(x, chunks=(5,))
862    assert len(a.__dask_keys__()) == 2
863    assert_eq(a.sum() ** 2, x.sum() ** 2)
864
865    y = np.arange(10, dtype=np.int32)
866    b = from_array(y, chunks=(5,))
867    result = a.sum() * b
868    # Dask 0-d arrays do not behave like numpy scalars for type promotion
869    assert result.dtype == np.int64
870    assert result.compute().dtype == np.int64
871    assert (x.sum() * y).dtype == np.int32
872    assert_eq((x.sum() * y).astype(np.int64), result)
873
874
875def test_elemwise_with_ndarrays():
876    x = np.arange(3)
877    y = np.arange(12).reshape(4, 3)
878    a = from_array(x, chunks=(3,))
879    b = from_array(y, chunks=(2, 3))
880
881    assert_eq(x + a, 2 * x)
882    assert_eq(a + x, 2 * x)
883
884    assert_eq(x + b, x + y)
885    assert_eq(b + x, x + y)
886    assert_eq(a + y, x + y)
887    assert_eq(y + a, x + y)
888    # Error on shape mismatch
889    pytest.raises(ValueError, lambda: a + y.T)
890    pytest.raises(ValueError, lambda: a + np.arange(2))
891
892
893def test_elemwise_differently_chunked():
894    x = np.arange(3)
895    y = np.arange(12).reshape(4, 3)
896    a = from_array(x, chunks=(3,))
897    b = from_array(y, chunks=(2, 2))
898
899    assert_eq(a + b, x + y)
900    assert_eq(b + a, x + y)
901
902
903def test_elemwise_dtype():
904    values = [
905        da.from_array(np.ones(5, np.float32), chunks=3),
906        da.from_array(np.ones(5, np.int16), chunks=3),
907        da.from_array(np.ones(5, np.int64), chunks=3),
908        da.from_array(np.ones((), np.float64), chunks=()) * 1e200,
909        np.ones(5, np.float32),
910        1,
911        1.0,
912        1e200,
913        np.int64(1),
914        np.ones((), np.int64),
915    ]
916    for x in values:
917        for y in values:
918            assert da.maximum(x, y).dtype == da.result_type(x, y)
919
920
921def test_operators():
922    x = np.arange(10)
923    y = np.arange(10).reshape((10, 1))
924    a = from_array(x, chunks=(5,))
925    b = from_array(y, chunks=(5, 1))
926
927    c = a + 1
928    assert_eq(c, x + 1)
929
930    c = a + b
931    assert_eq(c, x + x.reshape((10, 1)))
932
933    expr = (3 / a * b) ** 2 > 5
934    with pytest.warns(None):  # ZeroDivisionWarning
935        assert_eq(expr, (3 / x * y) ** 2 > 5)
936
937    with pytest.warns(None):  # OverflowWarning
938        c = da.exp(a)
939    assert_eq(c, np.exp(x))
940
941    assert_eq(abs(-a), a)
942    assert_eq(a, +x)
943
944
945def test_operator_dtype_promotion():
946    x = np.arange(10, dtype=np.float32)
947    y = np.array([1])
948    a = from_array(x, chunks=(5,))
949
950    assert_eq(x + 1, a + 1)  # still float32
951    assert_eq(x + 1e50, a + 1e50)  # now float64
952    assert_eq(x + y, a + y)  # also float64
953
954
955def test_field_access():
956    x = np.array([(1, 1.0), (2, 2.0)], dtype=[("a", "i4"), ("b", "f4")])
957    y = from_array(x, chunks=(1,))
958    assert_eq(y["a"], x["a"])
959    assert_eq(y[["b", "a"]], x[["b", "a"]])
960    assert same_keys(y[["b", "a"]], y[["b", "a"]])
961
962
963def test_field_access_with_shape():
964    dtype = [("col1", ("f4", (3, 2))), ("col2", ("f4", 3))]
965    data = np.ones((100, 50), dtype=dtype)
966    x = da.from_array(data, 10)
967    assert_eq(x["col1"], data["col1"])
968    assert_eq(x[["col1"]], data[["col1"]])
969    assert_eq(x["col2"], data["col2"])
970    assert_eq(x[["col1", "col2"]], data[["col1", "col2"]])
971
972
973def test_matmul():
974    x = np.random.random((5, 5))
975    y = np.random.random((5, 2))
976    a = from_array(x, chunks=(1, 5))
977    b = from_array(y, chunks=(5, 1))
978    assert_eq(operator.matmul(a, b), a.dot(b))
979    assert_eq(operator.matmul(a, b), operator.matmul(x, y))
980    assert_eq(operator.matmul(a, y), operator.matmul(x, b))
981    list_vec = list(range(1, 6))
982    assert_eq(operator.matmul(list_vec, b), operator.matmul(list_vec, y))
983    assert_eq(operator.matmul(x, list_vec), operator.matmul(a, list_vec))
984    z = np.random.random((5, 5, 5))
985    c = from_array(z, chunks=(1, 5, 1))
986    assert_eq(operator.matmul(a, z), operator.matmul(x, c))
987    assert_eq(operator.matmul(z, a), operator.matmul(c, x))
988
989
990def test_matmul_array_ufunc():
991    # regression test for https://github.com/dask/dask/issues/4353
992    x = np.random.random((5, 5))
993    y = np.random.random((5, 2))
994    a = from_array(x, chunks=(1, 5))
995    b = from_array(y, chunks=(5, 1))
996    result = b.__array_ufunc__(np.matmul, "__call__", a, b)
997    assert_eq(result, x.dot(y))
998
999
1000def test_T():
1001    x = np.arange(400).reshape((20, 20))
1002    a = from_array(x, chunks=(5, 5))
1003
1004    assert_eq(x.T, a.T)
1005
1006
1007def test_broadcast_to():
1008    x = np.random.randint(10, size=(5, 1, 6))
1009    a = from_array(x, chunks=(3, 1, 3))
1010
1011    for shape in [a.shape, (5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]:
1012        xb = np.broadcast_to(x, shape)
1013        ab = broadcast_to(a, shape)
1014
1015        assert_eq(xb, ab)
1016
1017        if a.shape == ab.shape:
1018            assert a is ab
1019
1020    pytest.raises(ValueError, lambda: broadcast_to(a, (2, 1, 6)))
1021    pytest.raises(ValueError, lambda: broadcast_to(a, (3,)))
1022
1023
1024def test_broadcast_to_array():
1025    x = np.random.randint(10, size=(5, 1, 6))
1026
1027    for shape in [(5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]:
1028        a = np.broadcast_to(x, shape)
1029        d = broadcast_to(x, shape)
1030
1031        assert_eq(a, d)
1032
1033
1034def test_broadcast_to_scalar():
1035    x = 5
1036
1037    for shape in [tuple(), (0,), (2, 3), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]:
1038        a = np.broadcast_to(x, shape)
1039        d = broadcast_to(x, shape)
1040
1041        assert_eq(a, d)
1042
1043
1044def test_broadcast_to_chunks():
1045    x = np.random.randint(10, size=(5, 1, 6))
1046    a = from_array(x, chunks=(3, 1, 3))
1047
1048    for shape, chunks, expected_chunks in [
1049        ((5, 3, 6), (3, -1, 3), ((3, 2), (3,), (3, 3))),
1050        ((5, 3, 6), (3, 1, 3), ((3, 2), (1, 1, 1), (3, 3))),
1051        ((2, 5, 3, 6), (1, 3, 1, 3), ((1, 1), (3, 2), (1, 1, 1), (3, 3))),
1052    ]:
1053        xb = np.broadcast_to(x, shape)
1054        ab = broadcast_to(a, shape, chunks=chunks)
1055        assert_eq(xb, ab)
1056        assert ab.chunks == expected_chunks
1057
1058    with pytest.raises(ValueError):
1059        broadcast_to(a, a.shape, chunks=((2, 3), (1,), (3, 3)))
1060    with pytest.raises(ValueError):
1061        broadcast_to(a, a.shape, chunks=((3, 2), (3,), (3, 3)))
1062    with pytest.raises(ValueError):
1063        broadcast_to(a, (5, 2, 6), chunks=((3, 2), (3,), (3, 3)))
1064
1065
1066def test_broadcast_arrays():
1067    assert np.broadcast_arrays() == da.broadcast_arrays()
1068
1069    a = np.arange(4)
1070    d_a = da.from_array(a, chunks=tuple(s // 2 for s in a.shape))
1071
1072    a_0 = np.arange(4)[None, :]
1073    a_1 = np.arange(4)[:, None]
1074
1075    d_a_0 = d_a[None, :]
1076    d_a_1 = d_a[:, None]
1077
1078    a_r = np.broadcast_arrays(a_0, a_1)
1079    d_r = da.broadcast_arrays(d_a_0, d_a_1)
1080
1081    assert isinstance(d_r, list)
1082    assert len(a_r) == len(d_r)
1083
1084    for e_a_r, e_d_r in zip(a_r, d_r):
1085        assert_eq(e_a_r, e_d_r)
1086
1087
1088def test_broadcast_arrays_uneven_chunks():
1089    x = da.ones(30, chunks=(3,))
1090    y = da.ones(30, chunks=(5,))
1091    z = np.broadcast_arrays(x, y)
1092
1093    assert_eq(z, z)
1094
1095    x = da.ones((1, 30), chunks=(1, 3))
1096    y = da.ones(30, chunks=(5,))
1097    z = np.broadcast_arrays(x, y)
1098
1099    assert_eq(z, z)
1100
1101
1102@pytest.mark.parametrize(
1103    "u_shape, v_shape",
1104    [
1105        [tuple(), (2, 3)],
1106        [(1,), (2, 3)],
1107        [(1, 1), (2, 3)],
1108        [(0, 3), (1, 3)],
1109        [(2, 0), (2, 1)],
1110        [(1, 0), (2, 1)],
1111        [(0, 1), (1, 3)],
1112    ],
1113)
1114def test_broadcast_operator(u_shape, v_shape):
1115    u = np.random.random(u_shape)
1116    v = np.random.random(v_shape)
1117
1118    d_u = from_array(u, chunks=1)
1119    d_v = from_array(v, chunks=1)
1120
1121    w = u * v
1122    d_w = d_u * d_v
1123
1124    assert_eq(w, d_w)
1125
1126
1127@pytest.mark.parametrize(
1128    "original_shape,new_shape,chunks",
1129    [
1130        ((10,), (10,), (3, 3, 4)),
1131        ((10,), (10, 1, 1), 5),
1132        ((10,), (1, 10), 5),
1133        ((24,), (2, 3, 4), 12),
1134        ((1, 24), (2, 3, 4), 12),
1135        ((2, 3, 4), (24,), (1, 3, 4)),
1136        ((2, 3, 4), (24,), 4),
1137        ((2, 3, 4), (24, 1), 4),
1138        ((2, 3, 4), (1, 24), 4),
1139        ((4, 4, 1), (4, 4), 2),
1140        ((4, 4), (4, 4, 1), 2),
1141        ((1, 4, 4), (4, 4), 2),
1142        ((1, 4, 4), (4, 4, 1), 2),
1143        ((1, 4, 4), (1, 1, 4, 4), 2),
1144        ((4, 4), (1, 4, 4, 1), 2),
1145        ((4, 4), (1, 4, 4), 2),
1146        ((2, 3), (2, 3), (1, 2)),
1147        ((2, 3), (3, 2), 3),
1148        ((4, 2, 3), (4, 6), 4),
1149        ((3, 4, 5, 6), (3, 4, 5, 6), (2, 3, 4, 5)),
1150        ((), (1,), 1),
1151        ((1,), (), 1),
1152        ((24,), (3, 8), 24),
1153        ((24,), (4, 6), 6),
1154        ((24,), (4, 3, 2), 6),
1155        ((24,), (4, 6, 1), 6),
1156        ((24,), (4, 6), (6, 12, 6)),
1157        ((64, 4), (8, 8, 4), (16, 2)),
1158        ((4, 64), (4, 8, 4, 2), (2, 16)),
1159        ((4, 8, 4, 2), (2, 1, 2, 32, 2), (2, 4, 2, 2)),
1160        ((4, 1, 4), (4, 4), (2, 1, 2)),
1161        ((0, 10), (0, 5, 2), (5, 5)),
1162        ((5, 0, 2), (0, 10), (5, 2, 2)),
1163        ((0,), (2, 0, 2), (4,)),
1164        ((2, 0, 2), (0,), (4, 4, 4)),
1165    ],
1166)
1167def test_reshape(original_shape, new_shape, chunks):
1168    x = np.random.randint(10, size=original_shape)
1169    a = from_array(x, chunks=chunks)
1170
1171    xr = x.reshape(new_shape)
1172    ar = a.reshape(new_shape)
1173
1174    if a.shape == new_shape:
1175        assert a is ar
1176
1177    assert_eq(xr, ar)
1178
1179
1180def test_reshape_exceptions():
1181    x = np.random.randint(10, size=(5,))
1182    a = from_array(x, chunks=(2,))
1183    with pytest.raises(ValueError):
1184        da.reshape(a, (100,))
1185
1186
1187def test_reshape_splat():
1188    x = da.ones((5, 5), chunks=(2, 2))
1189    assert_eq(x.reshape((25,)), x.reshape(25))
1190
1191
1192def test_reshape_fails_for_dask_only():
1193    cases = [((3, 4), (4, 3), 2)]
1194    for original_shape, new_shape, chunks in cases:
1195        x = np.random.randint(10, size=original_shape)
1196        a = from_array(x, chunks=chunks)
1197        assert x.reshape(new_shape).shape == new_shape
1198        with pytest.raises(ValueError):
1199            da.reshape(a, new_shape)
1200
1201
1202def test_reshape_unknown_dimensions():
1203    for original_shape in [(24,), (2, 12), (2, 3, 4)]:
1204        for new_shape in [(-1,), (2, -1), (-1, 3, 4)]:
1205            x = np.random.randint(10, size=original_shape)
1206            a = from_array(x, 24)
1207            assert_eq(x.reshape(new_shape), a.reshape(new_shape))
1208
1209    pytest.raises(ValueError, lambda: da.reshape(a, (-1, -1)))
1210
1211
1212def test_full():
1213    d = da.full((3, 4), 2, chunks=((2, 1), (2, 2)))
1214    assert d.chunks == ((2, 1), (2, 2))
1215    assert_eq(d, np.full((3, 4), 2))
1216
1217
1218def test_map_blocks():
1219    x = np.arange(400).reshape((20, 20))
1220    d = from_array(x, chunks=(7, 7))
1221
1222    e = d.map_blocks(inc, dtype=d.dtype)
1223
1224    assert d.chunks == e.chunks
1225    assert_eq(e, x + 1)
1226
1227    e = d.map_blocks(inc, name="increment")
1228    assert e.name.startswith("increment-")
1229
1230    assert d.map_blocks(inc, name="foo").name != d.map_blocks(dec, name="foo").name
1231
1232    d = from_array(x, chunks=(10, 10))
1233    e = d.map_blocks(lambda x: x[::2, ::2], chunks=(5, 5), dtype=d.dtype)
1234
1235    assert e.chunks == ((5, 5), (5, 5))
1236    assert_eq(e, x[::2, ::2])
1237
1238    d = from_array(x, chunks=(8, 8))
1239    e = d.map_blocks(
1240        lambda x: x[::2, ::2], chunks=((4, 4, 2), (4, 4, 2)), dtype=d.dtype
1241    )
1242
1243    assert_eq(e, x[::2, ::2])
1244
1245
1246def test_map_blocks2():
1247    x = np.arange(10, dtype="i8")
1248    d = from_array(x, chunks=(2,))
1249
1250    def func(block, block_id=None, c=0):
1251        return np.ones_like(block) * sum(block_id) + c
1252
1253    out = d.map_blocks(func, dtype="i8")
1254    expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype="i8")
1255
1256    assert_eq(out, expected)
1257    assert same_keys(d.map_blocks(func, dtype="i8"), out)
1258
1259    out = d.map_blocks(func, dtype="i8", c=1)
1260    expected = expected + 1
1261
1262    assert_eq(out, expected)
1263    assert same_keys(d.map_blocks(func, dtype="i8", c=1), out)
1264
1265
1266def test_map_blocks_block_info():
1267    x = da.arange(50, chunks=10)
1268
1269    def func(a, b, c, block_info=None):
1270        for idx in [0, 2, None]:  # positions in args
1271            assert block_info[idx]["shape"] == (50,)
1272            assert block_info[idx]["num-chunks"] == (5,)
1273            start, stop = block_info[idx]["array-location"][0]
1274            assert stop - start == 10
1275            assert 0 <= start <= 40
1276            assert 10 <= stop <= 50
1277
1278            assert 0 <= block_info[idx]["chunk-location"][0] <= 4
1279        assert block_info[None]["chunk-shape"] == (10,)
1280        assert block_info[None]["dtype"] == x.dtype
1281
1282        return a + b + c
1283
1284    z = da.map_blocks(func, x, 100, x + 1, dtype=x.dtype)
1285    assert_eq(z, x + x + 1 + 100)
1286
1287
1288def test_map_blocks_block_info_with_new_axis():
1289    # https://github.com/dask/dask/issues/4298
1290    values = da.from_array(np.array(["a", "a", "b", "c"]), 2)
1291
1292    def func(x, block_info=None):
1293        assert block_info.keys() == {0, None}
1294        assert block_info[0]["shape"] == (4,)
1295        assert block_info[0]["num-chunks"] == (2,)
1296        assert block_info[None]["shape"] == (4, 3)
1297        assert block_info[None]["num-chunks"] == (2, 1)
1298        assert block_info[None]["chunk-shape"] == (2, 3)
1299        assert block_info[None]["dtype"] == np.dtype("f8")
1300
1301        assert block_info[0]["chunk-location"] in {(0,), (1,)}
1302
1303        if block_info[0]["chunk-location"] == (0,):
1304            assert block_info[0]["array-location"] == [(0, 2)]
1305            assert block_info[None]["chunk-location"] == (0, 0)
1306            assert block_info[None]["array-location"] == [(0, 2), (0, 3)]
1307        elif block_info[0]["chunk-location"] == (1,):
1308            assert block_info[0]["array-location"] == [(2, 4)]
1309            assert block_info[None]["chunk-location"] == (1, 0)
1310            assert block_info[None]["array-location"] == [(2, 4), (0, 3)]
1311
1312        return np.ones((len(x), 3))
1313
1314    z = values.map_blocks(func, chunks=((2, 2), 3), new_axis=1, dtype="f8")
1315    assert_eq(z, np.ones((4, 3), dtype="f8"))
1316
1317
1318def test_map_blocks_block_info_with_drop_axis():
1319    # https://github.com/dask/dask/issues/4584
1320    values = da.from_array(
1321        np.array(
1322            [[1, 2, 4], [8, 16, 32], [64, 128, 256], [1024, 2048, 4096]], dtype="u4"
1323        ),
1324        (2, 1),
1325    )
1326
1327    def func(x, block_info=None):
1328        assert block_info.keys() == {0, None}
1329        assert block_info[0]["shape"] == (4, 3)
1330        # drop_axis concatenates along the dropped dimension, hence not (2, 3)
1331        assert block_info[0]["num-chunks"] == (2, 1)
1332        assert block_info[None]["shape"] == (4,)
1333        assert block_info[None]["num-chunks"] == (2,)
1334        assert block_info[None]["chunk-shape"] == (2,)
1335        assert block_info[None]["dtype"] == np.dtype("u4")
1336
1337        assert block_info[0]["chunk-location"] in {(0, 0), (1, 0)}
1338
1339        if block_info[0]["chunk-location"] == (0, 0):
1340            assert block_info[0]["array-location"] == [(0, 2), (0, 3)]
1341            assert block_info[None]["chunk-location"] == (0,)
1342            assert block_info[None]["array-location"] == [(0, 2)]
1343        elif block_info[0]["chunk-location"] == (1, 0):
1344            assert block_info[0]["array-location"] == [(2, 4), (0, 3)]
1345            assert block_info[None]["chunk-location"] == (1,)
1346            assert block_info[None]["array-location"] == [(2, 4)]
1347
1348        return np.sum(x, axis=1, dtype="u4")
1349
1350    z = values.map_blocks(func, drop_axis=1, dtype="u4")
1351    assert_eq(z, np.array([7, 56, 448, 7168], dtype="u4"))
1352
1353
1354def test_map_blocks_block_info_with_broadcast():
1355    expected0 = [
1356        {
1357            "shape": (3, 4),
1358            "num-chunks": (1, 2),
1359            "array-location": [(0, 3), (0, 2)],
1360            "chunk-location": (0, 0),
1361        },
1362        {
1363            "shape": (3, 4),
1364            "num-chunks": (1, 2),
1365            "array-location": [(0, 3), (2, 4)],
1366            "chunk-location": (0, 1),
1367        },
1368    ]
1369    expected1 = [
1370        {
1371            "shape": (6, 2),
1372            "num-chunks": (2, 1),
1373            "array-location": [(0, 3), (0, 2)],
1374            "chunk-location": (0, 0),
1375        },
1376        {
1377            "shape": (6, 2),
1378            "num-chunks": (2, 1),
1379            "array-location": [(3, 6), (0, 2)],
1380            "chunk-location": (1, 0),
1381        },
1382    ]
1383    expected2 = [
1384        {
1385            "shape": (4,),
1386            "num-chunks": (2,),
1387            "array-location": [(0, 2)],
1388            "chunk-location": (0,),
1389        },
1390        {
1391            "shape": (4,),
1392            "num-chunks": (2,),
1393            "array-location": [(2, 4)],
1394            "chunk-location": (1,),
1395        },
1396    ]
1397    expected = [
1398        {
1399            0: expected0[0],
1400            1: expected1[0],
1401            2: expected2[0],
1402            None: {
1403                "shape": (6, 4),
1404                "num-chunks": (2, 2),
1405                "dtype": np.float_,
1406                "chunk-shape": (3, 2),
1407                "array-location": [(0, 3), (0, 2)],
1408                "chunk-location": (0, 0),
1409            },
1410        },
1411        {
1412            0: expected0[1],
1413            1: expected1[0],
1414            2: expected2[1],
1415            None: {
1416                "shape": (6, 4),
1417                "num-chunks": (2, 2),
1418                "dtype": np.float_,
1419                "chunk-shape": (3, 2),
1420                "array-location": [(0, 3), (2, 4)],
1421                "chunk-location": (0, 1),
1422            },
1423        },
1424        {
1425            0: expected0[0],
1426            1: expected1[1],
1427            2: expected2[0],
1428            None: {
1429                "shape": (6, 4),
1430                "num-chunks": (2, 2),
1431                "dtype": np.float_,
1432                "chunk-shape": (3, 2),
1433                "array-location": [(3, 6), (0, 2)],
1434                "chunk-location": (1, 0),
1435            },
1436        },
1437        {
1438            0: expected0[1],
1439            1: expected1[1],
1440            2: expected2[1],
1441            None: {
1442                "shape": (6, 4),
1443                "num-chunks": (2, 2),
1444                "dtype": np.float_,
1445                "chunk-shape": (3, 2),
1446                "array-location": [(3, 6), (2, 4)],
1447                "chunk-location": (1, 1),
1448            },
1449        },
1450    ]
1451
1452    def func(x, y, z, block_info=None):
1453        for info in expected:
1454            if block_info[None]["chunk-location"] == info[None]["chunk-location"]:
1455                assert block_info == info
1456                break
1457        else:
1458            assert False
1459        return x + y + z
1460
1461    a = da.ones((3, 4), chunks=(3, 2))
1462    b = da.ones((6, 2), chunks=(3, 2))
1463    c = da.ones((4,), chunks=(2,))
1464    d = da.map_blocks(func, a, b, c, chunks=((3, 3), (2, 2)), dtype=a.dtype)
1465    assert d.chunks == ((3, 3), (2, 2))
1466    assert_eq(d, 3 * np.ones((6, 4)))
1467
1468
1469def test_map_blocks_with_constants():
1470    d = da.arange(10, chunks=3)
1471    e = d.map_blocks(add, 100, dtype=d.dtype)
1472
1473    assert_eq(e, np.arange(10) + 100)
1474
1475    assert_eq(da.map_blocks(sub, d, 10, dtype=d.dtype), np.arange(10) - 10)
1476    assert_eq(da.map_blocks(sub, 10, d, dtype=d.dtype), 10 - np.arange(10))
1477
1478
1479def test_map_blocks_with_kwargs():
1480    d = da.arange(10, chunks=5)
1481
1482    result = d.map_blocks(np.max, axis=0, keepdims=True, dtype=d.dtype, chunks=(1,))
1483
1484    assert_eq(result, np.array([4, 9]))
1485
1486
1487def test_map_blocks_infer_chunks_broadcast():
1488    dx = da.from_array([[1, 2, 3, 4]], chunks=((1,), (2, 2)))
1489    dy = da.from_array([[10, 20], [30, 40]], chunks=((1, 1), (2,)))
1490    result = da.map_blocks(lambda x, y: x + y, dx, dy)
1491    assert result.chunks == ((1, 1), (2, 2))
1492    assert_eq(result, np.array([[11, 22, 13, 24], [31, 42, 33, 44]]))
1493
1494
1495def test_map_blocks_with_chunks():
1496    dx = da.ones((5, 3), chunks=(2, 2))
1497    dy = da.ones((5, 3), chunks=(2, 2))
1498    dz = da.map_blocks(np.add, dx, dy, chunks=dx.chunks)
1499    assert_eq(dz, np.ones((5, 3)) * 2)
1500
1501
1502def test_map_blocks_dtype_inference():
1503    x = np.arange(50).reshape((5, 10))
1504    y = np.arange(10)
1505    dx = da.from_array(x, chunks=5)
1506    dy = da.from_array(y, chunks=5)
1507
1508    def foo(x, *args, **kwargs):
1509        cast = kwargs.pop("cast", "i8")
1510        return (x + sum(args)).astype(cast)
1511
1512    assert_eq(dx.map_blocks(foo, dy, 1), foo(dx, dy, 1))
1513    assert_eq(dx.map_blocks(foo, dy, 1, cast="f8"), foo(dx, dy, 1, cast="f8"))
1514    assert_eq(
1515        dx.map_blocks(foo, dy, 1, cast="f8", dtype="f8"),
1516        foo(dx, dy, 1, cast="f8", dtype="f8"),
1517    )
1518
1519    def foo(x):
1520        raise RuntimeError("Woops")
1521
1522    with pytest.raises(ValueError) as e:
1523        dx.map_blocks(foo)
1524    msg = str(e.value)
1525    assert "dtype" in msg
1526
1527
1528def test_map_blocks_infer_newaxis():
1529    x = da.ones((5, 3), chunks=(2, 2))
1530    y = da.map_blocks(lambda x: x[None], x, chunks=((1,), (2, 2, 1), (2, 1)))
1531    assert_eq(y, da.ones((1, 5, 3)))
1532
1533
1534def test_map_blocks_no_array_args():
1535    def func(dtype, block_info=None):
1536        loc = block_info[None]["array-location"]
1537        return np.arange(loc[0][0], loc[0][1], dtype=dtype)
1538
1539    x = da.map_blocks(func, np.float32, chunks=((5, 3),), dtype=np.float32)
1540    assert x.chunks == ((5, 3),)
1541    assert_eq(x, np.arange(8, dtype=np.float32))
1542
1543
1544@pytest.mark.parametrize("func", [lambda x, y: x + y, lambda x, y, block_info: x + y])
1545def test_map_blocks_optimize_blockwise(func):
1546    # Check that map_blocks layers can merge with elementwise layers
1547    base = [da.full((1,), i, chunks=1) for i in range(4)]
1548    a = base[0] + base[1]
1549    b = da.map_blocks(func, a, base[2], dtype=np.int8)
1550    c = b + base[3]
1551    dsk = c.__dask_graph__()
1552    optimized = optimize_blockwise(dsk)
1553
1554    # Everything should be fused into a single layer.
1555    # If the lambda includes block_info, there will be two layers.
1556    assert len(optimized.layers) == len(dsk.layers) - 6
1557
1558
1559def test_repr():
1560    d = da.ones((4, 4), chunks=(2, 2))
1561    assert key_split(d.name) in repr(d)
1562    assert str(d.shape) in repr(d)
1563    assert str(d.dtype) in repr(d)
1564    d = da.ones((4000, 4), chunks=(4, 2))
1565    assert len(str(d)) < 1000
1566
1567
1568def test_repr_meta():
1569    d = da.ones((4, 4), chunks=(2, 2))
1570    assert "chunktype=numpy.ndarray" in repr(d)
1571
1572    # Test non-numpy meta
1573    sparse = pytest.importorskip("sparse")
1574    s = d.map_blocks(sparse.COO)
1575    assert "chunktype=sparse.COO" in repr(s)
1576
1577
1578def test_repr_html_array_highlevelgraph():
1579    pytest.importorskip("jinja2")
1580    x = da.ones((9, 9), chunks=(3, 3)).T[0:4, 0:4]
1581    hg = x.dask
1582    assert xml.etree.ElementTree.fromstring(hg._repr_html_()) is not None
1583    for layer in hg.layers.values():
1584        assert xml.etree.ElementTree.fromstring(layer._repr_html_()) is not None
1585
1586
1587def test_slicing_with_ellipsis():
1588    x = np.arange(256).reshape((4, 4, 4, 4))
1589    d = da.from_array(x, chunks=((2, 2, 2, 2)))
1590
1591    assert_eq(d[..., 1], x[..., 1])
1592    assert_eq(d[0, ..., 1], x[0, ..., 1])
1593
1594
1595def test_slicing_with_ndarray():
1596    x = np.arange(64).reshape((8, 8))
1597    d = da.from_array(x, chunks=((4, 4)))
1598
1599    assert_eq(d[np.arange(8)], x)
1600    assert_eq(d[np.ones(8, dtype=bool)], x)
1601    assert_eq(d[np.array([1])], x[[1]])
1602    assert_eq(d[np.array([True, False, True] + [False] * 5)], x[[0, 2]])
1603
1604
1605def test_slicing_flexible_type():
1606    a = np.array([["a", "b"], ["c", "d"]])
1607    b = da.from_array(a, 2)
1608
1609    assert_eq(a[:, 0], b[:, 0])
1610
1611
1612def test_slicing_with_object_dtype():
1613    # https://github.com/dask/dask/issues/6892
1614    d = da.from_array(np.array(["a", "b"], dtype=object), chunks=(1,))
1615    assert d.dtype == d[(0,)].dtype
1616
1617
1618def test_dtype():
1619    d = da.ones((4, 4), chunks=(2, 2))
1620
1621    assert d.dtype == d.compute().dtype
1622    assert (d * 1.0).dtype == (d + 1.0).compute().dtype
1623    assert d.sum().dtype == d.sum().compute().dtype  # no shape
1624
1625
1626def test_blockdims_from_blockshape():
1627    assert blockdims_from_blockshape((10, 10), (4, 3)) == ((4, 4, 2), (3, 3, 3, 1))
1628    pytest.raises(TypeError, lambda: blockdims_from_blockshape((10,), None))
1629    assert blockdims_from_blockshape((1e2, 3), [1e1, 3]) == ((10,) * 10, (3,))
1630    assert blockdims_from_blockshape((np.int8(10),), (5,)) == ((5, 5),)
1631
1632
1633def test_coerce():
1634    d0 = da.from_array(np.array(1), chunks=(1,))
1635    d1 = da.from_array(np.array([1]), chunks=(1,))
1636    with dask.config.set(scheduler="sync"):
1637        for d in d0, d1:
1638            assert bool(d) is True
1639            assert int(d) == 1
1640            assert float(d) == 1.0
1641            assert complex(d) == complex(1)
1642
1643    a2 = np.arange(2)
1644    d2 = da.from_array(a2, chunks=(2,))
1645    for func in (int, float, complex):
1646        pytest.raises(TypeError, lambda: func(d2))
1647
1648
1649def test_bool():
1650    arr = np.arange(100).reshape((10, 10))
1651    darr = da.from_array(arr, chunks=(10, 10))
1652    with pytest.raises(ValueError):
1653        bool(darr)
1654        bool(darr == darr)
1655
1656
1657def test_store_kwargs():
1658    d = da.ones((10, 10), chunks=(2, 2))
1659    a = d + 1
1660
1661    called = [False]
1662
1663    def get_func(*args, **kwargs):
1664        assert kwargs.pop("foo") == "test kwarg"
1665        r = dask.get(*args, **kwargs)
1666        called[0] = True
1667        return r
1668
1669    called[0] = False
1670    at = np.zeros(shape=(10, 10))
1671    store([a], [at], scheduler=get_func, foo="test kwarg")
1672    assert called[0]
1673
1674    called[0] = False
1675    at = np.zeros(shape=(10, 10))
1676    a.store(at, scheduler=get_func, foo="test kwarg")
1677    assert called[0]
1678
1679    called[0] = False
1680    at = np.zeros(shape=(10, 10))
1681    store([a], [at], scheduler=get_func, return_stored=True, foo="test kwarg")
1682    assert called[0]
1683
1684
1685def test_store_delayed_target():
1686    from dask.delayed import delayed
1687
1688    d = da.ones((4, 4), chunks=(2, 2))
1689    a, b = d + 1, d + 2
1690
1691    # empty buffers to be used as targets
1692    targs = {}
1693
1694    def make_target(key):
1695        a = np.empty((4, 4))
1696        targs[key] = a
1697        return a
1698
1699    # delayed calls to these targets
1700    atd = delayed(make_target)("at")
1701    btd = delayed(make_target)("bt")
1702
1703    # test not keeping result
1704    st = store([a, b], [atd, btd])
1705
1706    at = targs["at"]
1707    bt = targs["bt"]
1708
1709    assert st is None
1710    assert_eq(at, a)
1711    assert_eq(bt, b)
1712
1713    # test keeping result
1714    for st_compute in [False, True]:
1715        targs.clear()
1716
1717        st = store([a, b], [atd, btd], return_stored=True, compute=st_compute)
1718        if st_compute:
1719            assert all(not any(dask.core.get_deps(e.dask)[0].values()) for e in st)
1720
1721        st = dask.compute(*st)
1722
1723        at = targs["at"]
1724        bt = targs["bt"]
1725
1726        assert st is not None
1727        assert isinstance(st, tuple)
1728        assert all([isinstance(v, np.ndarray) for v in st])
1729        assert_eq(at, a)
1730        assert_eq(bt, b)
1731        assert_eq(st[0], a)
1732        assert_eq(st[1], b)
1733
1734        pytest.raises(ValueError, lambda: store([a], [at, bt]))
1735        pytest.raises(ValueError, lambda: store(at, at))
1736        pytest.raises(ValueError, lambda: store([at, bt], [at, bt]))
1737
1738
1739def test_store():
1740    d = da.ones((4, 4), chunks=(2, 2))
1741    a, b = d + 1, d + 2
1742
1743    at = np.empty(shape=(4, 4))
1744    bt = np.empty(shape=(4, 4))
1745
1746    st = store([a, b], [at, bt])
1747    assert st is None
1748    assert (at == 2).all()
1749    assert (bt == 3).all()
1750
1751    pytest.raises(ValueError, lambda: store([a], [at, bt]))
1752    pytest.raises(ValueError, lambda: store(at, at))
1753    pytest.raises(ValueError, lambda: store([at, bt], [at, bt]))
1754
1755
1756def test_store_regions():
1757    d = da.ones((4, 4, 4), dtype=int, chunks=(2, 2, 2))
1758    a, b = d + 1, d + 2
1759    a = a[:, 1:, :].astype(float)
1760
1761    region = (slice(None, None, 2), slice(None), [1, 2, 4, 5])
1762
1763    # Single region:
1764    at = np.zeros(shape=(8, 3, 6))
1765    bt = np.zeros(shape=(8, 4, 6))
1766    v = store([a, b], [at, bt], regions=region, compute=False)
1767    assert isinstance(v, Delayed)
1768    assert (at == 0).all() and (bt[region] == 0).all()
1769    assert all([ev is None for ev in v.compute()])
1770    assert (at[region] == 2).all() and (bt[region] == 3).all()
1771    assert not (bt == 3).all() and not (bt == 0).all()
1772    assert not (at == 2).all() and not (at == 0).all()
1773
1774    # Multiple regions:
1775    at = np.zeros(shape=(8, 3, 6))
1776    bt = np.zeros(shape=(8, 4, 6))
1777    v = store([a, b], [at, bt], regions=[region, region], compute=False)
1778    assert isinstance(v, Delayed)
1779    assert (at == 0).all() and (bt[region] == 0).all()
1780    assert all([ev is None for ev in v.compute()])
1781    assert (at[region] == 2).all() and (bt[region] == 3).all()
1782    assert not (bt == 3).all() and not (bt == 0).all()
1783    assert not (at == 2).all() and not (at == 0).all()
1784
1785    # Single region (keep result):
1786    for st_compute in [False, True]:
1787        at = np.zeros(shape=(8, 3, 6))
1788        bt = np.zeros(shape=(8, 4, 6))
1789        v = store(
1790            [a, b], [at, bt], regions=region, compute=st_compute, return_stored=True
1791        )
1792        assert isinstance(v, tuple)
1793        assert all([isinstance(e, da.Array) for e in v])
1794        if st_compute:
1795            assert all(not any(dask.core.get_deps(e.dask)[0].values()) for e in v)
1796        else:
1797            assert (at == 0).all() and (bt[region] == 0).all()
1798
1799        ar, br = v
1800        assert ar.dtype == a.dtype
1801        assert br.dtype == b.dtype
1802        assert ar.shape == a.shape
1803        assert br.shape == b.shape
1804        assert ar.chunks == a.chunks
1805        assert br.chunks == b.chunks
1806
1807        ar, br = da.compute(ar, br)
1808        assert (at[region] == 2).all() and (bt[region] == 3).all()
1809        assert not (bt == 3).all() and not (bt == 0).all()
1810        assert not (at == 2).all() and not (at == 0).all()
1811        assert (br == 3).all()
1812        assert (ar == 2).all()
1813
1814    # Multiple regions (keep result):
1815    for st_compute in [False, True]:
1816        at = np.zeros(shape=(8, 3, 6))
1817        bt = np.zeros(shape=(8, 4, 6))
1818        v = store(
1819            [a, b],
1820            [at, bt],
1821            regions=[region, region],
1822            compute=st_compute,
1823            return_stored=True,
1824        )
1825        assert isinstance(v, tuple)
1826        assert all([isinstance(e, da.Array) for e in v])
1827        if st_compute:
1828            assert all(not any(dask.core.get_deps(e.dask)[0].values()) for e in v)
1829        else:
1830            assert (at == 0).all() and (bt[region] == 0).all()
1831
1832        ar, br = v
1833        assert ar.dtype == a.dtype
1834        assert br.dtype == b.dtype
1835        assert ar.shape == a.shape
1836        assert br.shape == b.shape
1837        assert ar.chunks == a.chunks
1838        assert br.chunks == b.chunks
1839
1840        ar, br = da.compute(ar, br)
1841        assert (at[region] == 2).all() and (bt[region] == 3).all()
1842        assert not (bt == 3).all() and not (bt == 0).all()
1843        assert not (at == 2).all() and not (at == 0).all()
1844        assert (br == 3).all()
1845        assert (ar == 2).all()
1846
1847
1848def test_store_compute_false():
1849    d = da.ones((4, 4), chunks=(2, 2))
1850    a, b = d + 1, d + 2
1851
1852    at = np.zeros(shape=(4, 4))
1853    bt = np.zeros(shape=(4, 4))
1854
1855    v = store([a, b], [at, bt], compute=False)
1856    assert isinstance(v, Delayed)
1857
1858    # You need a well-formed HighLevelgraph for e.g. dask.graph_manipulation.bind
1859    for layer in v.__dask_layers__():
1860        assert layer in v.dask.layers
1861
1862    assert (at == 0).all() and (bt == 0).all()
1863    assert all([ev is None for ev in v.compute()])
1864    assert (at == 2).all() and (bt == 3).all()
1865
1866    at = np.zeros(shape=(4, 4))
1867    bt = np.zeros(shape=(4, 4))
1868
1869    dat, dbt = store([a, b], [at, bt], compute=False, return_stored=True)
1870    assert isinstance(dat, Array) and isinstance(dbt, Array)
1871    assert (at == 0).all() and (bt == 0).all()
1872    assert (dat.compute() == at).all() and (dbt.compute() == bt).all()
1873    assert (at == 2).all() and (bt == 3).all()
1874
1875
1876def test_store_nocompute_regions():
1877    x = da.ones(10, chunks=1)
1878    y = np.zeros((2, 10))
1879    d1 = da.store(x, y, regions=(0,), compute=False)
1880    d2 = da.store(x, y, regions=(1,), compute=False)
1881    assert d1.key != d2.key
1882
1883
1884class ThreadSafetyError(Exception):
1885    pass
1886
1887
1888class NonthreadSafeStore:
1889    def __init__(self):
1890        self.in_use = False
1891
1892    def __setitem__(self, key, value):
1893        if self.in_use:
1894            raise ThreadSafetyError()
1895        self.in_use = True
1896        time.sleep(0.001)
1897        self.in_use = False
1898
1899
1900class ThreadSafeStore:
1901    def __init__(self):
1902        self.concurrent_uses = 0
1903        self.max_concurrent_uses = 0
1904
1905    def __setitem__(self, key, value):
1906        self.concurrent_uses += 1
1907        self.max_concurrent_uses = max(self.concurrent_uses, self.max_concurrent_uses)
1908        time.sleep(0.01)
1909        self.concurrent_uses -= 1
1910
1911
1912class CounterLock:
1913    def __init__(self, *args, **kwargs):
1914        self.lock = Lock(*args, **kwargs)
1915
1916        self.acquire_count = 0
1917        self.release_count = 0
1918
1919    def acquire(self, *args, **kwargs):
1920        self.acquire_count += 1
1921        return self.lock.acquire(*args, **kwargs)
1922
1923    def release(self, *args, **kwargs):
1924        self.release_count += 1
1925        return self.lock.release(*args, **kwargs)
1926
1927
1928def test_store_locks():
1929    _Lock = type(Lock())
1930    d = da.ones((10, 10), chunks=(2, 2))
1931    a, b = d + 1, d + 2
1932
1933    at = np.zeros(shape=(10, 10))
1934    bt = np.zeros(shape=(10, 10))
1935
1936    lock = Lock()
1937    v = store([a, b], [at, bt], compute=False, lock=lock)
1938    assert isinstance(v, Delayed)
1939    dsk = v.dask
1940    locks = {vv for v in dsk.values() for vv in v if isinstance(vv, _Lock)}
1941    assert locks == {lock}
1942
1943    # Ensure same lock applies over multiple stores
1944    at = NonthreadSafeStore()
1945    v = store([a, b], [at, at], lock=lock, scheduler="threads", num_workers=10)
1946    assert v is None
1947
1948    # Don't assume thread safety by default
1949    at = NonthreadSafeStore()
1950    assert store(a, at, scheduler="threads", num_workers=10) is None
1951    assert a.store(at, scheduler="threads", num_workers=10) is None
1952
1953    # Ensure locks can be removed
1954    at = ThreadSafeStore()
1955    for i in range(10):
1956        st = a.store(at, lock=False, scheduler="threads", num_workers=10)
1957        assert st is None
1958        if at.max_concurrent_uses > 1:
1959            break
1960        if i == 9:
1961            assert False
1962
1963    # Verify number of lock calls
1964    nchunks = np.sum([np.prod([len(c) for c in e.chunks]) for e in [a, b]])
1965    for c in (False, True):
1966        at = np.zeros(shape=(10, 10))
1967        bt = np.zeros(shape=(10, 10))
1968        lock = CounterLock()
1969
1970        v = store([a, b], [at, bt], lock=lock, compute=c, return_stored=True)
1971        assert all(isinstance(e, Array) for e in v)
1972
1973        da.compute(v)
1974
1975        # When `return_stored=True` and `compute=False`,
1976        # the lock should be acquired only once for store and load steps
1977        # as they are fused together into one step.
1978        assert lock.acquire_count == lock.release_count
1979        if c:
1980            assert lock.acquire_count == 2 * nchunks
1981        else:
1982            assert lock.acquire_count == nchunks
1983
1984
1985def test_store_method_return():
1986    d = da.ones((10, 10), chunks=(2, 2))
1987    a = d + 1
1988
1989    for compute in [False, True]:
1990        for return_stored in [False, True]:
1991            at = np.zeros(shape=(10, 10))
1992            r = a.store(
1993                at, scheduler="threads", compute=compute, return_stored=return_stored
1994            )
1995
1996            if return_stored:
1997                assert isinstance(r, Array)
1998            elif compute:
1999                assert r is None
2000            else:
2001                assert isinstance(r, Delayed)
2002
2003
2004@pytest.mark.xfail(reason="can't lock with multiprocessing")
2005def test_store_multiprocessing_lock():
2006    d = da.ones((10, 10), chunks=(2, 2))
2007    a = d + 1
2008
2009    at = np.zeros(shape=(10, 10))
2010    st = a.store(at, scheduler="processes", num_workers=10)
2011    assert st is None
2012
2013
2014@pytest.mark.parametrize("return_stored", [False, True])
2015@pytest.mark.parametrize("delayed_target", [False, True])
2016def test_store_deterministic_keys(return_stored, delayed_target):
2017    a = da.ones((10, 10), chunks=(2, 2))
2018    at = np.zeros(shape=(10, 10))
2019    if delayed_target:
2020        at = delayed(at)
2021    st1 = a.store(at, return_stored=return_stored, compute=False)
2022    st2 = a.store(at, return_stored=return_stored, compute=False)
2023    assert st1.dask.keys() == st2.dask.keys()
2024
2025
2026def test_to_hdf5():
2027    h5py = pytest.importorskip("h5py")
2028    x = da.ones((4, 4), chunks=(2, 2))
2029    y = da.ones(4, chunks=2, dtype="i4")
2030
2031    with tmpfile(".hdf5") as fn:
2032        x.to_hdf5(fn, "/x")
2033        with h5py.File(fn, mode="r+") as f:
2034            d = f["/x"]
2035
2036            assert_eq(d[:], x)
2037            assert d.chunks == (2, 2)
2038
2039    with tmpfile(".hdf5") as fn:
2040        x.to_hdf5(fn, "/x", chunks=None)
2041        with h5py.File(fn, mode="r+") as f:
2042            d = f["/x"]
2043
2044            assert_eq(d[:], x)
2045            assert d.chunks is None
2046
2047    with tmpfile(".hdf5") as fn:
2048        x.to_hdf5(fn, "/x", chunks=(1, 1))
2049        with h5py.File(fn, mode="r+") as f:
2050            d = f["/x"]
2051
2052            assert_eq(d[:], x)
2053            assert d.chunks == (1, 1)
2054
2055    with tmpfile(".hdf5") as fn:
2056        da.to_hdf5(fn, {"/x": x, "/y": y})
2057
2058        with h5py.File(fn, mode="r+") as f:
2059            assert_eq(f["/x"][:], x)
2060            assert f["/x"].chunks == (2, 2)
2061            assert_eq(f["/y"][:], y)
2062            assert f["/y"].chunks == (2,)
2063
2064
2065def test_to_dask_dataframe():
2066    dd = pytest.importorskip("dask.dataframe")
2067    a = da.ones((4,), chunks=(2,))
2068    d = a.to_dask_dataframe()
2069    assert isinstance(d, dd.Series)
2070
2071    a = da.ones((4, 4), chunks=(2, 2))
2072    d = a.to_dask_dataframe()
2073    assert isinstance(d, dd.DataFrame)
2074
2075
2076def test_np_array_with_zero_dimensions():
2077    d = da.ones((4, 4), chunks=(2, 2))
2078    assert_eq(np.array(d.sum()), np.array(d.compute().sum()))
2079
2080
2081def test_dtype_complex():
2082    x = np.arange(24).reshape((4, 6)).astype("f4")
2083    y = np.arange(24).reshape((4, 6)).astype("i8")
2084    z = np.arange(24).reshape((4, 6)).astype("i2")
2085
2086    a = da.from_array(x, chunks=(2, 3))
2087    b = da.from_array(y, chunks=(2, 3))
2088    c = da.from_array(z, chunks=(2, 3))
2089
2090    def assert_eq(a, b):
2091        return isinstance(a, np.dtype) and isinstance(b, np.dtype) and str(a) == str(b)
2092
2093    assert_eq(a.dtype, x.dtype)
2094    assert_eq(b.dtype, y.dtype)
2095
2096    assert_eq((a + 1).dtype, (x + 1).dtype)
2097    assert_eq((a + b).dtype, (x + y).dtype)
2098    assert_eq(a.T.dtype, x.T.dtype)
2099    assert_eq(a[:3].dtype, x[:3].dtype)
2100    assert_eq((a.dot(b.T)).dtype, (x.dot(y.T)).dtype)
2101
2102    assert_eq(stack([a, b]).dtype, np.vstack([x, y]).dtype)
2103    assert_eq(concatenate([a, b]).dtype, np.concatenate([x, y]).dtype)
2104
2105    assert_eq(b.std().dtype, y.std().dtype)
2106    assert_eq(c.sum().dtype, z.sum().dtype)
2107    assert_eq(a.min().dtype, a.min().dtype)
2108    assert_eq(b.std().dtype, b.std().dtype)
2109    assert_eq(a.argmin(axis=0).dtype, a.argmin(axis=0).dtype)
2110
2111    assert_eq(da.sin(c).dtype, np.sin(z).dtype)
2112    assert_eq(da.exp(b).dtype, np.exp(y).dtype)
2113    assert_eq(da.floor(a).dtype, np.floor(x).dtype)
2114    assert_eq(da.isnan(b).dtype, np.isnan(y).dtype)
2115    with contextlib.suppress(ImportError):
2116        assert da.isnull(b).dtype == "bool"
2117        assert da.notnull(b).dtype == "bool"
2118
2119    x = np.array([("a", 1)], dtype=[("text", "S1"), ("numbers", "i4")])
2120    d = da.from_array(x, chunks=(1,))
2121
2122    assert_eq(d["text"].dtype, x["text"].dtype)
2123    assert_eq(d[["numbers", "text"]].dtype, x[["numbers", "text"]].dtype)
2124
2125
2126def test_astype():
2127    x = np.ones((5, 5), dtype="f8")
2128    d = da.from_array(x, chunks=(2, 2))
2129
2130    assert d.astype("i8").dtype == "i8"
2131    assert_eq(d.astype("i8"), x.astype("i8"))
2132    assert same_keys(d.astype("i8"), d.astype("i8"))
2133
2134    with pytest.raises(TypeError):
2135        d.astype("i8", casting="safe")
2136
2137    with pytest.raises(TypeError):
2138        d.astype("i8", not_a_real_kwarg="foo")
2139
2140    # smoketest with kwargs
2141    assert_eq(d.astype("i8", copy=False), x.astype("i8", copy=False))
2142
2143    # Check it's a noop
2144    assert d.astype("f8") is d
2145
2146
2147def test_arithmetic():
2148    x = np.arange(5).astype("f4") + 2
2149    y = np.arange(5).astype("i8") + 2
2150    z = np.arange(5).astype("i4") + 2
2151    a = da.from_array(x, chunks=(2,))
2152    b = da.from_array(y, chunks=(2,))
2153    c = da.from_array(z, chunks=(2,))
2154    assert_eq(a + b, x + y)
2155    assert_eq(a * b, x * y)
2156    assert_eq(a - b, x - y)
2157    assert_eq(a / b, x / y)
2158    assert_eq(b & b, y & y)
2159    assert_eq(b | b, y | y)
2160    assert_eq(b ^ b, y ^ y)
2161    assert_eq(a // b, x // y)
2162    assert_eq(a ** b, x ** y)
2163    assert_eq(a % b, x % y)
2164    assert_eq(a > b, x > y)
2165    assert_eq(a < b, x < y)
2166    assert_eq(a >= b, x >= y)
2167    assert_eq(a <= b, x <= y)
2168    assert_eq(a == b, x == y)
2169    assert_eq(a != b, x != y)
2170
2171    assert_eq(a + 2, x + 2)
2172    assert_eq(a * 2, x * 2)
2173    assert_eq(a - 2, x - 2)
2174    assert_eq(a / 2, x / 2)
2175    assert_eq(b & True, y & True)
2176    assert_eq(b | True, y | True)
2177    assert_eq(b ^ True, y ^ True)
2178    assert_eq(a // 2, x // 2)
2179    assert_eq(a ** 2, x ** 2)
2180    assert_eq(a % 2, x % 2)
2181    assert_eq(a > 2, x > 2)
2182    assert_eq(a < 2, x < 2)
2183    assert_eq(a >= 2, x >= 2)
2184    assert_eq(a <= 2, x <= 2)
2185    assert_eq(a == 2, x == 2)
2186    assert_eq(a != 2, x != 2)
2187
2188    assert_eq(2 + b, 2 + y)
2189    assert_eq(2 * b, 2 * y)
2190    assert_eq(2 - b, 2 - y)
2191    assert_eq(2 / b, 2 / y)
2192    assert_eq(True & b, True & y)
2193    assert_eq(True | b, True | y)
2194    assert_eq(True ^ b, True ^ y)
2195    assert_eq(2 // b, 2 // y)
2196    assert_eq(2 ** b, 2 ** y)
2197    assert_eq(2 % b, 2 % y)
2198    assert_eq(2 > b, 2 > y)
2199    assert_eq(2 < b, 2 < y)
2200    assert_eq(2 >= b, 2 >= y)
2201    assert_eq(2 <= b, 2 <= y)
2202    assert_eq(2 == b, 2 == y)
2203    assert_eq(2 != b, 2 != y)
2204
2205    assert_eq(-a, -x)
2206    assert_eq(abs(a), abs(x))
2207    assert_eq(~(a == b), ~(x == y))
2208    assert_eq(~(a == b), ~(x == y))
2209
2210    assert_eq(da.logaddexp(a, b), np.logaddexp(x, y))
2211    assert_eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
2212    with pytest.warns(None):  # Overflow warning
2213        assert_eq(da.exp(b), np.exp(y))
2214    assert_eq(da.log(a), np.log(x))
2215    assert_eq(da.log10(a), np.log10(x))
2216    assert_eq(da.log1p(a), np.log1p(x))
2217    with pytest.warns(None):  # Overflow warning
2218        assert_eq(da.expm1(b), np.expm1(y))
2219    assert_eq(da.sqrt(a), np.sqrt(x))
2220    assert_eq(da.square(a), np.square(x))
2221
2222    assert_eq(da.sin(a), np.sin(x))
2223    assert_eq(da.cos(b), np.cos(y))
2224    assert_eq(da.tan(a), np.tan(x))
2225    assert_eq(da.arcsin(b / 10), np.arcsin(y / 10))
2226    assert_eq(da.arccos(b / 10), np.arccos(y / 10))
2227    assert_eq(da.arctan(b / 10), np.arctan(y / 10))
2228    assert_eq(da.arctan2(b * 10, a), np.arctan2(y * 10, x))
2229    assert_eq(da.hypot(b, a), np.hypot(y, x))
2230    assert_eq(da.sinh(a), np.sinh(x))
2231    with pytest.warns(None):  # Overflow warning
2232        assert_eq(da.cosh(b), np.cosh(y))
2233    assert_eq(da.tanh(a), np.tanh(x))
2234    assert_eq(da.arcsinh(b * 10), np.arcsinh(y * 10))
2235    assert_eq(da.arccosh(b * 10), np.arccosh(y * 10))
2236    assert_eq(da.arctanh(b / 10), np.arctanh(y / 10))
2237    assert_eq(da.deg2rad(a), np.deg2rad(x))
2238    assert_eq(da.rad2deg(a), np.rad2deg(x))
2239
2240    assert_eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
2241    assert_eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
2242    assert_eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
2243    assert_eq(da.logical_not(a < 1), np.logical_not(x < 1))
2244    assert_eq(da.maximum(a, 5 - a), np.maximum(a, 5 - a))
2245    assert_eq(da.minimum(a, 5 - a), np.minimum(a, 5 - a))
2246    assert_eq(da.fmax(a, 5 - a), np.fmax(a, 5 - a))
2247    assert_eq(da.fmin(a, 5 - a), np.fmin(a, 5 - a))
2248
2249    assert_eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
2250    assert_eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
2251    assert_eq(da.isfinite(a), np.isfinite(x))
2252    assert_eq(da.isinf(a), np.isinf(x))
2253    assert_eq(da.isnan(a), np.isnan(x))
2254    assert_eq(da.signbit(a - 3), np.signbit(x - 3))
2255    assert_eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
2256    assert_eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
2257    with pytest.warns(None):  # overflow warning
2258        assert_eq(da.ldexp(c, c), np.ldexp(z, z))
2259    assert_eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
2260    assert_eq(da.floor(a * 0.5), np.floor(x * 0.5))
2261    assert_eq(da.ceil(a), np.ceil(x))
2262    assert_eq(da.trunc(a / 2), np.trunc(x / 2))
2263
2264    assert_eq(da.degrees(b), np.degrees(y))
2265    assert_eq(da.radians(a), np.radians(x))
2266
2267    assert_eq(da.rint(a + 0.3), np.rint(x + 0.3))
2268    assert_eq(da.fix(a - 2.5), np.fix(x - 2.5))
2269
2270    assert_eq(da.angle(a + 1j), np.angle(x + 1j))
2271    assert_eq(da.real(a + 1j), np.real(x + 1j))
2272    assert_eq((a + 1j).real, np.real(x + 1j))
2273    assert_eq(da.imag(a + 1j), np.imag(x + 1j))
2274    assert_eq((a + 1j).imag, np.imag(x + 1j))
2275    assert_eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
2276    assert_eq((a + 1j * b).conj(), (x + 1j * y).conj())
2277
2278    assert_eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
2279    assert_eq(b.clip(1, 4), y.clip(1, 4))
2280    assert_eq(da.fabs(b), np.fabs(y))
2281    assert_eq(da.sign(b - 2), np.sign(y - 2))
2282    assert_eq(da.absolute(b - 2), np.absolute(y - 2))
2283    assert_eq(da.absolute(b - 2 + 1j), np.absolute(y - 2 + 1j))
2284
2285    l1, l2 = da.frexp(a)
2286    r1, r2 = np.frexp(x)
2287    assert_eq(l1, r1)
2288    assert_eq(l2, r2)
2289
2290    l1, l2 = da.modf(a)
2291    r1, r2 = np.modf(x)
2292    assert_eq(l1, r1)
2293    assert_eq(l2, r2)
2294
2295    assert_eq(da.around(a, -1), np.around(x, -1))
2296
2297
2298def test_elemwise_consistent_names():
2299    a = da.from_array(np.arange(5, dtype="f4"), chunks=(2,))
2300    b = da.from_array(np.arange(5, dtype="f4"), chunks=(2,))
2301    assert same_keys(a + b, a + b)
2302    assert same_keys(a + 2, a + 2)
2303    assert same_keys(da.exp(a), da.exp(a))
2304    assert same_keys(da.exp(a, dtype="f8"), da.exp(a, dtype="f8"))
2305    assert same_keys(da.maximum(a, b), da.maximum(a, b))
2306
2307
2308def test_optimize():
2309    x = np.arange(5).astype("f4")
2310    a = da.from_array(x, chunks=(2,))
2311    expr = a[1:4] + 1
2312    result = optimize(expr.dask, expr.__dask_keys__())
2313    assert isinstance(result, dict)
2314    assert all(key in result for key in expr.__dask_keys__())
2315
2316
2317def test_slicing_with_non_ndarrays():
2318    class ARangeSlice:
2319        dtype = np.dtype("i8")
2320        ndim = 1
2321
2322        def __init__(self, start, stop):
2323            self.start = start
2324            self.stop = stop
2325
2326        def __array__(self):
2327            return np.arange(self.start, self.stop)
2328
2329    class ARangeSlicable:
2330        dtype = np.dtype("i8")
2331        ndim = 1
2332
2333        def __init__(self, n):
2334            self.n = n
2335
2336        @property
2337        def shape(self):
2338            return (self.n,)
2339
2340        def __getitem__(self, key):
2341            return ARangeSlice(key[0].start, key[0].stop)
2342
2343    x = da.from_array(ARangeSlicable(10), chunks=(4,))
2344
2345    assert_eq((x + 1).sum(), (np.arange(10, dtype=x.dtype) + 1).sum())
2346
2347
2348@pytest.mark.filterwarnings("ignore:the matrix subclass")
2349def test_getter():
2350    assert type(getter(np.matrix([[1]]), 0)) is np.ndarray
2351    assert type(getter(np.matrix([[1]]), 0, asarray=False)) is np.matrix
2352    assert_eq(getter([1, 2, 3, 4, 5], slice(1, 4)), np.array([2, 3, 4]))
2353
2354    assert_eq(getter(np.arange(5), (None, slice(None, None))), np.arange(5)[None, :])
2355
2356
2357def test_size():
2358    x = da.ones((10, 2), chunks=(3, 1))
2359    assert x.size == np.array(x).size
2360    assert isinstance(x.size, int)
2361
2362
2363def test_nbytes():
2364    x = da.ones((10, 2), chunks=(3, 1))
2365    assert x.nbytes == np.array(x).nbytes
2366
2367
2368def test_itemsize():
2369    x = da.ones((10, 2), chunks=(3, 1))
2370    assert x.itemsize == 8
2371
2372
2373def test_Array_normalizes_dtype():
2374    x = da.ones((3,), chunks=(1,), dtype=int)
2375    assert isinstance(x.dtype, np.dtype)
2376
2377
2378def test_from_array_with_lock():
2379    x = np.arange(10)
2380    d = da.from_array(x, chunks=5, lock=True)
2381
2382    tasks = [v for k, v in d.dask.items() if k[0] == d.name]
2383
2384    assert hasattr(tasks[0][4], "acquire")
2385    assert len({task[4] for task in tasks}) == 1
2386
2387    assert_eq(d, x)
2388
2389    lock = Lock()
2390    e = da.from_array(x, chunks=5, lock=lock)
2391    f = da.from_array(x, chunks=5, lock=lock)
2392
2393    assert_eq(e + f, x + x)
2394
2395
2396class MyArray:
2397    def __init__(self, x):
2398        self.x = x
2399        self.dtype = x.dtype
2400        self.shape = x.shape
2401        self.ndim = len(x.shape)
2402
2403    def __getitem__(self, i):
2404        return self.x[i]
2405
2406
2407@pytest.mark.parametrize(
2408    "x,chunks",
2409    [
2410        (np.arange(25).reshape((5, 5)), (5, 5)),
2411        (np.arange(25).reshape((5, 5)), -1),
2412        (np.array([[1]]), 1),
2413        (np.array(1), 1),
2414    ],
2415)
2416def test_from_array_tasks_always_call_getter(x, chunks):
2417    dx = da.from_array(MyArray(x), chunks=chunks, asarray=False)
2418    assert_eq(x, dx)
2419
2420
2421def test_from_array_ndarray_onechunk():
2422    """ndarray with a single chunk produces a minimal single key dict"""
2423    x = np.array([[1, 2], [3, 4]])
2424    dx = da.from_array(x, chunks=-1)
2425    assert_eq(x, dx)
2426    assert len(dx.dask) == 1
2427    assert dx.dask[dx.name, 0, 0] is x
2428
2429
2430def test_from_array_ndarray_getitem():
2431    """For ndarray, don't use getter / getter_nofancy; use the cleaner
2432    operator.getitem"""
2433    x = np.array([[1, 2], [3, 4]])
2434    dx = da.from_array(x, chunks=(1, 2))
2435    assert_eq(x, dx)
2436    assert (dx.dask[dx.name, 0, 0] == np.array([[1, 2]])).all()
2437
2438
2439@pytest.mark.parametrize("x", [[1, 2], (1, 2), memoryview(b"abc")])
2440def test_from_array_list(x):
2441    """Lists, tuples, and memoryviews are automatically converted to ndarray"""
2442    dx = da.from_array(x, chunks=-1)
2443    assert_eq(np.array(x), dx)
2444    assert isinstance(dx.dask[dx.name, 0], np.ndarray)
2445
2446    dx = da.from_array(x, chunks=1)
2447    assert_eq(np.array(x), dx)
2448    assert dx.dask[dx.name, 0][0] == x[0]
2449
2450
2451# On MacOS Python 3.9, the order of the np.ScalarType tuple randomly changes across
2452# interpreter restarts, thus causing pytest-xdist failures; setting PYTHONHASHSEED does
2453# not help
2454@pytest.mark.parametrize(
2455    "type_", sorted((t for t in np.ScalarType if t is not memoryview), key=str)
2456)
2457def test_from_array_scalar(type_):
2458    """Python and numpy scalars are automatically converted to ndarray"""
2459    if type_ == np.datetime64:
2460        x = np.datetime64("2000-01-01")
2461    else:
2462        x = type_(1)
2463
2464    dx = da.from_array(x, chunks=-1)
2465    assert_eq(np.array(x), dx)
2466    assert isinstance(
2467        dx.dask[
2468            dx.name,
2469        ],
2470        np.ndarray,
2471    )
2472
2473
2474@pytest.mark.parametrize("asarray,cls", [(True, np.ndarray), (False, np.matrix)])
2475@pytest.mark.filterwarnings("ignore:the matrix subclass")
2476def test_from_array_no_asarray(asarray, cls):
2477    def assert_chunks_are_of_type(x):
2478        chunks = compute_as_if_collection(Array, x.dask, x.__dask_keys__())
2479        for c in concat(chunks):
2480            assert type(c) is cls
2481
2482    x = np.matrix(np.arange(100).reshape((10, 10)))
2483    dx = da.from_array(x, chunks=(5, 5), asarray=asarray)
2484    assert_chunks_are_of_type(dx)
2485    assert_chunks_are_of_type(dx[0:5])
2486    assert_chunks_are_of_type(dx[0:5][:, 0])
2487
2488
2489def test_from_array_getitem():
2490    x = np.arange(10)
2491
2492    def my_getitem(x, ind):
2493        return x[ind]
2494
2495    y = da.from_array(x, chunks=(5,), getitem=my_getitem)
2496
2497    for k, v in y.dask.items():
2498        if isinstance(v, tuple):
2499            assert v[0] is my_getitem
2500
2501    assert_eq(x, y)
2502
2503
2504def test_from_array_minus_one():
2505    x = np.arange(10)
2506    y = da.from_array(x, -1)
2507    assert y.chunks == ((10,),)
2508    assert_eq(x, y)
2509
2510
2511def test_from_array_copy():
2512    # Regression test for https://github.com/dask/dask/issues/3751
2513    x = np.arange(10)
2514    y = da.from_array(x, -1)
2515    assert y.npartitions == 1
2516    y_c = y.copy()
2517    assert y is not y_c
2518    assert y.compute() is not y_c.compute()
2519
2520
2521def test_from_array_dask_array():
2522    x = np.array([[1, 2], [3, 4]])
2523    dx = da.from_array(x, chunks=(1, 2))
2524    with pytest.raises(ValueError):
2525        da.from_array(dx)
2526
2527
2528def test_from_array_dask_collection_warns():
2529    class CustomCollection(np.ndarray):
2530        def __dask_graph__(self):
2531            return {"bar": 1}
2532
2533    x = CustomCollection([1, 2, 3])
2534    with pytest.warns(UserWarning):
2535        da.from_array(x)
2536
2537    # Ensure da.array warns too
2538    with pytest.warns(UserWarning):
2539        da.array(x)
2540
2541
2542def test_from_array_inline():
2543    class MyArray(np.ndarray):
2544        pass
2545
2546    a = np.array([1, 2, 3]).view(MyArray)
2547    dsk = dict(da.from_array(a, name="my-array").dask)
2548    assert dsk["my-array"] is a
2549
2550    dsk = dict(da.from_array(a, name="my-array", inline_array=True).dask)
2551    assert "my-array" not in dsk
2552    assert a is dsk[("my-array", 0)][1]
2553
2554
2555@pytest.mark.parametrize("asarray", [da.asarray, da.asanyarray])
2556def test_asarray(asarray):
2557    assert_eq(asarray([1, 2, 3]), np.asarray([1, 2, 3]))
2558
2559    x = asarray([1, 2, 3])
2560    assert asarray(x) is x
2561
2562    y = [x[0], 2, x[2]]
2563    assert_eq(asarray(y), x)
2564
2565
2566@pytest.mark.parametrize("asarray", [da.asarray, da.asanyarray])
2567def test_asarray_dask_dataframe(asarray):
2568    # https://github.com/dask/dask/issues/3885
2569    dd = pytest.importorskip("dask.dataframe")
2570    import pandas as pd
2571
2572    s = dd.from_pandas(pd.Series([1, 2, 3, 4]), 2)
2573    result = asarray(s)
2574    expected = s.values
2575    assert_eq(result, expected)
2576
2577    df = s.to_frame(name="s")
2578    result = asarray(df)
2579    expected = df.values
2580    assert_eq(result, expected)
2581
2582
2583@pytest.mark.parametrize("asarray", [da.asarray, da.asanyarray])
2584def test_asarray_h5py(asarray):
2585    h5py = pytest.importorskip("h5py")
2586
2587    with tmpfile(".hdf5") as fn:
2588        with h5py.File(fn, mode="a") as f:
2589            d = f.create_dataset("/x", shape=(2, 2), dtype=float)
2590            x = asarray(d)
2591            assert d in x.dask.values()
2592            assert not any(isinstance(v, np.ndarray) for v in x.dask.values())
2593
2594
2595def test_asarray_chunks():
2596    with dask.config.set({"array.chunk-size": "100 B"}):
2597        x = np.ones(1000)
2598        d = da.asarray(x)
2599        assert d.npartitions > 1
2600
2601
2602@pytest.mark.filterwarnings("ignore:the matrix subclass")
2603def test_asanyarray():
2604    x = np.matrix([1, 2, 3])
2605    dx = da.asanyarray(x)
2606    assert dx.numblocks == (1, 1)
2607    chunks = compute_as_if_collection(Array, dx.dask, dx.__dask_keys__())
2608    assert isinstance(chunks[0][0], np.matrix)
2609    assert da.asanyarray(dx) is dx
2610
2611
2612def test_asanyarray_dataframe():
2613    pd = pytest.importorskip("pandas")
2614    dd = pytest.importorskip("dask.dataframe")
2615
2616    df = pd.DataFrame({"x": [1, 2, 3]})
2617    ddf = dd.from_pandas(df, npartitions=2)
2618
2619    x = np.asanyarray(df)
2620    dx = da.asanyarray(ddf)
2621    assert isinstance(dx, da.Array)
2622
2623    assert_eq(x, dx)
2624
2625    x = np.asanyarray(df.x)
2626    dx = da.asanyarray(ddf.x)
2627    assert isinstance(dx, da.Array)
2628
2629    assert_eq(x, dx)
2630
2631
2632def test_asanyarray_datetime64():
2633    x = np.array(["2000-01-01"], dtype="datetime64")
2634    dx = da.asanyarray(x)
2635    assert isinstance(dx, da.Array)
2636    assert_eq(x, dx)
2637
2638
2639def test_from_func():
2640    x = np.arange(10)
2641    f = lambda n: n * x
2642    d = from_func(f, (10,), x.dtype, kwargs={"n": 2})
2643
2644    assert d.shape == x.shape
2645    assert d.dtype == x.dtype
2646    assert_eq(d, 2 * x)
2647    assert same_keys(d, from_func(f, (10,), x.dtype, kwargs={"n": 2}))
2648
2649
2650def test_concatenate3_2():
2651    x = np.array([1, 2])
2652    assert_eq(concatenate3([x, x, x]), np.array([1, 2, 1, 2, 1, 2]))
2653
2654    x = np.array([[1, 2]])
2655    assert (
2656        concatenate3([[x, x, x], [x, x, x]])
2657        == np.array([[1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2]])
2658    ).all()
2659
2660    assert (
2661        concatenate3([[x, x], [x, x], [x, x]])
2662        == np.array([[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]])
2663    ).all()
2664
2665    x = np.arange(12).reshape((2, 2, 3))
2666    assert_eq(
2667        concatenate3([[[x, x, x], [x, x, x]], [[x, x, x], [x, x, x]]]),
2668        np.array(
2669            [
2670                [
2671                    [0, 1, 2, 0, 1, 2, 0, 1, 2],
2672                    [3, 4, 5, 3, 4, 5, 3, 4, 5],
2673                    [0, 1, 2, 0, 1, 2, 0, 1, 2],
2674                    [3, 4, 5, 3, 4, 5, 3, 4, 5],
2675                ],
2676                [
2677                    [6, 7, 8, 6, 7, 8, 6, 7, 8],
2678                    [9, 10, 11, 9, 10, 11, 9, 10, 11],
2679                    [6, 7, 8, 6, 7, 8, 6, 7, 8],
2680                    [9, 10, 11, 9, 10, 11, 9, 10, 11],
2681                ],
2682                [
2683                    [0, 1, 2, 0, 1, 2, 0, 1, 2],
2684                    [3, 4, 5, 3, 4, 5, 3, 4, 5],
2685                    [0, 1, 2, 0, 1, 2, 0, 1, 2],
2686                    [3, 4, 5, 3, 4, 5, 3, 4, 5],
2687                ],
2688                [
2689                    [6, 7, 8, 6, 7, 8, 6, 7, 8],
2690                    [9, 10, 11, 9, 10, 11, 9, 10, 11],
2691                    [6, 7, 8, 6, 7, 8, 6, 7, 8],
2692                    [9, 10, 11, 9, 10, 11, 9, 10, 11],
2693                ],
2694            ]
2695        ),
2696    )
2697
2698
2699@pytest.mark.parametrize("one_d", [True, False])
2700@mock.patch.object(da.core, "_concatenate2", wraps=da.core._concatenate2)
2701def test_concatenate3_nep18_dispatching(mock_concatenate2, one_d):
2702    x = EncapsulateNDArray(np.arange(10))
2703    concat = [x, x] if one_d else [[x[None]], [x[None]]]
2704    result = concatenate3(concat)
2705    assert type(result) is type(x)
2706    mock_concatenate2.assert_called()
2707    mock_concatenate2.reset_mock()
2708
2709    # When all the inputs are supported by plain `np.concatenate`, we should take the concatenate3
2710    # fastpath of allocating the full array up front and writing blocks into it.
2711    concat = [x.arr, x.arr] if one_d else [[x.arr[None]], [x.arr[None]]]
2712    plain_np_result = concatenate3(concat)
2713    mock_concatenate2.assert_not_called()
2714    assert type(plain_np_result) is np.ndarray
2715
2716
2717def test_map_blocks3():
2718    x = np.arange(10)
2719    y = np.arange(10) * 2
2720
2721    d = da.from_array(x, chunks=5)
2722    e = da.from_array(y, chunks=5)
2723
2724    assert_eq(
2725        da.core.map_blocks(lambda a, b: a + 2 * b, d, e, dtype=d.dtype), x + 2 * y
2726    )
2727
2728    z = np.arange(100).reshape((10, 10))
2729    f = da.from_array(z, chunks=5)
2730
2731    func = lambda a, b: a + 2 * b
2732    res = da.core.map_blocks(func, d, f, dtype=d.dtype)
2733    assert_eq(res, x + 2 * z)
2734    assert same_keys(da.core.map_blocks(func, d, f, dtype=d.dtype), res)
2735
2736    assert_eq(da.map_blocks(func, f, d, dtype=d.dtype), z + 2 * x)
2737
2738
2739def test_from_array_with_missing_chunks():
2740    x = np.random.randn(2, 4, 3)
2741    d = da.from_array(x, chunks=(None, 2, None))
2742    assert d.chunks == da.from_array(x, chunks=(2, 2, 3)).chunks
2743
2744
2745def test_normalize_chunks():
2746    assert normalize_chunks(3, (4, 6)) == ((3, 1), (3, 3))
2747    assert normalize_chunks(((3, 3), (8,)), (6, 8)) == ((3, 3), (8,))
2748    assert normalize_chunks((4, 5), (9,)) == ((4, 5),)
2749    assert normalize_chunks((4, 5), (9, 9)) == ((4, 4, 1), (5, 4))
2750    assert normalize_chunks(-1, (5, 5)) == ((5,), (5,))
2751    assert normalize_chunks((3, -1), (5, 5)) == ((3, 2), (5,))
2752    assert normalize_chunks((3, None), (5, 5)) == ((3, 2), (5,))
2753    assert normalize_chunks({0: 3}, (5, 5)) == ((3, 2), (5,))
2754    assert normalize_chunks([[2, 2], [3, 3]]) == ((2, 2), (3, 3))
2755    assert normalize_chunks(10, (30, 5)) == ((10, 10, 10), (5,))
2756    assert normalize_chunks((), (0, 0)) == ((0,), (0,))
2757    assert normalize_chunks(-1, (0, 3)) == ((0,), (3,))
2758    assert normalize_chunks("auto", shape=(20,), limit=5, dtype="uint8") == (
2759        (5, 5, 5, 5),
2760    )
2761    assert normalize_chunks(("auto", None), (5, 5), dtype=int) == ((5,), (5,))
2762
2763    with pytest.raises(ValueError):
2764        normalize_chunks(((10,),), (11,))
2765    with pytest.raises(ValueError):
2766        normalize_chunks(((5,), (5,)), (5,))
2767
2768
2769def test_align_chunks_to_previous_chunks():
2770    chunks = normalize_chunks(
2771        "auto", shape=(2000,), previous_chunks=(512,), limit="600 B", dtype=np.uint8
2772    )
2773    assert chunks == ((512, 512, 512, 2000 - 512 * 3),)
2774
2775    chunks = normalize_chunks(
2776        "auto", shape=(2000,), previous_chunks=(128,), limit="600 B", dtype=np.uint8
2777    )
2778    assert chunks == ((512, 512, 512, 2000 - 512 * 3),)
2779
2780    chunks = normalize_chunks(
2781        "auto", shape=(2000,), previous_chunks=(512,), limit="1200 B", dtype=np.uint8
2782    )
2783    assert chunks == ((1024, 2000 - 1024),)
2784
2785    chunks = normalize_chunks(
2786        "auto",
2787        shape=(3, 10211, 10376),
2788        previous_chunks=(1, 512, 512),
2789        limit="1MiB",
2790        dtype=np.float32,
2791    )
2792    assert chunks[0] == (1, 1, 1)
2793    assert all(c % 512 == 0 for c in chunks[1][:-1])
2794    assert all(c % 512 == 0 for c in chunks[2][:-1])
2795
2796
2797def test_raise_on_no_chunks():
2798    x = da.ones(6, chunks=3)
2799    try:
2800        Array(x.dask, x.name, chunks=None, dtype=x.dtype, shape=None)
2801        assert False
2802    except ValueError as e:
2803        assert "dask" in str(e)
2804        assert ".org" in str(e)
2805
2806
2807def test_chunks_is_immutable():
2808    x = da.ones(6, chunks=3)
2809    try:
2810        x.chunks = 2
2811        assert False
2812    except TypeError as e:
2813        assert "rechunk(2)" in str(e)
2814
2815
2816def test_raise_on_bad_kwargs():
2817    x = da.ones(5, chunks=3)
2818    try:
2819        da.minimum(x, foo=None)
2820    except TypeError as e:
2821        assert "minimum" in str(e)
2822        assert "foo" in str(e)
2823
2824
2825def test_long_slice():
2826    x = np.arange(10000)
2827    d = da.from_array(x, chunks=1)
2828
2829    assert_eq(d[8000:8200], x[8000:8200])
2830
2831
2832def test_h5py_newaxis():
2833    h5py = pytest.importorskip("h5py")
2834
2835    with tmpfile("h5") as fn:
2836        with h5py.File(fn, mode="a") as f:
2837            x = f.create_dataset("/x", shape=(10, 10), dtype="f8")
2838            d = da.from_array(x, chunks=(5, 5))
2839            assert d[None, :, :].compute(scheduler="sync").shape == (1, 10, 10)
2840            assert d[:, None, :].compute(scheduler="sync").shape == (10, 1, 10)
2841            assert d[:, :, None].compute(scheduler="sync").shape == (10, 10, 1)
2842            assert same_keys(d[:, :, None], d[:, :, None])
2843
2844
2845def test_ellipsis_slicing():
2846    assert_eq(da.ones(4, chunks=2)[...], np.ones(4))
2847
2848
2849def test_point_slicing():
2850    x = np.arange(56).reshape((7, 8))
2851    d = da.from_array(x, chunks=(3, 4))
2852
2853    result = d.vindex[[1, 2, 5, 5], [3, 1, 6, 1]]
2854    assert_eq(result, x[[1, 2, 5, 5], [3, 1, 6, 1]])
2855
2856    result = d.vindex[[0, 1, 6, 0], [0, 1, 0, 7]]
2857    assert_eq(result, x[[0, 1, 6, 0], [0, 1, 0, 7]])
2858    assert same_keys(result, d.vindex[[0, 1, 6, 0], [0, 1, 0, 7]])
2859
2860
2861def test_point_slicing_with_full_slice():
2862    from dask.array.core import _get_axis, _vindex_transpose
2863
2864    x = np.arange(4 * 5 * 6 * 7).reshape((4, 5, 6, 7))
2865    d = da.from_array(x, chunks=(2, 3, 3, 4))
2866
2867    inds = [
2868        [[1, 2, 3], None, [3, 2, 1], [5, 3, 4]],
2869        [[1, 2, 3], None, [4, 3, 2], None],
2870        [[1, 2, 3], [3, 2, 1]],
2871        [[1, 2, 3], [3, 2, 1], [3, 2, 1], [5, 3, 4]],
2872        [[], [], [], None],
2873        [np.array([1, 2, 3]), None, np.array([4, 3, 2]), None],
2874        [None, None, [1, 2, 3], [4, 3, 2]],
2875        [None, [0, 2, 3], None, [0, 3, 2]],
2876    ]
2877
2878    for ind in inds:
2879        slc = [
2880            i if isinstance(i, (np.ndarray, list)) else slice(None, None) for i in ind
2881        ]
2882        result = d.vindex[tuple(slc)]
2883
2884        # Rotate the expected result accordingly
2885        axis = _get_axis(ind)
2886        expected = _vindex_transpose(x[tuple(slc)], axis)
2887
2888        assert_eq(result, expected)
2889
2890        # Always have the first axis be the length of the points
2891        k = len(next(i for i in ind if isinstance(i, (np.ndarray, list))))
2892        assert result.shape[0] == k
2893
2894
2895def test_slice_with_floats():
2896    d = da.ones((5,), chunks=(3,))
2897    with pytest.raises(IndexError):
2898        d[1.5]
2899    with pytest.raises(IndexError):
2900        d[0:1.5]
2901    with pytest.raises(IndexError):
2902        d[[1, 1.5]]
2903
2904
2905def test_slice_with_integer_types():
2906    x = np.arange(10)
2907    dx = da.from_array(x, chunks=5)
2908    inds = np.array([0, 3, 6], dtype="u8")
2909    assert_eq(dx[inds], x[inds])
2910    assert_eq(dx[inds.astype("u4")], x[inds.astype("u4")])
2911
2912    inds = np.array([0, 3, 6], dtype=np.int64)
2913    assert_eq(dx[inds], x[inds])
2914    assert_eq(dx[inds.astype("u4")], x[inds.astype("u4")])
2915
2916
2917def test_index_with_integer_types():
2918    x = np.arange(10)
2919    dx = da.from_array(x, chunks=5)
2920    inds = int(3)
2921    assert_eq(dx[inds], x[inds])
2922
2923    inds = np.int64(3)
2924    assert_eq(dx[inds], x[inds])
2925
2926
2927def test_vindex_basic():
2928    x = np.arange(56).reshape((7, 8))
2929    d = da.from_array(x, chunks=(3, 4))
2930
2931    # cases where basic and advanced indexing coincide
2932    result = d.vindex[0]
2933    assert_eq(result, x[0])
2934
2935    result = d.vindex[0, 1]
2936    assert_eq(result, x[0, 1])
2937
2938    result = d.vindex[[0, 1], ::-1]  # slices last
2939    assert_eq(result, x[:2, ::-1])
2940
2941
2942def test_vindex_nd():
2943    x = np.arange(56).reshape((7, 8))
2944    d = da.from_array(x, chunks=(3, 4))
2945
2946    result = d.vindex[[[0, 1], [6, 0]], [[0, 1], [0, 7]]]
2947    assert_eq(result, x[[[0, 1], [6, 0]], [[0, 1], [0, 7]]])
2948
2949    result = d.vindex[np.arange(7)[:, None], np.arange(8)[None, :]]
2950    assert_eq(result, x)
2951
2952    result = d.vindex[np.arange(7)[None, :], np.arange(8)[:, None]]
2953    assert_eq(result, x.T)
2954
2955
2956def test_vindex_negative():
2957    x = np.arange(10)
2958    d = da.from_array(x, chunks=(5, 5))
2959
2960    result = d.vindex[np.array([0, -1])]
2961    assert_eq(result, x[np.array([0, -1])])
2962
2963
2964def test_vindex_errors():
2965    d = da.ones((5, 5, 5), chunks=(3, 3, 3))
2966    pytest.raises(IndexError, lambda: d.vindex[np.newaxis])
2967    pytest.raises(IndexError, lambda: d.vindex[[1, 2], [1, 2, 3]])
2968    pytest.raises(IndexError, lambda: d.vindex[[True] * 5])
2969    pytest.raises(IndexError, lambda: d.vindex[[0], [5]])
2970    pytest.raises(IndexError, lambda: d.vindex[[0], [-6]])
2971
2972
2973def test_vindex_merge():
2974    from dask.array.core import _vindex_merge
2975
2976    locations = [1], [2, 0]
2977    values = [np.array([[1, 2, 3]]), np.array([[10, 20, 30], [40, 50, 60]])]
2978
2979    assert (
2980        _vindex_merge(locations, values)
2981        == np.array([[40, 50, 60], [1, 2, 3], [10, 20, 30]])
2982    ).all()
2983
2984
2985def test_vindex_identity():
2986    rng = da.random.RandomState(42)
2987    a, b = 10, 20
2988
2989    x = rng.random(a, chunks=a // 2)
2990    assert x is x.vindex[:]
2991    assert x is x.vindex[:a]
2992    pytest.raises(IndexError, lambda: x.vindex[: a - 1])
2993    pytest.raises(IndexError, lambda: x.vindex[1:])
2994    pytest.raises(IndexError, lambda: x.vindex[0:a:2])
2995
2996    x = rng.random((a, b), chunks=(a // 2, b // 2))
2997    assert x is x.vindex[:, :]
2998    assert x is x.vindex[:a, :b]
2999    pytest.raises(IndexError, lambda: x.vindex[:, : b - 1])
3000    pytest.raises(IndexError, lambda: x.vindex[:, 1:])
3001    pytest.raises(IndexError, lambda: x.vindex[:, 0:b:2])
3002
3003
3004def test_empty_array():
3005    assert_eq(np.arange(0), da.arange(0, chunks=5))
3006
3007
3008def test_memmap():
3009    with tmpfile("npy") as fn_1:
3010        with tmpfile("npy") as fn_2:
3011            try:
3012                x = da.arange(100, chunks=15)
3013                target = np.memmap(fn_1, shape=x.shape, mode="w+", dtype=x.dtype)
3014
3015                x.store(target)
3016
3017                assert_eq(target, x, check_type=False)
3018
3019                np.save(fn_2, target)
3020
3021                assert_eq(np.load(fn_2, mmap_mode="r"), x, check_type=False)
3022            finally:
3023                target._mmap.close()
3024
3025
3026def test_to_npy_stack():
3027    x = np.arange(5 * 10 * 10).reshape((5, 10, 10))
3028    d = da.from_array(x, chunks=(2, 4, 4))
3029
3030    with tmpdir() as dirname:
3031        stackdir = os.path.join(dirname, "test")
3032        da.to_npy_stack(stackdir, d, axis=0)
3033        assert os.path.exists(os.path.join(stackdir, "0.npy"))
3034        assert (np.load(os.path.join(stackdir, "1.npy")) == x[2:4]).all()
3035
3036        e = da.from_npy_stack(stackdir)
3037        assert_eq(d, e)
3038
3039
3040def test_view():
3041    x = np.arange(56).reshape((7, 8))
3042    d = da.from_array(x, chunks=(2, 3))
3043
3044    assert_eq(x.view(), d.view())
3045    assert_eq(x.view("i4"), d.view("i4"))
3046    assert_eq(x.view("i2"), d.view("i2"))
3047    assert all(isinstance(s, int) for s in d.shape)
3048
3049    x = np.arange(8, dtype="i1")
3050    d = da.from_array(x, chunks=(4,))
3051    assert_eq(x.view("i4"), d.view("i4"))
3052
3053    with pytest.raises(ValueError):
3054        x = np.arange(8, dtype="i1")
3055        d = da.from_array(x, chunks=(3,))
3056        d.view("i4")
3057
3058    with pytest.raises(ValueError):
3059        d.view("i4", order="asdf")
3060
3061
3062def test_view_fortran():
3063    x = np.asfortranarray(np.arange(64).reshape((8, 8)))
3064    d = da.from_array(x, chunks=(2, 3))
3065    assert_eq(x.T.view("i4").T, d.view("i4", order="F"))
3066    assert_eq(x.T.view("i2").T, d.view("i2", order="F"))
3067
3068
3069def test_h5py_tokenize():
3070    h5py = pytest.importorskip("h5py")
3071    with tmpfile("hdf5") as fn1:
3072        with tmpfile("hdf5") as fn2:
3073            f = h5py.File(fn1, mode="a")
3074            g = h5py.File(fn2, mode="a")
3075
3076            f["x"] = np.arange(10).astype(float)
3077            g["x"] = np.ones(10).astype(float)
3078
3079            x1 = f["x"]
3080            x2 = g["x"]
3081
3082            assert tokenize(x1) != tokenize(x2)
3083
3084
3085def test_map_blocks_with_changed_dimension():
3086    x = np.arange(56).reshape((7, 8))
3087    d = da.from_array(x, chunks=(7, 4))
3088
3089    e = d.map_blocks(lambda b: b.sum(axis=0), chunks=(4,), drop_axis=0, dtype=d.dtype)
3090    assert e.chunks == ((4, 4),)
3091    assert_eq(e, x.sum(axis=0))
3092
3093    # Provided chunks have wrong shape
3094    with pytest.raises(ValueError):
3095        d.map_blocks(lambda b: b.sum(axis=0), chunks=(), drop_axis=0)
3096
3097    with pytest.raises(ValueError):
3098        d.map_blocks(lambda b: b.sum(axis=0), chunks=((4, 4, 4),), drop_axis=0)
3099
3100    with pytest.raises(ValueError):
3101        d.map_blocks(lambda b: b.sum(axis=1), chunks=((3, 4),), drop_axis=1)
3102
3103    d = da.from_array(x, chunks=(4, 8))
3104    e = d.map_blocks(lambda b: b.sum(axis=1), drop_axis=1, dtype=d.dtype)
3105    assert e.chunks == ((4, 3),)
3106    assert_eq(e, x.sum(axis=1))
3107
3108    x = np.arange(64).reshape((8, 8))
3109    d = da.from_array(x, chunks=(4, 4))
3110    e = d.map_blocks(
3111        lambda b: b[None, :, :, None],
3112        chunks=(1, 4, 4, 1),
3113        new_axis=[0, 3],
3114        dtype=d.dtype,
3115    )
3116    assert e.chunks == ((1,), (4, 4), (4, 4), (1,))
3117    assert_eq(e, x[None, :, :, None])
3118
3119    e = d.map_blocks(lambda b: b[None, :, :, None], new_axis=[0, 3], dtype=d.dtype)
3120    assert e.chunks == ((1,), (4, 4), (4, 4), (1,))
3121    assert_eq(e, x[None, :, :, None])
3122
3123    # Adding axis with a gap
3124    with pytest.raises(ValueError):
3125        d.map_blocks(lambda b: b, new_axis=(3, 4))
3126
3127    # Both new_axis and drop_axis
3128    d = da.from_array(x, chunks=(8, 4))
3129    e = d.map_blocks(
3130        lambda b: b.sum(axis=0)[:, None, None],
3131        drop_axis=0,
3132        new_axis=(1, 2),
3133        dtype=d.dtype,
3134    )
3135    assert e.chunks == ((4, 4), (1,), (1,))
3136    assert_eq(e, x.sum(axis=0)[:, None, None])
3137
3138    d = da.from_array(x, chunks=(4, 8))
3139    e = d.map_blocks(
3140        lambda b: b.sum(axis=1)[:, None, None],
3141        drop_axis=1,
3142        new_axis=(1, 2),
3143        dtype=d.dtype,
3144    )
3145    assert e.chunks == ((4, 4), (1,), (1,))
3146    assert_eq(e, x.sum(axis=1)[:, None, None])
3147
3148
3149def test_map_blocks_with_negative_drop_axis():
3150    x = np.arange(56).reshape((7, 8))
3151    d = da.from_array(x, chunks=(7, 4))
3152
3153    for drop_axis in [0, -2]:
3154        # test with equivalent positive and negative drop_axis
3155        e = d.map_blocks(
3156            lambda b: b.sum(axis=0), chunks=(4,), drop_axis=drop_axis, dtype=d.dtype
3157        )
3158        assert e.chunks == ((4, 4),)
3159        assert_eq(e, x.sum(axis=0))
3160
3161
3162def test_map_blocks_with_invalid_drop_axis():
3163    x = np.arange(56).reshape((7, 8))
3164    d = da.from_array(x, chunks=(7, 4))
3165
3166    for drop_axis in [x.ndim, -x.ndim - 1]:
3167        with pytest.raises(ValueError):
3168            d.map_blocks(
3169                lambda b: b.sum(axis=0), chunks=(4,), drop_axis=drop_axis, dtype=d.dtype
3170            )
3171
3172
3173def test_map_blocks_with_changed_dimension_and_broadcast_chunks():
3174    # https://github.com/dask/dask/issues/4299
3175    a = da.from_array([1, 2, 3], 3)
3176    b = da.from_array(np.array([0, 1, 2, 0, 1, 2]), chunks=3)
3177    result = da.map_blocks(operator.add, a, b, chunks=b.chunks)
3178    expected = da.from_array(np.array([1, 3, 5, 1, 3, 5]), chunks=3)
3179    assert_eq(result, expected)
3180
3181
3182def test_broadcast_chunks():
3183    assert broadcast_chunks() == ()
3184
3185    assert broadcast_chunks(((2, 3),)) == ((2, 3),)
3186
3187    assert broadcast_chunks(((5, 5),), ((5, 5),)) == ((5, 5),)
3188
3189    a = ((10, 10, 10), (5, 5))
3190    b = ((5, 5),)
3191    assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5))
3192    assert broadcast_chunks(b, a) == ((10, 10, 10), (5, 5))
3193
3194    a = ((10, 10, 10), (5, 5))
3195    b = ((1,), (5, 5))
3196    assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5))
3197
3198    a = ((10, 10, 10), (5, 5))
3199    b = ((3, 3), (5, 5))
3200    with pytest.raises(ValueError):
3201        broadcast_chunks(a, b)
3202
3203    a = ((1,), (5, 5))
3204    b = ((1,), (5, 5))
3205    assert broadcast_chunks(a, b) == a
3206
3207    a = ((1,), (np.nan, np.nan, np.nan))
3208    b = ((3, 3), (1,))
3209    r = broadcast_chunks(a, b)
3210    assert r[0] == b[0] and np.allclose(r[1], a[1], equal_nan=True)
3211
3212    a = ((3, 3), (1,))
3213    b = ((1,), (np.nan, np.nan, np.nan))
3214    r = broadcast_chunks(a, b)
3215    assert r[0] == a[0] and np.allclose(r[1], b[1], equal_nan=True)
3216
3217    a = ((3, 3), (5, 5))
3218    b = ((1,), (np.nan, np.nan, np.nan))
3219    with pytest.raises(ValueError):
3220        broadcast_chunks(a, b)
3221
3222
3223def test_chunks_error():
3224    x = np.ones((10, 10))
3225    with pytest.raises(ValueError):
3226        da.from_array(x, chunks=(5,))
3227
3228
3229def test_array_compute_forward_kwargs():
3230    x = da.arange(10, chunks=2).sum()
3231    x.compute(bogus_keyword=10)
3232
3233
3234def test_dont_fuse_outputs():
3235    dsk = {("x", 0): np.array([1, 2]), ("x", 1): (inc, ("x", 0))}
3236
3237    a = da.Array(dsk, "x", chunks=(2,), shape=(4,), dtype=np.array([1]).dtype)
3238    assert_eq(a, np.array([1, 2, 2, 3], dtype=a.dtype))
3239
3240
3241def test_dont_dealias_outputs():
3242    dsk = {
3243        ("x", 0, 0): np.ones((2, 2)),
3244        ("x", 0, 1): np.ones((2, 2)),
3245        ("x", 1, 0): np.ones((2, 2)),
3246        ("x", 1, 1): ("x", 0, 0),
3247    }
3248
3249    a = da.Array(dsk, "x", chunks=(2, 2), shape=(4, 4), dtype=np.ones(1).dtype)
3250    assert_eq(a, np.ones((4, 4)))
3251
3252
3253def test_timedelta_op():
3254    x = np.array([np.timedelta64(10, "h")])
3255    y = np.timedelta64(1, "h")
3256    a = da.from_array(x, chunks=(1,)) / y
3257    assert a.compute() == x / y
3258
3259
3260def test_to_delayed():
3261    x = da.random.random((4, 4), chunks=(2, 2))
3262    y = x + 10
3263
3264    [[a, b], [c, d]] = y.to_delayed()
3265    assert_eq(a.compute(), y[:2, :2])
3266
3267    s = 2
3268    x = da.from_array(np.array(s), chunks=0)
3269    a = x.to_delayed()[tuple()]
3270    assert a.compute() == s
3271
3272
3273def test_to_delayed_optimize_graph():
3274    x = da.ones((4, 4), chunks=(2, 2))
3275    y = x[1:][1:][1:][:, 1:][:, 1:][:, 1:]
3276
3277    # optimizations
3278    d = y.to_delayed().flatten().tolist()[0]
3279    assert len([k for k in d.dask if k[0].startswith("getitem")]) == 1
3280
3281    # no optimizations
3282    d2 = y.to_delayed(optimize_graph=False).flatten().tolist()[0]
3283    assert dict(d2.dask) == dict(y.dask)
3284
3285    assert (d.compute() == d2.compute()).all()
3286
3287
3288def test_cumulative():
3289    x = da.arange(20, chunks=5)
3290    assert_eq(x.cumsum(axis=0), np.arange(20).cumsum())
3291    assert_eq(x.cumprod(axis=0), np.arange(20).cumprod())
3292
3293    assert_eq(da.nancumsum(x, axis=0), nancumsum(np.arange(20)))
3294    assert_eq(da.nancumprod(x, axis=0), nancumprod(np.arange(20)))
3295
3296    a = np.random.random(20)
3297    rs = np.random.RandomState(0)
3298    a[rs.rand(*a.shape) < 0.5] = np.nan
3299    x = da.from_array(a, chunks=5)
3300    assert_eq(da.nancumsum(x, axis=0), nancumsum(a))
3301    assert_eq(da.nancumprod(x, axis=0), nancumprod(a))
3302
3303    a = np.random.random((20, 24))
3304    x = da.from_array(a, chunks=(6, 5))
3305    assert_eq(x.cumsum(axis=0), a.cumsum(axis=0))
3306    assert_eq(x.cumsum(axis=1), a.cumsum(axis=1))
3307    assert_eq(x.cumprod(axis=0), a.cumprod(axis=0))
3308    assert_eq(x.cumprod(axis=1), a.cumprod(axis=1))
3309
3310    assert_eq(da.nancumsum(x, axis=0), nancumsum(a, axis=0))
3311    assert_eq(da.nancumsum(x, axis=1), nancumsum(a, axis=1))
3312    assert_eq(da.nancumprod(x, axis=0), nancumprod(a, axis=0))
3313    assert_eq(da.nancumprod(x, axis=1), nancumprod(a, axis=1))
3314
3315    a = np.random.random((20, 24))
3316    rs = np.random.RandomState(0)
3317    a[rs.rand(*a.shape) < 0.5] = np.nan
3318    x = da.from_array(a, chunks=(6, 5))
3319    assert_eq(da.nancumsum(x, axis=0), nancumsum(a, axis=0))
3320    assert_eq(da.nancumsum(x, axis=1), nancumsum(a, axis=1))
3321    assert_eq(da.nancumprod(x, axis=0), nancumprod(a, axis=0))
3322    assert_eq(da.nancumprod(x, axis=1), nancumprod(a, axis=1))
3323
3324    a = np.random.random((20, 24, 13))
3325    x = da.from_array(a, chunks=(6, 5, 4))
3326    for axis in [0, 1, 2, -1, -2, -3]:
3327        assert_eq(x.cumsum(axis=axis), a.cumsum(axis=axis))
3328        assert_eq(x.cumprod(axis=axis), a.cumprod(axis=axis))
3329
3330        assert_eq(da.nancumsum(x, axis=axis), nancumsum(a, axis=axis))
3331        assert_eq(da.nancumprod(x, axis=axis), nancumprod(a, axis=axis))
3332
3333    a = np.random.random((20, 24, 13))
3334    rs = np.random.RandomState(0)
3335    a[rs.rand(*a.shape) < 0.5] = np.nan
3336    x = da.from_array(a, chunks=(6, 5, 4))
3337    for axis in [0, 1, 2, -1, -2, -3]:
3338        assert_eq(da.nancumsum(x, axis=axis), nancumsum(a, axis=axis))
3339        assert_eq(da.nancumprod(x, axis=axis), nancumprod(a, axis=axis))
3340
3341    with pytest.raises(ValueError):
3342        x.cumsum(axis=3)
3343
3344    with pytest.raises(ValueError):
3345        x.cumsum(axis=-4)
3346
3347
3348def test_from_delayed():
3349    v = delayed(np.ones)((5, 3))
3350    x = from_delayed(v, shape=(5, 3), dtype=np.ones(0).dtype)
3351    assert isinstance(x, Array)
3352    assert_eq(x, np.ones((5, 3)))
3353
3354
3355def test_from_delayed_meta():
3356    v = delayed(np.ones)((5, 3))
3357    x = from_delayed(v, shape=(5, 3), meta=np.ones(0))
3358    assert isinstance(x, Array)
3359    assert isinstance(x._meta, np.ndarray)
3360
3361
3362def test_A_property():
3363    x = da.ones(5, chunks=(2,))
3364    assert x.A is x
3365
3366
3367def test_copy_mutate():
3368    x = da.arange(5, chunks=(2,))
3369    y = x.copy()
3370    memo = {}
3371    y2 = copy.deepcopy(x, memo=memo)
3372    x[x % 2 == 0] = -1
3373
3374    xx = np.arange(5)
3375    xx[xx % 2 == 0] = -1
3376    assert_eq(x, xx)
3377
3378    assert_eq(y, np.arange(5))
3379    assert_eq(y2, np.arange(5))
3380    assert memo[id(x)] is y2
3381
3382
3383def test_npartitions():
3384    assert da.ones(5, chunks=(2,)).npartitions == 3
3385    assert da.ones((5, 5), chunks=(2, 3)).npartitions == 6
3386
3387
3388def test_astype_gh1151():
3389    a = np.arange(5).astype(np.int32)
3390    b = da.from_array(a, (1,))
3391    assert_eq(a.astype(np.int16), b.astype(np.int16))
3392
3393
3394def test_elemwise_name():
3395    assert (da.ones(5, chunks=2) + 1).name.startswith("add-")
3396
3397
3398def test_map_blocks_name():
3399    assert da.ones(5, chunks=2).map_blocks(inc).name.startswith("inc-")
3400
3401
3402def test_from_array_names():
3403    pytest.importorskip("distributed")
3404
3405    x = np.ones(10)
3406    d = da.from_array(x, chunks=2)
3407
3408    names = countby(key_split, d.dask)
3409    assert set(names.values()) == {5}
3410
3411
3412@pytest.mark.parametrize(
3413    "array",
3414    [
3415        da.arange(100, chunks=25),
3416        da.ones((10, 10), chunks=25),
3417    ],
3418)
3419def test_array_picklable(array):
3420    from pickle import dumps, loads
3421
3422    a2 = loads(dumps(array))
3423    assert_eq(array, a2)
3424
3425
3426def test_from_array_raises_on_bad_chunks():
3427    x = np.ones(10)
3428
3429    with pytest.raises(ValueError):
3430        da.from_array(x, chunks=(5, 5, 5))
3431
3432    # with pytest.raises(ValueError):
3433    #      da.from_array(x, chunks=100)
3434
3435    with pytest.raises(ValueError):
3436        da.from_array(x, chunks=((5, 5, 5),))
3437
3438
3439def test_concatenate_axes():
3440    x = np.ones((2, 2, 2))
3441
3442    assert_eq(concatenate_axes([x, x], axes=[0]), np.ones((4, 2, 2)))
3443    assert_eq(concatenate_axes([x, x, x], axes=[0]), np.ones((6, 2, 2)))
3444    assert_eq(concatenate_axes([x, x], axes=[1]), np.ones((2, 4, 2)))
3445    assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 1]), np.ones((4, 4, 2)))
3446    assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 2]), np.ones((4, 2, 4)))
3447    assert_eq(concatenate_axes([[x, x, x], [x, x, x]], axes=[1, 2]), np.ones((2, 4, 6)))
3448
3449    with pytest.raises(ValueError):
3450        concatenate_axes(
3451            [[x, x], [x, x]], axes=[0]
3452        )  # not all nested lists accounted for
3453    with pytest.raises(ValueError):
3454        concatenate_axes([x, x], axes=[0, 1, 2, 3])  # too many axes
3455
3456
3457def test_blockwise_concatenate():
3458    x = da.ones((4, 4, 4), chunks=(2, 2, 2))
3459    y = da.ones((4, 4), chunks=(2, 2))
3460
3461    def f(a, b):
3462        assert isinstance(a, np.ndarray)
3463        assert isinstance(b, np.ndarray)
3464
3465        assert a.shape == (2, 4, 4)
3466        assert b.shape == (4, 4)
3467
3468        return (a + b).sum(axis=(1, 2))
3469
3470    z = da.blockwise(f, "i", x, "ijk", y, "jk", concatenate=True, dtype=x.dtype)
3471    assert_eq(z, np.ones(4) * 32)
3472
3473    z = da.blockwise(add, "ij", y, "ij", y, "ij", concatenate=True, dtype=x.dtype)
3474    assert_eq(z, np.ones((4, 4)) * 2)
3475
3476    def f(a, b, c):
3477        assert isinstance(a, np.ndarray)
3478        assert isinstance(b, np.ndarray)
3479        assert isinstance(c, np.ndarray)
3480
3481        assert a.shape == (4, 2, 4)
3482        assert b.shape == (4, 4)
3483        assert c.shape == (4, 2)
3484
3485        return np.ones(2)
3486
3487    z = da.blockwise(
3488        f, "j", x, "ijk", y, "ki", y, "ij", concatenate=True, dtype=x.dtype
3489    )
3490    assert_eq(z, np.ones(4), check_shape=False)
3491
3492
3493def test_common_blockdim():
3494    assert common_blockdim([(5,), (5,)]) == (5,)
3495    assert common_blockdim([(5,), (2, 3)]) == (2, 3)
3496    assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5)
3497    assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5)
3498    assert common_blockdim([(5, 2, 3), (2, 3, 5)]) == (2, 3, 2, 3)
3499
3500    assert common_blockdim([(1, 2), (2, 1)]) == (1, 1, 1)
3501    assert common_blockdim([(1, 2, 2), (2, 1, 2), (2, 2, 1)]) == (1, 1, 1, 1, 1)
3502
3503
3504def test_uneven_chunks_that_fit_neatly():
3505    x = da.arange(10, chunks=((5, 5),))
3506    y = da.ones(10, chunks=((5, 2, 3),))
3507
3508    assert_eq(x + y, np.arange(10) + np.ones(10))
3509
3510    z = x + y
3511    assert z.chunks == ((5, 2, 3),)
3512
3513
3514def test_elemwise_uneven_chunks():
3515    x = da.arange(10, chunks=((4, 6),))
3516    y = da.ones(10, chunks=((6, 4),))
3517
3518    assert_eq(x + y, np.arange(10) + np.ones(10))
3519
3520    z = x + y
3521    assert z.chunks == ((4, 2, 4),)
3522
3523    x = da.random.random((10, 10), chunks=((4, 6), (5, 2, 3)))
3524    y = da.random.random((4, 10, 10), chunks=((2, 2), (6, 4), (2, 3, 5)))
3525
3526    z = x + y
3527    assert_eq(x + y, x.compute() + y.compute())
3528    assert z.chunks == ((2, 2), (4, 2, 4), (2, 3, 2, 3))
3529
3530
3531def test_uneven_chunks_blockwise():
3532    x = da.random.random((10, 10), chunks=((2, 3, 2, 3), (5, 5)))
3533    y = da.random.random((10, 10), chunks=((4, 4, 2), (4, 2, 4)))
3534    z = da.blockwise(np.dot, "ik", x, "ij", y, "jk", dtype=x.dtype, concatenate=True)
3535    assert z.chunks == (x.chunks[0], y.chunks[1])
3536
3537    assert_eq(z, x.compute().dot(y))
3538
3539
3540def test_warn_bad_rechunking():
3541    x = da.ones((20, 20), chunks=(20, 1))
3542    y = da.ones((20, 20), chunks=(1, 20))
3543
3544    with pytest.warns(da.core.PerformanceWarning, match="factor of 20"):
3545        x + y
3546
3547
3548def test_concatenate_stack_dont_warn():
3549    with warnings.catch_warnings(record=True) as record:
3550        da.concatenate([da.ones(2, chunks=1)] * 62)
3551    assert not record
3552
3553    with warnings.catch_warnings(record=True) as record:
3554        da.stack([da.ones(2, chunks=1)] * 62)
3555    assert not record
3556
3557
3558def test_map_blocks_delayed():
3559    x = da.ones((10, 10), chunks=(5, 5))
3560    y = np.ones((5, 5))
3561
3562    z = x.map_blocks(add, y, dtype=x.dtype)
3563
3564    yy = delayed(y)
3565    zz = x.map_blocks(add, yy, dtype=x.dtype)
3566
3567    assert_eq(z, zz)
3568
3569    assert yy.key in zz.dask
3570
3571
3572def test_no_chunks():
3573    X = np.arange(11)
3574    dsk = {("x", 0): np.arange(5), ("x", 1): np.arange(5, 11)}
3575    x = Array(dsk, "x", ((np.nan, np.nan),), np.arange(1).dtype)
3576    assert_eq(x + 1, X + 1)
3577    assert_eq(x.sum(), X.sum())
3578    assert_eq((x + 1).std(), (X + 1).std())
3579    assert_eq((x + x).std(), (X + X).std())
3580    assert_eq((x + x).std(keepdims=True), (X + X).std(keepdims=True))
3581
3582
3583def test_no_chunks_2d():
3584    X = np.arange(24).reshape((4, 6))
3585    x = da.from_array(X, chunks=(2, 2))
3586    x._chunks = ((np.nan, np.nan), (np.nan, np.nan, np.nan))
3587
3588    with pytest.warns(None):  # zero division warning
3589        assert_eq(da.log(x), np.log(X))
3590    assert_eq(x.T, X.T)
3591    assert_eq(x.sum(axis=0, keepdims=True), X.sum(axis=0, keepdims=True))
3592    assert_eq(x.sum(axis=1, keepdims=True), X.sum(axis=1, keepdims=True))
3593    assert_eq(x.dot(x.T + 1), X.dot(X.T + 1))
3594
3595
3596def test_no_chunks_yes_chunks():
3597    X = np.arange(24).reshape((4, 6))
3598    x = da.from_array(X, chunks=(2, 2))
3599    x._chunks = ((2, 2), (np.nan, np.nan, np.nan))
3600
3601    assert (x + 1).chunks == ((2, 2), (np.nan, np.nan, np.nan))
3602    assert (x.T).chunks == ((np.nan, np.nan, np.nan), (2, 2))
3603    assert (x.dot(x.T)).chunks == ((2, 2), (2, 2))
3604
3605
3606def test_raise_informative_errors_no_chunks():
3607    X = np.arange(10)
3608    a = da.from_array(X, chunks=(5, 5))
3609    a._chunks = ((np.nan, np.nan),)
3610
3611    b = da.from_array(X, chunks=(4, 4, 2))
3612    b._chunks = ((np.nan, np.nan, np.nan),)
3613
3614    for op in [
3615        lambda: a + b,
3616        lambda: a[1],
3617        lambda: a[::2],
3618        lambda: a[-5],
3619        lambda: a.rechunk(3),
3620        lambda: a.reshape(2, 5),
3621    ]:
3622        with pytest.raises(ValueError) as e:
3623            op()
3624        if "chunk" not in str(e.value) or "unknown" not in str(e.value):
3625            op()
3626
3627
3628def test_no_chunks_slicing_2d():
3629    X = np.arange(24).reshape((4, 6))
3630    x = da.from_array(X, chunks=(2, 2))
3631    x._chunks = ((2, 2), (np.nan, np.nan, np.nan))
3632
3633    assert_eq(x[0], X[0])
3634
3635    for op in [lambda: x[:, 4], lambda: x[:, ::2], lambda: x[0, 2:4]]:
3636        with pytest.raises(ValueError, match="chunk sizes are unknown"):
3637            op()
3638
3639
3640def test_index_array_with_array_1d():
3641    x = np.arange(10)
3642    dx = da.from_array(x, chunks=(5,))
3643    dx._chunks = ((np.nan, np.nan),)
3644
3645    assert_eq(x[x > 6], dx[dx > 6])
3646    assert_eq(x[x % 2 == 0], dx[dx % 2 == 0])
3647
3648    dy = da.ones(11, chunks=(3,))
3649
3650    with pytest.raises(ValueError):
3651        dx[dy > 5]
3652
3653
3654def test_index_array_with_array_2d():
3655    x = np.arange(24).reshape((4, 6))
3656    dx = da.from_array(x, chunks=(2, 2))
3657
3658    assert_eq(x[x > 6], dx[dx > 6])
3659    assert_eq(x[x % 2 == 0], dx[dx % 2 == 0])
3660
3661    # Test with unknown chunks
3662    dx._chunks = ((2, 2), (np.nan, np.nan, np.nan))
3663
3664    with pytest.warns(UserWarning, match="different ordering") as record:
3665        assert sorted(x[x % 2 == 0].tolist()) == sorted(
3666            dx[dx % 2 == 0].compute().tolist()
3667        )
3668        assert sorted(x[x > 6].tolist()) == sorted(dx[dx > 6].compute().tolist())
3669
3670    assert len(record) == 2
3671
3672
3673@pytest.mark.xfail(reason="Chunking does not align well")
3674def test_index_array_with_array_3d_2d():
3675    x = np.arange(4 ** 3).reshape((4, 4, 4))
3676    dx = da.from_array(x, chunks=(2, 2, 2))
3677
3678    ind = np.random.random((4, 4)) > 0.5
3679    ind = np.arange(4 ** 2).reshape((4, 4)) % 2 == 0
3680    dind = da.from_array(ind, (2, 2))
3681
3682    assert_eq(x[ind], dx[dind])
3683    assert_eq(x[:, ind], dx[:, dind])
3684
3685
3686def test_setitem_1d():
3687    x = np.arange(10)
3688    dx = da.from_array(x.copy(), chunks=(5,))
3689
3690    x[x > 6] = -1
3691    x[x % 2 == 0] = -2
3692
3693    dx[dx > 6] = -1
3694    dx[dx % 2 == 0] = -2
3695
3696    assert_eq(x, dx)
3697
3698
3699def test_setitem_2d():
3700    x = np.arange(24).reshape((4, 6))
3701    dx = da.from_array(x.copy(), chunks=(2, 2))
3702
3703    x[x > 6] = -1
3704    x[x % 2 == 0] = -2
3705
3706    dx[dx > 6] = -1
3707    dx[dx % 2 == 0] = -2
3708
3709    assert_eq(x, dx)
3710
3711
3712def test_setitem_extended_API_0d():
3713    # 0-d array
3714    x = np.array(9)
3715    dx = da.from_array(9)
3716
3717    x[()] = -1
3718    dx[()] = -1
3719    assert_eq(x, dx.compute())
3720
3721    x[...] = -11
3722    dx[...] = -11
3723    assert_eq(x, dx.compute())
3724
3725
3726def test_setitem_extended_API_1d():
3727    # 1-d array
3728    x = np.arange(10)
3729    dx = da.from_array(x.copy(), chunks=(4, 6))
3730
3731    x[2:8:2] = -1
3732    dx[2:8:2] = -1
3733    assert_eq(x, dx.compute())
3734
3735    x[...] = -11
3736    dx[...] = -11
3737    assert_eq(x, dx.compute())
3738
3739
3740@pytest.mark.parametrize(
3741    "index, value",
3742    [
3743        [Ellipsis, -1],
3744        [(slice(None, None, 2), slice(None, None, -1)), -1],
3745        [slice(1, None, 2), -1],
3746        [[4, 3, 1], -1],
3747        [(Ellipsis, 4), -1],
3748        [5, -1],
3749        [(slice(None), 2), range(6)],
3750        [3, range(10)],
3751        [(slice(None), [3, 5, 6]), [-30, -31, -32]],
3752        [([-1, 0, 1], 2), [-30, -31, -32]],
3753        [(slice(None, 2), slice(None, 3)), [-50, -51, -52]],
3754        [(slice(None), [6, 1, 3]), [-60, -61, -62]],
3755        [(slice(1, 3), slice(1, 4)), [[-70, -71, -72]]],
3756        [(slice(None), [9, 8, 8]), [-80, -81, 91]],
3757        [([True, False, False, False, True, False], 2), -1],
3758        [(3, [True, True, False, True, True, False, True, False, True, True]), -1],
3759        [(np.array([False, False, True, True, False, False]), slice(5, 7)), -1],
3760        [
3761            (
3762                4,
3763                da.from_array(
3764                    [False, False, True, True, False, False, True, False, False, True]
3765                ),
3766            ),
3767            -1,
3768        ],
3769    ],
3770)
3771def test_setitem_extended_API_2d(index, value):
3772    # 2-d array
3773    x = np.ma.arange(60).reshape((6, 10))
3774    dx = da.from_array(x, chunks=(2, 3))
3775    dx[index] = value
3776    x[index] = value
3777    assert_eq(x, dx.compute())
3778
3779
3780def test_setitem_extended_API_2d_rhs_func_of_lhs():
3781    # Cases:
3782    # * RHS and/or indices are a function of the LHS
3783    # * Indices have unknown chunk sizes
3784    # * RHS has extra leading size 1 dimensions compared to LHS
3785    x = np.arange(60).reshape((6, 10))
3786    chunks = (2, 3)
3787
3788    dx = da.from_array(x, chunks=chunks)
3789    dx[2:4, dx[0] > 3] = -5
3790    x[2:4, x[0] > 3] = -5
3791    assert_eq(x, dx.compute())
3792
3793    dx = da.from_array(x, chunks=chunks)
3794    dx[2, dx[0] < -2] = -7
3795    x[2, x[0] < -2] = -7
3796    assert_eq(x, dx.compute())
3797
3798    dx = da.from_array(x, chunks=chunks)
3799    dx[dx % 2 == 0] = -8
3800    x[x % 2 == 0] = -8
3801    assert_eq(x, dx.compute())
3802
3803    dx = da.from_array(x, chunks=chunks)
3804    dx[dx % 2 == 0] = -8
3805    x[x % 2 == 0] = -8
3806    assert_eq(x, dx.compute())
3807
3808    dx = da.from_array(x, chunks=chunks)
3809    dx[3:5, 5:1:-2] = -dx[:2, 4:1:-2]
3810    x[3:5, 5:1:-2] = -x[:2, 4:1:-2]
3811    assert_eq(x, dx.compute())
3812
3813    dx = da.from_array(x, chunks=chunks)
3814    dx[0, 1:3] = -dx[0, 4:2:-1]
3815    x[0, 1:3] = -x[0, 4:2:-1]
3816    assert_eq(x, dx.compute())
3817
3818    dx = da.from_array(x, chunks=chunks)
3819    dx[...] = dx
3820    x[...] = x
3821    assert_eq(x, dx.compute())
3822
3823    dx = da.from_array(x, chunks=chunks)
3824    dx[...] = dx[...]
3825    x[...] = x[...]
3826    assert_eq(x, dx.compute())
3827
3828    dx = da.from_array(x, chunks=chunks)
3829    dx[0] = dx[-1]
3830    x[0] = x[-1]
3831    assert_eq(x, dx.compute())
3832
3833    dx = da.from_array(x, chunks=chunks)
3834    dx[0, :] = dx[-2, :]
3835    x[0, :] = x[-2, :]
3836    assert_eq(x, dx.compute())
3837
3838    dx = da.from_array(x, chunks=chunks)
3839    dx[:, 1] = dx[:, -3]
3840    x[:, 1] = x[:, -3]
3841    assert_eq(x, dx.compute())
3842
3843    index = da.from_array([0, 2], chunks=(2,))
3844    dx = da.from_array(x, chunks=chunks)
3845    dx[index, 8] = [99, 88]
3846    x[[0, 2], 8] = [99, 88]
3847    assert_eq(x, dx.compute())
3848
3849    dx = da.from_array(x, chunks=chunks)
3850    dx[:, index] = dx[:, :2]
3851    x[:, [0, 2]] = x[:, :2]
3852    assert_eq(x, dx.compute())
3853
3854    index = da.where(da.arange(3, chunks=(1,)) < 2)[0]
3855    dx = da.from_array(x, chunks=chunks)
3856    dx[index, 7] = [-23, -33]
3857    x[index.compute(), 7] = [-23, -33]
3858    assert_eq(x, dx.compute())
3859
3860    index = da.where(da.arange(3, chunks=(1,)) < 2)[0]
3861    dx = da.from_array(x, chunks=chunks)
3862    dx[(index,)] = -34
3863    x[(index.compute(),)] = -34
3864    assert_eq(x, dx.compute())
3865
3866    index = index - 4
3867    dx = da.from_array(x, chunks=chunks)
3868    dx[index, 7] = [-43, -53]
3869    x[index.compute(), 7] = [-43, -53]
3870    assert_eq(x, dx.compute())
3871
3872    index = da.from_array([0, -1], chunks=(1,))
3873    x[[0, -1]] = 9999
3874    dx[(index,)] = 9999
3875    assert_eq(x, dx.compute())
3876
3877    dx = da.from_array(x, chunks=(-1, -1))
3878    dx[...] = da.from_array(x, chunks=chunks)
3879    assert_eq(x, dx.compute())
3880
3881    # RHS has extra leading size 1 dimensions compared to LHS
3882    dx = da.from_array(x.copy(), chunks=(2, 3))
3883    v = x.reshape((1, 1) + x.shape)
3884    x[...] = v
3885    dx[...] = v
3886    assert_eq(x, dx.compute())
3887
3888    index = da.where(da.arange(3, chunks=(1,)) < 2)[0]
3889    v = -np.arange(12).reshape(1, 1, 6, 2)
3890    x[:, [0, 1]] = v
3891    dx[:, index] = v
3892    assert_eq(x, dx.compute())
3893
3894
3895@pytest.mark.parametrize(
3896    "index, value",
3897    [
3898        [(1, slice(1, 7, 2)), np.ma.masked],
3899        [(slice(1, 5, 2), [7, 5]), np.ma.masked_all((2, 2))],
3900    ],
3901)
3902def test_setitem_extended_API_2d_mask(index, value):
3903    x = np.ma.arange(60).reshape((6, 10))
3904    dx = da.from_array(x.data, chunks=(2, 3))
3905    dx[index] = value
3906    x[index] = value
3907    dx = dx.persist()
3908    assert_eq(x, dx.compute())
3909    assert_eq(x.mask, da.ma.getmaskarray(dx).compute())
3910
3911
3912def test_setitem_on_read_only_blocks():
3913    # Outputs of broadcast_trick-style functions contain read-only
3914    # arrays
3915    dx = da.empty((4, 6), dtype=float, chunks=(2, 2))
3916    dx[0] = 99
3917
3918    assert_eq(dx[0, 0], 99.0)
3919
3920    dx[0:2] = 88
3921
3922    assert_eq(dx[0, 0], 88.0)
3923
3924
3925def test_setitem_errs():
3926    x = da.ones((4, 4), chunks=(2, 2))
3927
3928    with pytest.raises(ValueError):
3929        x[x > 1] = x
3930
3931    # Shape mismatch
3932    with pytest.raises(ValueError):
3933        x[[True, True, False, False], 0] = [2, 3, 4]
3934
3935    with pytest.raises(ValueError):
3936        x[[True, True, True, False], 0] = [2, 3]
3937
3938    x = da.ones((4, 4), chunks=(2, 2))
3939    with pytest.raises(ValueError):
3940        x[0, da.from_array([True, False, False, True])] = [2, 3, 4]
3941
3942    x = da.ones((4, 4), chunks=(2, 2))
3943    with pytest.raises(ValueError):
3944        x[0, da.from_array([True, True, False, False])] = [2, 3, 4]
3945
3946    x = da.ones((4, 4), chunks=(2, 2))
3947    with pytest.raises(ValueError):
3948        x[da.from_array([True, True, True, False]), 0] = [2, 3]
3949
3950    x = da.ones((4, 4), chunks=(2, 2))
3951
3952    # Too many indices
3953    with pytest.raises(IndexError):
3954        x[:, :, :] = 2
3955
3956    # 2-d boolean indexing a single dimension
3957    with pytest.raises(IndexError):
3958        x[[[True, True, False, False]], 0] = 5
3959
3960    # Too many/not enough booleans
3961    with pytest.raises(IndexError):
3962        x[[True, True, False]] = 5
3963
3964    with pytest.raises(IndexError):
3965        x[[False, True, True, True, False]] = 5
3966
3967    # 2-d indexing a single dimension
3968    with pytest.raises(IndexError):
3969        x[[[1, 2, 3]], 0] = 5
3970
3971    # Multiple 1-d boolean/integer arrays
3972    with pytest.raises(NotImplementedError):
3973        x[[1, 2], [2, 3]] = 6
3974
3975    with pytest.raises(NotImplementedError):
3976        x[[True, True, False, False], [2, 3]] = 5
3977
3978    with pytest.raises(NotImplementedError):
3979        x[[True, True, False, False], [False, True, False, False]] = 7
3980
3981    # scalar boolean indexing
3982    with pytest.raises(NotImplementedError):
3983        x[True] = 5
3984
3985    with pytest.raises(NotImplementedError):
3986        x[np.array(True)] = 5
3987
3988    with pytest.raises(NotImplementedError):
3989        x[0, da.from_array(True)] = 5
3990
3991    # Scalar arrays
3992    y = da.from_array(np.array(1))
3993    with pytest.raises(IndexError):
3994        y[:] = 2
3995
3996    # RHS has non-brodacastable extra leading dimensions
3997    x = np.arange(12).reshape((3, 4))
3998    dx = da.from_array(x, chunks=(2, 2))
3999    with pytest.raises(ValueError):
4000        dx[...] = np.arange(24).reshape((2, 1, 3, 4))
4001
4002    # RHS doesn't have chunks set
4003    dx = da.unique(da.random.random([10]))
4004    with pytest.raises(ValueError, match="Arrays chunk sizes are unknown"):
4005        dx[0] = 0
4006
4007
4008def test_zero_slice_dtypes():
4009    x = da.arange(5, chunks=1)
4010    y = x[[]]
4011    assert y.dtype == x.dtype
4012    assert y.shape == (0,)
4013    assert_eq(x[[]], np.arange(5)[[]])
4014
4015
4016def test_zero_sized_array_rechunk():
4017    x = da.arange(5, chunks=1)[:0]
4018    y = da.blockwise(identity, "i", x, "i", dtype=x.dtype)
4019    assert_eq(x, y)
4020
4021
4022def test_blockwise_zero_shape():
4023    da.blockwise(
4024        lambda x: x,
4025        "i",
4026        da.arange(10, chunks=10),
4027        "i",
4028        da.from_array(np.ones((0, 2)), ((0,), 2)),
4029        "ab",
4030        da.from_array(np.ones((0,)), ((0,),)),
4031        "a",
4032        dtype="float64",
4033    )
4034
4035
4036def test_blockwise_zero_shape_new_axes():
4037    da.blockwise(
4038        lambda x: np.ones(42),
4039        "i",
4040        da.from_array(np.ones((0, 2)), ((0,), 2)),
4041        "ab",
4042        da.from_array(np.ones((0,)), ((0,),)),
4043        "a",
4044        dtype="float64",
4045        new_axes={"i": 42},
4046    )
4047
4048
4049def test_broadcast_against_zero_shape():
4050    assert_eq(da.arange(1, chunks=1)[:0] + 0, np.arange(1)[:0] + 0)
4051    assert_eq(da.arange(1, chunks=1)[:0] + 0.1, np.arange(1)[:0] + 0.1)
4052    assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + 0, np.ones((5, 5))[:0] + 0)
4053    assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + 0.1, np.ones((5, 5))[:0] + 0.1)
4054    assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + 0, np.ones((5, 5))[:, :0] + 0)
4055    assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + 0.1, np.ones((5, 5))[:, :0] + 0.1)
4056
4057
4058def test_from_array_name():
4059    x = np.array([1, 2, 3, 4, 5])
4060    chunks = x.shape
4061    # Default is tokenize the array
4062    dx = da.from_array(x, chunks=chunks)
4063    hashed_name = dx.name
4064    assert da.from_array(x, chunks=chunks).name == hashed_name
4065    # Specify name directly
4066    assert da.from_array(x, chunks=chunks, name="x").name == "x"
4067    # False gives a random name
4068    dx2 = da.from_array(x, chunks=chunks, name=False)
4069    dx3 = da.from_array(x, chunks=chunks, name=False)
4070    assert dx2.name != hashed_name
4071    assert dx3.name != hashed_name
4072    assert dx2.name != dx3.name
4073
4074
4075def test_concatenate_errs():
4076    with pytest.raises(ValueError, match=r"Shapes.*\(2, 1\)"):
4077        da.concatenate(
4078            [da.zeros((2, 1), chunks=(2, 1)), da.zeros((2, 3), chunks=(2, 3))]
4079        )
4080
4081    with pytest.raises(ValueError):
4082        da.concatenate(
4083            [da.zeros((1, 2), chunks=(1, 2)), da.zeros((3, 2), chunks=(3, 2))], axis=1
4084        )
4085
4086
4087def test_stack_errs():
4088    with pytest.raises(ValueError) as e:
4089        da.stack([da.zeros((2,), chunks=2)] * 10 + [da.zeros((3,), chunks=3)] * 10)
4090
4091    assert (
4092        str(e.value)
4093        == "Stacked arrays must have the same shape. The first array had shape (2,), while array 11 has shape (3,)."
4094    )
4095    assert len(str(e.value)) < 105
4096
4097
4098def test_blockwise_with_numpy_arrays():
4099    x = np.ones(10)
4100    y = da.ones(10, chunks=(5,))
4101
4102    assert_eq(x + y, x + x)
4103
4104    s = da.sum(x)
4105    assert any(x is v for v in s.dask.values())
4106
4107
4108@pytest.mark.parametrize("chunks", (100, 6))
4109@pytest.mark.parametrize("other", [[0, 0, 1], [2, 1, 3], (0, 0, 1)])
4110def test_elemwise_with_lists(chunks, other):
4111    x = np.arange(12).reshape((4, 3))
4112    d = da.arange(12, chunks=chunks).reshape((4, 3))
4113
4114    x2 = np.vstack([x[:, 0], x[:, 1], x[:, 2]]).T
4115    d2 = da.vstack([d[:, 0], d[:, 1], d[:, 2]]).T
4116
4117    assert_eq(x2, d2)
4118
4119    x3 = x2 * other
4120    d3 = d2 * other
4121
4122    assert_eq(x3, d3)
4123
4124
4125def test_constructor_plugin():
4126    L = []
4127    L2 = []
4128    with dask.config.set(array_plugins=[L.append, L2.append]):
4129        x = da.ones(10, chunks=5)
4130        y = x + 1
4131
4132    assert L == L2 == [x, y]
4133
4134    with dask.config.set(array_plugins=[lambda x: x.compute()]):
4135        x = da.ones(10, chunks=5)
4136        y = x + 1
4137
4138    assert isinstance(y, np.ndarray)
4139    assert len(L) == 2
4140
4141
4142def test_no_warnings_on_metadata():
4143    x = da.ones(5, chunks=3)
4144    with warnings.catch_warnings(record=True) as record:
4145        da.arccos(x)
4146
4147    assert not record
4148
4149
4150def test_delayed_array_key_hygeine():
4151    a = da.zeros((1,), chunks=(1,))
4152    d = delayed(identity)(a)
4153    b = da.from_delayed(d, shape=a.shape, dtype=a.dtype)
4154    assert_eq(a, b)
4155
4156
4157def test_empty_chunks_in_array_len():
4158    x = da.ones((), chunks=())
4159    with pytest.raises(TypeError) as exc_info:
4160        len(x)
4161
4162    err_msg = "len() of unsized object"
4163    assert err_msg in str(exc_info.value)
4164
4165
4166@pytest.mark.parametrize("dtype", [None, [("a", "f4"), ("b", object)]])
4167def test_meta(dtype):
4168    a = da.zeros((1,), chunks=(1,))
4169    assert a._meta.dtype == a.dtype
4170    assert isinstance(a._meta, np.ndarray)
4171    assert a.nbytes < 1000
4172
4173
4174@pytest.mark.parametrize(
4175    "shape,limit,expected",
4176    [
4177        (100, 10, (10,) * 10),
4178        (20, 10, (10, 10)),
4179        (20, 5, (5, 5, 5, 5)),
4180        (24, 5, (4, 4, 4, 4, 4, 4)),  # common factor is close, use it
4181        (23, 5, (5, 5, 5, 5, 3)),  # relatively prime, don't use 1s
4182        (1000, 167, (125,) * 8),  # find close value
4183    ],
4184)
4185def test_normalize_chunks_auto_1d(shape, limit, expected):
4186    result = normalize_chunks("auto", (shape,), limit=limit, dtype=np.uint8)
4187    assert result == (expected,)
4188
4189
4190@pytest.mark.parametrize(
4191    "shape,chunks,limit,expected",
4192    [
4193        ((20, 20), ("auto", 2), 20, ((10, 10), (2,) * 10)),
4194        (
4195            (20, 20),
4196            ("auto", (2, 2, 2, 2, 2, 5, 5)),
4197            20,
4198            ((4, 4, 4, 4, 4), (2, 2, 2, 2, 2, 5, 5)),
4199        ),
4200        ((1, 20), "auto", 10, ((1,), (10, 10))),
4201    ],
4202)
4203def test_normalize_chunks_auto_2d(shape, chunks, limit, expected):
4204    result = normalize_chunks(chunks, shape, limit=limit, dtype="uint8")
4205    assert result == expected
4206
4207
4208def test_normalize_chunks_auto_3d():
4209    result = normalize_chunks(
4210        ("auto", "auto", 2), (20, 20, 20), limit=200, dtype="uint8"
4211    )
4212    expected = ((10, 10), (10, 10), (2,) * 10)
4213    assert result == expected
4214
4215    result = normalize_chunks("auto", (20, 20, 20), limit=8, dtype="uint8")
4216    expected = ((2,) * 10,) * 3
4217    assert result == expected
4218
4219
4220def test_constructors_chunks_dict():
4221    x = da.ones((20, 20), chunks={0: 10, 1: 5})
4222    assert x.chunks == ((10, 10), (5, 5, 5, 5))
4223
4224    x = da.ones((20, 20), chunks={0: 10, 1: "auto"})
4225    assert x.chunks == ((10, 10), (20,))
4226
4227
4228def test_from_array_chunks_dict():
4229    with dask.config.set({"array.chunk-size": "128kiB"}):
4230        x = np.empty((100, 100, 100))
4231        y = da.from_array(x, chunks={0: 10, 1: -1, 2: "auto"})
4232        z = da.from_array(x, chunks=(10, 100, 10))
4233        assert y.chunks == z.chunks
4234
4235
4236@pytest.mark.parametrize("dtype", [object, [("a", object), ("b", int)]])
4237def test_normalize_chunks_object_dtype(dtype):
4238    x = np.array(["a", "abc"], dtype=object)
4239    with pytest.raises(NotImplementedError):
4240        da.from_array(x, chunks="auto")
4241
4242
4243def test_normalize_chunks_tuples_of_tuples():
4244    result = normalize_chunks(((2, 3, 5), "auto"), (10, 10), limit=10, dtype=np.uint8)
4245    expected = ((2, 3, 5), (2, 2, 2, 2, 2))
4246    assert result == expected
4247
4248
4249def test_normalize_chunks_nan():
4250    with pytest.raises(ValueError) as info:
4251        normalize_chunks("auto", (np.nan,), limit=10, dtype=np.uint8)
4252    assert "auto" in str(info.value)
4253    with pytest.raises(ValueError) as info:
4254        normalize_chunks(((np.nan, np.nan), "auto"), (10, 10), limit=10, dtype=np.uint8)
4255    assert "auto" in str(info.value)
4256
4257
4258def test_pandas_from_dask_array():
4259    pd = pytest.importorskip("pandas")
4260    from dask.dataframe._compat import PANDAS_GT_130, PANDAS_GT_131
4261
4262    a = da.ones((12,), chunks=4)
4263    s = pd.Series(a, index=range(12))
4264
4265    if PANDAS_GT_130 and not PANDAS_GT_131:
4266        # https://github.com/pandas-dev/pandas/issues/38645
4267        assert s.dtype != a.dtype
4268    else:
4269        assert s.dtype == a.dtype
4270        assert_eq(s.values, a)
4271
4272
4273def test_from_zarr_unique_name():
4274    zarr = pytest.importorskip("zarr")
4275    a = zarr.array([1, 2, 3])
4276    b = zarr.array([4, 5, 6])
4277
4278    assert da.from_zarr(a).name != da.from_zarr(b).name
4279
4280
4281def test_from_zarr_name():
4282    zarr = pytest.importorskip("zarr")
4283    a = zarr.array([1, 2, 3])
4284    assert da.from_zarr(a, name="foo").name == "foo"
4285
4286
4287def test_zarr_roundtrip():
4288    pytest.importorskip("zarr")
4289    with tmpdir() as d:
4290        a = da.zeros((3, 3), chunks=(1, 1))
4291        a.to_zarr(d)
4292        a2 = da.from_zarr(d)
4293        assert_eq(a, a2)
4294        assert a2.chunks == a.chunks
4295
4296
4297def test_zarr_roundtrip_with_path_like():
4298    pytest.importorskip("zarr")
4299    with tmpdir() as d:
4300        path = pathlib.Path(d)
4301        a = da.zeros((3, 3), chunks=(1, 1))
4302        a.to_zarr(path)
4303        a2 = da.from_zarr(path)
4304        assert_eq(a, a2)
4305        assert a2.chunks == a.chunks
4306
4307
4308@pytest.mark.parametrize("compute", [False, True])
4309def test_zarr_return_stored(compute):
4310    pytest.importorskip("zarr")
4311    with tmpdir() as d:
4312        a = da.zeros((3, 3), chunks=(1, 1))
4313        a2 = a.to_zarr(d, compute=compute, return_stored=True)
4314        assert isinstance(a2, Array)
4315        assert_eq(a, a2, check_graph=False)
4316        assert a2.chunks == a.chunks
4317
4318
4319def test_zarr_inline_array():
4320    zarr = pytest.importorskip("zarr")
4321    a = zarr.array([1, 2, 3])
4322    dsk = dict(da.from_zarr(a, inline_array=True).dask)
4323    assert len(dsk) == 1
4324    assert a in list(dsk.values())[0]
4325
4326
4327def test_zarr_existing_array():
4328    zarr = pytest.importorskip("zarr")
4329    c = (1, 1)
4330    a = da.ones((3, 3), chunks=c)
4331    z = zarr.zeros_like(a, chunks=c)
4332    a.to_zarr(z)
4333    a2 = da.from_zarr(z)
4334    assert_eq(a, a2)
4335    assert a2.chunks == a.chunks
4336
4337
4338def test_to_zarr_unknown_chunks_raises():
4339    pytest.importorskip("zarr")
4340    a = da.random.random((10,), chunks=(3,))
4341    a = a[a > 0.5]
4342    with pytest.raises(ValueError, match="unknown chunk sizes"):
4343        a.to_zarr({})
4344
4345
4346def test_read_zarr_chunks():
4347    pytest.importorskip("zarr")
4348    a = da.zeros((9,), chunks=(3,))
4349    with tmpdir() as d:
4350        a.to_zarr(d)
4351        arr = da.from_zarr(d, chunks=(5,))
4352        assert arr.chunks == ((5, 4),)
4353
4354
4355def test_zarr_pass_mapper():
4356    pytest.importorskip("zarr")
4357    import zarr.storage
4358
4359    with tmpdir() as d:
4360        mapper = zarr.storage.DirectoryStore(d)
4361        a = da.zeros((3, 3), chunks=(1, 1))
4362        a.to_zarr(mapper)
4363        a2 = da.from_zarr(mapper)
4364        assert_eq(a, a2)
4365        assert a2.chunks == a.chunks
4366
4367
4368def test_zarr_group():
4369    zarr = pytest.importorskip("zarr")
4370    with tmpdir() as d:
4371        a = da.zeros((3, 3), chunks=(1, 1))
4372        a.to_zarr(d, component="test")
4373        with pytest.raises((OSError, ValueError)):
4374            a.to_zarr(d, component="test", overwrite=False)
4375        a.to_zarr(d, component="test", overwrite=True)
4376
4377        # second time is fine, group exists
4378        a.to_zarr(d, component="test2", overwrite=False)
4379        a.to_zarr(d, component="nested/test", overwrite=False)
4380        group = zarr.open_group(d, mode="r")
4381        assert list(group) == ["nested", "test", "test2"]
4382        assert "test" in group["nested"]
4383
4384        a2 = da.from_zarr(d, component="test")
4385        assert_eq(a, a2)
4386        assert a2.chunks == a.chunks
4387
4388
4389@pytest.mark.parametrize(
4390    "data",
4391    [
4392        [(), True],
4393        [((1,),), True],
4394        [((1, 1, 1),), True],
4395        [((1,), (1,)), True],
4396        [((2, 2, 1),), True],
4397        [((2, 2, 3),), False],
4398        [((1, 1, 1), (2, 2, 3)), False],
4399        [((1, 2, 1),), False],
4400    ],
4401)
4402def test_regular_chunks(data):
4403    chunkset, expected = data
4404    assert da.core._check_regular_chunks(chunkset) == expected
4405
4406
4407def test_zarr_nocompute():
4408    pytest.importorskip("zarr")
4409    with tmpdir() as d:
4410        a = da.zeros((3, 3), chunks=(1, 1))
4411        out = a.to_zarr(d, compute=False)
4412        assert isinstance(out, Delayed)
4413        dask.compute(out)
4414        a2 = da.from_zarr(d)
4415        assert_eq(a, a2)
4416        assert a2.chunks == a.chunks
4417
4418
4419def test_tiledb_roundtrip():
4420    tiledb = pytest.importorskip("tiledb")
4421    # 1) load with default chunking
4422    # 2) load from existing tiledb.DenseArray
4423    # 3) write to existing tiledb.DenseArray
4424    a = da.random.random((3, 3))
4425    with tmpdir() as uri:
4426        da.to_tiledb(a, uri)
4427        tdb = da.from_tiledb(uri)
4428
4429        assert_eq(a, tdb)
4430        assert a.chunks == tdb.chunks
4431
4432        # from tiledb.array
4433        with tiledb.open(uri) as t:
4434            tdb2 = da.from_tiledb(t)
4435            assert_eq(a, tdb2)
4436
4437    with tmpdir() as uri2:
4438        with tiledb.empty_like(uri2, a) as t:
4439            a.to_tiledb(t)
4440            assert_eq(da.from_tiledb(uri2), a)
4441
4442    # specific chunking
4443    with tmpdir() as uri:
4444        a = da.random.random((3, 3), chunks=(1, 1))
4445        a.to_tiledb(uri)
4446        tdb = da.from_tiledb(uri)
4447
4448        assert_eq(a, tdb)
4449        assert a.chunks == tdb.chunks
4450
4451
4452def test_tiledb_multiattr():
4453    tiledb = pytest.importorskip("tiledb")
4454    dom = tiledb.Domain(
4455        tiledb.Dim("x", (0, 1000), tile=100), tiledb.Dim("y", (0, 1000), tile=100)
4456    )
4457    schema = tiledb.ArraySchema(
4458        attrs=(tiledb.Attr("attr1"), tiledb.Attr("attr2")), domain=dom
4459    )
4460
4461    with tmpdir() as uri:
4462        tiledb.DenseArray.create(uri, schema)
4463        tdb = tiledb.DenseArray(uri, "w")
4464
4465        ar1 = np.random.randn(*tdb.schema.shape)
4466        ar2 = np.random.randn(*tdb.schema.shape)
4467
4468        tdb[:] = {"attr1": ar1, "attr2": ar2}
4469        tdb = tiledb.DenseArray(uri, "r")
4470
4471        # basic round-trip from dask.array
4472        d = da.from_tiledb(uri, attribute="attr2")
4473        assert_eq(d, ar2)
4474
4475        # smoke-test computation directly on the TileDB view
4476        d = da.from_tiledb(uri, attribute="attr2")
4477        assert_eq(np.mean(ar2), d.mean().compute(scheduler="threads"))
4478
4479
4480def test_blockview():
4481    x = da.arange(10, chunks=2)
4482    blockview = BlockView(x)
4483    assert x.blocks == blockview
4484    assert isinstance(blockview[0], da.Array)
4485
4486    assert_eq(blockview[0], x[:2])
4487    assert_eq(blockview[-1], x[-2:])
4488    assert_eq(blockview[:3], x[:6])
4489    assert_eq(blockview[[0, 1, 2]], x[:6])
4490    assert_eq(blockview[[3, 0, 2]], np.array([6, 7, 0, 1, 4, 5]))
4491    assert_eq(blockview.shape, tuple(map(len, x.chunks)))
4492    assert_eq(blockview.size, np.prod(blockview.shape))
4493    assert_eq(
4494        blockview.ravel(), [blockview[idx] for idx in np.ndindex(blockview.shape)]
4495    )
4496
4497    x = da.random.random((20, 20), chunks=(4, 5))
4498    blockview = BlockView(x)
4499    assert_eq(blockview[0], x[:4])
4500    assert_eq(blockview[0, :3], x[:4, :15])
4501    assert_eq(blockview[:, :3], x[:, :15])
4502    assert_eq(blockview.shape, tuple(map(len, x.chunks)))
4503    assert_eq(blockview.size, np.prod(blockview.shape))
4504    assert_eq(
4505        blockview.ravel(), [blockview[idx] for idx in np.ndindex(blockview.shape)]
4506    )
4507
4508    x = da.ones((40, 40, 40), chunks=(10, 10, 10))
4509    blockview = BlockView(x)
4510    assert_eq(blockview[0, :, 0], np.ones((10, 40, 10)))
4511    assert_eq(blockview.shape, tuple(map(len, x.chunks)))
4512    assert_eq(blockview.size, np.prod(blockview.shape))
4513    assert_eq(
4514        blockview.ravel(), [blockview[idx] for idx in np.ndindex(blockview.shape)]
4515    )
4516
4517    x = da.ones((2, 2), chunks=1)
4518    with pytest.raises(ValueError):
4519        blockview[[0, 1], [0, 1]]
4520    with pytest.raises(ValueError):
4521        blockview[np.array([0, 1]), [0, 1]]
4522    with pytest.raises(ValueError) as info:
4523        blockview[np.array([0, 1]), np.array([0, 1])]
4524    assert "list" in str(info.value)
4525    with pytest.raises(ValueError) as info:
4526        blockview[None, :, :]
4527    assert "newaxis" in str(info.value) and "not supported" in str(info.value)
4528    with pytest.raises(IndexError) as info:
4529        blockview[100, 100]
4530
4531
4532def test_blocks_indexer():
4533    x = da.arange(10, chunks=2)
4534
4535    assert isinstance(x.blocks[0], da.Array)
4536
4537    assert_eq(x.blocks[0], x[:2])
4538    assert_eq(x.blocks[-1], x[-2:])
4539    assert_eq(x.blocks[:3], x[:6])
4540    assert_eq(x.blocks[[0, 1, 2]], x[:6])
4541    assert_eq(x.blocks[[3, 0, 2]], np.array([6, 7, 0, 1, 4, 5]))
4542
4543    x = da.random.random((20, 20), chunks=(4, 5))
4544    assert_eq(x.blocks[0], x[:4])
4545    assert_eq(x.blocks[0, :3], x[:4, :15])
4546    assert_eq(x.blocks[:, :3], x[:, :15])
4547
4548    x = da.ones((40, 40, 40), chunks=(10, 10, 10))
4549    assert_eq(x.blocks[0, :, 0], np.ones((10, 40, 10)))
4550
4551    x = da.ones((2, 2), chunks=1)
4552    with pytest.raises(ValueError):
4553        x.blocks[[0, 1], [0, 1]]
4554    with pytest.raises(ValueError):
4555        x.blocks[np.array([0, 1]), [0, 1]]
4556    with pytest.raises(ValueError) as info:
4557        x.blocks[np.array([0, 1]), np.array([0, 1])]
4558    assert "list" in str(info.value)
4559    with pytest.raises(ValueError) as info:
4560        x.blocks[None, :, :]
4561    assert "newaxis" in str(info.value) and "not supported" in str(info.value)
4562    with pytest.raises(IndexError) as info:
4563        x.blocks[100, 100]
4564
4565
4566def test_partitions_indexer():
4567    # .partitions is an alias of .blocks for dask arrays
4568    x = da.arange(10, chunks=2)
4569
4570    assert isinstance(x.partitions[0], da.Array)
4571
4572    assert_eq(x.partitions[0], x[:2])
4573    assert_eq(x.partitions[-1], x[-2:])
4574    assert_eq(x.partitions[:3], x[:6])
4575    assert_eq(x.partitions[[0, 1, 2]], x[:6])
4576    assert_eq(x.partitions[[3, 0, 2]], np.array([6, 7, 0, 1, 4, 5]))
4577
4578    x = da.random.random((20, 20), chunks=(4, 5))
4579    assert_eq(x.partitions[0], x[:4])
4580    assert_eq(x.partitions[0, :3], x[:4, :15])
4581    assert_eq(x.partitions[:, :3], x[:, :15])
4582
4583    x = da.ones((40, 40, 40), chunks=(10, 10, 10))
4584    assert_eq(x.partitions[0, :, 0], np.ones((10, 40, 10)))
4585
4586    x = da.ones((2, 2), chunks=1)
4587    with pytest.raises(ValueError):
4588        x.partitions[[0, 1], [0, 1]]
4589    with pytest.raises(ValueError):
4590        x.partitions[np.array([0, 1]), [0, 1]]
4591    with pytest.raises(ValueError) as info:
4592        x.partitions[np.array([0, 1]), np.array([0, 1])]
4593    assert "list" in str(info.value)
4594    with pytest.raises(ValueError) as info:
4595        x.partitions[None, :, :]
4596    assert "newaxis" in str(info.value) and "not supported" in str(info.value)
4597    with pytest.raises(IndexError) as info:
4598        x.partitions[100, 100]
4599
4600
4601@pytest.mark.filterwarnings("ignore:the matrix subclass:PendingDeprecationWarning")
4602def test_dask_array_holds_scipy_sparse_containers():
4603    pytest.importorskip("scipy.sparse")
4604    import scipy.sparse
4605
4606    x = da.random.random((1000, 10), chunks=(100, 10))
4607    x[x < 0.9] = 0
4608    xx = x.compute()
4609    y = x.map_blocks(scipy.sparse.csr_matrix)
4610
4611    vs = y.to_delayed().flatten().tolist()
4612    values = dask.compute(*vs, scheduler="single-threaded")
4613    assert all(isinstance(v, scipy.sparse.csr_matrix) for v in values)
4614
4615    yy = y.compute(scheduler="single-threaded")
4616    assert isinstance(yy, scipy.sparse.spmatrix)
4617    assert (yy == xx).all()
4618
4619    z = x.T.map_blocks(scipy.sparse.csr_matrix)
4620    zz = z.compute(scheduler="single-threaded")
4621    assert isinstance(zz, scipy.sparse.spmatrix)
4622    assert (zz == xx.T).all()
4623
4624
4625@pytest.mark.parametrize("axis", [0, 1])
4626def test_scipy_sparse_concatenate(axis):
4627    pytest.importorskip("scipy.sparse")
4628    import scipy.sparse
4629
4630    rs = da.random.RandomState(RandomState=np.random.RandomState)
4631
4632    xs = []
4633    ys = []
4634    for i in range(2):
4635        x = rs.random((1000, 10), chunks=(100, 10))
4636        x[x < 0.9] = 0
4637        xs.append(x)
4638        ys.append(x.map_blocks(scipy.sparse.csr_matrix))
4639
4640    z = da.concatenate(ys, axis=axis)
4641    z = z.compute()
4642
4643    if axis == 0:
4644        sp_concatenate = scipy.sparse.vstack
4645    elif axis == 1:
4646        sp_concatenate = scipy.sparse.hstack
4647    z_expected = sp_concatenate([scipy.sparse.csr_matrix(e.compute()) for e in xs])
4648
4649    assert (z != z_expected).nnz == 0
4650
4651
4652def test_3851():
4653    with warnings.catch_warnings() as record:
4654        Y = da.random.random((10, 10), chunks="auto")
4655        da.argmax(Y, axis=0).compute()
4656
4657    assert not record
4658
4659
4660def test_3925():
4661    x = da.from_array(np.array(["a", "b", "c"], dtype=object), chunks=-1)
4662    assert (x[0] == x[0]).compute(scheduler="sync")
4663
4664
4665def test_map_blocks_large_inputs_delayed():
4666    a = da.ones(10, chunks=(5,))
4667    b = np.ones(1000000)
4668
4669    c = a.map_blocks(add, b)
4670    assert any(b is v for v in c.dask.values())
4671    assert repr(dict(c.dask)).count(repr(b)[:10]) == 1  # only one occurrence
4672
4673    d = a.map_blocks(lambda x, y: x + y.sum(), y=b)
4674    assert_eq(d, d)
4675    assert any(b is v for v in d.dask.values())
4676    assert repr(dict(c.dask)).count(repr(b)[:10]) == 1  # only one occurrence
4677
4678
4679def test_blockwise_large_inputs_delayed():
4680    a = da.ones(10, chunks=(5,))
4681    b = np.ones(1000000)
4682
4683    c = da.blockwise(add, "i", a, "i", b, None, dtype=a.dtype)
4684    assert any(b is v for v in c.dask.values())
4685    assert repr(dict(c.dask)).count(repr(b)[:10]) == 1  # only one occurrence
4686
4687    d = da.blockwise(lambda x, y: x + y, "i", a, "i", y=b, dtype=a.dtype)
4688    assert any(b is v for v in d.dask.values())
4689    assert repr(dict(c.dask)).count(repr(b)[:10]) == 1  # only one occurrence
4690
4691
4692def test_slice_reversed():
4693    x = da.ones(10, chunks=-1)
4694    y = x[6:3]
4695
4696    assert_eq(y, np.ones(0))
4697
4698
4699def test_map_blocks_chunks():
4700    x = da.arange(400, chunks=(100,))
4701    y = da.arange(40, chunks=(10,))
4702
4703    def func(a, b):
4704        return np.array([a.max(), b.max()])
4705
4706    assert_eq(
4707        da.map_blocks(func, x, y, chunks=(2,), dtype=x.dtype),
4708        np.array([99, 9, 199, 19, 299, 29, 399, 39]),
4709    )
4710
4711
4712def test_nbytes_auto():
4713    chunks = normalize_chunks("800B", shape=(500,), dtype="float64")
4714    assert chunks == ((100, 100, 100, 100, 100),)
4715    chunks = normalize_chunks("200B", shape=(10, 10), dtype="float64")
4716    assert chunks == ((5, 5), (5, 5))
4717    chunks = normalize_chunks((5, "200B"), shape=(10, 10), dtype="float64")
4718    assert chunks == ((5, 5), (5, 5))
4719    chunks = normalize_chunks("33B", shape=(10, 10), dtype="float64")
4720    assert chunks == ((2, 2, 2, 2, 2), (2, 2, 2, 2, 2))
4721    chunks = normalize_chunks("1800B", shape=(10, 20, 30), dtype="float64")
4722    assert chunks == ((5, 5), (5, 5, 5, 5), (6, 6, 6, 6, 6))
4723
4724    with pytest.raises(ValueError):
4725        normalize_chunks("10B", shape=(10,), limit=20, dtype="float64")
4726    with pytest.raises(ValueError):
4727        normalize_chunks("100B", shape=(10, 10), limit=20, dtype="float64")
4728    with pytest.raises(ValueError):
4729        normalize_chunks(("100B", "10B"), shape=(10, 10), dtype="float64")
4730    with pytest.raises(ValueError):
4731        normalize_chunks(("10B", "10B"), shape=(10, 10), limit=20, dtype="float64")
4732
4733
4734def test_auto_chunks_h5py():
4735    h5py = pytest.importorskip("h5py")
4736
4737    with tmpfile(".hdf5") as fn:
4738        with h5py.File(fn, mode="a") as f:
4739            d = f.create_dataset(
4740                "/x", shape=(1000, 1000), chunks=(32, 64), dtype="float64"
4741            )
4742            d[:] = 1
4743
4744        with h5py.File(fn, mode="a") as f:
4745            d = f["x"]
4746            with dask.config.set({"array.chunk-size": "1 MiB"}):
4747                x = da.from_array(d)
4748                assert isinstance(x._meta, np.ndarray)
4749                assert x.chunks == ((256, 256, 256, 232), (512, 488))
4750
4751
4752def test_no_warnings_from_blockwise():
4753    with pytest.warns(None) as record:
4754        x = da.ones((3, 10, 10), chunks=(3, 2, 2))
4755        da.map_blocks(lambda y: np.mean(y, axis=0), x, dtype=x.dtype, drop_axis=0)
4756    assert not record
4757
4758    with pytest.warns(None) as record:
4759        x = da.ones((15, 15), chunks=(5, 5))
4760        (x.dot(x.T + 1) - x.mean(axis=0)).std()
4761    assert not record
4762
4763    with pytest.warns(None) as record:
4764        x = da.ones((1,), chunks=(1,))
4765        1 / x[0]
4766    assert not record
4767
4768
4769def test_from_array_meta():
4770    sparse = pytest.importorskip("sparse")
4771    x = np.ones(10)
4772    meta = sparse.COO.from_numpy(x)
4773    y = da.from_array(x, meta=meta)
4774    assert isinstance(y._meta, sparse.COO)
4775
4776
4777def test_compute_chunk_sizes():
4778    x = da.from_array(np.linspace(-1, 1, num=50), chunks=10)
4779    y = x[x < 0]
4780    assert np.isnan(y.shape[0])
4781    assert y.chunks == ((np.nan,) * 5,)
4782
4783    z = y.compute_chunk_sizes()
4784    assert y is z
4785    assert z.chunks == ((10, 10, 5, 0, 0),)
4786    assert len(z) == 25
4787
4788    # check that dtype of chunk dimensions is `int`
4789    assert isinstance(z.chunks[0][0], int)
4790
4791
4792def test_compute_chunk_sizes_2d_array():
4793    X = np.linspace(-1, 1, num=9 * 4).reshape(9, 4)
4794    X = da.from_array(X, chunks=(3, 4))
4795    idx = X.sum(axis=1) > 0
4796    Y = X[idx]
4797
4798    # This is very similar to the DataFrame->Array conversion
4799    assert np.isnan(Y.shape[0]) and Y.shape[1] == 4
4800    assert Y.chunks == ((np.nan, np.nan, np.nan), (4,))
4801
4802    Z = Y.compute_chunk_sizes()
4803    assert Y is Z
4804    assert Z.chunks == ((0, 1, 3), (4,))
4805    assert Z.shape == (4, 4)
4806
4807
4808def test_compute_chunk_sizes_3d_array(N=8):
4809    X = np.linspace(-1, 2, num=8 * 8 * 8).reshape(8, 8, 8)
4810    X = da.from_array(X, chunks=(4, 4, 4))
4811    idx = X.sum(axis=0).sum(axis=0) > 0
4812    Y = X[idx]
4813    idx = X.sum(axis=1).sum(axis=1) < 0
4814    Y = Y[:, idx]
4815    idx = X.sum(axis=2).sum(axis=1) > 0.1
4816    Y = Y[:, :, idx]
4817
4818    # Checking to make sure shapes are different on outputs
4819    assert Y.compute().shape == (8, 3, 5)
4820    assert X.compute().shape == (8, 8, 8)
4821
4822    assert Y.chunks == ((np.nan, np.nan),) * 3
4823    assert all(np.isnan(s) for s in Y.shape)
4824    Z = Y.compute_chunk_sizes()
4825    assert Z is Y
4826    assert Z.shape == (8, 3, 5)
4827    assert Z.chunks == ((4, 4), (3, 0), (1, 4))
4828
4829
4830def _known(num=50):
4831    return da.from_array(np.linspace(-1, 1, num=num), chunks=10)
4832
4833
4834@pytest.fixture()
4835def unknown():
4836    x = _known()
4837    y = x[x < 0]
4838    assert y.chunks == ((np.nan,) * 5,)
4839    return y
4840
4841
4842def test_compute_chunk_sizes_warning_fixes_rechunk(unknown):
4843    y = unknown
4844    with pytest.raises(ValueError, match="compute_chunk_sizes"):
4845        y.rechunk("auto")
4846    y.compute_chunk_sizes()
4847    y.rechunk("auto")
4848
4849
4850def test_compute_chunk_sizes_warning_fixes_to_zarr(unknown):
4851    pytest.importorskip("zarr")
4852    y = unknown
4853    with pytest.raises(ValueError, match="compute_chunk_sizes"):
4854        with StringIO() as f:
4855            y.to_zarr(f)
4856    y.compute_chunk_sizes()
4857
4858    with pytest.raises(ValueError, match="irregular chunking"):
4859        with StringIO() as f:
4860            y.to_zarr(f)
4861
4862
4863def test_compute_chunk_sizes_warning_fixes_to_svg(unknown):
4864    y = unknown
4865    with pytest.raises(NotImplementedError, match="compute_chunk_sizes"):
4866        y.to_svg()
4867    y.compute_chunk_sizes()
4868    y.to_svg()
4869
4870
4871def test_compute_chunk_sizes_warning_fixes_concatenate():
4872    x = _known(num=100).reshape(10, 10)
4873    idx = x.sum(axis=0) > 0
4874    y1 = x[idx]
4875    y2 = x[idx]
4876    with pytest.raises(ValueError, match="compute_chunk_sizes"):
4877        da.concatenate((y1, y2), axis=1)
4878    y1.compute_chunk_sizes()
4879    y2.compute_chunk_sizes()
4880    da.concatenate((y1, y2), axis=1)
4881
4882
4883def test_compute_chunk_sizes_warning_fixes_reduction(unknown):
4884    y = unknown
4885    with pytest.raises(ValueError, match="compute_chunk_sizes"):
4886        da.argmin(y)
4887    y.compute_chunk_sizes()
4888    da.argmin(y)
4889
4890
4891def test_compute_chunk_sizes_warning_fixes_reshape(unknown):
4892    y = unknown
4893    with pytest.raises(ValueError, match="compute_chunk_sizes"):
4894        da.reshape(y, (5, 5))
4895    y.compute_chunk_sizes()
4896    da.reshape(y, (5, 5))
4897
4898
4899def test_compute_chunk_sizes_warning_fixes_slicing():
4900    x = _known(num=100).reshape(10, 10)
4901    y = x[x.sum(axis=0) < 0]
4902    with pytest.raises(ValueError, match="compute_chunk_sizes"):
4903        y[:3, :]
4904    y.compute_chunk_sizes()
4905    y[:3, :]
4906
4907
4908def test_rechunk_auto():
4909    x = da.ones(10, chunks=(1,))
4910    y = x.rechunk()
4911
4912    assert y.npartitions == 1
4913
4914
4915def test_chunk_assignment_invalidates_cached_properties():
4916    x = da.ones((4,), chunks=(1,))
4917    y = x.copy()
4918    # change chunks directly, which should change all of the tested properties
4919    y._chunks = ((2, 2), (0, 0, 0, 0))
4920    assert not x.ndim == y.ndim
4921    assert not x.shape == y.shape
4922    assert not x.size == y.size
4923    assert not x.numblocks == y.numblocks
4924    assert not x.npartitions == y.npartitions
4925    assert not x.__dask_keys__() == y.__dask_keys__()
4926    assert not np.array_equal(x._key_array, y._key_array)
4927
4928
4929def test_map_blocks_series():
4930    pd = pytest.importorskip("pandas")
4931    import dask.dataframe as dd
4932    from dask.dataframe.utils import assert_eq as dd_assert_eq
4933
4934    x = da.ones(10, chunks=(5,))
4935    s = x.map_blocks(pd.Series)
4936    assert isinstance(s, dd.Series)
4937    assert s.npartitions == x.npartitions
4938
4939    dd_assert_eq(s, s)
4940
4941
4942@pytest.mark.xfail(reason="need to remove singleton index dimension")
4943def test_map_blocks_dataframe():
4944    pd = pytest.importorskip("pandas")
4945    import dask.dataframe as dd
4946    from dask.dataframe.utils import assert_eq as dd_assert_eq
4947
4948    x = da.ones((10, 2), chunks=(5, 2))
4949    s = x.map_blocks(pd.DataFrame)
4950    assert isinstance(s, dd.DataFrame)
4951    assert s.npartitions == x.npartitions
4952    dd_assert_eq(s, s)
4953
4954
4955def test_dask_layers():
4956    a = da.ones(1)
4957    assert a.dask.layers.keys() == {a.name}
4958    assert a.dask.dependencies == {a.name: set()}
4959    assert a.__dask_layers__() == (a.name,)
4960    b = a + 1
4961    assert b.dask.layers.keys() == {a.name, b.name}
4962    assert b.dask.dependencies == {a.name: set(), b.name: {a.name}}
4963    assert b.__dask_layers__() == (b.name,)
4964