1from . import core
2
3
def _tiledb_to_chunks(tiledb_array):
    """Collect the tile extent of every dimension of ``tiledb_array``.

    Returns a list with one entry per dimension of the array's schema, in
    dimension order, each entry being that dimension's ``tile`` attribute.
    """
    array_schema = tiledb_array.schema
    return [
        array_schema.domain.dim(axis).tile for axis in range(array_schema.ndim)
    ]
7
8
def from_tiledb(uri, attribute=None, chunks=None, storage_options=None, **kwargs):
    """Load array from the TileDB storage format

    See https://docs.tiledb.io for more information about TileDB.

    Parameters
    ----------
    uri: TileDB array or str
        Location of the data to load, or an already opened TileDB array
    attribute: str or None
        Attribute selection (single-attribute view on multi-attribute array)
    chunks: sequence or None
        Chunk specification with one entry per array dimension. When not
        given, the tile extents of the TileDB array are used.
    storage_options: dict or None
        Configuration options passed to ``tiledb.open`` when ``uri`` is not
        already an open array. An optional ``"key"`` entry is extracted and
        passed as the encryption key instead of as a configuration option.
        The dict passed in is not modified.
    **kwargs:
        Accepted but not used by this function.

    Returns
    -------

    A Dask Array

    Raises
    ------
    ValueError
        If the TileDB array is sparse, or is open for writing.
    TypeError
        If the array has more than one attribute and ``attribute`` was not
        provided.

    Examples
    --------

    >>> import tempfile, tiledb
    >>> import dask.array as da, numpy as np
    >>> uri = tempfile.NamedTemporaryFile().name
    >>> _ = tiledb.from_numpy(uri, np.arange(0,9).reshape(3,3))  # create a tiledb array
    >>> tdb_ar = da.from_tiledb(uri)  # read back the array
    >>> tdb_ar.shape
    (3, 3)
    >>> tdb_ar.mean().compute()
    4.0
    """
    import tiledb

    # Work on a copy: popping "key" below must not alter the caller's dict,
    # otherwise a second call with the same options would lose the key.
    tiledb_config = dict(storage_options) if storage_options else {}
    key = tiledb_config.pop("key", None)

    if isinstance(uri, tiledb.Array):
        tdb = uri
    else:
        tdb = tiledb.open(uri, attr=attribute, config=tiledb_config, key=key)

    if tdb.schema.sparse:
        raise ValueError("Sparse TileDB arrays are not supported")

    if not attribute:
        if tdb.schema.nattr > 1:
            raise TypeError(
                "keyword 'attribute' must be provided "
                "when loading a multi-attribute TileDB array"
            )
        else:
            attribute = tdb.schema.attr(0).name

    if tdb.iswritable:
        raise ValueError("TileDB array must be open for reading")

    # Fall back to the array's own tile extents when no chunks were requested.
    chunks = chunks or _tiledb_to_chunks(tdb)

    # Kept as an assert so the exception type seen by callers is unchanged.
    assert len(chunks) == tdb.schema.ndim, (
        "chunks must have one entry per TileDB array dimension"
    )

    return core.from_array(tdb, chunks, name="tiledb-%s" % uri)
70
71
def to_tiledb(
    darray,
    uri,
    compute=True,
    return_stored=False,
    storage_options=None,
    key=None,
    **kwargs,
):
    """Save array to the TileDB storage format

    Save 'array' using the TileDB storage manager, to any TileDB-supported URI,
    including local disk, S3, or HDFS.

    See https://docs.tiledb.io for more information about TileDB.

    Parameters
    ----------

    darray: dask.array
        A dask array to write.
    uri: str or TileDB array
        Any supported TileDB storage location, or an open, writable TileDB
        array whose dtype and number of dimensions match ``darray``.
    storage_options: dict
        Dict containing any configuration options for the TileDB backend.
        see https://docs.tiledb.io/en/stable/tutorials/config.html
        An optional ``"key"`` entry is used as the encryption key when the
        ``key`` argument is not given; it is never forwarded as a
        configuration option. The dict passed in is not modified.
    compute, return_stored: see ``store()``
    key: str or None
        Encryption key
    **kwargs:
        Extra keyword arguments forwarded to ``tiledb.empty_like`` when a
        new array is created from a string ``uri``.

    Returns
    -------

    None
        Unless ``return_stored`` is set to ``True`` (``False`` by default)

    Raises
    ------
    ValueError
        If ``darray`` is irregularly chunked, if ``uri`` is neither a string
        nor a TileDB array, if an existing target array differs from
        ``darray`` in dtype or number of dimensions, or if the target array
        is not open and writable.

    Notes
    -----

    TileDB only supports regularly-chunked arrays.
    TileDB `tile extents`_ correspond to form 2 of the dask
    `chunk specification`_, and the conversion is
    done automatically for supported arrays.

    Examples
    --------

    >>> import dask.array as da, tempfile
    >>> uri = tempfile.NamedTemporaryFile().name
    >>> data = da.random.random(5,5)
    >>> da.to_tiledb(data, uri)
    >>> import tiledb
    >>> tdb_ar = tiledb.open(uri)
    >>> all(tdb_ar == data)
    True

    .. _chunk specification: http://docs.dask.org/en/latest/array-chunks.html
    .. _tile extents: https://docs.tiledb.io/en/stable/tutorials/tiling-dense.html
    """
    import tiledb

    # Work on a copy so that removing "key" does not alter the caller's dict.
    tiledb_config = dict(storage_options) if storage_options else {}
    # encryption key, if any; always strip it from the config, even when an
    # explicit ``key`` argument takes precedence, so it is never passed on
    # as a TileDB configuration option.
    config_key = tiledb_config.pop("key", None)
    key = key or config_key

    if not core._check_regular_chunks(darray.chunks):
        raise ValueError(
            "Attempt to save array to TileDB with irregular "
            "chunking, please call `arr.rechunk(...)` first."
        )

    if isinstance(uri, str):
        # chunks are regular (checked above), so the first chunk of each
        # dimension gives that dimension's tile extent
        chunks = [c[0] for c in darray.chunks]
        # create a suitable, empty, writable TileDB array
        tdb = tiledb.empty_like(
            uri, darray, tile=chunks, config=tiledb_config, key=key, **kwargs
        )
    elif isinstance(uri, tiledb.Array):
        tdb = uri
        # sanity checks
        if not ((darray.dtype == tdb.dtype) and (darray.ndim == tdb.ndim)):
            raise ValueError(
                "Target TileDB array layout is not compatible with source array"
            )
    else:
        raise ValueError(
            "'uri' must be string pointing to supported TileDB store location "
            "or an open, writable TileDB array."
        )

    if not (tdb.isopen and tdb.iswritable):
        raise ValueError("Target TileDB array is not open and writable.")

    return darray.store(tdb, lock=False, compute=compute, return_stored=return_stored)
166