from . import core


def _tiledb_to_chunks(tiledb_array):
    """Return the tile extent of every dimension of ``tiledb_array``.

    The result is a list with one entry per dimension of the array schema,
    used as the default chunking when loading the array.
    """
    schema = tiledb_array.schema
    return [schema.domain.dim(i).tile for i in range(schema.ndim)]


def _split_storage_options(storage_options, key=None):
    """Separate the encryption key from the TileDB configuration options.

    Works on a copy so that the caller's ``storage_options`` mapping is never
    modified.  The ``"key"`` entry is always removed from the returned
    configuration; an explicitly passed ``key`` takes precedence over it.

    Returns
    -------
    (config, key): tuple
        ``config`` is a new dict without the ``"key"`` entry, ``key`` is the
        selected encryption key or ``None``.
    """
    config = dict(storage_options) if storage_options else {}
    config_key = config.pop("key", None)
    return config, key or config_key


def from_tiledb(uri, attribute=None, chunks=None, storage_options=None, **kwargs):
    """Load array from the TileDB storage format

    See https://docs.tiledb.io for more information about TileDB.

    Parameters
    ----------
    uri: TileDB array or str
        Location to load the data from, or an already opened TileDB array
    attribute: str or None
        Attribute selection (single-attribute view on multi-attribute array)
    chunks: sequence or None
        Chunk specification with one entry per array dimension.  Defaults to
        the tile extents of the TileDB array.
    storage_options: dict or None
        Configuration options passed to ``tiledb.open``.  A ``"key"`` entry,
        if present, is used as the encryption key instead of being passed as
        configuration.  The dict itself is left unmodified.
    **kwargs:
        Accepted but currently unused.

    Returns
    -------

    A Dask Array

    Raises
    ------
    ValueError
        If the array is sparse, is open for writing, or ``chunks`` does not
        have one entry per dimension.
    TypeError
        If the array has several attributes and ``attribute`` is not given.

    Examples
    --------

    >>> import tempfile, tiledb
    >>> import dask.array as da, numpy as np
    >>> uri = tempfile.NamedTemporaryFile().name
    >>> _ = tiledb.from_numpy(uri, np.arange(0,9).reshape(3,3))  # create a tiledb array
    >>> tdb_ar = da.from_tiledb(uri)  # read back the array
    >>> tdb_ar.shape
    (3, 3)
    >>> tdb_ar.mean().compute()
    4.0
    """
    import tiledb

    tiledb_config, key = _split_storage_options(storage_options)

    if isinstance(uri, tiledb.Array):
        tdb = uri
    else:
        tdb = tiledb.open(uri, attr=attribute, config=tiledb_config, key=key)

    if tdb.schema.sparse:
        raise ValueError("Sparse TileDB arrays are not supported")

    if not attribute:
        if tdb.schema.nattr > 1:
            raise TypeError(
                "keyword 'attribute' must be provided "
                "when loading a multi-attribute TileDB array"
            )
        attribute = tdb.schema.attr(0).name

    if tdb.iswritable:
        raise ValueError("TileDB array must be open for reading")

    chunks = chunks or _tiledb_to_chunks(tdb)

    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if len(chunks) != tdb.schema.ndim:
        raise ValueError(
            "'chunks' must have one entry per TileDB array dimension "
            "(got %d, expected %d)" % (len(chunks), tdb.schema.ndim)
        )

    return core.from_array(tdb, chunks, name=f"tiledb-{uri}")


def to_tiledb(
    darray,
    uri,
    compute=True,
    return_stored=False,
    storage_options=None,
    key=None,
    **kwargs,
):
    """Save array to the TileDB storage format

    Save 'array' using the TileDB storage manager, to any TileDB-supported URI,
    including local disk, S3, or HDFS.

    See https://docs.tiledb.io for more information about TileDB.

    Parameters
    ----------

    darray: dask.array
        A dask array to write.
    uri:
        Any supported TileDB storage location, or an open, writable
        TileDB array.
    storage_options: dict
        Dict containing any configuration options for the TileDB backend.
        see https://docs.tiledb.io/en/stable/tutorials/config.html
        A ``"key"`` entry is treated as the encryption key and removed from
        the configuration; the dict itself is left unmodified.
    compute, return_stored: see ``store()``
    key: str or None
        Encryption key.  Takes precedence over ``storage_options["key"]``.
    **kwargs:
        Passed to ``tiledb.empty_like`` when ``uri`` is a string.

    Returns
    -------

    None
        Unless ``return_stored`` is set to ``True`` (``False`` by default)

    Raises
    ------
    ValueError
        If ``darray`` is irregularly chunked, if ``uri`` is neither a string
        nor a TileDB array, if an existing target array differs in dtype or
        number of dimensions, or if the target is not open and writable.

    Notes
    -----

    TileDB only supports regularly-chunked arrays.
    TileDB `tile extents`_ correspond to form 2 of the dask
    `chunk specification`_, and the conversion is
    done automatically for supported arrays.

    Examples
    --------

    >>> import dask.array as da, tempfile
    >>> uri = tempfile.NamedTemporaryFile().name
    >>> data = da.random.random(5,5)
    >>> da.to_tiledb(data, uri)
    >>> import tiledb
    >>> tdb_ar = tiledb.open(uri)
    >>> all(tdb_ar == data)
    True

    .. _chunk specification: http://docs.dask.org/en/latest/array-chunks.html
    .. _tile extents: https://docs.tiledb.io/en/stable/tutorials/tiling-dense.html
    """
    import tiledb

    # encryption key, if any; never left behind in the backend configuration
    tiledb_config, key = _split_storage_options(storage_options, key)

    if not core._check_regular_chunks(darray.chunks):
        raise ValueError(
            "Attempt to save array to TileDB with irregular "
            "chunking, please call `arr.rechunk(...)` first."
        )

    if isinstance(uri, str):
        # chunks are regular, so the first chunk of each axis is the tile extent
        chunks = [c[0] for c in darray.chunks]
        # create a suitable, empty, writable TileDB array
        tdb = tiledb.empty_like(
            uri, darray, tile=chunks, config=tiledb_config, key=key, **kwargs
        )
    elif isinstance(uri, tiledb.Array):
        tdb = uri
        # sanity checks
        if darray.dtype != tdb.dtype or darray.ndim != tdb.ndim:
            raise ValueError(
                "Target TileDB array layout is not compatible with source array"
            )
    else:
        raise ValueError(
            "'uri' must be string pointing to supported TileDB store location "
            "or an open, writable TileDB array."
        )

    if not (tdb.isopen and tdb.iswritable):
        raise ValueError("Target TileDB array is not open and writable.")

    return darray.store(tdb, lock=False, compute=compute, return_stored=return_stored)