# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
#          and contributor agreement.

"""
    High-level interface for creating HDF5 virtual datasets
"""

from copy import deepcopy as copy
from collections import namedtuple

import numpy as np

from .compat import filename_encode
from .datatype import Datatype
from .selections import SimpleSelection, select
from .. import h5d, h5p, h5s, h5t, h5
from .. import version


class VDSmap(namedtuple('VDSmap', ('vspace', 'file_name',
                                   'dset_name', 'src_space'))):
    '''Defines a region in a virtual dataset mapping to part of a source dataset
    '''


# Virtual datasets are only flagged as supported when the HDF5 library
# version is at least the minimum version recorded in the h5py config.
vds_support = False
hdf5_version = version.hdf5_version_tuple[0:3]

if hdf5_version >= h5.get_config().vds_min_hdf5_version:
    vds_support = True


def _convert_space_for_key(space, key):
    """
    Converts the space with the given key. Mainly used to allow unlimited
    dimensions in virtual space selection.

    The selection of ``space`` is modified in place: for every slice in
    ``key`` whose ``stop`` equals ``h5s.UNLIMITED``, the hyperslab count of
    the corresponding dimension is replaced by ``h5s.UNLIMITED``. Nothing is
    done unless the current selection is a regular hyperslab.

    space
        Dataspace object whose selection should be adjusted.
    key
        The indexing key (a single item or a tuple of items) that was used
        to build the selection.
    """
    key = key if isinstance(key, tuple) else (key,)
    type_code = space.get_select_type()

    # check for unlimited selections in case where selection is regular
    # hyperslab, which is the only allowed case for h5s.UNLIMITED to be
    # in the selection
    if type_code == h5s.SEL_HYPERSLABS and space.is_regular_hyperslab():
        rank = space.get_simple_extent_ndims()
        nargs = len(key)

        idx_offset = 0
        start, stride, count, block = space.get_regular_hyperslab()
        counts = list(count)
        # iterate through keys. we ignore numeral indices. if we get a
        # slice, we check for an h5s.UNLIMITED value as the stop
        # if we get an ellipsis, we offset index by (rank - nargs)
        for i, sl in enumerate(key):
            if isinstance(sl, slice):
                if sl.stop == h5s.UNLIMITED:
                    counts[i + idx_offset] = h5s.UNLIMITED
            elif sl is Ellipsis:
                idx_offset = rank - nargs

        space.select_hyperslab(start, tuple(counts), stride, block)


class VirtualSource(object):
    """Source definition for virtual data sets.

    Instantiate this class to represent an entire source dataset, and then
    slice it to indicate which regions should be used in the virtual dataset.

    path_or_dataset
        The path to a file, or an h5py dataset. If a dataset is given,
        no other parameters are allowed, as the relevant values are taken from
        the dataset instead.
    name
        The name of the source dataset within the file.
    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The source dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    """
    def __init__(self, path_or_dataset, name=None,
                 shape=None, dtype=None, maxshape=None):
        # Imported here rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from .dataset import Dataset
        if isinstance(path_or_dataset, Dataset):
            # Collect every extra argument that was explicitly supplied so
            # the error message can report all of them at once.
            failed = {k: v
                      for k, v in
                      {'name': name, 'shape': shape,
                       'dtype': dtype, 'maxshape': maxshape}.items()
                      if v is not None}
            if failed:
                raise TypeError("If a Dataset is passed as the first argument "
                                "then no other arguments may be passed. You "
                                "passed {failed}".format(failed=failed))
            ds = path_or_dataset
            path = ds.file.filename
            name = ds.name
            shape = ds.shape
            dtype = ds.dtype
            maxshape = ds.maxshape
        else:
            path = path_or_dataset
            if name is None:
                raise TypeError("The name parameter is required when "
                                "specifying a source by path")
            if shape is None:
                raise TypeError("The shape parameter is required when "
                                "specifying a source by path")
            elif isinstance(shape, int):
                shape = (shape,)

            if isinstance(maxshape, int):
                maxshape = (maxshape,)

        self.path = path
        self.name = name
        self.dtype = dtype

        if maxshape is None:
            self.maxshape = shape
        else:
            # None marks an unlimited axis; store it as h5s.UNLIMITED.
            self.maxshape = tuple([h5s.UNLIMITED if ix is None else ix
                                   for ix in maxshape])
        # Initially the selection covers the whole source dataset.
        self.sel = SimpleSelection(shape)

    @property
    def shape(self):
        """Shape of the currently selected region (``self.sel.array_shape``)."""
        return self.sel.array_shape

    def __getitem__(self, key):
        """Return a deep copy of this source restricted to the region ``key``.

        The original object is left untouched; only the copy receives the
        new selection.
        """
        tmp = copy(self)
        tmp.sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(tmp.sel.id, key)
        return tmp


class VirtualLayout(object):
    """Object for building a virtual dataset.

    Instantiate this class to define a virtual dataset, assign to slices of it
    (using VirtualSource objects), and then pass it to
    group.create_virtual_dataset() to add the virtual dataset to a file.

    This class does not allow access to the data; the virtual dataset must
    be created in a file before it can be used.

    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The virtual dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    filename
        The name of the destination file, if known in advance. Mappings from
        data in the same file will be stored with filename '.', allowing the
        file to be renamed later.
    """
    def __init__(self, shape, dtype, maxshape=None, filename=None):
        self.shape = (shape,) if isinstance(shape, int) else shape
        self.dtype = dtype
        self.maxshape = (maxshape,) if isinstance(maxshape, int) else maxshape
        self._filename = filename
        # Encoded source filenames; only filled in while the destination
        # filename is unknown (see __setitem__ and _get_dcpl).
        self._src_filenames = set()
        self.dcpl = h5p.create(h5p.DATASET_CREATE)

    def __setitem__(self, key, source):
        """Map the region ``key`` of the virtual dataset onto ``source``.

        ``source`` is expected to provide ``path``, ``name`` and ``sel``
        attributes, as VirtualSource does. The mapping is recorded on the
        layout's dataset creation property list.
        """
        sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(sel.id, key)
        src_filename = self._source_file_name(source.path, self._filename)

        self.dcpl.set_virtual(
            sel.id, src_filename, source.name.encode('utf-8'), source.sel.id
        )
        if self._filename is None:
            # Remember the source file so _get_dcpl() can later detect
            # whether it turns out to be the destination file.
            self._src_filenames.add(src_filename)

    @staticmethod
    def _source_file_name(src_filename, dst_filename) -> bytes:
        """Return the encoded file name to store for a source mapping.

        Returns ``b'.'`` when ``dst_filename`` is given and encodes to the
        same value as ``src_filename``; otherwise returns the encoded
        ``src_filename``.
        """
        src_filename = filename_encode(src_filename)
        if dst_filename and (src_filename == filename_encode(dst_filename)):
            # use relative path if the source dataset is in the same
            # file, in order to keep the virtual dataset valid in case
            # the file is renamed.
            return b'.'
        # Already encoded above; no need to encode a second time.
        return src_filename

    def _get_dcpl(self, dst_filename):
        """Get the property list containing virtual dataset mappings

        If the destination filename wasn't known when the VirtualLayout was
        created, it is handled here.

        Raises ValueError if a filename was given to the constructor and it
        does not match ``dst_filename``.
        """
        dst_filename = filename_encode(dst_filename)
        if self._filename is not None:
            # filename was known in advance; check dst_filename matches
            if dst_filename != filename_encode(self._filename):
                raise ValueError(f"{dst_filename!r} != {self._filename!r}")
            return self.dcpl

        # destination file not known in advance
        if dst_filename in self._src_filenames:
            # At least 1 source file is the same as the destination file,
            # but we didn't know this when making the mapping. Copy the mappings
            # to a new property list, replacing the dest filename with '.'
            new_dcpl = h5p.create(h5p.DATASET_CREATE)
            for i in range(self.dcpl.get_virtual_count()):
                src_filename = self.dcpl.get_virtual_filename(i)
                new_dcpl.set_virtual(
                    self.dcpl.get_virtual_vspace(i),
                    self._source_file_name(src_filename, dst_filename),
                    self.dcpl.get_virtual_dsetname(i).encode('utf-8'),
                    self.dcpl.get_virtual_srcspace(i),
                )
            return new_dcpl
        else:
            return self.dcpl  # Mappings are all from other files

    def make_dataset(self, parent, name, fillvalue=None):
        """ Return a new low-level dataset identifier for a virtual dataset

        parent
            Object providing ``file.filename`` and ``id``; the dataset is
            created under ``parent.id``.
        name
            Name passed through to ``h5d.create``.
        fillvalue
            Optional fill value set on the dataset creation property list.
        """
        dcpl = self._get_dcpl(parent.file.filename)

        if fillvalue is not None:
            # _get_dcpl() may hand back the layout's own property list.
            # Set the fill value on a copy so it does not leak into other
            # datasets created from the same layout later on.
            dcpl = dcpl.copy()
            dcpl.set_fill_value(np.array([fillvalue]))

        maxshape = self.maxshape
        if maxshape is not None:
            # None marks an unlimited axis.
            maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)

        virt_dspace = h5s.create_simple(self.shape, maxshape)

        if isinstance(self.dtype, Datatype):
            # Named types are used as-is
            tid = self.dtype.id
        else:
            dtype = np.dtype(self.dtype)
            tid = h5t.py_create(dtype, logical=1)

        return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace,
                          dcpl=dcpl)