1# This file is part of h5py, a Python interface to the HDF5 library.
2#
3# http://www.h5py.org
4#
5# Copyright 2008-2013 Andrew Collette and contributors
6#
7# License:  Standard 3-clause BSD; see "license.txt" for full license terms
8#           and contributor agreement.
9
10"""
11    High-level interface for creating HDF5 virtual datasets
12"""
13
14from copy import deepcopy as copy
15from collections import namedtuple
16
17import numpy as np
18
19from .compat import filename_encode
20from .datatype import Datatype
21from .selections import SimpleSelection, select
22from .. import h5d, h5p, h5s, h5t, h5
23from .. import version
24
25
class VDSmap(namedtuple('VDSmap', 'vspace file_name dset_name src_space')):
    """Defines a region in a virtual dataset mapping to part of a source dataset.

    A plain named tuple whose fields are, in order: ``vspace``,
    ``file_name``, ``dset_name`` and ``src_space``.
    """
30
31
# First three components of the HDF5 version tuple exposed by ``version``.
hdf5_version = version.hdf5_version_tuple[0:3]

# True when that version is at least the minimum version the h5py
# configuration object gives for virtual dataset support.
vds_support = hdf5_version >= h5.get_config().vds_min_hdf5_version
37
38
def _convert_space_for_key(space, key):
    """
    Re-select ``space`` so that unlimited slice stops in ``key`` take effect.

    ``key`` is either a single index or a tuple of indices. The dataspace is
    only touched when its current selection is a regular hyperslab; in that
    case, every slice in ``key`` whose ``stop`` equals ``h5s.UNLIMITED`` gets
    the matching hyperslab count replaced by ``h5s.UNLIMITED`` and the
    hyperslab is selected again. ``space`` is modified in place and nothing
    is returned.
    """
    if not isinstance(key, tuple):
        key = (key,)

    sel_type = space.get_select_type()

    # h5s.UNLIMITED may only appear in a regular hyperslab selection, so any
    # other kind of selection is left exactly as it is.
    if not (sel_type == h5s.SEL_HYPERSLABS and space.is_regular_hyperslab()):
        return

    ndim = space.get_simple_extent_ndims()
    n_items = len(key)
    start, stride, count, block = space.get_regular_hyperslab()

    # Position in ``key`` -> axis of ``space``. Integer indices need no
    # special treatment; once an Ellipsis has been seen, the entries after it
    # are shifted by (rank - number of key entries).
    shift = 0
    for pos, item in enumerate(key):
        if item is Ellipsis:
            shift = ndim - n_items
            continue
        if not isinstance(item, slice):
            continue
        if item.stop == h5s.UNLIMITED:
            updated = list(count)
            updated[pos + shift] = h5s.UNLIMITED
            count = tuple(updated)

    space.select_hyperslab(start, count, stride, block)
70
71
class VirtualSource(object):
    """Source definition for virtual data sets.

    Instantiate this class to represent an entire source dataset, and then
    slice it to indicate which regions should be used in the virtual dataset.
    A VirtualSource can only be sliced once: slicing returns a new object
    describing the selected region, and slicing that result again raises
    RuntimeError.

    path_or_dataset
        The path to a file, or an h5py dataset. If a dataset is given,
        no other parameters are allowed, as the relevant values are taken from
        the dataset instead.
    name
        The name of the source dataset within the file.
    shape
        A tuple giving the shape of the dataset. A single int is accepted
        and treated as a one-element tuple.
    dtype
        Numpy dtype or string.
    maxshape
        The source dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    """
    def __init__(self, path_or_dataset, name=None,
                 shape=None, dtype=None, maxshape=None):
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from .dataset import Dataset
        if isinstance(path_or_dataset, Dataset):
            # Everything is read from the dataset, so explicitly passed
            # values would be ambiguous: collect them and refuse.
            failed = {k: v
                      for k, v in
                      {'name': name, 'shape': shape,
                       'dtype': dtype, 'maxshape': maxshape}.items()
                      if v is not None}
            if failed:
                raise TypeError("If a Dataset is passed as the first argument "
                                "then no other arguments may be passed.  You "
                                "passed {failed}".format(failed=failed))
            ds = path_or_dataset
            path = ds.file.filename
            name = ds.name
            shape = ds.shape
            dtype = ds.dtype
            maxshape = ds.maxshape
        else:
            path = path_or_dataset
            if name is None:
                raise TypeError("The name parameter is required when "
                                "specifying a source by path")
            if shape is None:
                raise TypeError("The shape parameter is required when "
                                "specifying a source by path")
            elif isinstance(shape, int):
                shape = (shape,)

            if isinstance(maxshape, int):
                maxshape = (maxshape,)

        self.path = path
        self.name = name
        self.dtype = dtype

        if maxshape is None:
            self.maxshape = shape
        else:
            # None marks an unlimited axis; store it as h5s.UNLIMITED.
            self.maxshape = tuple([h5s.UNLIMITED if ix is None else ix
                                   for ix in maxshape])
        self.sel = SimpleSelection(shape)
        # Stays True until this object is produced by slicing; used to
        # reject a second slice (see __getitem__).
        self._all_selected = True

    @property
    def shape(self):
        """The ``array_shape`` of the current selection."""
        return self.sel.array_shape

    def __getitem__(self, key):
        """Return a copy of this source restricted to the region ``key``.

        Raises RuntimeError if this object is itself the result of slicing.
        """
        # The new selection is built from self.shape, which is taken from
        # the current selection. After one slice that is no longer the shape
        # the source was created with, so a second slice would be built
        # against the wrong extent. Refuse it explicitly rather than return
        # a silently wrong mapping.
        if not self._all_selected:
            raise RuntimeError(
                "VirtualSource objects can only be sliced once."
            )
        tmp = copy(self)
        tmp.sel = select(self.shape, key, dataset=None)
        # Apply any h5s.UNLIMITED slice stops to the selection's dataspace.
        _convert_space_for_key(tmp.sel.id, key)
        tmp._all_selected = False
        return tmp
145
class VirtualLayout(object):
    """Object for building a virtual dataset.

    Instantiate this class to define a virtual dataset, assign to slices of it
    (using VirtualSource objects), and then pass it to
    group.create_virtual_dataset() to add the virtual dataset to a file.

    This class does not allow access to the data; the virtual dataset must
    be created in a file before it can be used.

    shape
        A tuple giving the shape of the dataset. A single int is accepted
        and treated as a one-element tuple.
    dtype
        Numpy dtype or string.
    maxshape
        The virtual dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    filename
        The name of the destination file, if known in advance. Mappings from
        data in the same file will be stored with filename '.', allowing the
        file to be renamed later.
    """
    def __init__(self, shape, dtype, maxshape=None, filename=None):
        self.shape = (shape,) if isinstance(shape, int) else shape
        self.dtype = dtype
        self.maxshape = (maxshape,) if isinstance(maxshape, int) else maxshape
        self._filename = filename
        # Encoded source file names seen so far; only filled in while the
        # destination filename is unknown (see __setitem__ and _get_dcpl).
        self._src_filenames = set()
        # Dataset creation property list that accumulates the mappings.
        self.dcpl = h5p.create(h5p.DATASET_CREATE)

    def __setitem__(self, key, source):
        """Map the region ``key`` of the virtual dataset onto ``source``.

        ``source`` supplies ``path``, ``name`` and ``sel`` (as a
        VirtualSource does).
        """
        sel = select(self.shape, key, dataset=None)
        # Apply any h5s.UNLIMITED slice stops to the selection's dataspace.
        _convert_space_for_key(sel.id, key)
        src_filename = self._source_file_name(source.path, self._filename)

        self.dcpl.set_virtual(
            sel.id, src_filename, source.name.encode('utf-8'), source.sel.id
        )
        if self._filename is None:
            # Remember the source so _get_dcpl can later detect that it is
            # the same file as the destination.
            self._src_filenames.add(src_filename)

    @staticmethod
    def _source_file_name(src_filename, dst_filename) -> bytes:
        """Return the encoded file name to store for a mapping.

        Gives b'.' when ``dst_filename`` is set and encodes to the same
        value as ``src_filename``; otherwise the encoded ``src_filename``.
        """
        src_filename = filename_encode(src_filename)
        if dst_filename and (src_filename == filename_encode(dst_filename)):
            # use relative path if the source dataset is in the same
            # file, in order to keep the virtual dataset valid in case
            # the file is renamed.
            return b'.'
        return filename_encode(src_filename)

    def _get_dcpl(self, dst_filename):
        """Get the property list containing virtual dataset mappings

        If the destination filename wasn't known when the VirtualLayout was
        created, it is handled here.

        Raises Exception if a filename was given at construction and it does
        not match ``dst_filename``. The returned object may be ``self.dcpl``
        itself, so callers must not modify it in place.
        """
        dst_filename = filename_encode(dst_filename)
        if self._filename is not None:
            # filename was known in advance; check dst_filename matches
            if dst_filename != filename_encode(self._filename):
                raise Exception(f"{dst_filename!r} != {self._filename!r}")
            return self.dcpl

        # destination file not known in advance
        if dst_filename in self._src_filenames:
            # At least 1 source file is the same as the destination file,
            # but we didn't know this when making the mapping. Copy the mappings
            # to a new property list, replacing the dest filename with '.'
            new_dcpl = h5p.create(h5p.DATASET_CREATE)
            for i in range(self.dcpl.get_virtual_count()):
                src_filename = self.dcpl.get_virtual_filename(i)
                new_dcpl.set_virtual(
                    self.dcpl.get_virtual_vspace(i),
                    self._source_file_name(src_filename, dst_filename),
                    self.dcpl.get_virtual_dsetname(i).encode('utf-8'),
                    self.dcpl.get_virtual_srcspace(i),
                )
            return new_dcpl
        else:
            return self.dcpl  # Mappings are all from other files

    def make_dataset(self, parent, name, fillvalue=None):
        """ Return a new low-level dataset identifier for a virtual dataset

        parent
            Object providing ``id`` and ``file.filename`` for the location
            where the dataset is created.
        name
            Name passed through to h5d.create().
        fillvalue
            Optional fill value; when given it is set on the property list
            used for this dataset only.
        """
        dcpl = self._get_dcpl(parent.file.filename)

        if fillvalue is not None:
            # _get_dcpl may hand back self.dcpl itself. Set the fill value on
            # a copy so it is not recorded on the layout and does not leak
            # into later make_dataset() calls that pass no fill value.
            dcpl = dcpl.copy()
            dcpl.set_fill_value(np.array([fillvalue]))

        maxshape = self.maxshape
        if maxshape is not None:
            # None marks an unlimited axis; translate it to h5s.UNLIMITED.
            maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)

        virt_dspace = h5s.create_simple(self.shape, maxshape)

        if isinstance(self.dtype, Datatype):
            # Named types are used as-is
            tid = self.dtype.id
        else:
            dtype = np.dtype(self.dtype)
            tid = h5t.py_create(dtype, logical=1)

        return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace,
                          dcpl=dcpl)
250