1##############################################################################
2# Copyright by The HDF Group.                                                #
3# All rights reserved.                                                       #
4#                                                                            #
5# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
6# Utilities.  The full HDF5 REST Server copyright notice, including          #
7# terms governing use, modification, and redistribution, is contained in     #
8# the file COPYING, which can be found at the root of the source code        #
9# distribution tree.  If you do not have access to this file, you may        #
10# request a copy from help@hdfgroup.org.                                     #
11##############################################################################
12
13from __future__ import absolute_import
14
import six

# Python 3 has no builtin ``unicode``; alias it to ``str`` so the rest of
# this module can refer to ``unicode`` uniformly on both Python 2 and 3.
if six.PY3:
    unicode = str
19
20
21
22"""
23This class is used to manage UUID lookup tables for primary HDF objects (Groups, Datasets,
24 and Datatypes).  For HDF5 files that are read/write, this information is managed within
25 the file itself in the "__db__" group.  For read-only files, the data is managed in
26 an external file (domain filename with ".db" extension).
27
28 "___db__"  ("root" for read-only case)
29    description: Group object (member of root group). Only objects below this group are used
30            for UUID data
31    members: "{groups}", "{datasets}", "{datatypes}", "{objects}", "{paths}"
32    attrs: 'rootUUID': UUID of the root group
33
34"{groups}"
35    description: contains map of UUID->group objects
36    members: hard link to each anonymous group (i.e. groups which are not
37        linked to by anywhere else).  Link name is the UUID
38    attrs: group reference (or path for read-only files) to the group (for non-
39        anonymous groups).
40
41"{datasets}"
42    description: contains map of UUID->dataset objects
43    members: hard link to each anonymous dataset (i.e. datasets which are not
44        linked to by anywhere else).  Link name is the UUID
45    attrs: dataset reference (or path for read-only files) to the dataset (for non-
46        anonymous datasets).
47
48"{dataset_props}:
49    description contains dataset creation properties"
50    members: sub-group with link name as UUID.  Sub-group attributes are the creation props
51
52"{datatypes}"
    description: contains map of UUID->datatype objects
54    members: hard link to each anonymous datatype (i.e. datatypes which are not
55        linked to by anywhere else).  Link name is the UUID
56    attrs: datatype reference (or path for read-only files) to the datatype (for non-
57        anonymous datatypes).
58
59"{addr}"
60    description: contains map of file offset to UUID.
61    members: none
62    attrs: map of file offset to UUID
63
64
65
66
67"""
68import errno
69import time
70import h5py
71import numpy as np
72import uuid
73import os.path as op
74import os
75import json
76import logging
77
78from .hdf5dtype import getTypeItem, createDataType, getItemSize
79
# global dictionary to direct back to the Hdf5db instance by filename
# (needed for visititems callback)
# Will break in multi-threaded context
_db = {}

UUID_LEN = 36  # length for uuid strings

# standard compress filters
# keyed by HDF5 filter id; 'alias' is the h5py-style short name and
# 'options' names the filter's tunable settings, where applicable
_HDF_FILTERS = {
    1: {'class': 'H5Z_FILTER_DEFLATE', 'alias': 'gzip', 'options': ['level']},
    2: {'class': 'H5Z_FILTER_SHUFFLE', 'alias': 'shuffle'},
    3: {'class': 'H5Z_FILTER_FLETCHER32', 'alias': 'fletcher32'},
    4: {'class': 'H5Z_FILTER_SZIP', 'alias': 'szip', 'options': ['bitsPerPixel', 'coding', 'pixelsPerBlock', 'pixelsPerScanLine']},
    5: {'class': 'H5Z_FILTER_NBIT'},
    6: {'class': 'H5Z_FILTER_SCALEOFFSET', 'alias': 'scaleoffset', 'options': ['scaleType']},
    32000: {'class': 'H5Z_FILTER_LZF', 'alias': 'lzf'}
}

# maps filter option name -> {enum value: symbolic name} for reporting
# filter settings with their HDF5 symbolic constants
_HDF_FILTER_OPTION_ENUMS = {'coding': {h5py.h5z.SZIP_EC_OPTION_MASK: 'H5_SZIP_EC_OPTION_MASK',
                                       h5py.h5z.SZIP_NN_OPTION_MASK: 'H5_SZIP_NN_OPTION_MASK'},
                            'scaleType': {h5py.h5z.SO_FLOAT_DSCALE: 'H5Z_SO_FLOAT_DSCALE',
                                          h5py.h5z.SO_FLOAT_ESCALE: 'H5Z_SO_FLOAT_ESCALE',
                                          h5py.h5z.SO_INT: 'H5Z_SO_INT'}}

# h5py supported filters
# short name -> HDF5 filter id (subset of _HDF_FILTERS)
_H5PY_FILTERS = {'gzip': 1,
                 'shuffle': 2,
                 'fletcher32': 3,
                 'szip': 4,
                 'scaleoffset': 6,
                 'lzf': 32000}

# filter names h5py accepts via the dataset `compression` keyword
_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
113
114
def visitObj(path, obj):
    """visititems callback: dispatch to the Hdf5db instance registered
    for this object's file in the module-level _db map."""
    _db[obj.file.filename].visit(path, obj)
118
119
120class Hdf5db:
121
122    @staticmethod
123    def createHDF5File(filePath):
124        # create an "empty" hdf5 file
125        if op.isfile(filePath):
126            raise IOError(errno.EEXIST, "Resource already exists")
127
128        f = h5py.File(filePath, 'w')
129        f.close()
130
131    @staticmethod
132    def getVersionInfo():
133        versionInfo = {}
134        versionInfo['hdf5-json-version'] = "1.1.1" # todo - have this auto-synch with package version
135        versionInfo['h5py_version'] = h5py.version.version
136        versionInfo['hdf5_version'] = h5py.version.hdf5_version
137        return versionInfo
138
    def __init__(self, filePath, dbFilePath=None, readonly=False,
                 app_logger=None, root_uuid=None, update_timestamps=True,
                 userid=None):
        """Open an existing HDF5 file and set up UUID-db bookkeeping.

        filePath          - path to an existing HDF5 file
        dbFilePath        - path for the external db file (read-only case);
                            defaults to a dot-prefixed sibling of filePath
        readonly          - force read-only mode even if the file is writable
        app_logger        - logger to use (default: root logger)
        root_uuid         - pre-assigned UUID for the root group, if any
        update_timestamps - when False, create/modified times are not recorded
        userid            - NOTE(review): accepted but not used in this
                            initializer — verify against callers

        Raises IOError if the file is missing or not an HDF5 file.
        """
        if app_logger:
            self.log = app_logger
        else:
            self.log = logging.getLogger()
        if len(filePath) == 0 or not op.isfile(filePath):
            raise IOError(errno.ENXIO, "file not found")
        if not h5py.is_hdf5(filePath):
            raise IOError(errno.EINVAL, "not an HDF5 file")

        mode = 'r'
        if readonly:
            self.readonly = True
        else:
            # check the owner-write permission bit to detect a
            # filesystem-level read-only file
            if not os.stat(filePath).st_mode & 0o200:
                # file is read-only
                self.readonly = True
            else:
                mode = 'r+'
                self.readonly = False


        self.log.info("init -- filePath: " + filePath + " mode: " + mode)

        self.update_timestamps = update_timestamps

        self.f = h5py.File(filePath, mode, libver='latest')

        self.root_uuid = root_uuid

        if self.readonly:
            # for read-only files, add a dot in front of the name to be used as
            # the db file.  This won't collide with actual data files, since
            # "." is not allowed as the first character in a domain name.
            if not dbFilePath:
                dirname = op.dirname(self.f.filename)
                basename = op.basename(self.f.filename)
                if len(dirname) > 0:
                    dbFilePath = dirname + '/.' + basename
                else:
                    dbFilePath = '.' + basename
            dbMode = 'r+'
            if not op.isfile(dbFilePath):
                dbMode = 'w'
            self.log.info("dbFilePath: " + dbFilePath + " mode: " + dbMode)
            self.dbf = h5py.File(dbFilePath, dbMode)
        else:
            self.dbf = None  # read/write file: db info is stored in the file itself
        # create a global reference to this class
        # so visitObj can call back
        _db[filePath] = self
192
    def __enter__(self):
        """Context-manager entry: returns this Hdf5db instance."""
        self.log.info('Hdf5db __enter')
        return self
196
    def __exit__(self, type, value, traceback):
        """Context-manager exit: flush/close the HDF5 file (and the
        external db file, if any) and deregister this instance from the
        module-level _db map."""
        self.log.info('Hdf5db __exit')
        filename = self.f.filename
        self.f.flush()
        self.f.close()
        if self.dbf:
            # external db file is only present for read-only data files
            self.dbf.flush()
            self.dbf.close()
        del _db[filename]
206
207    def getTimeStampName(self, uuid, objType="object", name=None):
208        ts_name = uuid
209        if objType != "object":
210            if len(name) == 0:
211                self.log.error("empty name passed to setCreateTime")
212                raise Exception("bad setCreateTimeParameter")
213            if objType == "attribute":
214                ts_name += "_attr:["
215                ts_name += name
216                ts_name += "]"
217            elif objType == "link":
218                ts_name += "_link:["
219                ts_name += name
220                ts_name += "]"
221            else:
222                msg = "Bad objType passed to setCreateTime"
223                self.log.error(msg)
224                raise IOError(errno.EIO, msg)
225        return ts_name
226
227    """
228      setCreateTime - sets the create time timestamp for the
229            given object.
230        uuid - id of object
231        objtype - one of "object", "link", "attribute"
232        name - name (for attributes, links... ignored for objects)
233        timestamp - time (otherwise current time will be used)
234
235       returns - nothing
236
237       Note - should only be called once per object
238    """
239    def setCreateTime(self, uuid, objType="object", name=None, timestamp=None):
240        if not self.update_timestamps:
241            return
242        ctime_grp = self.dbGrp["{ctime}"]
243        ts_name = self.getTimeStampName(uuid, objType, name)
244        if timestamp is None:
245            timestamp = time.time()
246        if ts_name in ctime_grp.attrs:
247            self.log.warning("modifying create time for object: " + ts_name)
248        ctime_grp.attrs.create(ts_name, timestamp, dtype='int64')
249
250    """
251      getCreateTime - gets the create time timestamp for the
252            given object.
253        uuid - id of object
254        objtype - one of "object", "link", "attribute"
255        name - name (for attributes, links... ignored for objects)
256        useRoot - if true, use the time value for root object as default
257
258       returns - create time for object, or create time for root if not set
259    """
260    def getCreateTime(self, uuid, objType="object", name=None, useRoot=True):
261        ctime_grp = self.dbGrp["{ctime}"]
262        ts_name = self.getTimeStampName(uuid, objType, name)
263        timestamp = None
264        if ts_name in ctime_grp.attrs:
265            timestamp = ctime_grp.attrs[ts_name]
266        elif useRoot:
267            # return root timestamp
268            root_uuid = self.dbGrp.attrs["rootUUID"]
269            if root_uuid in ctime_grp.attrs:
270                timestamp = ctime_grp.attrs[root_uuid]
271        return timestamp
272
273    """
274      setModifiedTime - sets the modified time timestamp for the
275            given object.
276        uuid - id of object
277        objtype - one of "object", "link", "attribute"
278        name - name (for attributes, links... ignored for objects)
279        timestamp - time (otherwise current time will be used)
280
281       returns - nothing
282
283    """
284    def setModifiedTime(self, uuid, objType="object", name=None, timestamp=None):
285        if not self.update_timestamps:
286            return
287        mtime_grp = self.dbGrp["{mtime}"]
288        ts_name = self.getTimeStampName(uuid, objType, name)
289        if timestamp is None:
290            timestamp = time.time()
291        mtime_grp.attrs.create(ts_name, timestamp, dtype='int64')
292
293    """
294      getModifiedTime - gets the modified time timestamp for the
295            given object.
296        uuid - id of object
297        objtype - one of "object", "link", "attribute"
298        name - name (for attributes, links... ignored for objects)
299        useRoot - if true, use the time value for root object as default
300
301       returns - create time for object, or create time for root if not set
302    """
303    def getModifiedTime(self, uuid, objType="object", name=None, useRoot=True):
304        mtime_grp = self.dbGrp["{mtime}"]
305        ts_name = self.getTimeStampName(uuid, objType, name)
306        timestamp = None
307        if ts_name in mtime_grp.attrs:
308            timestamp = mtime_grp.attrs[ts_name]
309        else:
310            # return create time if no modified time has been set
311            ctime_grp = self.dbGrp["{ctime}"]
312            if ts_name in ctime_grp.attrs:
313                timestamp = ctime_grp.attrs[ts_name]
314            elif useRoot:
315                # return root timestamp
316                root_uuid = self.dbGrp.attrs["rootUUID"]
317                timestamp = mtime_grp.attrs[root_uuid]
318        return timestamp
319
320    """
321      getAclGroup - return the db group "{acl}" if present,
322        otherwise return None
323    """
324    def getAclGroup(self, create=False):
325        if not self.dbGrp:
326            return None  # file not initialized
327        if "{acl}" in self.dbGrp:
328            return self.dbGrp["{acl}"]
329        if not create:
330            return None
331        self.dbGrp.create_group("{acl}")
332        return self.dbGrp["{acl}"]
333
334    """
335      getAclDtype - return detype for ACL
336    """
337    def getAclDtype(self):
338        fields = []
339        fields.append(('userid', np.int32))
340        fields.append(('create', np.int8))
341        fields.append(('read', np.int8))
342        fields.append(('update', np.int8))
343        fields.append(('delete', np.int8))
344        fields.append(('readACL', np.int8))
345        fields.append(('updateACL', np.int8))
346        dt = np.dtype(fields)
347        return dt
348
349    """
350      getAclDataset - return ACL datset for given uuid
351    """
352    def getAclDataset(self, obj_uuid, create=False):
353        acl_group = self.getAclGroup(create=create)
354
355        if acl_group is None:
356            return None
357
358        if obj_uuid in acl_group:
359            return acl_group[obj_uuid]
360
361        if not create:
362            return None
363
364        # create dataset
365        dt = self.getAclDtype()
366        acl_group.create_dataset(obj_uuid, (0,), dtype=dt, maxshape=(None,))
367        return acl_group[obj_uuid]
368
369    """
370      getNumAcls - return number of acls associatted with given uuid
371    """
372    def getNumAcls(self, obj_uuid):
373        acl_group = self.getAclGroup()
374        if acl_group is None:
375            return 0
376        if obj_uuid not in acl_group:
377            return 0
378        acls = acl_group[obj_uuid]
379        return acls.shape[0]
380
381    """
382      convertAclNdArrayToDict - helper function - return acl item to dict
383    """
384    def convertAclNdArrayToDict(self, acl_ndarray):
385        fields = acl_ndarray.dtype.fields.keys()
386        acl = {}
387        for field in fields:
388            value = int(acl_ndarray[field])
389            acl[field] = value
390        return acl
391
392    """
393      Get default acl - returns dict obj
394    """
395    def getDefaultAcl(self):
396        dt = self.getAclDtype()
397        acl = {}
398        for field in dt.fields.keys():
399            if field == 'userid':
400                acl[field] = 0
401            else:
402                acl[field] = 1  # default is allowed
403        return acl
404
405    """
406      getAcl - return ACL for given uuid and userid
407        returns ACL associated with the given uuid, or if none exists,
408        the ACL associatted with the root group.
409
410        If an ACL is not present for a userid/obj and ACL will be returned
411        via the following precedence:
412
413        1) obj_uuid, user_id
414        2) root_uuid, user_id
415        3) obj_uuid, 0
416        4) root_uuid, 0
417        5) 'all perm' ACL
418    """
419    def getAcl(self, obj_uuid, userid):
420        acl_grp = self.getAclGroup()
421
422        if acl_grp is not None:
423            acl = self.getAclByObjAndUser(obj_uuid, userid)
424            if acl is not None:
425                return acl
426
427            if obj_uuid != self.root_uuid and userid != 0:
428                # get the root acl for this user
429                acl = self.getAclByObjAndUser(self.root_uuid, userid)
430                if acl is not None:
431                    return acl
432
433            if userid != 0:
434                # get acl for default user
435                acl = self.getAclByObjAndUser(obj_uuid, 0)
436                if acl is not None:
437                    return acl
438
439            if obj_uuid != self.root_uuid:
440                # get root acl for default user
441                acl = self.getAclByObjAndUser(self.root_uuid, 0)
442                if acl is not None:
443                    return acl
444
445        # create an ACL with default permissions
446        acl = self.getDefaultAcl()
447
448        return acl
449
450    """
451      get ACL for specific uuid and user
452         return None if not found
453    """
454    def getAclByObjAndUser(self, obj_uuid, userid):
455
456        acl = None
457        acl_dset = self.getAclDataset(obj_uuid)
458
459        if acl_dset:
460            # iterate through elements, looking for user_id
461            acls = acl_dset[...]
462            num_acls = acl_dset.shape[0]
463            acl = None
464            for i in range(num_acls):
465                item = acls[i]
466                if item['userid'] == userid:
467                    acl = item
468                    break
469
470        if acl is not None:
471            acl = self.convertAclNdArrayToDict(acl)
472        return acl
473
474    """
475      getAcls - get all acls for given uuid
476    """
477
478    def getAcls(self, obj_uuid):
479
480        acls = []
481        acl_dset = self.getAclDataset(obj_uuid)
482
483        if acl_dset:
484            # iterate through elements, looking for user_id
485            num_acls = acl_dset.shape[0]
486
487            for i in range(num_acls):
488                item = acl_dset[i]
489                acl = self.convertAclNdArrayToDict(item)
490                acls.append(acl)
491
492        return acls
493
494    """
495      setAcl -  set the acl for given uuid.
496    """
497    def setAcl(self, obj_uuid, acl):
498        acl_dset = self.getAclDataset(obj_uuid, create=True)
499
500        if acl_dset is None:
501            msg = "Unexpected error acl not created for uuid:[" + obj_uuid + "]"
502            self.log.error(msg)
503            raise IOError(errno.EIO, msg)
504
505        userid = acl['userid']
506
507        # iterate through elements, looking for user_id
508        acls = acl_dset[...]
509        num_acls = acl_dset.shape[0]
510
511        user_index = None
512
513        for i in range(num_acls):
514            item = acls[i]
515            if item['userid'] == userid:
516                # update this element
517                user_index = i
518                break
519
520        if user_index is None:
521            # userid not found - add row
522            acl_dset.resize(((num_acls+1),))
523            user_index = num_acls
524
525        # update the acl dataset
526        item = acl_dset[user_index]
527        for field in acl.keys():
528            item[field] = acl[field]
529        acl_dset[user_index] = item  # save back to the file
530
    def initFile(self):
        """Initialize the UUID db structures if not already present.

        Read-only files keep the db info at the root of the external db
        file; writable files keep it in an in-file "__db__" group.  On
        first initialization every object in the file is scanned and
        assigned a UUID (see visit()).
        """
        # self.log.info("initFile")
        if self.readonly:
            self.dbGrp = self.dbf
            if "{groups}" in self.dbf:
                # file already initialized
                self.root_uuid = self.dbGrp.attrs["rootUUID"]
                return

        else:
            if "__db__" in self.f:
                # file already initialized
                self.dbGrp = self.f["__db__"]
                self.root_uuid = self.dbGrp.attrs["rootUUID"]
                return  # already initialized
            self.dbGrp = self.f.create_group("__db__")

        self.log.info("initializing file")
        if not self.root_uuid:
            self.root_uuid = str(uuid.uuid1())
        self.dbGrp.attrs["rootUUID"] = self.root_uuid
        self.dbGrp.create_group("{groups}")
        self.dbGrp.create_group("{datasets}")
        self.dbGrp.create_group("{datatypes}")
        self.dbGrp.create_group("{addr}") # store object address
        self.dbGrp.create_group("{ctime}") # stores create timestamps
        self.dbGrp.create_group("{mtime}") # store modified timestamps

        # seed the root group's timestamps from the file's on-disk mtime
        mtime = op.getmtime(self.f.filename)
        ctime = mtime
        self.setCreateTime(self.root_uuid, timestamp=ctime)
        self.setModifiedTime(self.root_uuid, timestamp=mtime)

        # walk every object in the file, registering UUIDs via visitObj
        self.f.visititems(visitObj)
565
566    def visit(self, path, obj):
567        name = obj.__class__.__name__
568        if len(path) >= 6 and path[:6] == '__db__':
569            return  # don't include the db objects
570        self.log.info('visit: ' + path + ' name: ' + name)
571        col = None
572        if name == 'Group':
573            col = self.dbGrp["{groups}"].attrs
574        elif name == 'Dataset':
575            col = self.dbGrp["{datasets}"].attrs
576        elif name == 'Datatype':
577            col = self.dbGrp["{datatypes}"].attrs
578        else:
579            msg = "Unknown object type: " + __name__ + " found during scan of HDF5 file"
580            self.log.error(msg)
581            raise IOError(errno.EIO, msg)
582        uuid1 = uuid.uuid1()  # create uuid
583        id = str(uuid1)
584        addrGrp = self.dbGrp["{addr}"]
585        if not self.readonly:
586            # storing db in the file itself, so we can link to the object directly
587            col[id] = obj.ref  # save attribute ref to object
588        else:
589            #store path to object
590            col[id] = obj.name
591        addr = h5py.h5o.get_info(obj.id).addr
592        # store reverse map as an attribute
593        addrGrp.attrs[str(addr)] = id
594
595    #
596    # Get Datset creation properties
597    #
598    def getDatasetCreationProps(self, dset_uuid):
599        prop_list = {}
600        if "{dataset_props}" not in self.dbGrp:
601            # no, group, so no properties
602            return prop_list  # return empty dict
603        dbPropsGrp = self.dbGrp["{dataset_props}"]
604
605        if dset_uuid not in dbPropsGrp.attrs:
606            return prop_list  # return empty dict
607        prop_str = dbPropsGrp.attrs[dset_uuid]
608        # expand json string
609        try:
610            prop_list = json.loads(prop_str)
611        except ValueError as ve:
612            msg = "Unable to load creation properties for dataset:[" + dset_uuid + "]: " + ve.message
613            self.log.error(msg)
614            raise IOError(errno.EIO, msg)
615
616        # fill in Filter class values
617        if 'filters' in prop_list:
618            prop_filters = prop_list['filters']
619            for prop_filter in prop_filters:
620                if 'class' not in prop_filter:
621                    filter_id = prop_filter['id']
622                    if filter_id in _HDF_FILTERS:
623                        hdf_filter = _HDF_FILTERS[filter_id]
624                        prop_filter['class'] = hdf_filter['class']
625                    else:
626                        prop_filter['class'] = 'H5Z_FILTER_USER'
627
628        return prop_list
629
630    #
631    # Set dataset creation property
632    #
633    def setDatasetCreationProps(self, dset_uuid, prop_dict):
634        self.log.info('setDataProp([' + dset_uuid + ']')
635        if not prop_dict:
636            # just ignore if empty dictionary
637            return
638        if "{dataset_props}" not in self.dbGrp:
639            self.dbGrp.create_group("{dataset_props}")
640        dbPropsGrp = self.dbGrp["{dataset_props}"]
641        if dset_uuid in dbPropsGrp.attrs:
642            # this should be write once
643            msg = "Unexpected error setting dataset creation properties for dataset:[" + dset_uuid + "]"
644            self.log.error(msg)
645            raise IOError(errno.EIO, msg)
646        prop_str = json.dumps(prop_dict)
647        dbPropsGrp.attrs[dset_uuid] = prop_str
648
649    def getUUIDByAddress(self, addr):
650        if "{addr}" not in self.dbGrp:
651            self.log.error("expected to find {addr} group")
652            return None
653        addrGrp = self.dbGrp["{addr}"]
654        obj_uuid = None
655        if str(addr) in addrGrp.attrs:
656            obj_uuid = addrGrp.attrs[str(addr)]
657        if obj_uuid and type(obj_uuid) is not str:
658            # convert bytes to unicode
659            obj_uuid = obj_uuid.decode('utf-8')
660        return obj_uuid
661
662    """
663     Get the number of links in a group to an object
664    """
665    def getNumLinksToObjectInGroup(self, grp, obj):
666        objAddr = h5py.h5o.get_info(obj.id).addr
667        numLinks = 0
668        for name in grp:
669            try:
670                child = grp[name]
671            except KeyError:
672                # UDLink? Ignore for now
673                self.log.info("ignoring link (UDLink?): " + name)
674                continue
675
676            addr = h5py.h5o.get_info(child.id).addr
677            if addr == objAddr:
678                numLinks = numLinks + 1
679
680        return numLinks
681
682    """
683     Get the number of links to the given object
684    """
685    def getNumLinksToObject(self, obj):
686        self.initFile()
687        groups = self.dbGrp["{groups}"]
688        numLinks = 0
689        # iterate through each group in the file and unlink tgt if it is linked
690        # by the group
691        for uuidName in groups:
692            # iterate through anonymous groups
693            grp = groups[uuidName]
694            nLinks = self.getNumLinksToObjectInGroup(grp, obj)
695            if nLinks > 0:
696                numLinks += nLinks
697        for uuidName in groups.attrs:
698            # now non anonymous groups
699            grpRef = groups.attrs[uuidName]
700            grp = self.f[grpRef]  # dereference
701            nLinks = self.getNumLinksToObjectInGroup(grp, obj)
702            if nLinks > 0:
703                numLinks += nLinks
704        # finally, check the root group
705        root = self.getObjByPath("/")
706        nLinks = self.getNumLinksToObjectInGroup(root, obj)
707        numLinks += nLinks
708
709        return numLinks
710
711    def getUUIDByPath(self, path):
712        self.initFile()
713        self.log.info("getUUIDByPath: [" + path + "]")
714        if len(path) >= 6 and path[:6] == '__db__':
715            msg = "getUUIDByPath called with invalid path: [" + path + "]"
716            self.log.error(msg)
717            raise IOError(errno.EIO, msg)
718        if path == '/':
719            # just return the root UUID
720            root_uuid = self.dbGrp.attrs["rootUUID"]
721            if root_uuid and type(root_uuid) is not str:
722                # convert bytes to unicode
723                root_uuid = root_uuid.decode('utf-8')
724            return root_uuid
725
726        obj = self.f[path]  # will throw KeyError if object doesn't exist
727        addr = h5py.h5o.get_info(obj.id).addr
728        obj_uuid = self.getUUIDByAddress(addr)
729        return obj_uuid
730
731    def getObjByPath(self, path):
732        if len(path) >= 6 and path[:6] == '__db__':
733            return None # don't include the db objects
734        obj = self.f[path]  # will throw KeyError if object doesn't exist
735        return obj
736
737    def getObjectByUuid(self, col_type, obj_uuid):
738        #col_type should be either "datasets", "groups", or "datatypes"
739        if col_type not in ("datasets", "groups", "datatypes"):
740            msg = "Unexpectd error, invalid col_type: [" + col_type + "]"
741            self.log.error(msg)
742            raise IOError(errno.EIO, msg)
743        if col_type == "groups" and obj_uuid == self.dbGrp.attrs["rootUUID"]:
744            return self.f['/']  # returns root group
745
746        obj = None  # Group, Dataset, or Datatype
747        col_name = '{' + col_type + '}'
748        # get the collection group for this collection type
749        col = self.dbGrp[col_name]
750        if obj_uuid in col.attrs:
751            ref = col.attrs[obj_uuid]
752            obj = self.f[ref]  # this works for read-only as well
753        elif obj_uuid in col:
754            # anonymous object
755            obj = col[obj_uuid]
756
757        return obj
758
759    def getDatasetObjByUuid(self, obj_uuid):
760        self.initFile()
761        self.log.info("getDatasetObjByUuid(" + obj_uuid + ")")
762
763        obj = self.getObjectByUuid("datasets", obj_uuid)
764
765        return obj
766
767    def getGroupObjByUuid(self, obj_uuid):
768        self.initFile()
769        self.log.info("getGroupObjByUuid(" + obj_uuid + ")")
770
771        obj = self.getObjectByUuid("groups", obj_uuid)
772
773        return obj
774
775    def getDatasetTypeItemByUuid(self, obj_uuid):
776        dset = self.getDatasetObjByUuid(obj_uuid)  # throws exception if not found
777        item = { 'id': obj_uuid }
778        item['type'] = getTypeItem(dset.dtype)
779        if self.update_timestamps:
780            item['ctime'] = self.getCreateTime(obj_uuid)
781            item['mtime'] = self.getModifiedTime(obj_uuid)
782
783        return item
784
785    """
786    getNullReference - return a null object reference
787    """
788    def getNullReference(self):
789        tmpGrp = None
790        if "{tmp}" not in self.dbGrp:
791            tmpGrp = self.dbGrp.create_group("{tmp}")
792        else:
793            tmpGrp = self.dbGrp["{tmp}"]
794        if 'nullref' not in tmpGrp:
795            dt = h5py.special_dtype(ref=h5py.Reference)
796            tmpGrp.create_dataset('nullref', (1,), dtype=dt)
797        nullref_dset = tmpGrp['nullref']
798        return nullref_dset[0]
799
800    """
801    getNullRegionReference - return a null region reference
802    """
803    def getNullRegionReference(self):
804        tmpGrp = None
805        if "{tmp}" not in self.dbGrp:
806            tmpGrp = self.dbGrp.create_group("{tmp}")
807        else:
808            tmpGrp = self.dbGrp["{tmp}"]
809            if 'nullregref' not in tmpGrp:
810                dt = h5py.special_dtype(ref=h5py.RegionReference)
811                tmpGrp.create_dataset('nullregref', (1,), dtype=dt)
812                nullregref_dset = tmpGrp['nullregref']
813                return nullregref_dset[0]
814
815    def getShapeItemByDsetObj(self, obj):
816        item = {}
817        if obj.shape is None:
818            # new with h5py 2.6, null space datasets will return None for shape
819            item['class'] = 'H5S_NULL'
820        elif len(obj.shape) == 0:
821            # check to see if this is a null space vs a scalar dataset we'll do
822            # this by seeing if an exception is raised when reading the dataset
823            # h5py issue https://github.com/h5py/h5py/issues/279 will provide a
824            # better way to determine null spaces
825            # Update 3/10/17: Above issue is closed, but waiting on 2.7 final release
826            try:
827                val = obj[...]
828                if val is None:
829                    self.log.warning("no value returned for scalar dataset")
830                item['class'] = 'H5S_SCALAR'
831            except IOError:
832                item['class'] = 'H5S_NULL'
833        else:
834            item['class'] = 'H5S_SIMPLE'
835            item['dims'] = obj.shape
836            maxshape = []
837            include_maxdims = False
838            for i in range(len(obj.shape)):
839                extent = 0
840                if len(obj.maxshape) > i:
841                    extent = obj.maxshape[i]
842                    if extent is None:
843                        extent = 0
844                    if extent > obj.shape[i] or extent == 0:
845                        include_maxdims = True
846                maxshape.append(extent)
847            if include_maxdims:
848                item['maxdims'] = maxshape
849        return item
850
851    def getShapeItemByAttrObj(self, obj):
852        item = {}
853        if obj.shape is None or obj.get_storage_size() == 0:
854            # If storage size is 0, assume this is a null space obj
855            # See: h5py issue https://github.com/h5py/h5py/issues/279
856            item['class'] = 'H5S_NULL'
857        else:
858            if obj.shape:
859                item['class'] = 'H5S_SIMPLE'
860                item['dims'] = obj.shape
861            else:
862                item['class'] = 'H5S_SCALAR'
863        return item
864
865    #
866    # Get dataset creation properties maintained by HDF5 library
867    #
868    def getHDF5DatasetCreationProperties(self, obj_uuid, type_class):
869        dset = self.getDatasetObjByUuid(obj_uuid)
870        #
871        # Fill in creation properties
872        #
873        creationProps = {}
874        plist = h5py.h5d.DatasetID.get_create_plist(dset.id)
875
876        # alloc time
877        nAllocTime = plist.get_alloc_time()
878        if nAllocTime == h5py.h5d.ALLOC_TIME_DEFAULT:
879            creationProps['allocTime'] = 'H5D_ALLOC_TIME_DEFAULT'
880        elif nAllocTime == h5py.h5d.ALLOC_TIME_LATE:
881            creationProps['allocTime'] = 'H5D_ALLOC_TIME_LATE'
882        elif nAllocTime == h5py.h5d.ALLOC_TIME_EARLY:
883            creationProps['allocTime'] = 'H5D_ALLOC_TIME_EARLY'
884        elif nAllocTime == h5py.h5d.ALLOC_TIME_INCR:
885            creationProps['allocTime'] = 'H5D_ALLOC_TIME_INCR'
886        else:
887            self.log.warning("Unknown alloc time value: " + str(nAllocTime))
888
889        # fill time
890        nFillTime = plist.get_fill_time()
891        if nFillTime == h5py.h5d.FILL_TIME_ALLOC:
892            creationProps['fillTime'] = 'H5D_FILL_TIME_ALLOC'
893        elif nFillTime == h5py.h5d.FILL_TIME_NEVER:
894            creationProps['fillTime'] = 'H5D_FILL_TIME_NEVER'
895        elif nFillTime == h5py.h5d.FILL_TIME_IFSET:
896            creationProps['fillTime'] = 'H5D_FILL_TIME_IFSET'
897        else:
898            self.log.warning("unknown fill time value: " + str(nFillTime))
899
900        if type_class not in ('H5T_VLEN', 'H5T_OPAQUE'):
901            if plist.fill_value_defined() == h5py.h5d.FILL_VALUE_USER_DEFINED:
902                creationProps['fillValue'] =  self.bytesArrayToList(dset.fillvalue)
903
904        # layout
905        nLayout = plist.get_layout()
906        if nLayout == h5py.h5d.COMPACT:
907            creationProps['layout'] = {'class': 'H5D_COMPACT'}
908        elif nLayout == h5py.h5d.CONTIGUOUS:
909            creationProps['layout'] = {'class': 'H5D_CONTIGUOUS'}
910        elif nLayout == h5py.h5d.CHUNKED:
911            creationProps['layout'] = {'class': 'H5D_CHUNKED', 'dims': dset.chunks }
912        else:
913            self.log.warning("Unknown layout value:" + str(nLayout))
914
915        num_filters = plist.get_nfilters()
916        filter_props = []
917        if num_filters:
918            for n in range(num_filters):
919                filter_info = plist.get_filter(n)
920                opt_values = filter_info[2]
921                filter_prop = {}
922                filter_id = filter_info[0]
923                filter_prop['id'] = filter_id
924                if filter_info[3]:
925                    filter_prop['name'] = self.bytesArrayToList(filter_info[3])
926                if filter_id in _HDF_FILTERS:
927                    hdf_filter = _HDF_FILTERS[filter_id]
928                    filter_prop['class'] = hdf_filter['class']
929                    if 'options' in hdf_filter:
930                        filter_opts = hdf_filter['options']
931                        for i in range(len(filter_opts)):
932                            if len(opt_values) <= i:
933                                break  # end of option values
934                            opt_value = opt_values[i]
935                            opt_value_enum = None
936                            option_name = filter_opts[i]
937                            if option_name in _HDF_FILTER_OPTION_ENUMS:
938                                option_enums = _HDF_FILTER_OPTION_ENUMS[option_name]
939                                if opt_value in option_enums:
940                                    opt_value_enum = option_enums[opt_value]
941                            if opt_value_enum:
942                                filter_prop[option_name] = opt_value_enum
943                            else:
944                                filter_prop[option_name] = opt_value
945                else:
946                    # custom filter
947                    filter_prop['class'] = 'H5Z_FILTER_USER'
948                    if opt_values:
949                        filter_prop['parameters'] = opt_values
950                filter_props.append(filter_prop)
951            creationProps['filters'] = filter_props
952
953        return creationProps
954
955    #
956    # Get dataset information - type, shape, num attributes, creation properties
957    #
958    def getDatasetItemByUuid(self, obj_uuid):
959        dset = self.getDatasetObjByUuid(obj_uuid)
960        if dset is None:
961            if self.getModifiedTime(obj_uuid, useRoot=False):
962                msg = "Dataset with uuid: " + obj_uuid + " has been previously deleted"
963                self.log.info(msg)
964                raise IOError(errno.ENOENT, msg)
965            else:
966                msg = "Dataset with uuid: " + obj_uuid + " was not found"
967                self.log.info(msg)
968                raise IOError(errno.ENXIO, msg)
969
970        # fill in the item info for the dataset
971        item = { 'id': obj_uuid }
972
973        alias = []
974        if dset.name and not dset.name.startswith("/__db__"):
975            alias.append(dset.name)   # just use the default h5py path for now
976        item['alias'] = alias
977
978        item['attributeCount'] = len(dset.attrs)
979
980        # check if the dataset is using a committed type
981        typeid = h5py.h5d.DatasetID.get_type(dset.id)
982        typeItem = None
983        if h5py.h5t.TypeID.committed(typeid):
984            type_uuid = None
985            addr = h5py.h5o.get_info(typeid).addr
986            type_uuid = self.getUUIDByAddress(addr)
987            committedType = self.getCommittedTypeItemByUuid(type_uuid)
988            typeItem = committedType['type']
989            typeItem['uuid'] = type_uuid
990        else:
991            typeItem = getTypeItem(dset.dtype)
992
993        item['type'] = typeItem
994
995        # get shape
996        item['shape'] = self.getShapeItemByDsetObj(dset)
997
998        if self.update_timestamps:
999            item['ctime'] = self.getCreateTime(obj_uuid)
1000            item['mtime'] = self.getModifiedTime(obj_uuid)
1001
1002        creationProps = self.getDatasetCreationProps(obj_uuid)
1003        if creationProps:
1004            # if chunks is not in the db props, add it from the dataset prop
1005            # (so auto-chunk values can be returned)
1006            if dset.chunks and 'layout' not in creationProps:
1007                creationProps['layout'] = {'class': 'H5D_CHUNKED',
1008                                           'dims': dset.chunks}
1009        else:
1010            # no db-tracked creation properties, pull properties from library
1011            creationProps = self.getHDF5DatasetCreationProperties(obj_uuid, typeItem['class'])
1012
1013        if creationProps:
1014            item['creationProperties'] = creationProps
1015
1016        return item
1017
1018    """
1019    createTypeFromItem - create type given dictionary definition
1020    """
1021    def createTypeFromItem(self, attr_type):
1022        dt = None
1023
1024        if type(attr_type) in (six.text_type, six.binary_type) and len(attr_type) == UUID_LEN:
1025            # assume attr_type is a uuid of a named datatype
1026            tgt = self.getCommittedTypeObjByUuid(attr_type)
1027            if tgt is None:
1028                msg = "Unable to create attribute, committed type with uuid of: " + attr_type + " not found"
1029                self.log.info(msg)
1030                raise IOError(errno.ENXIO, msg)
1031            dt = tgt  # can use the object as the dt parameter
1032        else:
1033            try:
1034                dt = createDataType(attr_type)
1035            except KeyError as ke:
1036                msg = "Unable to create type: " + ke.message
1037                self.log.info(msg)
1038                raise IOError(errno.EINVAL, msg)
1039            except TypeError as te:
1040                msg = "Unable to create type: " + str(te)
1041                self.log.info(msg)
1042                raise IOError(errno.EINVAL, msg)
1043            if dt is None:
1044                msg = "Unexpected error creating type"
1045                self.log.error(msg)
1046                raise IOError(errno, errno.EIO, msg)
1047        return dt
1048
1049    """
1050    createCommittedType - creates new named datatype
1051    Returns item
1052    """
1053    def createCommittedType(self, datatype, obj_uuid=None):
1054        self.log.info("createCommittedType")
1055        self.initFile()
1056        if self.readonly:
1057            msg = "Can't create committed type (updates are not allowed)"
1058            self.log.info(msg)
1059            raise IOError(errno.EPERM, msg)
1060        datatypes = self.dbGrp["{datatypes}"]
1061        if not obj_uuid:
1062            obj_uuid = str(uuid.uuid1())
1063        dt = self.createTypeFromItem(datatype)
1064
1065        datatypes[obj_uuid] = dt
1066
1067        if obj_uuid not in datatypes:
1068            msg = "Unexpected failure to create committed datatype"
1069            self.log.error(msg)
1070            raise IOError(errno.EIO, msg)
1071        newType = datatypes[obj_uuid] # this will be a h5py Datatype class
1072        # store reverse map as an attribute
1073        addr = h5py.h5o.get_info(newType.id).addr
1074        addrGrp = self.dbGrp["{addr}"]
1075        addrGrp.attrs[str(addr)] = obj_uuid
1076        # set timestamp
1077        now = time.time()
1078        self.setCreateTime(obj_uuid, timestamp=now)
1079        self.setModifiedTime(obj_uuid, timestamp=now)
1080        item = { 'id': obj_uuid }
1081        item['attributeCount'] = len(newType.attrs)
1082        #item['type'] = hdf5dtype.getTypeItem(datatype.dtype)
1083        if self.update_timestamps:
1084            item['ctime'] = self.getCreateTime(obj_uuid)
1085            item['mtime'] = self.getModifiedTime(obj_uuid)
1086        return item
1087
1088    """
1089    getCommittedTypeObjByUuid - get obj from {datatypes} collection
1090    Returns type obj
1091    """
1092    def getCommittedTypeObjByUuid(self, obj_uuid):
1093        self.log.info("getCommittedTypeObjByUuid(" + obj_uuid + ")")
1094        self.initFile()
1095        datatype = None
1096        datatypesGrp = self.dbGrp["{datatypes}"]
1097        if obj_uuid in datatypesGrp.attrs:
1098            typeRef = datatypesGrp.attrs[obj_uuid]
1099            # typeRef could be a reference or (for read-only) a path
1100            datatype = self.f[typeRef]
1101        elif obj_uuid in datatypesGrp:
1102            datatype = datatypesGrp[obj_uuid]  # non-linked type
1103        else:
1104            msg = "Committed datatype: " + obj_uuid + " not found"
1105            self.log.info(msg)
1106
1107        return datatype
1108
1109    """
1110    getCommittedTypeItemByUuid - get json from {datatypes} collection
1111    Returns type obj
1112    """
1113    def getCommittedTypeItemByUuid(self, obj_uuid):
1114        self.log.info("getCommittedTypeItemByUuid(" + obj_uuid + ")")
1115        self.initFile()
1116        datatype = self.getCommittedTypeObjByUuid(obj_uuid)
1117
1118        if datatype is None:
1119            if self.getModifiedTime(obj_uuid, useRoot=False):
1120                msg = "Datatype with uuid: " + obj_uuid + " has been previously deleted"
1121                self.log.info(msg)
1122                raise IOError(errno.ENOENT, msg)
1123            else:
1124                msg = "Datatype with uuid: " + obj_uuid + " was not found"
1125                self.log.info(msg)
1126                raise IOError(errno.ENXIO, msg)
1127
1128        item = { 'id': obj_uuid }
1129        alias = []
1130        if datatype.name and not datatype.name.startswith("/__db__"):
1131            alias.append(datatype.name)   # just use the default h5py path for now
1132        item['alias'] = alias
1133        item['attributeCount'] = len(datatype.attrs)
1134        item['type'] = getTypeItem(datatype.dtype)
1135        if self.update_timestamps:
1136            item['ctime'] = self.getCreateTime(obj_uuid)
1137            item['mtime'] = self.getModifiedTime(obj_uuid)
1138
1139        return item
1140
1141    """
1142      Get attribute given an object and name
1143      returns: JSON object
1144    """
1145    def getAttributeItemByObj(self, obj, name, includeData=True):
1146        if name not in obj.attrs:
1147            msg = "Attribute: [" + name + "] not found in object: " + obj.name
1148            self.log.info(msg)
1149            return None
1150
1151        # get the attribute!
1152        attrObj = h5py.h5a.open(obj.id, np.string_(name))
1153        attr = None
1154
1155        item = { 'name': name }
1156
1157        # check if the dataset is using a committed type
1158        typeid = attrObj.get_type()
1159        typeItem = None
1160        if h5py.h5t.TypeID.committed(typeid):
1161            type_uuid = None
1162            addr = h5py.h5o.get_info(typeid).addr
1163            type_uuid = self.getUUIDByAddress(addr)
1164            committedType = self.getCommittedTypeItemByUuid(type_uuid)
1165            typeItem = committedType['type']
1166            typeItem['uuid'] = type_uuid
1167        else:
1168            typeItem = getTypeItem(attrObj.dtype)
1169        item['type'] = typeItem
1170        # todo - don't include data for OPAQUE until JSON serialization
1171        # issues are addressed
1172
1173        if type(typeItem) == dict and typeItem['class'] in ('H5T_OPAQUE'):
1174            includeData = False
1175
1176        shape_json = self.getShapeItemByAttrObj(attrObj)
1177        item['shape'] = shape_json
1178        if shape_json['class'] == 'H5S_NULL':
1179            includeData = False
1180        if includeData:
1181            try:
1182                attr = obj.attrs[name]  # returns a numpy array
1183            except TypeError:
1184                self.log.warning("type error reading attribute")
1185
1186        if includeData and attr is not None:
1187            if shape_json['class'] == 'H5S_SCALAR':
1188                data = self.getDataValue(typeItem, attr)
1189            else:
1190                dims = shape_json["dims"]
1191                rank = len(dims)
1192                # convert numpy object to python list
1193                # values = self.toList(typeItem, attr)
1194                data = self.toList(rank, typeItem, attr)
1195            #data = self.bytesToString(data)
1196            item['value'] = data
1197        # timestamps will be added by getAttributeItem()
1198        return item
1199
1200    def getAttributeItems(self, col_type, obj_uuid, marker=None, limit=0):
1201        self.log.info("db.getAttributeItems(" + obj_uuid + ")")
1202        if marker:
1203            self.log.info("...marker: " + marker)
1204        if limit:
1205            self.log.info("...limit: " + str(limit))
1206
1207        self.initFile()
1208        obj = self.getObjectByUuid(col_type, obj_uuid)
1209        if obj is None:
1210            msg = "Object: " + obj_uuid + " could not be loaded"
1211            self.log.info(msg)
1212            raise IOError(errno.ENXIO, msg)
1213
1214        items = []
1215        gotMarker = True
1216        if marker is not None:
1217            gotMarker = False
1218        count = 0
1219        for name in obj.attrs:
1220            if not gotMarker:
1221                if name == marker:
1222                    gotMarker = True
1223                    continue  # start filling in result on next pass
1224                else:
1225                    continue  # keep going!
1226            item = self.getAttributeItemByObj(obj, name, False)
1227            # mix-in timestamps
1228            if self.update_timestamps:
1229                item['ctime'] = self.getCreateTime(obj_uuid, objType="attribute", name=name)
1230                item['mtime'] = self.getModifiedTime(obj_uuid, objType="attribute", name=name)
1231
1232            items.append(item)
1233            count += 1
1234            if limit > 0 and count == limit:
1235                break  # return what we got
1236        return items
1237
1238    def getAttributeItem(self, col_type, obj_uuid, name):
1239        self.log.info("getAttributeItemByUuid(" + col_type + ", " + obj_uuid
1240                      + ", " + name + ")")
1241        self.initFile()
1242        obj = self.getObjectByUuid(col_type, obj_uuid)
1243        if obj is None:
1244            msg = "Parent object: " + obj_uuid + " of attribute not found"
1245            self.log.info(msg)
1246            raise IOError(errno.ENXIO, msg)
1247            return None
1248        item = self.getAttributeItemByObj(obj, name)
1249        if item is None:
1250            if self.getModifiedTime(obj_uuid, objType="attribute", name=name, useRoot=False):
1251                # attribute has been removed
1252                msg = "Attribute: [" + name + "] of object: " + obj_uuid + " has been previously deleted"
1253                self.log.info(msg)
1254                raise IOError(errno.ENOENT, msg)
1255            msg = "Attribute: [" + name + "] of object: " + obj_uuid + " not found"
1256            self.log.info(msg)
1257            raise IOError(errno.ENXIO, msg)
1258        # mix-in timestamps
1259        if self.update_timestamps:
1260            item['ctime'] = self.getCreateTime(obj_uuid, objType="attribute", name=name)
1261            item['mtime'] = self.getModifiedTime(obj_uuid, objType="attribute", name=name)
1262
1263        return item
1264
1265    """
1266    isDimensionList - return True if this attribute json looks like a dimension list
1267    """
1268    def isDimensionList(self, attr_name, attr_type):
1269        if attr_name != "DIMENSION_LIST":
1270            return False
1271        if type(attr_type) is not dict:
1272            return False
1273        if attr_type['class'] != "H5T_VLEN":
1274            return False
1275        base_type = attr_type['base']
1276        if base_type['class'] != 'H5T_REFERENCE':
1277            return False
1278        return True
1279
1280    """
1281    isReferenceList - return True if this attribute json looks like a reference list
1282    """
1283    def isReferenceList(self, attr_name, attr_type):
1284        if attr_name != "REFERENCE_LIST":
1285            return False
1286        if type(attr_type) is not dict:
1287            return False
1288        if attr_type['class'] != "H5T_COMPOUND":
1289            return False
1290
1291        return True
1292
1293    """
1294     makeDimensionList - work-around for h5py problems saving dimension list -
1295        types which are vlen's of references are not working directly, so use dim_scale api
1296        Note: this is a work-around for h5py issue:
1297         https://github.com/h5py/h5py/issues/553
1298    """
1299    def makeDimensionList(self, obj, shape, value):
1300        dset_refs = self.listToRef(value)
1301        for i in range(len(dset_refs)):
1302            refs = dset_refs[i]
1303            if type(refs) not in (list, tuple):
1304                msg = "Invalid dimension list value"
1305                self.log.info(msg)
1306                raise IOError(errno.EINVAL, msg)
1307            for j in range(len(refs)):
1308                scale_obj = self.f[refs[j]]
1309                if scale_obj is None:
1310                    self.log.warning("dimension list, missing obj reference: " + value[i])
1311                    continue
1312                if "CLASS" not in scale_obj.attrs:
1313                    self.log.warning("dimension list, no scale obj")
1314                    continue
1315                if scale_obj.attrs["CLASS"] != b"DIMENSION_SCALE":
1316                    self.log.warning("dimension list, invalid class for scale obj")
1317                    continue
1318
1319                try:
1320                    h5py.h5ds.attach_scale(obj.id, scale_obj.id, i)
1321                except RuntimeError:
1322                    self.log.error("got runtime error attaching scale")
1323
1324    """
1325    writeNdArrayToAttribute - create an attribute given numpy array
1326    """
1327    def writeNdArrayToAttribute(self, attrs, attr_name, npdata, shape, dt):
1328        attrs.create(attr_name, npdata, shape=shape, dtype=dt)
1329
1330    """
1331    create a scalar string attribute using nullterm padding
1332    """
1333    def makeNullTermStringAttribute(self, obj, attr_name, strLength, value):
1334        self.log.info(
1335            "make nullterm, length: " + str(strLength) + " value:" + str(value))
1336        if type(value) == unicode:
1337            value = str(value)
1338        if strLength < len(value):
1339            self.log.warning("makeNullTermStringAttribute: value string longer than length")
1340            #value = value[:strLength]  # truncate to length
1341
1342
1343        if six.PY3 and type(attr_name) is str:
1344            try:
1345                attr_name = attr_name.encode('ascii')
1346            except UnicodeDecodeError:
1347                raise TypeError("non-ascii attribute name not allowed")
1348
1349        # create the attribute
1350        tid = h5py.h5t.TypeID.copy(h5py.h5t.C_S1)
1351        tid.set_size(strLength)
1352        tid.set_strpad(h5py.h5t.STR_NULLTERM)
1353        sid = h5py.h5s.create(h5py.h5s.SCALAR)
1354        aid = h5py.h5a.create(obj.id, attr_name, tid, sid)
1355        # write the value
1356        dtype_code = 'S' + str(strLength)
1357        ndarr = np.array(value, dtype=np.dtype(dtype_code))
1358        aid.write(ndarr)
1359
1360    def makeAttribute(self, obj, attr_name, shape, attr_type, value):
1361        """
1362        makeAttribute - create an attribute (except for dimension list
1363        attribute)
1364        """
1365        is_committed_type = False
1366        if type(attr_type) in (str, unicode) and len(attr_type) == UUID_LEN:
1367            # assume attr_type is a uuid of a named datatype
1368            is_committed_type = True
1369
1370        dt = self.createTypeFromItem(attr_type)
1371
1372        if shape is None:
1373            self.log.info("shape is null - will create null space attribute")
1374            # create null space attribute
1375            # null space datasets/attributes not supported in h5py yet:
1376            # See: https://github.com/h5py/h5py/issues/279
1377            # work around this by using low-level interface.
1378            # first create a temp scalar dataset so we can pull out the typeid
1379            tmpGrp = None
1380            if "{tmp}" not in self.dbGrp:
1381                tmpGrp = self.dbGrp.create_group("{tmp}")
1382            else:
1383                tmpGrp = self.dbGrp["{tmp}"]
1384            tmpGrp.attrs.create(attr_name, 0, shape=(), dtype=dt)
1385            if six.PY3:
1386                b_attr_name = attr_name.encode('utf-8')
1387                tmpAttr = h5py.h5a.open(tmpGrp.id, name=b_attr_name)
1388            else:
1389                tmpAttr = h5py.h5a.open(tmpGrp.id, name=attr_name)
1390            if not tmpAttr:
1391                msg = "Unexpected error creating datatype for nullspace attribute"
1392                self.log.error(msg)
1393                raise IOError(errno.EIO, msg)
1394            tid = tmpAttr.get_type()
1395            sid = sid = h5py.h5s.create(h5py.h5s.NULL)
1396            # now create the permanent attribute
1397            if attr_name in obj.attrs:
1398                self.log.info("deleting attribute: " + attr_name)
1399                del obj.attrs[attr_name]
1400            if six.PY3:
1401                attr_id = h5py.h5a.create(obj.id, b_attr_name, tid, sid)
1402            else:
1403                attr_id = h5py.h5a.create(obj.id, attr_name, tid, sid)
1404            # delete the temp attribute
1405            del tmpGrp.attrs[attr_name]
1406            if not attr_id:
1407                msg = "Unexpected error creating nullspace attribute"
1408                self.log.error(msg)
1409                raise IOError(errno.EIO, msg)
1410        else:
1411            if type(value) is tuple:
1412                value = list(value)
1413            if type(shape) is list:
1414                shape = tuple(shape)
1415            if not is_committed_type:
1416                # apparently committed types can not be used as reference types
1417                # todo - verify why that is
1418
1419                rank = len(shape)
1420                # convert python list to numpy object
1421                strPad = None
1422                strLength = 0
1423                if type(attr_type) == dict and attr_type['class'] == 'H5T_STRING' and "strPad" in attr_type:
1424                    strPad = attr_type["strPad"]
1425                    strLength = attr_type['length']
1426
1427                if rank == 0 and type(strLength) == int and strPad == "H5T_STR_NULLTERM":
1428                    self.makeNullTermStringAttribute(obj, attr_name, strLength, value)
1429                else:
1430                    typeItem = getTypeItem(dt)
1431                    value = self.toRef(rank, typeItem, value)
1432
1433                    # create numpy array
1434                    npdata = np.zeros(shape, dtype=dt)
1435
1436                    if rank == 0:
1437                        npdata[()] = self.toNumPyValue(attr_type, value, npdata[()])
1438                    else:
1439                        self.toNumPyArray(rank, attr_type, value, npdata)
1440
1441                    self.writeNdArrayToAttribute(obj.attrs, attr_name, npdata, shape, dt)
1442
1443    """
1444    createAttribute - create an attribute
1445    """
1446    def createAttribute(self, col_name, obj_uuid, attr_name, shape, attr_type, value):
1447        self.log.info("createAttribute: [" + attr_name + "]")
1448
1449        self.initFile()
1450        if self.readonly:
1451            msg = "Unable to create attribute (updates are not allowed)"
1452            self.log.info(msg)
1453            raise IOError(errno.EPERM, msg)
1454        obj = self.getObjectByUuid(col_name, obj_uuid)
1455        if not obj:
1456            msg = "Object with uuid: " + obj_uuid + " not found"
1457            self.log.info(msg)
1458            raise IOError(errno.ENXIO, msg)
1459
1460        if self.isDimensionList(attr_name, attr_type):
1461            self.makeDimensionList(obj, shape, value)
1462        elif self.isReferenceList(attr_name, attr_type):
1463            pass  # Skip since reference list will be created by attach scale
1464        else:
1465            self.makeAttribute(obj, attr_name, shape, attr_type, value)
1466
1467        now = time.time()
1468        self.setCreateTime(obj_uuid, objType="attribute", name=attr_name, timestamp=now)
1469        self.setModifiedTime(obj_uuid, objType="attribute", name=attr_name, timestamp=now)
1470        self.setModifiedTime(obj_uuid, timestamp=now)  # owner entity is modified
1471
1472    def deleteAttribute(self, col_name, obj_uuid, attr_name):
1473        self.initFile()
1474        if self.readonly:
1475            msg = "Unable to delete attribute (updates are not allowed)"
1476            self.log.info(msg)
1477            raise IOError(errno.EPERM, msg)
1478        obj = self.getObjectByUuid(col_name, obj_uuid)
1479
1480        if attr_name not in obj.attrs:
1481            msg = "Attribute with name: [" + attr_name + "] of object: " + obj_uuid + " not found"
1482            self.log.info(msg)
1483            raise IOError(errno.ENXIO, msg)
1484
1485        del obj.attrs[attr_name]
1486        now = time.time()
1487        self.setModifiedTime(obj_uuid, objType="attribute", name=attr_name, timestamp=now)
1488
1489        return True
1490
1491    """
1492      Return a json-serializable representation of the numpy value
1493    """
1494    def getDataValue(self, typeItem, value, dimension=0, dims=None):
1495        if dimension > 0:
1496            if type(dims) not in (list, tuple):
1497                msg = "unexpected type for type array dimensions"
1498                self.log.error(msg)
1499                raise IOError(errno.EIO, msg)
1500            out = []
1501            rank = len(dims)
1502            if dimension > rank:
1503                msg = "unexpected dimension for type array"
1504                self.log.error(msg)
1505                raise IOError(errno.EIO, msg)
1506            nElements = dims[rank - dimension]
1507            for i in range(nElements):
1508                item_value = self.getDataValue(typeItem, value[i],
1509                                               dimension=(dimension-1),
1510                                               dims=dims)
1511                out.append(item_value)
1512            return out  # done for array case
1513
1514        out = None
1515        typeClass = typeItem['class']
1516        if isinstance(value, (np.ndarray, np.generic)):
1517            value = value.tolist()  # convert numpy object to list
1518        if typeClass == 'H5T_COMPOUND':
1519
1520            if type(value) not in (list, tuple):
1521                msg = "Unexpected type for compound value"
1522                self.log.error(msg)
1523                raise IOError(errno.EIO, msg)
1524
1525            fields = typeItem['fields']
1526            if len(fields) != len(value):
1527                msg = "Number of elements in compound type does not match type"
1528                self.log.error(msg)
1529                raise IOError(errno.EIO, msg)
1530            nFields = len(fields)
1531            out = []
1532            for i in range(nFields):
1533                field = fields[i]
1534                item_value = self.getDataValue(field['type'], value[i])
1535                out.append(item_value)
1536        elif typeClass == 'H5T_VLEN':
1537            if type(value) not in (list, tuple):
1538                msg = "Unexpected type for vlen value"
1539                self.log.error(msg)
1540                raise IOError(errno.EIO, msg)
1541
1542            baseType = typeItem['base']
1543            out = []
1544            nElements = len(value)
1545            for i in range(nElements):
1546                item_value = self.getDataValue(baseType, value[i])
1547                out.append(item_value)
1548        elif typeClass == 'H5T_REFERENCE':
1549            out = self.refToList(value)
1550        elif typeClass == 'H5T_OPAQUE':
1551            out = "???"  # todo
1552        elif typeClass == 'H5T_ARRAY':
1553            type_dims = typeItem["dims"]
1554            if type(type_dims) not in (list, tuple):
1555                msg = "unexpected type for type array dimensions"
1556                self.log.error(msg)
1557                raise IOError(errno.EIO, msg)
1558            rank = len(type_dims)
1559            baseType = typeItem['base']
1560            out = self.getDataValue(baseType, value, dimension=rank,
1561                                    dims=type_dims)
1562
1563        elif typeClass in ('H5T_INTEGER', 'H5T_FLOAT', 'H5T_ENUM'):
1564            out = value  # just copy value
1565        elif typeClass == 'H5T_STRING':
1566            if six.PY3:
1567                if "charSet" in typeItem:
1568                    charSet = typeItem["charSet"]
1569                else:
1570                    charSet =  "H5T_CSET_ASCII"
1571                if charSet == "H5T_CSET_ASCII":
1572                    out = value.decode("utf-8")
1573                else:
1574                    out = value
1575            else:
1576                # things are simpler in PY2
1577                out = value
1578        else:
1579            msg = "Unexpected type class: " + typeClass
1580            self.log.info(msg)
1581            raise IOError(errno.ENINVAL, msg)
1582        return out
1583
1584    """
1585      Return a numpy value based on json representation
1586    """
1587    def getRefValue(self, typeItem, value):
1588        out = None
1589        typeClass = typeItem['class']
1590        if typeClass == 'H5T_COMPOUND':
1591
1592            if type(value) not in (list, tuple):
1593                msg = "Unexpected type for compound value"
1594                self.log.error(msg)
1595                raise IOError(errno.EIO, msg)
1596
1597            fields = typeItem['fields']
1598            if len(fields) != len(value):
1599                msg = "Number of elements in compound type does not match type"
1600                self.log.error(msg)
1601                raise IOError(errno.EIO, msg)
1602            nFields = len(fields)
1603            out = []
1604            for i in range(nFields):
1605                field = fields[i]
1606                item_value = self.getRefValue(field['type'], value[i])
1607                out.append(item_value)
1608        elif typeClass == 'H5T_VLEN':
1609            if type(value) not in (list, tuple):
1610                msg = "Unexpected type for vlen value"
1611                self.log.error(msg)
1612                raise IOError(errno.EIO, msg)
1613
1614            baseType = typeItem['base']
1615            out = []
1616            nElements = len(value)
1617            for i in range(nElements):
1618                item_value = self.getRefValue(baseType, value[i])
1619                out.append(item_value)
1620        elif typeClass == 'H5T_REFERENCE':
1621            out = self.listToRef(value)
1622        elif typeClass == 'H5T_OPAQUE':
1623            out = "???"  # todo
1624        elif typeClass == 'H5T_ARRAY':
1625            out = value
1626        elif typeClass in ('H5T_INTEGER', 'H5T_FLOAT', 'H5T_ENUM'):
1627            out = value  # just copy value
1628        elif typeClass == 'H5T_STRING':
1629            if typeItem['charSet'] == 'H5T_CSET_UTF8':
1630                # out = value.encode('utf-8')
1631                out = value
1632            else:
1633                out = value.encode()
1634        else:
1635            msg = "Unexpected type class: " + typeClass
1636            self.log.info(msg)
1637            raise IOError(errno.ENINVAL, msg)
1638
1639        if type(out) == list:
1640            out = tuple(out)  # convert to tuple
1641        return out
1642
1643    """
1644      Return a numpy value based on json representation
1645    """
1646    def toNumPyValue(self, typeItem, src, des):
1647
1648        typeClass = 'H5T_INTEGER'  # default to int type
1649        if type(typeItem) is dict:
1650            typeClass = typeItem['class']
1651        if typeClass == 'H5T_COMPOUND':
1652            fields = typeItem['fields']
1653            if len(fields) != len(src):
1654                msg = "Number of elements in compound type does not match type"
1655                self.log.error(msg)
1656                raise IOError(errno.EIO, msg)
1657            nFields = len(fields)
1658
1659            for i in range(nFields):
1660                field = fields[i]
1661                field_name = field['name']
1662                des[field_name] = src[i]
1663
1664        elif typeClass == 'H5T_VLEN':
1665            if type(src) not in (list, tuple):
1666                msg = "Unexpected type for vlen value"
1667                self.log.error(msg)
1668                raise IOError(errno.EIO, msg)
1669
1670            baseType = typeItem['base']
1671
1672            dt = self.createTypeFromItem(baseType)
1673            des = np.array(src, dtype=dt)
1674
1675        elif typeClass == 'H5T_REFERENCE':
1676            des = src  # self.listToRef(src)
1677
1678        elif typeClass == 'H5T_OPAQUE':
1679            des = "???"  # todo
1680        elif typeClass == 'H5T_ARRAY':
1681            des = src
1682        elif typeClass in ('H5T_INTEGER', 'H5T_FLOAT', 'H5T_ENUM'):
1683            des = src  # just copy value
1684        elif typeClass == 'H5T_STRING':
1685            if typeItem['charSet'] == 'H5T_CSET_UTF8':
1686                des = src  # src.encode('utf-8')
1687            else:
1688                if type(src) is str:
1689                    try:
1690                        src.encode('ascii')
1691                    except UnicodeDecodeError:
1692                        raise TypeError("non-ascii value not allowed with H5T_CSET_ASCII")
1693                des = src
1694
1695        else:
1696            msg = "Unexpected type class: " + typeClass
1697            self.log.info(msg)
1698            raise IOError(errno.ENINVAL, msg)
1699        return des
1700
1701    """
1702       copy src data to numpy array
1703    """
1704    def toNumPyArray(self, rank, typeItem, src, des):
1705
1706        if rank == 0:
1707            msg = "unexpected rank value"
1708            self.log.error(msg)
1709            raise IOError(errno.EIO, msg)  # shouldn't be called with rank 0
1710
1711        for i in range(len(des)):
1712            des_sec = des[i]  # numpy slab
1713
1714            src_sec = src[i]
1715
1716            if rank > 1:
1717                self.toNumPyArray(rank - 1, typeItem, src_sec, des_sec)
1718            else:
1719                rv = self.toNumPyValue(typeItem, src_sec, des_sec)
1720                # if the numpy object is writeable, des_sec will be
1721                # already updated.  Otherwise, update the des by assignment
1722                if not hasattr(des_sec, 'flags') or not des_sec.flags['WRITEABLE']:
1723                    des[i] = rv
1724
1725    """
1726       Convert json list to h5py compatible values
1727    """
1728    def toRef(self, rank, typeItem, data):
1729        out = None
1730
1731        if type(typeItem) in (str, unicode):
1732            # commited type - get json representation
1733            committed_type_item = self.getCommittedTypeItemByUuid(typeItem)
1734            typeItem = committed_type_item['type']
1735
1736        typeClass = typeItem['class']
1737        if typeClass in ('H5T_INTEGER', 'H5T_FLOAT'):
1738            out = data   # just use as is
1739
1740        elif rank == 0:
1741            # scalar value
1742            out = self.getRefValue(typeItem, data)
1743        else:
1744            out = []
1745            for item in data:
1746                if rank > 1:
1747                    out_item = self.toRef(rank - 1, typeItem, item)
1748                    out.append(out_item)
1749                else:
1750                    out_item = self.getRefValue(typeItem, item)
1751                    out.append(out_item)
1752
1753        return out
1754
1755    """
1756       Convert list to json serializable values.
1757    """
1758    def toList(self, rank, typeItem, data):
1759        out = None
1760        typeClass = typeItem['class']
1761        if typeClass in ('H5T_INTEGER', 'H5T_FLOAT'):
1762            out = data.tolist()  # just use as is
1763
1764        elif rank == 0:
1765            # scalar value
1766            out = self.getDataValue(typeItem, data)
1767        else:
1768            out = []
1769            for item in data:
1770                if rank > 1:
1771                    out_item = self.toList(rank - 1, typeItem, item)
1772                    out.append(out_item)
1773                else:
1774                    out_item = self.getDataValue(typeItem, item)
1775                    out.append(out_item)
1776
1777        return out
1778
1779    """
1780       Create ascii representation of vlen data object
1781    """
1782    def vlenToList(self, data):
1783        # todo - verify that data is a numpy.ndarray
1784        out = None
1785        if len(data.shape) == 0:
1786            out = []
1787        else:
1788            try:
1789                if data.dtype.kind != 'O':
1790                    out = data.tolist()
1791                else:
1792                    out = []
1793                    for item in data:
1794                        out.append(self.vlenToList(item))  # recursive call
1795            except AttributeError:
1796                # looks like this is not a numpy ndarray, just return the value
1797                out = data
1798        return out
1799
1800    """
1801       Create ascii representation of ref data object
1802    """
1803    def refToList(self, data):
1804        # todo - verify that data is a numpy.ndarray
1805        out = None
1806        if type(data) is h5py.h5r.Reference:
1807            if bool(data):
1808                grpref = self.f[data]
1809                addr = h5py.h5o.get_info(grpref.id).addr
1810                uuid = self.getUUIDByAddress(addr)
1811                if self.getGroupObjByUuid(uuid):
1812                    out = "groups/" + uuid
1813                elif self.getDatasetObjByUuid(uuid):
1814                    out = "datasets/" + uuid
1815                elif self.getCommittedTypeObjByUuid(uuid):
1816                    out = "datatypes/" + uuid
1817                else:
1818                    self.log.warning(
1819                        "uuid in region ref not found: [" + uuid + "]")
1820                    return None
1821            else:
1822                out = "null"
1823        elif type(data) is h5py.h5r.RegionReference:
1824            out = self.getRegionReference(data)
1825        else:
1826            out = []
1827            for item in data:
1828                out.append(self.refToList(item))  # recursive call
1829        return out
1830
1831    """
1832       Convert ascii representation of data references to data ref
1833    """
1834    def listToRef(self, data):
1835        out = None
1836        if not data:
1837            # null reference
1838            out = self.getNullReference()
1839        elif type(data) in (bytes, str, unicode):
1840            obj_ref = None
1841            # object reference should be in the form: <collection_name>/<uuid>
1842            for prefix in ("datasets", "groups", "datatypes"):
1843                if data.startswith(prefix):
1844                    uuid_ref = data[len(prefix):]
1845                    if len(uuid_ref) == (UUID_LEN + 1) and uuid_ref.startswith('/'):
1846                        obj = self.getObjectByUuid(prefix, uuid_ref[1:])
1847                        if obj:
1848                            obj_ref = obj.ref
1849                        else:
1850                            msg = "Invalid object reference value: [" + uuid_ref + "] not found"
1851                            self.log.info(msg)
1852                            raise IOError(errno.ENXIO, msg)
1853                    break
1854            if not obj_ref:
1855                msg = "Invalid object reference value: [" + data + "]"
1856                self.log.info(msg)
1857                raise IOError(errno.EINVAL, msg)
1858            else:
1859                out = obj_ref
1860
1861        elif type(data) in (list, tuple):
1862            out = []
1863            for item in data:
1864                out.append(self.listToRef(item))  # recursive call
1865        elif type(data) == dict:
1866            # assume region ref
1867            out = self.createRegionReference(data)
1868        else:
1869            msg = "Invalid object reference value type: [" + str(type(data)) + "]"
1870            self.log.info(msg)
1871            raise IOError(errno.EINVAL, msg)
1872        return out
1873
1874    """
1875       Convert list that may contain bytes type elements to list of string elements
1876    """
1877    def bytesArrayToList(self, data):
1878        if type(data) in (bytes, str, unicode):
1879            is_list = False
1880        elif isinstance(data, (np.ndarray, np.generic)):
1881            if len(data.shape) == 0:
1882                is_list = False
1883                data = data.tolist()  # tolist will return a scalar in this case
1884                if type(data) in (list, tuple):
1885                    is_list = True
1886                else:
1887                    is_list = False
1888            else:
1889                is_list = True
1890        elif type(data) in (list, tuple):
1891            is_list = True
1892        else:
1893            is_list = False
1894
1895        if is_list:
1896            out = []
1897            for item in data:
1898                out.append(self.bytesArrayToList(item)) # recursive call
1899        elif type(data) is bytes:
1900            if six.PY3:
1901                out = data.decode("utf-8")
1902            else:
1903                out = data
1904        else:
1905            out = data
1906
1907        return out
1908
1909    """
1910      Get item description of region reference value
1911    """
1912    def getRegionReference(self, regionRef):
1913        selectionEnums = {h5py.h5s.SEL_NONE:       'H5S_SEL_NONE',
1914                          h5py.h5s.SEL_ALL:        'H5S_SEL_ALL',
1915                          h5py.h5s.SEL_POINTS:     'H5S_SEL_POINTS',
1916                          h5py.h5s.SEL_HYPERSLABS: 'H5S_SEL_HYPERSLABS'}
1917
1918        item = {}
1919        objid = h5py.h5r.dereference(regionRef, self.f.file.file.id)
1920        if objid:
1921            item['id'] = self.getUUIDByAddress(h5py.h5o.get_info(objid).addr)
1922        else:
1923                self.log.info("region reference unable to find item with objid: " + objid)
1924                return item
1925
1926        sel = h5py.h5r.get_region(regionRef, objid)
1927        select_type = sel.get_select_type()
1928        if select_type not in selectionEnums:
1929            msg = "Unexpected selection type: " + regionRef.typecode
1930            self.log.error(msg)
1931            raise IOError(errno.EIO, msg)
1932        item['select_type'] = selectionEnums[select_type]
1933        pointlist = None
1934        if select_type == h5py.h5s.SEL_POINTS:
1935            # retrieve a numpy array of selection points
1936            points = sel.get_select_elem_pointlist()
1937            pointlist = points.tolist()
1938        elif select_type == h5py.h5s.SEL_HYPERSLABS:
1939            points = sel.get_select_hyper_blocklist()
1940            if points is not None:
1941                pointlist = points[...].tolist()
1942                # bump up the second coordinate by one to match api spec
1943                for point in pointlist:
1944                    coord2 = point[1]
1945                    for i in range(len(coord2)):
1946                        coord2[i] = coord2[i] + 1
1947
1948        item['selection'] = pointlist
1949
1950        return item
1951
1952    """
1953      Create region reference from item description of region reference value
1954    """
1955    def createRegionReference(self, item):
1956        selectionEnums = {'H5S_SEL_NONE': h5py.h5s.SEL_NONE,
1957                          'H5S_SEL_ALL': h5py.h5s.SEL_ALL,
1958                          'H5S_SEL_POINTS': h5py.h5s.SEL_POINTS,
1959                          'H5S_SEL_HYPERSLABS': h5py.h5s.SEL_HYPERSLABS}
1960        region_ref = None
1961
1962        if 'select_type' not in item:
1963            msg = "select_type not provided for region selection"
1964            self.log.info(msg)
1965            raise IOError(errno.EINVAL, msg)
1966        select_type = item['select_type']
1967        if select_type not in selectionEnums.keys():
1968            msg = "selection type: [" + select_type + "] is not valid"
1969            self.log.info(msg)
1970            raise IOError(errno.EINVAL, msg)
1971        dset = None
1972        if select_type == 'H5S_SEL_NONE':
1973                if 'id' not in item:
1974                        #        select none on null dataset, return null ref
1975                        out = self.getNullReference()
1976                        return out
1977        else:  # select_type != 'H5S_SEL_NONE'
1978            if 'id' not in item:
1979                msg = "id not provided for region selection"
1980                self.log.info(msg)
1981                raise IOError(errno.EINVAL, msg)
1982
1983        # Otherwise need to provide uuid of dataset
1984        uuid_ref = item['id']
1985        if len(uuid_ref) != UUID_LEN:
1986            msg = "uuid value: [" + uuid_ref + "] for region reference is not valid"
1987            self.log.info(msg)
1988            raise IOError(errno.EINVAL, msg)
1989
1990        obj = self.getObjectByUuid("datasets", uuid_ref)
1991        if obj:
1992            dset = obj
1993        else:
1994            msg = "Invalid region refence value: [" + uuid_ref + "] not found"
1995            self.log.info(msg)
1996            raise IOError(errno.EINVAL, msg)
1997
1998        if select_type in ('H5S_SEL_POINTS', 'H5S_SEL_HYPERSLABS'):
1999            if 'selection' not in item:
2000                msg = "selection key not provided for region selection"
2001                self.log.info(msg)
2002                raise IOError(errno.EINVAL, msg)
2003
2004        rank = len(dset.shape)
2005        space_id = h5py.h5d.DatasetID.get_space(dset.id)
2006        h5py.h5s.SpaceID.select_none(space_id)
2007
2008        if select_type == 'H4S_SEL_NONE':
2009                pass  # did select_none above
2010        elif select_type == 'H5S_SEL_ALL':
2011            h5py.h5s.SpaceID.select_all(space_id)
2012        elif select_type == 'H5S_SEL_POINTS':
2013            selection = item['selection']
2014            for point in selection:
2015                if len(point) != rank:
2016                        msg = "point selection number of elements must mach rank of referenced dataset"
2017                        self.log.info(msg)
2018                        raise IOError(errno.EINVAL, msg)
2019            h5py.h5s.SpaceID.select_elements(space_id, selection)
2020        elif select_type == 'H5S_SEL_HYPERSLABS':
2021            selection = item['selection']
2022
2023            for slab in selection:
2024                    # each item should be a two element array defining the hyperslab boundary
2025                    if len(slab) != 2:
2026                        msg = "selection value not valid (not a 2 element array)"
2027                        self.log.info(msg)
2028                        raise IOError(errno.EINVAL, msg)
2029                    start = slab[0]
2030                    if type(start) == list:
2031                        start = tuple(start)
2032                    if type(start) is not tuple or len(start) != rank:
2033                        msg = "selection value not valid, start element should have number "
2034                        msg += "elements equal to rank of referenced dataset"
2035                        self.log.info(msg)
2036                        raise IOError(errno.EINVAL, msg)
2037                    stop = slab[1]
2038                    if type(stop) == list:
2039                        stop = tuple(stop)
2040                    if type(stop) is not tuple or len(stop) != rank:
2041                        msg = "selection value not valid, count element should have number "
2042                        msg += "elements equal to rank of referenced dataset"
2043                        self.log.info(msg)
2044                        raise IOError(errno.EINVAL, msg)
2045                    count = []
2046                    for i in range(rank):
2047                        if start[i] < 0:
2048                                msg = "start value for hyperslab selection must be non-negative"
2049                                self.log.info(msg)
2050                                raise IOError(errno.EINVAL, msg)
2051                        if stop[i] <= start[i]:
2052                                msg = "stop value must be greater than start value for hyperslab selection"
2053                                self.log.info(msg)
2054                                raise IOError(errno.EINVAL, msg)
2055                        count.append(stop[i] - start[i])
2056                    count = tuple(count)
2057
2058                    h5py.h5s.SpaceID.select_hyperslab(space_id, start, count, op=h5py.h5s.SELECT_OR)
2059
2060        # now that we've selected the desired region in the space, return a region reference
2061
2062        if six.PY3:
2063            dset_name = dset.name.encode('utf-8')
2064        else:
2065            dset_name = dset.name
2066        region_ref = h5py.h5r.create(self.f.id, dset_name, h5py.h5r.DATASET_REGION, space_id)
2067
2068        return region_ref
2069
2070    """
2071      Convert a list to a tuple, recursively.
2072      Example. [[1,2],[3,4]] -> ((1,2),(3,4))
2073    """
2074    def toTuple(self, rank, data):
2075        if type(data) in (list, tuple):
2076            if rank > 0:
2077                return list(self.toTuple(rank-1, x) for x in data)
2078            else:
2079                return tuple(self.toTuple(rank-1, x) for x in data)
2080        else:
2081            return data
2082
2083    """
2084    Get values from dataset identified by obj_uuid.
2085    If a slices list or tuple is provided, it should have the same
2086    number of elements as the rank of the dataset.
2087    """
2088    def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"):
2089        dset = self.getDatasetObjByUuid(obj_uuid)
2090        if format not in ("json", "binary"):
2091            msg = "only json and binary formats are supported"
2092            self.log.info(msg)
2093            raise IOError(errno.EINVAL, msg)
2094
2095        if dset is None:
2096            msg = "Dataset: " + obj_uuid + " not found"
2097            self.log.info(msg)
2098            raise IOError(errno.ENXIO, msg)
2099
2100        values = None
2101        dt = dset.dtype
2102        typeItem = getTypeItem(dt)
2103        itemSize = getItemSize(typeItem)
2104        if itemSize == "H5T_VARIABLE" and format == "binary":
2105            msg = "Only JSON is supported for for this data type"
2106            self.log.info(msg)
2107            raise IOError(errno.EINVAL, msg)
2108
2109        if dset.shape is None:
2110            # null space dataset (with h5py 2.6.0)
2111            return None
2112
2113        rank = len(dset.shape)
2114
2115        if rank == 0:
2116            # check for null dataspace
2117            try:
2118                val = dset[...]
2119            except IOError:
2120                # assume null dataspace, return none
2121                return None
2122            if val is None:
2123                self.log.warning("no value returned from scalar dataset")
2124
2125        if type(slices) != list and type(slices) != tuple and slices is not Ellipsis:
2126            msg = "Unexpected error: getDatasetValuesByUuid: bad type for dim parameter"
2127            self.log.error(msg)
2128            raise IOError(errno.EIO, msg)
2129
2130        if (type(slices) == list or type(slices) == tuple) and len(slices) != rank:
2131            msg = "Unexpected error: getDatasetValuesByUuid: number of dims in selection not same as rank"
2132            self.log.error(msg)
2133            raise IOError(errno.EIO, msg)
2134
2135        if dt.kind == 'O':
2136            if format != "json":
2137                msg = "Only JSON is supported for for this data type"
2138                self.log.info(msg)
2139                raise IOError(errno.EINVAL, msg)
2140            # numpy object type - could be a vlen string or generic vlen
2141            h5t_check = h5py.h5t.check_dtype(vlen=dt)
2142            if h5t_check == str or h5t_check == unicode:
2143                values = dset[slices].tolist()  # just dump to list
2144            elif six.PY3 and h5t_check == bytes:
2145                values = self.bytesArrayToList(dset[slices])
2146            elif h5t_check is not None:
2147                # other vlen data
2148                values = self.vlenToList(dset[slices])
2149            else:
2150                # check for reference type
2151                h5t_check = h5py.h5t.check_dtype(ref=dt)
2152                if h5t_check is not None:
2153                    # reference type
2154                    values = self.refToList(dset[slices])
2155                else:
2156                    msg = "Unexpected error, object type unknown"
2157                    self.log.error(msg)
2158                    raise IOError(errno.EIO, msg)
2159        elif dt.kind == 'V' and len(dt) <= 1 and len(dt.shape) == 0:
2160            # opaque type - skip for now
2161            self.log.warning("unable to get opaque type values")
2162            values =  "????"
2163        elif dt.kind == 'S' and format == "json" and six.PY3:
2164            values = self.bytesArrayToList(dset[slices])
2165        elif len(dt) > 1:
2166            # compound type
2167            if format == "json":
2168                values = self.bytesArrayToList(dset[slices])
2169            else:
2170                values = dset[slices].tobytes()
2171        else:
2172            values = dset[slices]
2173
2174            # just use tolist to dump
2175            if format == "json":
2176                values = values.tolist()
2177            else:
2178                #values = base64.b64encode(dset[slices].tobytes())
2179                values = values.tobytes()
2180
2181        return values
2182
2183    """
2184      doDatasetQueryByUuid: return rows based on query string
2185        Return rows from a dataset that matches query string.
2186
2187        Note: Only supported for compound_type/one-dimensional datasets
2188    """
2189    def doDatasetQueryByUuid(self, obj_uuid, query, start=0, stop=-1, step=1, limit=None):
2190        self.log.info("doQueryByUuid - uuid: " + obj_uuid + " query:" + query)
2191        self.log.info("start: " + str(start) + " stop: " + str(stop) + " step: " + str(step) + " limit: " + str(limit))
2192        dset = self.getDatasetObjByUuid(obj_uuid)
2193        if dset is None:
2194            msg = "Dataset: " + obj_uuid + " not found"
2195            self.log.info(msg)
2196            raise IOError(errno.ENXIO, msg)
2197
2198        values = []
2199        dt = dset.dtype
2200        typeItem = getTypeItem(dt)
2201        itemSize = getItemSize(typeItem)
2202        if typeItem['class'] != "H5T_COMPOUND":
2203            msg = "Only compound type datasets can be used as query target"
2204            self.log.info(msg)
2205            raise IOError(errno.EINVAL, msg)
2206
2207        if dset.shape is None:
2208            # null space dataset (with h5py 2.6.0)
2209            return None
2210
2211        rank = len(dset.shape)
2212        if rank != 1:
2213            msg = "One one-dimensional datasets can be used as query target"
2214            self.log.info(msg)
2215            raise IOError(errno.EINVAL, msg)
2216
2217
2218        values = []
2219        indexes = []
2220        count = 0
2221
2222        num_elements = dset.shape[0]
2223        if stop == -1:
2224            stop = num_elements
2225        elif stop > num_elements:
2226            stop = num_elements
2227        block_size = self._getBlockSize(dset)
2228        self.log.info("block_size: " + str(block_size))
2229
2230        field_names = list(dset.dtype.fields.keys())
2231        eval_str = self._getEvalStr(query, field_names)
2232
2233        while start < stop:
2234            if limit and (count == limit):
2235                break  # no more rows for this batch
2236            end = start  + block_size
2237            if end > stop:
2238                end = stop
2239            rows = dset[start:end]  # read from dataset
2240            where_result = np.where(eval(eval_str))
2241            index = where_result[0].tolist()
2242            if len(index) > 0:
2243                for i in index:
2244                    row = rows[i]
2245                    item = self.bytesArrayToList(row)
2246                    values.append(item)
2247                    indexes.append(start + i)
2248                    count += 1
2249                    if limit and (count == limit):
2250                        break  # no more rows for this batch
2251
2252            start = end  # go to next block
2253
2254
2255        # values = self.getDataValue(item_type, values, dimension=1, dims=(len(values),))
2256
2257        self.log.info("got " + str(count) + " query matches")
2258        return (indexes, values)
2259
2260    """
2261     _getBlockSize: Get number of rows to read from disk
2262
2263        heurestic to get reasonable sized chunk of data to fetch.
2264        make multiple of chunk_size if possible
2265    """
2266    def _getBlockSize(self, dset):
2267        target_block_size = 256 * 1000
2268        if dset.chunks:
2269            chunk_size = dset.chunks[0]
2270            if chunk_size < target_block_size:
2271                block_size = (target_block_size // chunk_size) * chunk_size
2272            else:
2273                block_size = target_block_size
2274        else:
2275            block_size = target_block_size
2276        return block_size
2277
2278    """
2279     _getEvalStr: Get eval string for given query
2280
2281        Gets Eval string to use with numpy where method.
2282    """
2283    def _getEvalStr(self, query, field_names):
2284        i = 0
2285        eval_str = ""
2286        var_name = None
2287        end_quote_char = None
2288        var_count = 0
2289        paren_count = 0
2290        black_list = ( "import", ) # field names that are not allowed
2291        self.log.info("getEvalStr(" + query + ")")
2292        for item in black_list:
2293            if item in field_names:
2294                msg = "invalid field name"
2295                self.log.info("EINVAL: " + msg)
2296                raise IOError(errno.EINVAL, msg)
2297        while i < len(query):
2298            ch = query[i]
2299            if (i+1) < len(query):
2300                ch_next = query[i+1]
2301            else:
2302                ch_next = None
2303            if var_name and not ch.isalnum():
2304                # end of variable
2305                if var_name not in field_names:
2306                    # invalid
2307                    msg = "unknown field name"
2308                    self.log.info("EINVAL: " + msg)
2309                    raise IOError(errno.EINVAL, msg)
2310                eval_str += "rows['" + var_name + "']"
2311                var_name = None
2312                var_count += 1
2313
2314            if end_quote_char:
2315                if ch == end_quote_char:
2316                    # end of literal
2317                    end_quote_char = None
2318                eval_str += ch
2319            elif ch in ("'", '"'):
2320                end_quote_char = ch
2321                eval_str += ch
2322            elif ch.isalpha():
2323                if ch == 'b' and ch_next in ("'", '"'):
2324                    eval_str += 'b' # start of a byte string literal
2325                elif var_name is None:
2326                    var_name = ch  # start of a variable
2327                else:
2328                    var_name += ch
2329            elif ch == '(' and end_quote_char is None:
2330                paren_count += 1
2331                eval_str += ch
2332            elif ch == ')' and end_quote_char is None:
2333                paren_count -= 1
2334                if paren_count < 0:
2335                    msg = "Mismatched paren"
2336                    self.log.info("EINVAL: " + msg)
2337                    raise IOError(errno.EINVAL, msg)
2338                eval_str += ch
2339            else:
2340                # just add to eval_str
2341                eval_str += ch
2342            i = i+1
2343        if end_quote_char:
2344            msg = "no matching quote character"
2345            self.log.info("EINVAL: " + msg)
2346            raise IOError(errno.EINVAL, msg)
2347        if var_count == 0:
2348            msg = "No field value"
2349            self.log.info("EINVAL: " + msg)
2350            raise IOError(errno.EINVAL, msg)
2351        if paren_count != 0:
2352            msg = "Mismatched paren"
2353            self.log.info("EINVAL: " + msg)
2354            raise IOError(errno.EINVAL, msg)
2355
2356        return eval_str
2357
2358    """
2359    Get values from dataset identified by obj_uuid using the given
2360    point selection.
2361    """
2362    def getDatasetPointSelectionByUuid(self, obj_uuid, points):
2363        dset = self.getDatasetObjByUuid(obj_uuid)
2364        if dset is None:
2365            msg = "Dataset: " + obj_uuid + " not found"
2366            self.log.info(msg)
2367            raise IOError(errno.ENXIO, msg)
2368
2369        rank = len(dset.shape)
2370        values = np.zeros(len(points), dtype=dset.dtype)
2371        try:
2372            i = 0
2373            for point in points:
2374                if rank == 1:
2375                    values[i] = dset[[point]]
2376                else:
2377                    values[i] = dset[tuple(point)]
2378                i += 1
2379        except ValueError:
2380            # out of range error
2381            msg = "getDatasetPointSelection, out of range error"
2382            self.log.info(msg)
2383            raise IOError(errno.EINVAL, msg)
2384        return values.tolist()
2385
2386    """
2387    setDatasetValuesByUuid - update the given dataset values with supplied data
2388      and optionally a hyperslab selection (slices)
2389    """
2390    def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"):
2391        dset = self.getDatasetObjByUuid(obj_uuid)
2392
2393        if format not in ("json", "binary"):
2394            msg = "only json and binary formats are supported"
2395            self.log.info(msg)
2396            raise IOError(errno.EINVAL, msg)
2397
2398        if format == "binary" and type(data) is not bytes:
2399            msg ="data must be of type bytes for binary writing"
2400            self.log.info(msg)
2401            raise IOError(errno.EINVAL, msg)
2402
2403        if dset is None:
2404            msg = "Dataset: " + obj_uuid + " not found"
2405            self.log.info(msg)
2406            raise IOError(errno.ENXIO, msg)
2407
2408        dt = dset.dtype
2409        typeItem = getTypeItem(dt)
2410        itemSize = getItemSize(typeItem)
2411        rank = len(dset.shape)
2412        arraySize = 1
2413        for extent in dset.shape:
2414            arraySize *= arraySize
2415
2416        if itemSize == "H5T_VARIABLE" and format == "binary":
2417            msg = "Only JSON is supported for for this data type"
2418            self.log.info(msg)
2419            raise IOError(errno.EINVAL, msg)
2420
2421        if slices is None:
2422            slices = []
2423            # create selection that covers entire dataset
2424            for dim in range(rank):
2425                s = slice(0, dset.shape[dim], 1)
2426                slices.append(s)
2427            slices = tuple(slices)
2428
2429
2430        if type(slices) != tuple:
2431            msg = "setDatasetValuesByUuid: bad type for dim parameter"
2432            self.log.error(msg)
2433            raise IOError(erno.EIO, msg)
2434
2435
2436        if len(slices) != rank:
2437            msg = "number of dims in selection not same as rank"
2438            self.log.info(msg)
2439            raise IOError(errno.EINVAL, msg)
2440
2441        npoints = 1
2442        np_shape = []
2443        for i in range(rank):
2444            s = slices[i]
2445
2446            if s.start < 0 or s.step <= 0 or s.stop < s.start:
2447                msg = "invalid slice specification"
2448                self.log.info(msg)
2449                raise IOError(errno.EINVAL, msg)
2450            if s.stop > dset.shape[i]:
2451                msg = "invalid slice specification"
2452                self.log.info(msg)
2453                raise IOError(errno.EINVAL, msg)
2454            np_shape.append(s.stop - s.start)
2455
2456            count = (s.stop - s.start) // s.step
2457            if count <= 0:
2458                msg = "invalid slice specification"
2459                self.log.info(msg)
2460                raise IOError(errno.EINVAL, msg)
2461
2462            npoints *= count
2463
2464        np_shape = tuple(np_shape)  # for comparison with ndarray shape
2465
2466        self.log.info("selection shape:" + str(np_shape))
2467
2468
2469        # need some special conversion for compound types --
2470        # each element must be a tuple, but the JSON decoder
2471        # gives us a list instead.
2472        if format != "binary" and len(dset.dtype) > 1 and type(data) in (list, tuple):
2473            data = self.toTuple(rank, data)
2474            #for i in range(len(data)):
2475            #    converted_data.append(self.toTuple(data[i]))
2476            #data = converted_data
2477        else:
2478            h5t_check = h5py.check_dtype(ref=dset.dtype)
2479            if h5t_check in (h5py.Reference, h5py.RegionReference):
2480                # convert data to data refs
2481                if format == "binary":
2482                    msg = "Only JSON is supported for for this data type"
2483                    self.log.info(msg)
2484                    raise IOError(errno.EINVAL, msg)
2485                data = self.listToRef(data)
2486
2487        if format == "binary":
2488            if npoints*itemSize != len(data):
2489                msg = "Expected: " + str(npoints*itemSize) + " bytes, but got: " + str(len(data))
2490                self.log.info(msg)
2491                raise IOError(errno.EINVAL, msg)
2492            if dset.dtype.shape == ():
2493                arr = np.fromstring(data, dtype=dset.dtype)
2494                arr = arr.reshape(np_shape)  # conform to selection shape
2495            else:
2496                # tricy array type!
2497                arr = np.empty(np_shape, dtype=dset.dtype)
2498                base_arr = np.fromstring(data, dtype=dset.dtype.base)
2499                base_shape = list(np_shape)
2500                base_shape.extend(dset.dtype.shape)  # add on the type dimensions
2501                base_arr = base_arr.reshape(base_shape)
2502                arr[...] = base_arr
2503        else:
2504            # data is json
2505            if npoints == 1 and len(dset.dtype) > 1:
2506                # convert to tuple for compound singleton writes
2507                data = [tuple(data),]
2508
2509            arr = np.array(data, dtype=dset.dtype)
2510            # raise an exception of the array shape doesn't match the selection shape
2511            # allow if the array is a scalar and the selection shape is one element,
2512            # numpy is ok with this
2513            np_index = 0
2514            for dim in range(len(arr.shape)):
2515                data_extent = arr.shape[dim]
2516                selection_extent = 1
2517                if np_index < len(np_shape):
2518                    selection_extent = np_shape[np_index]
2519                if selection_extent == data_extent:
2520                    np_index += 1
2521                    continue  # good
2522                if data_extent == 1:
2523                    continue  # skip singleton selection
2524                if selection_extent == 1:
2525                    np_index += 1
2526                    continue  # skip singleton selection
2527
2528                # selection/data mismatch!
2529                msg = "data shape doesn't match selection shape"
2530                msg += "--data shape: " + str(arr.shape)
2531                msg += "--selection shape: " + str(np_shape)
2532
2533                self.log.info(msg)
2534                raise IOError(errno.EINVAL, msg)
2535
2536        # write temp numpy array to dataset
2537        if rank == 1:
2538            s = slices[0]
2539            try:
2540                dset[s] = arr
2541            except TypeError as te:
2542                self.log.info("h5py setitem exception: " + str(te))
2543                raise IOError(errno.EINVAL, str(te))
2544        else:
2545            try:
2546                dset[slices] = arr
2547            except TypeError as te:
2548                self.log.info("h5py setitem exception: " + str(te))
2549                raise IOError(errno.EINVAL, str(te))
2550
2551        # update modified time
2552        self.setModifiedTime(obj_uuid)
2553        return True
2554
2555    """
2556    setDatasetValuesByPointSelection - Update the dataset values using the given
2557      data and point selection
2558    """
2559    def setDatasetValuesByPointSelection(self, obj_uuid, data, points, format="json"):
2560        dset = self.getDatasetObjByUuid(obj_uuid)
2561
2562        if format not in ("json", "binary"):
2563            msg = "only json and binary formats are supported"
2564            self.log.info(msg)
2565            raise IOError(errno.EINVAL, msg)
2566
2567        if format == "binary" and type(data) is not bytes:
2568            msg ="data must be of type bytes for binary writing"
2569            self.log.info(msg)
2570            raise IOError(errno.EINVAL, msg)
2571
2572        if dset is None:
2573            msg = "Dataset: " + obj_uuid + " not found"
2574            self.log.info(msg)
2575            raise IOError(errno.ENXIO, msg)
2576
2577        dt = dset.dtype
2578        typeItem = getTypeItem(dt)
2579        itemSize = getItemSize(typeItem)
2580        if itemSize == "H5T_VARIABLE" and format == "binary":
2581            msg = "Only JSON is supported for for this data type"
2582            self.log.info(msg)
2583            raise IOError(errno.EINVAL, msg)
2584
2585        rank = len(dset.shape)
2586
2587        # need some special conversion for compound types --
2588        # each element must be a tuple, but the JSON decoder
2589        # gives us a list instead.
2590        if format == "json" and len(dset.dtype) > 1 and type(data) in (list, tuple):
2591            converted_data = self.toTuple(rank, data)
2592            #for i in range(len(data)):
2593            #    converted_data.append(self.toTuple(data[i]))
2594            #data = converted_data
2595
2596        if format == "json":
2597
2598            try:
2599                i = 0
2600                for point in points:
2601                    if rank == 1:
2602                        dset[[point]] = data[i]
2603                    else:
2604                        dset[tuple(point)] = data[i]
2605                    i += 1
2606            except ValueError:
2607                # out of range error
2608                msg = "setDatasetValuesByPointSelection, out of range error"
2609                self.log.info(msg)
2610                raise IOError(errno.EINVAL, msg)
2611
2612        else:
2613            #binary
2614            arr = np.fromstring(data, dtype=dset.dtype)
2615            dset[points] = arr     # coordinate write
2616
2617        # update modified time
2618        self.setModifiedTime(obj_uuid)
2619        return True
2620
2621    """
2622    createDataset - creates new dataset given shape and datatype
2623    Returns item
2624    """
2625    def createDataset(self, datatype, datashape, max_shape=None,
2626                      creation_props=None, obj_uuid=None):
2627        self.initFile()
2628        if self.readonly:
2629            msg = "Unable to create dataset (Updates are not allowed)"
2630            self.log.info(msg)
2631            raise IOError(errno.EPERM, msg)
2632        datasets = self.dbGrp["{datasets}"]
2633        if not obj_uuid:
2634            obj_uuid = str(uuid.uuid1())
2635        dt = None
2636        item = {}
2637
2638        # h5py.createdataset fields
2639        kwargs = {}  # key word arguments for h5py dataset creation
2640
2641        fillvalue = None
2642
2643        if creation_props is None:
2644            creation_props = {}  # create empty list for convience
2645
2646        if creation_props:
2647            if "fillValue" in creation_props:
2648                fillvalue = creation_props["fillValue"]
2649            if "trackTimes" in creation_props:
2650                kwargs['track_times'] = creation_props["trackTimes"]
2651            if "layout" in creation_props:
2652                layout = creation_props["layout"]
2653                if "dims" in layout:
2654                    kwargs['chunks'] = tuple(layout["dims"])
2655            if "filters" in creation_props:
2656                filter_props = creation_props["filters"]
2657                for filter_prop in filter_props:
2658                    if "id" not in filter_prop:
2659                        msg = "filter id not provided"
2660                        self.log.info(msg)
2661                        raise IOError(errno.EINVAL, msg)
2662                    filter_id = filter_prop["id"]
2663                    if filter_id not in _HDF_FILTERS:
2664                        self.log.info("unknown filter id: " + str(filter_id) + " ignoring")
2665                        continue
2666
2667                    hdf_filter = _HDF_FILTERS[filter_id]
2668
2669                    self.log.info("got filter: " + str(filter_id))
2670                    if "alias" not in hdf_filter:
2671                        self.log.info("unsupported filter id: " + str(filter_id) + " ignoring")
2672                        continue
2673
2674                    filter_alias = hdf_filter["alias"]
2675                    if not h5py.h5z.filter_avail(filter_id):
2676                        self.log.info("compression filter not available, filter: " + filter_alias + " will be ignored")
2677                        continue
2678                    if filter_alias in _H5PY_COMPRESSION_FILTERS:
2679                        if kwargs.get('compression'):
2680                            self.log.info("compression filter already set, filter: " + filter_alias + " will be ignored")
2681                            continue
2682
2683                        kwargs['compression'] = filter_alias
2684                        self.log.info("setting compression filter to: " + kwargs['compression'])
2685                        if filter_alias == "gzip":
2686                            # check for an optional compression value
2687                            if "level" in filter_prop:
2688                                kwargs['compression_opts'] = filter_prop["level"]
2689                        elif filter_alias == "szip":
2690                            bitsPerPixel = None
2691                            coding = 'nn'
2692
2693                            if "bitsPerPixel" in filter_prop:
2694                                bitsPerPixel = filter_prop["bitsPerPixel"]
2695                            if "coding" in filter_prop:
2696                                if filter_prop["coding"] == "H5_SZIP_EC_OPTION_MASK":
2697                                    coding = 'ec'
2698                                elif filter_prop["coding"] == "H5_SZIP_NN_OPTION_MASK":
2699                                    coding = 'nn'
2700                                else:
2701                                    msg = "invalid szip option: 'coding'"
2702                                    self.log.info(msg)
2703                                    raise IOError(errno.EINVAL, msg)
2704                            # note: pixelsPerBlock, and pixelsPerScanline not supported by h5py,
2705                            # so these options will be ignored
2706                            if "pixelsPerBlock" in filter_props:
2707                                self.log.info("ignoring szip option: 'pixelsPerBlock'")
2708                            if "pixelsPerScanline" in filter_props:
2709                                self.log.info("ignoring szip option: 'pixelsPerScanline'")
2710                            if bitsPerPixel:
2711                                kwargs['compression_opts'] = (coding, bitsPerPixel)
2712                    else:
2713                        if filter_alias == "shuffle":
2714                            kwargs['shuffle'] = True
2715                        elif filter_alias == "fletcher32":
2716                            kwargs['fletcher32'] = True
2717                        elif filter_alias == "scaleoffset":
2718                            if "scaleOffset" not in filter_prop:
2719                                msg = "No scale_offset provided for scale offset filter"
2720                                self.log(msg)
2721                                raise IOError(errno.EINVAL, msg)
2722                            kwargs['scaleoffset'] = filter_prop["scaleOffset"]
2723                        else:
2724                            self.log.info("Unexpected filter name: " + filter_alias + " , ignoring")
2725
2726        dt_ref = self.createTypeFromItem(datatype)
2727        if dt_ref is None:
2728            msg = 'Unexpected error, no type returned'
2729            self.log.error(msg)
2730            raise IOError(errno.EIO, msg)
2731
2732        dt = dt_ref
2733        if hasattr(dt_ref, 'dtype'):
2734            # dt_ref is actualy a handle to a committed type
2735            # get the dtype prop, but use dt_ref for the actual dataset creation
2736            dt = dt_ref.dtype
2737
2738        if fillvalue and len(dt) > 1 and type(fillvalue) in (list, tuple):
2739            # for compound types, need to convert from list to dataset compatible element
2740
2741            if len(dt) != len(fillvalue):
2742                msg = 'fillvalue has incorrect number of elements'
2743                self.log.info(msg)
2744                raise IOError(errno.EINVAL, msg)
2745            ndscalar = np.zeros((), dtype=dt)
2746            for i in range(len(fillvalue)):
2747                field = dt.names[i]
2748                ndscalar[field] = self.toTuple(0, fillvalue[i])
2749            fillvalue = ndscalar
2750
2751        if fillvalue:
2752            kwargs['fillvalue'] = fillvalue
2753
2754        dataset_id = None
2755        if datashape is None:
2756            # create null space dataset
2757            # null space datasets not supported in h5py yet:
2758            # See: https://github.com/h5py/h5py/issues/279
2759            # work around this by using low-level interface.
2760            # first create a temp scalar dataset so we can pull out the typeid
2761            tmpGrp = None
2762            if "{tmp}" not in self.dbGrp:
2763                tmpGrp = self.dbGrp.create_group("{tmp}")
2764            else:
2765                tmpGrp = self.dbGrp["{tmp}"]
2766            tmpDataset = tmpGrp.create_dataset(obj_uuid, shape=(1,),
2767                                               dtype=dt_ref)
2768            tid = tmpDataset.id.get_type()
2769            sid = sid = h5py.h5s.create(h5py.h5s.NULL)
2770            # now create the permanent dataset
2771            gid = datasets.id
2772            if six.PY3:
2773                b_obj_uuid = obj_uuid.encode('utf-8')
2774                dataset_id = h5py.h5d.create(gid, b_obj_uuid, tid, sid)
2775            else:
2776                dataset_id = h5py.h5d.create(gid, obj_uuid, tid, sid)
2777            # delete the temp dataset
2778            del tmpGrp[obj_uuid]
2779        else:
2780
2781            # create the dataset
2782
2783            try:
2784                newDataset = datasets.create_dataset(
2785                    obj_uuid, shape=datashape, maxshape=max_shape,
2786                    dtype=dt_ref, **kwargs)
2787            except ValueError as ve:
2788                msg = "Unable to create dataset"
2789                try:
2790                    msg += ": " + ve.message
2791                except AttributeError:
2792                    pass  # no message
2793                self.log.info(msg)
2794                raise IOError(errno.EINVAL, msg)  # assume this is due to invalid params
2795
2796            if newDataset:
2797                dataset_id = newDataset.id
2798
2799        if dataset_id is None:
2800            msg = 'Unexpected failure to create dataset'
2801            self.log.error(msg)
2802            raise IOError(errno.EIO, msg)
2803        # store reverse map as an attribute
2804        addr = h5py.h5o.get_info(dataset_id).addr
2805        addrGrp = self.dbGrp["{addr}"]
2806        addrGrp.attrs[str(addr)] = obj_uuid
2807
2808        # save creation props if any
2809        if creation_props:
2810            self.setDatasetCreationProps(obj_uuid, creation_props)
2811
2812        # set timestamp
2813        now = time.time()
2814        self.setCreateTime(obj_uuid, timestamp=now)
2815        self.setModifiedTime(obj_uuid, timestamp=now)
2816
2817        item['id'] = obj_uuid
2818        if self.update_timestamps:
2819            item['ctime'] = self.getCreateTime(obj_uuid)
2820            item['mtime'] = self.getModifiedTime(obj_uuid)
2821        item['attributeCount'] = 0
2822        return item
2823
2824    """
2825    Resize existing Dataset
2826    """
2827    def resizeDataset(self, obj_uuid, shape):
2828        self.log.info("resizeDataset(") #  + obj_uuid + "): ") # + str(shape))
2829        self.initFile()
2830        if self.readonly:
2831            msg = "Unable to resize dataset (Updates are not allowed)"
2832            self.log.info(msg)
2833            raise IOError(errno.EACESS, msg)
2834        dset = self.getDatasetObjByUuid(obj_uuid)  # will throw exception if not found
2835        if len(shape) != len(dset.shape):
2836            msg = "Unable to resize dataset, shape has wrong number of dimensions"
2837            self.log.info(msg)
2838            raise IOError(errno.EINVAL, msg)
2839        for i in range(len(shape)):
2840            if shape[i] < dset.shape[i]:
2841                msg = "Unable to resize dataset, cannot make extent smaller"
2842                self.log.info(msg)
2843                raise IOError(errno.EINVAL, msg)
2844            if dset.maxshape[i] != None and shape[i] > dset.maxshape[i]:
2845                msg = "Unable to resize dataset, max extent exceeded"
2846                self.log.info(msg)
2847                raise IOError(errno.EINVAL, msg)
2848
2849        dset.resize(shape)  # resize
2850
2851        # update modified time
2852        self.setModifiedTime(obj_uuid)
2853
2854    """
2855    Check if link points to given target (as a HardLink)
2856    """
2857    def isObjectHardLinked(self, parentGroup, targetGroup, linkName):
2858        try:
2859            linkObj = parentGroup.get(linkName, None, False, True)
2860            linkClass = linkObj.__class__.__name__
2861        except TypeError:
2862            # UDLink? Ignore for now
2863            return False
2864        if linkClass == 'SoftLink':
2865            return False
2866        elif linkClass == 'ExternalLink':
2867            return False
2868        elif linkClass == 'HardLink':
2869            if parentGroup[linkName] == targetGroup:
2870                return True
2871        else:
2872            self.log.warning("unexpected linkclass: " + linkClass)
2873            return False
2874
2875    """
2876    Delete Dataset, Group or Datatype by UUID
2877    """
2878    def deleteObjectByUuid(self, objtype, obj_uuid):
2879        if objtype not in ('group', 'dataset', 'datatype'):
2880            msg = "unexpected objtype: " + objtype
2881            self.log.error(msg)
2882            raise IOError(errno.EIO, msg)
2883        self.initFile()
2884        self.log.info("delete uuid: " + obj_uuid)
2885        if self.readonly:
2886            msg = "Unable to delete object (Updates are not allowed)"
2887            self.log.info(msg)
2888            raise IOError(errno.EPERM, msg)
2889
2890        if obj_uuid == self.dbGrp.attrs["rootUUID"] and objtype == 'group':
2891            # can't delete root group
2892            msg = "Unable to delete group (root group may not be deleted)"
2893            self.log.info(msg)
2894            raise IOError(errno.EPERM, msg)
2895
2896        dbCol = None
2897        tgt = None
2898        if objtype == 'dataset':
2899            tgt = self.getDatasetObjByUuid(obj_uuid)
2900            dbCol = self.dbGrp["{datasets}"]
2901        elif objtype == 'group':
2902            tgt = self.getGroupObjByUuid(obj_uuid)
2903            dbCol = self.dbGrp["{groups}"]
2904        else:  # datatype
2905            tgt = self.getCommittedTypeObjByUuid(obj_uuid)
2906            dbCol = self.dbGrp["{datatypes}"]
2907
2908        if tgt is None:
2909            msg = "Unable to delete " + objtype + ", uuid: " + obj_uuid + " not found"
2910            self.log.info(msg)
2911            raise IOError(errno.ENXIO, msg)
2912
2913        # unlink from root (if present)
2914        self.unlinkObject(self.f['/'], tgt)
2915
2916        groups = self.dbGrp["{groups}"]
2917        # iterate through each group in the file and unlink tgt if it is linked
2918        # by the group.
2919        # We'll store a list of links to be removed as we go, and then actually
2920        # remove the links after the iteration is done (otherwise we can run into issues
2921        # where the key has become invalid)
2922        linkList = []  # this is our list
2923        for uuidName in groups.attrs:
2924            grpRef = groups.attrs[uuidName]
2925            # de-reference handle
2926            grp = self.f[grpRef]
2927            for linkName in grp:
2928                if self.isObjectHardLinked(grp, tgt, linkName):
2929                    linkList.append({'group': grp, 'link': linkName})
2930        for item in linkList:
2931            self.unlinkObjectItem(item['group'], tgt, item['link'])
2932
2933        addr = h5py.h5o.get_info(tgt.id).addr
2934        addrGrp = self.dbGrp["{addr}"]
2935        del addrGrp.attrs[str(addr)]  # remove reverse map
2936        dbRemoved = False
2937
2938        # finally, remove the dataset from db
2939        if obj_uuid in dbCol:
2940            # should be here (now it is anonymous)
2941            del dbCol[obj_uuid]
2942            dbRemoved = True
2943
2944        if not dbRemoved:
2945            self.log.warning("did not find: " + obj_uuid + " in anonymous collection")
2946
2947            if obj_uuid in dbCol.attrs:
2948                self.log.info("removing: " + obj_uuid + " from non-anonymous collection")
2949                del dbCol.attrs[obj_uuid]
2950                dbRemoved = True
2951
2952        if not dbRemoved:
2953            msg = "Unexpected Error, did not find reference to: " + obj_uuid
2954            self.log.error(msg)
2955            raise IOError(errno.EIO, msg)
2956
2957        # note when the object was deleted
2958        self.setModifiedTime(obj_uuid)
2959
2960        return True
2961
2962    def getGroupItemByUuid(self, obj_uuid):
2963        self.initFile()
2964        grp = self.getGroupObjByUuid(obj_uuid)
2965        if grp is None:
2966            if self.getModifiedTime(obj_uuid, useRoot=False):
2967                msg = "Group with uuid: " + obj_uuid + " has been previously deleted"
2968                self.log.info(msg)
2969                raise IOError(errno.ENOENT, msg)
2970            else:
2971                msg = "Group with uuid: " + obj_uuid + " was not found"
2972                self.log.info(msg)
2973                raise IOError(errno.ENXIO, msg)
2974
2975        linkCount = len(grp)
2976        if "__db__" in grp:
2977            linkCount -= 1  # don't include the db group
2978
2979        item = { 'id': obj_uuid }
2980        alias = []
2981        if grp.name and not grp.name.startswith("/__db__"):
2982            alias.append(grp.name)   # just use the default h5py path for now
2983        item['alias'] = alias
2984        item['attributeCount'] = len(grp.attrs)
2985        item['linkCount'] = linkCount
2986        if self.update_timestamps:
2987            item['ctime'] = self.getCreateTime(obj_uuid)
2988            item['mtime'] = self.getModifiedTime(obj_uuid)
2989
2990        return item
2991
2992    """
2993    getLinkItemByObj - return info about a link
2994        parent: reference to group
2995        linkName: name of link
2996        return: item dictionary with link attributes, or None if not found
2997    """
2998    def getLinkItemByObj(self, parent, link_name):
2999        if link_name not in parent:
3000            return None
3001
3002        if link_name == "__db__":
3003            return None  # don't provide link to db group
3004        #  "http://somefile/#h5path(somepath)")
3005        item = { 'title': link_name }
3006        # get the link object, one of HardLink, SoftLink, or ExternalLink
3007        try:
3008            linkObj = parent.get(link_name, None, False, True)
3009            linkClass = linkObj.__class__.__name__
3010        except TypeError:
3011            # UDLink? set class as 'user'
3012            linkClass = 'UDLink'  # user defined links
3013            item['class'] = 'H5L_TYPE_USER_DEFINED'
3014        if linkClass == 'SoftLink':
3015            item['class'] = 'H5L_TYPE_SOFT'
3016            item['h5path'] = linkObj.path
3017            item['href'] = '#h5path(' + linkObj.path + ')'
3018        elif linkClass == 'ExternalLink':
3019            item['class'] = 'H5L_TYPE_EXTERNAL'
3020            item['h5path'] = linkObj.path
3021            item['file'] = linkObj.filename
3022            item['href'] = '#h5path(' + linkObj.path + ')'
3023        elif linkClass == 'HardLink':
3024            # Hardlink doesn't have any properties itself, just get the linked
3025            # object
3026            obj = parent[link_name]
3027            addr = h5py.h5o.get_info(obj.id).addr
3028            item['class'] = 'H5L_TYPE_HARD'
3029            item['id'] = self.getUUIDByAddress(addr)
3030            class_name = obj.__class__.__name__
3031            if class_name == 'Dataset':
3032                item['href'] = 'datasets/' + item['id']
3033                item['collection'] = 'datasets'
3034            elif class_name == 'Group':
3035                item['href'] = 'groups/' + item['id']
3036                item['collection'] = 'groups'
3037            elif class_name == 'Datatype':
3038                item['href'] = 'datatypes/' + item['id']
3039                item['collection'] = 'datatypes'
3040            else:
3041                self.log.warning("unexpected object type: " + item['type'])
3042
3043        return item
3044
3045    def getLinkItemByUuid(self, grpUuid, link_name):
3046        self.log.info(
3047            "db.getLinkItemByUuid(" + grpUuid + ", [" + link_name + "])")
3048        if not link_name:
3049            msg = "link_name not specified"
3050            self.log.info(msg)
3051            raise IOError(errno.EINVAL, msg)
3052
3053        self.initFile()
3054        parent = self.getGroupObjByUuid(grpUuid)
3055        if parent is None:
3056            msg = "Parent group: " + grpUuid + " of link not found"
3057            self.log.info(msg)
3058            raise IOError(errno.ENXIO, msg)
3059
3060        item = self.getLinkItemByObj(parent, link_name)
3061        # add timestamps
3062        if item:
3063            if self.update_timestamps:
3064                item['ctime'] = self.getCreateTime(grpUuid, objType="link", name=link_name)
3065                item['mtime'] = self.getModifiedTime(grpUuid, objType="link", name=link_name)
3066        else:
3067            self.log.info("link not found")
3068            mtime = self.getModifiedTime(grpUuid, objType="link", name=link_name, useRoot=False)
3069            if mtime:
3070                msg = "Link [" + link_name + "] of: " + grpUuid + " has been previously deleted"
3071                self.log.info(msg)
3072                raise IOError(errno.ENOENT, msg)
3073            else:
3074                msg = "Link [" + link_name + "] of: " + grpUuid + " not found"
3075                self.log.info(msg)
3076                raise IOError(errno.ENXIO, msg)
3077
3078        return item
3079
3080    def getLinkItems(self, grpUuid, marker=None, limit=0):
3081        self.log.info("db.getLinkItems(" + grpUuid + ")")
3082        if marker:
3083            self.log.info("...marker: " + marker)
3084        if limit:
3085            self.log.info("...limit: " + str(limit))
3086
3087        self.initFile()
3088        parent = self.getGroupObjByUuid(grpUuid)
3089        if parent is None:
3090            msg = "Parent group: " + grpUuid + " not found, no links returned"
3091            self.log.info(msg)
3092            raise IOError(errno.ENXIO, msg)
3093        items = []
3094        gotMarker = True
3095        if marker is not None:
3096            gotMarker = False
3097        count = 0
3098        for link_name in parent:
3099            if link_name == "__db__":
3100                continue
3101            if not gotMarker:
3102                if link_name == marker:
3103                    gotMarker = True
3104                    continue  # start filling in result on next pass
3105                else:
3106                    continue  # keep going!
3107            item = self.getLinkItemByObj(parent, link_name)
3108            items.append(item)
3109
3110            count += 1
3111            if limit > 0 and count == limit:
3112                break  # return what we got
3113        return items
3114
3115    def unlinkItem(self, grpUuid, link_name):
3116        if self.readonly:
3117            msg = "Unable to unlink item (Updates are not allowed)"
3118            self.log.info(msg)
3119            raise IOError(errno.EPERM, msg)
3120        grp = self.getGroupObjByUuid(grpUuid)
3121        if grp is None:
3122            msg = "Parent group: " + grpUuid + " not found, cannot remove link"
3123            self.log.info(msg)
3124            raise IOError(errno.ENXIO, msg)
3125
3126        if link_name not in grp:
3127            msg = "Link: [" + link_name + "] of group: " + grpUuid + " not found, cannot remove link"
3128            self.log.info(msg)
3129            raise IOError(errno.ENXIO, msg)
3130
3131        if link_name == "__db__":
3132            # don't allow db group to be unlinked!
3133            msg = "Unlinking of __db__ group not allowed"
3134            raise IOError(errno.EPERM, msg)
3135
3136        obj = None
3137        try:
3138            linkObj = grp.get(link_name, None, False, True)
3139            linkClass = linkObj.__class__.__name__
3140            if linkClass == 'HardLink':
3141                # we can safely reference the object
3142                obj = grp[link_name]
3143        except TypeError:
3144            # UDLink? Return false to indicate that we can not delete this
3145            msg = "Unable to unlink user defined link"
3146            self.log.info(msg)
3147            raise IOError(errno.EPERM, msg)
3148
3149        linkDeleted = False
3150        if obj is not None:
3151            linkDeleted = self.unlinkObjectItem(grp, obj, link_name)
3152        else:
3153            # SoftLink or External Link - we can just remove the key
3154            del grp[link_name]
3155            linkDeleted = True
3156
3157        if linkDeleted:
3158            # update timestamp
3159            self.setModifiedTime(grpUuid, objType="link", name=link_name)
3160
3161        return linkDeleted
3162
3163    def getCollection(self, col_type, marker=None, limit=None):
3164        self.log.info("db.getCollection(" + col_type + ")")
3165        #col_type should be either "datasets", "groups", or "datatypes"
3166        if col_type not in ("datasets", "groups", "datatypes"):
3167            msg = "Unexpected col_type: [" + col_type + "]"
3168            self.log.error(msg)
3169            raise IOError(errno.EIO, msg)
3170        self.initFile()
3171        col = None  # Group, Dataset, or Datatype
3172        if col_type == "datasets":
3173            col = self.dbGrp["{datasets}"]
3174        elif col_type == "groups":
3175            col = self.dbGrp["{groups}"]
3176        else:  # col_type == "datatypes"
3177            col = self.dbGrp["{datatypes}"]
3178
3179        uuids = []
3180        count = 0
3181        # gather the non-anonymous ids first
3182        for obj_uuid in col.attrs:
3183            if marker:
3184                if obj_uuid == marker:
3185                    marker = None  # clear and pick up next item
3186                continue
3187            uuids.append(obj_uuid)
3188            count += 1
3189            if limit is not None and limit > 0 and count == limit:
3190                break
3191
3192        if limit == 0 or (limit is not None and count < limit):
3193            # grab any anonymous obj ids next
3194            for obj_uuid in col:
3195                if marker:
3196                    if obj_uuid == marker:
3197                        marker = None  # clear and pick up next item
3198                    continue
3199                uuids.append(obj_uuid)
3200                count += 1
3201                if limit is not None and limit > 0 and count == limit:
3202                    break
3203
3204        return uuids
3205
3206    """
3207      Get the DB Collection names
3208    """
3209    def getDBCollections(self):
3210        return ("{groups}", "{datasets}", "{datatypes}")
3211
3212    """
3213        Return the db collection the uuid belongs to
3214    """
3215    def getDBCollection(self, obj_uuid):
3216        dbCollections = self.getDBCollections()
3217        for dbCollectionName in dbCollections:
3218            col = self.dbGrp[dbCollectionName]
3219            if obj_uuid in col or obj_uuid in col.attrs:
3220                return col
3221        return None
3222
    def unlinkObjectItem(self, parentGrp, tgtObj, link_name):
        """Remove the hard link *link_name* from group *parentGrp*.

        If tgtObj is not None, the link is removed only when it points at
        tgtObj.  When the link being removed is the last link to its target,
        the target is preserved as an anonymous object: its UUID attribute
        is dropped from the db collection group and replaced by a hard link
        under that group.

        Returns True if the link was deleted, False otherwise (soft/external
        link, or the link did not point at tgtObj).
        Raises IOError(EIO) for a read-only file, a missing link, or a
        user-defined link that cannot be removed.
        """
        if self.readonly:
            msg = "Unexpected attempt to unlink object"
            self.log.error(msg)
            raise IOError(errno.EIO, msg)
        if link_name not in parentGrp:
            msg = "Unexpected: did not find link_name: [" + link_name + "]"
            self.log.error(msg)
            raise IOError(errno.EIO, msg)
        try:
            # positional getlink=True: fetch the link object itself,
            # not the object it points to
            linkObj = parentGrp.get(link_name, None, False, True)
        except TypeError:
            # user defined link?
            msg = "Unable to remove link (user-defined link?)"
            self.log.error(msg)
            raise IOError(errno.EIO, msg)
        linkClass = linkObj.__class__.__name__
        # only deal with HardLinks
        linkDeleted = False
        if linkClass == 'HardLink':
            obj = parentGrp[link_name]
            if tgtObj is None or obj == tgtObj:

                numlinks = self.getNumLinksToObject(obj)
                if numlinks == 1:
                    # last link to this object - convert to anonymous object by
                    # creating link under {datasets} or {groups} or {datatypes}
                    # also remove the attribute UUID key
                    addr = h5py.h5o.get_info(obj.id).addr
                    obj_uuid = self.getUUIDByAddress(addr)
                    self.log.info("converting: " + obj_uuid
                                  + " to anonymous obj")
                    dbCol = self.getDBCollection(obj_uuid)
                    del dbCol.attrs[obj_uuid]  # remove the object ref
                    dbCol[obj_uuid] = obj      # add a hardlink
                self.log.info("deleting link: [" + link_name + "] from: "
                              + parentGrp.name)
                del parentGrp[link_name]
                linkDeleted = True
        else:
            self.log.info("unlinkObjectItem: link is not a hardlink, ignoring")
        return linkDeleted
3265
3266    def unlinkObject(self, parentGrp, tgtObj):
3267        for name in parentGrp:
3268            self.unlinkObjectItem(parentGrp, tgtObj, name)
3269        return True
3270
3271    def linkObject(self, parentUUID, childUUID, link_name):
3272        self.initFile()
3273        if self.readonly:
3274            msg = "Unable to create link (Updates are not allowed)"
3275            self.log.info(msg)
3276            raise IOError(errno.EPERM, msg)
3277
3278        parentObj = self.getGroupObjByUuid(parentUUID)
3279        if parentObj is None:
3280            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
3281            self.log.info(msg)
3282            raise IOError(errno.ENXIO, msg)
3283
3284        childObj = self.getDatasetObjByUuid(childUUID)
3285        if childObj is None:
3286            # maybe it's a group...
3287            childObj = self.getGroupObjByUuid(childUUID)
3288        if childObj is None:
3289            # or maybe it's a committed datatype...
3290            childObj = self.getCommittedTypeObjByUuid(childUUID)
3291        if childObj is None:
3292            msg = "Unable to link item, child UUID: " + childUUID + " not found"
3293            self.log.info(msg)
3294            raise IOError(errno.ENXIO, msg)
3295        if link_name in parentObj:
3296            # link already exists
3297            self.log.info("linkname already exists, deleting")
3298            self.unlinkObjectItem(parentObj, None, link_name)
3299        parentObj[link_name] = childObj
3300
3301        # convert this from an anonymous object to ref if needed
3302        dbCol = self.getDBCollection(childUUID)
3303        if childUUID in dbCol:
3304            # convert to a ref
3305            del dbCol[childUUID]  # remove hardlink
3306            dbCol.attrs[childUUID] = childObj.ref # create a ref
3307
3308        # set link timestamps
3309        now = time.time()
3310        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
3311        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
3312        return True
3313
3314    def createSoftLink(self, parentUUID, linkPath, link_name):
3315        self.initFile()
3316        if self.readonly:
3317            msg = "Unable to create link (Updates are not allowed)"
3318            self.log.info(msg)
3319            raise IOError(errno.EPERM, msg)
3320        parentObj = self.getGroupObjByUuid(parentUUID)
3321        if parentObj is None:
3322            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
3323            self.log.info(msg)
3324            raise IOError(errno.ENXIO, msg)
3325        if link_name in parentObj:
3326            # link already exists
3327            self.log.info("linkname already exists, deleting")
3328            del parentObj[link_name]  # delete old link
3329        parentObj[link_name] = h5py.SoftLink(linkPath)
3330
3331        now = time.time()
3332        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
3333        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
3334
3335        return True
3336
3337    def createExternalLink(self, parentUUID, extPath, linkPath, link_name):
3338        self.initFile()
3339        if self.readonly:
3340            msg = "Unable to create link (Updates are not allowed)"
3341            self.log.info(msg)
3342            raise IOError(errno.EPERM, msg)
3343        parentObj = self.getGroupObjByUuid(parentUUID)
3344        if parentObj is None:
3345            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
3346            self.log.info(msg)
3347            raise IOError(errno.ENXIO, msg)
3348        if link_name in parentObj:
3349            # link already exists
3350            self.log.info("linkname already exists, deleting")
3351            del parentObj[link_name]  # delete old link
3352        parentObj[link_name] = h5py.ExternalLink(extPath, linkPath)
3353
3354        now = time.time()
3355        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
3356        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
3357
3358        return True
3359
3360    def createGroup(self, obj_uuid=None):
3361        self.initFile()
3362        if self.readonly:
3363            msg = "Unable to create group (Updates are not allowed)"
3364            self.log.info(msg)
3365            raise IOError(errno.EPERM, msg)
3366        groups = self.dbGrp["{groups}"]
3367        if not obj_uuid:
3368            obj_uuid = str(uuid.uuid1())
3369        newGroup = groups.create_group(obj_uuid)
3370        # store reverse map as an attribute
3371        addr = h5py.h5o.get_info(newGroup.id).addr
3372        addrGrp = self.dbGrp["{addr}"]
3373        addrGrp.attrs[str(addr)] = obj_uuid
3374
3375        #set timestamps
3376        now = time.time()
3377        self.setCreateTime(obj_uuid, timestamp=now)
3378        self.setModifiedTime(obj_uuid, timestamp=now)
3379
3380        return obj_uuid
3381
3382    def getNumberOfGroups(self):
3383        self.initFile()
3384        count = 0
3385        groups = self.dbGrp["{groups}"]
3386        count += len(groups)        # anonymous groups
3387        count += len(groups.attrs)  # linked groups
3388        count += 1                  # add of for root group
3389
3390        return count
3391
3392    def getNumberOfDatasets(self):
3393        self.initFile()
3394        count = 0
3395        datasets = self.dbGrp["{datasets}"]
3396        count += len(datasets)        # anonymous datasets
3397        count += len(datasets.attrs)  # linked datasets
3398        return count
3399
3400    def getNumberOfDatatypes(self):
3401        self.initFile()
3402        count = 0
3403        datatypes = self.dbGrp["{datatypes}"]
3404        count += len(datatypes)        # anonymous datatypes
3405        count += len(datatypes.attrs)  # linked datatypes
3406        return count
3407