1############################################################################## 2# Copyright by The HDF Group. # 3# All rights reserved. # 4# # 5# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and # 6# Utilities. The full HDF5 REST Server copyright notice, including # 7# terms governing use, modification, and redistribution, is contained in # 8# the file COPYING, which can be found at the root of the source code # 9# distribution tree. If you do not have access to this file, you may # 10# request a copy from help@hdfgroup.org. # 11############################################################################## 12 13from __future__ import absolute_import 14 15import six 16 17if six.PY3: 18 unicode = str 19 20 21 22""" 23This class is used to manage UUID lookup tables for primary HDF objects (Groups, Datasets, 24 and Datatypes). For HDF5 files that are read/write, this information is managed within 25 the file itself in the "__db__" group. For read-only files, the data is managed in 26 an external file (domain filename with ".db" extension). 27 28 "___db__" ("root" for read-only case) 29 description: Group object (member of root group). Only objects below this group are used 30 for UUID data 31 members: "{groups}", "{datasets}", "{datatypes}", "{objects}", "{paths}" 32 attrs: 'rootUUID': UUID of the root group 33 34"{groups}" 35 description: contains map of UUID->group objects 36 members: hard link to each anonymous group (i.e. groups which are not 37 linked to by anywhere else). Link name is the UUID 38 attrs: group reference (or path for read-only files) to the group (for non- 39 anonymous groups). 40 41"{datasets}" 42 description: contains map of UUID->dataset objects 43 members: hard link to each anonymous dataset (i.e. datasets which are not 44 linked to by anywhere else). Link name is the UUID 45 attrs: dataset reference (or path for read-only files) to the dataset (for non- 46 anonymous datasets). 47 48"{dataset_props}: 49 description contains dataset creation properties" 50 members: sub-group with link name as UUID. Sub-group attributes are the creation props 51 52"{datatypes}" 53 description: contains map of UUID->datatyped objects 54 members: hard link to each anonymous datatype (i.e. datatypes which are not 55 linked to by anywhere else). Link name is the UUID 56 attrs: datatype reference (or path for read-only files) to the datatype (for non- 57 anonymous datatypes). 58 59"{addr}" 60 description: contains map of file offset to UUID. 61 members: none 62 attrs: map of file offset to UUID 63 64 65 66 67""" 68import errno 69import time 70import h5py 71import numpy as np 72import uuid 73import os.path as op 74import os 75import json 76import logging 77 78from .hdf5dtype import getTypeItem, createDataType, getItemSize 79 80# global dictionary to direct back to the Hdf5db instance by filename 81# (needed for visititems callback) 82# Will break in multi-threaded context 83_db = {} 84 85UUID_LEN = 36 # length for uuid strings 86 87# standard compress filters 88_HDF_FILTERS = { 89 1: {'class': 'H5Z_FILTER_DEFLATE', 'alias': 'gzip', 'options': ['level']}, 90 2: {'class': 'H5Z_FILTER_SHUFFLE', 'alias': 'shuffle'}, 91 3: {'class': 'H5Z_FILTER_FLETCHER32', 'alias': 'fletcher32'}, 92 4: {'class': 'H5Z_FILTER_SZIP', 'alias': 'szip', 'options': ['bitsPerPixel', 'coding', 'pixelsPerBlock', 'pixelsPerScanLine']}, 93 5: {'class': 'H5Z_FILTER_NBIT'}, 94 6: {'class': 'H5Z_FILTER_SCALEOFFSET', 'alias': 'scaleoffset', 'options': ['scaleType']}, 95 32000: {'class': 'H5Z_FILTER_LZF', 'alias': 'lzf'} 96} 97 98_HDF_FILTER_OPTION_ENUMS = {'coding': {h5py.h5z.SZIP_EC_OPTION_MASK: 'H5_SZIP_EC_OPTION_MASK', 99 h5py.h5z.SZIP_NN_OPTION_MASK: 'H5_SZIP_NN_OPTION_MASK'}, 100 'scaleType': {h5py.h5z.SO_FLOAT_DSCALE: 'H5Z_SO_FLOAT_DSCALE', 101 h5py.h5z.SO_FLOAT_ESCALE: 'H5Z_SO_FLOAT_ESCALE', 102 h5py.h5z.SO_INT: 'H5Z_SO_INT'}} 103 104# h5py supported filters 105_H5PY_FILTERS = {'gzip': 1, 106 'shuffle': 2, 107 'fletcher32': 3, 108 'szip': 4, 109 'scaleoffset': 6, 110 'lzf': 32000} 111 112_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip") 113 114 115def visitObj(path, obj): 116 hdf5db = _db[obj.file.filename] 117 hdf5db.visit(path, obj) 118 119 120class Hdf5db: 121 122 @staticmethod 123 def createHDF5File(filePath): 124 # create an "empty" hdf5 file 125 if op.isfile(filePath): 126 raise IOError(errno.EEXIST, "Resource already exists") 127 128 f = h5py.File(filePath, 'w') 129 f.close() 130 131 @staticmethod 132 def getVersionInfo(): 133 versionInfo = {} 134 versionInfo['hdf5-json-version'] = "1.1.1" # todo - have this auto-synch with package version 135 versionInfo['h5py_version'] = h5py.version.version 136 versionInfo['hdf5_version'] = h5py.version.hdf5_version 137 return versionInfo 138 139 def __init__(self, filePath, dbFilePath=None, readonly=False, 140 app_logger=None, root_uuid=None, update_timestamps=True, 141 userid=None): 142 if app_logger: 143 self.log = app_logger 144 else: 145 self.log = logging.getLogger() 146 if len(filePath) == 0 or not op.isfile(filePath): 147 raise IOError(errno.ENXIO, "file not found") 148 if not h5py.is_hdf5(filePath): 149 raise IOError(errno.EINVAL, "not an HDF5 file") 150 151 mode = 'r' 152 if readonly: 153 self.readonly = True 154 else: 155 if not os.stat(filePath).st_mode & 0o200: 156 # file is read-only 157 self.readonly = True 158 else: 159 mode = 'r+' 160 self.readonly = False 161 162 163 self.log.info("init -- filePath: " + filePath + " mode: " + mode) 164 165 self.update_timestamps = update_timestamps 166 167 self.f = h5py.File(filePath, mode, libver='latest') 168 169 self.root_uuid = root_uuid 170 171 if self.readonly: 172 # for read-only files, add a dot in front of the name to be used as 173 # the db file. This won't collide with actual data files, since 174 # "." is not allowed as the first character in a domain name. 175 if not dbFilePath: 176 dirname = op.dirname(self.f.filename) 177 basename = op.basename(self.f.filename) 178 if len(dirname) > 0: 179 dbFilePath = dirname + '/.' + basename 180 else: 181 dbFilePath = '.' + basename 182 dbMode = 'r+' 183 if not op.isfile(dbFilePath): 184 dbMode = 'w' 185 self.log.info("dbFilePath: " + dbFilePath + " mode: " + dbMode) 186 self.dbf = h5py.File(dbFilePath, dbMode) 187 else: 188 self.dbf = None # for read only 189 # create a global reference to this class 190 # so visitObj can call back 191 _db[filePath] = self 192 193 def __enter__(self): 194 self.log.info('Hdf5db __enter') 195 return self 196 197 def __exit__(self, type, value, traceback): 198 self.log.info('Hdf5db __exit') 199 filename = self.f.filename 200 self.f.flush() 201 self.f.close() 202 if self.dbf: 203 self.dbf.flush() 204 self.dbf.close() 205 del _db[filename] 206 207 def getTimeStampName(self, uuid, objType="object", name=None): 208 ts_name = uuid 209 if objType != "object": 210 if len(name) == 0: 211 self.log.error("empty name passed to setCreateTime") 212 raise Exception("bad setCreateTimeParameter") 213 if objType == "attribute": 214 ts_name += "_attr:[" 215 ts_name += name 216 ts_name += "]" 217 elif objType == "link": 218 ts_name += "_link:[" 219 ts_name += name 220 ts_name += "]" 221 else: 222 msg = "Bad objType passed to setCreateTime" 223 self.log.error(msg) 224 raise IOError(errno.EIO, msg) 225 return ts_name 226 227 """ 228 setCreateTime - sets the create time timestamp for the 229 given object. 230 uuid - id of object 231 objtype - one of "object", "link", "attribute" 232 name - name (for attributes, links... ignored for objects) 233 timestamp - time (otherwise current time will be used) 234 235 returns - nothing 236 237 Note - should only be called once per object 238 """ 239 def setCreateTime(self, uuid, objType="object", name=None, timestamp=None): 240 if not self.update_timestamps: 241 return 242 ctime_grp = self.dbGrp["{ctime}"] 243 ts_name = self.getTimeStampName(uuid, objType, name) 244 if timestamp is None: 245 timestamp = time.time() 246 if ts_name in ctime_grp.attrs: 247 self.log.warning("modifying create time for object: " + ts_name) 248 ctime_grp.attrs.create(ts_name, timestamp, dtype='int64') 249 250 """ 251 getCreateTime - gets the create time timestamp for the 252 given object. 253 uuid - id of object 254 objtype - one of "object", "link", "attribute" 255 name - name (for attributes, links... ignored for objects) 256 useRoot - if true, use the time value for root object as default 257 258 returns - create time for object, or create time for root if not set 259 """ 260 def getCreateTime(self, uuid, objType="object", name=None, useRoot=True): 261 ctime_grp = self.dbGrp["{ctime}"] 262 ts_name = self.getTimeStampName(uuid, objType, name) 263 timestamp = None 264 if ts_name in ctime_grp.attrs: 265 timestamp = ctime_grp.attrs[ts_name] 266 elif useRoot: 267 # return root timestamp 268 root_uuid = self.dbGrp.attrs["rootUUID"] 269 if root_uuid in ctime_grp.attrs: 270 timestamp = ctime_grp.attrs[root_uuid] 271 return timestamp 272 273 """ 274 setModifiedTime - sets the modified time timestamp for the 275 given object. 276 uuid - id of object 277 objtype - one of "object", "link", "attribute" 278 name - name (for attributes, links... ignored for objects) 279 timestamp - time (otherwise current time will be used) 280 281 returns - nothing 282 283 """ 284 def setModifiedTime(self, uuid, objType="object", name=None, timestamp=None): 285 if not self.update_timestamps: 286 return 287 mtime_grp = self.dbGrp["{mtime}"] 288 ts_name = self.getTimeStampName(uuid, objType, name) 289 if timestamp is None: 290 timestamp = time.time() 291 mtime_grp.attrs.create(ts_name, timestamp, dtype='int64') 292 293 """ 294 getModifiedTime - gets the modified time timestamp for the 295 given object. 296 uuid - id of object 297 objtype - one of "object", "link", "attribute" 298 name - name (for attributes, links... ignored for objects) 299 useRoot - if true, use the time value for root object as default 300 301 returns - create time for object, or create time for root if not set 302 """ 303 def getModifiedTime(self, uuid, objType="object", name=None, useRoot=True): 304 mtime_grp = self.dbGrp["{mtime}"] 305 ts_name = self.getTimeStampName(uuid, objType, name) 306 timestamp = None 307 if ts_name in mtime_grp.attrs: 308 timestamp = mtime_grp.attrs[ts_name] 309 else: 310 # return create time if no modified time has been set 311 ctime_grp = self.dbGrp["{ctime}"] 312 if ts_name in ctime_grp.attrs: 313 timestamp = ctime_grp.attrs[ts_name] 314 elif useRoot: 315 # return root timestamp 316 root_uuid = self.dbGrp.attrs["rootUUID"] 317 timestamp = mtime_grp.attrs[root_uuid] 318 return timestamp 319 320 """ 321 getAclGroup - return the db group "{acl}" if present, 322 otherwise return None 323 """ 324 def getAclGroup(self, create=False): 325 if not self.dbGrp: 326 return None # file not initialized 327 if "{acl}" in self.dbGrp: 328 return self.dbGrp["{acl}"] 329 if not create: 330 return None 331 self.dbGrp.create_group("{acl}") 332 return self.dbGrp["{acl}"] 333 334 """ 335 getAclDtype - return detype for ACL 336 """ 337 def getAclDtype(self): 338 fields = [] 339 fields.append(('userid', np.int32)) 340 fields.append(('create', np.int8)) 341 fields.append(('read', np.int8)) 342 fields.append(('update', np.int8)) 343 fields.append(('delete', np.int8)) 344 fields.append(('readACL', np.int8)) 345 fields.append(('updateACL', np.int8)) 346 dt = np.dtype(fields) 347 return dt 348 349 """ 350 getAclDataset - return ACL datset for given uuid 351 """ 352 def getAclDataset(self, obj_uuid, create=False): 353 acl_group = self.getAclGroup(create=create) 354 355 if acl_group is None: 356 return None 357 358 if obj_uuid in acl_group: 359 return acl_group[obj_uuid] 360 361 if not create: 362 return None 363 364 # create dataset 365 dt = self.getAclDtype() 366 acl_group.create_dataset(obj_uuid, (0,), dtype=dt, maxshape=(None,)) 367 return acl_group[obj_uuid] 368 369 """ 370 getNumAcls - return number of acls associatted with given uuid 371 """ 372 def getNumAcls(self, obj_uuid): 373 acl_group = self.getAclGroup() 374 if acl_group is None: 375 return 0 376 if obj_uuid not in acl_group: 377 return 0 378 acls = acl_group[obj_uuid] 379 return acls.shape[0] 380 381 """ 382 convertAclNdArrayToDict - helper function - return acl item to dict 383 """ 384 def convertAclNdArrayToDict(self, acl_ndarray): 385 fields = acl_ndarray.dtype.fields.keys() 386 acl = {} 387 for field in fields: 388 value = int(acl_ndarray[field]) 389 acl[field] = value 390 return acl 391 392 """ 393 Get default acl - returns dict obj 394 """ 395 def getDefaultAcl(self): 396 dt = self.getAclDtype() 397 acl = {} 398 for field in dt.fields.keys(): 399 if field == 'userid': 400 acl[field] = 0 401 else: 402 acl[field] = 1 # default is allowed 403 return acl 404 405 """ 406 getAcl - return ACL for given uuid and userid 407 returns ACL associated with the given uuid, or if none exists, 408 the ACL associatted with the root group. 409 410 If an ACL is not present for a userid/obj and ACL will be returned 411 via the following precedence: 412 413 1) obj_uuid, user_id 414 2) root_uuid, user_id 415 3) obj_uuid, 0 416 4) root_uuid, 0 417 5) 'all perm' ACL 418 """ 419 def getAcl(self, obj_uuid, userid): 420 acl_grp = self.getAclGroup() 421 422 if acl_grp is not None: 423 acl = self.getAclByObjAndUser(obj_uuid, userid) 424 if acl is not None: 425 return acl 426 427 if obj_uuid != self.root_uuid and userid != 0: 428 # get the root acl for this user 429 acl = self.getAclByObjAndUser(self.root_uuid, userid) 430 if acl is not None: 431 return acl 432 433 if userid != 0: 434 # get acl for default user 435 acl = self.getAclByObjAndUser(obj_uuid, 0) 436 if acl is not None: 437 return acl 438 439 if obj_uuid != self.root_uuid: 440 # get root acl for default user 441 acl = self.getAclByObjAndUser(self.root_uuid, 0) 442 if acl is not None: 443 return acl 444 445 # create an ACL with default permissions 446 acl = self.getDefaultAcl() 447 448 return acl 449 450 """ 451 get ACL for specific uuid and user 452 return None if not found 453 """ 454 def getAclByObjAndUser(self, obj_uuid, userid): 455 456 acl = None 457 acl_dset = self.getAclDataset(obj_uuid) 458 459 if acl_dset: 460 # iterate through elements, looking for user_id 461 acls = acl_dset[...] 462 num_acls = acl_dset.shape[0] 463 acl = None 464 for i in range(num_acls): 465 item = acls[i] 466 if item['userid'] == userid: 467 acl = item 468 break 469 470 if acl is not None: 471 acl = self.convertAclNdArrayToDict(acl) 472 return acl 473 474 """ 475 getAcls - get all acls for given uuid 476 """ 477 478 def getAcls(self, obj_uuid): 479 480 acls = [] 481 acl_dset = self.getAclDataset(obj_uuid) 482 483 if acl_dset: 484 # iterate through elements, looking for user_id 485 num_acls = acl_dset.shape[0] 486 487 for i in range(num_acls): 488 item = acl_dset[i] 489 acl = self.convertAclNdArrayToDict(item) 490 acls.append(acl) 491 492 return acls 493 494 """ 495 setAcl - set the acl for given uuid. 496 """ 497 def setAcl(self, obj_uuid, acl): 498 acl_dset = self.getAclDataset(obj_uuid, create=True) 499 500 if acl_dset is None: 501 msg = "Unexpected error acl not created for uuid:[" + obj_uuid + "]" 502 self.log.error(msg) 503 raise IOError(errno.EIO, msg) 504 505 userid = acl['userid'] 506 507 # iterate through elements, looking for user_id 508 acls = acl_dset[...] 509 num_acls = acl_dset.shape[0] 510 511 user_index = None 512 513 for i in range(num_acls): 514 item = acls[i] 515 if item['userid'] == userid: 516 # update this element 517 user_index = i 518 break 519 520 if user_index is None: 521 # userid not found - add row 522 acl_dset.resize(((num_acls+1),)) 523 user_index = num_acls 524 525 # update the acl dataset 526 item = acl_dset[user_index] 527 for field in acl.keys(): 528 item[field] = acl[field] 529 acl_dset[user_index] = item # save back to the file 530 531 def initFile(self): 532 # self.log.info("initFile") 533 if self.readonly: 534 self.dbGrp = self.dbf 535 if "{groups}" in self.dbf: 536 # file already initialized 537 self.root_uuid = self.dbGrp.attrs["rootUUID"] 538 return 539 540 else: 541 if "__db__" in self.f: 542 # file already initialized 543 self.dbGrp = self.f["__db__"] 544 self.root_uuid = self.dbGrp.attrs["rootUUID"] 545 return # already initialized 546 self.dbGrp = self.f.create_group("__db__") 547 548 self.log.info("initializing file") 549 if not self.root_uuid: 550 self.root_uuid = str(uuid.uuid1()) 551 self.dbGrp.attrs["rootUUID"] = self.root_uuid 552 self.dbGrp.create_group("{groups}") 553 self.dbGrp.create_group("{datasets}") 554 self.dbGrp.create_group("{datatypes}") 555 self.dbGrp.create_group("{addr}") # store object address 556 self.dbGrp.create_group("{ctime}") # stores create timestamps 557 self.dbGrp.create_group("{mtime}") # store modified timestamps 558 559 mtime = op.getmtime(self.f.filename) 560 ctime = mtime 561 self.setCreateTime(self.root_uuid, timestamp=ctime) 562 self.setModifiedTime(self.root_uuid, timestamp=mtime) 563 564 self.f.visititems(visitObj) 565 566 def visit(self, path, obj): 567 name = obj.__class__.__name__ 568 if len(path) >= 6 and path[:6] == '__db__': 569 return # don't include the db objects 570 self.log.info('visit: ' + path + ' name: ' + name) 571 col = None 572 if name == 'Group': 573 col = self.dbGrp["{groups}"].attrs 574 elif name == 'Dataset': 575 col = self.dbGrp["{datasets}"].attrs 576 elif name == 'Datatype': 577 col = self.dbGrp["{datatypes}"].attrs 578 else: 579 msg = "Unknown object type: " + __name__ + " found during scan of HDF5 file" 580 self.log.error(msg) 581 raise IOError(errno.EIO, msg) 582 uuid1 = uuid.uuid1() # create uuid 583 id = str(uuid1) 584 addrGrp = self.dbGrp["{addr}"] 585 if not self.readonly: 586 # storing db in the file itself, so we can link to the object directly 587 col[id] = obj.ref # save attribute ref to object 588 else: 589 #store path to object 590 col[id] = obj.name 591 addr = h5py.h5o.get_info(obj.id).addr 592 # store reverse map as an attribute 593 addrGrp.attrs[str(addr)] = id 594 595 # 596 # Get Datset creation properties 597 # 598 def getDatasetCreationProps(self, dset_uuid): 599 prop_list = {} 600 if "{dataset_props}" not in self.dbGrp: 601 # no, group, so no properties 602 return prop_list # return empty dict 603 dbPropsGrp = self.dbGrp["{dataset_props}"] 604 605 if dset_uuid not in dbPropsGrp.attrs: 606 return prop_list # return empty dict 607 prop_str = dbPropsGrp.attrs[dset_uuid] 608 # expand json string 609 try: 610 prop_list = json.loads(prop_str) 611 except ValueError as ve: 612 msg = "Unable to load creation properties for dataset:[" + dset_uuid + "]: " + ve.message 613 self.log.error(msg) 614 raise IOError(errno.EIO, msg) 615 616 # fill in Filter class values 617 if 'filters' in prop_list: 618 prop_filters = prop_list['filters'] 619 for prop_filter in prop_filters: 620 if 'class' not in prop_filter: 621 filter_id = prop_filter['id'] 622 if filter_id in _HDF_FILTERS: 623 hdf_filter = _HDF_FILTERS[filter_id] 624 prop_filter['class'] = hdf_filter['class'] 625 else: 626 prop_filter['class'] = 'H5Z_FILTER_USER' 627 628 return prop_list 629 630 # 631 # Set dataset creation property 632 # 633 def setDatasetCreationProps(self, dset_uuid, prop_dict): 634 self.log.info('setDataProp([' + dset_uuid + ']') 635 if not prop_dict: 636 # just ignore if empty dictionary 637 return 638 if "{dataset_props}" not in self.dbGrp: 639 self.dbGrp.create_group("{dataset_props}") 640 dbPropsGrp = self.dbGrp["{dataset_props}"] 641 if dset_uuid in dbPropsGrp.attrs: 642 # this should be write once 643 msg = "Unexpected error setting dataset creation properties for dataset:[" + dset_uuid + "]" 644 self.log.error(msg) 645 raise IOError(errno.EIO, msg) 646 prop_str = json.dumps(prop_dict) 647 dbPropsGrp.attrs[dset_uuid] = prop_str 648 649 def getUUIDByAddress(self, addr): 650 if "{addr}" not in self.dbGrp: 651 self.log.error("expected to find {addr} group") 652 return None 653 addrGrp = self.dbGrp["{addr}"] 654 obj_uuid = None 655 if str(addr) in addrGrp.attrs: 656 obj_uuid = addrGrp.attrs[str(addr)] 657 if obj_uuid and type(obj_uuid) is not str: 658 # convert bytes to unicode 659 obj_uuid = obj_uuid.decode('utf-8') 660 return obj_uuid 661 662 """ 663 Get the number of links in a group to an object 664 """ 665 def getNumLinksToObjectInGroup(self, grp, obj): 666 objAddr = h5py.h5o.get_info(obj.id).addr 667 numLinks = 0 668 for name in grp: 669 try: 670 child = grp[name] 671 except KeyError: 672 # UDLink? Ignore for now 673 self.log.info("ignoring link (UDLink?): " + name) 674 continue 675 676 addr = h5py.h5o.get_info(child.id).addr 677 if addr == objAddr: 678 numLinks = numLinks + 1 679 680 return numLinks 681 682 """ 683 Get the number of links to the given object 684 """ 685 def getNumLinksToObject(self, obj): 686 self.initFile() 687 groups = self.dbGrp["{groups}"] 688 numLinks = 0 689 # iterate through each group in the file and unlink tgt if it is linked 690 # by the group 691 for uuidName in groups: 692 # iterate through anonymous groups 693 grp = groups[uuidName] 694 nLinks = self.getNumLinksToObjectInGroup(grp, obj) 695 if nLinks > 0: 696 numLinks += nLinks 697 for uuidName in groups.attrs: 698 # now non anonymous groups 699 grpRef = groups.attrs[uuidName] 700 grp = self.f[grpRef] # dereference 701 nLinks = self.getNumLinksToObjectInGroup(grp, obj) 702 if nLinks > 0: 703 numLinks += nLinks 704 # finally, check the root group 705 root = self.getObjByPath("/") 706 nLinks = self.getNumLinksToObjectInGroup(root, obj) 707 numLinks += nLinks 708 709 return numLinks 710 711 def getUUIDByPath(self, path): 712 self.initFile() 713 self.log.info("getUUIDByPath: [" + path + "]") 714 if len(path) >= 6 and path[:6] == '__db__': 715 msg = "getUUIDByPath called with invalid path: [" + path + "]" 716 self.log.error(msg) 717 raise IOError(errno.EIO, msg) 718 if path == '/': 719 # just return the root UUID 720 root_uuid = self.dbGrp.attrs["rootUUID"] 721 if root_uuid and type(root_uuid) is not str: 722 # convert bytes to unicode 723 root_uuid = root_uuid.decode('utf-8') 724 return root_uuid 725 726 obj = self.f[path] # will throw KeyError if object doesn't exist 727 addr = h5py.h5o.get_info(obj.id).addr 728 obj_uuid = self.getUUIDByAddress(addr) 729 return obj_uuid 730 731 def getObjByPath(self, path): 732 if len(path) >= 6 and path[:6] == '__db__': 733 return None # don't include the db objects 734 obj = self.f[path] # will throw KeyError if object doesn't exist 735 return obj 736 737 def getObjectByUuid(self, col_type, obj_uuid): 738 #col_type should be either "datasets", "groups", or "datatypes" 739 if col_type not in ("datasets", "groups", "datatypes"): 740 msg = "Unexpectd error, invalid col_type: [" + col_type + "]" 741 self.log.error(msg) 742 raise IOError(errno.EIO, msg) 743 if col_type == "groups" and obj_uuid == self.dbGrp.attrs["rootUUID"]: 744 return self.f['/'] # returns root group 745 746 obj = None # Group, Dataset, or Datatype 747 col_name = '{' + col_type + '}' 748 # get the collection group for this collection type 749 col = self.dbGrp[col_name] 750 if obj_uuid in col.attrs: 751 ref = col.attrs[obj_uuid] 752 obj = self.f[ref] # this works for read-only as well 753 elif obj_uuid in col: 754 # anonymous object 755 obj = col[obj_uuid] 756 757 return obj 758 759 def getDatasetObjByUuid(self, obj_uuid): 760 self.initFile() 761 self.log.info("getDatasetObjByUuid(" + obj_uuid + ")") 762 763 obj = self.getObjectByUuid("datasets", obj_uuid) 764 765 return obj 766 767 def getGroupObjByUuid(self, obj_uuid): 768 self.initFile() 769 self.log.info("getGroupObjByUuid(" + obj_uuid + ")") 770 771 obj = self.getObjectByUuid("groups", obj_uuid) 772 773 return obj 774 775 def getDatasetTypeItemByUuid(self, obj_uuid): 776 dset = self.getDatasetObjByUuid(obj_uuid) # throws exception if not found 777 item = { 'id': obj_uuid } 778 item['type'] = getTypeItem(dset.dtype) 779 if self.update_timestamps: 780 item['ctime'] = self.getCreateTime(obj_uuid) 781 item['mtime'] = self.getModifiedTime(obj_uuid) 782 783 return item 784 785 """ 786 getNullReference - return a null object reference 787 """ 788 def getNullReference(self): 789 tmpGrp = None 790 if "{tmp}" not in self.dbGrp: 791 tmpGrp = self.dbGrp.create_group("{tmp}") 792 else: 793 tmpGrp = self.dbGrp["{tmp}"] 794 if 'nullref' not in tmpGrp: 795 dt = h5py.special_dtype(ref=h5py.Reference) 796 tmpGrp.create_dataset('nullref', (1,), dtype=dt) 797 nullref_dset = tmpGrp['nullref'] 798 return nullref_dset[0] 799 800 """ 801 getNullRegionReference - return a null region reference 802 """ 803 def getNullRegionReference(self): 804 tmpGrp = None 805 if "{tmp}" not in self.dbGrp: 806 tmpGrp = self.dbGrp.create_group("{tmp}") 807 else: 808 tmpGrp = self.dbGrp["{tmp}"] 809 if 'nullregref' not in tmpGrp: 810 dt = h5py.special_dtype(ref=h5py.RegionReference) 811 tmpGrp.create_dataset('nullregref', (1,), dtype=dt) 812 nullregref_dset = tmpGrp['nullregref'] 813 return nullregref_dset[0] 814 815 def getShapeItemByDsetObj(self, obj): 816 item = {} 817 if obj.shape is None: 818 # new with h5py 2.6, null space datasets will return None for shape 819 item['class'] = 'H5S_NULL' 820 elif len(obj.shape) == 0: 821 # check to see if this is a null space vs a scalar dataset we'll do 822 # this by seeing if an exception is raised when reading the dataset 823 # h5py issue https://github.com/h5py/h5py/issues/279 will provide a 824 # better way to determine null spaces 825 # Update 3/10/17: Above issue is closed, but waiting on 2.7 final release 826 try: 827 val = obj[...] 828 if val is None: 829 self.log.warning("no value returned for scalar dataset") 830 item['class'] = 'H5S_SCALAR' 831 except IOError: 832 item['class'] = 'H5S_NULL' 833 else: 834 item['class'] = 'H5S_SIMPLE' 835 item['dims'] = obj.shape 836 maxshape = [] 837 include_maxdims = False 838 for i in range(len(obj.shape)): 839 extent = 0 840 if len(obj.maxshape) > i: 841 extent = obj.maxshape[i] 842 if extent is None: 843 extent = 0 844 if extent > obj.shape[i] or extent == 0: 845 include_maxdims = True 846 maxshape.append(extent) 847 if include_maxdims: 848 item['maxdims'] = maxshape 849 return item 850 851 def getShapeItemByAttrObj(self, obj): 852 item = {} 853 if obj.shape is None or obj.get_storage_size() == 0: 854 # If storage size is 0, assume this is a null space obj 855 # See: h5py issue https://github.com/h5py/h5py/issues/279 856 item['class'] = 'H5S_NULL' 857 else: 858 if obj.shape: 859 item['class'] = 'H5S_SIMPLE' 860 item['dims'] = obj.shape 861 else: 862 item['class'] = 'H5S_SCALAR' 863 return item 864 865 # 866 # Get dataset creation properties maintained by HDF5 library 867 # 868 def getHDF5DatasetCreationProperties(self, obj_uuid, type_class): 869 dset = self.getDatasetObjByUuid(obj_uuid) 870 # 871 # Fill in creation properties 872 # 873 creationProps = {} 874 plist = h5py.h5d.DatasetID.get_create_plist(dset.id) 875 876 # alloc time 877 nAllocTime = plist.get_alloc_time() 878 if nAllocTime == h5py.h5d.ALLOC_TIME_DEFAULT: 879 creationProps['allocTime'] = 'H5D_ALLOC_TIME_DEFAULT' 880 elif nAllocTime == h5py.h5d.ALLOC_TIME_LATE: 881 creationProps['allocTime'] = 'H5D_ALLOC_TIME_LATE' 882 elif nAllocTime == h5py.h5d.ALLOC_TIME_EARLY: 883 creationProps['allocTime'] = 'H5D_ALLOC_TIME_EARLY' 884 elif nAllocTime == h5py.h5d.ALLOC_TIME_INCR: 885 creationProps['allocTime'] = 'H5D_ALLOC_TIME_INCR' 886 else: 887 self.log.warning("Unknown alloc time value: " + str(nAllocTime)) 888 889 # fill time 890 nFillTime = plist.get_fill_time() 891 if nFillTime == h5py.h5d.FILL_TIME_ALLOC: 892 creationProps['fillTime'] = 'H5D_FILL_TIME_ALLOC' 893 elif nFillTime == h5py.h5d.FILL_TIME_NEVER: 894 creationProps['fillTime'] = 'H5D_FILL_TIME_NEVER' 895 elif nFillTime == h5py.h5d.FILL_TIME_IFSET: 896 creationProps['fillTime'] = 'H5D_FILL_TIME_IFSET' 897 else: 898 self.log.warning("unknown fill time value: " + str(nFillTime)) 899 900 if type_class not in ('H5T_VLEN', 'H5T_OPAQUE'): 901 if plist.fill_value_defined() == h5py.h5d.FILL_VALUE_USER_DEFINED: 902 creationProps['fillValue'] = self.bytesArrayToList(dset.fillvalue) 903 904 # layout 905 nLayout = plist.get_layout() 906 if nLayout == h5py.h5d.COMPACT: 907 creationProps['layout'] = {'class': 'H5D_COMPACT'} 908 elif nLayout == h5py.h5d.CONTIGUOUS: 909 creationProps['layout'] = {'class': 'H5D_CONTIGUOUS'} 910 elif nLayout == h5py.h5d.CHUNKED: 911 creationProps['layout'] = {'class': 'H5D_CHUNKED', 'dims': dset.chunks } 912 else: 913 self.log.warning("Unknown layout value:" + str(nLayout)) 914 915 num_filters = plist.get_nfilters() 916 filter_props = [] 917 if num_filters: 918 for n in range(num_filters): 919 filter_info = plist.get_filter(n) 920 opt_values = filter_info[2] 921 filter_prop = {} 922 filter_id = filter_info[0] 923 filter_prop['id'] = filter_id 924 if filter_info[3]: 925 filter_prop['name'] = self.bytesArrayToList(filter_info[3]) 926 if filter_id in _HDF_FILTERS: 927 hdf_filter = _HDF_FILTERS[filter_id] 928 filter_prop['class'] = hdf_filter['class'] 929 if 'options' in hdf_filter: 930 filter_opts = hdf_filter['options'] 931 for i in range(len(filter_opts)): 932 if len(opt_values) <= i: 933 break # end of option values 934 opt_value = opt_values[i] 935 opt_value_enum = None 936 option_name = filter_opts[i] 937 if option_name in _HDF_FILTER_OPTION_ENUMS: 938 option_enums = _HDF_FILTER_OPTION_ENUMS[option_name] 939 if opt_value in option_enums: 940 opt_value_enum = option_enums[opt_value] 941 if opt_value_enum: 942 filter_prop[option_name] = opt_value_enum 943 else: 944 filter_prop[option_name] = opt_value 945 else: 946 # custom filter 947 filter_prop['class'] = 'H5Z_FILTER_USER' 948 if opt_values: 949 filter_prop['parameters'] = opt_values 950 filter_props.append(filter_prop) 951 creationProps['filters'] = filter_props 952 953 return creationProps 954 955 # 956 # Get dataset information - type, shape, num attributes, creation properties 957 # 958 def getDatasetItemByUuid(self, obj_uuid): 959 dset = self.getDatasetObjByUuid(obj_uuid) 960 if dset is None: 961 if self.getModifiedTime(obj_uuid, useRoot=False): 962 msg = "Dataset with uuid: " + obj_uuid + " has been previously deleted" 963 self.log.info(msg) 964 raise IOError(errno.ENOENT, msg) 965 else: 966 msg = "Dataset with uuid: " + obj_uuid + " was not found" 967 self.log.info(msg) 968 raise IOError(errno.ENXIO, msg) 969 970 # fill in the item info for the dataset 971 item = { 'id': obj_uuid } 972 973 alias = [] 974 if dset.name and not dset.name.startswith("/__db__"): 975 alias.append(dset.name) # just use the default h5py path for now 976 item['alias'] = alias 977 978 item['attributeCount'] = len(dset.attrs) 979 980 # check if the dataset is using a committed type 981 typeid = h5py.h5d.DatasetID.get_type(dset.id) 982 typeItem = None 983 if h5py.h5t.TypeID.committed(typeid): 984 type_uuid = None 985 addr = h5py.h5o.get_info(typeid).addr 986 type_uuid = self.getUUIDByAddress(addr) 987 committedType = self.getCommittedTypeItemByUuid(type_uuid) 988 typeItem = committedType['type'] 989 typeItem['uuid'] = type_uuid 990 else: 991 typeItem = getTypeItem(dset.dtype) 992 993 item['type'] = typeItem 994 995 # get shape 996 item['shape'] = self.getShapeItemByDsetObj(dset) 997 998 if self.update_timestamps: 999 item['ctime'] = self.getCreateTime(obj_uuid) 1000 item['mtime'] = self.getModifiedTime(obj_uuid) 1001 1002 creationProps = self.getDatasetCreationProps(obj_uuid) 1003 if creationProps: 1004 # if chunks is not in the db props, add it from the dataset prop 1005 # (so auto-chunk values can be returned) 1006 if dset.chunks and 'layout' not in creationProps: 1007 creationProps['layout'] = {'class': 'H5D_CHUNKED', 1008 'dims': dset.chunks} 1009 else: 1010 # no db-tracked creation properties, pull properties from library 1011 creationProps = self.getHDF5DatasetCreationProperties(obj_uuid, typeItem['class']) 1012 1013 if creationProps: 1014 item['creationProperties'] = creationProps 1015 1016 return item 1017 1018 """ 1019 createTypeFromItem - create type given dictionary definition 1020 """ 1021 def createTypeFromItem(self, attr_type): 1022 dt = None 1023 1024 if type(attr_type) in (six.text_type, six.binary_type) and len(attr_type) == UUID_LEN: 1025 # assume attr_type is a uuid of a named datatype 1026 tgt = self.getCommittedTypeObjByUuid(attr_type) 1027 if tgt is None: 1028 msg = "Unable to create attribute, committed type with uuid of: " + attr_type + " not found" 1029 self.log.info(msg) 1030 raise IOError(errno.ENXIO, msg) 1031 dt = tgt # can use the object as the dt parameter 1032 else: 1033 try: 1034 dt = createDataType(attr_type) 1035 except KeyError as ke: 1036 msg = "Unable to create type: " + ke.message 1037 self.log.info(msg) 1038 raise IOError(errno.EINVAL, msg) 1039 except TypeError as te: 1040 msg = "Unable to create type: " + str(te) 1041 self.log.info(msg) 1042 raise IOError(errno.EINVAL, msg) 1043 if dt is None: 1044 msg = "Unexpected error creating type" 1045 self.log.error(msg) 1046 raise IOError(errno, errno.EIO, msg) 1047 return dt 1048 1049 """ 1050 createCommittedType - creates new named datatype 1051 Returns item 1052 """ 1053 def createCommittedType(self, datatype, obj_uuid=None): 1054 self.log.info("createCommittedType") 1055 self.initFile() 1056 if self.readonly: 1057 msg = "Can't create committed type (updates are not allowed)" 1058 self.log.info(msg) 1059 raise IOError(errno.EPERM, msg) 1060 datatypes = self.dbGrp["{datatypes}"] 1061 if not obj_uuid: 1062 obj_uuid = str(uuid.uuid1()) 1063 dt = self.createTypeFromItem(datatype) 1064 1065 datatypes[obj_uuid] = dt 1066 1067 if obj_uuid not in datatypes: 1068 msg = "Unexpected failure to create committed datatype" 1069 self.log.error(msg) 1070 raise IOError(errno.EIO, msg) 1071 newType = datatypes[obj_uuid] # this will be a h5py Datatype class 1072 # store reverse map as an attribute 1073 addr = h5py.h5o.get_info(newType.id).addr 1074 addrGrp = self.dbGrp["{addr}"] 1075 addrGrp.attrs[str(addr)] = obj_uuid 1076 # set timestamp 1077 now = time.time() 1078 self.setCreateTime(obj_uuid, timestamp=now) 1079 self.setModifiedTime(obj_uuid, timestamp=now) 1080 item = { 'id': obj_uuid } 1081 item['attributeCount'] = len(newType.attrs) 1082 #item['type'] = hdf5dtype.getTypeItem(datatype.dtype) 1083 if self.update_timestamps: 1084 item['ctime'] = self.getCreateTime(obj_uuid) 1085 item['mtime'] = self.getModifiedTime(obj_uuid) 1086 return item 1087 1088 """ 1089 getCommittedTypeObjByUuid - get obj from {datatypes} collection 1090 Returns type obj 1091 """ 1092 def getCommittedTypeObjByUuid(self, obj_uuid): 1093 self.log.info("getCommittedTypeObjByUuid(" + obj_uuid + ")") 1094 self.initFile() 1095 datatype = None 1096 datatypesGrp = self.dbGrp["{datatypes}"] 1097 if obj_uuid in datatypesGrp.attrs: 1098 typeRef = datatypesGrp.attrs[obj_uuid] 1099 # typeRef could be a reference or (for read-only) a path 1100 datatype = self.f[typeRef] 1101 elif obj_uuid in datatypesGrp: 1102 datatype = datatypesGrp[obj_uuid] # non-linked type 1103 else: 1104 msg = "Committed datatype: " + obj_uuid + " not found" 1105 self.log.info(msg) 1106 1107 return datatype 1108 1109 """ 1110 getCommittedTypeItemByUuid - get json from {datatypes} collection 1111 Returns type obj 1112 """ 1113 def getCommittedTypeItemByUuid(self, obj_uuid): 1114 self.log.info("getCommittedTypeItemByUuid(" + obj_uuid + ")") 1115 self.initFile() 1116 datatype = self.getCommittedTypeObjByUuid(obj_uuid) 1117 1118 if datatype is None: 1119 if self.getModifiedTime(obj_uuid, useRoot=False): 1120 msg = "Datatype with uuid: " + obj_uuid + " has been previously deleted" 1121 self.log.info(msg) 1122 raise IOError(errno.ENOENT, msg) 1123 else: 1124 msg = "Datatype with uuid: " + obj_uuid + " was not found" 1125 self.log.info(msg) 1126 raise IOError(errno.ENXIO, msg) 1127 1128 item = { 'id': obj_uuid } 1129 alias = [] 1130 if datatype.name and not datatype.name.startswith("/__db__"): 1131 alias.append(datatype.name) # just use the default h5py path for now 1132 item['alias'] = alias 1133 item['attributeCount'] = len(datatype.attrs) 1134 item['type'] = getTypeItem(datatype.dtype) 1135 if self.update_timestamps: 1136 item['ctime'] = self.getCreateTime(obj_uuid) 1137 item['mtime'] = self.getModifiedTime(obj_uuid) 1138 1139 return item 1140 1141 """ 1142 Get attribute given an object and name 1143 returns: JSON object 1144 """ 1145 def getAttributeItemByObj(self, obj, name, includeData=True): 1146 if name not in obj.attrs: 1147 msg = "Attribute: [" + name + "] not found in object: " + obj.name 1148 self.log.info(msg) 1149 return None 1150 1151 # get the attribute! 1152 attrObj = h5py.h5a.open(obj.id, np.string_(name)) 1153 attr = None 1154 1155 item = { 'name': name } 1156 1157 # check if the dataset is using a committed type 1158 typeid = attrObj.get_type() 1159 typeItem = None 1160 if h5py.h5t.TypeID.committed(typeid): 1161 type_uuid = None 1162 addr = h5py.h5o.get_info(typeid).addr 1163 type_uuid = self.getUUIDByAddress(addr) 1164 committedType = self.getCommittedTypeItemByUuid(type_uuid) 1165 typeItem = committedType['type'] 1166 typeItem['uuid'] = type_uuid 1167 else: 1168 typeItem = getTypeItem(attrObj.dtype) 1169 item['type'] = typeItem 1170 # todo - don't include data for OPAQUE until JSON serialization 1171 # issues are addressed 1172 1173 if type(typeItem) == dict and typeItem['class'] in ('H5T_OPAQUE'): 1174 includeData = False 1175 1176 shape_json = self.getShapeItemByAttrObj(attrObj) 1177 item['shape'] = shape_json 1178 if shape_json['class'] == 'H5S_NULL': 1179 includeData = False 1180 if includeData: 1181 try: 1182 attr = obj.attrs[name] # returns a numpy array 1183 except TypeError: 1184 self.log.warning("type error reading attribute") 1185 1186 if includeData and attr is not None: 1187 if shape_json['class'] == 'H5S_SCALAR': 1188 data = self.getDataValue(typeItem, attr) 1189 else: 1190 dims = shape_json["dims"] 1191 rank = len(dims) 1192 # convert numpy object to python list 1193 # values = self.toList(typeItem, attr) 1194 data = self.toList(rank, typeItem, attr) 1195 #data = self.bytesToString(data) 1196 item['value'] = data 1197 # timestamps will be added by getAttributeItem() 1198 return item 1199 1200 def getAttributeItems(self, col_type, obj_uuid, marker=None, limit=0): 1201 self.log.info("db.getAttributeItems(" + obj_uuid + ")") 1202 if marker: 1203 self.log.info("...marker: " + marker) 1204 if limit: 1205 self.log.info("...limit: " + str(limit)) 1206 1207 self.initFile() 1208 obj = self.getObjectByUuid(col_type, obj_uuid) 1209 if obj is None: 1210 msg = "Object: " + obj_uuid + " could not be loaded" 1211 self.log.info(msg) 1212 raise IOError(errno.ENXIO, msg) 1213 1214 items = [] 1215 gotMarker = True 1216 if marker is not None: 1217 gotMarker = False 1218 count = 0 1219 for name in obj.attrs: 1220 if not gotMarker: 1221 if name == marker: 1222 gotMarker = True 1223 continue # start filling in result on next pass 1224 else: 1225 continue # keep going! 1226 item = self.getAttributeItemByObj(obj, name, False) 1227 # mix-in timestamps 1228 if self.update_timestamps: 1229 item['ctime'] = self.getCreateTime(obj_uuid, objType="attribute", name=name) 1230 item['mtime'] = self.getModifiedTime(obj_uuid, objType="attribute", name=name) 1231 1232 items.append(item) 1233 count += 1 1234 if limit > 0 and count == limit: 1235 break # return what we got 1236 return items 1237 1238 def getAttributeItem(self, col_type, obj_uuid, name): 1239 self.log.info("getAttributeItemByUuid(" + col_type + ", " + obj_uuid 1240 + ", " + name + ")") 1241 self.initFile() 1242 obj = self.getObjectByUuid(col_type, obj_uuid) 1243 if obj is None: 1244 msg = "Parent object: " + obj_uuid + " of attribute not found" 1245 self.log.info(msg) 1246 raise IOError(errno.ENXIO, msg) 1247 return None 1248 item = self.getAttributeItemByObj(obj, name) 1249 if item is None: 1250 if self.getModifiedTime(obj_uuid, objType="attribute", name=name, useRoot=False): 1251 # attribute has been removed 1252 msg = "Attribute: [" + name + "] of object: " + obj_uuid + " has been previously deleted" 1253 self.log.info(msg) 1254 raise IOError(errno.ENOENT, msg) 1255 msg = "Attribute: [" + name + "] of object: " + obj_uuid + " not found" 1256 self.log.info(msg) 1257 raise IOError(errno.ENXIO, msg) 1258 # mix-in timestamps 1259 if self.update_timestamps: 1260 item['ctime'] = self.getCreateTime(obj_uuid, objType="attribute", name=name) 1261 item['mtime'] = self.getModifiedTime(obj_uuid, objType="attribute", name=name) 1262 1263 return item 1264 1265 """ 1266 isDimensionList - return True if this attribute json looks like a dimension list 1267 """ 1268 def isDimensionList(self, attr_name, attr_type): 1269 if attr_name != "DIMENSION_LIST": 1270 return False 1271 if type(attr_type) is not dict: 1272 return False 1273 if attr_type['class'] != "H5T_VLEN": 1274 return False 1275 base_type = attr_type['base'] 1276 if base_type['class'] != 'H5T_REFERENCE': 1277 return False 1278 return True 1279 1280 """ 1281 isReferenceList - return True if this attribute json looks like a reference list 1282 """ 1283 def isReferenceList(self, attr_name, attr_type): 1284 if attr_name != "REFERENCE_LIST": 1285 return False 1286 if type(attr_type) is not dict: 1287 return False 1288 if attr_type['class'] != "H5T_COMPOUND": 1289 return False 1290 1291 return True 1292 1293 """ 1294 makeDimensionList - work-around for h5py problems saving dimension list - 1295 types which are vlen's of references are not working directly, so use dim_scale api 1296 Note: this is a work-around for h5py issue: 1297 https://github.com/h5py/h5py/issues/553 1298 """ 1299 def makeDimensionList(self, obj, shape, value): 1300 dset_refs = self.listToRef(value) 1301 for i in range(len(dset_refs)): 1302 refs = dset_refs[i] 1303 if type(refs) not in (list, tuple): 1304 msg = "Invalid dimension list value" 1305 self.log.info(msg) 1306 raise IOError(errno.EINVAL, msg) 1307 for j in range(len(refs)): 1308 scale_obj = self.f[refs[j]] 1309 if scale_obj is None: 1310 self.log.warning("dimension list, missing obj reference: " + value[i]) 1311 continue 1312 if "CLASS" not in scale_obj.attrs: 1313 self.log.warning("dimension list, no scale obj") 1314 continue 1315 if scale_obj.attrs["CLASS"] != b"DIMENSION_SCALE": 1316 self.log.warning("dimension list, invalid class for scale obj") 1317 continue 1318 1319 try: 1320 h5py.h5ds.attach_scale(obj.id, scale_obj.id, i) 1321 except RuntimeError: 1322 self.log.error("got runtime error attaching scale") 1323 1324 """ 1325 writeNdArrayToAttribute - create an attribute given numpy array 1326 """ 1327 def writeNdArrayToAttribute(self, attrs, attr_name, npdata, shape, dt): 1328 attrs.create(attr_name, npdata, shape=shape, dtype=dt) 1329 1330 """ 1331 create a scalar string attribute using nullterm padding 1332 """ 1333 def makeNullTermStringAttribute(self, obj, attr_name, strLength, value): 1334 self.log.info( 1335 "make nullterm, length: " + str(strLength) + " value:" + str(value)) 1336 if type(value) == unicode: 1337 value = str(value) 1338 if strLength < len(value): 1339 self.log.warning("makeNullTermStringAttribute: value string longer than length") 1340 #value = value[:strLength] # truncate to length 1341 1342 1343 if six.PY3 and type(attr_name) is str: 1344 try: 1345 attr_name = attr_name.encode('ascii') 1346 except UnicodeDecodeError: 1347 raise TypeError("non-ascii attribute name not allowed") 1348 1349 # create the attribute 1350 tid = h5py.h5t.TypeID.copy(h5py.h5t.C_S1) 1351 tid.set_size(strLength) 1352 tid.set_strpad(h5py.h5t.STR_NULLTERM) 1353 sid = h5py.h5s.create(h5py.h5s.SCALAR) 1354 aid = h5py.h5a.create(obj.id, attr_name, tid, sid) 1355 # write the value 1356 dtype_code = 'S' + str(strLength) 1357 ndarr = np.array(value, dtype=np.dtype(dtype_code)) 1358 aid.write(ndarr) 1359 1360 def makeAttribute(self, obj, attr_name, shape, attr_type, value): 1361 """ 1362 makeAttribute - create an attribute (except for dimension list 1363 attribute) 1364 """ 1365 is_committed_type = False 1366 if type(attr_type) in (str, unicode) and len(attr_type) == UUID_LEN: 1367 # assume attr_type is a uuid of a named datatype 1368 is_committed_type = True 1369 1370 dt = self.createTypeFromItem(attr_type) 1371 1372 if shape is None: 1373 self.log.info("shape is null - will create null space attribute") 1374 # create null space attribute 1375 # null space datasets/attributes not supported in h5py yet: 1376 # See: https://github.com/h5py/h5py/issues/279 1377 # work around this by using low-level interface. 1378 # first create a temp scalar dataset so we can pull out the typeid 1379 tmpGrp = None 1380 if "{tmp}" not in self.dbGrp: 1381 tmpGrp = self.dbGrp.create_group("{tmp}") 1382 else: 1383 tmpGrp = self.dbGrp["{tmp}"] 1384 tmpGrp.attrs.create(attr_name, 0, shape=(), dtype=dt) 1385 if six.PY3: 1386 b_attr_name = attr_name.encode('utf-8') 1387 tmpAttr = h5py.h5a.open(tmpGrp.id, name=b_attr_name) 1388 else: 1389 tmpAttr = h5py.h5a.open(tmpGrp.id, name=attr_name) 1390 if not tmpAttr: 1391 msg = "Unexpected error creating datatype for nullspace attribute" 1392 self.log.error(msg) 1393 raise IOError(errno.EIO, msg) 1394 tid = tmpAttr.get_type() 1395 sid = sid = h5py.h5s.create(h5py.h5s.NULL) 1396 # now create the permanent attribute 1397 if attr_name in obj.attrs: 1398 self.log.info("deleting attribute: " + attr_name) 1399 del obj.attrs[attr_name] 1400 if six.PY3: 1401 attr_id = h5py.h5a.create(obj.id, b_attr_name, tid, sid) 1402 else: 1403 attr_id = h5py.h5a.create(obj.id, attr_name, tid, sid) 1404 # delete the temp attribute 1405 del tmpGrp.attrs[attr_name] 1406 if not attr_id: 1407 msg = "Unexpected error creating nullspace attribute" 1408 self.log.error(msg) 1409 raise IOError(errno.EIO, msg) 1410 else: 1411 if type(value) is tuple: 1412 value = list(value) 1413 if type(shape) is list: 1414 shape = tuple(shape) 1415 if not is_committed_type: 1416 # apparently committed types can not be used as reference types 1417 # todo - verify why that is 1418 1419 rank = len(shape) 1420 # convert python list to numpy object 1421 strPad = None 1422 strLength = 0 1423 if type(attr_type) == dict and attr_type['class'] == 'H5T_STRING' and "strPad" in attr_type: 1424 strPad = attr_type["strPad"] 1425 strLength = attr_type['length'] 1426 1427 if rank == 0 and type(strLength) == int and strPad == "H5T_STR_NULLTERM": 1428 self.makeNullTermStringAttribute(obj, attr_name, strLength, value) 1429 else: 1430 typeItem = getTypeItem(dt) 1431 value = self.toRef(rank, typeItem, value) 1432 1433 # create numpy array 1434 npdata = np.zeros(shape, dtype=dt) 1435 1436 if rank == 0: 1437 npdata[()] = self.toNumPyValue(attr_type, value, npdata[()]) 1438 else: 1439 self.toNumPyArray(rank, attr_type, value, npdata) 1440 1441 self.writeNdArrayToAttribute(obj.attrs, attr_name, npdata, shape, dt) 1442 1443 """ 1444 createAttribute - create an attribute 1445 """ 1446 def createAttribute(self, col_name, obj_uuid, attr_name, shape, attr_type, value): 1447 self.log.info("createAttribute: [" + attr_name + "]") 1448 1449 self.initFile() 1450 if self.readonly: 1451 msg = "Unable to create attribute (updates are not allowed)" 1452 self.log.info(msg) 1453 raise IOError(errno.EPERM, msg) 1454 obj = self.getObjectByUuid(col_name, obj_uuid) 1455 if not obj: 1456 msg = "Object with uuid: " + obj_uuid + " not found" 1457 self.log.info(msg) 1458 raise IOError(errno.ENXIO, msg) 1459 1460 if self.isDimensionList(attr_name, attr_type): 1461 self.makeDimensionList(obj, shape, value) 1462 elif self.isReferenceList(attr_name, attr_type): 1463 pass # Skip since reference list will be created by attach scale 1464 else: 1465 self.makeAttribute(obj, attr_name, shape, attr_type, value) 1466 1467 now = time.time() 1468 self.setCreateTime(obj_uuid, objType="attribute", name=attr_name, timestamp=now) 1469 self.setModifiedTime(obj_uuid, objType="attribute", name=attr_name, timestamp=now) 1470 self.setModifiedTime(obj_uuid, timestamp=now) # owner entity is modified 1471 1472 def deleteAttribute(self, col_name, obj_uuid, attr_name): 1473 self.initFile() 1474 if self.readonly: 1475 msg = "Unable to delete attribute (updates are not allowed)" 1476 self.log.info(msg) 1477 raise IOError(errno.EPERM, msg) 1478 obj = self.getObjectByUuid(col_name, obj_uuid) 1479 1480 if attr_name not in obj.attrs: 1481 msg = "Attribute with name: [" + attr_name + "] of object: " + obj_uuid + " not found" 1482 self.log.info(msg) 1483 raise IOError(errno.ENXIO, msg) 1484 1485 del obj.attrs[attr_name] 1486 now = time.time() 1487 self.setModifiedTime(obj_uuid, objType="attribute", name=attr_name, timestamp=now) 1488 1489 return True 1490 1491 """ 1492 Return a json-serializable representation of the numpy value 1493 """ 1494 def getDataValue(self, typeItem, value, dimension=0, dims=None): 1495 if dimension > 0: 1496 if type(dims) not in (list, tuple): 1497 msg = "unexpected type for type array dimensions" 1498 self.log.error(msg) 1499 raise IOError(errno.EIO, msg) 1500 out = [] 1501 rank = len(dims) 1502 if dimension > rank: 1503 msg = "unexpected dimension for type array" 1504 self.log.error(msg) 1505 raise IOError(errno.EIO, msg) 1506 nElements = dims[rank - dimension] 1507 for i in range(nElements): 1508 item_value = self.getDataValue(typeItem, value[i], 1509 dimension=(dimension-1), 1510 dims=dims) 1511 out.append(item_value) 1512 return out # done for array case 1513 1514 out = None 1515 typeClass = typeItem['class'] 1516 if isinstance(value, (np.ndarray, np.generic)): 1517 value = value.tolist() # convert numpy object to list 1518 if typeClass == 'H5T_COMPOUND': 1519 1520 if type(value) not in (list, tuple): 1521 msg = "Unexpected type for compound value" 1522 self.log.error(msg) 1523 raise IOError(errno.EIO, msg) 1524 1525 fields = typeItem['fields'] 1526 if len(fields) != len(value): 1527 msg = "Number of elements in compound type does not match type" 1528 self.log.error(msg) 1529 raise IOError(errno.EIO, msg) 1530 nFields = len(fields) 1531 out = [] 1532 for i in range(nFields): 1533 field = fields[i] 1534 item_value = self.getDataValue(field['type'], value[i]) 1535 out.append(item_value) 1536 elif typeClass == 'H5T_VLEN': 1537 if type(value) not in (list, tuple): 1538 msg = "Unexpected type for vlen value" 1539 self.log.error(msg) 1540 raise IOError(errno.EIO, msg) 1541 1542 baseType = typeItem['base'] 1543 out = [] 1544 nElements = len(value) 1545 for i in range(nElements): 1546 item_value = self.getDataValue(baseType, value[i]) 1547 out.append(item_value) 1548 elif typeClass == 'H5T_REFERENCE': 1549 out = self.refToList(value) 1550 elif typeClass == 'H5T_OPAQUE': 1551 out = "???" # todo 1552 elif typeClass == 'H5T_ARRAY': 1553 type_dims = typeItem["dims"] 1554 if type(type_dims) not in (list, tuple): 1555 msg = "unexpected type for type array dimensions" 1556 self.log.error(msg) 1557 raise IOError(errno.EIO, msg) 1558 rank = len(type_dims) 1559 baseType = typeItem['base'] 1560 out = self.getDataValue(baseType, value, dimension=rank, 1561 dims=type_dims) 1562 1563 elif typeClass in ('H5T_INTEGER', 'H5T_FLOAT', 'H5T_ENUM'): 1564 out = value # just copy value 1565 elif typeClass == 'H5T_STRING': 1566 if six.PY3: 1567 if "charSet" in typeItem: 1568 charSet = typeItem["charSet"] 1569 else: 1570 charSet = "H5T_CSET_ASCII" 1571 if charSet == "H5T_CSET_ASCII": 1572 out = value.decode("utf-8") 1573 else: 1574 out = value 1575 else: 1576 # things are simpler in PY2 1577 out = value 1578 else: 1579 msg = "Unexpected type class: " + typeClass 1580 self.log.info(msg) 1581 raise IOError(errno.ENINVAL, msg) 1582 return out 1583 1584 """ 1585 Return a numpy value based on json representation 1586 """ 1587 def getRefValue(self, typeItem, value): 1588 out = None 1589 typeClass = typeItem['class'] 1590 if typeClass == 'H5T_COMPOUND': 1591 1592 if type(value) not in (list, tuple): 1593 msg = "Unexpected type for compound value" 1594 self.log.error(msg) 1595 raise IOError(errno.EIO, msg) 1596 1597 fields = typeItem['fields'] 1598 if len(fields) != len(value): 1599 msg = "Number of elements in compound type does not match type" 1600 self.log.error(msg) 1601 raise IOError(errno.EIO, msg) 1602 nFields = len(fields) 1603 out = [] 1604 for i in range(nFields): 1605 field = fields[i] 1606 item_value = self.getRefValue(field['type'], value[i]) 1607 out.append(item_value) 1608 elif typeClass == 'H5T_VLEN': 1609 if type(value) not in (list, tuple): 1610 msg = "Unexpected type for vlen value" 1611 self.log.error(msg) 1612 raise IOError(errno.EIO, msg) 1613 1614 baseType = typeItem['base'] 1615 out = [] 1616 nElements = len(value) 1617 for i in range(nElements): 1618 item_value = self.getRefValue(baseType, value[i]) 1619 out.append(item_value) 1620 elif typeClass == 'H5T_REFERENCE': 1621 out = self.listToRef(value) 1622 elif typeClass == 'H5T_OPAQUE': 1623 out = "???" # todo 1624 elif typeClass == 'H5T_ARRAY': 1625 out = value 1626 elif typeClass in ('H5T_INTEGER', 'H5T_FLOAT', 'H5T_ENUM'): 1627 out = value # just copy value 1628 elif typeClass == 'H5T_STRING': 1629 if typeItem['charSet'] == 'H5T_CSET_UTF8': 1630 # out = value.encode('utf-8') 1631 out = value 1632 else: 1633 out = value.encode() 1634 else: 1635 msg = "Unexpected type class: " + typeClass 1636 self.log.info(msg) 1637 raise IOError(errno.ENINVAL, msg) 1638 1639 if type(out) == list: 1640 out = tuple(out) # convert to tuple 1641 return out 1642 1643 """ 1644 Return a numpy value based on json representation 1645 """ 1646 def toNumPyValue(self, typeItem, src, des): 1647 1648 typeClass = 'H5T_INTEGER' # default to int type 1649 if type(typeItem) is dict: 1650 typeClass = typeItem['class'] 1651 if typeClass == 'H5T_COMPOUND': 1652 fields = typeItem['fields'] 1653 if len(fields) != len(src): 1654 msg = "Number of elements in compound type does not match type" 1655 self.log.error(msg) 1656 raise IOError(errno.EIO, msg) 1657 nFields = len(fields) 1658 1659 for i in range(nFields): 1660 field = fields[i] 1661 field_name = field['name'] 1662 des[field_name] = src[i] 1663 1664 elif typeClass == 'H5T_VLEN': 1665 if type(src) not in (list, tuple): 1666 msg = "Unexpected type for vlen value" 1667 self.log.error(msg) 1668 raise IOError(errno.EIO, msg) 1669 1670 baseType = typeItem['base'] 1671 1672 dt = self.createTypeFromItem(baseType) 1673 des = np.array(src, dtype=dt) 1674 1675 elif typeClass == 'H5T_REFERENCE': 1676 des = src # self.listToRef(src) 1677 1678 elif typeClass == 'H5T_OPAQUE': 1679 des = "???" # todo 1680 elif typeClass == 'H5T_ARRAY': 1681 des = src 1682 elif typeClass in ('H5T_INTEGER', 'H5T_FLOAT', 'H5T_ENUM'): 1683 des = src # just copy value 1684 elif typeClass == 'H5T_STRING': 1685 if typeItem['charSet'] == 'H5T_CSET_UTF8': 1686 des = src # src.encode('utf-8') 1687 else: 1688 if type(src) is str: 1689 try: 1690 src.encode('ascii') 1691 except UnicodeDecodeError: 1692 raise TypeError("non-ascii value not allowed with H5T_CSET_ASCII") 1693 des = src 1694 1695 else: 1696 msg = "Unexpected type class: " + typeClass 1697 self.log.info(msg) 1698 raise IOError(errno.ENINVAL, msg) 1699 return des 1700 1701 """ 1702 copy src data to numpy array 1703 """ 1704 def toNumPyArray(self, rank, typeItem, src, des): 1705 1706 if rank == 0: 1707 msg = "unexpected rank value" 1708 self.log.error(msg) 1709 raise IOError(errno.EIO, msg) # shouldn't be called with rank 0 1710 1711 for i in range(len(des)): 1712 des_sec = des[i] # numpy slab 1713 1714 src_sec = src[i] 1715 1716 if rank > 1: 1717 self.toNumPyArray(rank - 1, typeItem, src_sec, des_sec) 1718 else: 1719 rv = self.toNumPyValue(typeItem, src_sec, des_sec) 1720 # if the numpy object is writeable, des_sec will be 1721 # already updated. Otherwise, update the des by assignment 1722 if not hasattr(des_sec, 'flags') or not des_sec.flags['WRITEABLE']: 1723 des[i] = rv 1724 1725 """ 1726 Convert json list to h5py compatible values 1727 """ 1728 def toRef(self, rank, typeItem, data): 1729 out = None 1730 1731 if type(typeItem) in (str, unicode): 1732 # commited type - get json representation 1733 committed_type_item = self.getCommittedTypeItemByUuid(typeItem) 1734 typeItem = committed_type_item['type'] 1735 1736 typeClass = typeItem['class'] 1737 if typeClass in ('H5T_INTEGER', 'H5T_FLOAT'): 1738 out = data # just use as is 1739 1740 elif rank == 0: 1741 # scalar value 1742 out = self.getRefValue(typeItem, data) 1743 else: 1744 out = [] 1745 for item in data: 1746 if rank > 1: 1747 out_item = self.toRef(rank - 1, typeItem, item) 1748 out.append(out_item) 1749 else: 1750 out_item = self.getRefValue(typeItem, item) 1751 out.append(out_item) 1752 1753 return out 1754 1755 """ 1756 Convert list to json serializable values. 1757 """ 1758 def toList(self, rank, typeItem, data): 1759 out = None 1760 typeClass = typeItem['class'] 1761 if typeClass in ('H5T_INTEGER', 'H5T_FLOAT'): 1762 out = data.tolist() # just use as is 1763 1764 elif rank == 0: 1765 # scalar value 1766 out = self.getDataValue(typeItem, data) 1767 else: 1768 out = [] 1769 for item in data: 1770 if rank > 1: 1771 out_item = self.toList(rank - 1, typeItem, item) 1772 out.append(out_item) 1773 else: 1774 out_item = self.getDataValue(typeItem, item) 1775 out.append(out_item) 1776 1777 return out 1778 1779 """ 1780 Create ascii representation of vlen data object 1781 """ 1782 def vlenToList(self, data): 1783 # todo - verify that data is a numpy.ndarray 1784 out = None 1785 if len(data.shape) == 0: 1786 out = [] 1787 else: 1788 try: 1789 if data.dtype.kind != 'O': 1790 out = data.tolist() 1791 else: 1792 out = [] 1793 for item in data: 1794 out.append(self.vlenToList(item)) # recursive call 1795 except AttributeError: 1796 # looks like this is not a numpy ndarray, just return the value 1797 out = data 1798 return out 1799 1800 """ 1801 Create ascii representation of ref data object 1802 """ 1803 def refToList(self, data): 1804 # todo - verify that data is a numpy.ndarray 1805 out = None 1806 if type(data) is h5py.h5r.Reference: 1807 if bool(data): 1808 grpref = self.f[data] 1809 addr = h5py.h5o.get_info(grpref.id).addr 1810 uuid = self.getUUIDByAddress(addr) 1811 if self.getGroupObjByUuid(uuid): 1812 out = "groups/" + uuid 1813 elif self.getDatasetObjByUuid(uuid): 1814 out = "datasets/" + uuid 1815 elif self.getCommittedTypeObjByUuid(uuid): 1816 out = "datatypes/" + uuid 1817 else: 1818 self.log.warning( 1819 "uuid in region ref not found: [" + uuid + "]") 1820 return None 1821 else: 1822 out = "null" 1823 elif type(data) is h5py.h5r.RegionReference: 1824 out = self.getRegionReference(data) 1825 else: 1826 out = [] 1827 for item in data: 1828 out.append(self.refToList(item)) # recursive call 1829 return out 1830 1831 """ 1832 Convert ascii representation of data references to data ref 1833 """ 1834 def listToRef(self, data): 1835 out = None 1836 if not data: 1837 # null reference 1838 out = self.getNullReference() 1839 elif type(data) in (bytes, str, unicode): 1840 obj_ref = None 1841 # object reference should be in the form: <collection_name>/<uuid> 1842 for prefix in ("datasets", "groups", "datatypes"): 1843 if data.startswith(prefix): 1844 uuid_ref = data[len(prefix):] 1845 if len(uuid_ref) == (UUID_LEN + 1) and uuid_ref.startswith('/'): 1846 obj = self.getObjectByUuid(prefix, uuid_ref[1:]) 1847 if obj: 1848 obj_ref = obj.ref 1849 else: 1850 msg = "Invalid object reference value: [" + uuid_ref + "] not found" 1851 self.log.info(msg) 1852 raise IOError(errno.ENXIO, msg) 1853 break 1854 if not obj_ref: 1855 msg = "Invalid object reference value: [" + data + "]" 1856 self.log.info(msg) 1857 raise IOError(errno.EINVAL, msg) 1858 else: 1859 out = obj_ref 1860 1861 elif type(data) in (list, tuple): 1862 out = [] 1863 for item in data: 1864 out.append(self.listToRef(item)) # recursive call 1865 elif type(data) == dict: 1866 # assume region ref 1867 out = self.createRegionReference(data) 1868 else: 1869 msg = "Invalid object reference value type: [" + str(type(data)) + "]" 1870 self.log.info(msg) 1871 raise IOError(errno.EINVAL, msg) 1872 return out 1873 1874 """ 1875 Convert list that may contain bytes type elements to list of string elements 1876 """ 1877 def bytesArrayToList(self, data): 1878 if type(data) in (bytes, str, unicode): 1879 is_list = False 1880 elif isinstance(data, (np.ndarray, np.generic)): 1881 if len(data.shape) == 0: 1882 is_list = False 1883 data = data.tolist() # tolist will return a scalar in this case 1884 if type(data) in (list, tuple): 1885 is_list = True 1886 else: 1887 is_list = False 1888 else: 1889 is_list = True 1890 elif type(data) in (list, tuple): 1891 is_list = True 1892 else: 1893 is_list = False 1894 1895 if is_list: 1896 out = [] 1897 for item in data: 1898 out.append(self.bytesArrayToList(item)) # recursive call 1899 elif type(data) is bytes: 1900 if six.PY3: 1901 out = data.decode("utf-8") 1902 else: 1903 out = data 1904 else: 1905 out = data 1906 1907 return out 1908 1909 """ 1910 Get item description of region reference value 1911 """ 1912 def getRegionReference(self, regionRef): 1913 selectionEnums = {h5py.h5s.SEL_NONE: 'H5S_SEL_NONE', 1914 h5py.h5s.SEL_ALL: 'H5S_SEL_ALL', 1915 h5py.h5s.SEL_POINTS: 'H5S_SEL_POINTS', 1916 h5py.h5s.SEL_HYPERSLABS: 'H5S_SEL_HYPERSLABS'} 1917 1918 item = {} 1919 objid = h5py.h5r.dereference(regionRef, self.f.file.file.id) 1920 if objid: 1921 item['id'] = self.getUUIDByAddress(h5py.h5o.get_info(objid).addr) 1922 else: 1923 self.log.info("region reference unable to find item with objid: " + objid) 1924 return item 1925 1926 sel = h5py.h5r.get_region(regionRef, objid) 1927 select_type = sel.get_select_type() 1928 if select_type not in selectionEnums: 1929 msg = "Unexpected selection type: " + regionRef.typecode 1930 self.log.error(msg) 1931 raise IOError(errno.EIO, msg) 1932 item['select_type'] = selectionEnums[select_type] 1933 pointlist = None 1934 if select_type == h5py.h5s.SEL_POINTS: 1935 # retrieve a numpy array of selection points 1936 points = sel.get_select_elem_pointlist() 1937 pointlist = points.tolist() 1938 elif select_type == h5py.h5s.SEL_HYPERSLABS: 1939 points = sel.get_select_hyper_blocklist() 1940 if points is not None: 1941 pointlist = points[...].tolist() 1942 # bump up the second coordinate by one to match api spec 1943 for point in pointlist: 1944 coord2 = point[1] 1945 for i in range(len(coord2)): 1946 coord2[i] = coord2[i] + 1 1947 1948 item['selection'] = pointlist 1949 1950 return item 1951 1952 """ 1953 Create region reference from item description of region reference value 1954 """ 1955 def createRegionReference(self, item): 1956 selectionEnums = {'H5S_SEL_NONE': h5py.h5s.SEL_NONE, 1957 'H5S_SEL_ALL': h5py.h5s.SEL_ALL, 1958 'H5S_SEL_POINTS': h5py.h5s.SEL_POINTS, 1959 'H5S_SEL_HYPERSLABS': h5py.h5s.SEL_HYPERSLABS} 1960 region_ref = None 1961 1962 if 'select_type' not in item: 1963 msg = "select_type not provided for region selection" 1964 self.log.info(msg) 1965 raise IOError(errno.EINVAL, msg) 1966 select_type = item['select_type'] 1967 if select_type not in selectionEnums.keys(): 1968 msg = "selection type: [" + select_type + "] is not valid" 1969 self.log.info(msg) 1970 raise IOError(errno.EINVAL, msg) 1971 dset = None 1972 if select_type == 'H5S_SEL_NONE': 1973 if 'id' not in item: 1974 # select none on null dataset, return null ref 1975 out = self.getNullReference() 1976 return out 1977 else: # select_type != 'H5S_SEL_NONE' 1978 if 'id' not in item: 1979 msg = "id not provided for region selection" 1980 self.log.info(msg) 1981 raise IOError(errno.EINVAL, msg) 1982 1983 # Otherwise need to provide uuid of dataset 1984 uuid_ref = item['id'] 1985 if len(uuid_ref) != UUID_LEN: 1986 msg = "uuid value: [" + uuid_ref + "] for region reference is not valid" 1987 self.log.info(msg) 1988 raise IOError(errno.EINVAL, msg) 1989 1990 obj = self.getObjectByUuid("datasets", uuid_ref) 1991 if obj: 1992 dset = obj 1993 else: 1994 msg = "Invalid region refence value: [" + uuid_ref + "] not found" 1995 self.log.info(msg) 1996 raise IOError(errno.EINVAL, msg) 1997 1998 if select_type in ('H5S_SEL_POINTS', 'H5S_SEL_HYPERSLABS'): 1999 if 'selection' not in item: 2000 msg = "selection key not provided for region selection" 2001 self.log.info(msg) 2002 raise IOError(errno.EINVAL, msg) 2003 2004 rank = len(dset.shape) 2005 space_id = h5py.h5d.DatasetID.get_space(dset.id) 2006 h5py.h5s.SpaceID.select_none(space_id) 2007 2008 if select_type == 'H4S_SEL_NONE': 2009 pass # did select_none above 2010 elif select_type == 'H5S_SEL_ALL': 2011 h5py.h5s.SpaceID.select_all(space_id) 2012 elif select_type == 'H5S_SEL_POINTS': 2013 selection = item['selection'] 2014 for point in selection: 2015 if len(point) != rank: 2016 msg = "point selection number of elements must mach rank of referenced dataset" 2017 self.log.info(msg) 2018 raise IOError(errno.EINVAL, msg) 2019 h5py.h5s.SpaceID.select_elements(space_id, selection) 2020 elif select_type == 'H5S_SEL_HYPERSLABS': 2021 selection = item['selection'] 2022 2023 for slab in selection: 2024 # each item should be a two element array defining the hyperslab boundary 2025 if len(slab) != 2: 2026 msg = "selection value not valid (not a 2 element array)" 2027 self.log.info(msg) 2028 raise IOError(errno.EINVAL, msg) 2029 start = slab[0] 2030 if type(start) == list: 2031 start = tuple(start) 2032 if type(start) is not tuple or len(start) != rank: 2033 msg = "selection value not valid, start element should have number " 2034 msg += "elements equal to rank of referenced dataset" 2035 self.log.info(msg) 2036 raise IOError(errno.EINVAL, msg) 2037 stop = slab[1] 2038 if type(stop) == list: 2039 stop = tuple(stop) 2040 if type(stop) is not tuple or len(stop) != rank: 2041 msg = "selection value not valid, count element should have number " 2042 msg += "elements equal to rank of referenced dataset" 2043 self.log.info(msg) 2044 raise IOError(errno.EINVAL, msg) 2045 count = [] 2046 for i in range(rank): 2047 if start[i] < 0: 2048 msg = "start value for hyperslab selection must be non-negative" 2049 self.log.info(msg) 2050 raise IOError(errno.EINVAL, msg) 2051 if stop[i] <= start[i]: 2052 msg = "stop value must be greater than start value for hyperslab selection" 2053 self.log.info(msg) 2054 raise IOError(errno.EINVAL, msg) 2055 count.append(stop[i] - start[i]) 2056 count = tuple(count) 2057 2058 h5py.h5s.SpaceID.select_hyperslab(space_id, start, count, op=h5py.h5s.SELECT_OR) 2059 2060 # now that we've selected the desired region in the space, return a region reference 2061 2062 if six.PY3: 2063 dset_name = dset.name.encode('utf-8') 2064 else: 2065 dset_name = dset.name 2066 region_ref = h5py.h5r.create(self.f.id, dset_name, h5py.h5r.DATASET_REGION, space_id) 2067 2068 return region_ref 2069 2070 """ 2071 Convert a list to a tuple, recursively. 2072 Example. [[1,2],[3,4]] -> ((1,2),(3,4)) 2073 """ 2074 def toTuple(self, rank, data): 2075 if type(data) in (list, tuple): 2076 if rank > 0: 2077 return list(self.toTuple(rank-1, x) for x in data) 2078 else: 2079 return tuple(self.toTuple(rank-1, x) for x in data) 2080 else: 2081 return data 2082 2083 """ 2084 Get values from dataset identified by obj_uuid. 2085 If a slices list or tuple is provided, it should have the same 2086 number of elements as the rank of the dataset. 2087 """ 2088 def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"): 2089 dset = self.getDatasetObjByUuid(obj_uuid) 2090 if format not in ("json", "binary"): 2091 msg = "only json and binary formats are supported" 2092 self.log.info(msg) 2093 raise IOError(errno.EINVAL, msg) 2094 2095 if dset is None: 2096 msg = "Dataset: " + obj_uuid + " not found" 2097 self.log.info(msg) 2098 raise IOError(errno.ENXIO, msg) 2099 2100 values = None 2101 dt = dset.dtype 2102 typeItem = getTypeItem(dt) 2103 itemSize = getItemSize(typeItem) 2104 if itemSize == "H5T_VARIABLE" and format == "binary": 2105 msg = "Only JSON is supported for for this data type" 2106 self.log.info(msg) 2107 raise IOError(errno.EINVAL, msg) 2108 2109 if dset.shape is None: 2110 # null space dataset (with h5py 2.6.0) 2111 return None 2112 2113 rank = len(dset.shape) 2114 2115 if rank == 0: 2116 # check for null dataspace 2117 try: 2118 val = dset[...] 2119 except IOError: 2120 # assume null dataspace, return none 2121 return None 2122 if val is None: 2123 self.log.warning("no value returned from scalar dataset") 2124 2125 if type(slices) != list and type(slices) != tuple and slices is not Ellipsis: 2126 msg = "Unexpected error: getDatasetValuesByUuid: bad type for dim parameter" 2127 self.log.error(msg) 2128 raise IOError(errno.EIO, msg) 2129 2130 if (type(slices) == list or type(slices) == tuple) and len(slices) != rank: 2131 msg = "Unexpected error: getDatasetValuesByUuid: number of dims in selection not same as rank" 2132 self.log.error(msg) 2133 raise IOError(errno.EIO, msg) 2134 2135 if dt.kind == 'O': 2136 if format != "json": 2137 msg = "Only JSON is supported for for this data type" 2138 self.log.info(msg) 2139 raise IOError(errno.EINVAL, msg) 2140 # numpy object type - could be a vlen string or generic vlen 2141 h5t_check = h5py.h5t.check_dtype(vlen=dt) 2142 if h5t_check == str or h5t_check == unicode: 2143 values = dset[slices].tolist() # just dump to list 2144 elif six.PY3 and h5t_check == bytes: 2145 values = self.bytesArrayToList(dset[slices]) 2146 elif h5t_check is not None: 2147 # other vlen data 2148 values = self.vlenToList(dset[slices]) 2149 else: 2150 # check for reference type 2151 h5t_check = h5py.h5t.check_dtype(ref=dt) 2152 if h5t_check is not None: 2153 # reference type 2154 values = self.refToList(dset[slices]) 2155 else: 2156 msg = "Unexpected error, object type unknown" 2157 self.log.error(msg) 2158 raise IOError(errno.EIO, msg) 2159 elif dt.kind == 'V' and len(dt) <= 1 and len(dt.shape) == 0: 2160 # opaque type - skip for now 2161 self.log.warning("unable to get opaque type values") 2162 values = "????" 2163 elif dt.kind == 'S' and format == "json" and six.PY3: 2164 values = self.bytesArrayToList(dset[slices]) 2165 elif len(dt) > 1: 2166 # compound type 2167 if format == "json": 2168 values = self.bytesArrayToList(dset[slices]) 2169 else: 2170 values = dset[slices].tobytes() 2171 else: 2172 values = dset[slices] 2173 2174 # just use tolist to dump 2175 if format == "json": 2176 values = values.tolist() 2177 else: 2178 #values = base64.b64encode(dset[slices].tobytes()) 2179 values = values.tobytes() 2180 2181 return values 2182 2183 """ 2184 doDatasetQueryByUuid: return rows based on query string 2185 Return rows from a dataset that matches query string. 2186 2187 Note: Only supported for compound_type/one-dimensional datasets 2188 """ 2189 def doDatasetQueryByUuid(self, obj_uuid, query, start=0, stop=-1, step=1, limit=None): 2190 self.log.info("doQueryByUuid - uuid: " + obj_uuid + " query:" + query) 2191 self.log.info("start: " + str(start) + " stop: " + str(stop) + " step: " + str(step) + " limit: " + str(limit)) 2192 dset = self.getDatasetObjByUuid(obj_uuid) 2193 if dset is None: 2194 msg = "Dataset: " + obj_uuid + " not found" 2195 self.log.info(msg) 2196 raise IOError(errno.ENXIO, msg) 2197 2198 values = [] 2199 dt = dset.dtype 2200 typeItem = getTypeItem(dt) 2201 itemSize = getItemSize(typeItem) 2202 if typeItem['class'] != "H5T_COMPOUND": 2203 msg = "Only compound type datasets can be used as query target" 2204 self.log.info(msg) 2205 raise IOError(errno.EINVAL, msg) 2206 2207 if dset.shape is None: 2208 # null space dataset (with h5py 2.6.0) 2209 return None 2210 2211 rank = len(dset.shape) 2212 if rank != 1: 2213 msg = "One one-dimensional datasets can be used as query target" 2214 self.log.info(msg) 2215 raise IOError(errno.EINVAL, msg) 2216 2217 2218 values = [] 2219 indexes = [] 2220 count = 0 2221 2222 num_elements = dset.shape[0] 2223 if stop == -1: 2224 stop = num_elements 2225 elif stop > num_elements: 2226 stop = num_elements 2227 block_size = self._getBlockSize(dset) 2228 self.log.info("block_size: " + str(block_size)) 2229 2230 field_names = list(dset.dtype.fields.keys()) 2231 eval_str = self._getEvalStr(query, field_names) 2232 2233 while start < stop: 2234 if limit and (count == limit): 2235 break # no more rows for this batch 2236 end = start + block_size 2237 if end > stop: 2238 end = stop 2239 rows = dset[start:end] # read from dataset 2240 where_result = np.where(eval(eval_str)) 2241 index = where_result[0].tolist() 2242 if len(index) > 0: 2243 for i in index: 2244 row = rows[i] 2245 item = self.bytesArrayToList(row) 2246 values.append(item) 2247 indexes.append(start + i) 2248 count += 1 2249 if limit and (count == limit): 2250 break # no more rows for this batch 2251 2252 start = end # go to next block 2253 2254 2255 # values = self.getDataValue(item_type, values, dimension=1, dims=(len(values),)) 2256 2257 self.log.info("got " + str(count) + " query matches") 2258 return (indexes, values) 2259 2260 """ 2261 _getBlockSize: Get number of rows to read from disk 2262 2263 heurestic to get reasonable sized chunk of data to fetch. 2264 make multiple of chunk_size if possible 2265 """ 2266 def _getBlockSize(self, dset): 2267 target_block_size = 256 * 1000 2268 if dset.chunks: 2269 chunk_size = dset.chunks[0] 2270 if chunk_size < target_block_size: 2271 block_size = (target_block_size // chunk_size) * chunk_size 2272 else: 2273 block_size = target_block_size 2274 else: 2275 block_size = target_block_size 2276 return block_size 2277 2278 """ 2279 _getEvalStr: Get eval string for given query 2280 2281 Gets Eval string to use with numpy where method. 2282 """ 2283 def _getEvalStr(self, query, field_names): 2284 i = 0 2285 eval_str = "" 2286 var_name = None 2287 end_quote_char = None 2288 var_count = 0 2289 paren_count = 0 2290 black_list = ( "import", ) # field names that are not allowed 2291 self.log.info("getEvalStr(" + query + ")") 2292 for item in black_list: 2293 if item in field_names: 2294 msg = "invalid field name" 2295 self.log.info("EINVAL: " + msg) 2296 raise IOError(errno.EINVAL, msg) 2297 while i < len(query): 2298 ch = query[i] 2299 if (i+1) < len(query): 2300 ch_next = query[i+1] 2301 else: 2302 ch_next = None 2303 if var_name and not ch.isalnum(): 2304 # end of variable 2305 if var_name not in field_names: 2306 # invalid 2307 msg = "unknown field name" 2308 self.log.info("EINVAL: " + msg) 2309 raise IOError(errno.EINVAL, msg) 2310 eval_str += "rows['" + var_name + "']" 2311 var_name = None 2312 var_count += 1 2313 2314 if end_quote_char: 2315 if ch == end_quote_char: 2316 # end of literal 2317 end_quote_char = None 2318 eval_str += ch 2319 elif ch in ("'", '"'): 2320 end_quote_char = ch 2321 eval_str += ch 2322 elif ch.isalpha(): 2323 if ch == 'b' and ch_next in ("'", '"'): 2324 eval_str += 'b' # start of a byte string literal 2325 elif var_name is None: 2326 var_name = ch # start of a variable 2327 else: 2328 var_name += ch 2329 elif ch == '(' and end_quote_char is None: 2330 paren_count += 1 2331 eval_str += ch 2332 elif ch == ')' and end_quote_char is None: 2333 paren_count -= 1 2334 if paren_count < 0: 2335 msg = "Mismatched paren" 2336 self.log.info("EINVAL: " + msg) 2337 raise IOError(errno.EINVAL, msg) 2338 eval_str += ch 2339 else: 2340 # just add to eval_str 2341 eval_str += ch 2342 i = i+1 2343 if end_quote_char: 2344 msg = "no matching quote character" 2345 self.log.info("EINVAL: " + msg) 2346 raise IOError(errno.EINVAL, msg) 2347 if var_count == 0: 2348 msg = "No field value" 2349 self.log.info("EINVAL: " + msg) 2350 raise IOError(errno.EINVAL, msg) 2351 if paren_count != 0: 2352 msg = "Mismatched paren" 2353 self.log.info("EINVAL: " + msg) 2354 raise IOError(errno.EINVAL, msg) 2355 2356 return eval_str 2357 2358 """ 2359 Get values from dataset identified by obj_uuid using the given 2360 point selection. 2361 """ 2362 def getDatasetPointSelectionByUuid(self, obj_uuid, points): 2363 dset = self.getDatasetObjByUuid(obj_uuid) 2364 if dset is None: 2365 msg = "Dataset: " + obj_uuid + " not found" 2366 self.log.info(msg) 2367 raise IOError(errno.ENXIO, msg) 2368 2369 rank = len(dset.shape) 2370 values = np.zeros(len(points), dtype=dset.dtype) 2371 try: 2372 i = 0 2373 for point in points: 2374 if rank == 1: 2375 values[i] = dset[[point]] 2376 else: 2377 values[i] = dset[tuple(point)] 2378 i += 1 2379 except ValueError: 2380 # out of range error 2381 msg = "getDatasetPointSelection, out of range error" 2382 self.log.info(msg) 2383 raise IOError(errno.EINVAL, msg) 2384 return values.tolist() 2385 2386 """ 2387 setDatasetValuesByUuid - update the given dataset values with supplied data 2388 and optionally a hyperslab selection (slices) 2389 """ 2390 def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"): 2391 dset = self.getDatasetObjByUuid(obj_uuid) 2392 2393 if format not in ("json", "binary"): 2394 msg = "only json and binary formats are supported" 2395 self.log.info(msg) 2396 raise IOError(errno.EINVAL, msg) 2397 2398 if format == "binary" and type(data) is not bytes: 2399 msg ="data must be of type bytes for binary writing" 2400 self.log.info(msg) 2401 raise IOError(errno.EINVAL, msg) 2402 2403 if dset is None: 2404 msg = "Dataset: " + obj_uuid + " not found" 2405 self.log.info(msg) 2406 raise IOError(errno.ENXIO, msg) 2407 2408 dt = dset.dtype 2409 typeItem = getTypeItem(dt) 2410 itemSize = getItemSize(typeItem) 2411 rank = len(dset.shape) 2412 arraySize = 1 2413 for extent in dset.shape: 2414 arraySize *= arraySize 2415 2416 if itemSize == "H5T_VARIABLE" and format == "binary": 2417 msg = "Only JSON is supported for for this data type" 2418 self.log.info(msg) 2419 raise IOError(errno.EINVAL, msg) 2420 2421 if slices is None: 2422 slices = [] 2423 # create selection that covers entire dataset 2424 for dim in range(rank): 2425 s = slice(0, dset.shape[dim], 1) 2426 slices.append(s) 2427 slices = tuple(slices) 2428 2429 2430 if type(slices) != tuple: 2431 msg = "setDatasetValuesByUuid: bad type for dim parameter" 2432 self.log.error(msg) 2433 raise IOError(erno.EIO, msg) 2434 2435 2436 if len(slices) != rank: 2437 msg = "number of dims in selection not same as rank" 2438 self.log.info(msg) 2439 raise IOError(errno.EINVAL, msg) 2440 2441 npoints = 1 2442 np_shape = [] 2443 for i in range(rank): 2444 s = slices[i] 2445 2446 if s.start < 0 or s.step <= 0 or s.stop < s.start: 2447 msg = "invalid slice specification" 2448 self.log.info(msg) 2449 raise IOError(errno.EINVAL, msg) 2450 if s.stop > dset.shape[i]: 2451 msg = "invalid slice specification" 2452 self.log.info(msg) 2453 raise IOError(errno.EINVAL, msg) 2454 np_shape.append(s.stop - s.start) 2455 2456 count = (s.stop - s.start) // s.step 2457 if count <= 0: 2458 msg = "invalid slice specification" 2459 self.log.info(msg) 2460 raise IOError(errno.EINVAL, msg) 2461 2462 npoints *= count 2463 2464 np_shape = tuple(np_shape) # for comparison with ndarray shape 2465 2466 self.log.info("selection shape:" + str(np_shape)) 2467 2468 2469 # need some special conversion for compound types -- 2470 # each element must be a tuple, but the JSON decoder 2471 # gives us a list instead. 2472 if format != "binary" and len(dset.dtype) > 1 and type(data) in (list, tuple): 2473 data = self.toTuple(rank, data) 2474 #for i in range(len(data)): 2475 # converted_data.append(self.toTuple(data[i])) 2476 #data = converted_data 2477 else: 2478 h5t_check = h5py.check_dtype(ref=dset.dtype) 2479 if h5t_check in (h5py.Reference, h5py.RegionReference): 2480 # convert data to data refs 2481 if format == "binary": 2482 msg = "Only JSON is supported for for this data type" 2483 self.log.info(msg) 2484 raise IOError(errno.EINVAL, msg) 2485 data = self.listToRef(data) 2486 2487 if format == "binary": 2488 if npoints*itemSize != len(data): 2489 msg = "Expected: " + str(npoints*itemSize) + " bytes, but got: " + str(len(data)) 2490 self.log.info(msg) 2491 raise IOError(errno.EINVAL, msg) 2492 if dset.dtype.shape == (): 2493 arr = np.fromstring(data, dtype=dset.dtype) 2494 arr = arr.reshape(np_shape) # conform to selection shape 2495 else: 2496 # tricy array type! 2497 arr = np.empty(np_shape, dtype=dset.dtype) 2498 base_arr = np.fromstring(data, dtype=dset.dtype.base) 2499 base_shape = list(np_shape) 2500 base_shape.extend(dset.dtype.shape) # add on the type dimensions 2501 base_arr = base_arr.reshape(base_shape) 2502 arr[...] = base_arr 2503 else: 2504 # data is json 2505 if npoints == 1 and len(dset.dtype) > 1: 2506 # convert to tuple for compound singleton writes 2507 data = [tuple(data),] 2508 2509 arr = np.array(data, dtype=dset.dtype) 2510 # raise an exception of the array shape doesn't match the selection shape 2511 # allow if the array is a scalar and the selection shape is one element, 2512 # numpy is ok with this 2513 np_index = 0 2514 for dim in range(len(arr.shape)): 2515 data_extent = arr.shape[dim] 2516 selection_extent = 1 2517 if np_index < len(np_shape): 2518 selection_extent = np_shape[np_index] 2519 if selection_extent == data_extent: 2520 np_index += 1 2521 continue # good 2522 if data_extent == 1: 2523 continue # skip singleton selection 2524 if selection_extent == 1: 2525 np_index += 1 2526 continue # skip singleton selection 2527 2528 # selection/data mismatch! 2529 msg = "data shape doesn't match selection shape" 2530 msg += "--data shape: " + str(arr.shape) 2531 msg += "--selection shape: " + str(np_shape) 2532 2533 self.log.info(msg) 2534 raise IOError(errno.EINVAL, msg) 2535 2536 # write temp numpy array to dataset 2537 if rank == 1: 2538 s = slices[0] 2539 try: 2540 dset[s] = arr 2541 except TypeError as te: 2542 self.log.info("h5py setitem exception: " + str(te)) 2543 raise IOError(errno.EINVAL, str(te)) 2544 else: 2545 try: 2546 dset[slices] = arr 2547 except TypeError as te: 2548 self.log.info("h5py setitem exception: " + str(te)) 2549 raise IOError(errno.EINVAL, str(te)) 2550 2551 # update modified time 2552 self.setModifiedTime(obj_uuid) 2553 return True 2554 2555 """ 2556 setDatasetValuesByPointSelection - Update the dataset values using the given 2557 data and point selection 2558 """ 2559 def setDatasetValuesByPointSelection(self, obj_uuid, data, points, format="json"): 2560 dset = self.getDatasetObjByUuid(obj_uuid) 2561 2562 if format not in ("json", "binary"): 2563 msg = "only json and binary formats are supported" 2564 self.log.info(msg) 2565 raise IOError(errno.EINVAL, msg) 2566 2567 if format == "binary" and type(data) is not bytes: 2568 msg ="data must be of type bytes for binary writing" 2569 self.log.info(msg) 2570 raise IOError(errno.EINVAL, msg) 2571 2572 if dset is None: 2573 msg = "Dataset: " + obj_uuid + " not found" 2574 self.log.info(msg) 2575 raise IOError(errno.ENXIO, msg) 2576 2577 dt = dset.dtype 2578 typeItem = getTypeItem(dt) 2579 itemSize = getItemSize(typeItem) 2580 if itemSize == "H5T_VARIABLE" and format == "binary": 2581 msg = "Only JSON is supported for for this data type" 2582 self.log.info(msg) 2583 raise IOError(errno.EINVAL, msg) 2584 2585 rank = len(dset.shape) 2586 2587 # need some special conversion for compound types -- 2588 # each element must be a tuple, but the JSON decoder 2589 # gives us a list instead. 2590 if format == "json" and len(dset.dtype) > 1 and type(data) in (list, tuple): 2591 converted_data = self.toTuple(rank, data) 2592 #for i in range(len(data)): 2593 # converted_data.append(self.toTuple(data[i])) 2594 #data = converted_data 2595 2596 if format == "json": 2597 2598 try: 2599 i = 0 2600 for point in points: 2601 if rank == 1: 2602 dset[[point]] = data[i] 2603 else: 2604 dset[tuple(point)] = data[i] 2605 i += 1 2606 except ValueError: 2607 # out of range error 2608 msg = "setDatasetValuesByPointSelection, out of range error" 2609 self.log.info(msg) 2610 raise IOError(errno.EINVAL, msg) 2611 2612 else: 2613 #binary 2614 arr = np.fromstring(data, dtype=dset.dtype) 2615 dset[points] = arr # coordinate write 2616 2617 # update modified time 2618 self.setModifiedTime(obj_uuid) 2619 return True 2620 2621 """ 2622 createDataset - creates new dataset given shape and datatype 2623 Returns item 2624 """ 2625 def createDataset(self, datatype, datashape, max_shape=None, 2626 creation_props=None, obj_uuid=None): 2627 self.initFile() 2628 if self.readonly: 2629 msg = "Unable to create dataset (Updates are not allowed)" 2630 self.log.info(msg) 2631 raise IOError(errno.EPERM, msg) 2632 datasets = self.dbGrp["{datasets}"] 2633 if not obj_uuid: 2634 obj_uuid = str(uuid.uuid1()) 2635 dt = None 2636 item = {} 2637 2638 # h5py.createdataset fields 2639 kwargs = {} # key word arguments for h5py dataset creation 2640 2641 fillvalue = None 2642 2643 if creation_props is None: 2644 creation_props = {} # create empty list for convience 2645 2646 if creation_props: 2647 if "fillValue" in creation_props: 2648 fillvalue = creation_props["fillValue"] 2649 if "trackTimes" in creation_props: 2650 kwargs['track_times'] = creation_props["trackTimes"] 2651 if "layout" in creation_props: 2652 layout = creation_props["layout"] 2653 if "dims" in layout: 2654 kwargs['chunks'] = tuple(layout["dims"]) 2655 if "filters" in creation_props: 2656 filter_props = creation_props["filters"] 2657 for filter_prop in filter_props: 2658 if "id" not in filter_prop: 2659 msg = "filter id not provided" 2660 self.log.info(msg) 2661 raise IOError(errno.EINVAL, msg) 2662 filter_id = filter_prop["id"] 2663 if filter_id not in _HDF_FILTERS: 2664 self.log.info("unknown filter id: " + str(filter_id) + " ignoring") 2665 continue 2666 2667 hdf_filter = _HDF_FILTERS[filter_id] 2668 2669 self.log.info("got filter: " + str(filter_id)) 2670 if "alias" not in hdf_filter: 2671 self.log.info("unsupported filter id: " + str(filter_id) + " ignoring") 2672 continue 2673 2674 filter_alias = hdf_filter["alias"] 2675 if not h5py.h5z.filter_avail(filter_id): 2676 self.log.info("compression filter not available, filter: " + filter_alias + " will be ignored") 2677 continue 2678 if filter_alias in _H5PY_COMPRESSION_FILTERS: 2679 if kwargs.get('compression'): 2680 self.log.info("compression filter already set, filter: " + filter_alias + " will be ignored") 2681 continue 2682 2683 kwargs['compression'] = filter_alias 2684 self.log.info("setting compression filter to: " + kwargs['compression']) 2685 if filter_alias == "gzip": 2686 # check for an optional compression value 2687 if "level" in filter_prop: 2688 kwargs['compression_opts'] = filter_prop["level"] 2689 elif filter_alias == "szip": 2690 bitsPerPixel = None 2691 coding = 'nn' 2692 2693 if "bitsPerPixel" in filter_prop: 2694 bitsPerPixel = filter_prop["bitsPerPixel"] 2695 if "coding" in filter_prop: 2696 if filter_prop["coding"] == "H5_SZIP_EC_OPTION_MASK": 2697 coding = 'ec' 2698 elif filter_prop["coding"] == "H5_SZIP_NN_OPTION_MASK": 2699 coding = 'nn' 2700 else: 2701 msg = "invalid szip option: 'coding'" 2702 self.log.info(msg) 2703 raise IOError(errno.EINVAL, msg) 2704 # note: pixelsPerBlock, and pixelsPerScanline not supported by h5py, 2705 # so these options will be ignored 2706 if "pixelsPerBlock" in filter_props: 2707 self.log.info("ignoring szip option: 'pixelsPerBlock'") 2708 if "pixelsPerScanline" in filter_props: 2709 self.log.info("ignoring szip option: 'pixelsPerScanline'") 2710 if bitsPerPixel: 2711 kwargs['compression_opts'] = (coding, bitsPerPixel) 2712 else: 2713 if filter_alias == "shuffle": 2714 kwargs['shuffle'] = True 2715 elif filter_alias == "fletcher32": 2716 kwargs['fletcher32'] = True 2717 elif filter_alias == "scaleoffset": 2718 if "scaleOffset" not in filter_prop: 2719 msg = "No scale_offset provided for scale offset filter" 2720 self.log(msg) 2721 raise IOError(errno.EINVAL, msg) 2722 kwargs['scaleoffset'] = filter_prop["scaleOffset"] 2723 else: 2724 self.log.info("Unexpected filter name: " + filter_alias + " , ignoring") 2725 2726 dt_ref = self.createTypeFromItem(datatype) 2727 if dt_ref is None: 2728 msg = 'Unexpected error, no type returned' 2729 self.log.error(msg) 2730 raise IOError(errno.EIO, msg) 2731 2732 dt = dt_ref 2733 if hasattr(dt_ref, 'dtype'): 2734 # dt_ref is actualy a handle to a committed type 2735 # get the dtype prop, but use dt_ref for the actual dataset creation 2736 dt = dt_ref.dtype 2737 2738 if fillvalue and len(dt) > 1 and type(fillvalue) in (list, tuple): 2739 # for compound types, need to convert from list to dataset compatible element 2740 2741 if len(dt) != len(fillvalue): 2742 msg = 'fillvalue has incorrect number of elements' 2743 self.log.info(msg) 2744 raise IOError(errno.EINVAL, msg) 2745 ndscalar = np.zeros((), dtype=dt) 2746 for i in range(len(fillvalue)): 2747 field = dt.names[i] 2748 ndscalar[field] = self.toTuple(0, fillvalue[i]) 2749 fillvalue = ndscalar 2750 2751 if fillvalue: 2752 kwargs['fillvalue'] = fillvalue 2753 2754 dataset_id = None 2755 if datashape is None: 2756 # create null space dataset 2757 # null space datasets not supported in h5py yet: 2758 # See: https://github.com/h5py/h5py/issues/279 2759 # work around this by using low-level interface. 2760 # first create a temp scalar dataset so we can pull out the typeid 2761 tmpGrp = None 2762 if "{tmp}" not in self.dbGrp: 2763 tmpGrp = self.dbGrp.create_group("{tmp}") 2764 else: 2765 tmpGrp = self.dbGrp["{tmp}"] 2766 tmpDataset = tmpGrp.create_dataset(obj_uuid, shape=(1,), 2767 dtype=dt_ref) 2768 tid = tmpDataset.id.get_type() 2769 sid = sid = h5py.h5s.create(h5py.h5s.NULL) 2770 # now create the permanent dataset 2771 gid = datasets.id 2772 if six.PY3: 2773 b_obj_uuid = obj_uuid.encode('utf-8') 2774 dataset_id = h5py.h5d.create(gid, b_obj_uuid, tid, sid) 2775 else: 2776 dataset_id = h5py.h5d.create(gid, obj_uuid, tid, sid) 2777 # delete the temp dataset 2778 del tmpGrp[obj_uuid] 2779 else: 2780 2781 # create the dataset 2782 2783 try: 2784 newDataset = datasets.create_dataset( 2785 obj_uuid, shape=datashape, maxshape=max_shape, 2786 dtype=dt_ref, **kwargs) 2787 except ValueError as ve: 2788 msg = "Unable to create dataset" 2789 try: 2790 msg += ": " + ve.message 2791 except AttributeError: 2792 pass # no message 2793 self.log.info(msg) 2794 raise IOError(errno.EINVAL, msg) # assume this is due to invalid params 2795 2796 if newDataset: 2797 dataset_id = newDataset.id 2798 2799 if dataset_id is None: 2800 msg = 'Unexpected failure to create dataset' 2801 self.log.error(msg) 2802 raise IOError(errno.EIO, msg) 2803 # store reverse map as an attribute 2804 addr = h5py.h5o.get_info(dataset_id).addr 2805 addrGrp = self.dbGrp["{addr}"] 2806 addrGrp.attrs[str(addr)] = obj_uuid 2807 2808 # save creation props if any 2809 if creation_props: 2810 self.setDatasetCreationProps(obj_uuid, creation_props) 2811 2812 # set timestamp 2813 now = time.time() 2814 self.setCreateTime(obj_uuid, timestamp=now) 2815 self.setModifiedTime(obj_uuid, timestamp=now) 2816 2817 item['id'] = obj_uuid 2818 if self.update_timestamps: 2819 item['ctime'] = self.getCreateTime(obj_uuid) 2820 item['mtime'] = self.getModifiedTime(obj_uuid) 2821 item['attributeCount'] = 0 2822 return item 2823 2824 """ 2825 Resize existing Dataset 2826 """ 2827 def resizeDataset(self, obj_uuid, shape): 2828 self.log.info("resizeDataset(") # + obj_uuid + "): ") # + str(shape)) 2829 self.initFile() 2830 if self.readonly: 2831 msg = "Unable to resize dataset (Updates are not allowed)" 2832 self.log.info(msg) 2833 raise IOError(errno.EACESS, msg) 2834 dset = self.getDatasetObjByUuid(obj_uuid) # will throw exception if not found 2835 if len(shape) != len(dset.shape): 2836 msg = "Unable to resize dataset, shape has wrong number of dimensions" 2837 self.log.info(msg) 2838 raise IOError(errno.EINVAL, msg) 2839 for i in range(len(shape)): 2840 if shape[i] < dset.shape[i]: 2841 msg = "Unable to resize dataset, cannot make extent smaller" 2842 self.log.info(msg) 2843 raise IOError(errno.EINVAL, msg) 2844 if dset.maxshape[i] != None and shape[i] > dset.maxshape[i]: 2845 msg = "Unable to resize dataset, max extent exceeded" 2846 self.log.info(msg) 2847 raise IOError(errno.EINVAL, msg) 2848 2849 dset.resize(shape) # resize 2850 2851 # update modified time 2852 self.setModifiedTime(obj_uuid) 2853 2854 """ 2855 Check if link points to given target (as a HardLink) 2856 """ 2857 def isObjectHardLinked(self, parentGroup, targetGroup, linkName): 2858 try: 2859 linkObj = parentGroup.get(linkName, None, False, True) 2860 linkClass = linkObj.__class__.__name__ 2861 except TypeError: 2862 # UDLink? Ignore for now 2863 return False 2864 if linkClass == 'SoftLink': 2865 return False 2866 elif linkClass == 'ExternalLink': 2867 return False 2868 elif linkClass == 'HardLink': 2869 if parentGroup[linkName] == targetGroup: 2870 return True 2871 else: 2872 self.log.warning("unexpected linkclass: " + linkClass) 2873 return False 2874 2875 """ 2876 Delete Dataset, Group or Datatype by UUID 2877 """ 2878 def deleteObjectByUuid(self, objtype, obj_uuid): 2879 if objtype not in ('group', 'dataset', 'datatype'): 2880 msg = "unexpected objtype: " + objtype 2881 self.log.error(msg) 2882 raise IOError(errno.EIO, msg) 2883 self.initFile() 2884 self.log.info("delete uuid: " + obj_uuid) 2885 if self.readonly: 2886 msg = "Unable to delete object (Updates are not allowed)" 2887 self.log.info(msg) 2888 raise IOError(errno.EPERM, msg) 2889 2890 if obj_uuid == self.dbGrp.attrs["rootUUID"] and objtype == 'group': 2891 # can't delete root group 2892 msg = "Unable to delete group (root group may not be deleted)" 2893 self.log.info(msg) 2894 raise IOError(errno.EPERM, msg) 2895 2896 dbCol = None 2897 tgt = None 2898 if objtype == 'dataset': 2899 tgt = self.getDatasetObjByUuid(obj_uuid) 2900 dbCol = self.dbGrp["{datasets}"] 2901 elif objtype == 'group': 2902 tgt = self.getGroupObjByUuid(obj_uuid) 2903 dbCol = self.dbGrp["{groups}"] 2904 else: # datatype 2905 tgt = self.getCommittedTypeObjByUuid(obj_uuid) 2906 dbCol = self.dbGrp["{datatypes}"] 2907 2908 if tgt is None: 2909 msg = "Unable to delete " + objtype + ", uuid: " + obj_uuid + " not found" 2910 self.log.info(msg) 2911 raise IOError(errno.ENXIO, msg) 2912 2913 # unlink from root (if present) 2914 self.unlinkObject(self.f['/'], tgt) 2915 2916 groups = self.dbGrp["{groups}"] 2917 # iterate through each group in the file and unlink tgt if it is linked 2918 # by the group. 2919 # We'll store a list of links to be removed as we go, and then actually 2920 # remove the links after the iteration is done (otherwise we can run into issues 2921 # where the key has become invalid) 2922 linkList = [] # this is our list 2923 for uuidName in groups.attrs: 2924 grpRef = groups.attrs[uuidName] 2925 # de-reference handle 2926 grp = self.f[grpRef] 2927 for linkName in grp: 2928 if self.isObjectHardLinked(grp, tgt, linkName): 2929 linkList.append({'group': grp, 'link': linkName}) 2930 for item in linkList: 2931 self.unlinkObjectItem(item['group'], tgt, item['link']) 2932 2933 addr = h5py.h5o.get_info(tgt.id).addr 2934 addrGrp = self.dbGrp["{addr}"] 2935 del addrGrp.attrs[str(addr)] # remove reverse map 2936 dbRemoved = False 2937 2938 # finally, remove the dataset from db 2939 if obj_uuid in dbCol: 2940 # should be here (now it is anonymous) 2941 del dbCol[obj_uuid] 2942 dbRemoved = True 2943 2944 if not dbRemoved: 2945 self.log.warning("did not find: " + obj_uuid + " in anonymous collection") 2946 2947 if obj_uuid in dbCol.attrs: 2948 self.log.info("removing: " + obj_uuid + " from non-anonymous collection") 2949 del dbCol.attrs[obj_uuid] 2950 dbRemoved = True 2951 2952 if not dbRemoved: 2953 msg = "Unexpected Error, did not find reference to: " + obj_uuid 2954 self.log.error(msg) 2955 raise IOError(errno.EIO, msg) 2956 2957 # note when the object was deleted 2958 self.setModifiedTime(obj_uuid) 2959 2960 return True 2961 2962 def getGroupItemByUuid(self, obj_uuid): 2963 self.initFile() 2964 grp = self.getGroupObjByUuid(obj_uuid) 2965 if grp is None: 2966 if self.getModifiedTime(obj_uuid, useRoot=False): 2967 msg = "Group with uuid: " + obj_uuid + " has been previously deleted" 2968 self.log.info(msg) 2969 raise IOError(errno.ENOENT, msg) 2970 else: 2971 msg = "Group with uuid: " + obj_uuid + " was not found" 2972 self.log.info(msg) 2973 raise IOError(errno.ENXIO, msg) 2974 2975 linkCount = len(grp) 2976 if "__db__" in grp: 2977 linkCount -= 1 # don't include the db group 2978 2979 item = { 'id': obj_uuid } 2980 alias = [] 2981 if grp.name and not grp.name.startswith("/__db__"): 2982 alias.append(grp.name) # just use the default h5py path for now 2983 item['alias'] = alias 2984 item['attributeCount'] = len(grp.attrs) 2985 item['linkCount'] = linkCount 2986 if self.update_timestamps: 2987 item['ctime'] = self.getCreateTime(obj_uuid) 2988 item['mtime'] = self.getModifiedTime(obj_uuid) 2989 2990 return item 2991 2992 """ 2993 getLinkItemByObj - return info about a link 2994 parent: reference to group 2995 linkName: name of link 2996 return: item dictionary with link attributes, or None if not found 2997 """ 2998 def getLinkItemByObj(self, parent, link_name): 2999 if link_name not in parent: 3000 return None 3001 3002 if link_name == "__db__": 3003 return None # don't provide link to db group 3004 # "http://somefile/#h5path(somepath)") 3005 item = { 'title': link_name } 3006 # get the link object, one of HardLink, SoftLink, or ExternalLink 3007 try: 3008 linkObj = parent.get(link_name, None, False, True) 3009 linkClass = linkObj.__class__.__name__ 3010 except TypeError: 3011 # UDLink? set class as 'user' 3012 linkClass = 'UDLink' # user defined links 3013 item['class'] = 'H5L_TYPE_USER_DEFINED' 3014 if linkClass == 'SoftLink': 3015 item['class'] = 'H5L_TYPE_SOFT' 3016 item['h5path'] = linkObj.path 3017 item['href'] = '#h5path(' + linkObj.path + ')' 3018 elif linkClass == 'ExternalLink': 3019 item['class'] = 'H5L_TYPE_EXTERNAL' 3020 item['h5path'] = linkObj.path 3021 item['file'] = linkObj.filename 3022 item['href'] = '#h5path(' + linkObj.path + ')' 3023 elif linkClass == 'HardLink': 3024 # Hardlink doesn't have any properties itself, just get the linked 3025 # object 3026 obj = parent[link_name] 3027 addr = h5py.h5o.get_info(obj.id).addr 3028 item['class'] = 'H5L_TYPE_HARD' 3029 item['id'] = self.getUUIDByAddress(addr) 3030 class_name = obj.__class__.__name__ 3031 if class_name == 'Dataset': 3032 item['href'] = 'datasets/' + item['id'] 3033 item['collection'] = 'datasets' 3034 elif class_name == 'Group': 3035 item['href'] = 'groups/' + item['id'] 3036 item['collection'] = 'groups' 3037 elif class_name == 'Datatype': 3038 item['href'] = 'datatypes/' + item['id'] 3039 item['collection'] = 'datatypes' 3040 else: 3041 self.log.warning("unexpected object type: " + item['type']) 3042 3043 return item 3044 3045 def getLinkItemByUuid(self, grpUuid, link_name): 3046 self.log.info( 3047 "db.getLinkItemByUuid(" + grpUuid + ", [" + link_name + "])") 3048 if not link_name: 3049 msg = "link_name not specified" 3050 self.log.info(msg) 3051 raise IOError(errno.EINVAL, msg) 3052 3053 self.initFile() 3054 parent = self.getGroupObjByUuid(grpUuid) 3055 if parent is None: 3056 msg = "Parent group: " + grpUuid + " of link not found" 3057 self.log.info(msg) 3058 raise IOError(errno.ENXIO, msg) 3059 3060 item = self.getLinkItemByObj(parent, link_name) 3061 # add timestamps 3062 if item: 3063 if self.update_timestamps: 3064 item['ctime'] = self.getCreateTime(grpUuid, objType="link", name=link_name) 3065 item['mtime'] = self.getModifiedTime(grpUuid, objType="link", name=link_name) 3066 else: 3067 self.log.info("link not found") 3068 mtime = self.getModifiedTime(grpUuid, objType="link", name=link_name, useRoot=False) 3069 if mtime: 3070 msg = "Link [" + link_name + "] of: " + grpUuid + " has been previously deleted" 3071 self.log.info(msg) 3072 raise IOError(errno.ENOENT, msg) 3073 else: 3074 msg = "Link [" + link_name + "] of: " + grpUuid + " not found" 3075 self.log.info(msg) 3076 raise IOError(errno.ENXIO, msg) 3077 3078 return item 3079 3080 def getLinkItems(self, grpUuid, marker=None, limit=0): 3081 self.log.info("db.getLinkItems(" + grpUuid + ")") 3082 if marker: 3083 self.log.info("...marker: " + marker) 3084 if limit: 3085 self.log.info("...limit: " + str(limit)) 3086 3087 self.initFile() 3088 parent = self.getGroupObjByUuid(grpUuid) 3089 if parent is None: 3090 msg = "Parent group: " + grpUuid + " not found, no links returned" 3091 self.log.info(msg) 3092 raise IOError(errno.ENXIO, msg) 3093 items = [] 3094 gotMarker = True 3095 if marker is not None: 3096 gotMarker = False 3097 count = 0 3098 for link_name in parent: 3099 if link_name == "__db__": 3100 continue 3101 if not gotMarker: 3102 if link_name == marker: 3103 gotMarker = True 3104 continue # start filling in result on next pass 3105 else: 3106 continue # keep going! 3107 item = self.getLinkItemByObj(parent, link_name) 3108 items.append(item) 3109 3110 count += 1 3111 if limit > 0 and count == limit: 3112 break # return what we got 3113 return items 3114 3115 def unlinkItem(self, grpUuid, link_name): 3116 if self.readonly: 3117 msg = "Unable to unlink item (Updates are not allowed)" 3118 self.log.info(msg) 3119 raise IOError(errno.EPERM, msg) 3120 grp = self.getGroupObjByUuid(grpUuid) 3121 if grp is None: 3122 msg = "Parent group: " + grpUuid + " not found, cannot remove link" 3123 self.log.info(msg) 3124 raise IOError(errno.ENXIO, msg) 3125 3126 if link_name not in grp: 3127 msg = "Link: [" + link_name + "] of group: " + grpUuid + " not found, cannot remove link" 3128 self.log.info(msg) 3129 raise IOError(errno.ENXIO, msg) 3130 3131 if link_name == "__db__": 3132 # don't allow db group to be unlinked! 3133 msg = "Unlinking of __db__ group not allowed" 3134 raise IOError(errno.EPERM, msg) 3135 3136 obj = None 3137 try: 3138 linkObj = grp.get(link_name, None, False, True) 3139 linkClass = linkObj.__class__.__name__ 3140 if linkClass == 'HardLink': 3141 # we can safely reference the object 3142 obj = grp[link_name] 3143 except TypeError: 3144 # UDLink? Return false to indicate that we can not delete this 3145 msg = "Unable to unlink user defined link" 3146 self.log.info(msg) 3147 raise IOError(errno.EPERM, msg) 3148 3149 linkDeleted = False 3150 if obj is not None: 3151 linkDeleted = self.unlinkObjectItem(grp, obj, link_name) 3152 else: 3153 # SoftLink or External Link - we can just remove the key 3154 del grp[link_name] 3155 linkDeleted = True 3156 3157 if linkDeleted: 3158 # update timestamp 3159 self.setModifiedTime(grpUuid, objType="link", name=link_name) 3160 3161 return linkDeleted 3162 3163 def getCollection(self, col_type, marker=None, limit=None): 3164 self.log.info("db.getCollection(" + col_type + ")") 3165 #col_type should be either "datasets", "groups", or "datatypes" 3166 if col_type not in ("datasets", "groups", "datatypes"): 3167 msg = "Unexpected col_type: [" + col_type + "]" 3168 self.log.error(msg) 3169 raise IOError(errno.EIO, msg) 3170 self.initFile() 3171 col = None # Group, Dataset, or Datatype 3172 if col_type == "datasets": 3173 col = self.dbGrp["{datasets}"] 3174 elif col_type == "groups": 3175 col = self.dbGrp["{groups}"] 3176 else: # col_type == "datatypes" 3177 col = self.dbGrp["{datatypes}"] 3178 3179 uuids = [] 3180 count = 0 3181 # gather the non-anonymous ids first 3182 for obj_uuid in col.attrs: 3183 if marker: 3184 if obj_uuid == marker: 3185 marker = None # clear and pick up next item 3186 continue 3187 uuids.append(obj_uuid) 3188 count += 1 3189 if limit is not None and limit > 0 and count == limit: 3190 break 3191 3192 if limit == 0 or (limit is not None and count < limit): 3193 # grab any anonymous obj ids next 3194 for obj_uuid in col: 3195 if marker: 3196 if obj_uuid == marker: 3197 marker = None # clear and pick up next item 3198 continue 3199 uuids.append(obj_uuid) 3200 count += 1 3201 if limit is not None and limit > 0 and count == limit: 3202 break 3203 3204 return uuids 3205 3206 """ 3207 Get the DB Collection names 3208 """ 3209 def getDBCollections(self): 3210 return ("{groups}", "{datasets}", "{datatypes}") 3211 3212 """ 3213 Return the db collection the uuid belongs to 3214 """ 3215 def getDBCollection(self, obj_uuid): 3216 dbCollections = self.getDBCollections() 3217 for dbCollectionName in dbCollections: 3218 col = self.dbGrp[dbCollectionName] 3219 if obj_uuid in col or obj_uuid in col.attrs: 3220 return col 3221 return None 3222 3223 def unlinkObjectItem(self, parentGrp, tgtObj, link_name): 3224 if self.readonly: 3225 msg = "Unexpected attempt to unlink object" 3226 self.log.error(msg) 3227 raise IOError(errno.EIO, msg) 3228 if link_name not in parentGrp: 3229 msg = "Unexpected: did not find link_name: [" + link_name + "]" 3230 self.log.error(msg) 3231 raise IOError(errno.EIO, msg) 3232 try: 3233 linkObj = parentGrp.get(link_name, None, False, True) 3234 except TypeError: 3235 # user defined link? 3236 msg = "Unable to remove link (user-defined link?)" 3237 self.log.error(msg) 3238 raise IOError(errno.EIO, msg) 3239 linkClass = linkObj.__class__.__name__ 3240 # only deal with HardLinks 3241 linkDeleted = False 3242 if linkClass == 'HardLink': 3243 obj = parentGrp[link_name] 3244 if tgtObj is None or obj == tgtObj: 3245 3246 numlinks = self.getNumLinksToObject(obj) 3247 if numlinks == 1: 3248 # last link to this object - convert to anonymous object by 3249 # creating link under {datasets} or {groups} or {datatypes} 3250 # also remove the attribute UUID key 3251 addr = h5py.h5o.get_info(obj.id).addr 3252 obj_uuid = self.getUUIDByAddress(addr) 3253 self.log.info("converting: " + obj_uuid 3254 + " to anonymous obj") 3255 dbCol = self.getDBCollection(obj_uuid) 3256 del dbCol.attrs[obj_uuid] # remove the object ref 3257 dbCol[obj_uuid] = obj # add a hardlink 3258 self.log.info("deleting link: [" + link_name + "] from: " 3259 + parentGrp.name) 3260 del parentGrp[link_name] 3261 linkDeleted = True 3262 else: 3263 self.log.info("unlinkObjectItem: link is not a hardlink, ignoring") 3264 return linkDeleted 3265 3266 def unlinkObject(self, parentGrp, tgtObj): 3267 for name in parentGrp: 3268 self.unlinkObjectItem(parentGrp, tgtObj, name) 3269 return True 3270 3271 def linkObject(self, parentUUID, childUUID, link_name): 3272 self.initFile() 3273 if self.readonly: 3274 msg = "Unable to create link (Updates are not allowed)" 3275 self.log.info(msg) 3276 raise IOError(errno.EPERM, msg) 3277 3278 parentObj = self.getGroupObjByUuid(parentUUID) 3279 if parentObj is None: 3280 msg = "Unable to create link, parent UUID: " + parentUUID + " not found" 3281 self.log.info(msg) 3282 raise IOError(errno.ENXIO, msg) 3283 3284 childObj = self.getDatasetObjByUuid(childUUID) 3285 if childObj is None: 3286 # maybe it's a group... 3287 childObj = self.getGroupObjByUuid(childUUID) 3288 if childObj is None: 3289 # or maybe it's a committed datatype... 3290 childObj = self.getCommittedTypeObjByUuid(childUUID) 3291 if childObj is None: 3292 msg = "Unable to link item, child UUID: " + childUUID + " not found" 3293 self.log.info(msg) 3294 raise IOError(errno.ENXIO, msg) 3295 if link_name in parentObj: 3296 # link already exists 3297 self.log.info("linkname already exists, deleting") 3298 self.unlinkObjectItem(parentObj, None, link_name) 3299 parentObj[link_name] = childObj 3300 3301 # convert this from an anonymous object to ref if needed 3302 dbCol = self.getDBCollection(childUUID) 3303 if childUUID in dbCol: 3304 # convert to a ref 3305 del dbCol[childUUID] # remove hardlink 3306 dbCol.attrs[childUUID] = childObj.ref # create a ref 3307 3308 # set link timestamps 3309 now = time.time() 3310 self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now) 3311 self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now) 3312 return True 3313 3314 def createSoftLink(self, parentUUID, linkPath, link_name): 3315 self.initFile() 3316 if self.readonly: 3317 msg = "Unable to create link (Updates are not allowed)" 3318 self.log.info(msg) 3319 raise IOError(errno.EPERM, msg) 3320 parentObj = self.getGroupObjByUuid(parentUUID) 3321 if parentObj is None: 3322 msg = "Unable to create link, parent UUID: " + parentUUID + " not found" 3323 self.log.info(msg) 3324 raise IOError(errno.ENXIO, msg) 3325 if link_name in parentObj: 3326 # link already exists 3327 self.log.info("linkname already exists, deleting") 3328 del parentObj[link_name] # delete old link 3329 parentObj[link_name] = h5py.SoftLink(linkPath) 3330 3331 now = time.time() 3332 self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now) 3333 self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now) 3334 3335 return True 3336 3337 def createExternalLink(self, parentUUID, extPath, linkPath, link_name): 3338 self.initFile() 3339 if self.readonly: 3340 msg = "Unable to create link (Updates are not allowed)" 3341 self.log.info(msg) 3342 raise IOError(errno.EPERM, msg) 3343 parentObj = self.getGroupObjByUuid(parentUUID) 3344 if parentObj is None: 3345 msg = "Unable to create link, parent UUID: " + parentUUID + " not found" 3346 self.log.info(msg) 3347 raise IOError(errno.ENXIO, msg) 3348 if link_name in parentObj: 3349 # link already exists 3350 self.log.info("linkname already exists, deleting") 3351 del parentObj[link_name] # delete old link 3352 parentObj[link_name] = h5py.ExternalLink(extPath, linkPath) 3353 3354 now = time.time() 3355 self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now) 3356 self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now) 3357 3358 return True 3359 3360 def createGroup(self, obj_uuid=None): 3361 self.initFile() 3362 if self.readonly: 3363 msg = "Unable to create group (Updates are not allowed)" 3364 self.log.info(msg) 3365 raise IOError(errno.EPERM, msg) 3366 groups = self.dbGrp["{groups}"] 3367 if not obj_uuid: 3368 obj_uuid = str(uuid.uuid1()) 3369 newGroup = groups.create_group(obj_uuid) 3370 # store reverse map as an attribute 3371 addr = h5py.h5o.get_info(newGroup.id).addr 3372 addrGrp = self.dbGrp["{addr}"] 3373 addrGrp.attrs[str(addr)] = obj_uuid 3374 3375 #set timestamps 3376 now = time.time() 3377 self.setCreateTime(obj_uuid, timestamp=now) 3378 self.setModifiedTime(obj_uuid, timestamp=now) 3379 3380 return obj_uuid 3381 3382 def getNumberOfGroups(self): 3383 self.initFile() 3384 count = 0 3385 groups = self.dbGrp["{groups}"] 3386 count += len(groups) # anonymous groups 3387 count += len(groups.attrs) # linked groups 3388 count += 1 # add of for root group 3389 3390 return count 3391 3392 def getNumberOfDatasets(self): 3393 self.initFile() 3394 count = 0 3395 datasets = self.dbGrp["{datasets}"] 3396 count += len(datasets) # anonymous datasets 3397 count += len(datasets.attrs) # linked datasets 3398 return count 3399 3400 def getNumberOfDatatypes(self): 3401 self.initFile() 3402 count = 0 3403 datatypes = self.dbGrp["{datatypes}"] 3404 count += len(datatypes) # anonymous datatypes 3405 count += len(datatypes.attrs) # linked datatypes 3406 return count 3407