1# -*- coding: utf-8 -*- 2 3######################################################################## 4# 5# License: BSD 6# Created: September 21, 2002 7# Author: Francesc Alted - faltet@pytables.com 8# 9# $Id$ 10# 11######################################################################## 12 13"""Cython interface between several PyTables classes and HDF5 library. 14 15Classes (type extensions): 16 17 File 18 AttributeSet 19 Node 20 Leaf 21 Group 22 Array 23 VLArray 24 UnImplemented 25 26Functions: 27 28Misc variables: 29 30""" 31 32import os 33import warnings 34from collections import namedtuple 35 36ObjInfo = namedtuple('ObjInfo', ['addr', 'rc']) 37ObjTimestamps = namedtuple('ObjTimestamps', ['atime', 'mtime', 38 'ctime', 'btime']) 39 40 41from cpython cimport PY_MAJOR_VERSION 42if PY_MAJOR_VERSION < 3: 43 import cPickle as pickle 44else: 45 import pickle 46 47import numpy 48 49from tables.exceptions import HDF5ExtError, DataTypeWarning 50 51from tables.utils import (check_file_access, byteorders, correct_byteorder, 52 SizeType) 53 54from tables.atom import Atom 55 56from tables.description import descr_from_dtype 57 58from tables.utilsextension import (encode_filename, set_blosc_max_threads, 59 atom_to_hdf5_type, atom_from_hdf5_type, hdf5_to_np_ext_type, create_nested_type, 60 pttype_to_hdf5, pt_special_kinds, npext_prefixes_to_ptkinds, hdf5_class_to_string, 61 platform_byteorder) 62 63 64# Types, constants, functions, classes & other objects from everywhere 65from libc.stdlib cimport malloc, free 66from libc.string cimport strdup, strlen 67from numpy cimport import_array, ndarray, npy_intp 68from cpython.bytes cimport (PyBytes_AsString, PyBytes_FromStringAndSize, 69 PyBytes_Check) 70from cpython.unicode cimport PyUnicode_DecodeUTF8 71 72 73from definitions cimport (uintptr_t, hid_t, herr_t, hsize_t, hvl_t, 74 H5S_seloper_t, H5D_FILL_VALUE_UNDEFINED, 75 H5O_TYPE_UNKNOWN, H5O_TYPE_GROUP, H5O_TYPE_DATASET, H5O_TYPE_NAMED_DATATYPE, 76 H5L_TYPE_ERROR, H5L_TYPE_HARD, 
H5L_TYPE_SOFT, H5L_TYPE_EXTERNAL, 77 H5T_class_t, H5T_sign_t, H5T_NATIVE_INT, 78 H5T_cset_t, H5T_CSET_ASCII, H5T_CSET_UTF8, 79 H5F_SCOPE_GLOBAL, H5F_ACC_TRUNC, H5F_ACC_RDONLY, H5F_ACC_RDWR, 80 H5P_DEFAULT, H5P_FILE_ACCESS, H5P_FILE_CREATE, H5T_DIR_DEFAULT, 81 H5S_SELECT_SET, H5S_SELECT_AND, H5S_SELECT_NOTB, 82 H5Fcreate, H5Fopen, H5Fclose, H5Fflush, H5Fget_vfd_handle, H5Fget_filesize, 83 H5Fget_create_plist, 84 H5Gcreate, H5Gopen, H5Gclose, H5Ldelete, H5Lmove, 85 H5Dopen, H5Dclose, H5Dread, H5Dwrite, H5Dget_type, H5Dget_create_plist, 86 H5Dget_space, H5Dvlen_reclaim, H5Dget_storage_size, H5Dvlen_get_buf_size, 87 H5Tget_native_type, H5Tclose, H5Tis_variable_str, H5Tget_sign, 88 H5Adelete, H5T_BITFIELD, H5T_INTEGER, H5T_FLOAT, H5T_STRING, H5Tget_order, 89 H5Pcreate, H5Pset_cache, H5Pclose, H5Pget_userblock, H5Pset_userblock, 90 H5Pset_fapl_sec2, H5Pset_fapl_log, H5Pset_fapl_stdio, H5Pset_fapl_core, 91 H5Pset_fapl_split, H5Pget_obj_track_times, 92 H5Sselect_all, H5Sselect_elements, H5Sselect_hyperslab, 93 H5Screate_simple, H5Sclose, 94 H5Oget_info, H5O_info_t, 95 H5ATTRset_attribute, H5ATTRset_attribute_string, 96 H5ATTRget_attribute, H5ATTRget_attribute_string, 97 H5ATTRget_attribute_vlen_string_array, 98 H5ATTRfind_attribute, H5ATTRget_type_ndims, H5ATTRget_dims, 99 H5ARRAYget_ndims, H5ARRAYget_info, 100 set_cache_size, get_objinfo, get_linkinfo, Giterate, Aiterate, H5UIget_info, 101 get_len_of_range, conv_float64_timeval32, truncate_dset, 102 H5_HAVE_DIRECT_DRIVER, pt_H5Pset_fapl_direct, 103 H5_HAVE_WINDOWS_DRIVER, pt_H5Pset_fapl_windows, 104 H5_HAVE_IMAGE_FILE, pt_H5Pset_file_image, pt_H5Fget_file_image, 105 H5Tget_size, hobj_ref_t) 106 107cdef int H5T_CSET_DEFAULT = 16 108 109from utilsextension cimport malloc_dims, get_native_type, cstr_to_pystr, load_reference 110 111 112#------------------------------------------------------------------- 113 114cdef extern from "Python.h": 115 116 object PyByteArray_FromStringAndSize(char *s, Py_ssize_t len) 117 118# Functions 
from HDF5 ARRAY (this is not part of HDF5 HL; it's private) 119cdef extern from "H5ARRAY.h" nogil: 120 121 herr_t H5ARRAYmake(hid_t loc_id, char *dset_name, char *obversion, 122 int rank, hsize_t *dims, int extdim, 123 hid_t type_id, hsize_t *dims_chunk, void *fill_data, 124 int complevel, char *complib, int shuffle, 125 int fletcher32, hbool_t track_times, void *data) 126 127 herr_t H5ARRAYappend_records(hid_t dataset_id, hid_t type_id, 128 int rank, hsize_t *dims_orig, 129 hsize_t *dims_new, int extdim, void *data ) 130 131 herr_t H5ARRAYwrite_records(hid_t dataset_id, hid_t type_id, 132 int rank, hsize_t *start, hsize_t *step, 133 hsize_t *count, void *data) 134 135 herr_t H5ARRAYread(hid_t dataset_id, hid_t type_id, 136 hsize_t start, hsize_t nrows, hsize_t step, 137 int extdim, void *data) 138 139 herr_t H5ARRAYreadSlice(hid_t dataset_id, hid_t type_id, 140 hsize_t *start, hsize_t *stop, 141 hsize_t *step, void *data) 142 143 herr_t H5ARRAYreadIndex(hid_t dataset_id, hid_t type_id, int notequal, 144 hsize_t *start, hsize_t *stop, hsize_t *step, 145 void *data) 146 147 herr_t H5ARRAYget_chunkshape(hid_t dataset_id, int rank, hsize_t *dims_chunk) 148 149 herr_t H5ARRAYget_fill_value( hid_t dataset_id, hid_t type_id, 150 int *status, void *value) 151 152 153# Functions for dealing with VLArray objects 154cdef extern from "H5VLARRAY.h" nogil: 155 156 herr_t H5VLARRAYmake( hid_t loc_id, char *dset_name, char *obversion, 157 int rank, hsize_t *dims, hid_t type_id, 158 hsize_t chunk_size, void *fill_data, int complevel, 159 char *complib, int shuffle, int flecther32, 160 hbool_t track_times, void *data) 161 162 herr_t H5VLARRAYappend_records( hid_t dataset_id, hid_t type_id, 163 int nobjects, hsize_t nrecords, 164 void *data ) 165 166 herr_t H5VLARRAYmodify_records( hid_t dataset_id, hid_t type_id, 167 hsize_t nrow, int nobjects, 168 void *data ) 169 170 herr_t H5VLARRAYget_info( hid_t dataset_id, hid_t type_id, 171 hsize_t *nrecords, char *base_byteorder) 172 173 
174#---------------------------------------------------------------------------- 175 176# Initialization code 177 178# The numpy API requires this function to be called before 179# using any numpy facilities in an extension module. 180import_array() 181 182#--------------------------------------------------------------------------- 183 184# Helper functions 185 186cdef hsize_t *npy_malloc_dims(int rank, npy_intp *pdims): 187 """Returns a malloced hsize_t dims from a npy_intp *pdims.""" 188 189 cdef int i 190 cdef hsize_t *dims 191 192 dims = NULL 193 if rank > 0: 194 dims = <hsize_t *>malloc(rank * sizeof(hsize_t)) 195 for i from 0 <= i < rank: 196 dims[i] = pdims[i] 197 return dims 198 199 200cdef object getshape(int rank, hsize_t *dims): 201 """Return a shape (tuple) from a dims C array of rank dimensions.""" 202 203 cdef int i 204 cdef object shape 205 206 shape = [] 207 for i from 0 <= i < rank: 208 shape.append(SizeType(dims[i])) 209 210 return tuple(shape) 211 212 213# Helper function for quickly fetch an attribute string 214cdef object get_attribute_string_or_none(hid_t node_id, char* attr_name): 215 """Returns a string/unicode attribute if it exists in node_id. 216 217 It returns ``None`` in case it don't exists (or there have been problems 218 reading it). 
219 220 """ 221 222 cdef char *attr_value 223 cdef int cset = H5T_CSET_DEFAULT 224 cdef object retvalue 225 cdef hsize_t size 226 227 attr_value = NULL 228 retvalue = None # Default value 229 if H5ATTRfind_attribute(node_id, attr_name): 230 size = H5ATTRget_attribute_string(node_id, attr_name, &attr_value, &cset) 231 if size == 0: 232 if cset == H5T_CSET_UTF8: 233 retvalue = numpy.unicode_(u'') 234 else: 235 retvalue = numpy.bytes_(b'') 236 elif cset == H5T_CSET_UTF8: 237 if size == 1 and attr_value[0] == 0: 238 # compatibility with PyTables <= 3.1.1 239 retvalue = numpy.unicode_(u'') 240 retvalue = PyUnicode_DecodeUTF8(attr_value, size, NULL) 241 retvalue = numpy.unicode_(retvalue) 242 else: 243 retvalue = PyBytes_FromStringAndSize(attr_value, size) 244 # AV: oct 2012 245 # since now we use the string size got form HDF5 we have to stip 246 # trailing zeros used for padding. 247 # The entire process is quite odd but due to a bug (??) in the way 248 # numpy arrays are pickled in python 3 we can't assume that 249 # strlen(attr_value) is the actual length of the attibute 250 # and numpy.bytes_(attr_value) can give a truncated pickle string 251 retvalue = retvalue.rstrip(b'\x00') 252 retvalue = numpy.bytes_(retvalue) 253 254 # Important to release attr_value, because it has been malloc'ed! 
255 if attr_value: 256 free(<void *>attr_value) 257 258 return retvalue 259 260 261# Get the numpy dtype scalar attribute from an HDF5 type as fast as possible 262cdef object get_dtype_scalar(hid_t type_id, H5T_class_t class_id, 263 size_t itemsize): 264 cdef H5T_sign_t sign 265 cdef object stype 266 267 if class_id == H5T_BITFIELD: 268 stype = "b1" 269 elif class_id == H5T_INTEGER: 270 # Get the sign 271 sign = H5Tget_sign(type_id) 272 if (sign > 0): 273 stype = "i%s" % (itemsize) 274 else: 275 stype = "u%s" % (itemsize) 276 elif class_id == H5T_FLOAT: 277 stype = "f%s" % (itemsize) 278 elif class_id == H5T_STRING: 279 if H5Tis_variable_str(type_id): 280 raise TypeError("variable length strings are not supported yet") 281 stype = "S%s" % (itemsize) 282 283 # Try to get a NumPy type. If this can't be done, return None. 284 try: 285 ntype = numpy.dtype(stype) 286 except TypeError: 287 ntype = None 288 return ntype 289 290 291_supported_drivers = ( 292 "H5FD_SEC2", 293 "H5FD_DIRECT", 294 #"H5FD_LOG", 295 "H5FD_WINDOWS", 296 "H5FD_STDIO", 297 "H5FD_CORE", 298 #"H5FD_FAMILY", 299 #"H5FD_MULTI", 300 "H5FD_SPLIT", 301 #"H5FD_MPIO", 302 #"H5FD_MPIPOSIX", 303 #"H5FD_STREAM", 304) 305 306HAVE_DIRECT_DRIVER = bool(H5_HAVE_DIRECT_DRIVER) 307HAVE_WINDOWS_DRIVER = bool(H5_HAVE_WINDOWS_DRIVER) 308 309# Type extensions declarations (these are subclassed by PyTables 310# Python classes) 311 312cdef class File: 313 cdef hid_t file_id 314 cdef hid_t access_plist 315 cdef object name 316 317 def _g_new(self, name, pymode, **params): 318 cdef herr_t err = 0 319 cdef hid_t access_plist, create_plist = H5P_DEFAULT 320 cdef hid_t meta_plist_id = H5P_DEFAULT, raw_plist_id = H5P_DEFAULT 321 cdef size_t img_buf_len = 0, user_block_size = 0 322 cdef void *img_buf_p = NULL 323 cdef bytes encname 324 #cdef bytes logfile_name 325 326 # Check if we can handle the driver 327 driver = params["DRIVER"] 328 if driver is not None and driver not in _supported_drivers: 329 raise ValueError("Invalid or 
not supported driver: '%s'" % driver) 330 if driver == "H5FD_SPLIT": 331 meta_ext = params.get("DRIVER_SPLIT_META_EXT", "-m.h5") 332 raw_ext = params.get("DRIVER_SPLIT_RAW_EXT", "-r.h5") 333 meta_name = meta_ext % name if "%s" in meta_ext else name + meta_ext 334 raw_name = raw_ext % name if "%s" in raw_ext else name + raw_ext 335 enc_meta_ext = encode_filename(meta_ext) 336 enc_raw_ext = encode_filename(raw_ext) 337 338 # Create a new file using default properties 339 self.name = name 340 341 # Encode the filename in case it is unicode 342 encname = encode_filename(name) 343 344 # These fields can be seen from Python. 345 self._v_new = None # this will be computed later 346 # """Is this file going to be created from scratch?""" 347 348 self._isPTFile = True # assume a PyTables file by default 349 # """Does this HDF5 file have a PyTables format?""" 350 351 assert pymode in ('r', 'r+', 'a', 'w'), ("an invalid mode string ``%s`` " 352 "passed the ``check_file_access()`` test; " 353 "please report this to the authors" % pymode) 354 355 image = params.get('DRIVER_CORE_IMAGE') 356 if image: 357 if driver != "H5FD_CORE": 358 warnings.warn("The DRIVER_CORE_IMAGE parameter will be ignored by " 359 "the '%s' driver" % driver) 360 elif not PyBytes_Check(image): 361 raise TypeError("The DRIVER_CORE_IMAGE must be a string of bytes") 362 elif not H5_HAVE_IMAGE_FILE: 363 raise RuntimeError("Support for image files is only available in " 364 "HDF5 >= 1.8.9") 365 366 # After the following check we can be quite sure 367 # that the file or directory exists and permissions are right. 368 if driver == "H5FD_SPLIT": 369 for n in meta_name, raw_name: 370 check_file_access(n, pymode) 371 else: 372 backing_store = params.get("DRIVER_CORE_BACKING_STORE", 1) 373 if driver != "H5FD_CORE" or backing_store: 374 check_file_access(name, pymode) 375 376 # Should a new file be created? 
377 if image: 378 exists = True 379 elif driver == "H5FD_SPLIT": 380 exists = os.path.exists(meta_name) and os.path.exists(raw_name) 381 else: 382 exists = os.path.exists(name) 383 self._v_new = not (pymode in ('r', 'r+') or (pymode == 'a' and exists)) 384 385 user_block_size = params.get("USER_BLOCK_SIZE", 0) 386 if user_block_size and not self._v_new: 387 warnings.warn("The HDF5 file already esists: the USER_BLOCK_SIZE " 388 "will be ignored") 389 elif user_block_size: 390 user_block_size = int(user_block_size) 391 is_pow_of_2 = ((user_block_size & (user_block_size - 1)) == 0) 392 if user_block_size < 512 or not is_pow_of_2: 393 raise ValueError("The USER_BLOCK_SIZE must be a power od 2 greather " 394 "than 512 or zero") 395 396 # File creation property list 397 create_plist = H5Pcreate(H5P_FILE_CREATE) 398 err = H5Pset_userblock(create_plist, user_block_size) 399 if err < 0: 400 H5Pclose(create_plist) 401 raise HDF5ExtError("Unable to set the user block size") 402 403 # File access property list 404 access_plist = H5Pcreate(H5P_FILE_ACCESS) 405 406 # Set parameters for chunk cache 407 H5Pset_cache(access_plist, 0, 408 params["CHUNK_CACHE_NELMTS"], 409 params["CHUNK_CACHE_SIZE"], 410 params["CHUNK_CACHE_PREEMPT"]) 411 412 # Set the I/O driver 413 if driver == "H5FD_SEC2": 414 err = H5Pset_fapl_sec2(access_plist) 415 elif driver == "H5FD_DIRECT": 416 if not H5_HAVE_DIRECT_DRIVER: 417 H5Pclose(create_plist) 418 H5Pclose(access_plist) 419 raise RuntimeError("The H5FD_DIRECT driver is not available") 420 err = pt_H5Pset_fapl_direct(access_plist, 421 params["DRIVER_DIRECT_ALIGNMENT"], 422 params["DRIVER_DIRECT_BLOCK_SIZE"], 423 params["DRIVER_DIRECT_CBUF_SIZE"]) 424 #elif driver == "H5FD_LOG": 425 # if "DRIVER_LOG_FILE" not in params: 426 # H5Pclose(access_plist) 427 # raise ValueError("The DRIVER_LOG_FILE parameter is required for " 428 # "the H5FD_LOG driver") 429 # logfile_name = encode_filename(params["DRIVER_LOG_FILE"]) 430 # err = H5Pset_fapl_log(access_plist, 
431 # <char*>logfile_name, 432 # params["DRIVER_LOG_FLAGS"], 433 # params["DRIVER_LOG_BUF_SIZE"]) 434 elif driver == "H5FD_WINDOWS": 435 if not H5_HAVE_WINDOWS_DRIVER: 436 H5Pclose(access_plist) 437 H5Pclose(create_plist) 438 raise RuntimeError("The H5FD_WINDOWS driver is not available") 439 err = pt_H5Pset_fapl_windows(access_plist) 440 elif driver == "H5FD_STDIO": 441 err = H5Pset_fapl_stdio(access_plist) 442 elif driver == "H5FD_CORE": 443 err = H5Pset_fapl_core(access_plist, 444 params["DRIVER_CORE_INCREMENT"], 445 backing_store) 446 if image: 447 img_buf_len = len(image) 448 img_buf_p = <void *>PyBytes_AsString(image) 449 err = pt_H5Pset_file_image(access_plist, img_buf_p, img_buf_len) 450 if err < 0: 451 H5Pclose(create_plist) 452 H5Pclose(access_plist) 453 raise HDF5ExtError("Unable to set the file image") 454 455 #elif driver == "H5FD_FAMILY": 456 # H5Pset_fapl_family(access_plist, 457 # params["DRIVER_FAMILY_MEMB_SIZE"], 458 # fapl_id) 459 #elif driver == "H5FD_MULTI": 460 # err = H5Pset_fapl_multi(access_plist, memb_map, memb_fapl, memb_name, 461 # memb_addr, relax) 462 elif driver == "H5FD_SPLIT": 463 err = H5Pset_fapl_split(access_plist, enc_meta_ext, meta_plist_id, 464 enc_raw_ext, raw_plist_id) 465 if err < 0: 466 e = HDF5ExtError("Unable to set the file access property list") 467 H5Pclose(create_plist) 468 H5Pclose(access_plist) 469 raise e 470 471 if pymode == 'r': 472 self.file_id = H5Fopen(encname, H5F_ACC_RDONLY, access_plist) 473 elif pymode == 'r+': 474 self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist) 475 elif pymode == 'a': 476 if exists: 477 # A test for logging. 
478 ## H5Pset_sieve_buf_size(access_plist, 0) 479 ## H5Pset_fapl_log (access_plist, "test.log", H5FD_LOG_LOC_WRITE, 0) 480 self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist) 481 else: 482 self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC, create_plist, 483 access_plist) 484 elif pymode == 'w': 485 self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC, create_plist, 486 access_plist) 487 488 if self.file_id < 0: 489 e = HDF5ExtError("Unable to open/create file '%s'" % name) 490 H5Pclose(create_plist) 491 H5Pclose(access_plist) 492 raise e 493 494 H5Pclose(create_plist) 495 H5Pclose(access_plist) 496 497 # Set the cache size 498 set_cache_size(self.file_id, params["METADATA_CACHE_SIZE"]) 499 500 # Set the maximum number of threads for Blosc 501 set_blosc_max_threads(params["MAX_BLOSC_THREADS"]) 502 503 # XXX: add the possibility to pass a pre-allocated buffer 504 def get_file_image(self): 505 """Retrieves an in-memory image of an existing, open HDF5 file. 506 507 .. note:: this method requires HDF5 >= 1.8.9. 508 509 .. versionadded:: 3.0 510 511 """ 512 513 cdef ssize_t size = 0 514 cdef size_t buf_len = 0 515 cdef bytes image 516 cdef char* cimage 517 518 self.flush() 519 520 # retrieve the size of the buffer for the file image 521 size = pt_H5Fget_file_image(self.file_id, NULL, buf_len) 522 if size < 0: 523 raise HDF5ExtError("Unable to retrieve the size of the buffer for the " 524 "file image. Plese note that not all drivers " 525 "provide support for image files.") 526 527 # allocate the memory buffer 528 image = PyBytes_FromStringAndSize(NULL, size) 529 if not image: 530 raise RuntimeError("Unable to allecote meomory fir the file image") 531 532 cimage = image 533 buf_len = size 534 size = pt_H5Fget_file_image(self.file_id, <void*>cimage, buf_len) 535 if size < 0: 536 raise HDF5ExtError("Unable to retrieve the file image. 
" 537 "Plese note that not all drivers provide support " 538 "for image files.") 539 540 return image 541 542 def get_filesize(self): 543 """Returns the size of an HDF5 file. 544 545 The returned size is that of the entire file, as opposed to only 546 the HDF5 portion of the file. I.e., size includes the user block, 547 if any, the HDF5 portion of the file, and any data that may have 548 been appended beyond the data written through the HDF5 Library. 549 550 .. versionadded:: 3.0 551 552 """ 553 554 cdef herr_t err = 0 555 cdef hsize_t size = 0 556 557 err = H5Fget_filesize(self.file_id, &size) 558 if err < 0: 559 raise HDF5ExtError("Unable to retrieve the HDF5 file size") 560 561 return size 562 563 def get_userblock_size(self): 564 """Retrieves the size of a user block. 565 566 .. versionadded:: 3.0 567 568 """ 569 570 cdef herr_t err = 0 571 cdef hsize_t size = 0 572 cdef hid_t create_plist 573 574 create_plist = H5Fget_create_plist(self.file_id) 575 if create_plist < 0: 576 raise HDF5ExtError("Unable to get the creation property list") 577 578 err = H5Pget_userblock(create_plist, &size) 579 if err < 0: 580 H5Pclose(create_plist) 581 raise HDF5ExtError("unable to retrieve the user block size") 582 583 H5Pclose(create_plist) 584 585 return size 586 587 # Accessor definitions 588 def _get_file_id(self): 589 return self.file_id 590 591 def fileno(self): 592 """Return the underlying OS integer file descriptor. 593 594 This is needed for lower-level file interfaces, such as the ``fcntl`` 595 module. 
596 597 """ 598 599 cdef void *file_handle 600 cdef uintptr_t *descriptor 601 cdef herr_t err 602 err = H5Fget_vfd_handle(self.file_id, H5P_DEFAULT, &file_handle) 603 if err < 0: 604 raise HDF5ExtError( 605 "Problems getting file descriptor for file ``%s``" % self.name) 606 # Convert the 'void *file_handle' into an 'int *descriptor' 607 descriptor = <uintptr_t *>file_handle 608 return descriptor[0] 609 610 611 def _flush_file(self, scope): 612 # Close the file 613 H5Fflush(self.file_id, scope) 614 615 616 def _close_file(self): 617 # Close the file 618 H5Fclose( self.file_id ) 619 self.file_id = 0 # Means file closed 620 621 622 # This method is moved out of scope, until we provide code to delete 623 # the memory booked by this extension types 624 def __dealloc__(self): 625 cdef int ret 626 if self.file_id > 0: 627 # Close the HDF5 file because user didn't do that! 628 ret = H5Fclose(self.file_id) 629 if ret < 0: 630 raise HDF5ExtError("Problems closing the file '%s'" % self.name) 631 632 633cdef class AttributeSet: 634 cdef object name 635 636 def _g_new(self, node): 637 self.name = node._v_name 638 639 def _g_list_attr(self, node): 640 "Return a tuple with the attribute list" 641 a = Aiterate(node._v_objectid) 642 return a 643 644 645 def _g_setattr(self, node, name, object value): 646 """Save Python or NumPy objects as HDF5 attributes. 647 648 Scalar Python objects, scalar NumPy & 0-dim NumPy objects will all be 649 saved as H5T_SCALAR type. N-dim NumPy objects will be saved as H5T_ARRAY 650 type. 
651 652 """ 653 654 cdef int ret 655 cdef hid_t dset_id, type_id 656 cdef hsize_t *dims 657 cdef ndarray ndv 658 cdef object byteorder, rabyteorder, baseatom 659 cdef char* cname = NULL 660 cdef bytes encoded_name 661 cdef int cset = H5T_CSET_DEFAULT 662 663 encoded_name = name.encode('utf-8') 664 # get the C pointer 665 cname = encoded_name 666 667 # The dataset id of the node 668 dset_id = node._v_objectid 669 670 # Convert a NumPy scalar into a NumPy 0-dim ndarray 671 if isinstance(value, numpy.generic): 672 value = numpy.array(value) 673 674 # Check if value is a NumPy ndarray and of a supported type 675 if (isinstance(value, numpy.ndarray) and 676 value.dtype.kind in ('V', 'S', 'b', 'i', 'u', 'f', 'c')): 677 # get a contiguous array: fixes #270 and gh-176 678 #value = numpy.ascontiguousarray(value) 679 value = value.copy() 680 if value.dtype.kind == 'V': 681 description, rabyteorder = descr_from_dtype(value.dtype, ptparams=node._v_file.params) 682 byteorder = byteorders[rabyteorder] 683 type_id = create_nested_type(description, byteorder) 684 # Make sure the value is consistent with offsets of the description 685 value = value.astype(description._v_dtype) 686 else: 687 # Get the associated native HDF5 type of the scalar type 688 baseatom = Atom.from_dtype(value.dtype.base) 689 byteorder = byteorders[value.dtype.byteorder] 690 type_id = atom_to_hdf5_type(baseatom, byteorder) 691 # Get dimensionality info 692 ndv = <ndarray>value 693 dims = npy_malloc_dims(ndv.ndim, ndv.shape) 694 # Actually write the attribute 695 ret = H5ATTRset_attribute(dset_id, cname, type_id, 696 ndv.ndim, dims, ndv.data) 697 if ret < 0: 698 raise HDF5ExtError("Can't set attribute '%s' in node:\n %s." % 699 (name, self._v_node)) 700 # Release resources 701 free(<void *>dims) 702 H5Tclose(type_id) 703 else: 704 # Object cannot be natively represented in HDF5. 
705 if (isinstance(value, numpy.ndarray) and 706 value.dtype.kind == 'U' and 707 value.shape == ()): 708 value = value[()].encode('utf-8') 709 cset = H5T_CSET_UTF8 710 else: 711 # Convert this object to a null-terminated string 712 # (binary pickles are not supported at this moment) 713 value = pickle.dumps(value, 0) 714 715 ret = H5ATTRset_attribute_string(dset_id, cname, value, len(value), cset) 716 if ret < 0: 717 raise HDF5ExtError("Can't set attribute '%s' in node:\n %s." % 718 (name, self._v_node)) 719 720 721 # Get attributes 722 def _g_getattr(self, node, attrname): 723 """Get HDF5 attributes and retrieve them as NumPy objects. 724 725 H5T_SCALAR types will be retrieved as scalar NumPy. 726 H5T_ARRAY types will be retrieved as ndarray NumPy objects. 727 728 """ 729 730 cdef hsize_t *dims 731 cdef H5T_class_t class_id 732 cdef size_t type_size 733 cdef hid_t mem_type, dset_id, type_id, native_type 734 cdef int rank, ret, enumtype 735 cdef void *rbuf 736 cdef char *str_value 737 cdef char **str_values = NULL 738 cdef ndarray ndvalue 739 cdef object shape, stype_atom, shape_atom, retvalue 740 cdef int i, nelements 741 cdef char* cattrname = NULL 742 cdef bytes encoded_attrname 743 cdef int cset = H5T_CSET_DEFAULT 744 745 encoded_attrname = attrname.encode('utf-8') 746 # Get the C pointer 747 cattrname = encoded_attrname 748 749 # The dataset id of the node 750 dset_id = node._v_objectid 751 dims = NULL 752 753 ret = H5ATTRget_type_ndims(dset_id, cattrname, &type_id, &class_id, 754 &type_size, &rank ) 755 if ret < 0: 756 raise HDF5ExtError("Can't get type info on attribute %s in node %s." 
% 757 (attrname, self.name)) 758 759 # Call a fast function for scalar values and typical class types 760 if (rank == 0 and class_id == H5T_STRING): 761 type_size = H5ATTRget_attribute_string(dset_id, cattrname, &str_value, 762 &cset) 763 if type_size == 0: 764 if cset == H5T_CSET_UTF8: 765 retvalue = numpy.unicode_(u'') 766 else: 767 retvalue = numpy.bytes_(b'') 768 769 elif cset == H5T_CSET_UTF8: 770 if type_size == 1 and str_value[0] == 0: 771 # compatibility with PyTables <= 3.1.1 772 retvalue = numpy.unicode_(u'') 773 retvalue = PyUnicode_DecodeUTF8(str_value, type_size, NULL) 774 retvalue = numpy.unicode_(retvalue) 775 else: 776 retvalue = PyBytes_FromStringAndSize(str_value, type_size) 777 # AV: oct 2012 778 # since now we use the string size got form HDF5 we have to strip 779 # trailing zeros used for padding. 780 # The entire process is quite odd but due to a bug (??) in the way 781 # numpy arrays are pickled in python 3 we can't assume that 782 # strlen(attr_value) is the actual length of the attibute 783 # and numpy.bytes_(attr_value) can give a truncated pickle sting 784 retvalue = retvalue.rstrip(b'\x00') 785 retvalue = numpy.bytes_(retvalue) # bytes 786 # Important to release attr_value, because it has been malloc'ed! 787 if str_value: 788 free(str_value) 789 H5Tclose(type_id) 790 return retvalue 791 elif (rank == 0 and class_id in (H5T_BITFIELD, H5T_INTEGER, H5T_FLOAT)): 792 dtype_ = get_dtype_scalar(type_id, class_id, type_size) 793 if dtype_ is None: 794 warnings.warn("Unsupported type for attribute '%s' in node '%s'. " 795 "Offending HDF5 class: %d" % (attrname, self.name, 796 class_id), DataTypeWarning) 797 self._v_unimplemented.append(attrname) 798 return None 799 shape = () 800 else: 801 # General case 802 803 # Get the dimensional info 804 dims = <hsize_t *>malloc(rank * sizeof(hsize_t)) 805 ret = H5ATTRget_dims(dset_id, cattrname, dims) 806 if ret < 0: 807 raise HDF5ExtError("Can't get dims info on attribute %s in node %s." 
% 808 (attrname, self.name)) 809 shape = getshape(rank, dims) 810 # dims is not needed anymore 811 free(<void *> dims) 812 813 # Get the NumPy dtype from the type_id 814 try: 815 stype_, shape_ = hdf5_to_np_ext_type(type_id, pure_numpy_types=True, ptparams=node._v_file.params) 816 dtype_ = numpy.dtype(stype_, shape_) 817 except TypeError: 818 if class_id == H5T_STRING and H5Tis_variable_str(type_id): 819 nelements = H5ATTRget_attribute_vlen_string_array(dset_id, cattrname, 820 &str_values, &cset) 821 if nelements < 0: 822 raise HDF5ExtError("Can't read attribute %s in node %s." % 823 (attrname, self.name)) 824 825 # The following generator expressions do not work with Cython 0.15.1 826 if cset == H5T_CSET_UTF8: 827 #retvalue = numpy.fromiter( 828 # PyUnicode_DecodeUTF8(<char*>str_values[i], 829 # strlen(<char*>str_values[i]), 830 # NULL) 831 # for i in range(nelements), "O8") 832 retvalue = numpy.array([ 833 PyUnicode_DecodeUTF8(<char*>str_values[i], 834 strlen(<char*>str_values[i]), 835 NULL) 836 for i in range(nelements)], "O8") 837 838 else: 839 #retvalue = numpy.fromiter( 840 # <char*>str_values[i] for i in range(nelements), "O8") 841 retvalue = numpy.array( 842 [<char*>str_values[i] for i in range(nelements)], "O8") 843 retvalue.shape = shape 844 845 # Important to release attr_value, because it has been malloc'ed! 846 for i in range(nelements): 847 free(str_values[i]) 848 free(str_values) 849 850 return retvalue 851 852 # This class is not supported. Instead of raising a TypeError, issue a 853 # warning explaining the problem. This will allow to continue browsing 854 # native HDF5 files, while informing the user about the problem. 855 warnings.warn("Unsupported type for attribute '%s' in node '%s'. 
" 856 "Offending HDF5 class: %d" % (attrname, self.name, 857 class_id), DataTypeWarning) 858 self._v_unimplemented.append(attrname) 859 return None 860 861 # Get the container for data 862 ndvalue = numpy.empty(dtype=dtype_, shape=shape) 863 # Get the pointer to the buffer data area 864 rbuf = ndvalue.data 865 # Actually read the attribute from disk 866 ret = H5ATTRget_attribute(dset_id, cattrname, type_id, rbuf) 867 if ret < 0: 868 raise HDF5ExtError("Attribute %s exists in node %s, but can't get it." % 869 (attrname, self.name)) 870 H5Tclose(type_id) 871 872 if rank > 0: # multidimensional case 873 retvalue = ndvalue 874 else: 875 retvalue = ndvalue[()] # 0-dim ndarray becomes a NumPy scalar 876 877 return retvalue 878 879 880 def _g_remove(self, node, attrname): 881 cdef int ret 882 cdef hid_t dset_id 883 cdef char *cattrname = NULL 884 cdef bytes encoded_attrname 885 886 encoded_attrname = attrname.encode('utf-8') 887 # Get the C pointer 888 cattrname = encoded_attrname 889 890 # The dataset id of the node 891 dset_id = node._v_objectid 892 893 ret = H5Adelete(dset_id, cattrname) 894 if ret < 0: 895 raise HDF5ExtError("Attribute '%s' exists in node '%s', but cannot be " 896 "deleted." 
% (attrname, self.name)) 897 898 899cdef class Node: 900 # Instance variables declared in .pxd 901 902 def _g_new(self, where, name, init): 903 self.name = name 904 # """The name of this node in its parent group.""" 905 self.parent_id = where._v_objectid 906 # """The identifier of the parent group.""" 907 908 def _g_delete(self, parent): 909 cdef int ret 910 cdef bytes encoded_name 911 912 encoded_name = self.name.encode('utf-8') 913 914 # Delete this node 915 ret = H5Ldelete(parent._v_objectid, encoded_name, H5P_DEFAULT) 916 if ret < 0: 917 raise HDF5ExtError("problems deleting the node ``%s``" % self.name) 918 return ret 919 920 def __dealloc__(self): 921 self.parent_id = 0 922 923 def _get_obj_info(self): 924 cdef herr_t ret = 0 925 cdef H5O_info_t oinfo 926 927 ret = H5Oget_info(self._v_objectid, &oinfo) 928 if ret < 0: 929 raise HDF5ExtError("Unable to get object info for '%s'" % 930 self. _v_pathname) 931 932 return ObjInfo(oinfo.addr, oinfo.rc) 933 934 def _get_obj_timestamps(self): 935 cdef herr_t ret = 0 936 cdef H5O_info_t oinfo 937 938 ret = H5Oget_info(self._v_objectid, &oinfo) 939 if ret < 0: 940 raise HDF5ExtError("Unable to get object info for '%s'" % 941 self. _v_pathname) 942 943 return ObjTimestamps(oinfo.atime, oinfo.mtime, oinfo.ctime, 944 oinfo.btime) 945 946 947cdef class Group(Node): 948 cdef hid_t group_id 949 950 def _g_create(self): 951 cdef hid_t ret 952 cdef bytes encoded_name 953 954 encoded_name = self.name.encode('utf-8') 955 956 # @TODO: set property list --> utf-8 957 958 # Create a new group 959 ret = H5Gcreate(self.parent_id, encoded_name, H5P_DEFAULT, H5P_DEFAULT, 960 H5P_DEFAULT) 961 if ret < 0: 962 raise HDF5ExtError("Can't create the group %s." 
% self.name) 963 self.group_id = ret 964 return self.group_id 965 966 def _g_open(self): 967 cdef hid_t ret 968 cdef bytes encoded_name 969 970 encoded_name = self.name.encode('utf-8') 971 972 ret = H5Gopen(self.parent_id, encoded_name, H5P_DEFAULT) 973 if ret < 0: 974 raise HDF5ExtError("Can't open the group: '%s'." % self.name) 975 self.group_id = ret 976 return self.group_id 977 978 def _g_get_objinfo(self, object h5name): 979 """Check whether 'name' is a children of 'self' and return its type.""" 980 981 cdef int ret 982 cdef object node_type 983 cdef bytes encoded_name 984 cdef char *cname 985 986 encoded_name = h5name.encode('utf-8') 987 # Get the C pointer 988 cname = encoded_name 989 990 ret = get_linkinfo(self.group_id, cname) 991 if ret == -2 or ret == H5L_TYPE_ERROR: 992 node_type = "NoSuchNode" 993 elif ret == H5L_TYPE_SOFT: 994 node_type = "SoftLink" 995 elif ret == H5L_TYPE_EXTERNAL: 996 node_type = "ExternalLink" 997 elif ret == H5L_TYPE_HARD: 998 ret = get_objinfo(self.group_id, cname) 999 if ret == -2: 1000 node_type = "NoSuchNode" 1001 elif ret == H5O_TYPE_UNKNOWN: 1002 node_type = "Unknown" 1003 elif ret == H5O_TYPE_GROUP: 1004 node_type = "Group" 1005 elif ret == H5O_TYPE_DATASET: 1006 node_type = "Leaf" 1007 elif ret == H5O_TYPE_NAMED_DATATYPE: 1008 node_type = "NamedType" # Not supported yet 1009 #else H5O_TYPE_LINK: 1010 # # symbolic link 1011 # raise RuntimeError('unexpected object type') 1012 else: 1013 node_type = "Unknown" 1014 return node_type 1015 1016 def _g_list_group(self, parent): 1017 """Return a tuple with the groups and the leaves hanging from self.""" 1018 1019 cdef bytes encoded_name 1020 1021 encoded_name = self.name.encode('utf-8') 1022 1023 return Giterate(parent._v_objectid, self._v_objectid, encoded_name) 1024 1025 1026 def _g_get_gchild_attr(self, group_name, attr_name): 1027 """Return an attribute of a child `Group`. 1028 1029 If the attribute does not exist, ``None`` is returned. 
1030 1031 """ 1032 1033 cdef hid_t gchild_id 1034 cdef object retvalue 1035 cdef bytes encoded_group_name 1036 cdef bytes encoded_attr_name 1037 1038 encoded_group_name = group_name.encode('utf-8') 1039 encoded_attr_name = attr_name.encode('utf-8') 1040 1041 # Open the group 1042 retvalue = None # Default value 1043 gchild_id = H5Gopen(self.group_id, encoded_group_name, H5P_DEFAULT) 1044 if gchild_id < 0: 1045 raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" % 1046 (group_name, self._v_pathname)) 1047 retvalue = get_attribute_string_or_none(gchild_id, encoded_attr_name) 1048 # Close child group 1049 H5Gclose(gchild_id) 1050 1051 return retvalue 1052 1053 1054 def _g_get_lchild_attr(self, leaf_name, attr_name): 1055 """Return an attribute of a child `Leaf`. 1056 1057 If the attribute does not exist, ``None`` is returned. 1058 1059 """ 1060 1061 cdef hid_t leaf_id 1062 cdef object retvalue 1063 cdef bytes encoded_leaf_name 1064 cdef bytes encoded_attr_name 1065 1066 encoded_leaf_name = leaf_name.encode('utf-8') 1067 encoded_attr_name = attr_name.encode('utf-8') 1068 1069 # Open the dataset 1070 leaf_id = H5Dopen(self.group_id, encoded_leaf_name, H5P_DEFAULT) 1071 if leaf_id < 0: 1072 raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" % 1073 (leaf_name, self._v_pathname)) 1074 retvalue = get_attribute_string_or_none(leaf_id, encoded_attr_name) 1075 # Close the dataset 1076 H5Dclose(leaf_id) 1077 return retvalue 1078 1079 1080 def _g_flush_group(self): 1081 # Close the group 1082 H5Fflush(self.group_id, H5F_SCOPE_GLOBAL) 1083 1084 1085 def _g_close_group(self): 1086 cdef int ret 1087 1088 ret = H5Gclose(self.group_id) 1089 if ret < 0: 1090 raise HDF5ExtError("Problems closing the Group %s" % self.name) 1091 self.group_id = 0 # indicate that this group is closed 1092 1093 1094 def _g_move_node(self, hid_t oldparent, oldname, hid_t newparent, newname, 1095 oldpathname, newpathname): 1096 cdef int ret 1097 cdef bytes encoded_oldname, encoded_newname 1098 
1099 encoded_oldname = oldname.encode('utf-8') 1100 encoded_newname = newname.encode('utf-8') 1101 1102 ret = H5Lmove(oldparent, encoded_oldname, newparent, encoded_newname, 1103 H5P_DEFAULT, H5P_DEFAULT) 1104 if ret < 0: 1105 raise HDF5ExtError("Problems moving the node %s to %s" % 1106 (oldpathname, newpathname) ) 1107 return ret 1108 1109 1110 1111cdef class Leaf(Node): 1112 # Instance variables declared in .pxd 1113 1114 def _get_storage_size(self): 1115 return H5Dget_storage_size(self.dataset_id) 1116 1117 def _get_obj_track_times(self): 1118 """Get track_times boolean for dataset 1119 1120 Uses H5Pget_obj_track_times to determine if the dataset was 1121 created with the track_times property. If the leaf is not a 1122 dataset, this will fail with HDF5ExtError. 1123 1124 The track times dataset creation property does not seem to survive 1125 closing and reopening as of HDF5 1.8.17. Currently, it may be 1126 more accurate to test whether the ctime for the dataset is 0: 1127 track_times = (leaf._get_obj_timestamps().ctime == 0) 1128 """ 1129 cdef: 1130 hbool_t track_times = True 1131 1132 if self.dataset_id < 0: 1133 raise ValueError('Invalid dataset id %s' % self.dataset_id) 1134 1135 plist_id = H5Dget_create_plist(self.dataset_id) 1136 if plist_id < 0: 1137 raise HDF5ExtError("Could not get dataset creation property list " 1138 "from dataset id %s" % self.dataset_id) 1139 1140 try: 1141 # Get track_times boolean for dataset 1142 if H5Pget_obj_track_times(plist_id, &track_times) < 0: 1143 raise HDF5ExtError("Could not get dataset track_times property " 1144 "from dataset id %s" % self.dataset_id) 1145 finally: 1146 H5Pclose(plist_id) 1147 1148 return bool(track_times) 1149 1150 def _g_new(self, where, name, init): 1151 if init: 1152 # Put this info to 0 just when the class is initialized 1153 self.dataset_id = -1 1154 self.type_id = -1 1155 self.base_type_id = -1 1156 self.disk_type_id = -1 1157 super(Leaf, self)._g_new(where, name, init) 1158 1159 cdef 
_get_type_ids(self): 1160 """Get the disk and native HDF5 types associated with this leaf. 1161 1162 It is guaranteed that both disk and native types are not the same 1163 descriptor (so that it is safe to close them separately). 1164 1165 """ 1166 1167 cdef hid_t disk_type_id, native_type_id 1168 1169 disk_type_id = H5Dget_type(self.dataset_id) 1170 native_type_id = get_native_type(disk_type_id) 1171 return disk_type_id, native_type_id 1172 1173 cdef _convert_time64(self, ndarray nparr, int sense): 1174 """Converts a NumPy of Time64 elements between NumPy and HDF5 formats. 1175 1176 NumPy to HDF5 conversion is performed when 'sense' is 0. Otherwise, HDF5 1177 to NumPy conversion is performed. The conversion is done in place, 1178 i.e. 'nparr' is modified. 1179 1180 """ 1181 1182 cdef void *t64buf 1183 cdef long byteoffset, bytestride, nelements 1184 cdef hsize_t nrecords 1185 1186 byteoffset = 0 # NumPy objects doesn't have an offset 1187 if (<object>nparr).shape == (): 1188 # 0-dim array does contain *one* element 1189 nrecords = 1 1190 bytestride = 8 1191 else: 1192 nrecords = len(nparr) 1193 bytestride = nparr.strides[0] # supports multi-dimensional recarray 1194 nelements = <size_t>nparr.size / nrecords 1195 t64buf = nparr.data 1196 1197 conv_float64_timeval32( 1198 t64buf, byteoffset, bytestride, nrecords, nelements, sense) 1199 1200 # can't do since cdef'd 1201 1202 def _g_truncate(self, hsize_t size): 1203 """Truncate a Leaf to `size` nrows.""" 1204 1205 cdef hsize_t ret 1206 1207 ret = truncate_dset(self.dataset_id, self.maindim, size) 1208 if ret < 0: 1209 raise HDF5ExtError("Problems truncating the leaf: %s" % self) 1210 1211 classname = self.__class__.__name__ 1212 if classname in ('EArray', 'CArray'): 1213 # Update the new dimensionality 1214 self.dims[self.maindim] = size 1215 # Update the shape 1216 shape = list(self.shape) 1217 shape[self.maindim] = SizeType(size) 1218 self.shape = tuple(shape) 1219 elif classname in ('Table', 'VLArray'): 1220 
self.nrows = size 1221 else: 1222 raise ValueError("Unexpected classname: %s" % classname) 1223 1224 def _g_flush(self): 1225 # Flush the dataset (in fact, the entire buffers in file!) 1226 if self.dataset_id >= 0: 1227 H5Fflush(self.dataset_id, H5F_SCOPE_GLOBAL) 1228 1229 def _g_close(self): 1230 # Close dataset in HDF5 space 1231 # Release resources 1232 if self.type_id >= 0: 1233 H5Tclose(self.type_id) 1234 if self.disk_type_id >= 0: 1235 H5Tclose(self.disk_type_id) 1236 if self.base_type_id >= 0: 1237 H5Tclose(self.base_type_id) 1238 if self.dataset_id >= 0: 1239 H5Dclose(self.dataset_id) 1240 1241 1242cdef class Array(Leaf): 1243 # Instance variables declared in .pxd 1244 1245 def _create_array(self, ndarray nparr, object title, object atom): 1246 cdef int i 1247 cdef herr_t ret 1248 cdef void *rbuf 1249 cdef bytes complib, version, class_ 1250 cdef object dtype_, atom_, shape 1251 cdef ndarray dims 1252 cdef bytes encoded_title, encoded_name 1253 cdef H5T_cset_t cset = H5T_CSET_ASCII 1254 1255 encoded_title = title.encode('utf-8') 1256 encoded_name = self.name.encode('utf-8') 1257 1258 # Get the HDF5 type associated with this numpy type 1259 shape = (<object>nparr).shape 1260 if atom is None or atom.shape == (): 1261 dtype_ = nparr.dtype.base 1262 atom_ = Atom.from_dtype(dtype_) 1263 else: 1264 atom_ = atom 1265 shape = shape[:-len(atom_.shape)] 1266 self.disk_type_id = atom_to_hdf5_type(atom_, self.byteorder) 1267 if self.disk_type_id < 0: 1268 raise HDF5ExtError( 1269 "Problems creating the %s: invalid disk type ID for atom %s" % ( 1270 self.__class__.__name__, atom_)) 1271 1272 # Allocate space for the dimension axis info and fill it 1273 dims = numpy.array(shape, dtype=numpy.intp) 1274 self.rank = len(shape) 1275 self.dims = npy_malloc_dims(self.rank, <npy_intp *>(dims.data)) 1276 # Get the pointer to the buffer data area 1277 strides = (<object>nparr).strides 1278 # When the object is not a 0-d ndarray and its strides == 0, that 1279 # means that the 
array does not contain actual data 1280 if strides != () and sum(strides) == 0: 1281 rbuf = NULL 1282 else: 1283 rbuf = nparr.data 1284 # Save the array 1285 complib = (self.filters.complib or '').encode('utf-8') 1286 version = self._v_version.encode('utf-8') 1287 class_ = self._c_classid.encode('utf-8') 1288 self.dataset_id = H5ARRAYmake(self.parent_id, encoded_name, version, 1289 self.rank, self.dims, 1290 self.extdim, self.disk_type_id, NULL, NULL, 1291 self.filters.complevel, complib, 1292 self.filters.shuffle_bitshuffle, 1293 self.filters.fletcher32, 1294 self._want_track_times, 1295 rbuf) 1296 if self.dataset_id < 0: 1297 raise HDF5ExtError("Problems creating the %s." % self.__class__.__name__) 1298 1299 if self._v_file.params['PYTABLES_SYS_ATTRS']: 1300 if PY_MAJOR_VERSION > 2: 1301 cset = H5T_CSET_UTF8 1302 # Set the conforming array attributes 1303 H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_, 1304 len(class_), cset) 1305 H5ATTRset_attribute_string(self.dataset_id, "VERSION", version, 1306 len(version), cset) 1307 H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title, 1308 len(encoded_title), cset) 1309 1310 # Get the native type (so that it is HDF5 who is the responsible to deal 1311 # with non-native byteorders on-disk) 1312 self.type_id = get_native_type(self.disk_type_id) 1313 1314 return self.dataset_id, shape, atom_ 1315 1316 1317 def _create_carray(self, object title): 1318 cdef int i 1319 cdef herr_t ret 1320 cdef void *rbuf 1321 cdef bytes complib, version, class_ 1322 cdef ndarray dflts 1323 cdef void *fill_data 1324 cdef ndarray extdim 1325 cdef object atom 1326 cdef bytes encoded_title, encoded_name 1327 1328 encoded_title = title.encode('utf-8') 1329 encoded_name = self.name.encode('utf-8') 1330 1331 atom = self.atom 1332 self.disk_type_id = atom_to_hdf5_type(atom, self.byteorder) 1333 1334 self.rank = len(self.shape) 1335 self.dims = malloc_dims(self.shape) 1336 if self.chunkshape: 1337 self.dims_chunk = 
malloc_dims(self.chunkshape) 1338 1339 rbuf = NULL # The data pointer. We don't have data to save initially 1340 # Encode strings 1341 complib = (self.filters.complib or '').encode('utf-8') 1342 version = self._v_version.encode('utf-8') 1343 class_ = self._c_classid.encode('utf-8') 1344 1345 # Get the fill values 1346 if isinstance(atom.dflt, numpy.ndarray) or atom.dflt: 1347 dflts = numpy.array(atom.dflt, dtype=atom.dtype) 1348 fill_data = dflts.data 1349 else: 1350 dflts = numpy.zeros((), dtype=atom.dtype) 1351 fill_data = NULL 1352 if atom.shape == (): 1353 # The default is preferred as a scalar value instead of 0-dim array 1354 atom.dflt = dflts[()] 1355 else: 1356 atom.dflt = dflts 1357 1358 # Create the CArray/EArray 1359 self.dataset_id = H5ARRAYmake( 1360 self.parent_id, encoded_name, version, self.rank, 1361 self.dims, self.extdim, self.disk_type_id, self.dims_chunk, 1362 fill_data, self.filters.complevel, complib, 1363 self.filters.shuffle_bitshuffle, self.filters.fletcher32, 1364 self._want_track_times, rbuf) 1365 if self.dataset_id < 0: 1366 raise HDF5ExtError("Problems creating the %s." 
% self.__class__.__name__) 1367 1368 if self._v_file.params['PYTABLES_SYS_ATTRS']: 1369 # Set the conforming array attributes 1370 H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_, 1371 len(class_), H5T_CSET_ASCII) 1372 H5ATTRset_attribute_string(self.dataset_id, "VERSION", version, 1373 len(version), H5T_CSET_ASCII) 1374 H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title, 1375 len(encoded_title), H5T_CSET_ASCII) 1376 if self.extdim >= 0: 1377 extdim = <ndarray>numpy.array([self.extdim], dtype="int32") 1378 # Attach the EXTDIM attribute in case of enlargeable arrays 1379 H5ATTRset_attribute(self.dataset_id, "EXTDIM", H5T_NATIVE_INT, 1380 0, NULL, extdim.data) 1381 1382 # Get the native type (so that it is HDF5 who is the responsible to deal 1383 # with non-native byteorders on-disk) 1384 self.type_id = get_native_type(self.disk_type_id) 1385 1386 return self.dataset_id 1387 1388 1389 def _open_array(self): 1390 cdef size_t type_size, type_precision 1391 cdef H5T_class_t class_id 1392 cdef char cbyteorder[11] # "irrelevant" fits easily here 1393 cdef int i 1394 cdef int extdim 1395 cdef herr_t ret 1396 cdef object shape, chunkshapes, atom 1397 cdef int fill_status 1398 cdef ndarray dflts 1399 cdef void *fill_data 1400 cdef bytes encoded_name 1401 cdef str byteorder 1402 1403 encoded_name = self.name.encode('utf-8') 1404 1405 # Open the dataset 1406 self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT) 1407 if self.dataset_id < 0: 1408 raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" % 1409 (self.name, self._v_parent._v_pathname)) 1410 # Get the datatype handles 1411 self.disk_type_id, self.type_id = self._get_type_ids() 1412 # Get the atom for this type 1413 atom = atom_from_hdf5_type(self.type_id) 1414 1415 # Get the rank for this array object 1416 if H5ARRAYget_ndims(self.dataset_id, &self.rank) < 0: 1417 raise HDF5ExtError("Problems getting ndims!") 1418 # Allocate space for the dimension axis info 1419 
self.dims = <hsize_t *>malloc(self.rank * sizeof(hsize_t)) 1420 self.maxdims = <hsize_t *>malloc(self.rank * sizeof(hsize_t)) 1421 # Get info on dimensions, class and type (of base class) 1422 ret = H5ARRAYget_info(self.dataset_id, self.disk_type_id, 1423 self.dims, self.maxdims, 1424 &class_id, cbyteorder) 1425 if ret < 0: 1426 raise HDF5ExtError("Unable to get array info.") 1427 1428 byteorder = cstr_to_pystr(cbyteorder) 1429 1430 # Get the extendable dimension (if any) 1431 self.extdim = -1 # default is non-extensible Array 1432 for i from 0 <= i < self.rank: 1433 if self.maxdims[i] == -1: 1434 self.extdim = i 1435 break 1436 1437 # Get the shape as a python tuple 1438 shape = getshape(self.rank, self.dims) 1439 1440 # Allocate space for the dimension chunking info 1441 self.dims_chunk = <hsize_t *>malloc(self.rank * sizeof(hsize_t)) 1442 if H5ARRAYget_chunkshape(self.dataset_id, self.rank, self.dims_chunk) < 0: 1443 # The Array class is not chunked! 1444 chunkshapes = None 1445 else: 1446 # Get the chunkshape as a python tuple 1447 chunkshapes = getshape(self.rank, self.dims_chunk) 1448 1449 # object arrays should not be read directly into memory 1450 if atom.dtype != numpy.object: 1451 # Get the fill value 1452 dflts = numpy.zeros((), dtype=atom.dtype) 1453 fill_data = dflts.data 1454 H5ARRAYget_fill_value(self.dataset_id, self.type_id, 1455 &fill_status, fill_data); 1456 if fill_status == H5D_FILL_VALUE_UNDEFINED: 1457 # This can only happen with datasets created with other libraries 1458 # than PyTables. 
1459 dflts = None 1460 if dflts is not None and atom.shape == (): 1461 # The default is preferred as a scalar value instead of 0-dim array 1462 atom.dflt = dflts[()] 1463 else: 1464 atom.dflt = dflts 1465 1466 # Get the byteorder 1467 self.byteorder = correct_byteorder(atom.type, byteorder) 1468 1469 return self.dataset_id, atom, shape, chunkshapes 1470 1471 1472 def _append(self, ndarray nparr): 1473 cdef int ret, extdim 1474 cdef hsize_t *dims_arr 1475 cdef void *rbuf 1476 cdef object shape 1477 1478 if self.atom.kind == "reference": 1479 raise ValueError("Cannot append to the reference types") 1480 1481 # Allocate space for the dimension axis info 1482 dims_arr = npy_malloc_dims(self.rank, nparr.shape) 1483 # Get the pointer to the buffer data area 1484 rbuf = nparr.data 1485 # Convert some NumPy types to HDF5 before storing. 1486 if self.atom.type == 'time64': 1487 self._convert_time64(nparr, 0) 1488 1489 # Append the records 1490 extdim = self.extdim 1491 with nogil: 1492 ret = H5ARRAYappend_records(self.dataset_id, self.type_id, self.rank, 1493 self.dims, dims_arr, extdim, rbuf) 1494 1495 if ret < 0: 1496 raise HDF5ExtError("Problems appending the elements") 1497 1498 free(dims_arr) 1499 # Update the new dimensionality 1500 shape = list(self.shape) 1501 shape[self.extdim] = SizeType(self.dims[self.extdim]) 1502 self.shape = tuple(shape) 1503 1504 def _read_array(self, hsize_t start, hsize_t stop, hsize_t step, 1505 ndarray nparr): 1506 cdef herr_t ret 1507 cdef void *rbuf 1508 cdef hsize_t nrows 1509 cdef int extdim 1510 cdef size_t item_size = H5Tget_size(self.type_id) 1511 cdef void * refbuf = NULL 1512 1513 # Number of rows to read 1514 nrows = get_len_of_range(start, stop, step) 1515 1516 # Get the pointer to the buffer data area 1517 if self.atom.kind == "reference": 1518 refbuf = malloc(nrows * item_size) 1519 rbuf = refbuf 1520 else: 1521 rbuf = nparr.data 1522 1523 if hasattr(self, "extdim"): 1524 extdim = self.extdim 1525 else: 1526 extdim = -1 1527 
1528 # Do the physical read 1529 with nogil: 1530 ret = H5ARRAYread(self.dataset_id, self.type_id, start, nrows, step, 1531 extdim, rbuf) 1532 1533 try: 1534 if ret < 0: 1535 raise HDF5ExtError("Problems reading the array data.") 1536 1537 # Get the pointer to the buffer data area 1538 if self.atom.kind == "reference": 1539 load_reference(self.dataset_id, <hobj_ref_t *>rbuf, item_size, nparr) 1540 finally: 1541 if refbuf: 1542 free(refbuf) 1543 refbuf = NULL 1544 1545 if self.atom.kind == 'time': 1546 # Swap the byteorder by hand (this is not currently supported by HDF5) 1547 if H5Tget_order(self.type_id) != platform_byteorder: 1548 nparr.byteswap(True) 1549 1550 # Convert some HDF5 types to NumPy after reading. 1551 if self.atom.type == 'time64': 1552 self._convert_time64(nparr, 1) 1553 1554 return 1555 1556 1557 def _g_read_slice(self, ndarray startl, ndarray stopl, ndarray stepl, 1558 ndarray nparr): 1559 cdef herr_t ret 1560 cdef hsize_t *start 1561 cdef hsize_t *stop 1562 cdef hsize_t *step 1563 cdef void *rbuf 1564 cdef size_t item_size = H5Tget_size(self.type_id) 1565 cdef void * refbuf = NULL 1566 1567 # Get the pointer to the buffer data area of startl, stopl and stepl arrays 1568 start = <hsize_t *>startl.data 1569 stop = <hsize_t *>stopl.data 1570 step = <hsize_t *>stepl.data 1571 1572 # Get the pointer to the buffer data area 1573 if self.atom.kind == "reference": 1574 refbuf = malloc(nparr.size * item_size) 1575 rbuf = refbuf 1576 else: 1577 rbuf = nparr.data 1578 1579 # Do the physical read 1580 with nogil: 1581 ret = H5ARRAYreadSlice(self.dataset_id, self.type_id, 1582 start, stop, step, rbuf) 1583 try: 1584 if ret < 0: 1585 raise HDF5ExtError("Problems reading the array data.") 1586 1587 # Get the pointer to the buffer data area 1588 if self.atom.kind == "reference": 1589 load_reference(self.dataset_id, <hobj_ref_t *>rbuf, item_size, nparr) 1590 finally: 1591 if refbuf: 1592 free(refbuf) 1593 refbuf = NULL 1594 1595 if self.atom.kind == 'time': 1596 
# Swap the byteorder by hand (this is not currently supported by HDF5) 1597 if H5Tget_order(self.type_id) != platform_byteorder: 1598 nparr.byteswap(True) 1599 1600 # Convert some HDF5 types to NumPy after reading 1601 if self.atom.type == 'time64': 1602 self._convert_time64(nparr, 1) 1603 1604 return 1605 1606 1607 def _g_read_coords(self, ndarray coords, ndarray nparr): 1608 """Read coordinates in an already created NumPy array.""" 1609 1610 cdef herr_t ret 1611 cdef hid_t space_id 1612 cdef hid_t mem_space_id 1613 cdef hsize_t size 1614 cdef void *rbuf 1615 cdef object mode 1616 cdef size_t item_size = H5Tget_size(self.type_id) 1617 cdef void * refbuf = NULL 1618 1619 # Get the dataspace handle 1620 space_id = H5Dget_space(self.dataset_id) 1621 # Create a memory dataspace handle 1622 size = nparr.size 1623 mem_space_id = H5Screate_simple(1, &size, NULL) 1624 1625 # Select the dataspace to be read 1626 H5Sselect_elements(space_id, H5S_SELECT_SET, 1627 <size_t>size, <hsize_t *>coords.data) 1628 1629 # Get the pointer to the buffer data area 1630 if self.atom.kind == "reference": 1631 refbuf = malloc(nparr.size * item_size) 1632 rbuf = refbuf 1633 else: 1634 rbuf = nparr.data 1635 1636 # Do the actual read 1637 with nogil: 1638 ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id, 1639 H5P_DEFAULT, rbuf) 1640 1641 try: 1642 if ret < 0: 1643 raise HDF5ExtError("Problems reading the array data.") 1644 1645 # Get the pointer to the buffer data area 1646 if self.atom.kind == "reference": 1647 load_reference(self.dataset_id, <hobj_ref_t *>rbuf, item_size, nparr) 1648 finally: 1649 if refbuf: 1650 free(refbuf) 1651 refbuf = NULL 1652 1653 # Terminate access to the memory dataspace 1654 H5Sclose(mem_space_id) 1655 # Terminate access to the dataspace 1656 H5Sclose(space_id) 1657 1658 if self.atom.kind == 'time': 1659 # Swap the byteorder by hand (this is not currently supported by HDF5) 1660 if H5Tget_order(self.type_id) != platform_byteorder: 1661 
nparr.byteswap(True) 1662 1663 # Convert some HDF5 types to NumPy after reading 1664 if self.atom.type == 'time64': 1665 self._convert_time64(nparr, 1) 1666 1667 return 1668 1669 1670 def perform_selection(self, space_id, start, count, step, idx, mode): 1671 """Performs a selection using start/count/step in the given axis. 1672 1673 All other axes have their full range selected. The selection is 1674 added to the current `space_id` selection using the given mode. 1675 1676 Note: This is a backport from the h5py project. 1677 1678 """ 1679 1680 cdef int select_mode 1681 cdef ndarray start_, count_, step_ 1682 cdef hsize_t *startp 1683 cdef hsize_t *countp 1684 cdef hsize_t *stepp 1685 1686 # Build arrays for the selection parameters 1687 startl, countl, stepl = [], [], [] 1688 for i, x in enumerate(self.shape): 1689 if i != idx: 1690 startl.append(0) 1691 countl.append(x) 1692 stepl.append(1) 1693 else: 1694 startl.append(start) 1695 countl.append(count) 1696 stepl.append(step) 1697 start_ = numpy.array(startl, dtype="i8") 1698 count_ = numpy.array(countl, dtype="i8") 1699 step_ = numpy.array(stepl, dtype="i8") 1700 1701 # Get the pointers to array data 1702 startp = <hsize_t *>start_.data 1703 countp = <hsize_t *>count_.data 1704 stepp = <hsize_t *>step_.data 1705 1706 # Do the actual selection 1707 select_modes = {"AND": H5S_SELECT_AND, "NOTB": H5S_SELECT_NOTB} 1708 assert mode in select_modes 1709 select_mode = select_modes[mode] 1710 H5Sselect_hyperslab(space_id, <H5S_seloper_t>select_mode, 1711 startp, stepp, countp, NULL) 1712 1713 def _g_read_selection(self, object selection, ndarray nparr): 1714 """Read a selection in an already created NumPy array.""" 1715 1716 cdef herr_t ret 1717 cdef hid_t space_id 1718 cdef hid_t mem_space_id 1719 cdef hsize_t size 1720 cdef void *rbuf 1721 cdef object mode 1722 cdef size_t item_size = H5Tget_size(self.type_id) 1723 cdef void * refbuf = NULL 1724 1725 # Get the dataspace handle 1726 space_id = 
H5Dget_space(self.dataset_id) 1727 # Create a memory dataspace handle 1728 size = nparr.size 1729 mem_space_id = H5Screate_simple(1, &size, NULL) 1730 1731 # Select the dataspace to be read 1732 # Start by selecting everything 1733 H5Sselect_all(space_id) 1734 # Now refine with outstanding selections 1735 for args in selection: 1736 self.perform_selection(space_id, *args) 1737 1738 # Get the pointer to the buffer data area 1739 if self.atom.kind == "reference": 1740 refbuf = malloc(nparr.size * item_size) 1741 rbuf = refbuf 1742 else: 1743 rbuf = nparr.data 1744 1745 # Do the actual read 1746 with nogil: 1747 ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id, 1748 H5P_DEFAULT, rbuf) 1749 1750 try: 1751 if ret < 0: 1752 raise HDF5ExtError("Problems reading the array data.") 1753 1754 # Get the pointer to the buffer data area 1755 if self.atom.kind == "reference": 1756 load_reference(self.dataset_id, <hobj_ref_t *>rbuf, item_size, nparr) 1757 finally: 1758 if refbuf: 1759 free(refbuf) 1760 refbuf = NULL 1761 1762 # Terminate access to the memory dataspace 1763 H5Sclose(mem_space_id) 1764 # Terminate access to the dataspace 1765 H5Sclose(space_id) 1766 1767 if self.atom.kind == 'time': 1768 # Swap the byteorder by hand (this is not currently supported by HDF5) 1769 if H5Tget_order(self.type_id) != platform_byteorder: 1770 nparr.byteswap(True) 1771 1772 # Convert some HDF5 types to NumPy after reading 1773 if self.atom.type == 'time64': 1774 self._convert_time64(nparr, 1) 1775 1776 return 1777 1778 1779 def _g_write_slice(self, ndarray startl, ndarray stepl, ndarray countl, 1780 ndarray nparr): 1781 """Write a slice in an already created NumPy array.""" 1782 1783 cdef int ret 1784 cdef void *rbuf 1785 cdef void *temp 1786 cdef hsize_t *start 1787 cdef hsize_t *step 1788 cdef hsize_t *count 1789 1790 if self.atom.kind == "reference": 1791 raise ValueError("Cannot write reference types yet") 1792 # Get the pointer to the buffer data area 1793 rbuf = 
nparr.data 1794 # Get the start, step and count values 1795 start = <hsize_t *>startl.data 1796 step = <hsize_t *>stepl.data 1797 count = <hsize_t *>countl.data 1798 1799 # Convert some NumPy types to HDF5 before storing. 1800 if self.atom.type == 'time64': 1801 self._convert_time64(nparr, 0) 1802 1803 # Modify the elements: 1804 with nogil: 1805 ret = H5ARRAYwrite_records(self.dataset_id, self.type_id, self.rank, 1806 start, step, count, rbuf) 1807 1808 if ret < 0: 1809 raise HDF5ExtError("Internal error modifying the elements " 1810 "(H5ARRAYwrite_records returned errorcode -%i)" % (-ret)) 1811 1812 return 1813 1814 1815 def _g_write_coords(self, ndarray coords, ndarray nparr): 1816 """Write a selection in an already created NumPy array.""" 1817 1818 cdef herr_t ret 1819 cdef hid_t space_id 1820 cdef hid_t mem_space_id 1821 cdef hsize_t size 1822 cdef void *rbuf 1823 cdef object mode 1824 1825 if self.atom.kind == "reference": 1826 raise ValueError("Cannot write reference types yet") 1827 # Get the dataspace handle 1828 space_id = H5Dget_space(self.dataset_id) 1829 # Create a memory dataspace handle 1830 size = nparr.size 1831 mem_space_id = H5Screate_simple(1, &size, NULL) 1832 1833 # Select the dataspace to be written 1834 H5Sselect_elements(space_id, H5S_SELECT_SET, 1835 <size_t>size, <hsize_t *>coords.data) 1836 1837 # Get the pointer to the buffer data area 1838 rbuf = nparr.data 1839 1840 # Convert some NumPy types to HDF5 before storing. 
1841 if self.atom.type == 'time64': 1842 self._convert_time64(nparr, 0) 1843 1844 # Do the actual write 1845 with nogil: 1846 ret = H5Dwrite(self.dataset_id, self.type_id, mem_space_id, space_id, 1847 H5P_DEFAULT, rbuf) 1848 1849 if ret < 0: 1850 raise HDF5ExtError("Problems writing the array data.") 1851 1852 # Terminate access to the memory dataspace 1853 H5Sclose(mem_space_id) 1854 # Terminate access to the dataspace 1855 H5Sclose(space_id) 1856 1857 return 1858 1859 1860 def _g_write_selection(self, object selection, ndarray nparr): 1861 """Write a selection in an already created NumPy array.""" 1862 1863 cdef herr_t ret 1864 cdef hid_t space_id 1865 cdef hid_t mem_space_id 1866 cdef hsize_t size 1867 cdef void *rbuf 1868 cdef object mode 1869 1870 if self.atom.kind == "reference": 1871 raise ValueError("Cannot write reference types yet") 1872 # Get the dataspace handle 1873 space_id = H5Dget_space(self.dataset_id) 1874 # Create a memory dataspace handle 1875 size = nparr.size 1876 mem_space_id = H5Screate_simple(1, &size, NULL) 1877 1878 # Select the dataspace to be written 1879 # Start by selecting everything 1880 H5Sselect_all(space_id) 1881 # Now refine with outstanding selections 1882 for args in selection: 1883 self.perform_selection(space_id, *args) 1884 1885 # Get the pointer to the buffer data area 1886 rbuf = nparr.data 1887 1888 # Convert some NumPy types to HDF5 before storing. 
1889 if self.atom.type == 'time64': 1890 self._convert_time64(nparr, 0) 1891 1892 # Do the actual write 1893 with nogil: 1894 ret = H5Dwrite(self.dataset_id, self.type_id, mem_space_id, space_id, 1895 H5P_DEFAULT, rbuf) 1896 1897 if ret < 0: 1898 raise HDF5ExtError("Problems writing the array data.") 1899 1900 # Terminate access to the memory dataspace 1901 H5Sclose(mem_space_id) 1902 # Terminate access to the dataspace 1903 H5Sclose(space_id) 1904 1905 return 1906 1907 1908 def __dealloc__(self): 1909 if self.dims: 1910 free(<void *>self.dims) 1911 if self.maxdims: 1912 free(<void *>self.maxdims) 1913 if self.dims_chunk: 1914 free(self.dims_chunk) 1915 1916 1917cdef class VLArray(Leaf): 1918 # Instance variables 1919 cdef hsize_t nrecords 1920 1921 def _create_array(self, object title): 1922 cdef int rank 1923 cdef hsize_t *dims 1924 cdef herr_t ret 1925 cdef void *rbuf 1926 cdef bytes complib, version, class_ 1927 cdef object type_, itemsize, atom, scatom 1928 cdef bytes encoded_title, encoded_name 1929 cdef H5T_cset_t cset = H5T_CSET_ASCII 1930 1931 encoded_title = title.encode('utf-8') 1932 encoded_name = self.name.encode('utf-8') 1933 1934 atom = self.atom 1935 if not hasattr(atom, 'size'): # it is a pseudo-atom 1936 atom = atom.base 1937 1938 # Get the HDF5 type of the *scalar* atom 1939 scatom = atom.copy(shape=()) 1940 self.base_type_id = atom_to_hdf5_type(scatom, self.byteorder) 1941 if self.base_type_id < 0: 1942 raise HDF5ExtError( 1943 "Problems creating the %s: invalid base type ID for atom %s" % ( 1944 self.__class__.__name__, scatom)) 1945 1946 # Allocate space for the dimension axis info 1947 rank = len(atom.shape) 1948 dims = malloc_dims(atom.shape) 1949 1950 rbuf = NULL # We don't have data to save initially 1951 1952 # Encode strings 1953 complib = (self.filters.complib or '').encode('utf-8') 1954 version = self._v_version.encode('utf-8') 1955 class_ = self._c_classid.encode('utf-8') 1956 1957 # Create the vlarray 1958 self.dataset_id = 
H5VLARRAYmake(self.parent_id, encoded_name, version,
                                    rank, dims, self.base_type_id,
                                    self.chunkshape[0], rbuf,
                                    self.filters.complevel, complib,
                                    self.filters.shuffle_bitshuffle,
                                    self.filters.fletcher32,
                                    self._want_track_times, rbuf)
    # `dims` was heap-allocated earlier in this method (outside this view);
    # release it now that the dataset has been created.
    if dims:
      free(<void *>dims)
    if self.dataset_id < 0:
      raise HDF5ExtError("Problems creating the VLArray.")
    self.nrecords = 0  # Initialize the number of records saved

    if self._v_file.params['PYTABLES_SYS_ATTRS']:
      # NOTE(review): on Python 2 `cset` keeps whatever value it was given
      # before this point (presumably H5T_CSET_DEFAULT) — confirm against the
      # start of this method, which is outside this chunk.
      if PY_MAJOR_VERSION > 2:
        cset = H5T_CSET_UTF8
      # Set the conforming array attributes
      H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_,
                                 len(class_), cset)
      H5ATTRset_attribute_string(self.dataset_id, "VERSION", version,
                                 len(version), cset)
      H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title,
                                 len(encoded_title), cset)

    # Get the datatype handles
    self.disk_type_id, self.type_id = self._get_type_ids()

    return self.dataset_id


  def _open_array(self):
    """Open the VLArray dataset on disk and read back its metadata.

    Returns a tuple ``(dataset_id, nrecords, chunkshape, atom)`` where
    ``chunkshape`` is a 1-tuple (VLArrays are unidimensional) and ``atom``
    is rebuilt from the in-memory HDF5 type.

    Raises HDF5ExtError if the node does not exist under the parent group.
    """
    cdef char cbyteorder[11]  # "irrelevant" fits easily here
    cdef int i, enumtype
    cdef int rank
    cdef herr_t ret
    cdef hsize_t nrecords, chunksize
    cdef object shape, type_
    cdef bytes encoded_name
    cdef str byteorder

    encoded_name = self.name.encode('utf-8')

    # Open the dataset
    self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT)
    if self.dataset_id < 0:
      raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
                         (self.name, self._v_parent._v_pathname))
    # Get the datatype handles
    self.disk_type_id, self.type_id = self._get_type_ids()
    # Get the atom for this type
    atom = atom_from_hdf5_type(self.type_id)

    # Get info on dimensions & types (of base class)
    H5VLARRAYget_info(self.dataset_id, self.disk_type_id, &nrecords,
                      cbyteorder)

    byteorder = cstr_to_pystr(cbyteorder)

    # Get some properties of the atomic type; cached here so that
    # _read_array can rebuild per-row ndarrays without touching the atom.
    self._atomicdtype = atom.dtype
    self._atomictype = atom.type
    self._atomicshape = atom.shape
    self._atomicsize = atom.size

    # Get the byteorder
    self.byteorder = correct_byteorder(atom.type, byteorder)

    # Get the chunkshape (VLArrays are unidimensional entities)
    H5ARRAYget_chunkshape(self.dataset_id, 1, &chunksize)

    self.nrecords = nrecords  # Initialize the number of records saved
    return self.dataset_id, SizeType(nrecords), (SizeType(chunksize),), atom


  def _append(self, ndarray nparr, int nobjects):
    """Append one new row made of `nobjects` atoms taken from `nparr`.

    A zero-length row is appended when `nobjects` is 0 (NULL buffer).
    Increments self.nrecords by one on success; raises HDF5ExtError on
    failure.
    """
    cdef int ret
    cdef void *rbuf

    # Get the pointer to the buffer data area
    if nobjects:
      rbuf = nparr.data
      # Convert some NumPy types to HDF5 before storing.
      # NOTE(review): this mutates `nparr` in place — presumably callers pass
      # a private copy; verify at the call sites.
      if self.atom.type == 'time64':
        self._convert_time64(nparr, 0)
    else:
      rbuf = NULL

    # Append the records:
    with nogil:
      ret = H5VLARRAYappend_records(self.dataset_id, self.type_id,
                                    nobjects, self.nrecords, rbuf)

    if ret < 0:
      raise HDF5ExtError("Problems appending the records.")

    # One call appends exactly one (variable-length) row.
    self.nrecords = self.nrecords + 1

  def _modify(self, hsize_t nrow, ndarray nparr, int nobjects):
    """Overwrite row `nrow` with the `nobjects` atoms held in `nparr`.

    Returns `nobjects`.  Raises HDF5ExtError if the HDF5 call fails.
    """
    cdef int ret
    cdef void *rbuf

    # Get the pointer to the buffer data area
    rbuf = nparr.data
    if nobjects:
      # Convert some NumPy types to HDF5 before storing.
      if self.atom.type == 'time64':
        self._convert_time64(nparr, 0)

    # Modify the records:
    with nogil:
      ret = H5VLARRAYmodify_records(self.dataset_id, self.type_id,
                                    nrow, nobjects, rbuf)

    if ret < 0:
      raise HDF5ExtError("Problems modifying the record.")

    return nobjects

  # Because the size of each "row" is unknown, there is no easy way to
  # calculate this value
  def _get_memory_size(self):
    """Return the number of bytes needed to hold the whole dataset in memory.

    Uses H5Dvlen_get_buf_size over the full dataspace; returns 0 for an
    empty array.
    """
    cdef hid_t space_id
    cdef hsize_t size
    cdef herr_t ret

    if self.nrows == 0:
      size = 0
    else:
      # Get the dataspace handle
      space_id = H5Dget_space(self.dataset_id)
      # Return the size of the entire dataset
      ret = H5Dvlen_get_buf_size(self.dataset_id, self.type_id, space_id,
                                 &size)
      if ret < 0:
        # NOTE(review): hsize_t is unsigned, so -1 wraps to a huge value —
        # confirm callers treat this as the intended error sentinel.
        size = -1

      # Terminate access to the dataspace
      H5Sclose(space_id)

    return size

  def _read_array(self, hsize_t start, hsize_t stop, hsize_t step):
    """Read rows ``start:stop:step`` and return them as a list of ndarrays.

    Each row becomes one NumPy array whose leading dimension is that row's
    (variable) length; time64 atoms are converted and, for 'time' kinds,
    byteswapped to the platform order.  Raises HDF5ExtError when the range
    exceeds the available rows or the read fails.
    """
    cdef int i
    cdef size_t vllen
    cdef herr_t ret
    cdef hvl_t *rdata
    cdef hsize_t nrows
    cdef hid_t space_id
    cdef hid_t mem_space_id
    cdef object buf, nparr, shape, datalist

    # Compute the number of rows to read
    nrows = get_len_of_range(start, stop, step)
    if start + nrows > self.nrows:
      raise HDF5ExtError(
        "Asking for a range of rows exceeding the available ones!.",
        h5bt=False)

    # Now, read the chunk of rows
    with nogil:
      # Allocate the necessary memory for keeping the row handlers
      # NOTE(review): malloc result is not checked, and on a failed H5Dread
      # the raise below leaks `rdata` and both dataspace handles — candidate
      # for a try/finally-style cleanup.
      rdata = <hvl_t *>malloc(<size_t>nrows*sizeof(hvl_t))
      # Get the dataspace handle
      space_id = H5Dget_space(self.dataset_id)
      # Create a memory dataspace handle
      mem_space_id = H5Screate_simple(1, &nrows, NULL)
      # Select the data to be read
      H5Sselect_hyperslab(space_id, H5S_SELECT_SET, &start, &step, &nrows,
                          NULL)
      # Do the actual read
      ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id,
                    H5P_DEFAULT, rdata)

    if ret < 0:
      raise HDF5ExtError(
        "VLArray._read_array: Problems reading the array data.")

    datalist = []
    for i from 0 <= i < nrows:
      # Number of atoms in row
      vllen = rdata[i].len
      # Get the pointer to the buffer data area
      if vllen > 0:
        # Create a buffer to keep this info. It is important to do a
        # copy, because we will dispose the buffer memory later on by
        # calling the H5Dvlen_reclaim. PyByteArray_FromStringAndSize does this.
        buf = PyByteArray_FromStringAndSize(<char *>rdata[i].p,
                                            vllen*self._atomicsize)
      else:
        # Case where there is info with zero length
        buf = None
      # Compute the shape for the read array
      shape = list(self._atomicshape)
      shape.insert(0, vllen)  # put the length at the beginning of the shape
      nparr = numpy.ndarray(
        buffer=buf, dtype=self._atomicdtype.base, shape=shape)
      # Set the writeable flag for this ndarray object
      nparr.flags.writeable = True
      if self.atom.kind == 'time':
        # Swap the byteorder by hand (this is not currently supported by HDF5)
        if H5Tget_order(self.type_id) != platform_byteorder:
          nparr.byteswap(True)
      # Convert some HDF5 types to NumPy after reading.
      if self.atom.type == 'time64':
        self._convert_time64(nparr, 1)
      # Append this array to the output list
      datalist.append(nparr)

    # Release resources
    # Reclaim all the (nested) VL data
    ret = H5Dvlen_reclaim(self.type_id, mem_space_id, H5P_DEFAULT, rdata)
    if ret < 0:
      raise HDF5ExtError("VLArray._read_array: error freeing the data buffer.")
    # Terminate access to the memory dataspace
    H5Sclose(mem_space_id)
    # Terminate access to the dataspace
    H5Sclose(space_id)
    # Free the amount of row pointers to VL row data
    free(rdata)

    return datalist


  def get_row_size(self, row):
    """Return the total size in bytes of all the elements contained in a given row."""

    cdef hid_t space_id
    cdef hsize_t size
    cdef herr_t ret

    cdef hsize_t offset[1]
    cdef hsize_t count[1]

    if row >= self.nrows:
      raise HDF5ExtError(
        "Asking for a range of rows exceeding the available ones!.",
        h5bt=False)

    # Get the dataspace handle
    space_id = H5Dget_space(self.dataset_id)

    # Select just the requested row
    offset[0] = row
    count[0] = 1

    ret = H5Sselect_hyperslab(space_id, H5S_SELECT_SET, offset, NULL, count, NULL);
    if ret < 0:
      # NOTE(review): hsize_t is unsigned; -1 wraps (see _get_memory_size),
      # and a failed selection still falls through to the vlen-size query
      # below, overwriting this sentinel — confirm intended.
      size = -1

    ret = H5Dvlen_get_buf_size(self.dataset_id, self.type_id, space_id, &size)
    if ret < 0:
      size = -1

    # Terminate access to the dataspace
    H5Sclose(space_id)

    return size


cdef class UnImplemented(Leaf):
  """Placeholder leaf for HDF5 datasets PyTables cannot interpret."""

  def _open_unimplemented(self):
    """Open the dataset and return ``(shape, byteorder, dataset_id)``."""
    cdef object shape
    cdef char cbyteorder[11]  # "irrelevant" fits easily here
    cdef bytes encoded_name
    cdef str byteorder

    encoded_name = self.name.encode('utf-8')

    # Get info on dimensions
    shape = H5UIget_info(self.parent_id, encoded_name, cbyteorder)
    shape = tuple(map(SizeType, shape))
    self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT)
    byteorder = cstr_to_pystr(cbyteorder)

    return (shape, byteorder, self.dataset_id)

  def _g_close(self):
    # Release the HDF5 dataset handle.
    H5Dclose(self.dataset_id)


## Local Variables:
## mode: python
## py-indent-offset: 2
## tab-width: 2
## fill-column: 78
## End: