1# -*- coding: utf-8 -*-
2
3########################################################################
4#
5# License: BSD
6# Created: September 21, 2002
7# Author:  Francesc Alted - faltet@pytables.com
8#
9# $Id$
10#
11########################################################################
12
13"""Cython interface between several PyTables classes and HDF5 library.
14
15Classes (type extensions):
16
17    File
18    AttributeSet
19    Node
20    Leaf
21    Group
22    Array
23    VLArray
24    UnImplemented
25
26Functions:
27
28Misc variables:
29
30"""
31
32import os
33import warnings
34from collections import namedtuple
35
36ObjInfo = namedtuple('ObjInfo', ['addr', 'rc'])
37ObjTimestamps = namedtuple('ObjTimestamps', ['atime', 'mtime',
38                                             'ctime', 'btime'])
39
40
41from cpython cimport PY_MAJOR_VERSION
42if PY_MAJOR_VERSION < 3:
43    import cPickle as pickle
44else:
45    import pickle
46
47import numpy
48
49from tables.exceptions import HDF5ExtError, DataTypeWarning
50
51from tables.utils import (check_file_access, byteorders, correct_byteorder,
52  SizeType)
53
54from tables.atom import Atom
55
56from tables.description import descr_from_dtype
57
58from tables.utilsextension import (encode_filename, set_blosc_max_threads,
59  atom_to_hdf5_type, atom_from_hdf5_type, hdf5_to_np_ext_type, create_nested_type,
60  pttype_to_hdf5, pt_special_kinds, npext_prefixes_to_ptkinds, hdf5_class_to_string,
61  platform_byteorder)
62
63
64# Types, constants, functions, classes & other objects from everywhere
65from libc.stdlib cimport malloc, free
66from libc.string cimport strdup, strlen
67from numpy cimport import_array, ndarray, npy_intp
68from cpython.bytes cimport (PyBytes_AsString, PyBytes_FromStringAndSize,
69    PyBytes_Check)
70from cpython.unicode cimport PyUnicode_DecodeUTF8
71
72
73from definitions cimport (uintptr_t, hid_t, herr_t, hsize_t, hvl_t,
74  H5S_seloper_t, H5D_FILL_VALUE_UNDEFINED,
75  H5O_TYPE_UNKNOWN, H5O_TYPE_GROUP, H5O_TYPE_DATASET, H5O_TYPE_NAMED_DATATYPE,
76  H5L_TYPE_ERROR, H5L_TYPE_HARD, H5L_TYPE_SOFT, H5L_TYPE_EXTERNAL,
77  H5T_class_t, H5T_sign_t, H5T_NATIVE_INT,
78  H5T_cset_t, H5T_CSET_ASCII, H5T_CSET_UTF8,
79  H5F_SCOPE_GLOBAL, H5F_ACC_TRUNC, H5F_ACC_RDONLY, H5F_ACC_RDWR,
80  H5P_DEFAULT, H5P_FILE_ACCESS, H5P_FILE_CREATE, H5T_DIR_DEFAULT,
81  H5S_SELECT_SET, H5S_SELECT_AND, H5S_SELECT_NOTB,
82  H5Fcreate, H5Fopen, H5Fclose, H5Fflush, H5Fget_vfd_handle, H5Fget_filesize,
83  H5Fget_create_plist,
84  H5Gcreate, H5Gopen, H5Gclose, H5Ldelete, H5Lmove,
85  H5Dopen, H5Dclose, H5Dread, H5Dwrite, H5Dget_type, H5Dget_create_plist,
86  H5Dget_space, H5Dvlen_reclaim, H5Dget_storage_size, H5Dvlen_get_buf_size,
87  H5Tget_native_type, H5Tclose, H5Tis_variable_str, H5Tget_sign,
88  H5Adelete, H5T_BITFIELD, H5T_INTEGER, H5T_FLOAT, H5T_STRING, H5Tget_order,
89  H5Pcreate, H5Pset_cache, H5Pclose, H5Pget_userblock, H5Pset_userblock,
90  H5Pset_fapl_sec2, H5Pset_fapl_log, H5Pset_fapl_stdio, H5Pset_fapl_core,
91  H5Pset_fapl_split, H5Pget_obj_track_times,
92  H5Sselect_all, H5Sselect_elements, H5Sselect_hyperslab,
93  H5Screate_simple, H5Sclose,
94  H5Oget_info, H5O_info_t,
95  H5ATTRset_attribute, H5ATTRset_attribute_string,
96  H5ATTRget_attribute, H5ATTRget_attribute_string,
97  H5ATTRget_attribute_vlen_string_array,
98  H5ATTRfind_attribute, H5ATTRget_type_ndims, H5ATTRget_dims,
99  H5ARRAYget_ndims, H5ARRAYget_info,
100  set_cache_size, get_objinfo, get_linkinfo, Giterate, Aiterate, H5UIget_info,
101  get_len_of_range, conv_float64_timeval32, truncate_dset,
102  H5_HAVE_DIRECT_DRIVER, pt_H5Pset_fapl_direct,
103  H5_HAVE_WINDOWS_DRIVER, pt_H5Pset_fapl_windows,
104  H5_HAVE_IMAGE_FILE, pt_H5Pset_file_image, pt_H5Fget_file_image,
105  H5Tget_size, hobj_ref_t)
106
107cdef int H5T_CSET_DEFAULT = 16
108
109from utilsextension cimport malloc_dims, get_native_type, cstr_to_pystr, load_reference
110
111
112#-------------------------------------------------------------------
113
114cdef extern from "Python.h":
115
116    object PyByteArray_FromStringAndSize(char *s, Py_ssize_t len)
117
118# Functions from HDF5 ARRAY (this is not part of HDF5 HL; it's private)
119cdef extern from "H5ARRAY.h" nogil:
120
121  herr_t H5ARRAYmake(hid_t loc_id, char *dset_name, char *obversion,
122                     int rank, hsize_t *dims, int extdim,
123                     hid_t type_id, hsize_t *dims_chunk, void *fill_data,
124                     int complevel, char  *complib, int shuffle,
125                     int fletcher32, hbool_t track_times, void *data)
126
127  herr_t H5ARRAYappend_records(hid_t dataset_id, hid_t type_id,
128                               int rank, hsize_t *dims_orig,
129                               hsize_t *dims_new, int extdim, void *data )
130
131  herr_t H5ARRAYwrite_records(hid_t dataset_id, hid_t type_id,
132                              int rank, hsize_t *start, hsize_t *step,
133                              hsize_t *count, void *data)
134
135  herr_t H5ARRAYread(hid_t dataset_id, hid_t type_id,
136                     hsize_t start, hsize_t nrows, hsize_t step,
137                     int extdim, void *data)
138
139  herr_t H5ARRAYreadSlice(hid_t dataset_id, hid_t type_id,
140                          hsize_t *start, hsize_t *stop,
141                          hsize_t *step, void *data)
142
143  herr_t H5ARRAYreadIndex(hid_t dataset_id, hid_t type_id, int notequal,
144                          hsize_t *start, hsize_t *stop, hsize_t *step,
145                          void *data)
146
147  herr_t H5ARRAYget_chunkshape(hid_t dataset_id, int rank, hsize_t *dims_chunk)
148
149  herr_t H5ARRAYget_fill_value( hid_t dataset_id, hid_t type_id,
150                                int *status, void *value)
151
152
153# Functions for dealing with VLArray objects
154cdef extern from "H5VLARRAY.h" nogil:
155
156  herr_t H5VLARRAYmake( hid_t loc_id, char *dset_name, char *obversion,
157                        int rank, hsize_t *dims, hid_t type_id,
158                        hsize_t chunk_size, void *fill_data, int complevel,
159                        char *complib, int shuffle, int flecther32,
160                        hbool_t track_times, void *data)
161
162  herr_t H5VLARRAYappend_records( hid_t dataset_id, hid_t type_id,
163                                  int nobjects, hsize_t nrecords,
164                                  void *data )
165
166  herr_t H5VLARRAYmodify_records( hid_t dataset_id, hid_t type_id,
167                                  hsize_t nrow, int nobjects,
168                                  void *data )
169
170  herr_t H5VLARRAYget_info( hid_t dataset_id, hid_t type_id,
171                            hsize_t *nrecords, char *base_byteorder)
172
173
174#----------------------------------------------------------------------------
175
176# Initialization code
177
178# The numpy API requires this function to be called before
179# using any numpy facilities in an extension module.
180import_array()
181
182#---------------------------------------------------------------------------
183
184# Helper functions
185
186cdef hsize_t *npy_malloc_dims(int rank, npy_intp *pdims):
187  """Returns a malloced hsize_t dims from a npy_intp *pdims."""
188
189  cdef int i
190  cdef hsize_t *dims
191
192  dims = NULL
193  if rank > 0:
194    dims = <hsize_t *>malloc(rank * sizeof(hsize_t))
195    for i from 0 <= i < rank:
196      dims[i] = pdims[i]
197  return dims
198
199
200cdef object getshape(int rank, hsize_t *dims):
201  """Return a shape (tuple) from a dims C array of rank dimensions."""
202
203  cdef int i
204  cdef object shape
205
206  shape = []
207  for i from 0 <= i < rank:
208    shape.append(SizeType(dims[i]))
209
210  return tuple(shape)
211
212
213# Helper function for quickly fetch an attribute string
214cdef object get_attribute_string_or_none(hid_t node_id, char* attr_name):
215  """Returns a string/unicode attribute if it exists in node_id.
216
217  It returns ``None`` in case it don't exists (or there have been problems
218  reading it).
219
220  """
221
222  cdef char *attr_value
223  cdef int cset = H5T_CSET_DEFAULT
224  cdef object retvalue
225  cdef hsize_t size
226
227  attr_value = NULL
228  retvalue = None   # Default value
229  if H5ATTRfind_attribute(node_id, attr_name):
230    size = H5ATTRget_attribute_string(node_id, attr_name, &attr_value, &cset)
231    if size == 0:
232      if cset == H5T_CSET_UTF8:
233        retvalue = numpy.unicode_(u'')
234      else:
235        retvalue = numpy.bytes_(b'')
236    elif cset == H5T_CSET_UTF8:
237      if size == 1 and attr_value[0] == 0:
238        # compatibility with PyTables <= 3.1.1
239        retvalue = numpy.unicode_(u'')
240      retvalue = PyUnicode_DecodeUTF8(attr_value, size, NULL)
241      retvalue = numpy.unicode_(retvalue)
242    else:
243      retvalue = PyBytes_FromStringAndSize(attr_value, size)
244      # AV: oct 2012
245      # since now we use the string size got form HDF5 we have to stip
246      # trailing zeros used for padding.
247      # The entire process is quite odd but due to a bug (??) in the way
248      # numpy arrays are pickled in python 3 we can't assume that
249      # strlen(attr_value) is the actual length of the attibute
250      # and numpy.bytes_(attr_value) can give a truncated pickle string
251      retvalue = retvalue.rstrip(b'\x00')
252      retvalue = numpy.bytes_(retvalue)
253
254    # Important to release attr_value, because it has been malloc'ed!
255    if attr_value:
256      free(<void *>attr_value)
257
258  return retvalue
259
260
261# Get the numpy dtype scalar attribute from an HDF5 type as fast as possible
262cdef object get_dtype_scalar(hid_t type_id, H5T_class_t class_id,
263                             size_t itemsize):
264  cdef H5T_sign_t sign
265  cdef object stype
266
267  if class_id == H5T_BITFIELD:
268    stype = "b1"
269  elif class_id == H5T_INTEGER:
270    # Get the sign
271    sign = H5Tget_sign(type_id)
272    if (sign > 0):
273      stype = "i%s" % (itemsize)
274    else:
275      stype = "u%s" % (itemsize)
276  elif class_id ==  H5T_FLOAT:
277    stype = "f%s" % (itemsize)
278  elif class_id ==  H5T_STRING:
279    if H5Tis_variable_str(type_id):
280      raise TypeError("variable length strings are not supported yet")
281    stype = "S%s" % (itemsize)
282
283  # Try to get a NumPy type.  If this can't be done, return None.
284  try:
285    ntype = numpy.dtype(stype)
286  except TypeError:
287    ntype = None
288  return ntype
289
290
291_supported_drivers = (
292    "H5FD_SEC2",
293    "H5FD_DIRECT",
294    #"H5FD_LOG",
295    "H5FD_WINDOWS",
296    "H5FD_STDIO",
297    "H5FD_CORE",
298    #"H5FD_FAMILY",
299    #"H5FD_MULTI",
300    "H5FD_SPLIT",
301    #"H5FD_MPIO",
302    #"H5FD_MPIPOSIX",
303    #"H5FD_STREAM",
304)
305
306HAVE_DIRECT_DRIVER = bool(H5_HAVE_DIRECT_DRIVER)
307HAVE_WINDOWS_DRIVER = bool(H5_HAVE_WINDOWS_DRIVER)
308
309# Type extensions declarations (these are subclassed by PyTables
310# Python classes)
311
312cdef class File:
313  cdef hid_t   file_id
314  cdef hid_t   access_plist
315  cdef object  name
316
317  def _g_new(self, name, pymode, **params):
318    cdef herr_t err = 0
319    cdef hid_t access_plist, create_plist = H5P_DEFAULT
320    cdef hid_t meta_plist_id = H5P_DEFAULT, raw_plist_id = H5P_DEFAULT
321    cdef size_t img_buf_len = 0, user_block_size = 0
322    cdef void *img_buf_p = NULL
323    cdef bytes encname
324    #cdef bytes logfile_name
325
326    # Check if we can handle the driver
327    driver = params["DRIVER"]
328    if driver is not None and driver not in _supported_drivers:
329      raise ValueError("Invalid or not supported driver: '%s'" % driver)
330    if driver == "H5FD_SPLIT":
331      meta_ext = params.get("DRIVER_SPLIT_META_EXT", "-m.h5")
332      raw_ext = params.get("DRIVER_SPLIT_RAW_EXT", "-r.h5")
333      meta_name = meta_ext % name if "%s" in meta_ext else name + meta_ext
334      raw_name = raw_ext % name if "%s" in raw_ext else name + raw_ext
335      enc_meta_ext = encode_filename(meta_ext)
336      enc_raw_ext = encode_filename(raw_ext)
337
338    # Create a new file using default properties
339    self.name = name
340
341    # Encode the filename in case it is unicode
342    encname = encode_filename(name)
343
344    # These fields can be seen from Python.
345    self._v_new = None  # this will be computed later
346    # """Is this file going to be created from scratch?"""
347
348    self._isPTFile = True  # assume a PyTables file by default
349    # """Does this HDF5 file have a PyTables format?"""
350
351    assert pymode in ('r', 'r+', 'a', 'w'), ("an invalid mode string ``%s`` "
352           "passed the ``check_file_access()`` test; "
353           "please report this to the authors" % pymode)
354
355    image = params.get('DRIVER_CORE_IMAGE')
356    if image:
357      if driver != "H5FD_CORE":
358        warnings.warn("The DRIVER_CORE_IMAGE parameter will be ignored by "
359                      "the '%s' driver" % driver)
360      elif not PyBytes_Check(image):
361        raise TypeError("The DRIVER_CORE_IMAGE must be a string of bytes")
362      elif not H5_HAVE_IMAGE_FILE:
363        raise RuntimeError("Support for image files is only available in "
364                           "HDF5 >= 1.8.9")
365
366    # After the following check we can be quite sure
367    # that the file or directory exists and permissions are right.
368    if driver == "H5FD_SPLIT":
369      for n in meta_name, raw_name:
370        check_file_access(n, pymode)
371    else:
372      backing_store = params.get("DRIVER_CORE_BACKING_STORE", 1)
373      if driver != "H5FD_CORE" or backing_store:
374        check_file_access(name, pymode)
375
376    # Should a new file be created?
377    if image:
378      exists = True
379    elif driver == "H5FD_SPLIT":
380      exists = os.path.exists(meta_name) and os.path.exists(raw_name)
381    else:
382      exists = os.path.exists(name)
383    self._v_new = not (pymode in ('r', 'r+') or (pymode == 'a' and exists))
384
385    user_block_size = params.get("USER_BLOCK_SIZE", 0)
386    if user_block_size and not self._v_new:
387        warnings.warn("The HDF5 file already esists: the USER_BLOCK_SIZE "
388                      "will be ignored")
389    elif user_block_size:
390      user_block_size = int(user_block_size)
391      is_pow_of_2 = ((user_block_size & (user_block_size - 1)) == 0)
392      if user_block_size < 512 or not is_pow_of_2:
393        raise ValueError("The USER_BLOCK_SIZE must be a power od 2 greather "
394                         "than 512 or zero")
395
396      # File creation property list
397      create_plist = H5Pcreate(H5P_FILE_CREATE)
398      err = H5Pset_userblock(create_plist, user_block_size)
399      if err < 0:
400        H5Pclose(create_plist)
401        raise HDF5ExtError("Unable to set the user block size")
402
403    # File access property list
404    access_plist = H5Pcreate(H5P_FILE_ACCESS)
405
406    # Set parameters for chunk cache
407    H5Pset_cache(access_plist, 0,
408                 params["CHUNK_CACHE_NELMTS"],
409                 params["CHUNK_CACHE_SIZE"],
410                 params["CHUNK_CACHE_PREEMPT"])
411
412    # Set the I/O driver
413    if driver == "H5FD_SEC2":
414      err = H5Pset_fapl_sec2(access_plist)
415    elif driver == "H5FD_DIRECT":
416      if not H5_HAVE_DIRECT_DRIVER:
417        H5Pclose(create_plist)
418        H5Pclose(access_plist)
419        raise RuntimeError("The H5FD_DIRECT driver is not available")
420      err = pt_H5Pset_fapl_direct(access_plist,
421                                  params["DRIVER_DIRECT_ALIGNMENT"],
422                                  params["DRIVER_DIRECT_BLOCK_SIZE"],
423                                  params["DRIVER_DIRECT_CBUF_SIZE"])
424    #elif driver == "H5FD_LOG":
425    #  if "DRIVER_LOG_FILE" not in params:
426    #    H5Pclose(access_plist)
427    #    raise ValueError("The DRIVER_LOG_FILE parameter is required for "
428    #                     "the H5FD_LOG driver")
429    #  logfile_name = encode_filename(params["DRIVER_LOG_FILE"])
430    #  err = H5Pset_fapl_log(access_plist,
431    #                        <char*>logfile_name,
432    #                        params["DRIVER_LOG_FLAGS"],
433    #                        params["DRIVER_LOG_BUF_SIZE"])
434    elif driver == "H5FD_WINDOWS":
435      if not H5_HAVE_WINDOWS_DRIVER:
436        H5Pclose(access_plist)
437        H5Pclose(create_plist)
438        raise RuntimeError("The H5FD_WINDOWS driver is not available")
439      err = pt_H5Pset_fapl_windows(access_plist)
440    elif driver == "H5FD_STDIO":
441      err = H5Pset_fapl_stdio(access_plist)
442    elif driver == "H5FD_CORE":
443      err = H5Pset_fapl_core(access_plist,
444                             params["DRIVER_CORE_INCREMENT"],
445                             backing_store)
446      if image:
447        img_buf_len = len(image)
448        img_buf_p = <void *>PyBytes_AsString(image)
449        err = pt_H5Pset_file_image(access_plist, img_buf_p, img_buf_len)
450        if err < 0:
451          H5Pclose(create_plist)
452          H5Pclose(access_plist)
453          raise HDF5ExtError("Unable to set the file image")
454
455    #elif driver == "H5FD_FAMILY":
456    #  H5Pset_fapl_family(access_plist,
457    #                     params["DRIVER_FAMILY_MEMB_SIZE"],
458    #                     fapl_id)
459    #elif driver == "H5FD_MULTI":
460    #  err = H5Pset_fapl_multi(access_plist, memb_map, memb_fapl, memb_name,
461    #                          memb_addr, relax)
462    elif driver == "H5FD_SPLIT":
463      err = H5Pset_fapl_split(access_plist, enc_meta_ext, meta_plist_id,
464                              enc_raw_ext, raw_plist_id)
465    if err < 0:
466      e = HDF5ExtError("Unable to set the file access property list")
467      H5Pclose(create_plist)
468      H5Pclose(access_plist)
469      raise e
470
471    if pymode == 'r':
472      self.file_id = H5Fopen(encname, H5F_ACC_RDONLY, access_plist)
473    elif pymode == 'r+':
474      self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist)
475    elif pymode == 'a':
476      if exists:
477        # A test for logging.
478        ## H5Pset_sieve_buf_size(access_plist, 0)
479        ## H5Pset_fapl_log (access_plist, "test.log", H5FD_LOG_LOC_WRITE, 0)
480        self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist)
481      else:
482        self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC, create_plist,
483                                 access_plist)
484    elif pymode == 'w':
485      self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC, create_plist,
486                               access_plist)
487
488    if self.file_id < 0:
489        e = HDF5ExtError("Unable to open/create file '%s'" % name)
490        H5Pclose(create_plist)
491        H5Pclose(access_plist)
492        raise e
493
494    H5Pclose(create_plist)
495    H5Pclose(access_plist)
496
497    # Set the cache size
498    set_cache_size(self.file_id, params["METADATA_CACHE_SIZE"])
499
500    # Set the maximum number of threads for Blosc
501    set_blosc_max_threads(params["MAX_BLOSC_THREADS"])
502
503  # XXX: add the possibility to pass a pre-allocated buffer
504  def get_file_image(self):
505    """Retrieves an in-memory image of an existing, open HDF5 file.
506
507    .. note:: this method requires HDF5 >= 1.8.9.
508
509    .. versionadded:: 3.0
510
511    """
512
513    cdef ssize_t size = 0
514    cdef size_t buf_len = 0
515    cdef bytes image
516    cdef char* cimage
517
518    self.flush()
519
520    # retrieve the size of the buffer for the file image
521    size = pt_H5Fget_file_image(self.file_id, NULL, buf_len)
522    if size < 0:
523      raise HDF5ExtError("Unable to retrieve the size of the buffer for the "
524                         "file image.  Plese note that not all drivers "
525                         "provide support for image files.")
526
527    # allocate the memory buffer
528    image = PyBytes_FromStringAndSize(NULL, size)
529    if not image:
530      raise RuntimeError("Unable to allecote meomory fir the file image")
531
532    cimage = image
533    buf_len = size
534    size = pt_H5Fget_file_image(self.file_id, <void*>cimage, buf_len)
535    if size < 0:
536      raise HDF5ExtError("Unable to retrieve the file image. "
537                         "Plese note that not all drivers provide support "
538                         "for image files.")
539
540    return image
541
542  def get_filesize(self):
543    """Returns the size of an HDF5 file.
544
545    The returned size is that of the entire file, as opposed to only
546    the HDF5 portion of the file. I.e., size includes the user block,
547    if any, the HDF5 portion of the file, and any data that may have
548    been appended beyond the data written through the HDF5 Library.
549
550    .. versionadded:: 3.0
551
552    """
553
554    cdef herr_t err = 0
555    cdef hsize_t size = 0
556
557    err = H5Fget_filesize(self.file_id, &size)
558    if err < 0:
559      raise HDF5ExtError("Unable to retrieve the HDF5 file size")
560
561    return size
562
563  def get_userblock_size(self):
564    """Retrieves the size of a user block.
565
566    .. versionadded:: 3.0
567
568    """
569
570    cdef herr_t err = 0
571    cdef hsize_t size = 0
572    cdef hid_t create_plist
573
574    create_plist = H5Fget_create_plist(self.file_id)
575    if create_plist < 0:
576      raise HDF5ExtError("Unable to get the creation property list")
577
578    err = H5Pget_userblock(create_plist, &size)
579    if err < 0:
580      H5Pclose(create_plist)
581      raise HDF5ExtError("unable to retrieve the user block size")
582
583    H5Pclose(create_plist)
584
585    return size
586
587  # Accessor definitions
588  def _get_file_id(self):
589    return self.file_id
590
591  def fileno(self):
592    """Return the underlying OS integer file descriptor.
593
594    This is needed for lower-level file interfaces, such as the ``fcntl``
595    module.
596
597    """
598
599    cdef void *file_handle
600    cdef uintptr_t *descriptor
601    cdef herr_t err
602    err = H5Fget_vfd_handle(self.file_id, H5P_DEFAULT, &file_handle)
603    if err < 0:
604      raise HDF5ExtError(
605        "Problems getting file descriptor for file ``%s``" % self.name)
606    # Convert the 'void *file_handle' into an 'int *descriptor'
607    descriptor = <uintptr_t *>file_handle
608    return descriptor[0]
609
610
611  def _flush_file(self, scope):
612    # Close the file
613    H5Fflush(self.file_id, scope)
614
615
616  def _close_file(self):
617    # Close the file
618    H5Fclose( self.file_id )
619    self.file_id = 0    # Means file closed
620
621
622  # This method is moved out of scope, until we provide code to delete
623  # the memory booked by this extension types
624  def __dealloc__(self):
625    cdef int ret
626    if self.file_id > 0:
627      # Close the HDF5 file because user didn't do that!
628      ret = H5Fclose(self.file_id)
629      if ret < 0:
630        raise HDF5ExtError("Problems closing the file '%s'" % self.name)
631
632
633cdef class AttributeSet:
634  cdef object name
635
636  def _g_new(self, node):
637    self.name = node._v_name
638
639  def _g_list_attr(self, node):
640    "Return a tuple with the attribute list"
641    a = Aiterate(node._v_objectid)
642    return a
643
644
645  def _g_setattr(self, node, name, object value):
646    """Save Python or NumPy objects as HDF5 attributes.
647
648    Scalar Python objects, scalar NumPy & 0-dim NumPy objects will all be
649    saved as H5T_SCALAR type.  N-dim NumPy objects will be saved as H5T_ARRAY
650    type.
651
652    """
653
654    cdef int ret
655    cdef hid_t dset_id, type_id
656    cdef hsize_t *dims
657    cdef ndarray ndv
658    cdef object byteorder, rabyteorder, baseatom
659    cdef char* cname = NULL
660    cdef bytes encoded_name
661    cdef int cset = H5T_CSET_DEFAULT
662
663    encoded_name = name.encode('utf-8')
664    # get the C pointer
665    cname = encoded_name
666
667    # The dataset id of the node
668    dset_id = node._v_objectid
669
670    # Convert a NumPy scalar into a NumPy 0-dim ndarray
671    if isinstance(value, numpy.generic):
672      value = numpy.array(value)
673
674    # Check if value is a NumPy ndarray and of a supported type
675    if (isinstance(value, numpy.ndarray) and
676        value.dtype.kind in ('V', 'S', 'b', 'i', 'u', 'f', 'c')):
677      # get a contiguous array: fixes #270 and gh-176
678      #value = numpy.ascontiguousarray(value)
679      value = value.copy()
680      if value.dtype.kind == 'V':
681        description, rabyteorder = descr_from_dtype(value.dtype, ptparams=node._v_file.params)
682        byteorder = byteorders[rabyteorder]
683        type_id = create_nested_type(description, byteorder)
684        # Make sure the value is consistent with offsets of the description
685        value = value.astype(description._v_dtype)
686      else:
687        # Get the associated native HDF5 type of the scalar type
688        baseatom = Atom.from_dtype(value.dtype.base)
689        byteorder = byteorders[value.dtype.byteorder]
690        type_id = atom_to_hdf5_type(baseatom, byteorder)
691      # Get dimensionality info
692      ndv = <ndarray>value
693      dims = npy_malloc_dims(ndv.ndim, ndv.shape)
694      # Actually write the attribute
695      ret = H5ATTRset_attribute(dset_id, cname, type_id,
696                                ndv.ndim, dims, ndv.data)
697      if ret < 0:
698        raise HDF5ExtError("Can't set attribute '%s' in node:\n %s." %
699                           (name, self._v_node))
700      # Release resources
701      free(<void *>dims)
702      H5Tclose(type_id)
703    else:
704      # Object cannot be natively represented in HDF5.
705      if (isinstance(value, numpy.ndarray) and
706          value.dtype.kind == 'U' and
707          value.shape == ()):
708        value = value[()].encode('utf-8')
709        cset = H5T_CSET_UTF8
710      else:
711        # Convert this object to a null-terminated string
712        # (binary pickles are not supported at this moment)
713        value = pickle.dumps(value, 0)
714
715      ret = H5ATTRset_attribute_string(dset_id, cname, value, len(value), cset)
716      if ret < 0:
717        raise HDF5ExtError("Can't set attribute '%s' in node:\n %s." %
718                           (name, self._v_node))
719
720
721  # Get attributes
722  def _g_getattr(self, node, attrname):
723    """Get HDF5 attributes and retrieve them as NumPy objects.
724
725    H5T_SCALAR types will be retrieved as scalar NumPy.
726    H5T_ARRAY types will be retrieved as ndarray NumPy objects.
727
728    """
729
730    cdef hsize_t *dims
731    cdef H5T_class_t class_id
732    cdef size_t type_size
733    cdef hid_t mem_type, dset_id, type_id, native_type
734    cdef int rank, ret, enumtype
735    cdef void *rbuf
736    cdef char *str_value
737    cdef char **str_values = NULL
738    cdef ndarray ndvalue
739    cdef object shape, stype_atom, shape_atom, retvalue
740    cdef int i, nelements
741    cdef char* cattrname = NULL
742    cdef bytes encoded_attrname
743    cdef int cset = H5T_CSET_DEFAULT
744
745    encoded_attrname = attrname.encode('utf-8')
746    # Get the C pointer
747    cattrname = encoded_attrname
748
749    # The dataset id of the node
750    dset_id = node._v_objectid
751    dims = NULL
752
753    ret = H5ATTRget_type_ndims(dset_id, cattrname, &type_id, &class_id,
754                               &type_size, &rank )
755    if ret < 0:
756      raise HDF5ExtError("Can't get type info on attribute %s in node %s." %
757                         (attrname, self.name))
758
759    # Call a fast function for scalar values and typical class types
760    if (rank == 0 and class_id == H5T_STRING):
761      type_size = H5ATTRget_attribute_string(dset_id, cattrname, &str_value,
762                                             &cset)
763      if type_size == 0:
764        if cset == H5T_CSET_UTF8:
765          retvalue = numpy.unicode_(u'')
766        else:
767          retvalue = numpy.bytes_(b'')
768
769      elif cset == H5T_CSET_UTF8:
770        if type_size == 1 and str_value[0] == 0:
771          # compatibility with PyTables <= 3.1.1
772          retvalue = numpy.unicode_(u'')
773        retvalue = PyUnicode_DecodeUTF8(str_value, type_size, NULL)
774        retvalue = numpy.unicode_(retvalue)
775      else:
776        retvalue = PyBytes_FromStringAndSize(str_value, type_size)
777        # AV: oct 2012
778        # since now we use the string size got form HDF5 we have to strip
779        # trailing zeros used for padding.
780        # The entire process is quite odd but due to a bug (??) in the way
781        # numpy arrays are pickled in python 3 we can't assume that
782        # strlen(attr_value) is the actual length of the attibute
783        # and numpy.bytes_(attr_value) can give a truncated pickle sting
784        retvalue = retvalue.rstrip(b'\x00')
785        retvalue = numpy.bytes_(retvalue)     # bytes
786      # Important to release attr_value, because it has been malloc'ed!
787      if str_value:
788        free(str_value)
789      H5Tclose(type_id)
790      return retvalue
791    elif (rank == 0 and class_id in (H5T_BITFIELD, H5T_INTEGER, H5T_FLOAT)):
792      dtype_ = get_dtype_scalar(type_id, class_id, type_size)
793      if dtype_ is None:
794        warnings.warn("Unsupported type for attribute '%s' in node '%s'. "
795                      "Offending HDF5 class: %d" % (attrname, self.name,
796                                                    class_id), DataTypeWarning)
797        self._v_unimplemented.append(attrname)
798        return None
799      shape = ()
800    else:
801      # General case
802
803      # Get the dimensional info
804      dims = <hsize_t *>malloc(rank * sizeof(hsize_t))
805      ret = H5ATTRget_dims(dset_id, cattrname, dims)
806      if ret < 0:
807        raise HDF5ExtError("Can't get dims info on attribute %s in node %s." %
808                           (attrname, self.name))
809      shape = getshape(rank, dims)
810      # dims is not needed anymore
811      free(<void *> dims)
812
813      # Get the NumPy dtype from the type_id
814      try:
815        stype_, shape_ = hdf5_to_np_ext_type(type_id, pure_numpy_types=True, ptparams=node._v_file.params)
816        dtype_ = numpy.dtype(stype_, shape_)
817      except TypeError:
818        if class_id == H5T_STRING and H5Tis_variable_str(type_id):
819          nelements = H5ATTRget_attribute_vlen_string_array(dset_id, cattrname,
820                                                            &str_values, &cset)
821          if nelements < 0:
822            raise HDF5ExtError("Can't read attribute %s in node %s." %
823                               (attrname, self.name))
824
825          # The following generator expressions do not work with Cython 0.15.1
826          if cset == H5T_CSET_UTF8:
827            #retvalue = numpy.fromiter(
828            #  PyUnicode_DecodeUTF8(<char*>str_values[i],
829            #                        strlen(<char*>str_values[i]),
830            #                        NULL)
831            #    for i in range(nelements), "O8")
832            retvalue = numpy.array([
833              PyUnicode_DecodeUTF8(<char*>str_values[i],
834                                    strlen(<char*>str_values[i]),
835                                    NULL)
836                for i in range(nelements)], "O8")
837
838          else:
839            #retvalue = numpy.fromiter(
840            #  <char*>str_values[i] for i in range(nelements), "O8")
841            retvalue = numpy.array(
842              [<char*>str_values[i] for i in range(nelements)], "O8")
843          retvalue.shape = shape
844
845          # Important to release attr_value, because it has been malloc'ed!
846          for i in range(nelements):
847            free(str_values[i])
848          free(str_values)
849
850          return retvalue
851
852        # This class is not supported. Instead of raising a TypeError, issue a
853        # warning explaining the problem. This will allow to continue browsing
854        # native HDF5 files, while informing the user about the problem.
855        warnings.warn("Unsupported type for attribute '%s' in node '%s'. "
856                      "Offending HDF5 class: %d" % (attrname, self.name,
857                                                    class_id), DataTypeWarning)
858        self._v_unimplemented.append(attrname)
859        return None
860
861    # Get the container for data
862    ndvalue = numpy.empty(dtype=dtype_, shape=shape)
863    # Get the pointer to the buffer data area
864    rbuf = ndvalue.data
865    # Actually read the attribute from disk
866    ret = H5ATTRget_attribute(dset_id, cattrname, type_id, rbuf)
867    if ret < 0:
868      raise HDF5ExtError("Attribute %s exists in node %s, but can't get it." %
869                         (attrname, self.name))
870    H5Tclose(type_id)
871
872    if rank > 0:    # multidimensional case
873      retvalue = ndvalue
874    else:
875      retvalue = ndvalue[()]   # 0-dim ndarray becomes a NumPy scalar
876
877    return retvalue
878
879
880  def _g_remove(self, node, attrname):
881    cdef int ret
882    cdef hid_t dset_id
883    cdef char *cattrname = NULL
884    cdef bytes encoded_attrname
885
886    encoded_attrname = attrname.encode('utf-8')
887    # Get the C pointer
888    cattrname = encoded_attrname
889
890    # The dataset id of the node
891    dset_id = node._v_objectid
892
893    ret = H5Adelete(dset_id, cattrname)
894    if ret < 0:
895      raise HDF5ExtError("Attribute '%s' exists in node '%s', but cannot be "
896                         "deleted." % (attrname, self.name))
897
898
899cdef class Node:
900  # Instance variables declared in .pxd
901
902  def _g_new(self, where, name, init):
903    self.name = name
904    # """The name of this node in its parent group."""
905    self.parent_id = where._v_objectid
906    # """The identifier of the parent group."""
907
908  def _g_delete(self, parent):
909    cdef int ret
910    cdef bytes encoded_name
911
912    encoded_name = self.name.encode('utf-8')
913
914    # Delete this node
915    ret = H5Ldelete(parent._v_objectid, encoded_name, H5P_DEFAULT)
916    if ret < 0:
917      raise HDF5ExtError("problems deleting the node ``%s``" % self.name)
918    return ret
919
920  def __dealloc__(self):
921    self.parent_id = 0
922
923  def _get_obj_info(self):
924    cdef herr_t ret = 0
925    cdef H5O_info_t oinfo
926
927    ret = H5Oget_info(self._v_objectid, &oinfo)
928    if ret < 0:
929      raise HDF5ExtError("Unable to get object info for '%s'" %
930                         self. _v_pathname)
931
932    return ObjInfo(oinfo.addr, oinfo.rc)
933
934  def _get_obj_timestamps(self):
935    cdef herr_t ret = 0
936    cdef H5O_info_t oinfo
937
938    ret = H5Oget_info(self._v_objectid, &oinfo)
939    if ret < 0:
940      raise HDF5ExtError("Unable to get object info for '%s'" %
941                         self. _v_pathname)
942
943    return ObjTimestamps(oinfo.atime, oinfo.mtime, oinfo.ctime,
944                         oinfo.btime)
945
946
947cdef class Group(Node):
948  cdef hid_t   group_id
949
950  def _g_create(self):
951    cdef hid_t ret
952    cdef bytes encoded_name
953
954    encoded_name = self.name.encode('utf-8')
955
956    # @TODO: set property list --> utf-8
957
958    # Create a new group
959    ret = H5Gcreate(self.parent_id, encoded_name, H5P_DEFAULT, H5P_DEFAULT,
960                    H5P_DEFAULT)
961    if ret < 0:
962      raise HDF5ExtError("Can't create the group %s." % self.name)
963    self.group_id = ret
964    return self.group_id
965
966  def _g_open(self):
967    cdef hid_t ret
968    cdef bytes encoded_name
969
970    encoded_name = self.name.encode('utf-8')
971
972    ret = H5Gopen(self.parent_id, encoded_name, H5P_DEFAULT)
973    if ret < 0:
974      raise HDF5ExtError("Can't open the group: '%s'." % self.name)
975    self.group_id = ret
976    return self.group_id
977
978  def _g_get_objinfo(self, object h5name):
979    """Check whether 'name' is a children of 'self' and return its type."""
980
981    cdef int ret
982    cdef object node_type
983    cdef bytes encoded_name
984    cdef char *cname
985
986    encoded_name = h5name.encode('utf-8')
987    # Get the C pointer
988    cname = encoded_name
989
990    ret = get_linkinfo(self.group_id, cname)
991    if ret == -2 or ret == H5L_TYPE_ERROR:
992      node_type = "NoSuchNode"
993    elif ret == H5L_TYPE_SOFT:
994      node_type = "SoftLink"
995    elif ret == H5L_TYPE_EXTERNAL:
996      node_type = "ExternalLink"
997    elif ret == H5L_TYPE_HARD:
998        ret = get_objinfo(self.group_id, cname)
999        if ret == -2:
1000          node_type = "NoSuchNode"
1001        elif ret == H5O_TYPE_UNKNOWN:
1002          node_type = "Unknown"
1003        elif ret == H5O_TYPE_GROUP:
1004          node_type = "Group"
1005        elif ret == H5O_TYPE_DATASET:
1006          node_type = "Leaf"
1007        elif ret == H5O_TYPE_NAMED_DATATYPE:
1008          node_type = "NamedType"              # Not supported yet
1009        #else H5O_TYPE_LINK:
1010        #    # symbolic link
1011        #    raise RuntimeError('unexpected object type')
1012        else:
1013          node_type = "Unknown"
1014    return node_type
1015
1016  def _g_list_group(self, parent):
1017    """Return a tuple with the groups and the leaves hanging from self."""
1018
1019    cdef bytes encoded_name
1020
1021    encoded_name = self.name.encode('utf-8')
1022
1023    return Giterate(parent._v_objectid, self._v_objectid, encoded_name)
1024
1025
1026  def _g_get_gchild_attr(self, group_name, attr_name):
1027    """Return an attribute of a child `Group`.
1028
1029    If the attribute does not exist, ``None`` is returned.
1030
1031    """
1032
1033    cdef hid_t gchild_id
1034    cdef object retvalue
1035    cdef bytes encoded_group_name
1036    cdef bytes encoded_attr_name
1037
1038    encoded_group_name = group_name.encode('utf-8')
1039    encoded_attr_name = attr_name.encode('utf-8')
1040
1041    # Open the group
1042    retvalue = None  # Default value
1043    gchild_id = H5Gopen(self.group_id, encoded_group_name, H5P_DEFAULT)
1044    if gchild_id < 0:
1045      raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
1046                         (group_name, self._v_pathname))
1047    retvalue = get_attribute_string_or_none(gchild_id, encoded_attr_name)
1048    # Close child group
1049    H5Gclose(gchild_id)
1050
1051    return retvalue
1052
1053
1054  def _g_get_lchild_attr(self, leaf_name, attr_name):
1055    """Return an attribute of a child `Leaf`.
1056
1057    If the attribute does not exist, ``None`` is returned.
1058
1059    """
1060
1061    cdef hid_t leaf_id
1062    cdef object retvalue
1063    cdef bytes encoded_leaf_name
1064    cdef bytes encoded_attr_name
1065
1066    encoded_leaf_name = leaf_name.encode('utf-8')
1067    encoded_attr_name = attr_name.encode('utf-8')
1068
1069    # Open the dataset
1070    leaf_id = H5Dopen(self.group_id, encoded_leaf_name, H5P_DEFAULT)
1071    if leaf_id < 0:
1072      raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
1073                         (leaf_name, self._v_pathname))
1074    retvalue = get_attribute_string_or_none(leaf_id, encoded_attr_name)
1075    # Close the dataset
1076    H5Dclose(leaf_id)
1077    return retvalue
1078
1079
1080  def _g_flush_group(self):
1081    # Close the group
1082    H5Fflush(self.group_id, H5F_SCOPE_GLOBAL)
1083
1084
1085  def _g_close_group(self):
1086    cdef int ret
1087
1088    ret = H5Gclose(self.group_id)
1089    if ret < 0:
1090      raise HDF5ExtError("Problems closing the Group %s" % self.name)
1091    self.group_id = 0  # indicate that this group is closed
1092
1093
1094  def _g_move_node(self, hid_t oldparent, oldname, hid_t newparent, newname,
1095                   oldpathname, newpathname):
1096    cdef int ret
1097    cdef bytes encoded_oldname, encoded_newname
1098
1099    encoded_oldname = oldname.encode('utf-8')
1100    encoded_newname = newname.encode('utf-8')
1101
1102    ret = H5Lmove(oldparent, encoded_oldname, newparent, encoded_newname,
1103                  H5P_DEFAULT, H5P_DEFAULT)
1104    if ret < 0:
1105      raise HDF5ExtError("Problems moving the node %s to %s" %
1106                         (oldpathname, newpathname) )
1107    return ret
1108
1109
1110
1111cdef class Leaf(Node):
1112  # Instance variables declared in .pxd
1113
1114  def _get_storage_size(self):
1115      return H5Dget_storage_size(self.dataset_id)
1116
1117  def _get_obj_track_times(self):
1118    """Get track_times boolean for dataset
1119
1120    Uses H5Pget_obj_track_times to determine if the dataset was
1121    created with the track_times property.  If the leaf is not a
1122    dataset, this will fail with HDF5ExtError.
1123
1124    The track times dataset creation property does not seem to survive
1125    closing and reopening as of HDF5 1.8.17.  Currently, it may be
1126    more accurate to test whether the ctime for the dataset is 0:
1127    track_times = (leaf._get_obj_timestamps().ctime == 0)
1128    """
1129    cdef:
1130      hbool_t track_times = True
1131
1132    if self.dataset_id < 0:
1133      raise ValueError('Invalid dataset id %s' % self.dataset_id)
1134
1135    plist_id = H5Dget_create_plist(self.dataset_id)
1136    if plist_id < 0:
1137      raise HDF5ExtError("Could not get dataset creation property list "
1138                         "from dataset id %s" % self.dataset_id)
1139
1140    try:
1141      # Get track_times boolean for dataset
1142      if H5Pget_obj_track_times(plist_id, &track_times) < 0:
1143        raise HDF5ExtError("Could not get dataset track_times property "
1144                           "from dataset id %s" % self.dataset_id)
1145    finally:
1146      H5Pclose(plist_id)
1147
1148    return bool(track_times)
1149
1150  def _g_new(self, where, name, init):
1151    if init:
1152      # Put this info to 0 just when the class is initialized
1153      self.dataset_id = -1
1154      self.type_id = -1
1155      self.base_type_id = -1
1156      self.disk_type_id = -1
1157    super(Leaf, self)._g_new(where, name, init)
1158
1159  cdef _get_type_ids(self):
1160    """Get the disk and native HDF5 types associated with this leaf.
1161
1162    It is guaranteed that both disk and native types are not the same
1163    descriptor (so that it is safe to close them separately).
1164
1165    """
1166
1167    cdef hid_t disk_type_id, native_type_id
1168
1169    disk_type_id = H5Dget_type(self.dataset_id)
1170    native_type_id = get_native_type(disk_type_id)
1171    return disk_type_id, native_type_id
1172
1173  cdef _convert_time64(self, ndarray nparr, int sense):
1174    """Converts a NumPy of Time64 elements between NumPy and HDF5 formats.
1175
1176    NumPy to HDF5 conversion is performed when 'sense' is 0.  Otherwise, HDF5
1177    to NumPy conversion is performed.  The conversion is done in place,
1178    i.e. 'nparr' is modified.
1179
1180    """
1181
1182    cdef void *t64buf
1183    cdef long byteoffset, bytestride, nelements
1184    cdef hsize_t nrecords
1185
1186    byteoffset = 0   # NumPy objects doesn't have an offset
1187    if (<object>nparr).shape == ():
1188      # 0-dim array does contain *one* element
1189      nrecords = 1
1190      bytestride = 8
1191    else:
1192      nrecords = len(nparr)
1193      bytestride = nparr.strides[0]  # supports multi-dimensional recarray
1194    nelements = <size_t>nparr.size / nrecords
1195    t64buf = nparr.data
1196
1197    conv_float64_timeval32(
1198      t64buf, byteoffset, bytestride, nrecords, nelements, sense)
1199
1200  # can't do since cdef'd
1201
1202  def _g_truncate(self, hsize_t size):
1203    """Truncate a Leaf to `size` nrows."""
1204
1205    cdef hsize_t ret
1206
1207    ret = truncate_dset(self.dataset_id, self.maindim, size)
1208    if ret < 0:
1209      raise HDF5ExtError("Problems truncating the leaf: %s" % self)
1210
1211    classname = self.__class__.__name__
1212    if classname in ('EArray', 'CArray'):
1213      # Update the new dimensionality
1214      self.dims[self.maindim] = size
1215      # Update the shape
1216      shape = list(self.shape)
1217      shape[self.maindim] = SizeType(size)
1218      self.shape = tuple(shape)
1219    elif classname in ('Table', 'VLArray'):
1220      self.nrows = size
1221    else:
1222      raise ValueError("Unexpected classname: %s" % classname)
1223
1224  def _g_flush(self):
1225    # Flush the dataset (in fact, the entire buffers in file!)
1226    if self.dataset_id >= 0:
1227        H5Fflush(self.dataset_id, H5F_SCOPE_GLOBAL)
1228
1229  def _g_close(self):
1230    # Close dataset in HDF5 space
1231    # Release resources
1232    if self.type_id >= 0:
1233      H5Tclose(self.type_id)
1234    if self.disk_type_id >= 0:
1235      H5Tclose(self.disk_type_id)
1236    if self.base_type_id >= 0:
1237      H5Tclose(self.base_type_id)
1238    if self.dataset_id >= 0:
1239      H5Dclose(self.dataset_id)
1240
1241
1242cdef class Array(Leaf):
1243  # Instance variables declared in .pxd
1244
1245  def _create_array(self, ndarray nparr, object title, object atom):
1246    cdef int i
1247    cdef herr_t ret
1248    cdef void *rbuf
1249    cdef bytes complib, version, class_
1250    cdef object dtype_, atom_, shape
1251    cdef ndarray dims
1252    cdef bytes encoded_title, encoded_name
1253    cdef H5T_cset_t cset = H5T_CSET_ASCII
1254
1255    encoded_title = title.encode('utf-8')
1256    encoded_name = self.name.encode('utf-8')
1257
1258    # Get the HDF5 type associated with this numpy type
1259    shape = (<object>nparr).shape
1260    if atom is None or atom.shape == ():
1261      dtype_ = nparr.dtype.base
1262      atom_ = Atom.from_dtype(dtype_)
1263    else:
1264      atom_ = atom
1265      shape = shape[:-len(atom_.shape)]
1266    self.disk_type_id = atom_to_hdf5_type(atom_, self.byteorder)
1267    if self.disk_type_id < 0:
1268      raise HDF5ExtError(
1269        "Problems creating the %s: invalid disk type ID for atom %s" % (
1270            self.__class__.__name__, atom_))
1271
1272    # Allocate space for the dimension axis info and fill it
1273    dims = numpy.array(shape, dtype=numpy.intp)
1274    self.rank = len(shape)
1275    self.dims = npy_malloc_dims(self.rank, <npy_intp *>(dims.data))
1276    # Get the pointer to the buffer data area
1277    strides = (<object>nparr).strides
1278    # When the object is not a 0-d ndarray and its strides == 0, that
1279    # means that the array does not contain actual data
1280    if strides != () and sum(strides) == 0:
1281      rbuf = NULL
1282    else:
1283      rbuf = nparr.data
1284    # Save the array
1285    complib = (self.filters.complib or '').encode('utf-8')
1286    version = self._v_version.encode('utf-8')
1287    class_ = self._c_classid.encode('utf-8')
1288    self.dataset_id = H5ARRAYmake(self.parent_id, encoded_name, version,
1289                                  self.rank, self.dims,
1290                                  self.extdim, self.disk_type_id, NULL, NULL,
1291                                  self.filters.complevel, complib,
1292                                  self.filters.shuffle_bitshuffle,
1293                                  self.filters.fletcher32,
1294                                  self._want_track_times,
1295                                  rbuf)
1296    if self.dataset_id < 0:
1297      raise HDF5ExtError("Problems creating the %s." % self.__class__.__name__)
1298
1299    if self._v_file.params['PYTABLES_SYS_ATTRS']:
1300      if PY_MAJOR_VERSION > 2:
1301        cset = H5T_CSET_UTF8
1302      # Set the conforming array attributes
1303      H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_,
1304                                 len(class_), cset)
1305      H5ATTRset_attribute_string(self.dataset_id, "VERSION", version,
1306                                 len(version), cset)
1307      H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title,
1308                                 len(encoded_title), cset)
1309
1310    # Get the native type (so that it is HDF5 who is the responsible to deal
1311    # with non-native byteorders on-disk)
1312    self.type_id = get_native_type(self.disk_type_id)
1313
1314    return self.dataset_id, shape, atom_
1315
1316
1317  def _create_carray(self, object title):
1318    cdef int i
1319    cdef herr_t ret
1320    cdef void *rbuf
1321    cdef bytes complib, version, class_
1322    cdef ndarray dflts
1323    cdef void *fill_data
1324    cdef ndarray extdim
1325    cdef object atom
1326    cdef bytes encoded_title, encoded_name
1327
1328    encoded_title = title.encode('utf-8')
1329    encoded_name = self.name.encode('utf-8')
1330
1331    atom = self.atom
1332    self.disk_type_id = atom_to_hdf5_type(atom, self.byteorder)
1333
1334    self.rank = len(self.shape)
1335    self.dims = malloc_dims(self.shape)
1336    if self.chunkshape:
1337      self.dims_chunk = malloc_dims(self.chunkshape)
1338
1339    rbuf = NULL   # The data pointer. We don't have data to save initially
1340    # Encode strings
1341    complib = (self.filters.complib or '').encode('utf-8')
1342    version = self._v_version.encode('utf-8')
1343    class_ = self._c_classid.encode('utf-8')
1344
1345    # Get the fill values
1346    if isinstance(atom.dflt, numpy.ndarray) or atom.dflt:
1347      dflts = numpy.array(atom.dflt, dtype=atom.dtype)
1348      fill_data = dflts.data
1349    else:
1350      dflts = numpy.zeros((), dtype=atom.dtype)
1351      fill_data = NULL
1352    if atom.shape == ():
1353      # The default is preferred as a scalar value instead of 0-dim array
1354      atom.dflt = dflts[()]
1355    else:
1356      atom.dflt = dflts
1357
1358    # Create the CArray/EArray
1359    self.dataset_id = H5ARRAYmake(
1360      self.parent_id, encoded_name, version, self.rank,
1361      self.dims, self.extdim, self.disk_type_id, self.dims_chunk,
1362      fill_data, self.filters.complevel, complib,
1363        self.filters.shuffle_bitshuffle, self.filters.fletcher32,
1364        self._want_track_times, rbuf)
1365    if self.dataset_id < 0:
1366      raise HDF5ExtError("Problems creating the %s." % self.__class__.__name__)
1367
1368    if self._v_file.params['PYTABLES_SYS_ATTRS']:
1369      # Set the conforming array attributes
1370      H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_,
1371                                 len(class_), H5T_CSET_ASCII)
1372      H5ATTRset_attribute_string(self.dataset_id, "VERSION", version,
1373                                 len(version), H5T_CSET_ASCII)
1374      H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title,
1375                                 len(encoded_title), H5T_CSET_ASCII)
1376      if self.extdim >= 0:
1377        extdim = <ndarray>numpy.array([self.extdim], dtype="int32")
1378        # Attach the EXTDIM attribute in case of enlargeable arrays
1379        H5ATTRset_attribute(self.dataset_id, "EXTDIM", H5T_NATIVE_INT,
1380                            0, NULL, extdim.data)
1381
1382    # Get the native type (so that it is HDF5 who is the responsible to deal
1383    # with non-native byteorders on-disk)
1384    self.type_id = get_native_type(self.disk_type_id)
1385
1386    return self.dataset_id
1387
1388
1389  def _open_array(self):
1390    cdef size_t type_size, type_precision
1391    cdef H5T_class_t class_id
1392    cdef char cbyteorder[11]  # "irrelevant" fits easily here
1393    cdef int i
1394    cdef int extdim
1395    cdef herr_t ret
1396    cdef object shape, chunkshapes, atom
1397    cdef int fill_status
1398    cdef ndarray dflts
1399    cdef void *fill_data
1400    cdef bytes encoded_name
1401    cdef str byteorder
1402
1403    encoded_name = self.name.encode('utf-8')
1404
1405    # Open the dataset
1406    self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT)
1407    if self.dataset_id < 0:
1408      raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
1409                         (self.name, self._v_parent._v_pathname))
1410    # Get the datatype handles
1411    self.disk_type_id, self.type_id = self._get_type_ids()
1412    # Get the atom for this type
1413    atom = atom_from_hdf5_type(self.type_id)
1414
1415    # Get the rank for this array object
1416    if H5ARRAYget_ndims(self.dataset_id, &self.rank) < 0:
1417      raise HDF5ExtError("Problems getting ndims!")
1418    # Allocate space for the dimension axis info
1419    self.dims = <hsize_t *>malloc(self.rank * sizeof(hsize_t))
1420    self.maxdims = <hsize_t *>malloc(self.rank * sizeof(hsize_t))
1421    # Get info on dimensions, class and type (of base class)
1422    ret = H5ARRAYget_info(self.dataset_id, self.disk_type_id,
1423                          self.dims, self.maxdims,
1424                          &class_id, cbyteorder)
1425    if ret < 0:
1426      raise HDF5ExtError("Unable to get array info.")
1427
1428    byteorder = cstr_to_pystr(cbyteorder)
1429
1430    # Get the extendable dimension (if any)
1431    self.extdim = -1  # default is non-extensible Array
1432    for i from 0 <= i < self.rank:
1433      if self.maxdims[i] == -1:
1434        self.extdim = i
1435        break
1436
1437    # Get the shape as a python tuple
1438    shape = getshape(self.rank, self.dims)
1439
1440    # Allocate space for the dimension chunking info
1441    self.dims_chunk = <hsize_t *>malloc(self.rank * sizeof(hsize_t))
1442    if H5ARRAYget_chunkshape(self.dataset_id, self.rank, self.dims_chunk) < 0:
1443      # The Array class is not chunked!
1444      chunkshapes = None
1445    else:
1446      # Get the chunkshape as a python tuple
1447      chunkshapes = getshape(self.rank, self.dims_chunk)
1448
1449    # object arrays should not be read directly into memory
1450    if atom.dtype != numpy.object:
1451      # Get the fill value
1452      dflts = numpy.zeros((), dtype=atom.dtype)
1453      fill_data = dflts.data
1454      H5ARRAYget_fill_value(self.dataset_id, self.type_id,
1455                            &fill_status, fill_data);
1456      if fill_status == H5D_FILL_VALUE_UNDEFINED:
1457        # This can only happen with datasets created with other libraries
1458        # than PyTables.
1459        dflts = None
1460      if dflts is not None and atom.shape == ():
1461        # The default is preferred as a scalar value instead of 0-dim array
1462        atom.dflt = dflts[()]
1463      else:
1464        atom.dflt = dflts
1465
1466    # Get the byteorder
1467    self.byteorder = correct_byteorder(atom.type, byteorder)
1468
1469    return self.dataset_id, atom, shape, chunkshapes
1470
1471
1472  def _append(self, ndarray nparr):
1473    cdef int ret, extdim
1474    cdef hsize_t *dims_arr
1475    cdef void *rbuf
1476    cdef object shape
1477
1478    if self.atom.kind == "reference":
1479      raise ValueError("Cannot append to the reference types")
1480
1481    # Allocate space for the dimension axis info
1482    dims_arr = npy_malloc_dims(self.rank, nparr.shape)
1483    # Get the pointer to the buffer data area
1484    rbuf = nparr.data
1485    # Convert some NumPy types to HDF5 before storing.
1486    if self.atom.type == 'time64':
1487      self._convert_time64(nparr, 0)
1488
1489    # Append the records
1490    extdim = self.extdim
1491    with nogil:
1492        ret = H5ARRAYappend_records(self.dataset_id, self.type_id, self.rank,
1493                                    self.dims, dims_arr, extdim, rbuf)
1494
1495    if ret < 0:
1496      raise HDF5ExtError("Problems appending the elements")
1497
1498    free(dims_arr)
1499    # Update the new dimensionality
1500    shape = list(self.shape)
1501    shape[self.extdim] = SizeType(self.dims[self.extdim])
1502    self.shape = tuple(shape)
1503
1504  def _read_array(self, hsize_t start, hsize_t stop, hsize_t step,
1505                 ndarray nparr):
1506    cdef herr_t ret
1507    cdef void *rbuf
1508    cdef hsize_t nrows
1509    cdef int extdim
1510    cdef size_t item_size = H5Tget_size(self.type_id)
1511    cdef void * refbuf = NULL
1512
1513    # Number of rows to read
1514    nrows = get_len_of_range(start, stop, step)
1515
1516    # Get the pointer to the buffer data area
1517    if self.atom.kind == "reference":
1518      refbuf = malloc(nrows * item_size)
1519      rbuf = refbuf
1520    else:
1521      rbuf = nparr.data
1522
1523    if hasattr(self, "extdim"):
1524      extdim = self.extdim
1525    else:
1526      extdim = -1
1527
1528    # Do the physical read
1529    with nogil:
1530        ret = H5ARRAYread(self.dataset_id, self.type_id, start, nrows, step,
1531                          extdim, rbuf)
1532
1533    try:
1534      if ret < 0:
1535        raise HDF5ExtError("Problems reading the array data.")
1536
1537      # Get the pointer to the buffer data area
1538      if self.atom.kind == "reference":
1539        load_reference(self.dataset_id, <hobj_ref_t *>rbuf, item_size, nparr)
1540    finally:
1541      if refbuf:
1542        free(refbuf)
1543        refbuf = NULL
1544
1545    if self.atom.kind == 'time':
1546      # Swap the byteorder by hand (this is not currently supported by HDF5)
1547      if H5Tget_order(self.type_id) != platform_byteorder:
1548        nparr.byteswap(True)
1549
1550    # Convert some HDF5 types to NumPy after reading.
1551    if self.atom.type == 'time64':
1552      self._convert_time64(nparr, 1)
1553
1554    return
1555
1556
1557  def _g_read_slice(self, ndarray startl, ndarray stopl, ndarray stepl,
1558                   ndarray nparr):
1559    cdef herr_t ret
1560    cdef hsize_t *start
1561    cdef hsize_t *stop
1562    cdef hsize_t *step
1563    cdef void *rbuf
1564    cdef size_t item_size = H5Tget_size(self.type_id)
1565    cdef void * refbuf = NULL
1566
1567    # Get the pointer to the buffer data area of startl, stopl and stepl arrays
1568    start = <hsize_t *>startl.data
1569    stop = <hsize_t *>stopl.data
1570    step = <hsize_t *>stepl.data
1571
1572    # Get the pointer to the buffer data area
1573    if self.atom.kind == "reference":
1574      refbuf = malloc(nparr.size * item_size)
1575      rbuf = refbuf
1576    else:
1577      rbuf = nparr.data
1578
1579    # Do the physical read
1580    with nogil:
1581        ret = H5ARRAYreadSlice(self.dataset_id, self.type_id,
1582                               start, stop, step, rbuf)
1583    try:
1584      if ret < 0:
1585        raise HDF5ExtError("Problems reading the array data.")
1586
1587      # Get the pointer to the buffer data area
1588      if self.atom.kind == "reference":
1589        load_reference(self.dataset_id, <hobj_ref_t *>rbuf, item_size, nparr)
1590    finally:
1591      if refbuf:
1592        free(refbuf)
1593        refbuf = NULL
1594
1595    if self.atom.kind == 'time':
1596      # Swap the byteorder by hand (this is not currently supported by HDF5)
1597      if H5Tget_order(self.type_id) != platform_byteorder:
1598        nparr.byteswap(True)
1599
1600    # Convert some HDF5 types to NumPy after reading
1601    if self.atom.type == 'time64':
1602      self._convert_time64(nparr, 1)
1603
1604    return
1605
1606
1607  def _g_read_coords(self, ndarray coords, ndarray nparr):
1608    """Read coordinates in an already created NumPy array."""
1609
1610    cdef herr_t ret
1611    cdef hid_t space_id
1612    cdef hid_t mem_space_id
1613    cdef hsize_t size
1614    cdef void *rbuf
1615    cdef object mode
1616    cdef size_t item_size = H5Tget_size(self.type_id)
1617    cdef void * refbuf = NULL
1618
1619    # Get the dataspace handle
1620    space_id = H5Dget_space(self.dataset_id)
1621    # Create a memory dataspace handle
1622    size = nparr.size
1623    mem_space_id = H5Screate_simple(1, &size, NULL)
1624
1625    # Select the dataspace to be read
1626    H5Sselect_elements(space_id, H5S_SELECT_SET,
1627                       <size_t>size, <hsize_t *>coords.data)
1628
1629    # Get the pointer to the buffer data area
1630    if self.atom.kind == "reference":
1631      refbuf = malloc(nparr.size * item_size)
1632      rbuf = refbuf
1633    else:
1634      rbuf = nparr.data
1635
1636    # Do the actual read
1637    with nogil:
1638        ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id,
1639                      H5P_DEFAULT, rbuf)
1640
1641    try:
1642      if ret < 0:
1643        raise HDF5ExtError("Problems reading the array data.")
1644
1645      # Get the pointer to the buffer data area
1646      if self.atom.kind == "reference":
1647        load_reference(self.dataset_id, <hobj_ref_t *>rbuf, item_size, nparr)
1648    finally:
1649      if refbuf:
1650        free(refbuf)
1651        refbuf = NULL
1652
1653    # Terminate access to the memory dataspace
1654    H5Sclose(mem_space_id)
1655    # Terminate access to the dataspace
1656    H5Sclose(space_id)
1657
1658    if self.atom.kind == 'time':
1659      # Swap the byteorder by hand (this is not currently supported by HDF5)
1660      if H5Tget_order(self.type_id) != platform_byteorder:
1661        nparr.byteswap(True)
1662
1663    # Convert some HDF5 types to NumPy after reading
1664    if self.atom.type == 'time64':
1665      self._convert_time64(nparr, 1)
1666
1667    return
1668
1669
1670  def perform_selection(self, space_id, start, count, step, idx, mode):
1671    """Performs a selection using start/count/step in the given axis.
1672
1673    All other axes have their full range selected.  The selection is
1674    added to the current `space_id` selection using the given mode.
1675
1676    Note: This is a backport from the h5py project.
1677
1678    """
1679
1680    cdef int select_mode
1681    cdef ndarray start_, count_, step_
1682    cdef hsize_t *startp
1683    cdef hsize_t *countp
1684    cdef hsize_t *stepp
1685
1686    # Build arrays for the selection parameters
1687    startl, countl, stepl = [], [], []
1688    for i, x in enumerate(self.shape):
1689      if i != idx:
1690        startl.append(0)
1691        countl.append(x)
1692        stepl.append(1)
1693      else:
1694        startl.append(start)
1695        countl.append(count)
1696        stepl.append(step)
1697    start_ = numpy.array(startl, dtype="i8")
1698    count_ = numpy.array(countl, dtype="i8")
1699    step_ = numpy.array(stepl, dtype="i8")
1700
1701    # Get the pointers to array data
1702    startp = <hsize_t *>start_.data
1703    countp = <hsize_t *>count_.data
1704    stepp = <hsize_t *>step_.data
1705
1706    # Do the actual selection
1707    select_modes = {"AND": H5S_SELECT_AND, "NOTB": H5S_SELECT_NOTB}
1708    assert mode in select_modes
1709    select_mode = select_modes[mode]
1710    H5Sselect_hyperslab(space_id, <H5S_seloper_t>select_mode,
1711                        startp, stepp, countp, NULL)
1712
1713  def _g_read_selection(self, object selection, ndarray nparr):
1714    """Read a selection in an already created NumPy array."""
1715
1716    cdef herr_t ret
1717    cdef hid_t space_id
1718    cdef hid_t mem_space_id
1719    cdef hsize_t size
1720    cdef void *rbuf
1721    cdef object mode
1722    cdef size_t item_size = H5Tget_size(self.type_id)
1723    cdef void * refbuf = NULL
1724
1725    # Get the dataspace handle
1726    space_id = H5Dget_space(self.dataset_id)
1727    # Create a memory dataspace handle
1728    size = nparr.size
1729    mem_space_id = H5Screate_simple(1, &size, NULL)
1730
1731    # Select the dataspace to be read
1732    # Start by selecting everything
1733    H5Sselect_all(space_id)
1734    # Now refine with outstanding selections
1735    for args in selection:
1736      self.perform_selection(space_id, *args)
1737
1738    # Get the pointer to the buffer data area
1739    if self.atom.kind == "reference":
1740      refbuf = malloc(nparr.size * item_size)
1741      rbuf = refbuf
1742    else:
1743      rbuf = nparr.data
1744
1745    # Do the actual read
1746    with nogil:
1747        ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id,
1748                      H5P_DEFAULT, rbuf)
1749
1750    try:
1751      if ret < 0:
1752        raise HDF5ExtError("Problems reading the array data.")
1753
1754      # Get the pointer to the buffer data area
1755      if self.atom.kind == "reference":
1756        load_reference(self.dataset_id, <hobj_ref_t *>rbuf, item_size, nparr)
1757    finally:
1758      if refbuf:
1759        free(refbuf)
1760        refbuf = NULL
1761
1762    # Terminate access to the memory dataspace
1763    H5Sclose(mem_space_id)
1764    # Terminate access to the dataspace
1765    H5Sclose(space_id)
1766
1767    if self.atom.kind == 'time':
1768      # Swap the byteorder by hand (this is not currently supported by HDF5)
1769      if H5Tget_order(self.type_id) != platform_byteorder:
1770        nparr.byteswap(True)
1771
1772    # Convert some HDF5 types to NumPy after reading
1773    if self.atom.type == 'time64':
1774      self._convert_time64(nparr, 1)
1775
1776    return
1777
1778
1779  def _g_write_slice(self, ndarray startl, ndarray stepl, ndarray countl,
1780                    ndarray nparr):
1781    """Write a slice in an already created NumPy array."""
1782
1783    cdef int ret
1784    cdef void *rbuf
1785    cdef void *temp
1786    cdef hsize_t *start
1787    cdef hsize_t *step
1788    cdef hsize_t *count
1789
1790    if self.atom.kind == "reference":
1791      raise ValueError("Cannot write reference types yet")
1792    # Get the pointer to the buffer data area
1793    rbuf = nparr.data
1794    # Get the start, step and count values
1795    start = <hsize_t *>startl.data
1796    step = <hsize_t *>stepl.data
1797    count = <hsize_t *>countl.data
1798
1799    # Convert some NumPy types to HDF5 before storing.
1800    if self.atom.type == 'time64':
1801      self._convert_time64(nparr, 0)
1802
1803    # Modify the elements:
1804    with nogil:
1805        ret = H5ARRAYwrite_records(self.dataset_id, self.type_id, self.rank,
1806                                   start, step, count, rbuf)
1807
1808    if ret < 0:
1809      raise HDF5ExtError("Internal error modifying the elements "
1810                "(H5ARRAYwrite_records returned errorcode -%i)" % (-ret))
1811
1812    return
1813
1814
1815  def _g_write_coords(self, ndarray coords, ndarray nparr):
1816    """Write a selection in an already created NumPy array."""
1817
1818    cdef herr_t ret
1819    cdef hid_t space_id
1820    cdef hid_t mem_space_id
1821    cdef hsize_t size
1822    cdef void *rbuf
1823    cdef object mode
1824
1825    if self.atom.kind == "reference":
1826      raise ValueError("Cannot write reference types yet")
1827    # Get the dataspace handle
1828    space_id = H5Dget_space(self.dataset_id)
1829    # Create a memory dataspace handle
1830    size = nparr.size
1831    mem_space_id = H5Screate_simple(1, &size, NULL)
1832
1833    # Select the dataspace to be written
1834    H5Sselect_elements(space_id, H5S_SELECT_SET,
1835                       <size_t>size, <hsize_t *>coords.data)
1836
1837    # Get the pointer to the buffer data area
1838    rbuf = nparr.data
1839
1840    # Convert some NumPy types to HDF5 before storing.
1841    if self.atom.type == 'time64':
1842      self._convert_time64(nparr, 0)
1843
1844    # Do the actual write
1845    with nogil:
1846        ret = H5Dwrite(self.dataset_id, self.type_id, mem_space_id, space_id,
1847                       H5P_DEFAULT, rbuf)
1848
1849    if ret < 0:
1850      raise HDF5ExtError("Problems writing the array data.")
1851
1852    # Terminate access to the memory dataspace
1853    H5Sclose(mem_space_id)
1854    # Terminate access to the dataspace
1855    H5Sclose(space_id)
1856
1857    return
1858
1859
1860  def _g_write_selection(self, object selection, ndarray nparr):
1861    """Write a selection in an already created NumPy array."""
1862
1863    cdef herr_t ret
1864    cdef hid_t space_id
1865    cdef hid_t mem_space_id
1866    cdef hsize_t size
1867    cdef void *rbuf
1868    cdef object mode
1869
1870    if self.atom.kind == "reference":
1871      raise ValueError("Cannot write reference types yet")
1872    # Get the dataspace handle
1873    space_id = H5Dget_space(self.dataset_id)
1874    # Create a memory dataspace handle
1875    size = nparr.size
1876    mem_space_id = H5Screate_simple(1, &size, NULL)
1877
1878    # Select the dataspace to be written
1879    # Start by selecting everything
1880    H5Sselect_all(space_id)
1881    # Now refine with outstanding selections
1882    for args in selection:
1883      self.perform_selection(space_id, *args)
1884
1885    # Get the pointer to the buffer data area
1886    rbuf = nparr.data
1887
1888    # Convert some NumPy types to HDF5 before storing.
1889    if self.atom.type == 'time64':
1890      self._convert_time64(nparr, 0)
1891
1892    # Do the actual write
1893    with nogil:
1894        ret = H5Dwrite(self.dataset_id, self.type_id, mem_space_id, space_id,
1895                       H5P_DEFAULT, rbuf)
1896
1897    if ret < 0:
1898      raise HDF5ExtError("Problems writing the array data.")
1899
1900    # Terminate access to the memory dataspace
1901    H5Sclose(mem_space_id)
1902    # Terminate access to the dataspace
1903    H5Sclose(space_id)
1904
1905    return
1906
1907
1908  def __dealloc__(self):
1909    if self.dims:
1910      free(<void *>self.dims)
1911    if self.maxdims:
1912      free(<void *>self.maxdims)
1913    if self.dims_chunk:
1914      free(self.dims_chunk)
1915
1916
1917cdef class VLArray(Leaf):
1918  # Instance variables
1919  cdef hsize_t nrecords
1920
1921  def _create_array(self, object title):
1922    cdef int rank
1923    cdef hsize_t *dims
1924    cdef herr_t ret
1925    cdef void *rbuf
1926    cdef bytes complib, version, class_
1927    cdef object type_, itemsize, atom, scatom
1928    cdef bytes encoded_title, encoded_name
1929    cdef H5T_cset_t cset = H5T_CSET_ASCII
1930
1931    encoded_title = title.encode('utf-8')
1932    encoded_name = self.name.encode('utf-8')
1933
1934    atom = self.atom
1935    if not hasattr(atom, 'size'):  # it is a pseudo-atom
1936      atom = atom.base
1937
1938    # Get the HDF5 type of the *scalar* atom
1939    scatom = atom.copy(shape=())
1940    self.base_type_id = atom_to_hdf5_type(scatom, self.byteorder)
1941    if self.base_type_id < 0:
1942      raise HDF5ExtError(
1943        "Problems creating the %s: invalid base type ID for atom %s" % (
1944            self.__class__.__name__, scatom))
1945
1946    # Allocate space for the dimension axis info
1947    rank = len(atom.shape)
1948    dims = malloc_dims(atom.shape)
1949
1950    rbuf = NULL   # We don't have data to save initially
1951
1952    # Encode strings
1953    complib = (self.filters.complib or '').encode('utf-8')
1954    version = self._v_version.encode('utf-8')
1955    class_ = self._c_classid.encode('utf-8')
1956
1957    # Create the vlarray
1958    self.dataset_id = H5VLARRAYmake(self.parent_id, encoded_name, version,
1959                                    rank, dims, self.base_type_id,
1960                                    self.chunkshape[0], rbuf,
1961                                    self.filters.complevel, complib,
1962                                    self.filters.shuffle_bitshuffle,
1963                                    self.filters.fletcher32,
1964                                    self._want_track_times, rbuf)
1965    if dims:
1966      free(<void *>dims)
1967    if self.dataset_id < 0:
1968      raise HDF5ExtError("Problems creating the VLArray.")
1969    self.nrecords = 0  # Initialize the number of records saved
1970
1971    if self._v_file.params['PYTABLES_SYS_ATTRS']:
1972      if PY_MAJOR_VERSION > 2:
1973        cset = H5T_CSET_UTF8
1974      # Set the conforming array attributes
1975      H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_,
1976                                 len(class_), cset)
1977      H5ATTRset_attribute_string(self.dataset_id, "VERSION", version,
1978                                 len(version), cset)
1979      H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title,
1980                                 len(encoded_title), cset)
1981
1982    # Get the datatype handles
1983    self.disk_type_id, self.type_id = self._get_type_ids()
1984
1985    return self.dataset_id
1986
1987
1988  def _open_array(self):
1989    cdef char cbyteorder[11]  # "irrelevant" fits easily here
1990    cdef int i, enumtype
1991    cdef int rank
1992    cdef herr_t ret
1993    cdef hsize_t nrecords, chunksize
1994    cdef object shape, type_
1995    cdef bytes encoded_name
1996    cdef str byteorder
1997
1998    encoded_name = self.name.encode('utf-8')
1999
2000    # Open the dataset
2001    self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT)
2002    if self.dataset_id < 0:
2003      raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
2004                         (self.name, self._v_parent._v_pathname))
2005    # Get the datatype handles
2006    self.disk_type_id, self.type_id = self._get_type_ids()
2007    # Get the atom for this type
2008    atom = atom_from_hdf5_type(self.type_id)
2009
2010    # Get info on dimensions & types (of base class)
2011    H5VLARRAYget_info(self.dataset_id, self.disk_type_id, &nrecords,
2012                      cbyteorder)
2013
2014    byteorder = cstr_to_pystr(cbyteorder)
2015
2016    # Get some properties of the atomic type
2017    self._atomicdtype = atom.dtype
2018    self._atomictype = atom.type
2019    self._atomicshape = atom.shape
2020    self._atomicsize = atom.size
2021
2022    # Get the byteorder
2023    self.byteorder = correct_byteorder(atom.type, byteorder)
2024
2025    # Get the chunkshape (VLArrays are unidimensional entities)
2026    H5ARRAYget_chunkshape(self.dataset_id, 1, &chunksize)
2027
2028    self.nrecords = nrecords  # Initialize the number of records saved
2029    return self.dataset_id, SizeType(nrecords), (SizeType(chunksize),), atom
2030
2031
2032  def _append(self, ndarray nparr, int nobjects):
2033    cdef int ret
2034    cdef void *rbuf
2035
2036    # Get the pointer to the buffer data area
2037    if nobjects:
2038      rbuf = nparr.data
2039      # Convert some NumPy types to HDF5 before storing.
2040      if self.atom.type == 'time64':
2041        self._convert_time64(nparr, 0)
2042    else:
2043      rbuf = NULL
2044
2045    # Append the records:
2046    with nogil:
2047        ret = H5VLARRAYappend_records(self.dataset_id, self.type_id,
2048                                      nobjects, self.nrecords, rbuf)
2049
2050    if ret < 0:
2051      raise HDF5ExtError("Problems appending the records.")
2052
2053    self.nrecords = self.nrecords + 1
2054
2055  def _modify(self, hsize_t nrow, ndarray nparr, int nobjects):
2056    cdef int ret
2057    cdef void *rbuf
2058
2059    # Get the pointer to the buffer data area
2060    rbuf = nparr.data
2061    if nobjects:
2062      # Convert some NumPy types to HDF5 before storing.
2063      if self.atom.type == 'time64':
2064        self._convert_time64(nparr, 0)
2065
2066    # Append the records:
2067    with nogil:
2068        ret = H5VLARRAYmodify_records(self.dataset_id, self.type_id,
2069                                      nrow, nobjects, rbuf)
2070
2071    if ret < 0:
2072      raise HDF5ExtError("Problems modifying the record.")
2073
2074    return nobjects
2075
2076  # Because the size of each "row" is unknown, there is no easy way to
2077  # calculate this value
2078  def _get_memory_size(self):
2079    cdef hid_t space_id
2080    cdef hsize_t size
2081    cdef herr_t ret
2082
2083    if self.nrows == 0:
2084      size = 0
2085    else:
2086      # Get the dataspace handle
2087      space_id = H5Dget_space(self.dataset_id)
2088      # Return the size of the entire dataset
2089      ret = H5Dvlen_get_buf_size(self.dataset_id, self.type_id, space_id,
2090                                 &size)
2091      if ret < 0:
2092        size = -1
2093
2094      # Terminate access to the dataspace
2095      H5Sclose(space_id)
2096
2097    return size
2098
2099  def _read_array(self, hsize_t start, hsize_t stop, hsize_t step):
2100    cdef int i
2101    cdef size_t vllen
2102    cdef herr_t ret
2103    cdef hvl_t *rdata
2104    cdef hsize_t nrows
2105    cdef hid_t space_id
2106    cdef hid_t mem_space_id
2107    cdef object buf, nparr, shape, datalist
2108
2109    # Compute the number of rows to read
2110    nrows = get_len_of_range(start, stop, step)
2111    if start + nrows > self.nrows:
2112      raise HDF5ExtError(
2113        "Asking for a range of rows exceeding the available ones!.",
2114        h5bt=False)
2115
2116    # Now, read the chunk of rows
2117    with nogil:
2118        # Allocate the necessary memory for keeping the row handlers
2119        rdata = <hvl_t *>malloc(<size_t>nrows*sizeof(hvl_t))
2120        # Get the dataspace handle
2121        space_id = H5Dget_space(self.dataset_id)
2122        # Create a memory dataspace handle
2123        mem_space_id = H5Screate_simple(1, &nrows, NULL)
2124        # Select the data to be read
2125        H5Sselect_hyperslab(space_id, H5S_SELECT_SET, &start, &step, &nrows,
2126                            NULL)
2127        # Do the actual read
2128        ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id,
2129                      H5P_DEFAULT, rdata)
2130
2131    if ret < 0:
2132      raise HDF5ExtError(
2133        "VLArray._read_array: Problems reading the array data.")
2134
2135    datalist = []
2136    for i from 0 <= i < nrows:
2137      # Number of atoms in row
2138      vllen = rdata[i].len
2139      # Get the pointer to the buffer data area
2140      if vllen > 0:
2141        # Create a buffer to keep this info. It is important to do a
2142        # copy, because we will dispose the buffer memory later on by
2143        # calling the H5Dvlen_reclaim. PyByteArray_FromStringAndSize does this.
2144        buf = PyByteArray_FromStringAndSize(<char *>rdata[i].p,
2145                                            vllen*self._atomicsize)
2146      else:
2147        # Case where there is info with zero lentgh
2148        buf = None
2149      # Compute the shape for the read array
2150      shape = list(self._atomicshape)
2151      shape.insert(0, vllen)  # put the length at the beginning of the shape
2152      nparr = numpy.ndarray(
2153        buffer=buf, dtype=self._atomicdtype.base, shape=shape)
2154      # Set the writeable flag for this ndarray object
2155      nparr.flags.writeable = True
2156      if self.atom.kind == 'time':
2157        # Swap the byteorder by hand (this is not currently supported by HDF5)
2158        if H5Tget_order(self.type_id) != platform_byteorder:
2159          nparr.byteswap(True)
2160      # Convert some HDF5 types to NumPy after reading.
2161      if self.atom.type == 'time64':
2162        self._convert_time64(nparr, 1)
2163      # Append this array to the output list
2164      datalist.append(nparr)
2165
2166    # Release resources
2167    # Reclaim all the (nested) VL data
2168    ret = H5Dvlen_reclaim(self.type_id, mem_space_id, H5P_DEFAULT, rdata)
2169    if ret < 0:
2170      raise HDF5ExtError("VLArray._read_array: error freeing the data buffer.")
2171    # Terminate access to the memory dataspace
2172    H5Sclose(mem_space_id)
2173    # Terminate access to the dataspace
2174    H5Sclose(space_id)
2175    # Free the amount of row pointers to VL row data
2176    free(rdata)
2177
2178    return datalist
2179
2180
2181  def get_row_size(self, row):
2182    """Return the total size in bytes of all the elements contained in a given row."""
2183
2184    cdef hid_t space_id
2185    cdef hsize_t size
2186    cdef herr_t ret
2187
2188    cdef hsize_t offset[1]
2189    cdef hsize_t count[1]
2190
2191    if row >= self.nrows:
2192      raise HDF5ExtError(
2193        "Asking for a range of rows exceeding the available ones!.",
2194        h5bt=False)
2195
2196    # Get the dataspace handle
2197    space_id = H5Dget_space(self.dataset_id)
2198
2199    offset[0] = row
2200    count[0] = 1
2201
2202    ret = H5Sselect_hyperslab(space_id, H5S_SELECT_SET, offset, NULL, count, NULL);
2203    if ret < 0:
2204      size = -1
2205
2206    ret = H5Dvlen_get_buf_size(self.dataset_id, self.type_id, space_id, &size)
2207    if ret < 0:
2208      size = -1
2209
2210    # Terminate access to the dataspace
2211    H5Sclose(space_id)
2212
2213    return size
2214
2215
2216cdef class UnImplemented(Leaf):
2217
2218  def _open_unimplemented(self):
2219    cdef object shape
2220    cdef char cbyteorder[11]  # "irrelevant" fits easily here
2221    cdef bytes encoded_name
2222    cdef str byteorder
2223
2224    encoded_name = self.name.encode('utf-8')
2225
2226    # Get info on dimensions
2227    shape = H5UIget_info(self.parent_id, encoded_name, cbyteorder)
2228    shape = tuple(map(SizeType, shape))
2229    self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT)
2230    byteorder = cstr_to_pystr(cbyteorder)
2231
2232    return (shape, byteorder, self.dataset_id)
2233
2234  def _g_close(self):
2235    H5Dclose(self.dataset_id)
2236
2237
2238## Local Variables:
2239## mode: python
2240## py-indent-offset: 2
2241## tab-width: 2
2242## fill-column: 78
2243## End:
2244