1# cython: language_level=3
2# This file is part of h5py, a Python interface to the HDF5 library.
3#
4# http://www.h5py.org
5#
6# Copyright 2008-2019 Andrew Collette and contributors
7#
8# License:  Standard 3-clause BSD; see "license.txt" for full license terms
9#           and contributor agreement.
10
11"""
12    HDF5 "H5T" data-type API
13
14    This module contains the datatype identifier class TypeID, and its
15    subclasses which represent things like integer/float/compound identifiers.
16    The majority of the H5T API is presented as methods on these identifiers.
17"""
18# C-level imports
19include "config.pxi"
20from ._objects cimport pdefault
21cimport numpy as cnp
22from .h5r cimport Reference, RegionReference
23from .h5p cimport PropID, propwrap
24
25
26from .utils cimport  emalloc, efree, require_tuple, convert_dims,\
27                     convert_tuple
28
29# Python imports
30import codecs
31import sys
32from collections import namedtuple
33import sys
34import numpy as np
35from .h5 import get_config
36
37from ._objects import phil, with_phil
38
# Module-wide h5py configuration object, fetched once at import time
cfg = get_config()

DEF MACHINE = UNAME_MACHINE  # processor architecture, provided by Cython
# Tag written onto the opaque datatype created below for Python object pointers
cdef char* H5PY_PYTHON_OPAQUE_TAG = "PYTHON:OBJECT"
43
44# === Custom C API ============================================================
45
cpdef TypeID typewrap(hid_t id_):
    """Wrap an HDF5 datatype identifier in the matching TypeID subclass.

    The datatype class reported by H5Tget_class selects the wrapper.  A
    class code with no dedicated subclass is wrapped in the base TypeID.
    """
    cdef H5T_class_t type_class = H5Tget_class(id_)

    if type_class == H5T_INTEGER:
        return TypeIntegerID(id_)
    if type_class == H5T_FLOAT:
        return TypeFloatID(id_)
    if type_class == H5T_TIME:
        return TypeTimeID(id_)
    if type_class == H5T_STRING:
        return TypeStringID(id_)
    if type_class == H5T_BITFIELD:
        return TypeBitfieldID(id_)
    if type_class == H5T_OPAQUE:
        return TypeOpaqueID(id_)
    if type_class == H5T_COMPOUND:
        return TypeCompoundID(id_)
    if type_class == H5T_REFERENCE:
        return TypeReferenceID(id_)
    if type_class == H5T_ENUM:
        return TypeEnumID(id_)
    if type_class == H5T_VLEN:
        return TypeVlenID(id_)
    if type_class == H5T_ARRAY:
        return TypeArrayID(id_)

    # Fallback: no specialised wrapper for this class code
    return TypeID(id_)
77
cdef object lockid(hid_t id_in):
    """Wrap ``id_in`` via typewrap and mark the wrapper object as locked.

    Only the ``locked`` attribute of the wrapper is set here; this helper
    does not itself call H5Tlock.
    """
    cdef TypeID wrapped = typewrap(id_in)
    wrapped.locked = 1
    return wrapped
83
84# === Public constants and data structures ====================================
85
86
# Enumeration H5T_class_t
# (datatype class codes; typewrap() dispatches on these same C values)
NO_CLASS  = H5T_NO_CLASS
INTEGER   = H5T_INTEGER
FLOAT     = H5T_FLOAT
TIME      = H5T_TIME
STRING    = H5T_STRING
BITFIELD  = H5T_BITFIELD
OPAQUE    = H5T_OPAQUE
COMPOUND  = H5T_COMPOUND
REFERENCE = H5T_REFERENCE
ENUM      = H5T_ENUM
VLEN      = H5T_VLEN
ARRAY     = H5T_ARRAY

# Enumeration H5T_sign_t
SGN_NONE   = H5T_SGN_NONE
SGN_2      = H5T_SGN_2

# Enumeration H5T_order_t
ORDER_LE    = H5T_ORDER_LE
ORDER_BE    = H5T_ORDER_BE
ORDER_VAX   = H5T_ORDER_VAX
ORDER_NONE  = H5T_ORDER_NONE

# H5T_DIR_* direction constants
DIR_DEFAULT = H5T_DIR_DEFAULT
DIR_ASCEND  = H5T_DIR_ASCEND
DIR_DESCEND = H5T_DIR_DESCEND

# Enumeration H5T_str_t
STR_NULLTERM = H5T_STR_NULLTERM
STR_NULLPAD  = H5T_STR_NULLPAD
STR_SPACEPAD = H5T_STR_SPACEPAD

# Enumeration H5T_norm_t
NORM_IMPLIED = H5T_NORM_IMPLIED
NORM_MSBSET = H5T_NORM_MSBSET
NORM_NONE = H5T_NORM_NONE

# Enumeration H5T_cset_t:
# (CSET_ASCII is assigned a second time, to the same value, in the
# "Character sets" section further down this module)
CSET_ASCII = H5T_CSET_ASCII

# Enumeration H5T_pad_t:
PAD_ZERO = H5T_PAD_ZERO
PAD_ONE = H5T_PAD_ONE
PAD_BACKGROUND = H5T_PAD_BACKGROUND

# ORDER_NATIVE is not an HDF5 constant: it is chosen from the byte order
# reported by the running Python interpreter.
if sys.byteorder == "little":    # Custom python addition
    ORDER_NATIVE = H5T_ORDER_LE
else:
    ORDER_NATIVE = H5T_ORDER_BE

# For conversion
BKG_NO = H5T_BKG_NO
BKG_TEMP = H5T_BKG_TEMP
BKG_YES = H5T_BKG_YES
142
143# --- Built-in HDF5 datatypes -------------------------------------------------
144
# Every object in this section is produced by lockid(), or is locked
# explicitly with .lock() after being customised.

# IEEE floating-point
IEEE_F32LE = lockid(H5T_IEEE_F32LE)
IEEE_F32BE = lockid(H5T_IEEE_F32BE)
IEEE_F64LE = lockid(H5T_IEEE_F64LE)
IEEE_F64BE = lockid(H5T_IEEE_F64BE)

# Signed 2's complement integer types
STD_I8LE  = lockid(H5T_STD_I8LE)
STD_I16LE = lockid(H5T_STD_I16LE)
STD_I32LE = lockid(H5T_STD_I32LE)
STD_I64LE = lockid(H5T_STD_I64LE)

STD_I8BE  = lockid(H5T_STD_I8BE)
STD_I16BE = lockid(H5T_STD_I16BE)
STD_I32BE = lockid(H5T_STD_I32BE)
STD_I64BE = lockid(H5T_STD_I64BE)

# Bitfields
STD_B8LE = lockid(H5T_STD_B8LE)
STD_B16LE = lockid(H5T_STD_B16LE)
STD_B32LE = lockid(H5T_STD_B32LE)
STD_B64LE = lockid(H5T_STD_B64LE)

STD_B8BE = lockid(H5T_STD_B8BE)
STD_B16BE = lockid(H5T_STD_B16BE)
STD_B32BE = lockid(H5T_STD_B32BE)
STD_B64BE = lockid(H5T_STD_B64BE)

# Unsigned integers
STD_U8LE  = lockid(H5T_STD_U8LE)
STD_U16LE = lockid(H5T_STD_U16LE)
STD_U32LE = lockid(H5T_STD_U32LE)
STD_U64LE = lockid(H5T_STD_U64LE)

STD_U8BE  = lockid(H5T_STD_U8BE)
STD_U16BE = lockid(H5T_STD_U16BE)
STD_U32BE = lockid(H5T_STD_U32BE)
STD_U64BE = lockid(H5T_STD_U64BE)

# Native types by bytesize
NATIVE_B8 = lockid(H5T_NATIVE_B8)
NATIVE_INT8 = lockid(H5T_NATIVE_INT8)
NATIVE_UINT8 = lockid(H5T_NATIVE_UINT8)
NATIVE_B16 = lockid(H5T_NATIVE_B16)
NATIVE_INT16 = lockid(H5T_NATIVE_INT16)
NATIVE_UINT16 = lockid(H5T_NATIVE_UINT16)
NATIVE_B32 = lockid(H5T_NATIVE_B32)
NATIVE_INT32 = lockid(H5T_NATIVE_INT32)
NATIVE_UINT32 = lockid(H5T_NATIVE_UINT32)
NATIVE_B64 = lockid(H5T_NATIVE_B64)
NATIVE_INT64 = lockid(H5T_NATIVE_INT64)
NATIVE_UINT64 = lockid(H5T_NATIVE_UINT64)
NATIVE_FLOAT = lockid(H5T_NATIVE_FLOAT)
NATIVE_DOUBLE = lockid(H5T_NATIVE_DOUBLE)
NATIVE_LDOUBLE = lockid(H5T_NATIVE_LDOUBLE)

# Unix time types
UNIX_D32LE = lockid(H5T_UNIX_D32LE)
UNIX_D64LE = lockid(H5T_UNIX_D64LE)
UNIX_D32BE = lockid(H5T_UNIX_D32BE)
UNIX_D64BE = lockid(H5T_UNIX_D64BE)

# Reference types
STD_REF_OBJ = lockid(H5T_STD_REF_OBJ)
STD_REF_DSETREG = lockid(H5T_STD_REF_DSETREG)

# Null terminated (C) and Fortran string types
C_S1 = lockid(H5T_C_S1)
FORTRAN_S1 = lockid(H5T_FORTRAN_S1)
VARIABLE = H5T_VARIABLE

# Character sets
CSET_ASCII = H5T_CSET_ASCII
CSET_UTF8 = H5T_CSET_UTF8

# Mini (or short) floats
# Derived from a copy of the 32-bit big-endian type.  The set_fields
# arguments are (spos, epos, esize, mpos, msize): sign at bit 15, a 5-bit
# exponent starting at bit 10, a 10-bit mantissa starting at bit 0.
IEEE_F16BE = IEEE_F32BE.copy()
IEEE_F16BE.set_fields(15, 10, 5, 0, 10)
IEEE_F16BE.set_size(2)
IEEE_F16BE.set_ebias(15)
IEEE_F16BE.lock()

# Little-endian variant: same layout, only the byte order differs
IEEE_F16LE = IEEE_F16BE.copy()
IEEE_F16LE.set_order(H5T_ORDER_LE)
IEEE_F16LE.lock()

# Quad floats
# Derived from a copy of the 64-bit big-endian type, widened to 16 bytes /
# 128 bits.  Fields: sign at bit 127, a 15-bit exponent starting at bit 112,
# a 112-bit mantissa starting at bit 0.
IEEE_F128BE = IEEE_F64BE.copy()
IEEE_F128BE.set_size(16)
IEEE_F128BE.set_precision(128)
IEEE_F128BE.set_fields(127, 112, 15, 0, 112)
IEEE_F128BE.set_ebias(16383)
IEEE_F128BE.lock()

IEEE_F128LE = IEEE_F128BE.copy()
IEEE_F128LE.set_order(H5T_ORDER_LE)
IEEE_F128LE.lock()

# Copies of the native long double type with an explicit byte order
LDOUBLE_LE = NATIVE_LDOUBLE.copy()
LDOUBLE_LE.set_order(H5T_ORDER_LE)
LDOUBLE_LE.lock()

LDOUBLE_BE = NATIVE_LDOUBLE.copy()
LDOUBLE_BE.set_order(H5T_ORDER_BE)
LDOUBLE_BE.lock()

# Custom Python object pointer type
# An opaque type the size of a PyObject pointer, tagged with
# H5PY_PYTHON_OPAQUE_TAG and locked at the HDF5 level before wrapping.
cdef hid_t H5PY_OBJ = H5Tcreate(H5T_OPAQUE, sizeof(PyObject*))
H5Tset_tag(H5PY_OBJ, H5PY_PYTHON_OPAQUE_TAG)
H5Tlock(H5PY_OBJ)

PYTHON_OBJECT = lockid(H5PY_OBJ)
257
# Translation tables for HDF5 -> NumPy dtype conversion
# _order_map: HDF5 byte order -> NumPy byte-order character.  There is no
#             entry for H5T_ORDER_VAX, so looking that order up raises KeyError.
# _sign_map:  HDF5 sign scheme -> NumPy integer kind character.
cdef dict _order_map = { H5T_ORDER_NONE: '|', H5T_ORDER_LE: '<', H5T_ORDER_BE: '>'}
cdef dict _sign_map  = { H5T_SGN_NONE: 'u', H5T_SGN_2: 'i' }
261
# Available floating point types
cdef tuple _get_available_ftypes():
    """Describe every floating-point type code NumPy lists under "Float".

    Returns a tuple with one ``(scalar type object, finfo, itemsize)`` entry
    per character of ``np.typecodes["Float"]``, in that order.
    """
    cdef:
        str float_codes = np.typecodes["Float"]
        str code
        cnp.dtype float_dtype
        list entries = []

    for code in float_codes:
        float_dtype = np.dtype(code)
        entries.append((
            <object>(float_dtype.typeobj),
            np.finfo(float_dtype),
            float_dtype.itemsize,
        ))

    return tuple(entries)

# Computed once at import time; consulted by TypeFloatID.py_dtype
cdef tuple _available_ftypes = _get_available_ftypes()
279
280
cdef (int, int, int) _correct_float_info(ftype_, finfo):
    """Return ``(nmant, maxexp, minexp)`` for a float type, with corrections.

    The values come from ``finfo`` unless one of the special cases applies:
    long double on ppc64 / ppc64le uses hard-coded values, and a type with a
    63-bit mantissa and 15-bit exponent has its mantissa size bumped by one.
    """
    nmant = finfo.nmant
    maxexp = finfo.maxexp
    minexp = finfo.minexp

    # workaround for numpy's buggy finfo on float128 on ppc64 archs
    if ftype_ == np.longdouble:
        if MACHINE == 'ppc64':
            # values reported by hdf5
            return 116, 1024, -1022
        if MACHINE == 'ppc64le':
            # values reported by hdf5
            return 52, 1024, -1022

    if nmant == 63 and finfo.nexp == 15:
        # This is an 80-bit float, correct mantissa size
        return nmant + 1, maxexp, minexp

    return nmant, maxexp, minexp
301
302
303# === General datatype operations =============================================
304
@with_phil
def create(int classtype, size_t size):
    """(INT classtype, UINT size) => TypeID

    Create a new HDF5 type object of the given size.  Only the COMPOUND
    and OPAQUE classes are accepted; enums are made with enum_create.

    Raises ValueError for any other class code.
    """
    cdef hid_t new_id

    # HDF5 versions 1.6.X segfault with anything else
    if not (classtype == H5T_COMPOUND or classtype == H5T_OPAQUE):
        raise ValueError("Class must be COMPOUND or OPAQUE.")

    new_id = H5Tcreate(<H5T_class_t>classtype, size)
    return typewrap(new_id)
318
319
@with_phil
def open(ObjectID group not None, char* name, ObjectID tapl=None):
    """(ObjectID group, STRING name, ObjectID tapl=None) => TypeID

    Open a named datatype from a file.
    If present, tapl must be a datatype access property list.
    """
    # pdefault() turns the optional property list into the id passed to HDF5
    return typewrap(H5Topen(group.id, name, pdefault(tapl)))
328
329
@with_phil
def array_create(TypeID base not None, object dims_tpl):
    """(TypeID base, TUPLE dimensions) => TypeArrayID

    Create a new array datatype, using an HDF5 parent type and
    dimensions given via a tuple of positive integers.  "Unlimited"
    dimensions are not allowed.
    """
    cdef hsize_t rank
    cdef hsize_t *dims = NULL

    require_tuple(dims_tpl, 0, -1, b"dims_tpl")
    rank = len(dims_tpl)
    # Temporary C buffer for the dimensions; released in the finally below
    dims = <hsize_t*>emalloc(sizeof(hsize_t)*rank)

    try:
        convert_tuple(dims_tpl, dims, rank)
        return TypeArrayID(H5Tarray_create(base.id, rank, dims))
    finally:
        efree(dims)
350
351
@with_phil
def enum_create(TypeID base not None):
    """(TypeID base) => TypeID

    Create a new enumerated type based on an (integer) parent type.
    """
    # The new identifier is wrapped according to its HDF5 class by typewrap()
    return typewrap(H5Tenum_create(base.id))
359
360
@with_phil
def vlen_create(TypeID base not None):
    """(TypeID base) => TypeID

    Create a new variable-length datatype, using any HDF5 type as a base.

    Although the Python interface can manipulate these types, there is no
    provision for reading/writing vlen data.
    """
    # NOTE(review): the read/write limitation stated above cannot be checked
    # from this function alone -- confirm it is still accurate.
    return typewrap(H5Tvlen_create(base.id))
371
372
@with_phil
def decode(char* buf):
    """(STRING buf) => TypeID

    Deserialize an HDF5 type.  You can also do this with the native
    Python pickling machinery.
    """
    # buf is expected to hold bytes such as those returned by TypeID.encode()
    return typewrap(H5Tdecode(<unsigned char*>buf))
381
382
383# === Base type class =========================================================
384
cdef class TypeID(ObjectID):

    """
        Base class for type identifiers (implements common operations)

        * Hashable: If committed; in HDF5 1.8.X, also if locked
        * Equality: Logical H5T comparison
    """

    def __hash__(self):
        # Prefer the ObjectID hash; if that raises TypeError, fall back to
        # hashing the encoded form, which is only done for locked types.
        with phil:
            if self._hash is None:
                try:
                    # Try to use object header first
                    return ObjectID.__hash__(self)
                except TypeError:
                    # It's a transient type object
                    if self.locked:
                        self._hash = hash(self.encode())
                    else:
                        raise TypeError("Only locked or committed types can be hashed")

            return self._hash


    def __richcmp__(self, object other, int how):
        # Only comparison codes 2 and 3 are supported (2 returns the equality
        # result, 3 its negation).  A non-TypeID operand compares as unequal.
        cdef bint truthval = 0
        with phil:
            if how != 2 and how != 3:
                return NotImplemented
            if isinstance(other, TypeID):
                truthval = self.equal(other)

            if how == 2:
                return truthval
            return not truthval


    def __copy__(self):
        # Delegates to ObjectID.__copy__; the typed local checks that the
        # result is a TypeID.
        cdef TypeID cpy
        with phil:
            cpy = ObjectID.__copy__(self)
            return cpy


    property dtype:
        """ A Numpy-style dtype object representing this object.
        """
        def __get__(self):
            with phil:
                return self.py_dtype()


    cdef object py_dtype(self):
        # Default implementation; subclasses with a NumPy equivalent override it
        raise TypeError("No NumPy equivalent for %s exists" % self.__class__.__name__)


    @with_phil
    def commit(self, ObjectID group not None, char* name, ObjectID lcpl=None):
        """(ObjectID group, STRING name, PropID lcpl=None)

        Commit this (transient) datatype to a named datatype in a file.
        If present, lcpl may be a link creation property list.
        """
        # The last two property-list arguments are always left at H5P_DEFAULT
        H5Tcommit(group.id, name, self.id, pdefault(lcpl),
            H5P_DEFAULT, H5P_DEFAULT)


    @with_phil
    def committed(self):
        """() => BOOL is_committed

        Determine if a given type object is named (T) or transient (F).
        """
        return <bint>(H5Tcommitted(self.id))


    @with_phil
    def copy(self):
        """() => TypeID

        Create a copy of this type object.
        """
        return typewrap(H5Tcopy(self.id))


    @with_phil
    def equal(self, TypeID typeid):
        """(TypeID typeid) => BOOL

        Logical comparison between datatypes.  Also called by
        Python's "==" operator.
        """
        return <bint>(H5Tequal(self.id, typeid.id))


    @with_phil
    def lock(self):
        """()

        Lock this datatype, which makes it immutable and indestructible.
        Once locked, it can't be unlocked.
        """
        H5Tlock(self.id)
        # Remember the lock on the Python side too (consulted by __hash__)
        self.locked = 1


    @with_phil
    def get_class(self):
        """() => INT classcode

        Determine the datatype's class code.
        """
        return <int>H5Tget_class(self.id)


    @with_phil
    def set_size(self, size_t size):
        """(UINT size)

        Set the total size of the datatype, in bytes.
        """
        H5Tset_size(self.id, size)


    @with_phil
    def get_size(self):
        """ () => INT size

            Determine the total size of a datatype, in bytes.
        """
        return H5Tget_size(self.id)


    @with_phil
    def get_super(self):
        """() => TypeID

        Determine the parent type of an array, enumeration or vlen datatype.
        """
        return typewrap(H5Tget_super(self.id))


    @with_phil
    def detect_class(self, int classtype):
        """(INT classtype) => BOOL class_is_present

        Determine if a member of the given class exists in a compound
        datatype.  The search is recursive.
        """
        return <bint>(H5Tdetect_class(self.id, <H5T_class_t>classtype))


    @with_phil
    def encode(self):
        """() => STRING

        Serialize an HDF5 type.  Bear in mind you can also use the
        native Python pickle/unpickle machinery to do this.  The
        returned string may contain binary values, including NULLs.
        """
        cdef size_t nalloc = 0
        cdef char* buf = NULL

        # First call (NULL buffer) only fills in the required size
        H5Tencode(self.id, NULL, &nalloc)
        buf = <char*>emalloc(sizeof(char)*nalloc)
        try:
            H5Tencode(self.id, <unsigned char*>buf, &nalloc)
            # Explicit length, so embedded NUL bytes are preserved
            pystr = PyBytes_FromStringAndSize(buf, nalloc)
        finally:
            efree(buf)

        return pystr

    @with_phil
    def get_create_plist(self):
        """ () => PropTCID

            Create and return a new copy of the datatype creation property list
            used when this datatype was created.
        """
        return propwrap(H5Tget_create_plist(self.id))


    def __reduce__(self):
        # Pickle as (class, (-1,), encoded bytes); __setstate__ below decodes
        # the bytes back into an HDF5 identifier.
        with phil:
            return (type(self), (-1,), self.encode())


    def __setstate__(self, char* state):
        with phil:
            self.id = H5Tdecode(<unsigned char*>state)
577
578
579# === Top-level classes (inherit directly from TypeID) ========================
580
cdef class TypeArrayID(TypeID):

    """
        Identifier for an HDF5 array datatype
    """


    @with_phil
    def get_array_ndims(self):
        """() => INT rank

        Return the number of dimensions of this array datatype.
        """
        return H5Tget_array_ndims(self.id)


    @with_phil
    def get_array_dims(self):
        """() => TUPLE dimensions

        Return the dimensions of this array datatype as a tuple
        of integers.
        """
        cdef hsize_t ndims
        cdef hsize_t* extents = NULL

        # With a NULL buffer the call is used only for its return value
        ndims = H5Tget_array_dims(self.id, NULL)
        extents = <hsize_t*>emalloc(sizeof(hsize_t)*ndims)
        try:
            H5Tget_array_dims(self.id, extents)
            return convert_dims(extents, ndims)
        finally:
            efree(extents)

    cdef object py_dtype(self):
        # NumPy equivalent: a sub-array dtype (element dtype, shape)
        cdef TypeID element_type = self.get_super()

        element_dtype = element_type.py_dtype()
        dims = self.get_array_dims()

        return np.dtype((element_dtype, dims))
624
625
cdef class TypeOpaqueID(TypeID):

    """
        Represents an opaque type
    """


    @with_phil
    def set_tag(self, char* tag):
        """(STRING tag)

        Set a string describing the contents of an opaque datatype.
        Limited to 256 characters.
        """
        H5Tset_tag(self.id, tag)


    @with_phil
    def get_tag(self):
        """() => STRING tag

        Get the tag associated with an opaque datatype.
        """
        cdef char* buf = NULL

        try:
            buf = H5Tget_tag(self.id)
            assert buf != NULL
            # Copy into a Python object before the C buffer is released
            tag = buf
            return tag
        finally:
            # Which deallocator is used is decided at compile time from the
            # HDF5 version this module is built against.
            IF HDF5_VERSION >= (1, 8, 13):
                H5free_memory(buf)
            ELSE:
                free(buf)

    cdef object py_dtype(self):
        cdef bytes tag = self.get_tag()
        # A tag starting with "NUMPY:" carries a NumPy dtype string after the
        # prefix; the resulting dtype is marked via its metadata.
        if tag.startswith(b"NUMPY:"):
            # 6 = len("NUMPY:")
            return np.dtype(tag[6:], metadata={'h5py_opaque': True})

        # Numpy translation function for opaque types
        # (any other tag maps to a void dtype of the same byte size)
        return np.dtype("|V" + str(self.get_size()))
670
671
cdef class TypeStringID(TypeID):

    """
        String datatypes, both fixed and vlen.
    """


    @with_phil
    def is_variable_str(self):
        """() => BOOL is_variable

        Determine if the given string datatype is a variable-length string.
        """
        return <bint>(H5Tis_variable_str(self.id))


    @with_phil
    def get_cset(self):
        """() => INT character_set

        Retrieve the character set used for a string.
        """
        return <int>H5Tget_cset(self.id)


    @with_phil
    def set_cset(self, int cset):
        """(INT character_set)

        Set the character set used for a string.
        """
        H5Tset_cset(self.id, <H5T_cset_t>cset)


    @with_phil
    def get_strpad(self):
        """() => INT padding_type

        Get the padding type.  Legal values are:

        STR_NULLTERM
            NULL termination only (C style)

        STR_NULLPAD
            Pad buffer with NULLs

        STR_SPACEPAD
            Pad buffer with spaces (FORTRAN style)
        """
        return <int>H5Tget_strpad(self.id)


    @with_phil
    def set_strpad(self, int pad):
        """(INT pad)

        Set the padding type.  Legal values are:

        STR_NULLTERM
            NULL termination only (C style)

        STR_NULLPAD
            Pad buffer with NULLs

        STR_SPACEPAD
            Pad buffer with spaces (FORTRAN style)
        """
        H5Tset_strpad(self.id, <H5T_str_t>pad)


    cdef object py_dtype(self):
        # Numpy translation function for string types

        # Query the character set once; every get_cset() call takes the
        # global lock and goes through the HDF5 library.
        cset = self.get_cset()
        if cset == H5T_CSET_ASCII:
            encoding = 'ascii'
        elif cset == H5T_CSET_UTF8:
            encoding = 'utf-8'
        else:
            raise TypeError("Unknown string encoding (value %d)" % cset)

        # length=None denotes a variable-length string
        if self.is_variable_str():
            length = None
        else:
            length = self.get_size()

        return string_dtype(encoding=encoding, length=length)
757
cdef class TypeVlenID(TypeID):

    """
        Variable-length datatypes other than strings.
    """

    cdef object py_dtype(self):
        # The element type is the "super" type of the vlen type; its NumPy
        # dtype is handed to vlen_dtype to build the result.
        cdef TypeID element_type = self.get_super()

        return vlen_dtype(element_type.dtype)
771
cdef class TypeTimeID(TypeID):

    """
        Unix-style time_t (deprecated)
    """
    # Adds nothing to TypeID; py_dtype is not overridden, so the base-class
    # implementation (which raises TypeError) applies.
    pass
778
cdef class TypeBitfieldID(TypeID):

    """
        Identifier for an HDF5 bitfield datatype
    """

    @with_phil
    def get_order(self):
        """() => INT order

        Return the byte order of the datatype, one of:

        - ORDER_LE
        - ORDER_BE
        """
        return <int>H5Tget_order(self.id)

    cdef object py_dtype(self):
        # Bitfields translate to an unsigned integer dtype of the same
        # byte order and size.
        order_char = _order_map[self.get_order()]
        nbytes = self.get_size()
        return np.dtype(order_char + 'u' + str(nbytes))
801
802
cdef class TypeReferenceID(TypeID):

    """
        Identifier for an HDF5 object or region reference type
    """

    cdef object py_dtype(self):
        # Compare against the two standard reference types in turn
        if H5Tequal(self.id, H5T_STD_REF_OBJ):
            return ref_dtype
        if H5Tequal(self.id, H5T_STD_REF_DSETREG):
            return regionref_dtype
        raise TypeError("Unknown reference type")
816
817
818# === Numeric classes (integers and floats) ===================================
819
cdef class TypeAtomicID(TypeID):

    """
        Base class for atomic datatypes (float or integer)
    """
    # Every method here forwards directly to the H5T function of the same name.


    @with_phil
    def get_order(self):
        """() => INT order

        Obtain the byte order of the datatype; one of:

        - ORDER_LE
        - ORDER_BE
        """
        return <int>H5Tget_order(self.id)


    @with_phil
    def set_order(self, int order):
        """(INT order)

        Set the byte order of the datatype; one of:

        - ORDER_LE
        - ORDER_BE
        """
        H5Tset_order(self.id, <H5T_order_t>order)


    @with_phil
    def get_precision(self):
        """() => UINT precision

        Get the number of significant bits (excludes padding).
        """
        return H5Tget_precision(self.id)


    @with_phil
    def set_precision(self, size_t precision):
        """(UINT precision)

        Set the number of significant bits (excludes padding).
        """
        H5Tset_precision(self.id, precision)


    @with_phil
    def get_offset(self):
        """() => INT offset

        Get the offset of the first significant bit.
        """
        return H5Tget_offset(self.id)


    @with_phil
    def set_offset(self, size_t offset):
        """(UINT offset)

        Set the offset of the first significant bit.
        """
        H5Tset_offset(self.id, offset)


    @with_phil
    def get_pad(self):
        """() => (INT lsb_pad_code, INT msb_pad_code)

        Determine the padding type.  Possible values are:

        - PAD_ZERO
        - PAD_ONE
        - PAD_BACKGROUND
        """
        cdef H5T_pad_t lsb
        cdef H5T_pad_t msb
        # Both codes are returned through the output pointers
        H5Tget_pad(self.id, &lsb, &msb)
        return (<int>lsb, <int>msb)


    @with_phil
    def set_pad(self, int lsb, int msb):
        """(INT lsb_pad_code, INT msb_pad_code)

        Set the padding type.  Possible values are:

        - PAD_ZERO
        - PAD_ONE
        - PAD_BACKGROUND
        """
        H5Tset_pad(self.id, <H5T_pad_t>lsb, <H5T_pad_t>msb)
914
915
cdef class TypeIntegerID(TypeAtomicID):

    """
        Identifier for integer atomic datatypes
    """


    @with_phil
    def get_sign(self):
        """() => INT sign

        Return the sign scheme of the datatype, one of:

        SGN_NONE
            Unsigned

        SGN_2
            Signed 2's complement
        """
        return <int>H5Tget_sign(self.id)


    @with_phil
    def set_sign(self, int sign):
        """(INT sign)

        Change the sign scheme of the datatype to one of:

        SGN_NONE
            Unsigned

        SGN_2
            Signed 2's complement
        """
        H5Tset_sign(self.id, <H5T_sign_t>sign)

    cdef object py_dtype(self):
        # NumPy dtype string = byte-order char + kind char + size in bytes
        order_char = _order_map[self.get_order()]
        kind_char = _sign_map[self.get_sign()]
        nbytes = self.get_size()
        return np.dtype(order_char + kind_char + str(nbytes))
956
957
cdef class TypeFloatID(TypeAtomicID):

    """
        Floating-point atomic datatypes
    """


    @with_phil
    def get_fields(self):
        """() => TUPLE field_info

        Get information about floating-point bit fields.  See the HDF5
        docs for a full description.  Tuple has the following members:

        0. UINT spos
        1. UINT epos
        2. UINT esize
        3. UINT mpos
        4. UINT msize
        """
        cdef size_t spos, epos, esize, mpos, msize
        H5Tget_fields(self.id, &spos, &epos, &esize, &mpos, &msize)
        return (spos, epos, esize, mpos, msize)


    @with_phil
    def set_fields(self, size_t spos, size_t epos, size_t esize,
                          size_t mpos, size_t msize):
        """(UINT spos, UINT epos, UINT esize, UINT mpos, UINT msize)

        Set floating-point bit fields.  Refer to the HDF5 docs for
        argument definitions.
        """
        H5Tset_fields(self.id, spos, epos, esize, mpos, msize)


    @with_phil
    def get_ebias(self):
        """() => UINT ebias

        Get the exponent bias.
        """
        return H5Tget_ebias(self.id)


    @with_phil
    def set_ebias(self, size_t ebias):
        """(UINT ebias)

        Set the exponent bias.
        """
        H5Tset_ebias(self.id, ebias)


    @with_phil
    def get_norm(self):
        """() => INT normalization_code

        Get the normalization strategy.  Legal values are:

        - NORM_IMPLIED
        - NORM_MSBSET
        - NORM_NONE
        """
        return <int>H5Tget_norm(self.id)


    @with_phil
    def set_norm(self, int norm):
        """(INT normalization_code)

        Set the normalization strategy.  Legal values are:

        - NORM_IMPLIED
        - NORM_MSBSET
        - NORM_NONE
        """
        H5Tset_norm(self.id, <H5T_norm_t>norm)


    @with_phil
    def get_inpad(self):
        """() => INT pad_code

        Determine the internal padding strategy.  Legal values are:

        - PAD_ZERO
        - PAD_ONE
        - PAD_BACKGROUND
        """
        return <int>H5Tget_inpad(self.id)


    @with_phil
    def set_inpad(self, int pad_code):
        """(INT pad_code)

        Set the internal padding strategy.  Legal values are:

        - PAD_ZERO
        - PAD_ONE
        - PAD_BACKGROUND
        """
        H5Tset_inpad(self.id, <H5T_pad_t>pad_code)

    cdef object py_dtype(self):
        # Translation function for floating-point types
        #
        # Picks the first entry of _available_ftypes that is large enough in
        # byte size, mantissa bits and exponent range; raises ValueError if
        # no entry qualifies.

        order = _order_map[self.get_order()]    # string with '<' or '>'

        fields = self.get_fields()
        s_offset, e_offset, e_size, m_offset, m_size = fields
        e_bias = self.get_ebias()

        # These do not change between candidates, so compute them once
        # instead of going through the lock and HDF5 on every iteration.
        type_size = self.get_size()
        max_exponent = 2**e_size - e_bias - 1
        min_exponent = 1 - e_bias

        # Handle non-standard exponent and mantissa sizes.
        for ftype_, finfo, size in _available_ftypes:
            nmant, maxexp, minexp = _correct_float_info(ftype_, finfo)
            if (size >= type_size and m_size <= nmant and
                max_exponent <= maxexp and min_exponent >= minexp):
                new_dtype = np.dtype(ftype_).newbyteorder(order)
                break
        else:
            raise ValueError('Insufficient precision in available types to ' +
                             'represent ' + str(fields))

        return new_dtype
1083
1084
1085# === Composite types (enums and compound) ====================================
1086
cdef class TypeCompositeID(TypeID):

    """
        Base class for enumerated and compound types.
    """


    @with_phil
    def get_nmembers(self):
        """() => INT number_of_members

        Determine the number of members in a compound or enumerated type.
        """
        return H5Tget_nmembers(self.id)


    @with_phil
    def get_member_name(self, int member):
        """(INT member) => STRING name

        Determine the name of a member of a compound or enumerated type,
        identified by its index (0 <= member < nmembers).
        """
        cdef char* name
        name = NULL

        # Only the lower bound is checked here
        if member < 0:
            raise ValueError("Member index must be non-negative.")

        try:
            name = H5Tget_member_name(self.id, member)
            assert name != NULL
            # Copy into a Python bytes object before the C string is freed
            pyname = <bytes>name
        finally:
            # Which deallocator is used is decided at compile time from the
            # HDF5 version this module is built against.
            IF HDF5_VERSION >= (1, 8, 13):
                H5free_memory(name)
            ELSE:
                free(name)

        return pyname


    @with_phil
    def get_member_index(self, char* name):
        """(STRING name) => INT index

        Determine the index of a member of a compound or enumerated datatype
        identified by a string name.
        """
        return H5Tget_member_index(self.id, name)
1137
1138cdef class TypeCompoundID(TypeCompositeID):
1139
1140    """
1141        Represents a compound datatype
1142    """
1143
1144
    @with_phil
    def get_member_class(self, int member):
        """(INT member) => INT class

        Determine the datatype class of the member of a compound type,
        identified by its index (0 <= member < nmembers).

        Raises ValueError if member is negative.
        """
        # Only the lower bound is checked here
        if member < 0:
            raise ValueError("Member index must be non-negative.")
        return H5Tget_member_class(self.id, member)
1155
1156
1157    @with_phil
1158    def get_member_offset(self, int member):
1159        """(INT member) => INT offset
1160
1161        Determine the offset, in bytes, of the beginning of the specified
1162        member of a compound datatype.
1163        """
1164        if member < 0:
1165            raise ValueError("Member index must be non-negative.")
1166        return H5Tget_member_offset(self.id, member)
1167
1168
1169    @with_phil
1170    def get_member_type(self, int member):
1171        """(INT member) => TypeID
1172
1173        Create a copy of a member of a compound datatype, identified by its
1174        index.
1175        """
1176        if member < 0:
1177            raise ValueError("Member index must be non-negative.")
1178        return typewrap(H5Tget_member_type(self.id, member))
1179
1180
1181    @with_phil
1182    def insert(self, char* name, size_t offset, TypeID field not None):
1183        """(STRING name, UINT offset, TypeID field)
1184
1185        Add a named member datatype to a compound datatype.  The parameter
1186        offset indicates the offset from the start of the compound datatype,
1187        in bytes.
1188        """
1189        H5Tinsert(self.id, name, offset, field.id)
1190
1191
1192    @with_phil
1193    def pack(self):
1194        """()
1195
1196        Recursively removes padding (introduced on account of e.g. compiler
1197        alignment rules) from a compound datatype.
1198        """
1199        H5Tpack(self.id)
1200
1201    cdef object py_dtype(self):
1202        cdef:
1203            TypeID tmp_type
1204            list field_names
1205            list field_types
1206            int i, nfields
1207        field_names = []
1208        field_types = []
1209        field_offsets = []
1210        nfields = self.get_nmembers()
1211
1212        # First step: read field names and their Numpy dtypes into
1213        # two separate arrays.
1214        for i in range(nfields):
1215            tmp_type = self.get_member_type(i)
1216            name = self.get_member_name(i)
1217            field_names.append(name)
1218            field_types.append(tmp_type.py_dtype())
1219            field_offsets.append(self.get_member_offset(i))
1220
1221
1222        # 1. Check if it should be converted to a complex number
1223        if len(field_names) == 2                                and \
1224            tuple(field_names) == (cfg._r_name, cfg._i_name)    and \
1225            field_types[0] == field_types[1]                    and \
1226            field_types[0].kind == 'f':
1227
1228            bstring = field_types[0].str
1229            blen = int(bstring[2:])
1230            nstring = bstring[0] + "c" + str(2*blen)
1231            typeobj = np.dtype(nstring)
1232
1233        # 2. Otherwise, read all fields of the compound type, in HDF5 order.
1234        else:
1235            field_names = [x.decode('utf8') for x in field_names]
1236            typeobj = np.dtype({'names': field_names,
1237                                'formats': field_types,
1238                                'offsets': field_offsets,
1239                                'itemsize': self.get_size()})
1240
1241        return typeobj
1242
1243
cdef class TypeEnumID(TypeCompositeID):

    """
        Represents an enumerated type
    """

    cdef int enum_convert(self, long long *buf, int reverse) except -1:
        # Convert the long long value in "buf" to the native representation
        # of this (enumerated) type.  Conversion performed in-place.
        # Reverse: false => llong->type; true => type->llong
        #
        # Raises ValueError if this identifier is not of class ENUM.

        cdef hid_t basetype
        cdef H5T_class_t class_code

        class_code = H5Tget_class(self.id)
        if class_code != H5T_ENUM:
            raise ValueError("This type (class %d) is not of class ENUM" % class_code)

        basetype = H5Tget_super(self.id)
        assert basetype > 0

        try:
            if not reverse:
                H5Tconvert(H5T_NATIVE_LLONG, basetype, 1, buf, NULL, H5P_DEFAULT)
            else:
                H5Tconvert(basetype, H5T_NATIVE_LLONG, 1, buf, NULL, H5P_DEFAULT)
        finally:
            # The base type id obtained above is ours to close, success or not
            H5Tclose(basetype)


    @with_phil
    def enum_insert(self, char* name, long long value):
        """(STRING name, INT/LONG value)

        Define a new member of an enumerated type.  The value will be
        automatically converted to the base type defined for this enum.  If
        the conversion results in overflow, the value will be silently
        clipped.
        """
        cdef long long buf

        buf = value
        self.enum_convert(&buf, 0)
        H5Tenum_insert(self.id, name, &buf)


    @with_phil
    def enum_nameof(self, long long value):
        """(LONG value) => STRING name

        Determine the name associated with the given value.  Due to a
        limitation of the HDF5 library, this can only retrieve names up to
        1023 characters in length.
        """
        cdef herr_t retval
        cdef char name[1024]
        cdef long long buf

        buf = value
        # The lookup expects the value in the enum's base-type representation
        self.enum_convert(&buf, 0)
        retval = H5Tenum_nameof(self.id, &buf, name, 1024)
        assert retval >= 0
        retstring = name
        return retstring


    @with_phil
    def enum_valueof(self, char* name):
        """(STRING name) => LONG value

        Get the value associated with an enum name.
        """
        cdef long long buf

        H5Tenum_valueof(self.id, name, &buf)
        # Convert from the enum's base-type representation back to long long
        self.enum_convert(&buf, 1)
        return buf


    @with_phil
    def get_member_value(self, int idx):
        """(UINT index) => LONG value

        Determine the value for the member at the given zero-based index.
        Raises ValueError if the index is negative.
        """
        cdef long long val

        if idx < 0:
            raise ValueError("Index must be non-negative.")

        H5Tget_member_value(self.id, idx, &val)
        # Convert from the enum's base-type representation back to long long
        self.enum_convert(&val, 1)
        return val

    cdef object py_dtype(self):
        # Translation function for enum types

        cdef TypeID basetype = self.get_super()

        nmembers = self.get_nmembers()
        members = {}

        for idx in range(nmembers):
            name = self.get_member_name(idx)
            val = self.get_member_value(idx)
            members[name] = val

        ref = {cfg._f_name: 0, cfg._t_name: 1}

        # Boolean types have priority over standard enums
        if members == ref:
            return np.dtype('bool')

        # Convert strings to appropriate representation
        members_conv = {}
        # items() rather than the Python-2-only iteritems(): identical
        # iteration, without depending on Cython special-casing the old name.
        for name, val in members.items():
            try:    # ASCII;
                name = name.decode('ascii')
            except UnicodeDecodeError:
                try:    # Non-ascii; all platforms try unicode
                    name = name.decode('utf8')
                except UnicodeDecodeError:
                    pass    # Last resort: return byte string
            members_conv[name] = val
        return enum_dtype(members_conv, basetype=basetype.py_dtype())
1372
1373
1374# === Translation from NumPy dtypes to HDF5 type objects ======================
1375
1376# The following series of native-C functions each translate a specific class
1377# of NumPy dtype into an HDF5 type object.  The result is guaranteed to be
1378# transient and unlocked.
1379
def _get_float_dtype_to_hdf5():
    """Build the float lookup tables used when translating NumPy dtypes.

    Returns a 3-tuple of dicts (little-endian, big-endian, native).  Each
    maps an entry of _available_ftypes to the first HDF5 float type in the
    candidate list whose exponent size, mantissa size and exponent bias
    match the corrected float info for that entry.
    """
    h5_be_list = [IEEE_F16BE, IEEE_F32BE, IEEE_F64BE, IEEE_F128BE, LDOUBLE_BE]
    h5_le_list = [IEEE_F16LE, IEEE_F32LE, IEEE_F64LE, IEEE_F128LE, LDOUBLE_LE]
    float_le = {}
    float_be = {}

    # Big-endian candidates are scanned before little-endian ones
    scan_order = ((float_be, h5_be_list), (float_le, h5_le_list))

    for ftype_, finfo, size in _available_ftypes:
        nmant, maxexp, minexp = _correct_float_info(ftype_, finfo)
        for table, candidates in scan_order:
            for h5type in candidates:
                spos, epos, esize, mpos, msize = h5type.get_fields()
                ebias = h5type.get_ebias()
                if (finfo.iexp == esize and nmant == msize and
                        (maxexp - 1) == ebias):
                    table[ftype_] = h5type
                    break # first found matches, related to #1244

    # The native table is a copy of whichever table matches ORDER_NATIVE
    float_nt = dict(float_le if ORDER_NATIVE == H5T_ORDER_LE else float_be)
    return float_le, float_be, float_nt
1407
# Float lookup tables (little-endian, big-endian, native byte order), filled
# once at import time.  _c_float indexes them with the NumPy scalar type of
# the dtype being translated.
cdef dict _float_le
cdef dict _float_be
cdef dict _float_nt
_float_le, _float_be, _float_nt = _get_float_dtype_to_hdf5()

# Signed integer lookup tables, keyed by item size in bytes.  The values are
# raw type identifiers returned by H5Tcopy; _c_int copies them again before
# wrapping, so these entries are never handed out directly.
cdef dict _int_le = {1: H5Tcopy(H5T_STD_I8LE), 2: H5Tcopy(H5T_STD_I16LE), 4: H5Tcopy(H5T_STD_I32LE), 8: H5Tcopy(H5T_STD_I64LE)}
cdef dict _int_be = {1: H5Tcopy(H5T_STD_I8BE), 2: H5Tcopy(H5T_STD_I16BE), 4: H5Tcopy(H5T_STD_I32BE), 8: H5Tcopy(H5T_STD_I64BE)}
cdef dict _int_nt = {1: H5Tcopy(H5T_NATIVE_INT8), 2: H5Tcopy(H5T_NATIVE_INT16), 4: H5Tcopy(H5T_NATIVE_INT32), 8: H5Tcopy(H5T_NATIVE_INT64)}

# Unsigned integer lookup tables, same layout as the signed tables above
cdef dict _uint_le = {1: H5Tcopy(H5T_STD_U8LE), 2: H5Tcopy(H5T_STD_U16LE), 4: H5Tcopy(H5T_STD_U32LE), 8: H5Tcopy(H5T_STD_U64LE)}
cdef dict _uint_be = {1: H5Tcopy(H5T_STD_U8BE), 2: H5Tcopy(H5T_STD_U16BE), 4: H5Tcopy(H5T_STD_U32BE), 8: H5Tcopy(H5T_STD_U64BE)}
cdef dict _uint_nt = {1: H5Tcopy(H5T_NATIVE_UINT8), 2: H5Tcopy(H5T_NATIVE_UINT16), 4: H5Tcopy(H5T_NATIVE_UINT32), 8: H5Tcopy(H5T_NATIVE_UINT64)}
1420
cdef TypeFloatID _c_float(cnp.dtype dt):
    # Floats: choose the lookup table by byte order, then return a copy of
    # the matching HDF5 type.  Unknown float types raise TypeError.
    cdef TypeFloatID match
    cdef dict table

    if dt.byteorder == c'<':
        table = _float_le
    elif dt.byteorder == c'>':
        table = _float_be
    else:
        table = _float_nt

    try:
        match = table[np.dtype(dt).type]
    except KeyError:
        raise TypeError("Unsupported float type (%s)" % dt)

    return match.copy()
1436
cdef TypeIntegerID _c_int(cnp.dtype dt):
    # Integers (signed and unsigned): choose the table by kind and byte
    # order, look up the item size, and wrap a fresh copy of the type id.
    cdef hid_t base_id
    cdef dict table

    if dt.kind == c'i':
        if dt.byteorder == c'<':
            table = _int_le
        elif dt.byteorder == c'>':
            table = _int_be
        else:
            table = _int_nt
    elif dt.kind == c'u':
        if dt.byteorder == c'<':
            table = _uint_le
        elif dt.byteorder == c'>':
            table = _uint_be
        else:
            table = _uint_nt
    else:
        raise TypeError('Illegal int kind "%s"' % dt.kind)

    try:
        base_id = table[dt.itemsize]
    except KeyError:
        raise TypeError("Unsupported integer size (%s)" % dt.itemsize)

    return TypeIntegerID(H5Tcopy(base_id))
1462
1463
cdef TypeEnumID _c_enum(cnp.dtype dt, dict vals):
    # Enums: create an enum over the integer type for "dt" and insert the
    # members of "vals" in sorted name order.
    cdef:
        TypeIntegerID int_base
        TypeEnumID enum_type

    int_base = _c_int(dt)
    enum_type = TypeEnumID(H5Tenum_create(int_base.id))

    for key in sorted(vals):
        # Member names are passed as bytes; text names are encoded as UTF-8
        encoded = key if isinstance(key, bytes) else unicode(key).encode('utf8')
        enum_type.enum_insert(encoded, vals[key])
    return enum_type
1480
1481
cdef TypeEnumID _c_bool(cnp.dtype dt):
    # Booleans: an enum over native int8 with two members, named by the
    # configured false/true names and valued 0 and 1.
    cdef TypeEnumID bool_enum = TypeEnumID(H5Tenum_create(H5T_NATIVE_INT8))

    bool_enum.enum_insert(cfg._f_name, 0)
    bool_enum.enum_insert(cfg._t_name, 1)

    return bool_enum
1493
1494
cdef TypeArrayID _c_array(cnp.dtype dt, int logical):
    # Arrays: translate the element dtype, then wrap it in an HDF5 array
    # type with the sub-array shape.
    cdef:
        cnp.dtype elem_dt
        TypeID elem_type
        object dims

    elem_dt, dims = dt.subdtype

    # Accept a sequence, or fall back to a single integer extent
    try:
        dims = tuple(dims)
    except TypeError:
        try:
            dims = (int(dims),)
        except TypeError:
            raise TypeError("Array shape for dtype must be a sequence or integer")

    elem_type = py_create(elem_dt, logical=logical)
    return array_create(elem_type, dims)
1512
1513
cdef TypeOpaqueID _c_opaque(cnp.dtype dt):
    # Opaque: a new opaque type with the same item size as the dtype
    cdef hid_t opaque_id = H5Tcreate(H5T_OPAQUE, dt.itemsize)
    return TypeOpaqueID(opaque_id)
1517
1518
cdef TypeOpaqueID _c_opaque_tagged(cnp.dtype dt):
    """Build an HDF5 opaque type whose tag records the originating numpy dtype.

    h5py can read tagged opaque types back easily; other tools cannot
    interpret them (they are *opaque*).

    The tag is generated via the code:
    ``b"NUMPY:" + dt.descr[0][1].encode()``.
    """
    cdef TypeOpaqueID tagged = _c_opaque(dt)

    tag = b"NUMPY:" + dt.descr[0][1].encode()
    tagged.set_tag(tag)
    return tagged
1532
cdef TypeStringID _c_string(cnp.dtype dt):
    # Fixed-length strings: null-padded, sized to the dtype's item size.
    # The character set is switched to UTF-8 only when the dtype metadata
    # carries h5py_encoding == 'utf-8'.
    cdef hid_t str_id = H5Tcopy(H5T_C_S1)

    H5Tset_size(str_id, dt.itemsize)
    H5Tset_strpad(str_id, H5T_STR_NULLPAD)

    meta = dt.metadata
    if meta and meta.get('h5py_encoding') == 'utf-8':
        H5Tset_cset(str_id, H5T_CSET_UTF8)

    return TypeStringID(str_id)
1543
cdef TypeCompoundID _c_complex(cnp.dtype dt):
    # Complex numbers (names depend on cfg)
    #
    # Builds a two-member compound type: a real and an imaginary member,
    # both of the same float type, named cfg._r_name and cfg._i_name.
    # Raises TypeError for item sizes other than 8, 16 and (only when
    # COMPLEX256_SUPPORT is enabled at compile time) 32 bytes.
    global cfg

    cdef hid_t tid, tid_sub
    cdef size_t size, off_r, off_i

    cdef size_t length = dt.itemsize
    cdef char byteorder = dt.byteorder

    # Total size and member offsets come from the h5py_size_* and
    # h5py_offset_* constants; the member float type follows the byte order.
    if length == 8:
        size = h5py_size_n64
        off_r = h5py_offset_n64_real
        off_i = h5py_offset_n64_imag
        if byteorder == c'<':
            tid_sub = H5T_IEEE_F32LE
        elif byteorder == c'>':
            tid_sub = H5T_IEEE_F32BE
        else:
            tid_sub = H5T_NATIVE_FLOAT
    elif length == 16:
        size = h5py_size_n128
        off_r = h5py_offset_n128_real
        off_i = h5py_offset_n128_imag
        if byteorder == c'<':
            tid_sub = H5T_IEEE_F64LE
        elif byteorder == c'>':
            tid_sub = H5T_IEEE_F64BE
        else:
            tid_sub = H5T_NATIVE_DOUBLE

    elif length == 32:
        # NOTE: this branch always uses the native long double type; the
        # dtype's byte order is not consulted here.
        IF COMPLEX256_SUPPORT:
            size = h5py_size_n256
            off_r = h5py_offset_n256_real
            off_i = h5py_offset_n256_imag
            tid_sub = H5T_NATIVE_LDOUBLE
        ELSE:
            raise TypeError("Illegal length %d for complex dtype" % length)
    else:
        raise TypeError("Illegal length %d for complex dtype" % length)

    # Create the compound and insert the real member, then the imaginary one
    tid = H5Tcreate(H5T_COMPOUND, size)
    H5Tinsert(tid, cfg._r_name, off_r, tid_sub)
    H5Tinsert(tid, cfg._i_name, off_i, tid_sub)

    return TypeCompoundID(tid)
1591
cdef TypeCompoundID _c_compound(cnp.dtype dt, int logical, int aligned):
    # Compound datatypes
    #
    # dt:      structured NumPy dtype to translate
    # logical: forwarded to py_create for every member
    # aligned: when true, raise TypeError instead of moving a member away
    #          from its NumPy offset or growing the type beyond dt.itemsize
    cdef:
        hid_t tid
        TypeID member_type
        object member_dt
        size_t member_offset = 0
        dict fields = {}

    # The challenge with correctly converting a numpy/h5py dtype to a HDF5 type
    # which is composed of subtypes has three aspects we must consider
    # 1. numpy/h5py dtypes do not always have the same size as HDF5, even when
    #   equivalent (can result in overlapping elements if not careful)
    # 2. For correct round-tripping of aligned dtypes, we need to consider how
    #   much padding we need by looking at the field offsets
    # 3. There is no requirement that the offsets be monotonically increasing
    #
    # The code below tries to cover these aspects

    # Build list of names, offsets, and types, sorted by increasing offset
    # (i.e. the position of the member in the struct)
    for name in sorted(dt.names, key=(lambda n: dt.fields[n][1])):
        field = dt.fields[name]
        h5_name = name.encode('utf8') if isinstance(name, unicode) else name

        # Get HDF5 data types and set the offset for each member
        member_dt = field[0]
        # Never place a member before the end of the previous one
        member_offset = max(member_offset, field[1])
        member_type = py_create(member_dt, logical=logical, aligned=aligned)
        # In aligned mode the member must sit exactly at its NumPy offset and
        # have exactly its NumPy size
        if aligned and (member_offset > field[1]
                        or member_dt.itemsize != member_type.get_size()):
            raise TypeError("Enforced alignment not compatible with HDF5 type")
        fields[name] = (h5_name, member_offset, member_type)

        # Update member offset based on the HDF5 type size
        member_offset += member_type.get_size()

    # member_offset now becomes the total size: at least the NumPy item size,
    # larger if the HDF5 members needed more room
    member_offset = max(member_offset, dt.itemsize)
    if aligned and member_offset > dt.itemsize:
        raise TypeError("Enforced alignment not compatible with HDF5 type")

    # Create compound with the necessary size, and insert its members
    # (in the dtype's own field order, not the offset-sorted order above)
    tid = H5Tcreate(H5T_COMPOUND, member_offset)
    for name in dt.names:
        h5_name, member_offset, member_type = fields[name]
        H5Tinsert(tid, h5_name, member_offset, member_type.id)

    return TypeCompoundID(tid)
1640
cdef TypeStringID _c_vlen_str():
    # Variable-length strings: a copy of the C string type with its size
    # set to H5T_VARIABLE (character set left untouched)
    cdef hid_t str_id = H5Tcopy(H5T_C_S1)

    H5Tset_size(str_id, H5T_VARIABLE)
    return TypeStringID(str_id)
1647
cdef TypeStringID _c_vlen_unicode():
    # Variable-length strings with the character set switched to UTF-8
    cdef hid_t str_id = H5Tcopy(H5T_C_S1)

    H5Tset_size(str_id, H5T_VARIABLE)
    H5Tset_cset(str_id, H5T_CSET_UTF8)
    return TypeStringID(str_id)
1654
cdef TypeReferenceID _c_ref(object refclass):
    # References: map the Python reference class to the matching standard
    # HDF5 reference type; any other object raises TypeError.
    if refclass is Reference:
        return STD_REF_OBJ
    if refclass is RegionReference:
        return STD_REF_DSETREG
    raise TypeError("Unrecognized reference code")
1661
1662
cpdef TypeID py_create(object dtype_in, bint logical=0, bint aligned=0):
    """(OBJECT dtype_in, BOOL logical=False, BOOL aligned=False) => TypeID

    Given a Numpy dtype object, generate a byte-for-byte memory-compatible
    HDF5 datatype object.  The result is guaranteed to be transient and
    unlocked.

    :param dtype_in: may be a dtype object, or anything which can be
        converted to a dtype, including strings like '<i4' or an "int".
    :param logical: when this flag is set, instead of returning a byte-for-byte
        identical representation of the type, the function returns the closest
        logically appropriate HDF5 type.  For example, in the case of a "hinted"
        dtype of kind "O" representing a string, it would return an HDF5 variable-
        length string type.
    :param aligned: only used for compound dtypes.  When set, a TypeError is
        raised if the HDF5 members cannot be placed at exactly the NumPy
        field offsets and sizes.  If dtype_in has an "isalignedstruct"
        attribute, that attribute's value replaces this argument.

    Raises TypeError when no conversion exists for the dtype.
    """
    cdef:
        cnp.dtype dt
        char kind

    dt = np.dtype(dtype_in)
    # dt is now the C side of dtype_in. Sometimes the Python behaviour is easier to handle than the C-version
    kind = dt.kind
    # Read from the object passed in (not from dt); falls back to the argument
    # when the attribute is absent.
    aligned = getattr(dtype_in, "isalignedstruct", aligned)

    with phil:
        # Tagged opaque data
        # (checked first, so the tag wins over the dtype's own kind)
        if check_opaque_dtype(dt):
            return _c_opaque_tagged(dt)

        # Float
        if kind == c'f':
            return _c_float(dt)

        # Integer
        elif kind == c'u' or kind == c'i':

            if logical:
                # Check for an enumeration hint
                enum_vals = check_enum_dtype(dt)
                if enum_vals is not None:
                    return _c_enum(dt, enum_vals)

            return _c_int(dt)

        # Complex
        elif kind == c'c':
            return _c_complex(dt)

        # Compound. The explicit cast to object is used to force Python attribute access,
        # as dt.names may be a NULL pointer at the C level when undefined.
        elif (kind == c'V') and ((<object> dt).names is not None):
            return _c_compound(dt, logical, aligned)

        # Array or opaque
        elif kind == c'V':
            if dt.subdtype is not None:
                return _c_array(dt, logical)
            else:
                return _c_opaque(dt)

        # String
        elif kind == c'S':
            return _c_string(dt)

        # Boolean
        elif kind == c'b':
            return _c_bool(dt)

        # Object types (including those with vlen hints)
        elif kind == c'O':

            if logical:
                # vlen hints are tried first, then reference hints
                vlen = check_vlen_dtype(dt)
                if vlen is bytes:
                    return _c_vlen_str()
                elif vlen is unicode:
                    return _c_vlen_unicode()
                elif vlen is not None:
                    return vlen_create(py_create(vlen, logical))

                refclass = check_ref_dtype(dt)
                if refclass is not None:
                    return _c_ref(refclass)

                raise TypeError("Object dtype %r has no native HDF5 equivalent" % (dt,))

            # Without "logical", every object dtype maps to PYTHON_OBJECT
            return PYTHON_OBJECT

        # Unrecognized
        else:
            raise TypeError("No conversion path for dtype: %s" % repr(dt))
1754
def vlen_dtype(basetype):
    """Return a numpy object dtype hinted as an HDF5 variable-length type

    *basetype* is stored unchanged under the ``'vlen'`` metadata key.

    For variable-length string dtypes, use :func:`string_dtype` instead.
    """
    hint = {'vlen': basetype}
    return np.dtype('O', metadata=hint)
1761
def string_dtype(encoding='utf-8', length=None):
    """Make a numpy dtype for HDF5 strings

    encoding may be 'utf-8' or 'ascii' (any alias the codecs module
    normalises to one of these is accepted).

    length may be an integer (a Python int or a NumPy integer scalar) for a
    fixed length string dtype, or None for variable length strings. String
    lengths for HDF5 are counted in bytes, not unicode code points.

    For variable length strings, the data should be passed as Python str objects
    (unicode in Python 2) if the encoding is 'utf-8', and bytes if it is 'ascii'.
    For fixed length strings, the data should be numpy fixed length *bytes*
    arrays, regardless of the encoding. Fixed length unicode data is not
    supported.

    Raises ValueError for an unsupported encoding, and TypeError if length
    is neither an integer nor None.
    """
    # Normalise encoding name:
    try:
        encoding = codecs.lookup(encoding).name
    except LookupError:
        pass  # Use our error below

    if encoding not in {'ascii', 'utf-8'}:
        raise ValueError("Invalid encoding (%r); 'utf-8' or 'ascii' allowed"
                         % encoding)

    # NumPy integer scalars (e.g. taken from an array or a shape computation)
    # are not instances of the builtin int; normalise them so they are
    # accepted as a length as well.
    if isinstance(length, np.integer):
        length = int(length)

    if isinstance(length, int):
        # Fixed length string
        return np.dtype("|S" + str(length), metadata={'h5py_encoding': encoding})
    elif length is None:
        vlen = unicode if (encoding == 'utf-8') else bytes
        return np.dtype('O', metadata={'vlen': vlen})
    else:
        raise TypeError("length must be integer or None (got %r)" % length)
1795
def enum_dtype(values_dict, basetype=np.uint8):
    """Return a NumPy dtype hinted as an HDF5 enumerated type

    *values_dict* maps string names to integer values; it is stored under
    the ``'enum'`` metadata key. *basetype* is an integer base dtype large
    enough to hold the possible options.

    Raises TypeError if *basetype* is not an integer type.
    """
    base = np.dtype(basetype)
    if np.issubdtype(base, np.integer):
        return np.dtype(base, metadata={'enum': values_dict})

    raise TypeError("Only integer types can be used as enums")
1807
1808
def opaque_dtype(np_dtype):
    """Return an equivalent dtype tagged for storage as an HDF5 opaque type.

    Useful for numpy data such as datetimes which have no equivalent HDF5
    type.  The result is not interoperable: other tools won't treat the
    opaque data as datetimes.

    Raises TypeError for object, structured, sub-array and zero-size dtypes.
    """
    dt = np.dtype(np_dtype)

    # Pick the first applicable rejection reason, checked in this order
    if np.issubdtype(dt, np.object_):
        problem = "Cannot store numpy object arrays as opaque data"
    elif dt.names is not None:
        problem = "Cannot store numpy structured arrays as opaque data"
    elif dt.subdtype is not None:
        problem = "Cannot store numpy sub-array dtype as opaque data"
    elif dt.itemsize == 0:
        problem = "dtype for opaque data must have explicit size"
    else:
        problem = None

    if problem is not None:
        raise TypeError(problem)

    return np.dtype(dt, metadata={'h5py_opaque': True})
1827
1828
# Shared object dtypes hinted (via the 'ref' metadata key) as HDF5 object
# references and region references respectively.
ref_dtype = np.dtype('O', metadata={'ref': Reference})
regionref_dtype = np.dtype('O', metadata={'ref': RegionReference})
1831
1832
@with_phil
def special_dtype(**kwds):
    """ Create a new h5py "special" type.  Exactly one keyword must be given.

    Legal keywords are:

    vlen = basetype
        Base type for HDF5 variable-length datatype. This can be Python
        str type or instance of np.dtype.
        Example: special_dtype( vlen=str )

    enum = (basetype, values_dict)
        Create a NumPy representation of an HDF5 enumerated type.  Provide
        a 2-tuple containing an (integer) base dtype and a dict mapping
        string names to integer values.

    ref = Reference | RegionReference
        Create a NumPy representation of an HDF5 object or region reference
        type.

    Raises TypeError for zero or several keywords, or an unknown keyword.
    """

    if len(kwds) != 1:
        raise TypeError("Exactly one keyword may be provided")

    name, val = kwds.popitem()

    if name == 'vlen':
        return np.dtype('O', metadata={'vlen': val})

    elif name == 'enum':
        try:
            base, mapping = val
        except TypeError:
            raise TypeError("Enums must be created from a 2-tuple (basetype, values_dict)")
        return enum_dtype(mapping, base)

    elif name == 'ref':
        if val not in (Reference, RegionReference):
            raise ValueError("Ref class must be Reference or RegionReference")
        if val is Reference:
            return ref_dtype
        return regionref_dtype

    else:
        raise TypeError('Unknown special type "%s"' % name)
1875
1876
def check_vlen_dtype(dt):
    """Return the base type stored under the 'vlen' metadata key of a dtype.

    Returns None if the dtype does not represent an HDF5 vlen (including
    when it carries no metadata at all).
    """
    try:
        base = dt.metadata.get('vlen', None)
    except AttributeError:
        # No metadata attribute, or metadata is None
        base = None
    return base
1886
# Result type of check_string_dtype: the encoding name and the length
string_info = namedtuple('string_info', ['encoding', 'length'])

def check_string_dtype(dt):
    """Describe a dtype which represents an HDF5 string.

    Returns a string_info object holding the encoding ('utf-8' or 'ascii',
    unless the dtype metadata names something else) and the length: None
    for a variable-length string, or a fixed length in bytes.

    Returns None if the dtype does not represent an HDF5 string.
    """
    vlen_kind = check_vlen_dtype(dt)

    # Variable-length strings are recognised by their vlen hint
    if vlen_kind is unicode:
        return string_info('utf-8', None)
    if vlen_kind is bytes:
        return string_info('ascii', None)

    if dt.kind != 'S':
        return None

    # Fixed-length bytes: encoding defaults to ascii when not recorded
    enc = (dt.metadata or {}).get('h5py_encoding', 'ascii')
    return string_info(enc, dt.itemsize)
1908
def check_enum_dtype(dt):
    """Return the name-to-value mapping stored under the 'enum' metadata key
    of a dtype.

    Returns None if the dtype does not represent an HDF5 enumerated type
    (including when it carries no metadata at all).
    """
    try:
        mapping = dt.metadata.get('enum', None)
    except AttributeError:
        # No metadata attribute, or metadata is None
        mapping = None
    return mapping
1919
def check_opaque_dtype(dt):
    """Return the 'h5py_opaque' metadata flag of a dtype, i.e. True if the
    dtype is tagged to be stored as HDF5 opaque data.

    Returns False when the flag or the metadata is missing.
    """
    try:
        flag = dt.metadata.get('h5py_opaque', False)
    except AttributeError:
        # No metadata attribute, or metadata is None
        flag = False
    return flag
1927
def check_ref_dtype(dt):
    """Return the reference class stored under the 'ref' metadata key of a
    dtype (either Reference or RegionReference for h5py's own dtypes).

    Returns None if the dtype does not represent an HDF5 reference type
    (including when it carries no metadata at all).
    """
    try:
        refclass = dt.metadata.get('ref', None)
    except AttributeError:
        # No metadata attribute, or metadata is None
        refclass = None
    return refclass
1938
@with_phil
def check_dtype(**kwds):
    """ Check a dtype for h5py special type "hint" information.  Exactly one
    keyword must be given.

    vlen = dtype
        If the dtype represents an HDF5 vlen, returns the Python base class.
        Currently only builtin string vlens (str) are supported.  Returns
        None if the dtype does not represent an HDF5 vlen.

    enum = dtype
        If the dtype represents an HDF5 enumerated type, returns the dictionary
        mapping string names to integer values.  Returns None if the dtype does
        not represent an HDF5 enumerated type.

    ref = dtype
        If the dtype represents an HDF5 reference type, returns the reference
        class (either Reference or RegionReference).  Returns None if the dtype
        does not represent an HDF5 reference type.

    Raises TypeError for zero or several keywords, or an unknown keyword.
    """

    if len(kwds) != 1:
        raise TypeError("Exactly one keyword may be provided")

    name, dt = kwds.popitem()

    if name not in ('vlen', 'enum', 'ref'):
        raise TypeError('Unknown special type "%s"' % name)

    # TypeError: metadata is None; KeyError: hint not present
    try:
        return dt.metadata[name]
    except (TypeError, KeyError):
        return None
1974
1975
@with_phil
def convert(TypeID src not None, TypeID dst not None, size_t n,
            cnp.ndarray buf not None, cnp.ndarray bkg=None, ObjectID dxpl=None):
    """ (TypeID src, TypeID dst, UINT n, NDARRAY buf, NDARRAY bkg=None,
    PropID dxpl=None)

    Convert n contiguous elements of a buffer in-place.  The array dtype
    is ignored.  The backing buffer is optional; when either type contains
    a compound class and no backing buffer is supplied, a temporary copy of
    the conversion buffer will be used for backing.
    """
    cdef:
        void* data_ptr = buf.data
        void* bkg_ptr = NULL

    # Only probe the types for compound members when no backing was given
    if bkg is None:
        if src.detect_class(H5T_COMPOUND) or dst.detect_class(H5T_COMPOUND):
            bkg = buf.copy()

    if bkg is not None:
        bkg_ptr = bkg.data

    H5Tconvert(src.id, dst.id, n, data_ptr, bkg_ptr, pdefault(dxpl))
1998
1999
@with_phil
def find(TypeID src not None, TypeID dst not None):
    """ (TypeID src, TypeID dst) => TUPLE or None

    Determine if a conversion path exists from src to dst.  Result is None
    or a tuple describing the conversion path.  Currently tuple entries are:

    1. INT need_bkg:    Whether this routine requires a backing buffer.
                        Values are BKG_NO, BKG_TEMP and BKG_YES.

    Any error raised while looking up the path is reported as None.
    """
    cdef:
        H5T_cdata_t *data
        H5T_conv_t result = NULL

    try:
        result = H5Tfind(src.id, dst.id, &data)
        if result == NULL:
            return None
        return (data[0].need_bkg,)
    except Exception:
        # A failed lookup means "no path".  Only ordinary errors are absorbed;
        # KeyboardInterrupt and SystemExit now propagate instead of being
        # swallowed as they were by the previous bare "except:".
        return None
2021