1# cython: language_level=3 2# This file is part of h5py, a Python interface to the HDF5 library. 3# 4# http://www.h5py.org 5# 6# Copyright 2008-2019 Andrew Collette and contributors 7# 8# License: Standard 3-clause BSD; see "license.txt" for full license terms 9# and contributor agreement. 10 11""" 12 HDF5 "H5T" data-type API 13 14 This module contains the datatype identifier class TypeID, and its 15 subclasses which represent things like integer/float/compound identifiers. 16 The majority of the H5T API is presented as methods on these identifiers. 17""" 18# C-level imports 19include "config.pxi" 20from ._objects cimport pdefault 21cimport numpy as cnp 22from .h5r cimport Reference, RegionReference 23from .h5p cimport PropID, propwrap 24 25 26from .utils cimport emalloc, efree, require_tuple, convert_dims,\ 27 convert_tuple 28 29# Python imports 30import codecs 31import sys 32from collections import namedtuple 33import sys 34import numpy as np 35from .h5 import get_config 36 37from ._objects import phil, with_phil 38 39cfg = get_config() 40 41DEF MACHINE = UNAME_MACHINE # processor architecture, provided by Cython 42cdef char* H5PY_PYTHON_OPAQUE_TAG = "PYTHON:OBJECT" 43 44# === Custom C API ============================================================ 45 46cpdef TypeID typewrap(hid_t id_): 47 48 cdef H5T_class_t cls 49 cls = H5Tget_class(id_) 50 51 if cls == H5T_INTEGER: 52 pcls = TypeIntegerID 53 elif cls == H5T_FLOAT: 54 pcls = TypeFloatID 55 elif cls == H5T_TIME: 56 pcls = TypeTimeID 57 elif cls == H5T_STRING: 58 pcls = TypeStringID 59 elif cls == H5T_BITFIELD: 60 pcls = TypeBitfieldID 61 elif cls == H5T_OPAQUE: 62 pcls = TypeOpaqueID 63 elif cls == H5T_COMPOUND: 64 pcls = TypeCompoundID 65 elif cls == H5T_REFERENCE: 66 pcls = TypeReferenceID 67 elif cls == H5T_ENUM: 68 pcls = TypeEnumID 69 elif cls == H5T_VLEN: 70 pcls = TypeVlenID 71 elif cls == H5T_ARRAY: 72 pcls = TypeArrayID 73 else: 74 pcls = TypeID 75 76 return pcls(id_) 77 78cdef object lockid(hid_t 
id_in): 79 cdef TypeID tid 80 tid = typewrap(id_in) 81 tid.locked = 1 82 return tid 83 84# === Public constants and data structures ==================================== 85 86 87# Enumeration H5T_class_t 88NO_CLASS = H5T_NO_CLASS 89INTEGER = H5T_INTEGER 90FLOAT = H5T_FLOAT 91TIME = H5T_TIME 92STRING = H5T_STRING 93BITFIELD = H5T_BITFIELD 94OPAQUE = H5T_OPAQUE 95COMPOUND = H5T_COMPOUND 96REFERENCE = H5T_REFERENCE 97ENUM = H5T_ENUM 98VLEN = H5T_VLEN 99ARRAY = H5T_ARRAY 100 101# Enumeration H5T_sign_t 102SGN_NONE = H5T_SGN_NONE 103SGN_2 = H5T_SGN_2 104 105# Enumeration H5T_order_t 106ORDER_LE = H5T_ORDER_LE 107ORDER_BE = H5T_ORDER_BE 108ORDER_VAX = H5T_ORDER_VAX 109ORDER_NONE = H5T_ORDER_NONE 110 111DIR_DEFAULT = H5T_DIR_DEFAULT 112DIR_ASCEND = H5T_DIR_ASCEND 113DIR_DESCEND = H5T_DIR_DESCEND 114 115# Enumeration H5T_str_t 116STR_NULLTERM = H5T_STR_NULLTERM 117STR_NULLPAD = H5T_STR_NULLPAD 118STR_SPACEPAD = H5T_STR_SPACEPAD 119 120# Enumeration H5T_norm_t 121NORM_IMPLIED = H5T_NORM_IMPLIED 122NORM_MSBSET = H5T_NORM_MSBSET 123NORM_NONE = H5T_NORM_NONE 124 125# Enumeration H5T_cset_t: 126CSET_ASCII = H5T_CSET_ASCII 127 128# Enumeration H5T_pad_t: 129PAD_ZERO = H5T_PAD_ZERO 130PAD_ONE = H5T_PAD_ONE 131PAD_BACKGROUND = H5T_PAD_BACKGROUND 132 133if sys.byteorder == "little": # Custom python addition 134 ORDER_NATIVE = H5T_ORDER_LE 135else: 136 ORDER_NATIVE = H5T_ORDER_BE 137 138# For conversion 139BKG_NO = H5T_BKG_NO 140BKG_TEMP = H5T_BKG_TEMP 141BKG_YES = H5T_BKG_YES 142 143# --- Built-in HDF5 datatypes ------------------------------------------------- 144 145# IEEE floating-point 146IEEE_F32LE = lockid(H5T_IEEE_F32LE) 147IEEE_F32BE = lockid(H5T_IEEE_F32BE) 148IEEE_F64LE = lockid(H5T_IEEE_F64LE) 149IEEE_F64BE = lockid(H5T_IEEE_F64BE) 150 151# Signed 2's complement integer types 152STD_I8LE = lockid(H5T_STD_I8LE) 153STD_I16LE = lockid(H5T_STD_I16LE) 154STD_I32LE = lockid(H5T_STD_I32LE) 155STD_I64LE = lockid(H5T_STD_I64LE) 156 157STD_I8BE = lockid(H5T_STD_I8BE) 158STD_I16BE = 
lockid(H5T_STD_I16BE) 159STD_I32BE = lockid(H5T_STD_I32BE) 160STD_I64BE = lockid(H5T_STD_I64BE) 161 162# Bitfields 163STD_B8LE = lockid(H5T_STD_B8LE) 164STD_B16LE = lockid(H5T_STD_B16LE) 165STD_B32LE = lockid(H5T_STD_B32LE) 166STD_B64LE = lockid(H5T_STD_B64LE) 167 168STD_B8BE = lockid(H5T_STD_B8BE) 169STD_B16BE = lockid(H5T_STD_B16BE) 170STD_B32BE = lockid(H5T_STD_B32BE) 171STD_B64BE = lockid(H5T_STD_B64BE) 172 173# Unsigned integers 174STD_U8LE = lockid(H5T_STD_U8LE) 175STD_U16LE = lockid(H5T_STD_U16LE) 176STD_U32LE = lockid(H5T_STD_U32LE) 177STD_U64LE = lockid(H5T_STD_U64LE) 178 179STD_U8BE = lockid(H5T_STD_U8BE) 180STD_U16BE = lockid(H5T_STD_U16BE) 181STD_U32BE = lockid(H5T_STD_U32BE) 182STD_U64BE = lockid(H5T_STD_U64BE) 183 184# Native types by bytesize 185NATIVE_B8 = lockid(H5T_NATIVE_B8) 186NATIVE_INT8 = lockid(H5T_NATIVE_INT8) 187NATIVE_UINT8 = lockid(H5T_NATIVE_UINT8) 188NATIVE_B16 = lockid(H5T_NATIVE_B16) 189NATIVE_INT16 = lockid(H5T_NATIVE_INT16) 190NATIVE_UINT16 = lockid(H5T_NATIVE_UINT16) 191NATIVE_B32 = lockid(H5T_NATIVE_B32) 192NATIVE_INT32 = lockid(H5T_NATIVE_INT32) 193NATIVE_UINT32 = lockid(H5T_NATIVE_UINT32) 194NATIVE_B64 = lockid(H5T_NATIVE_B64) 195NATIVE_INT64 = lockid(H5T_NATIVE_INT64) 196NATIVE_UINT64 = lockid(H5T_NATIVE_UINT64) 197NATIVE_FLOAT = lockid(H5T_NATIVE_FLOAT) 198NATIVE_DOUBLE = lockid(H5T_NATIVE_DOUBLE) 199NATIVE_LDOUBLE = lockid(H5T_NATIVE_LDOUBLE) 200 201# Unix time types 202UNIX_D32LE = lockid(H5T_UNIX_D32LE) 203UNIX_D64LE = lockid(H5T_UNIX_D64LE) 204UNIX_D32BE = lockid(H5T_UNIX_D32BE) 205UNIX_D64BE = lockid(H5T_UNIX_D64BE) 206 207# Reference types 208STD_REF_OBJ = lockid(H5T_STD_REF_OBJ) 209STD_REF_DSETREG = lockid(H5T_STD_REF_DSETREG) 210 211# Null terminated (C) and Fortran string types 212C_S1 = lockid(H5T_C_S1) 213FORTRAN_S1 = lockid(H5T_FORTRAN_S1) 214VARIABLE = H5T_VARIABLE 215 216# Character sets 217CSET_ASCII = H5T_CSET_ASCII 218CSET_UTF8 = H5T_CSET_UTF8 219 220# Mini (or short) floats 221IEEE_F16BE = IEEE_F32BE.copy() 
222IEEE_F16BE.set_fields(15, 10, 5, 0, 10) 223IEEE_F16BE.set_size(2) 224IEEE_F16BE.set_ebias(15) 225IEEE_F16BE.lock() 226 227IEEE_F16LE = IEEE_F16BE.copy() 228IEEE_F16LE.set_order(H5T_ORDER_LE) 229IEEE_F16LE.lock() 230 231# Quad floats 232IEEE_F128BE = IEEE_F64BE.copy() 233IEEE_F128BE.set_size(16) 234IEEE_F128BE.set_precision(128) 235IEEE_F128BE.set_fields(127, 112, 15, 0, 112) 236IEEE_F128BE.set_ebias(16383) 237IEEE_F128BE.lock() 238 239IEEE_F128LE = IEEE_F128BE.copy() 240IEEE_F128LE.set_order(H5T_ORDER_LE) 241IEEE_F128LE.lock() 242 243LDOUBLE_LE = NATIVE_LDOUBLE.copy() 244LDOUBLE_LE.set_order(H5T_ORDER_LE) 245LDOUBLE_LE.lock() 246 247LDOUBLE_BE = NATIVE_LDOUBLE.copy() 248LDOUBLE_BE.set_order(H5T_ORDER_BE) 249LDOUBLE_BE.lock() 250 251# Custom Python object pointer type 252cdef hid_t H5PY_OBJ = H5Tcreate(H5T_OPAQUE, sizeof(PyObject*)) 253H5Tset_tag(H5PY_OBJ, H5PY_PYTHON_OPAQUE_TAG) 254H5Tlock(H5PY_OBJ) 255 256PYTHON_OBJECT = lockid(H5PY_OBJ) 257 258# Translation tables for HDF5 -> NumPy dtype conversion 259cdef dict _order_map = { H5T_ORDER_NONE: '|', H5T_ORDER_LE: '<', H5T_ORDER_BE: '>'} 260cdef dict _sign_map = { H5T_SGN_NONE: 'u', H5T_SGN_2: 'i' } 261 262# Available floating point types 263cdef tuple _get_available_ftypes(): 264 cdef: 265 str floating_typecodes = np.typecodes["Float"] 266 str ftc 267 cnp.dtype fdtype 268 list available_ftypes = [] 269 270 for ftc in floating_typecodes: 271 fdtype = np.dtype(ftc) 272 available_ftypes.append( 273 (<object>(fdtype.typeobj), np.finfo(fdtype), fdtype.itemsize) 274 ) 275 276 return tuple(available_ftypes) 277 278cdef tuple _available_ftypes = _get_available_ftypes() 279 280 281cdef (int, int, int) _correct_float_info(ftype_, finfo): 282 nmant = finfo.nmant 283 maxexp = finfo.maxexp 284 minexp = finfo.minexp 285 # workaround for numpy's buggy finfo on float128 on ppc64 archs 286 if ftype_ == np.longdouble and MACHINE == 'ppc64': 287 # values reported by hdf5 288 nmant = 116 289 maxexp = 1024 290 minexp = -1022 291 elif 
ftype_ == np.longdouble and MACHINE == 'ppc64le': 292 # values reported by hdf5 293 nmant = 52 294 maxexp = 1024 295 minexp = -1022 296 elif nmant == 63 and finfo.nexp == 15: 297 # This is an 80-bit float, correct mantissa size 298 nmant += 1 299 300 return nmant, maxexp, minexp 301 302 303# === General datatype operations ============================================= 304 305@with_phil 306def create(int classtype, size_t size): 307 """(INT classtype, UINT size) => TypeID 308 309 Create a new HDF5 type object. Legal class values are 310 COMPOUND and OPAQUE. Use enum_create for enums. 311 """ 312 313 # HDF5 versions 1.6.X segfault with anything else 314 if classtype != H5T_COMPOUND and classtype != H5T_OPAQUE: 315 raise ValueError("Class must be COMPOUND or OPAQUE.") 316 317 return typewrap(H5Tcreate(<H5T_class_t>classtype, size)) 318 319 320@with_phil 321def open(ObjectID group not None, char* name, ObjectID tapl=None): 322 """(ObjectID group, STRING name) => TypeID 323 324 Open a named datatype from a file. 325 If present, tapl must be a datatype access property list. 326 """ 327 return typewrap(H5Topen(group.id, name, pdefault(tapl))) 328 329 330@with_phil 331def array_create(TypeID base not None, object dims_tpl): 332 """(TypeID base, TUPLE dimensions) => TypeArrayID 333 334 Create a new array datatype, using and HDF5 parent type and 335 dimensions given via a tuple of positive integers. "Unlimited" 336 dimensions are not allowed. 337 """ 338 cdef hsize_t rank 339 cdef hsize_t *dims = NULL 340 341 require_tuple(dims_tpl, 0, -1, b"dims_tpl") 342 rank = len(dims_tpl) 343 dims = <hsize_t*>emalloc(sizeof(hsize_t)*rank) 344 345 try: 346 convert_tuple(dims_tpl, dims, rank) 347 return TypeArrayID(H5Tarray_create(base.id, rank, dims)) 348 finally: 349 efree(dims) 350 351 352@with_phil 353def enum_create(TypeID base not None): 354 """(TypeID base) => TypeID 355 356 Create a new enumerated type based on an (integer) parent type. 
357 """ 358 return typewrap(H5Tenum_create(base.id)) 359 360 361@with_phil 362def vlen_create(TypeID base not None): 363 """(TypeID base) => TypeID 364 365 Create a new variable-length datatype, using any HDF5 type as a base. 366 367 Although the Python interface can manipulate these types, there is no 368 provision for reading/writing vlen data. 369 """ 370 return typewrap(H5Tvlen_create(base.id)) 371 372 373@with_phil 374def decode(char* buf): 375 """(STRING buf) => TypeID 376 377 Deserialize an HDF5 type. You can also do this with the native 378 Python pickling machinery. 379 """ 380 return typewrap(H5Tdecode(<unsigned char*>buf)) 381 382 383# === Base type class ========================================================= 384 385cdef class TypeID(ObjectID): 386 387 """ 388 Base class for type identifiers (implements common operations) 389 390 * Hashable: If committed; in HDF5 1.8.X, also if locked 391 * Equality: Logical H5T comparison 392 """ 393 394 def __hash__(self): 395 with phil: 396 if self._hash is None: 397 try: 398 # Try to use object header first 399 return ObjectID.__hash__(self) 400 except TypeError: 401 # It's a transient type object 402 if self.locked: 403 self._hash = hash(self.encode()) 404 else: 405 raise TypeError("Only locked or committed types can be hashed") 406 407 return self._hash 408 409 410 def __richcmp__(self, object other, int how): 411 cdef bint truthval = 0 412 with phil: 413 if how != 2 and how != 3: 414 return NotImplemented 415 if isinstance(other, TypeID): 416 truthval = self.equal(other) 417 418 if how == 2: 419 return truthval 420 return not truthval 421 422 423 def __copy__(self): 424 cdef TypeID cpy 425 with phil: 426 cpy = ObjectID.__copy__(self) 427 return cpy 428 429 430 property dtype: 431 """ A Numpy-style dtype object representing this object. 
432 """ 433 def __get__(self): 434 with phil: 435 return self.py_dtype() 436 437 438 cdef object py_dtype(self): 439 raise TypeError("No NumPy equivalent for %s exists" % self.__class__.__name__) 440 441 442 @with_phil 443 def commit(self, ObjectID group not None, char* name, ObjectID lcpl=None): 444 """(ObjectID group, STRING name, PropID lcpl=None) 445 446 Commit this (transient) datatype to a named datatype in a file. 447 If present, lcpl may be a link creation property list. 448 """ 449 H5Tcommit(group.id, name, self.id, pdefault(lcpl), 450 H5P_DEFAULT, H5P_DEFAULT) 451 452 453 @with_phil 454 def committed(self): 455 """() => BOOL is_comitted 456 457 Determine if a given type object is named (T) or transient (F). 458 """ 459 return <bint>(H5Tcommitted(self.id)) 460 461 462 @with_phil 463 def copy(self): 464 """() => TypeID 465 466 Create a copy of this type object. 467 """ 468 return typewrap(H5Tcopy(self.id)) 469 470 471 @with_phil 472 def equal(self, TypeID typeid): 473 """(TypeID typeid) => BOOL 474 475 Logical comparison between datatypes. Also called by 476 Python's "==" operator. 477 """ 478 return <bint>(H5Tequal(self.id, typeid.id)) 479 480 481 @with_phil 482 def lock(self): 483 """() 484 485 Lock this datatype, which makes it immutable and indestructible. 486 Once locked, it can't be unlocked. 487 """ 488 H5Tlock(self.id) 489 self.locked = 1 490 491 492 @with_phil 493 def get_class(self): 494 """() => INT classcode 495 496 Determine the datatype's class code. 497 """ 498 return <int>H5Tget_class(self.id) 499 500 501 @with_phil 502 def set_size(self, size_t size): 503 """(UINT size) 504 505 Set the total size of the datatype, in bytes. 506 """ 507 H5Tset_size(self.id, size) 508 509 510 @with_phil 511 def get_size(self): 512 """ () => INT size 513 514 Determine the total size of a datatype, in bytes. 
515 """ 516 return H5Tget_size(self.id) 517 518 519 @with_phil 520 def get_super(self): 521 """() => TypeID 522 523 Determine the parent type of an array, enumeration or vlen datatype. 524 """ 525 return typewrap(H5Tget_super(self.id)) 526 527 528 @with_phil 529 def detect_class(self, int classtype): 530 """(INT classtype) => BOOL class_is_present 531 532 Determine if a member of the given class exists in a compound 533 datatype. The search is recursive. 534 """ 535 return <bint>(H5Tdetect_class(self.id, <H5T_class_t>classtype)) 536 537 538 @with_phil 539 def encode(self): 540 """() => STRING 541 542 Serialize an HDF5 type. Bear in mind you can also use the 543 native Python pickle/unpickle machinery to do this. The 544 returned string may contain binary values, including NULLs. 545 """ 546 cdef size_t nalloc = 0 547 cdef char* buf = NULL 548 549 H5Tencode(self.id, NULL, &nalloc) 550 buf = <char*>emalloc(sizeof(char)*nalloc) 551 try: 552 H5Tencode(self.id, <unsigned char*>buf, &nalloc) 553 pystr = PyBytes_FromStringAndSize(buf, nalloc) 554 finally: 555 efree(buf) 556 557 return pystr 558 559 @with_phil 560 def get_create_plist(self): 561 """ () => PropTCID 562 563 Create and return a new copy of the datatype creation property list 564 used when this datatype was created. 565 """ 566 return propwrap(H5Tget_create_plist(self.id)) 567 568 569 def __reduce__(self): 570 with phil: 571 return (type(self), (-1,), self.encode()) 572 573 574 def __setstate__(self, char* state): 575 with phil: 576 self.id = H5Tdecode(<unsigned char*>state) 577 578 579# === Top-level classes (inherit directly from TypeID) ======================== 580 581cdef class TypeArrayID(TypeID): 582 583 """ 584 Represents an array datatype 585 """ 586 587 588 @with_phil 589 def get_array_ndims(self): 590 """() => INT rank 591 592 Get the rank of the given array datatype. 
593 """ 594 return H5Tget_array_ndims(self.id) 595 596 597 @with_phil 598 def get_array_dims(self): 599 """() => TUPLE dimensions 600 601 Get the dimensions of the given array datatype as 602 a tuple of integers. 603 """ 604 cdef hsize_t rank 605 cdef hsize_t* dims = NULL 606 607 rank = H5Tget_array_dims(self.id, NULL) 608 dims = <hsize_t*>emalloc(sizeof(hsize_t)*rank) 609 try: 610 H5Tget_array_dims(self.id, dims) 611 return convert_dims(dims, rank) 612 finally: 613 efree(dims) 614 615 cdef object py_dtype(self): 616 # Numpy translation function for array types 617 cdef TypeID tmp_type 618 tmp_type = self.get_super() 619 620 base_dtype = tmp_type.py_dtype() 621 622 shape = self.get_array_dims() 623 return np.dtype( (base_dtype, shape) ) 624 625 626cdef class TypeOpaqueID(TypeID): 627 628 """ 629 Represents an opaque type 630 """ 631 632 633 @with_phil 634 def set_tag(self, char* tag): 635 """(STRING tag) 636 637 Set a string describing the contents of an opaque datatype. 638 Limited to 256 characters. 639 """ 640 H5Tset_tag(self.id, tag) 641 642 643 @with_phil 644 def get_tag(self): 645 """() => STRING tag 646 647 Get the tag associated with an opaque datatype. 648 """ 649 cdef char* buf = NULL 650 651 try: 652 buf = H5Tget_tag(self.id) 653 assert buf != NULL 654 tag = buf 655 return tag 656 finally: 657 IF HDF5_VERSION >= (1, 8, 13): 658 H5free_memory(buf) 659 ELSE: 660 free(buf) 661 662 cdef object py_dtype(self): 663 cdef bytes tag = self.get_tag() 664 if tag.startswith(b"NUMPY:"): 665 # 6 = len("NUMPY:") 666 return np.dtype(tag[6:], metadata={'h5py_opaque': True}) 667 668 # Numpy translation function for opaque types 669 return np.dtype("|V" + str(self.get_size())) 670 671 672cdef class TypeStringID(TypeID): 673 674 """ 675 String datatypes, both fixed and vlen. 676 """ 677 678 679 @with_phil 680 def is_variable_str(self): 681 """() => BOOL is_variable 682 683 Determine if the given string datatype is a variable-length string. 
684 """ 685 return <bint>(H5Tis_variable_str(self.id)) 686 687 688 @with_phil 689 def get_cset(self): 690 """() => INT character_set 691 692 Retrieve the character set used for a string. 693 """ 694 return <int>H5Tget_cset(self.id) 695 696 697 @with_phil 698 def set_cset(self, int cset): 699 """(INT character_set) 700 701 Set the character set used for a string. 702 """ 703 H5Tset_cset(self.id, <H5T_cset_t>cset) 704 705 706 @with_phil 707 def get_strpad(self): 708 """() => INT padding_type 709 710 Get the padding type. Legal values are: 711 712 STR_NULLTERM 713 NULL termination only (C style) 714 715 STR_NULLPAD 716 Pad buffer with NULLs 717 718 STR_SPACEPAD 719 Pad buffer with spaces (FORTRAN style) 720 """ 721 return <int>H5Tget_strpad(self.id) 722 723 724 @with_phil 725 def set_strpad(self, int pad): 726 """(INT pad) 727 728 Set the padding type. Legal values are: 729 730 STR_NULLTERM 731 NULL termination only (C style) 732 733 STR_NULLPAD 734 Pad buffer with NULLs 735 736 STR_SPACEPAD 737 Pad buffer with spaces (FORTRAN style) 738 """ 739 H5Tset_strpad(self.id, <H5T_str_t>pad) 740 741 742 cdef object py_dtype(self): 743 # Numpy translation function for string types 744 if self.get_cset() == H5T_CSET_ASCII: 745 encoding = 'ascii' 746 elif self.get_cset() == H5T_CSET_UTF8: 747 encoding = 'utf-8' 748 else: 749 raise TypeError("Unknown string encoding (value %d)" % self.get_cset()) 750 751 if self.is_variable_str(): 752 length = None 753 else: 754 length = self.get_size() 755 756 return string_dtype(encoding=encoding, length=length) 757 758cdef class TypeVlenID(TypeID): 759 760 """ 761 Non-string vlen datatypes. 
762 """ 763 764 cdef object py_dtype(self): 765 766 # get base type id 767 cdef TypeID base_type 768 base_type = self.get_super() 769 770 return vlen_dtype(base_type.dtype) 771 772cdef class TypeTimeID(TypeID): 773 774 """ 775 Unix-style time_t (deprecated) 776 """ 777 pass 778 779cdef class TypeBitfieldID(TypeID): 780 781 """ 782 HDF5 bitfield type 783 """ 784 785 @with_phil 786 def get_order(self): 787 """() => INT order 788 789 Obtain the byte order of the datatype; one of: 790 791 - ORDER_LE 792 - ORDER_BE 793 """ 794 return <int>H5Tget_order(self.id) 795 796 cdef object py_dtype(self): 797 798 # Translation function for bitfield types 799 return np.dtype( _order_map[self.get_order()] + 800 'u' + str(self.get_size()) ) 801 802 803cdef class TypeReferenceID(TypeID): 804 805 """ 806 HDF5 object or region reference 807 """ 808 809 cdef object py_dtype(self): 810 if H5Tequal(self.id, H5T_STD_REF_OBJ): 811 return ref_dtype 812 elif H5Tequal(self.id, H5T_STD_REF_DSETREG): 813 return regionref_dtype 814 else: 815 raise TypeError("Unknown reference type") 816 817 818# === Numeric classes (integers and floats) =================================== 819 820cdef class TypeAtomicID(TypeID): 821 822 """ 823 Base class for atomic datatypes (float or integer) 824 """ 825 826 827 @with_phil 828 def get_order(self): 829 """() => INT order 830 831 Obtain the byte order of the datatype; one of: 832 833 - ORDER_LE 834 - ORDER_BE 835 """ 836 return <int>H5Tget_order(self.id) 837 838 839 @with_phil 840 def set_order(self, int order): 841 """(INT order) 842 843 Set the byte order of the datatype; one of: 844 845 - ORDER_LE 846 - ORDER_BE 847 """ 848 H5Tset_order(self.id, <H5T_order_t>order) 849 850 851 @with_phil 852 def get_precision(self): 853 """() => UINT precision 854 855 Get the number of significant bits (excludes padding). 
856 """ 857 return H5Tget_precision(self.id) 858 859 860 @with_phil 861 def set_precision(self, size_t precision): 862 """(UINT precision) 863 864 Set the number of significant bits (excludes padding). 865 """ 866 H5Tset_precision(self.id, precision) 867 868 869 @with_phil 870 def get_offset(self): 871 """() => INT offset 872 873 Get the offset of the first significant bit. 874 """ 875 return H5Tget_offset(self.id) 876 877 878 @with_phil 879 def set_offset(self, size_t offset): 880 """(UINT offset) 881 882 Set the offset of the first significant bit. 883 """ 884 H5Tset_offset(self.id, offset) 885 886 887 @with_phil 888 def get_pad(self): 889 """() => (INT lsb_pad_code, INT msb_pad_code) 890 891 Determine the padding type. Possible values are: 892 893 - PAD_ZERO 894 - PAD_ONE 895 - PAD_BACKGROUND 896 """ 897 cdef H5T_pad_t lsb 898 cdef H5T_pad_t msb 899 H5Tget_pad(self.id, &lsb, &msb) 900 return (<int>lsb, <int>msb) 901 902 903 @with_phil 904 def set_pad(self, int lsb, int msb): 905 """(INT lsb_pad_code, INT msb_pad_code) 906 907 Set the padding type. 
Possible values are: 908 909 - PAD_ZERO 910 - PAD_ONE 911 - PAD_BACKGROUND 912 """ 913 H5Tset_pad(self.id, <H5T_pad_t>lsb, <H5T_pad_t>msb) 914 915 916cdef class TypeIntegerID(TypeAtomicID): 917 918 """ 919 Integer atomic datatypes 920 """ 921 922 923 @with_phil 924 def get_sign(self): 925 """() => INT sign 926 927 Get the "signedness" of the datatype; one of: 928 929 SGN_NONE 930 Unsigned 931 932 SGN_2 933 Signed 2's complement 934 """ 935 return <int>H5Tget_sign(self.id) 936 937 938 @with_phil 939 def set_sign(self, int sign): 940 """(INT sign) 941 942 Set the "signedness" of the datatype; one of: 943 944 SGN_NONE 945 Unsigned 946 947 SGN_2 948 Signed 2's complement 949 """ 950 H5Tset_sign(self.id, <H5T_sign_t>sign) 951 952 cdef object py_dtype(self): 953 # Translation function for integer types 954 return np.dtype( _order_map[self.get_order()] + 955 _sign_map[self.get_sign()] + str(self.get_size()) ) 956 957 958cdef class TypeFloatID(TypeAtomicID): 959 960 """ 961 Floating-point atomic datatypes 962 """ 963 964 965 @with_phil 966 def get_fields(self): 967 """() => TUPLE field_info 968 969 Get information about floating-point bit fields. See the HDF5 970 docs for a full description. Tuple has the following members: 971 972 0. UINT spos 973 1. UINT epos 974 2. UINT esize 975 3. UINT mpos 976 4. UINT msize 977 """ 978 cdef size_t spos, epos, esize, mpos, msize 979 H5Tget_fields(self.id, &spos, &epos, &esize, &mpos, &msize) 980 return (spos, epos, esize, mpos, msize) 981 982 983 @with_phil 984 def set_fields(self, size_t spos, size_t epos, size_t esize, 985 size_t mpos, size_t msize): 986 """(UINT spos, UINT epos, UINT esize, UINT mpos, UINT msize) 987 988 Set floating-point bit fields. Refer to the HDF5 docs for 989 argument definitions. 990 """ 991 H5Tset_fields(self.id, spos, epos, esize, mpos, msize) 992 993 994 @with_phil 995 def get_ebias(self): 996 """() => UINT ebias 997 998 Get the exponent bias. 
999 """ 1000 return H5Tget_ebias(self.id) 1001 1002 1003 @with_phil 1004 def set_ebias(self, size_t ebias): 1005 """(UINT ebias) 1006 1007 Set the exponent bias. 1008 """ 1009 H5Tset_ebias(self.id, ebias) 1010 1011 1012 @with_phil 1013 def get_norm(self): 1014 """() => INT normalization_code 1015 1016 Get the normalization strategy. Legal values are: 1017 1018 - NORM_IMPLIED 1019 - NORM_MSBSET 1020 - NORM_NONE 1021 """ 1022 return <int>H5Tget_norm(self.id) 1023 1024 1025 @with_phil 1026 def set_norm(self, int norm): 1027 """(INT normalization_code) 1028 1029 Set the normalization strategy. Legal values are: 1030 1031 - NORM_IMPLIED 1032 - NORM_MSBSET 1033 - NORM_NONE 1034 """ 1035 H5Tset_norm(self.id, <H5T_norm_t>norm) 1036 1037 1038 @with_phil 1039 def get_inpad(self): 1040 """() => INT pad_code 1041 1042 Determine the internal padding strategy. Legal values are: 1043 1044 - PAD_ZERO 1045 - PAD_ONE 1046 - PAD_BACKGROUND 1047 """ 1048 return <int>H5Tget_inpad(self.id) 1049 1050 1051 @with_phil 1052 def set_inpad(self, int pad_code): 1053 """(INT pad_code) 1054 1055 Set the internal padding strategy. Legal values are: 1056 1057 - PAD_ZERO 1058 - PAD_ONE 1059 - PAD_BACKGROUND 1060 """ 1061 H5Tset_inpad(self.id, <H5T_pad_t>pad_code) 1062 1063 cdef object py_dtype(self): 1064 # Translation function for floating-point types 1065 1066 order = _order_map[self.get_order()] # string with '<' or '>' 1067 1068 s_offset, e_offset, e_size, m_offset, m_size = self.get_fields() 1069 e_bias = self.get_ebias() 1070 1071 # Handle non-standard exponent and mantissa sizes. 
1072 for ftype_, finfo, size in _available_ftypes: 1073 nmant, maxexp, minexp = _correct_float_info(ftype_, finfo) 1074 if (size >= self.get_size() and m_size <= nmant and 1075 (2**e_size - e_bias - 1) <= maxexp and (1 - e_bias) >= minexp): 1076 new_dtype = np.dtype(ftype_).newbyteorder(order) 1077 break 1078 else: 1079 raise ValueError('Insufficient precision in available types to ' + 1080 'represent ' + str(self.get_fields())) 1081 1082 return new_dtype 1083 1084 1085# === Composite types (enums and compound) ==================================== 1086 1087cdef class TypeCompositeID(TypeID): 1088 1089 """ 1090 Base class for enumerated and compound types. 1091 """ 1092 1093 1094 @with_phil 1095 def get_nmembers(self): 1096 """() => INT number_of_members 1097 1098 Determine the number of members in a compound or enumerated type. 1099 """ 1100 return H5Tget_nmembers(self.id) 1101 1102 1103 @with_phil 1104 def get_member_name(self, int member): 1105 """(INT member) => STRING name 1106 1107 Determine the name of a member of a compound or enumerated type, 1108 identified by its index (0 <= member < nmembers). 1109 """ 1110 cdef char* name 1111 name = NULL 1112 1113 if member < 0: 1114 raise ValueError("Member index must be non-negative.") 1115 1116 try: 1117 name = H5Tget_member_name(self.id, member) 1118 assert name != NULL 1119 pyname = <bytes>name 1120 finally: 1121 IF HDF5_VERSION >= (1, 8, 13): 1122 H5free_memory(name) 1123 ELSE: 1124 free(name) 1125 1126 return pyname 1127 1128 1129 @with_phil 1130 def get_member_index(self, char* name): 1131 """(STRING name) => INT index 1132 1133 Determine the index of a member of a compound or enumerated datatype 1134 identified by a string name. 
1135 """ 1136 return H5Tget_member_index(self.id, name) 1137 1138cdef class TypeCompoundID(TypeCompositeID): 1139 1140 """ 1141 Represents a compound datatype 1142 """ 1143 1144 1145 @with_phil 1146 def get_member_class(self, int member): 1147 """(INT member) => INT class 1148 1149 Determine the datatype class of the member of a compound type, 1150 identified by its index (0 <= member < nmembers). 1151 """ 1152 if member < 0: 1153 raise ValueError("Member index must be non-negative.") 1154 return H5Tget_member_class(self.id, member) 1155 1156 1157 @with_phil 1158 def get_member_offset(self, int member): 1159 """(INT member) => INT offset 1160 1161 Determine the offset, in bytes, of the beginning of the specified 1162 member of a compound datatype. 1163 """ 1164 if member < 0: 1165 raise ValueError("Member index must be non-negative.") 1166 return H5Tget_member_offset(self.id, member) 1167 1168 1169 @with_phil 1170 def get_member_type(self, int member): 1171 """(INT member) => TypeID 1172 1173 Create a copy of a member of a compound datatype, identified by its 1174 index. 1175 """ 1176 if member < 0: 1177 raise ValueError("Member index must be non-negative.") 1178 return typewrap(H5Tget_member_type(self.id, member)) 1179 1180 1181 @with_phil 1182 def insert(self, char* name, size_t offset, TypeID field not None): 1183 """(STRING name, UINT offset, TypeID field) 1184 1185 Add a named member datatype to a compound datatype. The parameter 1186 offset indicates the offset from the start of the compound datatype, 1187 in bytes. 1188 """ 1189 H5Tinsert(self.id, name, offset, field.id) 1190 1191 1192 @with_phil 1193 def pack(self): 1194 """() 1195 1196 Recursively removes padding (introduced on account of e.g. compiler 1197 alignment rules) from a compound datatype. 
1198 """ 1199 H5Tpack(self.id) 1200 1201 cdef object py_dtype(self): 1202 cdef: 1203 TypeID tmp_type 1204 list field_names 1205 list field_types 1206 int i, nfields 1207 field_names = [] 1208 field_types = [] 1209 field_offsets = [] 1210 nfields = self.get_nmembers() 1211 1212 # First step: read field names and their Numpy dtypes into 1213 # two separate arrays. 1214 for i in range(nfields): 1215 tmp_type = self.get_member_type(i) 1216 name = self.get_member_name(i) 1217 field_names.append(name) 1218 field_types.append(tmp_type.py_dtype()) 1219 field_offsets.append(self.get_member_offset(i)) 1220 1221 1222 # 1. Check if it should be converted to a complex number 1223 if len(field_names) == 2 and \ 1224 tuple(field_names) == (cfg._r_name, cfg._i_name) and \ 1225 field_types[0] == field_types[1] and \ 1226 field_types[0].kind == 'f': 1227 1228 bstring = field_types[0].str 1229 blen = int(bstring[2:]) 1230 nstring = bstring[0] + "c" + str(2*blen) 1231 typeobj = np.dtype(nstring) 1232 1233 # 2. Otherwise, read all fields of the compound type, in HDF5 order. 1234 else: 1235 field_names = [x.decode('utf8') for x in field_names] 1236 typeobj = np.dtype({'names': field_names, 1237 'formats': field_types, 1238 'offsets': field_offsets, 1239 'itemsize': self.get_size()}) 1240 1241 return typeobj 1242 1243 1244cdef class TypeEnumID(TypeCompositeID): 1245 1246 """ 1247 Represents an enumerated type 1248 """ 1249 1250 cdef int enum_convert(self, long long *buf, int reverse) except -1: 1251 # Convert the long long value in "buf" to the native representation 1252 # of this (enumerated) type. Conversion performed in-place. 
1253 # Reverse: false => llong->type; true => type->llong 1254 1255 cdef hid_t basetype 1256 cdef H5T_class_t class_code 1257 1258 class_code = H5Tget_class(self.id) 1259 if class_code != H5T_ENUM: 1260 raise ValueError("This type (class %d) is not of class ENUM" % class_code) 1261 1262 basetype = H5Tget_super(self.id) 1263 assert basetype > 0 1264 1265 try: 1266 if not reverse: 1267 H5Tconvert(H5T_NATIVE_LLONG, basetype, 1, buf, NULL, H5P_DEFAULT) 1268 else: 1269 H5Tconvert(basetype, H5T_NATIVE_LLONG, 1, buf, NULL, H5P_DEFAULT) 1270 finally: 1271 H5Tclose(basetype) 1272 1273 1274 @with_phil 1275 def enum_insert(self, char* name, long long value): 1276 """(STRING name, INT/LONG value) 1277 1278 Define a new member of an enumerated type. The value will be 1279 automatically converted to the base type defined for this enum. If 1280 the conversion results in overflow, the value will be silently 1281 clipped. 1282 """ 1283 cdef long long buf 1284 1285 buf = value 1286 self.enum_convert(&buf, 0) 1287 H5Tenum_insert(self.id, name, &buf) 1288 1289 1290 @with_phil 1291 def enum_nameof(self, long long value): 1292 """(LONG value) => STRING name 1293 1294 Determine the name associated with the given value. Due to a 1295 limitation of the HDF5 library, this can only retrieve names up to 1296 1023 characters in length. 1297 """ 1298 cdef herr_t retval 1299 cdef char name[1024] 1300 cdef long long buf 1301 1302 buf = value 1303 self.enum_convert(&buf, 0) 1304 retval = H5Tenum_nameof(self.id, &buf, name, 1024) 1305 assert retval >= 0 1306 retstring = name 1307 return retstring 1308 1309 1310 @with_phil 1311 def enum_valueof(self, char* name): 1312 """(STRING name) => LONG value 1313 1314 Get the value associated with an enum name. 
1315 """ 1316 cdef long long buf 1317 1318 H5Tenum_valueof(self.id, name, &buf) 1319 self.enum_convert(&buf, 1) 1320 return buf 1321 1322 1323 @with_phil 1324 def get_member_value(self, int idx): 1325 """(UINT index) => LONG value 1326 1327 Determine the value for the member at the given zero-based index. 1328 """ 1329 cdef herr_t retval 1330 cdef hid_t ptype 1331 cdef long long val 1332 ptype = 0 1333 1334 if idx < 0: 1335 raise ValueError("Index must be non-negative.") 1336 1337 H5Tget_member_value(self.id, idx, &val) 1338 self.enum_convert(&val, 1) 1339 return val 1340 1341 cdef object py_dtype(self): 1342 # Translation function for enum types 1343 1344 cdef TypeID basetype = self.get_super() 1345 1346 nmembers = self.get_nmembers() 1347 members = {} 1348 1349 for idx in range(nmembers): 1350 name = self.get_member_name(idx) 1351 val = self.get_member_value(idx) 1352 members[name] = val 1353 1354 ref = {cfg._f_name: 0, cfg._t_name: 1} 1355 1356 # Boolean types have priority over standard enums 1357 if members == ref: 1358 return np.dtype('bool') 1359 1360 # Convert strings to appropriate representation 1361 members_conv = {} 1362 for name, val in members.iteritems(): 1363 try: # ASCII; 1364 name = name.decode('ascii') 1365 except UnicodeDecodeError: 1366 try: # Non-ascii; all platforms try unicode 1367 name = name.decode('utf8') 1368 except UnicodeDecodeError: 1369 pass # Last resort: return byte string 1370 members_conv[name] = val 1371 return enum_dtype(members_conv, basetype=basetype.py_dtype()) 1372 1373 1374# === Translation from NumPy dtypes to HDF5 type objects ====================== 1375 1376# The following series of native-C functions each translate a specific class 1377# of NumPy dtype into an HDF5 type object. The result is guaranteed to be 1378# transient and unlocked. 
def _get_float_dtype_to_hdf5():
    """Build the lookup tables from NumPy float scalar types to HDF5 types.

    For every entry of ``_available_ftypes`` the candidate HDF5 float types
    are scanned in order, and the first one whose exponent size, mantissa
    size and exponent bias match the NumPy type is recorded.

    Returns a 3-tuple ``(float_le, float_be, float_nt)`` of dicts keyed by
    NumPy scalar type: little-endian, big-endian and native byte order.
    """
    float_le = {}
    float_be = {}
    h5_be_list = [IEEE_F16BE, IEEE_F32BE, IEEE_F64BE, IEEE_F128BE, LDOUBLE_BE]
    h5_le_list = [IEEE_F16LE, IEEE_F32LE, IEEE_F64LE, IEEE_F128LE, LDOUBLE_LE]

    for ftype_, finfo, size in _available_ftypes:
        nmant, maxexp, minexp = _correct_float_info(ftype_, finfo)
        # Same matching rule for both byte orders; big-endian is scanned
        # first, then little-endian.
        for table, candidates in ((float_be, h5_be_list),
                                  (float_le, h5_le_list)):
            for h5type in candidates:
                spos, epos, esize, mpos, msize = h5type.get_fields()
                ebias = h5type.get_ebias()
                if (finfo.iexp == esize and nmant == msize and
                        (maxexp - 1) == ebias):
                    table[ftype_] = h5type
                    break # first found matches, related to #1244
    if ORDER_NATIVE == H5T_ORDER_LE:
        float_nt = dict(float_le)
    else:
        float_nt = dict(float_be)
    return float_le, float_be, float_nt

cdef dict _float_le
cdef dict _float_be
cdef dict _float_nt
_float_le, _float_be, _float_nt = _get_float_dtype_to_hdf5()

# Integer lookup tables: item size in bytes -> HDF5 type identifier.
cdef dict _int_le = {1: H5Tcopy(H5T_STD_I8LE), 2: H5Tcopy(H5T_STD_I16LE), 4: H5Tcopy(H5T_STD_I32LE), 8: H5Tcopy(H5T_STD_I64LE)}
cdef dict _int_be = {1: H5Tcopy(H5T_STD_I8BE), 2: H5Tcopy(H5T_STD_I16BE), 4: H5Tcopy(H5T_STD_I32BE), 8: H5Tcopy(H5T_STD_I64BE)}
cdef dict _int_nt = {1: H5Tcopy(H5T_NATIVE_INT8), 2: H5Tcopy(H5T_NATIVE_INT16), 4: H5Tcopy(H5T_NATIVE_INT32), 8: H5Tcopy(H5T_NATIVE_INT64)}

cdef dict _uint_le = {1: H5Tcopy(H5T_STD_U8LE), 2: H5Tcopy(H5T_STD_U16LE), 4: H5Tcopy(H5T_STD_U32LE), 8: H5Tcopy(H5T_STD_U64LE)}
cdef dict _uint_be = {1: H5Tcopy(H5T_STD_U8BE), 2: H5Tcopy(H5T_STD_U16BE), 4: H5Tcopy(H5T_STD_U32BE), 8: H5Tcopy(H5T_STD_U64BE)}
cdef dict _uint_nt = {1: H5Tcopy(H5T_NATIVE_UINT8), 2: H5Tcopy(H5T_NATIVE_UINT16), 4: H5Tcopy(H5T_NATIVE_UINT32), 8: H5Tcopy(H5T_NATIVE_UINT64)}

cdef TypeFloatID _c_float(cnp.dtype dt):
    # Floats (single and double).  Picks the table matching the dtype's byte
    # order and returns a copy of the stored type.  Raises TypeError when the
    # NumPy scalar type has no entry in the table.
    cdef TypeFloatID tid

    # Go through the Python-level dtype object to get the scalar type used
    # as the table key.
    scalar_type = np.dtype(dt).type

    try:
        if dt.byteorder == c'<':
            tid = _float_le[scalar_type]
        elif dt.byteorder == c'>':
            tid = _float_be[scalar_type]
        else:
            tid = _float_nt[scalar_type]
    except KeyError:
        raise TypeError("Unsupported float type (%s)" % dt)

    return tid.copy()

cdef TypeIntegerID _c_int(cnp.dtype dt):
    # Integers (ints and uints).  Looks up the HDF5 type by kind, byte order
    # and item size, and returns a fresh copy.  Raises TypeError for a
    # non-integer kind or an item size missing from the tables.
    cdef hid_t tid

    try:
        if dt.kind == c'i':
            if dt.byteorder == c'<':
                tid = _int_le[dt.itemsize]
            elif dt.byteorder == c'>':
                tid = _int_be[dt.itemsize]
            else:
                tid = _int_nt[dt.itemsize]
        elif dt.kind == c'u':
            if dt.byteorder == c'<':
                tid = _uint_le[dt.itemsize]
            elif dt.byteorder == c'>':
                tid = _uint_be[dt.itemsize]
            else:
                tid = _uint_nt[dt.itemsize]
        else:
            raise TypeError('Illegal int kind "%s"' % dt.kind)
    except KeyError:
        raise TypeError("Unsupported integer size (%s)" % dt.itemsize)

    return TypeIntegerID(H5Tcopy(tid))


cdef TypeEnumID _c_enum(cnp.dtype dt, dict vals):
    # Enums.  The integer dtype provides the base type; members are inserted
    # in sorted name order, with non-bytes names encoded as UTF-8.
    cdef:
        TypeIntegerID base
        TypeEnumID out

    base = _c_int(dt)

    out = TypeEnumID(H5Tenum_create(base.id))
    for name in sorted(vals):
        if isinstance(name, bytes):
            bname = name
        else:
            bname = unicode(name).encode('utf8')
        out.enum_insert(bname, vals[name])
    return out


cdef TypeEnumID _c_bool(cnp.dtype dt):
    # Booleans: an enum over native int8 whose two members use the
    # configured false/true names with values 0 and 1.
    global cfg

    cdef TypeEnumID out
    out = TypeEnumID(H5Tenum_create(H5T_NATIVE_INT8))

    out.enum_insert(cfg._f_name, 0)
    out.enum_insert(cfg._t_name, 1)

    return out


cdef TypeArrayID _c_array(cnp.dtype dt, int logical):
    # Arrays.  Splits the sub-array dtype into base dtype and shape, accepts
    # either a sequence or a single integer as the shape, and raises
    # TypeError for anything else.
    cdef:
        cnp.dtype base
        TypeID type_base
        object shape

    base, shape = dt.subdtype
    try:
        shape = tuple(shape)
    except TypeError:
        try:
            shape = (int(shape),)
        except TypeError:
            raise TypeError("Array shape for dtype must be a sequence or integer")
    type_base = py_create(base, logical=logical)
    return array_create(type_base, shape)


cdef TypeOpaqueID _c_opaque(cnp.dtype dt):
    # Opaque: a type of the same size as the dtype's items.
    return TypeOpaqueID(H5Tcreate(H5T_OPAQUE, dt.itemsize))


cdef TypeOpaqueID _c_opaque_tagged(cnp.dtype dt):
    """Create an HDF5 opaque data type with a tag recording the numpy dtype.

    Tagged opaque types can be read back easily in h5py, but not in other tools
    (they are *opaque*).

    The default tag is generated via the code:
    ``b"NUMPY:" + dt.descr[0][1].encode()``.
    """
    cdef TypeOpaqueID new_type = _c_opaque(dt)
    new_type.set_tag(b"NUMPY:" + dt.descr[0][1].encode())

    return new_type

cdef TypeStringID _c_string(cnp.dtype dt):
    # Strings (fixed-length).  Null-padded, sized like the dtype's items;
    # the character set is switched to UTF-8 only when the dtype metadata
    # carries h5py_encoding == 'utf-8'.
    cdef hid_t tid

    tid = H5Tcopy(H5T_C_S1)
    H5Tset_size(tid, dt.itemsize)
    H5Tset_strpad(tid, H5T_STR_NULLPAD)
    if dt.metadata and dt.metadata.get('h5py_encoding') == 'utf-8':
        H5Tset_cset(tid, H5T_CSET_UTF8)
    return TypeStringID(tid)

cdef TypeCompoundID _c_complex(cnp.dtype dt):
    # Complex numbers (names depend on cfg).  Builds a two-member compound
    # whose members are named after the configured real/imaginary fields.
    # Raises TypeError for item sizes other than 8, 16 or (when compiled
    # with COMPLEX256_SUPPORT) 32 bytes.
    global cfg

    cdef hid_t tid, tid_sub
    cdef size_t size, off_r, off_i

    cdef size_t length = dt.itemsize
    cdef char byteorder = dt.byteorder

    if length == 8:
        size = h5py_size_n64
        off_r = h5py_offset_n64_real
        off_i = h5py_offset_n64_imag
        if byteorder == c'<':
            tid_sub = H5T_IEEE_F32LE
        elif byteorder == c'>':
            tid_sub = H5T_IEEE_F32BE
        else:
            tid_sub = H5T_NATIVE_FLOAT
    elif length == 16:
        size = h5py_size_n128
        off_r = h5py_offset_n128_real
        off_i = h5py_offset_n128_imag
        if byteorder == c'<':
            tid_sub = H5T_IEEE_F64LE
        elif byteorder == c'>':
            tid_sub = H5T_IEEE_F64BE
        else:
            tid_sub = H5T_NATIVE_DOUBLE

    elif length == 32:
        IF COMPLEX256_SUPPORT:
            # Byte order is not consulted here; the native long double type
            # is always used.
            size = h5py_size_n256
            off_r = h5py_offset_n256_real
            off_i = h5py_offset_n256_imag
            tid_sub = H5T_NATIVE_LDOUBLE
        ELSE:
            raise TypeError("Illegal length %d for complex dtype" % length)
    else:
        raise TypeError("Illegal length %d for complex dtype" % length)

    tid = H5Tcreate(H5T_COMPOUND, size)
    H5Tinsert(tid, cfg._r_name, off_r, tid_sub)
    H5Tinsert(tid, cfg._i_name, off_i, tid_sub)

    return TypeCompoundID(tid)

cdef TypeCompoundID _c_compound(cnp.dtype dt, int logical, int aligned):
    # Compound datatypes.  When "aligned" is true, a TypeError is raised if
    # the HDF5 layout cannot reproduce the NumPy offsets and sizes exactly.
    cdef:
        hid_t tid
        TypeID member_type
        object member_dt
        size_t member_offset = 0
        dict fields = {}

    # The challenge with correctly converting a numpy/h5py dtype to a HDF5 type
    # which is composed of subtypes has three aspects we must consider
    # 1. numpy/h5py dtypes do not always have the same size as HDF5, even when
    #   equivalent (can result in overlapping elements if not careful)
    # 2. For correct round-tripping of aligned dtypes, we need to consider how
    #   much padding we need by looking at the field offsets
    # 3. There is no requirement that the offsets be monotonically increasing
    #
    # The code below tries to cover these aspects

    # Build list of names, offsets, and types, sorted by increasing offset
    # (i.e. the position of the member in the struct)
    for name in sorted(dt.names, key=(lambda n: dt.fields[n][1])):
        field = dt.fields[name]
        h5_name = name.encode('utf8') if isinstance(name, unicode) else name

        # Get HDF5 data types and set the offset for each member
        member_dt = field[0]
        member_offset = max(member_offset, field[1])
        member_type = py_create(member_dt, logical=logical, aligned=aligned)
        if aligned and (member_offset > field[1]
                        or member_dt.itemsize != member_type.get_size()):
            raise TypeError("Enforced alignment not compatible with HDF5 type")
        fields[name] = (h5_name, member_offset, member_type)

        # Update member offset based on the HDF5 type size
        member_offset += member_type.get_size()

    member_offset = max(member_offset, dt.itemsize)
    if aligned and member_offset > dt.itemsize:
        raise TypeError("Enforced alignment not compatible with HDF5 type")

    # Create compound with the necessary size, and insert its members
    # in the dtype's own field order.
    tid = H5Tcreate(H5T_COMPOUND, member_offset)
    for name in dt.names:
        h5_name, member_offset, member_type = fields[name]
        H5Tinsert(tid, h5_name, member_offset, member_type.id)

    return TypeCompoundID(tid)

cdef TypeStringID _c_vlen_str():
    # Variable-length strings
    cdef hid_t tid
    tid = H5Tcopy(H5T_C_S1)
    H5Tset_size(tid, H5T_VARIABLE)
    return TypeStringID(tid)

cdef TypeStringID _c_vlen_unicode():
    # Variable-length strings with the UTF-8 character set
    cdef hid_t tid
    tid = H5Tcopy(H5T_C_S1)
    H5Tset_size(tid, H5T_VARIABLE)
    H5Tset_cset(tid, H5T_CSET_UTF8)
    return TypeStringID(tid)

cdef TypeReferenceID _c_ref(object refclass):
    # References: maps the reference class to the matching module-level
    # reference type; raises TypeError for any other class.
    if refclass is Reference:
        return STD_REF_OBJ
    elif refclass is RegionReference:
        return STD_REF_DSETREG
    raise TypeError("Unrecognized reference code")


cpdef TypeID py_create(object dtype_in, bint logical=0, bint aligned=0):
    """(OBJECT dtype_in, BOOL logical=False, BOOL aligned=False) => TypeID

    Given a Numpy dtype object, generate a byte-for-byte memory-compatible
    HDF5 datatype object.  The result is guaranteed to be transient and
    unlocked.

    :param dtype_in: may be a dtype object, or anything which can be
        converted to a dtype, including strings like '<i4' or an "int".
    :param logical: when this flag is set, instead of returning a byte-for-byte
        identical representation of the type, the function returns the closest
        logically appropriate HDF5 type.  For example, in the case of a "hinted"
        dtype of kind "O" representing a string, it would return an HDF5 variable-
        length string type.
    :param aligned: when set, compound types must reproduce the NumPy field
        offsets and item sizes exactly; a TypeError is raised otherwise.  If
        dtype_in has an ``isalignedstruct`` attribute, that value replaces
        this argument.

    Raises TypeError when there is no conversion path for the dtype.
    """
    cdef:
        cnp.dtype dt
        char kind

    dt = np.dtype(dtype_in)
    # dt is now the C side of dtype_in. Sometimes the Python behaviour is easier to handle than the C-version
    kind = dt.kind
    aligned = getattr(dtype_in, "isalignedstruct", aligned)

    with phil:
        # Tagged opaque data
        if check_opaque_dtype(dt):
            return _c_opaque_tagged(dt)

        # Float
        if kind == c'f':
            return _c_float(dt)

        # Integer
        elif kind == c'u' or kind == c'i':

            if logical:
                # Check for an enumeration hint
                enum_vals = check_enum_dtype(dt)
                if enum_vals is not None:
                    return _c_enum(dt, enum_vals)

            return _c_int(dt)

        # Complex
        elif kind == c'c':
            return _c_complex(dt)

        # Compound. The explicit cast to object is used to force Python attribute access,
        # as dt.names may be a NULL pointer at the C level when undefined.
        elif (kind == c'V') and ((<object> dt).names is not None):
            return _c_compound(dt, logical, aligned)

        # Array or opaque
        elif kind == c'V':
            if dt.subdtype is not None:
                return _c_array(dt, logical)
            else:
                return _c_opaque(dt)

        # String
        elif kind == c'S':
            return _c_string(dt)

        # Boolean
        elif kind == c'b':
            return _c_bool(dt)

        # Object types (including those with vlen hints)
        elif kind == c'O':

            if logical:
                vlen = check_vlen_dtype(dt)
                if vlen is bytes:
                    return _c_vlen_str()
                elif vlen is unicode:
                    return _c_vlen_unicode()
                elif vlen is not None:
                    return vlen_create(py_create(vlen, logical))

                refclass = check_ref_dtype(dt)
                if refclass is not None:
                    return _c_ref(refclass)

                raise TypeError("Object dtype %r has no native HDF5 equivalent" % (dt,))

            return PYTHON_OBJECT

        # Unrecognized
        else:
            raise TypeError("No conversion path for dtype: %s" % repr(dt))

def vlen_dtype(basetype):
    """Make a numpy dtype for an HDF5 variable-length datatype

    For variable-length string dtypes, use :func:`string_dtype` instead.
    """
    return np.dtype('O', metadata={'vlen': basetype})

def string_dtype(encoding='utf-8', length=None):
    """Make a numpy dtype for HDF5 strings

    encoding may be 'utf-8' or 'ascii'.

    length may be an integer for a fixed length string dtype, or None for
    variable length strings. String lengths for HDF5 are counted in bytes,
    not unicode code points.

    For variable length strings, the data should be passed as Python str objects
    (unicode in Python 2) if the encoding is 'utf-8', and bytes if it is 'ascii'.
    For fixed length strings, the data should be numpy fixed length *bytes*
    arrays, regardless of the encoding. Fixed length unicode data is not
    supported.

    Raises ValueError for any other encoding, and TypeError if length is
    neither an integer nor None.
    """
    # Normalise encoding name:
    try:
        encoding = codecs.lookup(encoding).name
    except LookupError:
        pass  # Use our error below

    if encoding not in {'ascii', 'utf-8'}:
        raise ValueError("Invalid encoding (%r); 'utf-8' or 'ascii' allowed"
                         % encoding)

    if isinstance(length, int):
        # Fixed length string
        return np.dtype("|S" + str(length), metadata={'h5py_encoding': encoding})
    elif length is None:
        vlen = unicode if (encoding == 'utf-8') else bytes
        return np.dtype('O', metadata={'vlen': vlen})
    else:
        raise TypeError("length must be integer or None (got %r)" % length)

def enum_dtype(values_dict, basetype=np.uint8):
    """Create a NumPy representation of an HDF5 enumerated type

    *values_dict* maps string names to integer values. *basetype* is an
    appropriate integer base dtype large enough to hold the possible options.

    Raises TypeError if *basetype* is not an integer dtype.
    """
    dt = np.dtype(basetype)
    if not np.issubdtype(dt, np.integer):
        raise TypeError("Only integer types can be used as enums")

    return np.dtype(dt, metadata={'enum': values_dict})


def opaque_dtype(np_dtype):
    """Return an equivalent dtype tagged to be stored in an HDF5 opaque type.

    This makes it easy to store numpy data like datetimes for which there is
    no equivalent HDF5 type, but it's not interoperable: other tools won't treat
    the opaque data as datetimes.

    Raises TypeError for object, structured, sub-array and zero-size dtypes.
    """
    dt = np.dtype(np_dtype)
    if np.issubdtype(dt, np.object_):
        raise TypeError("Cannot store numpy object arrays as opaque data")
    if dt.names is not None:
        raise TypeError("Cannot store numpy structured arrays as opaque data")
    if dt.subdtype is not None:
        raise TypeError("Cannot store numpy sub-array dtype as opaque data")
    if dt.itemsize == 0:
        raise TypeError("dtype for opaque data must have explicit size")

    return np.dtype(dt, metadata={'h5py_opaque': True})


# Object dtypes tagged as HDF5 object / region references.
ref_dtype = np.dtype('O', metadata={'ref': Reference})
regionref_dtype = np.dtype('O', metadata={'ref': RegionReference})


@with_phil
def special_dtype(**kwds):
    """ Create a new h5py "special" type.  Only one keyword may be given.

    Legal keywords are:

    vlen = basetype
        Base type for HDF5 variable-length datatype. This can be Python
        str type or instance of np.dtype.
        Example: special_dtype( vlen=str )

    enum = (basetype, values_dict)
        Create a NumPy representation of an HDF5 enumerated type.  Provide
        a 2-tuple containing an (integer) base dtype and a dict mapping
        string names to integer values.

    ref = Reference | RegionReference
        Create a NumPy representation of an HDF5 object or region reference
        type.

    Raises TypeError for zero or several keywords, an unknown keyword, or an
    ``enum`` value that is not a 2-tuple; raises ValueError for an invalid
    ``ref`` class.
    """

    if len(kwds) != 1:
        raise TypeError("Exactly one keyword may be provided")

    name, val = kwds.popitem()

    if name == 'vlen':
        return np.dtype('O', metadata={'vlen': val})

    if name == 'enum':
        try:
            dt, enum_vals = val
        except (TypeError, ValueError):
            # TypeError: val is not iterable; ValueError: wrong number of
            # items.  Both mean "not a 2-tuple".
            raise TypeError("Enums must be created from a 2-tuple (basetype, values_dict)")
        return enum_dtype(enum_vals, dt)

    if name == 'ref':
        if val not in (Reference, RegionReference):
            raise ValueError("Ref class must be Reference or RegionReference")
        return ref_dtype if (val is Reference) else regionref_dtype

    raise TypeError('Unknown special type "%s"' % name)


def check_vlen_dtype(dt):
    """If the dtype represents an HDF5 vlen, returns the Python base class.

    Returns None if the dtype does not represent an HDF5 vlen.
    """
    try:
        return dt.metadata.get('vlen', None)
    except AttributeError:
        return None

string_info = namedtuple('string_info', ['encoding', 'length'])

def check_string_dtype(dt):
    """If the dtype represents an HDF5 string, returns a string_info object.

    The returned string_info object holds the encoding and the length.
    The encoding can only be 'utf-8' or 'ascii'. The length may be None
    for a variable-length string, or a fixed length in bytes.

    Returns None if the dtype does not represent an HDF5 string.
    """
    vlen_kind = check_vlen_dtype(dt)
    if vlen_kind is unicode:
        return string_info('utf-8', None)
    elif vlen_kind is bytes:
        return string_info('ascii', None)
    elif dt.kind == 'S':
        # Fixed-length bytes default to ASCII unless tagged otherwise.
        enc = (dt.metadata or {}).get('h5py_encoding', 'ascii')
        return string_info(enc, dt.itemsize)
    else:
        return None

def check_enum_dtype(dt):
    """If the dtype represents an HDF5 enumerated type, returns the dictionary
    mapping string names to integer values.

    Returns None if the dtype does not represent an HDF5 enumerated type.
    """
    try:
        return dt.metadata.get('enum', None)
    except AttributeError:
        return None

def check_opaque_dtype(dt):
    """Return True if the dtype given is tagged to be stored as HDF5 opaque data
    """
    try:
        return dt.metadata.get('h5py_opaque', False)
    except AttributeError:
        return False

def check_ref_dtype(dt):
    """If the dtype represents an HDF5 reference type, returns the reference
    class (either Reference or RegionReference).

    Returns None if the dtype does not represent an HDF5 reference type.
    """
    try:
        return dt.metadata.get('ref', None)
    except AttributeError:
        return None

@with_phil
def check_dtype(**kwds):
    """ Check a dtype for h5py special type "hint" information.  Only one
    keyword may be given.

    vlen = dtype
        If the dtype represents an HDF5 vlen, returns the Python base class.
        Currently only builtin string vlens (str) are supported.  Returns
        None if the dtype does not represent an HDF5 vlen.

    enum = dtype
        If the dtype represents an HDF5 enumerated type, returns the dictionary
        mapping string names to integer values.  Returns None if the dtype does
        not represent an HDF5 enumerated type.

    ref = dtype
        If the dtype represents an HDF5 reference type, returns the reference
        class (either Reference or RegionReference).  Returns None if the dtype
        does not represent an HDF5 reference type.

    Raises TypeError for zero or several keywords or an unknown keyword.
    """

    if len(kwds) != 1:
        raise TypeError("Exactly one keyword may be provided")

    name, dt = kwds.popitem()

    if name not in ('vlen', 'enum', 'ref'):
        raise TypeError('Unknown special type "%s"' % name)

    try:
        return dt.metadata[name]
    except (TypeError, KeyError, AttributeError):
        # TypeError: metadata is None; KeyError: hint not present;
        # AttributeError: object has no metadata at all.
        return None


@with_phil
def convert(TypeID src not None, TypeID dst not None, size_t n,
            cnp.ndarray buf not None, cnp.ndarray bkg=None, ObjectID dxpl=None):
    """ (TypeID src, TypeID dst, UINT n, NDARRAY buf, NDARRAY bkg=None,
    PropID dxpl=None)

    Convert n contiguous elements of a buffer in-place.  The array dtype
    is ignored.  The backing buffer is optional; for conversion of compound
    types, a temporary copy of conversion buffer will used for backing if
    one is not supplied.
    """
    cdef:
        void* bkg_ = NULL
        void* buf_ = buf.data

    # NOTE(review): n is passed to HDF5 unchecked against the size of buf;
    # the caller appears to be responsible for a large enough buffer.
    if bkg is None and (src.detect_class(H5T_COMPOUND) or
                        dst.detect_class(H5T_COMPOUND)):
        bkg = buf.copy()
    if bkg is not None:
        bkg_ = bkg.data

    H5Tconvert(src.id, dst.id, n, buf_, bkg_, pdefault(dxpl))


@with_phil
def find(TypeID src not None, TypeID dst not None):
    """ (TypeID src, TypeID dst) => TUPLE or None

    Determine if a conversion path exists from src to dst.  Result is None
    or a tuple describing the conversion path.  Currently tuple entries are:

    1. INT need_bkg:    Whether this routine requires a backing buffer.
                        Values are BKG_NO, BKG_TEMP and BKG_YES.
    """
    cdef:
        H5T_cdata_t *data
        H5T_conv_t result = NULL

    try:
        result = H5Tfind(src.id, dst.id, &data)
        if result == NULL:
            return None
        return (data[0].need_bkg,)
    except Exception:
        # A failed lookup is reported as "no conversion path"; interpreter
        # exits and keyboard interrupts are deliberately not swallowed.
        return None