# cython: profile=False
# cython: language_level=3
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2019 Andrew Collette and contributors
#
# License:  Standard 3-clause BSD; see "license.txt" for full license terms
#           and contributor agreement.

"""
    Low-level type-conversion routines.
"""
include "config.pxi"

from logging import getLogger

from .h5 import get_config
from .h5r cimport Reference, RegionReference, hobj_ref_t, hdset_reg_ref_t
from .h5t cimport H5PY_OBJ, typewrap, py_create, TypeID, H5PY_PYTHON_OPAQUE_TAG
from libc.stdlib cimport realloc
from libc.string cimport strcmp
from .utils cimport emalloc, efree
cfg = get_config()

# Initialization of numpy
cimport numpy as cnp
from numpy cimport npy_intp, NPY_WRITEABLE, NPY_C_CONTIGUOUS, NPY_OWNDATA
cnp._import_array()
import numpy as np

from cpython.buffer cimport (
    PyObject_GetBuffer, PyBuffer_ToContiguous, PyBuffer_Release, PyBUF_INDIRECT
)
from cpython.object cimport PyObject
from cpython.ref cimport Py_INCREF, Py_XDECREF, Py_XINCREF

logger = getLogger(__name__)

# Raw pointer to the None singleton, for pointer comparisons against buffer slots.
cdef PyObject* Py_None = <PyObject*> None

cdef extern from "numpy/arrayobject.h":
    void PyArray_ENABLEFLAGS(cnp.ndarray arr, int flags)


# Per-element conversion operator: (input, output, background, private context).
ctypedef int (*conv_operator_t)(void* ipt, void* opt, void* bkg, void* priv) except -1
# Initialisation operator: validates the type pair and allocates the private context.
ctypedef herr_t (*init_operator_t)(hid_t src, hid_t dst, void** priv) except -1

# Generic conversion callback
#
# The actual conversion routines are one-liners which plug the appropriate
# operator callback into this function.  This prevents us from having to
# repeat all the conversion boilerplate for every single callback.
#
# While this is somewhat slower than a custom function, the added overhead is
# likely small compared to the cost of the Python-side API calls required to
# implement the conversions.
#
# Dispatches on cdata[0].command:
#   H5T_CONV_INIT  -> record need_bkg and delegate to `initop`
#   H5T_CONV_FREE  -> release the private context stored in cdata[0].priv
#   H5T_CONV_CONV  -> call `op` once per element, in place
# Returns 0 on success and -2 for an unrecognized command (or whatever
# `initop` returns during INIT).
cdef herr_t generic_converter(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl, conv_operator_t op,
                    init_operator_t initop, H5T_bkg_t need_bkg) except -1:
    cdef:
        int command
        conv_size_t *sizes
        int i
        char* buf = <char*>buf_i
        char* bkg = <char*>bkg_i

    command = cdata[0].command
    if command == H5T_CONV_INIT:
        cdata[0].need_bkg = need_bkg
        return initop(src_id, dst_id, &(cdata[0].priv))

    elif command == H5T_CONV_FREE:
        efree(cdata[0].priv)
        cdata[0].priv = NULL

    elif command == H5T_CONV_CONV:
        sizes = <conv_size_t*>cdata[0].priv
        # Record the character set of whichever side is a variable-length
        # string; conv_str2vlen reads it to choose the encoding.
        if H5Tis_variable_str(src_id):
            sizes.cset = H5Tget_cset(src_id)
        elif H5Tis_variable_str(dst_id):
            sizes.cset = H5Tget_cset(dst_id)
        if bkg_stride==0:
            bkg_stride = sizes[0].dst_size
        if buf_stride == 0:
            # No explicit stride seems to mean that the elements are packed
            # contiguously in the buffer.  In this case we must be careful
            # not to "stomp on" input elements if the output elements are
            # of a larger size.

            if sizes[0].src_size >= sizes[0].dst_size:
                # Output is not larger: walk forwards.
                for i in range(nl):
                    op( buf + (i*sizes[0].src_size),    # input pointer
                        buf + (i*sizes[0].dst_size),    # output pointer
                        bkg + (i*bkg_stride),           # backing buffer
                        cdata[0].priv)                  # conversion context
            else:
                # Output is larger: walk backwards so unread inputs survive.
                for i in range(nl-1, -1, -1):
                    op( buf + (i*sizes[0].src_size),
                        buf + (i*sizes[0].dst_size),
                        bkg + (i*bkg_stride),
                        cdata[0].priv)
        else:
            # With explicit strides, we assume that the library knows the
            # alignment better than us.  Therefore we use the given stride
            # offsets exclusively.
            for i in range(nl):
                op( buf + (i*buf_stride),
                    buf + (i*buf_stride),   # note this is the same!
                    bkg + (i*bkg_stride),
                    cdata[0].priv)
    else:
        return -2   # Unrecognized command.  Note this is NOT an exception.
    return 0

# =============================================================================
# Helper functions

# Emit a debug log record naming the HDF5 type classes of a converter pair.
cdef void log_convert_registered(hid_t src, hid_t dst):
    logger.debug("Creating converter from %s to %s", H5Tget_class(src), H5Tget_class(dst))


# =============================================================================
# Generic conversion

# Private context shared by the generic_converter-based converters.
ctypedef struct conv_size_t:
    size_t src_size     # H5Tget_size of the source type
    size_t dst_size     # H5Tget_size of the destination type
    int cset            # character set; filled in by generic_converter at CONV time

# Init operator that accepts any type pair and only records the element sizes.
cdef herr_t init_generic(hid_t src, hid_t dst, void** priv) except -1:

    cdef conv_size_t *sizes
    sizes = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = sizes
    sizes[0].src_size = H5Tget_size(src)
    sizes[0].dst_size = H5Tget_size(dst)
    log_convert_registered(src, dst)

    return 0

# =============================================================================
# Vlen string conversion

# Return True when `obj` is an opaque type whose tag equals
# H5PY_PYTHON_OPAQUE_TAG, i.e. the type h5py uses for Python object pointers.
cdef bint _is_pyobject_opaque(hid_t obj):
    # This complexity is needed to ensure:
    #   1) That ctag is freed
    #   2) We don't segfault (for some reason a try-finally statement is needed,
    #   even if we do (what I think are) the right steps in copying and freeing.
    cdef char* ctag = NULL
    try:
        if H5Tget_class(obj) == H5T_OPAQUE:
            ctag = H5Tget_tag(obj)
            if ctag != NULL:
                if strcmp(ctag, H5PY_PYTHON_OPAQUE_TAG) == 0:
                    return True
        return False
    finally:
        IF HDF5_VERSION >= (1, 8, 13):
            H5free_memory(ctag)
        ELSE:
            free(ctag)

# Init operator for vlen string -> Python object.  Returns -2 (not an
# exception) when the type pair is not one this converter handles.
cdef herr_t init_vlen2str(hid_t src_vlen, hid_t dst_str, void** priv) except -1:
    # /!\ Untested
    cdef conv_size_t *sizes

    if not H5Tis_variable_str(src_vlen):
        return -2

    if not _is_pyobject_opaque(dst_str):
        return -2

    log_convert_registered(src_vlen, dst_str)

    sizes = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = sizes

    sizes[0].src_size = H5Tget_size(src_vlen)
    sizes[0].dst_size = H5Tget_size(dst_str)
    return 0

# Init operator for Python object -> vlen string.  Returns -2 (not an
# exception) when the type pair is not one this converter handles.
cdef herr_t init_str2vlen(hid_t src_str, hid_t dst_vlen, void** priv) except -1:
    # /!\ untested !
    cdef conv_size_t *sizes

    if not H5Tis_variable_str(dst_vlen):
        return -2

    if not _is_pyobject_opaque(src_str):
        return -2

    log_convert_registered(src_str, dst_vlen)

    sizes = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = sizes
    sizes[0].src_size = H5Tget_size(src_str)
    sizes[0].dst_size = H5Tget_size(dst_vlen)

    return 0

# Convert one vlen C string (char*) at `ipt` into a Python bytes object whose
# pointer is written to `opt`.  A NULL input string becomes b"".
cdef int conv_vlen2str(void* ipt, void* opt, void* bkg, void* priv) except -1:
    cdef:
        PyObject** buf_obj = <PyObject**>opt
        char** buf_cstring = <char**>ipt
        PyObject* tmp_object
        bytes tmp_bytes
        conv_size_t *sizes = <conv_size_t*>priv
        char* buf_cstring0

    buf_cstring0 = buf_cstring[0]

    if buf_cstring0 == NULL:
        tmp_bytes =  b""
    else:
        tmp_bytes = buf_cstring0 # Let cython converts char* -> bytes for us
    tmp_object = <PyObject *>tmp_bytes

    # Since all data conversions are by definition in-place, it
    # is our responsibility to free the memory used by the vlens.
    efree(buf_cstring0)

    # Write the new bytes object to the buffer in-place and ensure it is not destroyed
    buf_obj[0] = tmp_object
    Py_XINCREF(tmp_object)
    return 0

# Convert one Python str/bytes object (pointer at `ipt`) into a freshly
# allocated, NUL-terminated C string whose pointer is written to `opt`.
#
# Raises TypeError for objects that are neither str nor bytes, and ValueError
# when the encoded data contains an embedded NUL.
cdef int conv_str2vlen(void* ipt, void* opt, void* bkg, void* priv) except -1:
    cdef:
        PyObject** buf_obj = <PyObject**>ipt
        char** buf_cstring = <char**>opt
        conv_size_t* sizes = <conv_size_t*>priv
        char* temp_string = NULL
        size_t temp_string_len = 0  # Not including null term
        PyObject* buf_obj0
        char* buf_cstring0
        object temp_object

    buf_obj0 = buf_obj[0]
    temp_object = <object> buf_obj0

    if isinstance(temp_object, unicode):
        # Encode according to the character set of the HDF5 string type.
        enc = 'utf-8' if (sizes[0].cset == H5T_CSET_UTF8) else 'ascii'
        temp_object = temp_object.encode(enc)

    elif not isinstance(temp_object, bytes):
        raise TypeError("Can't implicitly convert non-string objects to strings")

    # temp_object is bytes
    temp_string = temp_object  # cython cast it as char *
    temp_string_len = len(temp_object)

    # strlen stops at the first NUL; a mismatch means the data embeds one.
    if strlen(temp_string) != temp_string_len:
        raise ValueError("VLEN strings do not support embedded NULLs")
    buf_cstring0 = <char*>emalloc(temp_string_len+1)
    memcpy(buf_cstring0, temp_string, temp_string_len+1)
    buf_cstring[0] = buf_cstring0

    return 0

# =============================================================================
# VLEN to fixed-width strings

# Init operator for vlen string -> fixed-width string.  Returns -2 (not an
# exception) unless src is variable-length and dst is not.
cdef herr_t init_vlen2fixed(hid_t src, hid_t dst, void** priv) except -1:
    cdef conv_size_t *sizes

    # /!\ Untested

    if not (H5Tis_variable_str(src) and (not H5Tis_variable_str(dst))):
        return -2
    log_convert_registered(src, dst)

    sizes = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = sizes

    sizes[0].src_size = H5Tget_size(src)
    sizes[0].dst_size = H5Tget_size(dst)
    return 0

# Init operator for fixed-width string -> vlen string.  Returns -2 (not an
# exception) unless dst is variable-length and src is not.
cdef herr_t init_fixed2vlen(hid_t src, hid_t dst, void** priv) except -1:

    cdef conv_size_t *sizes
    if not (H5Tis_variable_str(dst) and (not H5Tis_variable_str(src))):
        return -2
    log_convert_registered(src, dst)

    # /!\ untested !

    sizes = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = sizes
    sizes[0].src_size = H5Tget_size(src)
    sizes[0].dst_size = H5Tget_size(dst)

    return 0

# Copy one vlen C string into a fixed-width field of dst_size bytes,
# zero-padding short strings and truncating long ones.  A NULL input
# produces an all-zero field.  The vlen string itself is not freed here.
cdef int conv_vlen2fixed(void* ipt, void* opt, void* bkg, void* priv) except -1:
    cdef:
        char** buf_vlen = <char**>ipt
        char* buf_fixed = <char*>opt
        char* temp_string = NULL
        size_t temp_string_len = 0  # Without null term
        conv_size_t *sizes = <conv_size_t*>priv
        char* buf_vlen0

    # /!\ untested !

    buf_vlen0 = buf_vlen[0]

    if buf_vlen0 != NULL:
        temp_string = buf_vlen0
        temp_string_len = strlen(temp_string)

        if temp_string_len <= sizes[0].dst_size:
            # Pad with zeros
            memcpy(buf_fixed, temp_string, temp_string_len)
            memset(buf_fixed + temp_string_len, c'\0', sizes[0].dst_size - temp_string_len)
        else:
            # Simply truncate the string
            memcpy(buf_fixed, temp_string, sizes[0].dst_size)
    else:
        memset(buf_fixed, c'\0', sizes[0].dst_size)

    return 0

# Copy one fixed-width field of src_size bytes into a freshly allocated,
# NUL-terminated C string and store its pointer at `opt`.
cdef int conv_fixed2vlen(void* ipt, void* opt, void* bkg, void* priv) except -1:
    cdef:
        char** buf_vlen = <char**>opt
        char* buf_fixed = <char*>ipt
        char* temp_string = NULL
        conv_size_t *sizes = <conv_size_t*>priv

    # /!\ untested !

    temp_string = <char*>emalloc(sizes[0].src_size+1)
    memcpy(temp_string, buf_fixed, sizes[0].src_size)
    temp_string[sizes[0].src_size] = c'\0'

    # Store the pointer value itself (not the characters) in the output slot.
    memcpy(buf_vlen, &temp_string, sizeof(temp_string))

    return 0

# =============================================================================
# HDF5 references to Python instances of h5r.Reference

# Wrap one HDF5 object reference in a new h5r.Reference and store an owned
# pointer to it at `opt`.
cdef inline int conv_objref2pyref(void* ipt, void* opt, void* bkg, void* priv) except -1:
    cdef:
        PyObject** buf_obj = <PyObject**>opt
        hobj_ref_t* buf_ref = <hobj_ref_t*>ipt
        Reference ref
        PyObject* ref_ptr = NULL

    ref = Reference()
    ref.ref.obj_ref = buf_ref[0]
    ref.typecode = H5R_OBJECT

    ref_ptr = <PyObject*>ref
    Py_INCREF(ref)  # prevent ref from garbage collection
    buf_obj[0] = ref_ptr

    return 0

# Extract the HDF5 object reference from a Python Reference (pointer at
# `ipt`) into `opt`.  NULL and None produce an all-zero reference.
#
# Raises TypeError for any other non-Reference object.
cdef inline int conv_pyref2objref(void* ipt, void* opt, void* bkg, void* priv)  except -1:
    cdef:
        PyObject** buf_obj = <PyObject**>ipt
        hobj_ref_t* buf_ref = <hobj_ref_t*>opt
        object obj
        Reference ref
        PyObject* buf_obj0

    buf_obj0 = buf_obj[0]

    if buf_obj0 != NULL and buf_obj0 != Py_None:
        obj = <object>(buf_obj0)
        if not isinstance(obj, Reference):
            raise TypeError("Can't convert incompatible object to HDF5 object reference")
        ref = <Reference>(buf_obj0)
        buf_ref[0] = ref.ref.obj_ref
    else:
        memset(buf_ref, c'\0', sizeof(hobj_ref_t))

    return 0

# Wrap one HDF5 region reference in a new h5r.RegionReference and store an
# owned pointer to it at `opt`.  The object pointer found in the background
# buffer slot is released first.
cdef inline int conv_regref2pyref(void* ipt, void* opt, void* bkg, void* priv) except -1:
    cdef:
        PyObject** buf_obj = <PyObject**>opt
        PyObject** bkg_obj = <PyObject**>bkg
        hdset_reg_ref_t* buf_ref = <hdset_reg_ref_t*>ipt
        RegionReference ref
        PyObject* ref_ptr = NULL
        PyObject* bkg_obj0

    bkg_obj0 = bkg_obj[0]
    ref = RegionReference()
    ref.ref.reg_ref = buf_ref[0]
    ref.typecode = H5R_DATASET_REGION
    ref_ptr = <PyObject*>ref
    Py_INCREF(ref)  # because Cython discards its reference when the
                    # function exits

    Py_XDECREF(bkg_obj0)
    buf_obj[0] = ref_ptr

    return 0

# Extract the HDF5 region reference from a Python RegionReference (pointer at
# `ipt`) into `opt`.  NULL and None produce an all-zero reference.
#
# Raises TypeError for any other non-RegionReference object.
cdef inline int conv_pyref2regref(void* ipt, void* opt, void* bkg, void* priv) except -1:
    cdef:
        PyObject** buf_obj = <PyObject**>ipt
        hdset_reg_ref_t* buf_ref = <hdset_reg_ref_t*>opt
        object obj
        RegionReference ref
        PyObject* buf_obj0

    buf_obj0 = buf_obj[0]

    if buf_obj0 != NULL and buf_obj0 != Py_None:
        obj = <object>(buf_obj0)
        if not isinstance(obj, RegionReference):
            raise TypeError("Can't convert incompatible object to HDF5 region reference")
        ref = <RegionReference>(buf_obj0)
        IF HDF5_VERSION >= (1, 12, 0):
            memcpy(buf_ref, ref.ref.reg_ref.data, sizeof(hdset_reg_ref_t))
        ELSE:
            memcpy(buf_ref, ref.ref.reg_ref, sizeof(hdset_reg_ref_t))
    else:
        memset(buf_ref, c'\0', sizeof(hdset_reg_ref_t))

    return 0

# =============================================================================
# Conversion functions
#
# Each function below has the H5T_conv_t callback signature and simply plugs
# an (operator, init operator, background-buffer requirement) triple into
# generic_converter.  They acquire the GIL because the operators touch
# Python objects.


cdef inline herr_t vlen2str(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return generic_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl,  conv_vlen2str, init_vlen2str, H5T_BKG_YES)

cdef inline herr_t str2vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return generic_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, conv_str2vlen, init_str2vlen, H5T_BKG_NO)

cdef inline herr_t vlen2fixed(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return generic_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, conv_vlen2fixed, init_vlen2fixed, H5T_BKG_NO)

cdef inline herr_t fixed2vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return generic_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, conv_fixed2vlen, init_fixed2vlen, H5T_BKG_NO)

cdef inline herr_t objref2pyref(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return generic_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, conv_objref2pyref, init_generic, H5T_BKG_NO)

cdef inline herr_t pyref2objref(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return generic_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, conv_pyref2objref, init_generic, H5T_BKG_NO)

cdef inline herr_t regref2pyref(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return generic_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, conv_regref2pyref, init_generic, H5T_BKG_YES)

cdef inline herr_t pyref2regref(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return generic_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, conv_pyref2regref, init_generic, H5T_BKG_NO)

# =============================================================================
# Enum to integer converter

# Private context for the enum <-> int converters.
cdef struct conv_enum_t:
    size_t src_size
    size_t dst_size

# INIT handler: no background buffer is needed; store the element sizes in a
# freshly allocated conv_enum_t hung off cdata[0].priv.
cdef int enum_int_converter_init(hid_t src, hid_t dst,
                                 H5T_cdata_t *cdata, int forward) except -1:
    cdef conv_enum_t *info

    cdata[0].need_bkg = H5T_BKG_NO
    cdata[0].priv = info = <conv_enum_t*>emalloc(sizeof(conv_enum_t))
    info[0].src_size = H5Tget_size(src)
    info[0].dst_size = H5Tget_size(dst)
    return 0

# FREE handler: release the private context allocated by the INIT handler.
cdef void enum_int_converter_free(H5T_cdata_t *cdata):
    cdef conv_enum_t *info

    info = <conv_enum_t*>cdata[0].priv
    efree(info)
    cdata[0].priv = NULL


# CONV handler: convert between an enum and an integer type by converting
# between the enum's base (super) type and the integer type.  Nothing is done
# when the base type equals the integer type.
cdef int enum_int_converter_conv(hid_t src, hid_t dst, H5T_cdata_t *cdata,
                                  size_t nl, size_t buf_stride, size_t bkg_stride,
                                  void *buf_i, void *bkg_i, hid_t dxpl,
                                  int forward) except -1:
    cdef:
        conv_enum_t *info
        size_t nalloc
        int i
        char* cbuf = NULL
        char* buf = <char*>buf_i
        int identical
        hid_t supertype = -1

    info = <conv_enum_t*>cdata[0].priv

    try:
        if forward:
            supertype = H5Tget_super(src)
            identical = H5Tequal(supertype, dst)
        else:
            supertype = H5Tget_super(dst)
            identical = H5Tequal(supertype, src)

        # Short-circuit success
        if identical:
            return 0

        if buf_stride == 0:
            # Contiguous case: call H5Tconvert directly
            if forward:
                H5Tconvert(supertype, dst, nl, buf, NULL, dxpl)
            else:
                H5Tconvert(src, supertype, nl, buf, NULL, dxpl)
        else:
            # Non-contiguous: gather, convert and then scatter
            if info[0].src_size > info[0].dst_size:
                nalloc = info[0].src_size*nl
            else:
                nalloc = info[0].dst_size*nl

            cbuf = <char*>emalloc(nalloc)
            # NOTE(review): emalloc lives in .utils; if it returns NULL for a
            # zero-byte request, nl == 0 would raise here -- confirm.
            if cbuf == NULL:
                raise MemoryError()

            for i in range(nl):
                memcpy(cbuf + (i*info[0].src_size), buf + (i*buf_stride),
                        info[0].src_size)

            if forward:
                H5Tconvert(supertype, dst, nl, cbuf, NULL, dxpl)
            else:
                H5Tconvert(src, supertype, nl, cbuf, NULL, dxpl)

            for i in range(nl):
                memcpy(buf + (i*buf_stride), cbuf + (i*info[0].dst_size),
                        info[0].dst_size)

    finally:
        # Runs on every exit path, including the early return above.
        efree(cbuf)
        cbuf = NULL
        if supertype > 0:
            H5Tclose(supertype)

    return 0


# Direction ("forward"): 1 = enum to int, 0 = int to enum
cdef herr_t enum_int_converter(hid_t src, hid_t dst, H5T_cdata_t *cdata,
                               size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                               void *bkg_i, hid_t dxpl, int forward) except -1:

    cdef int command = cdata[0].command

    if command == H5T_CONV_INIT:
        enum_int_converter_init(src, dst, cdata, forward)
    elif command == H5T_CONV_FREE:
        enum_int_converter_free(cdata)
    elif command == H5T_CONV_CONV:
        return enum_int_converter_conv(src, dst, cdata, nl, buf_stride,
                                       bkg_stride, buf_i, bkg_i, dxpl, forward)
    else:
        return -2   # Unrecognized command.  Note this is NOT an exception.

    return 0


# H5T_conv_t callback: enum -> integer.
cdef herr_t enum2int(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return enum_int_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, 1)

# H5T_conv_t callback: integer -> enum.
cdef herr_t int2enum(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    return enum_int_converter(src_id, dst_id, cdata, nl, buf_stride, bkg_stride,
             buf_i, bkg_i, dxpl, 0)

# =============================================================================
# ndarray to VLEN routines

cdef herr_t vlen2ndarray(hid_t src_id,
                         hid_t dst_id,
                         H5T_cdata_t *cdata,
                         size_t nl,
                         size_t buf_stride,
                         size_t bkg_stride,
                         void *buf_i,
                         void *bkg_i,
                         hid_t dxpl) except -1 with gil:
    """Convert variable length object to numpy array, typically a list of strings

    :param src_id: Identifier for the source datatype.
    :param dst_id: Identifier for the destination datatype.
    :param cdata: Conversion data; its ``command`` field selects init/convert/free.
    :param nl: Number of elements to convert.
    :param buf_stride: Byte stride between elements in ``buf_i``; 0 means packed.
    :param bkg_stride: Stride of the background buffer (unused here).
    :param buf_i: Buffer holding the pre- and post-conversion values (converted in place).
    :param bkg_i: Optional background buffer (unused here).
    :param dxpl: Dataset transfer property list identifier (unused here).
    :return: error-code (0 on success, -2 when the conversion is not handled)
    """
    cdef:
        int command = cdata[0].command
        size_t src_size, dst_size
        TypeID supertype
        TypeID outtype
        cnp.dtype dt
        int i
        char* buf = <char*>buf_i

    if command == H5T_CONV_INIT:
        cdata[0].need_bkg = H5T_BKG_NO
        if H5Tget_class(src_id) != H5T_VLEN or H5Tget_class(dst_id) != H5T_OPAQUE:
            return -2

    elif command == H5T_CONV_FREE:
        pass

    elif command == H5T_CONV_CONV:
        # need to pass element dtype to converter
        supertype = typewrap(H5Tget_super(src_id))
        dt = supertype.dtype
        outtype = py_create(dt)

        if buf_stride == 0:
            # No explicit stride seems to mean that the elements are packed
            # contiguously in the buffer.  In this case we must be careful
            # not to "stomp on" input elements if the output elements are
            # of a larger size.

            src_size = H5Tget_size(src_id)
            dst_size = H5Tget_size(dst_id)

            if src_size >= dst_size:
                for i in range(nl):
                    conv_vlen2ndarray(buf + (i*src_size), buf + (i*dst_size),
                                      dt, supertype, outtype)
            else:
                for i in range(nl-1, -1, -1):
                    conv_vlen2ndarray(buf + (i*src_size), buf + (i*dst_size),
                                      dt, supertype, outtype)
        else:
            # With explicit strides, we assume that the library knows the
            # alignment better than us.  Therefore we use the given stride
            # offsets exclusively.
            for i in range(nl):
                conv_vlen2ndarray(buf + (i*buf_stride), buf + (i*buf_stride),
                                  dt, supertype, outtype)

    else:
        return -2   # Unrecognized command.  Note this is NOT an exception.

    return 0


# In-memory layout of one variable-length element: element count plus a
# pointer to the element data.
cdef struct vlen_t:
    size_t len
    void* ptr

cdef int conv_vlen2ndarray(void* ipt,
                           void* opt,
                           cnp.dtype elem_dtype,
                           TypeID intype,
                           TypeID outtype) except -1:
    """Convert one variable length element to a numpy array

    :param ipt: input pointer: Point to the input data (a ``vlen_t``)
    :param opt: output pointer: will contains the numpy array after exit
    :param elem_dtype: dtype of the element
    :param intype: HDF5 type of the elements as stored in the vlen buffer
    :param outtype: HDF5 type the elements are converted to before wrapping
    :raises MemoryError: if the element buffer cannot be enlarged
    """
    cdef:
        PyObject** buf_obj = <PyObject**>opt
        vlen_t* in_vlen = <vlen_t*>ipt
        int flags = NPY_WRITEABLE | NPY_C_CONTIGUOUS | NPY_OWNDATA
        npy_intp dims[1]
        void* data
        void* new_data
        cdef char[:] buf
        cnp.ndarray ndarray
        PyObject* ndarray_obj
        vlen_t in_vlen0
        size_t size, itemsize

    #Replaces the memcpy
    size = in_vlen0.len = in_vlen[0].len
    data = in_vlen0.ptr = in_vlen[0].ptr

    dims[0] = size
    itemsize = H5Tget_size(outtype.id)
    if itemsize > H5Tget_size(intype.id):
        # Grow the buffer so the in-place H5Tconvert below has room for the
        # wider output elements.  On failure realloc leaves `data` untouched,
        # so raise instead of handing NULL to H5Tconvert.
        new_data = realloc(data, itemsize * size)
        if new_data == NULL and size != 0:
            raise MemoryError()
        data = new_data
    H5Tconvert(intype.id, outtype.id, size, data, NULL, H5P_DEFAULT)

    if elem_dtype.kind in b"biufcmMO":
        # type_num is enough to create an array for these dtypes
        ndarray = cnp.PyArray_SimpleNewFromData(1, dims, elem_dtype.type_num, data)
    else:
        # dtypes like string & void need a size specified, so can't be used with
        # SimpleNewFromData. Cython doesn't expose NumPy C-API functions
        # like NewFromDescr, so we'll construct this with a Python function.
        buf = <char[:itemsize * size]> data
        ndarray = np.frombuffer(buf, dtype=elem_dtype)

    # Mark the array writeable, C-contiguous and owning its data.
    PyArray_ENABLEFLAGS(ndarray, flags)
    ndarray_obj = <PyObject*>ndarray

    in_vlen0.ptr = NULL

    # Write the new ndarray object to the buffer in-place and ensure it is not destroyed
    buf_obj[0] = ndarray_obj
    Py_INCREF(ndarray)
    # NOTE(review): neither array constructor above appears to steal a
    # reference to elem_dtype, so this extra INCREF may leak one reference
    # per element -- confirm before removing.
    Py_INCREF(elem_dtype)
    return 0

# H5T_conv_t callback: Python object (1-D numpy array) -> HDF5 vlen.
# Returns -2 (not an exception) when the type pair or the array contents
# are not something this converter handles.
cdef herr_t ndarray2vlen(hid_t src_id,
                         hid_t dst_id,
                         H5T_cdata_t *cdata,
                         size_t nl,
                         size_t buf_stride,
                         size_t bkg_stride,
                         void *buf_i,
                         void *bkg_i,
                         hid_t dxpl) except -1 with gil:
    cdef:
        int command = cdata[0].command
        size_t src_size, dst_size
        TypeID supertype
        TypeID outtype
        int i
        PyObject **pdata = <PyObject **> buf_i
        PyObject *pdata_elem
        char* buf = <char*>buf_i

    if command == H5T_CONV_INIT:
        cdata[0].need_bkg = H5T_BKG_NO
        if not H5Tequal(src_id, H5PY_OBJ) or H5Tget_class(dst_id) != H5T_VLEN:
            return -2
        supertype = typewrap(H5Tget_super(dst_id))
        for i in range(nl):
            # smells a lot
            memcpy(&pdata_elem, pdata+i, sizeof(pdata_elem))
            if supertype != py_create((<cnp.ndarray> pdata_elem).dtype, 1):
                return -2
            if (<cnp.ndarray> pdata_elem).ndim != 1:
                return -2
        log_convert_registered(src_id, dst_id)

    elif command == H5T_CONV_FREE:
        pass

    elif command == H5T_CONV_CONV:
        # If there are no elements to convert, pdata will not point to
        # a valid PyObject*, so bail here to prevent accessing the dtype below
        if nl == 0:
            return 0

        # need to pass element dtype to converter
        # (the dtype of the first array is used for every element)
        pdata_elem = pdata[0]
        supertype = py_create((<cnp.ndarray> pdata_elem).dtype)
        outtype = typewrap(H5Tget_super(dst_id))

        if buf_stride == 0:
            # No explicit stride seems to mean that the elements are packed
            # contiguously in the buffer.  In this case we must be careful
            # not to "stomp on" input elements if the output elements are
            # of a larger size.

            src_size = H5Tget_size(src_id)
            dst_size = H5Tget_size(dst_id)

            if src_size >= dst_size:
                for i in range(nl):
                    conv_ndarray2vlen(buf + (i*src_size), buf + (i*dst_size),
                                      supertype, outtype)
            else:
                for i in range(nl-1, -1, -1):
                    conv_ndarray2vlen(buf + (i*src_size), buf + (i*dst_size),
                                      supertype, outtype)
        else:
            # With explicit strides, we assume that the library knows the
            # alignment better than us.  Therefore we use the given stride
            # offsets exclusively.
            for i in range(nl):
                conv_ndarray2vlen(buf + (i*buf_stride), buf + (i*buf_stride),
                                  supertype, outtype)

    else:
        return -2   # Unrecognized command.  Note this is NOT an exception.

    return 0


# Convert one numpy array (object pointer at `ipt`) into a vlen_t written to
# `opt`: the array data is copied into a newly allocated buffer, converted in
# place from `intype` to `outtype`, and handed over to the vlen element.
cdef int conv_ndarray2vlen(void* ipt,
                           void* opt,
                           TypeID intype,
                           TypeID outtype) except -1:
    cdef:
        PyObject** buf_obj = <PyObject**>ipt
        vlen_t* in_vlen = <vlen_t*>opt
        void* data
        cnp.ndarray ndarray
        size_t nelem, nbytes
        PyObject* buf_obj0
        Py_buffer view

    buf_obj0 = buf_obj[0]
    ndarray = <cnp.ndarray> buf_obj0
    nelem = ndarray.shape[0]
    # Size the buffer for the larger of the two element types so the in-place
    # H5Tconvert below cannot overrun it.
    nbytes = nelem * max(H5Tget_size(outtype.id), H5Tget_size(intype.id))

    data = emalloc(nbytes)

    try:
        PyObject_GetBuffer(ndarray, &view, PyBUF_INDIRECT)
        try:
            PyBuffer_ToContiguous(data, &view, view.len, b'C')
        finally:
            # Always drop the buffer export, even if the copy failed.
            PyBuffer_Release(&view)

        H5Tconvert(intype.id, outtype.id, nelem, data, NULL, H5P_DEFAULT)
    except BaseException:
        # Ownership has not been handed to the vlen yet; don't leak the copy.
        efree(data)
        raise

    in_vlen[0].len = nelem
    in_vlen[0].ptr = data

    return 0

# =============================================================================
# B8 to enum bool routines
#
# These converters are no-ops: they report success without touching the buffer.

cdef herr_t b82boolenum(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                        size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                        void *bkg_i, hid_t dxpl) except -1:
    return 0

cdef herr_t boolenum2b8(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                        size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                        void *bkg_i, hid_t dxpl) except -1:
    return 0

# =============================================================================
# BITFIELD to UINT routines
#
# These converters are no-ops: they report success without touching the buffer.

cdef herr_t bitfield2uint(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                     size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                     void *bkg_i, hid_t dxpl) except -1:
    return 0

cdef herr_t uint2bitfield(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                     size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                     void *bkg_i, hid_t dxpl) except -1:
    return 0

# =============================================================================

# Register every converter defined in this module with the HDF5 library.
# Temporary template types are created for the registrations and closed
# again before returning.  Returns 0.
cpdef int register_converters() except -1:
    cdef:
        hid_t vlstring
        hid_t vlentype
        hid_t pyobj
        hid_t enum
        hid_t boolenum = -1
        int8_t f_value = 0
        int8_t t_value = 1

    vlstring = H5Tcopy(H5T_C_S1)
    H5Tset_size(vlstring, H5T_VARIABLE)

    enum = H5Tenum_create(H5T_STD_I32LE)

    vlentype = H5Tvlen_create(H5T_STD_I32LE)

    pyobj = H5PY_OBJ

    # Boolean enum whose member names come from the h5py configuration.
    boolenum = H5Tenum_create(H5T_NATIVE_INT8)
    H5Tenum_insert(boolenum, cfg._f_name, &f_value)
    H5Tenum_insert(boolenum, cfg._t_name, &t_value)

    H5Tregister(H5T_PERS_SOFT, "vlen2fixed", vlstring, H5T_C_S1, vlen2fixed)
    H5Tregister(H5T_PERS_SOFT, "fixed2vlen", H5T_C_S1, vlstring, fixed2vlen)

    H5Tregister(H5T_PERS_HARD, "objref2pyref", H5T_STD_REF_OBJ, pyobj, objref2pyref)
    H5Tregister(H5T_PERS_HARD, "pyref2objref", pyobj, H5T_STD_REF_OBJ, pyref2objref)

    H5Tregister(H5T_PERS_HARD, "regref2pyref", H5T_STD_REF_DSETREG, pyobj, regref2pyref)
    H5Tregister(H5T_PERS_HARD, "pyref2regref", pyobj, H5T_STD_REF_DSETREG, pyref2regref)

    H5Tregister(H5T_PERS_SOFT, "enum2int", enum, H5T_STD_I32LE, enum2int)
    H5Tregister(H5T_PERS_SOFT, "int2enum", H5T_STD_I32LE, enum, int2enum)

    H5Tregister(H5T_PERS_SOFT, "vlen2ndarray", vlentype, pyobj, vlen2ndarray)
    H5Tregister(H5T_PERS_SOFT, "ndarray2vlen", pyobj, vlentype, ndarray2vlen)

    H5Tregister(H5T_PERS_HARD, "boolenum2b8", boolenum, H5T_NATIVE_B8, boolenum2b8)
    H5Tregister(H5T_PERS_HARD, "b82boolenum", H5T_NATIVE_B8, boolenum, b82boolenum)

    H5Tregister(H5T_PERS_HARD, "uint82b8", H5T_STD_U8BE, H5T_STD_B8BE, uint2bitfield)
    H5Tregister(H5T_PERS_HARD, "b82uint8", H5T_STD_B8BE, H5T_STD_U8BE, bitfield2uint)

    H5Tregister(H5T_PERS_HARD, "uint82b8", H5T_STD_U8LE, H5T_STD_B8LE, uint2bitfield)
    H5Tregister(H5T_PERS_HARD, "b82uint8", H5T_STD_B8LE, H5T_STD_U8LE, bitfield2uint)

    H5Tregister(H5T_PERS_HARD, "uint162b16", H5T_STD_U16BE, H5T_STD_B16BE, uint2bitfield)
    H5Tregister(H5T_PERS_HARD, "b162uint16", H5T_STD_B16BE, H5T_STD_U16BE, bitfield2uint)

    H5Tregister(H5T_PERS_HARD, "uint162b16", H5T_STD_U16LE, H5T_STD_B16LE, uint2bitfield)
    H5Tregister(H5T_PERS_HARD, "b162uint16", H5T_STD_B16LE, H5T_STD_U16LE, bitfield2uint)

    H5Tregister(H5T_PERS_HARD, "uint322b32", H5T_STD_U32BE, H5T_STD_B32BE, uint2bitfield)
    H5Tregister(H5T_PERS_HARD, "b322uint32", H5T_STD_B32BE, H5T_STD_U32BE, bitfield2uint)

    H5Tregister(H5T_PERS_HARD, "uint322b32", H5T_STD_U32LE, H5T_STD_B32LE, uint2bitfield)
    H5Tregister(H5T_PERS_HARD, "b322uint32", H5T_STD_B32LE, H5T_STD_U32LE, bitfield2uint)

    H5Tregister(H5T_PERS_HARD, "uint642b64", H5T_STD_U64BE, H5T_STD_B64BE, uint2bitfield)
    H5Tregister(H5T_PERS_HARD, "b642uint64", H5T_STD_B64BE, H5T_STD_U64BE, bitfield2uint)

    H5Tregister(H5T_PERS_HARD, "uint642b64", H5T_STD_U64LE, H5T_STD_B64LE, uint2bitfield)
    H5Tregister(H5T_PERS_HARD, "b642uint64", H5T_STD_B64LE, H5T_STD_U64LE, bitfield2uint)

    H5Tregister(H5T_PERS_SOFT, "vlen2str", vlstring, pyobj, vlen2str)
    H5Tregister(H5T_PERS_SOFT, "str2vlen", pyobj, vlstring, str2vlen)

    H5Tclose(vlstring)
    H5Tclose(vlentype)
    H5Tclose(enum)
    H5Tclose(boolenum)

    return 0

# Unregister every converter installed by register_converters().  Returns 0.
cpdef int unregister_converters() except -1:

    H5Tunregister(H5T_PERS_SOFT, "vlen2str", -1, -1, vlen2str)
    H5Tunregister(H5T_PERS_SOFT, "str2vlen", -1, -1, str2vlen)

    H5Tunregister(H5T_PERS_SOFT, "vlen2fixed", -1, -1, vlen2fixed)
    H5Tunregister(H5T_PERS_SOFT, "fixed2vlen", -1, -1, fixed2vlen)

    H5Tunregister(H5T_PERS_HARD, "objref2pyref", -1, -1, objref2pyref)
    H5Tunregister(H5T_PERS_HARD, "pyref2objref", -1, -1, pyref2objref)

    H5Tunregister(H5T_PERS_HARD, "regref2pyref", -1, -1, regref2pyref)
    H5Tunregister(H5T_PERS_HARD, "pyref2regref", -1, -1, pyref2regref)

    H5Tunregister(H5T_PERS_SOFT, "enum2int", -1, -1, enum2int)
    H5Tunregister(H5T_PERS_SOFT, "int2enum", -1, -1, int2enum)

    H5Tunregister(H5T_PERS_SOFT, "vlen2ndarray", -1, -1, vlen2ndarray)
    H5Tunregister(H5T_PERS_SOFT, "ndarray2vlen", -1, -1, ndarray2vlen)

    H5Tunregister(H5T_PERS_HARD, "boolenum2b8", -1, -1, boolenum2b8)
    H5Tunregister(H5T_PERS_HARD, "b82boolenum", -1, -1, b82boolenum)

    # Pass an empty string to unregister all methods that use these functions
    H5Tunregister(H5T_PERS_HARD, "", -1, -1, uint2bitfield)
    H5Tunregister(H5T_PERS_HARD, "", -1, -1, bitfield2uint)

    return 0