1# cython: profile=False
2# cython: language_level=3
3# This file is part of h5py, a Python interface to the HDF5 library.
4#
5# http://www.h5py.org
6#
7# Copyright 2008-2019 Andrew Collette and contributors
8#
9# License:  Standard 3-clause BSD; see "license.txt" for full license terms
10#           and contributor agreement.
11
12"""
13    Low-level type-conversion routines.
14"""
15include "config.pxi"
16
17from logging import getLogger
18
19from .h5 import get_config
20from .h5r cimport Reference, RegionReference, hobj_ref_t, hdset_reg_ref_t
21from .h5t cimport H5PY_OBJ, typewrap, py_create, TypeID, H5PY_PYTHON_OPAQUE_TAG
22from libc.stdlib cimport realloc
23from libc.string cimport strcmp
24from .utils cimport emalloc, efree
25cfg = get_config()
26
27# Initialization of numpy
28cimport numpy as cnp
29from numpy cimport npy_intp, NPY_WRITEABLE, NPY_C_CONTIGUOUS, NPY_OWNDATA
30cnp._import_array()
31import numpy as np
32
33from cpython.buffer cimport (
34    PyObject_GetBuffer, PyBuffer_ToContiguous, PyBuffer_Release, PyBUF_INDIRECT
35)
36from cpython.object cimport PyObject
37from cpython.ref cimport Py_INCREF, Py_XDECREF, Py_XINCREF
38
39logger = getLogger(__name__)
40
# Raw (borrowed) pointer to the None singleton, used below for identity
# comparisons against PyObject* values read out of conversion buffers.
cdef PyObject* Py_None = <PyObject*> None

# Declared straight from the NumPy C header; used by the vlen -> ndarray
# converter to set flags on the arrays it creates.
cdef extern from "numpy/arrayobject.h":
    void PyArray_ENABLEFLAGS(cnp.ndarray arr, int flags)


# Per-element conversion operator: receives the input, output and background
# pointers for a single element plus the converter's private context.
ctypedef int (*conv_operator_t)(void* ipt, void* opt, void* bkg, void* priv) except -1
# Initialisation operator: receives the source and destination type ids and
# stores its private context through ``priv``.
ctypedef herr_t (*init_operator_t)(hid_t src, hid_t dst, void** priv) except -1
49
50# Generic conversion callback
51#
52# The actual conversion routines are one-liners which plug the appropriate
53# operator callback into this function.  This prevents us from having to
54# repeat all the conversion boilerplate for every single callback.
55#
56# While this is somewhat slower than a custom function, the added overhead is
57# likely small compared to the cost of the Python-side API calls required to
58# implement the conversions.
cdef herr_t generic_converter(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl, conv_operator_t op,
                    init_operator_t initop, H5T_bkg_t need_bkg)  except -1:
    """Dispatch one conversion command to the supplied operator callbacks.

    :param src_id: source datatype id
    :param dst_id: destination datatype id
    :param cdata: conversion data; ``command`` selects the action and ``priv``
        carries the ``conv_size_t`` context created by ``initop``
    :param nl: number of elements to convert
    :param buf_stride: byte distance between elements, 0 for a packed buffer
    :param bkg_stride: byte distance between background elements, 0 to use
        the destination element size
    :param buf_i: buffer converted in place
    :param bkg_i: background buffer
    :param dxpl: transfer property list id (not used here)
    :param op: per-element conversion operator
    :param initop: operator run for the INIT command
    :param need_bkg: value stored in ``cdata.need_bkg`` on INIT
    :return: 0 on success, the result of ``initop`` for INIT, or -2 for an
        unrecognised command (which is NOT an exception)
    """
    cdef:
        int command = cdata[0].command
        conv_size_t *sizes
        int i
        char* buf = <char*>buf_i
        char* bkg = <char*>bkg_i

    if command == H5T_CONV_INIT:
        cdata[0].need_bkg = need_bkg
        return initop(src_id, dst_id, &(cdata[0].priv))

    if command == H5T_CONV_FREE:
        efree(cdata[0].priv)
        cdata[0].priv = NULL
        return 0

    if command != H5T_CONV_CONV:
        return -2   # Unrecognized command.  Note this is NOT an exception.

    sizes = <conv_size_t*>cdata[0].priv

    # Record the character set of whichever side is a variable-length string,
    # so the string operators can pick the matching encoding.
    if H5Tis_variable_str(src_id):
        sizes.cset = H5Tget_cset(src_id)
    elif H5Tis_variable_str(dst_id):
        sizes.cset = H5Tget_cset(dst_id)

    if bkg_stride == 0:
        bkg_stride = sizes.dst_size

    if buf_stride != 0:
        # With explicit strides, we assume that the library knows the
        # alignment better than us, so input and output share one offset.
        for i in range(nl):
            op(buf + (i*buf_stride),
               buf + (i*buf_stride),
               bkg + (i*bkg_stride),
               cdata[0].priv)
        return 0

    # No explicit stride seems to mean that the elements are packed
    # contiguously.  The walk direction is chosen so that writing an output
    # element never overwrites an input element that is still to be read.
    if sizes.src_size >= sizes.dst_size:
        for i in range(nl):
            op(buf + (i*sizes.src_size),    # input pointer
               buf + (i*sizes.dst_size),    # output pointer
               bkg + (i*bkg_stride),        # backing buffer
               cdata[0].priv)               # conversion context
    else:
        for i in range(nl-1, -1, -1):
            op(buf + (i*sizes.src_size),
               buf + (i*sizes.dst_size),
               bkg + (i*bkg_stride),
               cdata[0].priv)
    return 0
117
118# =============================================================================
119# Helper functions
120
cdef void log_convert_registered(hid_t src, hid_t dst):
    """Log, at debug level, the type classes of a converter being set up."""
    logger.debug(
        "Creating converter from %s to %s",
        H5Tget_class(src),
        H5Tget_class(dst),
    )
123
124
125# =============================================================================
126# Generic conversion
127
# Private context of the generic converters.  The init operators allocate one
# and store it in ``cdata.priv``; generic_converter reads it on every
# conversion call and frees it on H5T_CONV_FREE.
ctypedef struct conv_size_t:
    size_t src_size     # H5Tget_size() of the source type
    size_t dst_size     # H5Tget_size() of the destination type
    int cset            # H5Tget_cset() of the vlen-string side, set at conversion time
132
cdef herr_t init_generic(hid_t src, hid_t dst, void** priv) except -1:
    """Allocate the size context for a converter that accepts any type pair.

    :param src: source datatype id
    :param dst: destination datatype id
    :param priv: receives the newly allocated ``conv_size_t``
    :return: always 0
    """
    cdef conv_size_t *ctx = <conv_size_t*>emalloc(sizeof(conv_size_t))

    priv[0] = ctx
    ctx.src_size = H5Tget_size(src)
    ctx.dst_size = H5Tget_size(dst)
    log_convert_registered(src, dst)
    return 0
143
144# =============================================================================
145# Vlen string conversion
146
cdef bint _is_pyobject_opaque(hid_t obj):
    """Return True if ``obj`` is an opaque type tagged H5PY_PYTHON_OPAQUE_TAG."""
    # This complexity is needed to ensure:
    #   1) That ctag is freed
    #   2) We don't segfault (for some reason a try-finally statement is needed,
    #   even if we do (what I think are) the right steps in copying and freeing.
    cdef char* ctag = NULL
    try:
        if H5Tget_class(obj) == H5T_OPAQUE:
            ctag = H5Tget_tag(obj)
            if ctag != NULL:
                if strcmp(ctag, H5PY_PYTHON_OPAQUE_TAG) == 0:
                    return True
        return False
    finally:
        # Runs on every exit path; ctag is still NULL here when the type is
        # not opaque or no tag was returned.
        IF HDF5_VERSION >= (1, 8, 13):
            H5free_memory(ctag)
        ELSE:
            free(ctag)
165
cdef herr_t init_vlen2str(hid_t src_vlen, hid_t dst_str, void** priv) except -1:
    """Set up the vlen-string -> Python-object converter context.

    :param src_vlen: source type id; must be a variable-length string
    :param dst_str: destination type id; must be the Python-object opaque type
    :param priv: receives the newly allocated ``conv_size_t``
    :return: 0 on success, -2 when the type pair is not the one handled here
    """
    # /!\ Untested
    cdef conv_size_t *ctx

    if not H5Tis_variable_str(src_vlen) or not _is_pyobject_opaque(dst_str):
        return -2

    log_convert_registered(src_vlen, dst_str)

    ctx = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = ctx
    ctx.src_size = H5Tget_size(src_vlen)
    ctx.dst_size = H5Tget_size(dst_str)
    return 0
184
cdef herr_t init_str2vlen(hid_t src_str, hid_t dst_vlen, void** priv) except -1:
    """Set up the Python-object -> vlen-string converter context.

    :param src_str: source type id; must be the Python-object opaque type
    :param dst_vlen: destination type id; must be a variable-length string
    :param priv: receives the newly allocated ``conv_size_t``
    :return: 0 on success, -2 when the type pair is not the one handled here
    """
    # /!\ untested !
    cdef conv_size_t *ctx

    if not H5Tis_variable_str(dst_vlen) or not _is_pyobject_opaque(src_str):
        return -2

    log_convert_registered(src_str, dst_vlen)

    ctx = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = ctx
    ctx.src_size = H5Tget_size(src_str)
    ctx.dst_size = H5Tget_size(dst_vlen)
    return 0
203
cdef int conv_vlen2str(void* ipt, void* opt, void* bkg, void* priv) except -1:
    """Replace one C string pointer by a new Python ``bytes`` object.

    :param ipt: points at a ``char*``; NULL is treated as the empty string
    :param opt: points at the ``PyObject*`` slot to fill
    :param bkg: background element (not used)
    :param priv: conversion context (not used)
    :return: always 0
    """
    cdef:
        char* c_str = (<char**>ipt)[0]
        PyObject** out_slot = <PyObject**>opt
        bytes py_bytes
        PyObject* raw

    if c_str != NULL:
        py_bytes = c_str    # Cython copies the char* into a new bytes object
    else:
        py_bytes = b""
    raw = <PyObject*>py_bytes

    # Since all data conversions are by definition in-place, it
    # is our responsibility to free the memory used by the vlens.
    efree(c_str)

    # Take an extra reference so the object stored in the buffer survives
    # once the local ``py_bytes`` is released on return.
    Py_XINCREF(raw)
    out_slot[0] = raw
    return 0
229
cdef int conv_str2vlen(void* ipt, void* opt, void* bkg, void* priv) except -1:
    """Replace one Python string object by a freshly allocated C string.

    :param ipt: points at a ``PyObject*`` holding a ``str`` or ``bytes``
    :param opt: points at the ``char*`` slot to fill
    :param bkg: background element (not used)
    :param priv: ``conv_size_t`` context; ``cset`` selects the text encoding
    :return: always 0
    :raises TypeError: the object is neither ``str`` nor ``bytes``
    :raises ValueError: the encoded value contains an embedded NUL
    """
    cdef:
        PyObject** in_slot = <PyObject**>ipt
        char** out_slot = <char**>opt
        conv_size_t* ctx = <conv_size_t*>priv
        char* raw = NULL
        size_t nbytes = 0  # Not including null term
        char* dup
        object value

    value = <object> in_slot[0]

    if isinstance(value, unicode):
        if ctx.cset == H5T_CSET_UTF8:
            enc = 'utf-8'
        else:
            enc = 'ascii'
        value = value.encode(enc)
    elif not isinstance(value, bytes):
        raise TypeError("Can't implicitly convert non-string objects to strings")

    # From here on ``value`` is bytes; Cython exposes its buffer as char*.
    raw = value
    nbytes = len(value)

    # strlen stops at the first NUL, so a mismatch means an embedded NUL.
    if strlen(raw) != nbytes:
        raise ValueError("VLEN strings do not support embedded NULLs")

    dup = <char*>emalloc(nbytes+1)
    memcpy(dup, raw, nbytes+1)     # +1 copies the terminator as well
    out_slot[0] = dup

    return 0
262
263# =============================================================================
264# VLEN to fixed-width strings
265
cdef herr_t init_vlen2fixed(hid_t src, hid_t dst, void** priv) except -1:
    """Set up the vlen-string -> fixed-width-string converter context.

    :param src: source type id; must be a variable-length string
    :param dst: destination type id; must not be a variable-length string
    :param priv: receives the newly allocated ``conv_size_t``
    :return: 0 on success, -2 when the type pair is not the one handled here
    """
    cdef conv_size_t *ctx

    # /!\ Untested

    if not H5Tis_variable_str(src) or H5Tis_variable_str(dst):
        return -2
    log_convert_registered(src, dst)

    ctx = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = ctx
    ctx.src_size = H5Tget_size(src)
    ctx.dst_size = H5Tget_size(dst)
    return 0
281
cdef herr_t init_fixed2vlen(hid_t src, hid_t dst, void** priv) except -1:
    """Set up the fixed-width-string -> vlen-string converter context.

    :param src: source type id; must not be a variable-length string
    :param dst: destination type id; must be a variable-length string
    :param priv: receives the newly allocated ``conv_size_t``
    :return: 0 on success, -2 when the type pair is not the one handled here
    """
    cdef conv_size_t *ctx

    if not H5Tis_variable_str(dst) or H5Tis_variable_str(src):
        return -2
    log_convert_registered(src, dst)

    # /!\ untested !

    ctx = <conv_size_t*>emalloc(sizeof(conv_size_t))
    priv[0] = ctx
    ctx.src_size = H5Tget_size(src)
    ctx.dst_size = H5Tget_size(dst)
    return 0
297
cdef int conv_vlen2fixed(void* ipt, void* opt, void* bkg, void* priv) except -1:
    """Copy one NUL-terminated string into a fixed-width field.

    Shorter strings are padded with zero bytes up to the destination size,
    longer ones are truncated, and a NULL source yields an all-zero field.

    :param ipt: points at the source ``char*``
    :param opt: start of the fixed-width destination field
    :param bkg: background element (not used)
    :param priv: ``conv_size_t`` context; ``dst_size`` is the field width
    :return: always 0
    """
    cdef:
        char* src_str = (<char**>ipt)[0]
        char* dst_buf = <char*>opt
        conv_size_t *ctx = <conv_size_t*>priv
        size_t width
        size_t n  # Without null term

    # /!\ untested !

    width = ctx.dst_size

    if src_str == NULL:
        memset(dst_buf, c'\0', width)
        return 0

    n = strlen(src_str)
    if n > width:
        # Simply truncate the string
        memcpy(dst_buf, src_str, width)
    else:
        # Pad with zeros
        memcpy(dst_buf, src_str, n)
        memset(dst_buf + n, c'\0', width - n)

    return 0
326
cdef int conv_fixed2vlen(void* ipt, void* opt, void* bkg, void* priv) except -1:
    """Copy one fixed-width field into a new NUL-terminated C string.

    :param ipt: start of the fixed-width source field
    :param opt: points at the ``char*`` slot that receives the new string
    :param bkg: background element (not used)
    :param priv: ``conv_size_t`` context; ``src_size`` is the field width
    :return: always 0
    """
    cdef:
        char** out_slot = <char**>opt
        char* fixed = <char*>ipt
        conv_size_t *ctx = <conv_size_t*>priv
        char* dup = NULL

    # /!\ untested !

    # One extra byte so the copy is always terminated, even when the field
    # is completely filled.
    dup = <char*>emalloc(ctx.src_size + 1)
    memcpy(dup, fixed, ctx.src_size)
    dup[ctx.src_size] = c'\0'

    # The field has been copied out, so the slot can now take the pointer.
    memcpy(out_slot, &dup, sizeof(dup))

    return 0
343
344# =============================================================================
345# HDF5 references to Python instances of h5r.Reference
346
cdef inline int conv_objref2pyref(void* ipt, void* opt, void* bkg, void* priv) except -1:
    """Wrap one HDF5 object reference in a new ``Reference`` instance.

    :param ipt: points at the source ``hobj_ref_t``
    :param opt: points at the ``PyObject*`` slot to fill
    :param bkg: background element (not used)
    :param priv: conversion context (not used)
    :return: always 0
    """
    cdef:
        PyObject** out_slot = <PyObject**>opt
        hobj_ref_t* in_ref = <hobj_ref_t*>ipt
        Reference wrapper

    wrapper = Reference()
    wrapper.ref.obj_ref = in_ref[0]
    wrapper.typecode = H5R_OBJECT

    # The buffer keeps its own reference; the local one is dropped on return.
    Py_INCREF(wrapper)
    out_slot[0] = <PyObject*>wrapper

    return 0
363
cdef inline int conv_pyref2objref(void* ipt, void* opt, void* bkg, void* priv)  except -1:
    """Write the HDF5 object reference held by one ``Reference`` instance.

    A NULL or ``None`` input produces an all-zero reference.

    :param ipt: points at the source ``PyObject*``
    :param opt: points at the ``hobj_ref_t`` to fill
    :param bkg: background element (not used)
    :param priv: conversion context (not used)
    :return: always 0
    :raises TypeError: the object is not a ``Reference``
    """
    cdef:
        hobj_ref_t* out_ref = <hobj_ref_t*>opt
        PyObject* raw = (<PyObject**>ipt)[0]
        Reference ref

    if raw == NULL or raw == Py_None:
        memset(out_ref, c'\0', sizeof(hobj_ref_t))
        return 0

    if not isinstance(<object>raw, Reference):
        raise TypeError("Can't convert incompatible object to HDF5 object reference")

    ref = <Reference>raw
    out_ref[0] = ref.ref.obj_ref
    return 0
384
cdef inline int conv_regref2pyref(void* ipt, void* opt, void* bkg, void* priv) except -1:
    """Wrap one HDF5 region reference in a new ``RegionReference`` instance.

    The object pointer found in the background element, if any, loses one
    reference.

    :param ipt: points at the source ``hdset_reg_ref_t``
    :param opt: points at the ``PyObject*`` slot to fill
    :param bkg: points at the background ``PyObject*`` for this element
    :param priv: conversion context (not used)
    :return: always 0
    """
    cdef:
        PyObject** out_slot = <PyObject**>opt
        PyObject* stale = (<PyObject**>bkg)[0]
        hdset_reg_ref_t* in_ref = <hdset_reg_ref_t*>ipt
        RegionReference wrapper

    wrapper = RegionReference()
    wrapper.ref.reg_ref = in_ref[0]
    wrapper.typecode = H5R_DATASET_REGION

    # The buffer keeps its own reference, because Cython discards the local
    # one when the function exits.
    Py_INCREF(wrapper)

    Py_XDECREF(stale)
    out_slot[0] = <PyObject*>wrapper

    return 0
406
cdef inline int conv_pyref2regref(void* ipt, void* opt, void* bkg, void* priv) except -1:
    """Write the HDF5 region reference held by one ``RegionReference``.

    A NULL or ``None`` input produces an all-zero reference.

    :param ipt: points at the source ``PyObject*``
    :param opt: points at the ``hdset_reg_ref_t`` to fill
    :param bkg: background element (not used)
    :param priv: conversion context (not used)
    :return: always 0
    :raises TypeError: the object is not a ``RegionReference``
    """
    cdef:
        hdset_reg_ref_t* out_ref = <hdset_reg_ref_t*>opt
        PyObject* raw = (<PyObject**>ipt)[0]
        RegionReference ref

    if raw == NULL or raw == Py_None:
        memset(out_ref, c'\0', sizeof(hdset_reg_ref_t))
        return 0

    if not isinstance(<object>raw, RegionReference):
        raise TypeError("Can't convert incompatible object to HDF5 region reference")

    ref = <RegionReference>raw
    # The copy source is ``.data`` from HDF5 1.12 on, the reference itself before.
    IF HDF5_VERSION >= (1, 12, 0):
        memcpy(out_ref, ref.ref.reg_ref.data, sizeof(hdset_reg_ref_t))
    ELSE:
        memcpy(out_ref, ref.ref.reg_ref, sizeof(hdset_reg_ref_t))

    return 0
430
431# =============================================================================
432# Conversion functions
433
434
cdef inline herr_t vlen2str(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: vlen C string -> Python bytes object.

    Plugs ``conv_vlen2str`` / ``init_vlen2str`` into ``generic_converter``
    and requests a background buffer (``H5T_BKG_YES``).
    """
    return generic_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        conv_vlen2str, init_vlen2str, H5T_BKG_YES)
440
cdef inline herr_t str2vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl)except -1 with gil:
    """Conversion entry point: Python str/bytes object -> vlen C string.

    Plugs ``conv_str2vlen`` / ``init_str2vlen`` into ``generic_converter``
    without a background buffer (``H5T_BKG_NO``).
    """
    return generic_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        conv_str2vlen, init_str2vlen, H5T_BKG_NO)
446
cdef inline herr_t vlen2fixed(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: vlen C string -> fixed-width string.

    Plugs ``conv_vlen2fixed`` / ``init_vlen2fixed`` into
    ``generic_converter`` without a background buffer (``H5T_BKG_NO``).
    """
    return generic_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        conv_vlen2fixed, init_vlen2fixed, H5T_BKG_NO)
452
cdef inline herr_t fixed2vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: fixed-width string -> vlen C string.

    Plugs ``conv_fixed2vlen`` / ``init_fixed2vlen`` into
    ``generic_converter`` without a background buffer (``H5T_BKG_NO``).
    """
    return generic_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        conv_fixed2vlen, init_fixed2vlen, H5T_BKG_NO)
458
cdef inline herr_t objref2pyref(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: HDF5 object reference -> ``Reference`` object.

    Plugs ``conv_objref2pyref`` / ``init_generic`` into
    ``generic_converter`` without a background buffer (``H5T_BKG_NO``).
    """
    return generic_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        conv_objref2pyref, init_generic, H5T_BKG_NO)
464
cdef inline herr_t pyref2objref(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: ``Reference`` object -> HDF5 object reference.

    Plugs ``conv_pyref2objref`` / ``init_generic`` into
    ``generic_converter`` without a background buffer (``H5T_BKG_NO``).
    """
    return generic_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        conv_pyref2objref, init_generic, H5T_BKG_NO)
470
cdef inline herr_t regref2pyref(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: HDF5 region reference -> ``RegionReference``.

    Plugs ``conv_regref2pyref`` / ``init_generic`` into
    ``generic_converter`` and requests a background buffer (``H5T_BKG_YES``),
    which the element operator reads.
    """
    return generic_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        conv_regref2pyref, init_generic, H5T_BKG_YES)
476
cdef inline herr_t pyref2regref(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: ``RegionReference`` -> HDF5 region reference.

    Plugs ``conv_pyref2regref`` / ``init_generic`` into
    ``generic_converter`` without a background buffer (``H5T_BKG_NO``).
    """
    return generic_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        conv_pyref2regref, init_generic, H5T_BKG_NO)
482
483# =============================================================================
484# Enum to integer converter
485
# Private context of the enum <-> integer converter, stored in ``cdata.priv``
# by enum_int_converter_init and released by enum_int_converter_free.
cdef struct conv_enum_t:
    size_t src_size     # H5Tget_size() of the source type
    size_t dst_size     # H5Tget_size() of the destination type
489
cdef int enum_int_converter_init(hid_t src, hid_t dst,
                                 H5T_cdata_t *cdata, int forward) except -1:
    """Allocate the enum converter context and store it in ``cdata.priv``.

    :param src: source datatype id
    :param dst: destination datatype id
    :param cdata: conversion data; ``need_bkg`` is set to ``H5T_BKG_NO``
    :param forward: conversion direction (not used here)
    :return: always 0
    """
    cdef conv_enum_t *ctx

    cdata[0].need_bkg = H5T_BKG_NO
    ctx = <conv_enum_t*>emalloc(sizeof(conv_enum_t))
    cdata[0].priv = ctx
    ctx.src_size = H5Tget_size(src)
    ctx.dst_size = H5Tget_size(dst)
    return 0
498
cdef void enum_int_converter_free(H5T_cdata_t *cdata):
    """Release the enum converter context held in ``cdata.priv``."""
    efree(cdata[0].priv)
    cdata[0].priv = NULL
505
506
cdef int enum_int_converter_conv(hid_t src, hid_t dst, H5T_cdata_t *cdata,
                                 size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                                 void *bkg_i, hid_t dxpl, int forward) except -1:
    """Convert ``nl`` elements between an enum type and an integer type.

    The enum's base type stands in for the enum itself, so the work reduces
    to an integer-to-integer ``H5Tconvert``.  When the base type equals the
    integer type nothing is done.

    :param src: source datatype id
    :param dst: destination datatype id
    :param cdata: conversion data; ``priv`` holds the ``conv_enum_t`` context
    :param nl: number of elements
    :param buf_stride: byte distance between elements, 0 for a packed buffer
    :param bkg_stride: not used
    :param buf_i: buffer converted in place
    :param bkg_i: not used
    :param dxpl: transfer property list id passed on to ``H5Tconvert``
    :param forward: non-zero for enum -> int, zero for int -> enum
    :return: always 0
    :raises MemoryError: the temporary packing buffer came back NULL
    """
    cdef:
        conv_enum_t *ctx = <conv_enum_t*>cdata[0].priv
        char* data = <char*>buf_i
        char* packed = NULL
        hid_t base = -1
        hid_t conv_src, conv_dst
        size_t nalloc
        int i
        int identical

    try:
        # Substitute the enum's base type on whichever side the enum is.
        if forward:
            base = H5Tget_super(src)
            identical = H5Tequal(base, dst)
            conv_src = base
            conv_dst = dst
        else:
            base = H5Tget_super(dst)
            identical = H5Tequal(base, src)
            conv_src = src
            conv_dst = base

        # Short-circuit success
        if identical:
            return 0

        if buf_stride == 0:
            # Contiguous case: call H5Tconvert directly
            H5Tconvert(conv_src, conv_dst, nl, data, NULL, dxpl)
            return 0

        # Non-contiguous: gather, convert and then scatter.  The scratch
        # buffer is sized for the larger of the two element sizes because
        # the conversion happens in place.
        nalloc = (ctx.src_size if ctx.src_size > ctx.dst_size
                  else ctx.dst_size) * nl

        packed = <char*>emalloc(nalloc)
        if packed == NULL:
            raise MemoryError()

        for i in range(nl):
            memcpy(packed + (i*ctx.src_size), data + (i*buf_stride),
                   ctx.src_size)

        H5Tconvert(conv_src, conv_dst, nl, packed, NULL, dxpl)

        for i in range(nl):
            memcpy(data + (i*buf_stride), packed + (i*ctx.dst_size),
                   ctx.dst_size)

    finally:
        # Runs on the early returns above as well as on errors.
        efree(packed)
        packed = NULL
        if base > 0:
            H5Tclose(base)

    return 0
570
571
572# Direction ("forward"): 1 = enum to int, 0 = int to enum
cdef herr_t enum_int_converter(hid_t src, hid_t dst, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                               void *bkg_i, hid_t dxpl, int forward) except -1:
    """Route one conversion command to the enum <-> integer helpers.

    :param forward: 1 for enum -> int, 0 for int -> enum
    :return: 0 on success, the helper's result for a conversion, or -2 for
        an unrecognised command
    """
    cdef int command = cdata[0].command

    if command == H5T_CONV_CONV:
        return enum_int_converter_conv(src, dst, cdata, nl, buf_stride,
                                       bkg_stride, buf_i, bkg_i, dxpl, forward)

    if command == H5T_CONV_INIT:
        enum_int_converter_init(src, dst, cdata, forward)
        return 0

    if command == H5T_CONV_FREE:
        enum_int_converter_free(cdata)
        return 0

    return -2
590
591
cdef herr_t enum2int(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: enum -> integer (``forward`` = 1)."""
    return enum_int_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        1)
597
cdef herr_t int2enum(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf_i,
                    void *bkg_i, hid_t dxpl) except -1 with gil:
    """Conversion entry point: integer -> enum (``forward`` = 0)."""
    return enum_int_converter(
        src_id, dst_id, cdata, nl, buf_stride, bkg_stride, buf_i, bkg_i, dxpl,
        0)
603
604# =============================================================================
605# ndarray to VLEN routines
606
cdef herr_t vlen2ndarray(hid_t src_id,
                         hid_t dst_id,
                         H5T_cdata_t *cdata,
                         size_t nl,
                         size_t buf_stride,
                         size_t bkg_stride,
                         void *buf_i,
                         void *bkg_i,
                         hid_t dxpl) except -1 with gil:
    """Convert variable-length sequences to NumPy array objects, in place.

    :param src_id: Identifier for the source datatype (must be of class VLEN).
    :param dst_id: Identifier for the destination datatype (must be OPAQUE).
    :param cdata: Conversion data; ``command`` selects init/convert/free.
    :param nl: Number of elements.
    :param buf_stride: Byte distance between elements, 0 for a packed buffer.
    :param bkg_stride: Not used.
    :param buf_i: Buffer holding the values before and after conversion.
    :param bkg_i: Not used.
    :param dxpl: Dataset transfer property list identifier (not used).
    :return: 0 on success, -2 for an unsupported type pair or an
        unrecognised command (which is NOT an exception)
    """
    cdef:
        int command = cdata[0].command
        size_t src_size, dst_size
        TypeID supertype
        TypeID outtype
        cnp.dtype dt
        int i
        char* buf = <char*>buf_i

    if command == H5T_CONV_INIT:
        cdata[0].need_bkg = H5T_BKG_NO
        if H5Tget_class(src_id) != H5T_VLEN or H5Tget_class(dst_id) != H5T_OPAQUE:
            return -2
        return 0

    if command == H5T_CONV_FREE:
        # Nothing was allocated on init.
        return 0

    if command != H5T_CONV_CONV:
        return -2   # Unrecognized command.  Note this is NOT an exception.

    # need to pass element dtype to converter
    supertype = typewrap(H5Tget_super(src_id))
    dt = supertype.dtype
    outtype = py_create(dt)

    if buf_stride != 0:
        # With explicit strides, we assume that the library knows the
        # alignment better than us, so input and output share one offset.
        for i in range(nl):
            conv_vlen2ndarray(buf + (i*buf_stride), buf + (i*buf_stride),
                              dt, supertype, outtype)
        return 0

    # No explicit stride seems to mean that the elements are packed
    # contiguously.  The walk direction is chosen so that writing an output
    # element never overwrites an input element that is still to be read.
    src_size = H5Tget_size(src_id)
    dst_size = H5Tget_size(dst_id)

    if src_size >= dst_size:
        for i in range(nl):
            conv_vlen2ndarray(buf + (i*src_size), buf + (i*dst_size),
                              dt, supertype, outtype)
    else:
        for i in range(nl-1, -1, -1):
            conv_vlen2ndarray(buf + (i*src_size), buf + (i*dst_size),
                              dt, supertype, outtype)

    return 0
678
679
# Layout used by the ndarray <-> vlen converters to read and write one
# variable-length element in the conversion buffer.
cdef struct vlen_t:
    size_t len      # number of elements in the sequence
    void* ptr       # start of the element data
683
cdef int conv_vlen2ndarray(void* ipt,
                           void* opt,
                           cnp.dtype elem_dtype,
                           TypeID intype,
                           TypeID outtype) except -1:
    """Convert one variable-length element into a 1-D NumPy array.

    The element data is converted from ``intype`` to ``outtype`` in place
    (after growing the allocation when the output items are larger) and then
    wrapped, without copying, in an array flagged as owning that memory.

    :param ipt: input pointer: points at the ``vlen_t`` to convert
    :param opt: output pointer: receives the ``PyObject*`` of the new array
    :param elem_dtype: NumPy dtype of the array elements
    :param intype: HDF5 type of the stored elements
    :param outtype: HDF5 type matching ``elem_dtype``
    :return: always 0
    :raises MemoryError: the element data could not be grown
    """
    cdef:
        PyObject** buf_obj = <PyObject**>opt
        vlen_t* in_vlen = <vlen_t*>ipt
        int flags = NPY_WRITEABLE | NPY_C_CONTIGUOUS | NPY_OWNDATA
        npy_intp dims[1]
        void* data
        void* grown
        char[:] buf
        cnp.ndarray ndarray
        size_t size, itemsize

    # Read the input first: ipt and opt may refer to the same memory.
    size = in_vlen[0].len
    data = in_vlen[0].ptr

    dims[0] = size
    itemsize = H5Tget_size(outtype.id)
    if itemsize > H5Tget_size(intype.id):
        grown = realloc(data, itemsize * size)
        # A NULL result is only a failure for a non-empty request; on failure
        # the original allocation is still valid and stays with the buffer.
        if grown == NULL and itemsize * size != 0:
            raise MemoryError("Unable to grow vlen data for type conversion")
        data = grown
    H5Tconvert(intype.id, outtype.id, size, data, NULL, H5P_DEFAULT)

    if elem_dtype.kind in b"biufcmMO":
        # type_num is enough to create an array for these dtypes
        ndarray = cnp.PyArray_SimpleNewFromData(1, dims, elem_dtype.type_num, data)
    else:
        # dtypes like string & void need a size specified, so can't be used with
        # SimpleNewFromData. Cython doesn't expose NumPy C-API functions
        # like NewFromDescr, so we'll construct this with a Python function.
        buf = <char[:itemsize * size]> data
        ndarray = np.frombuffer(buf, dtype=elem_dtype)

    PyArray_ENABLEFLAGS(ndarray, flags)

    # Write the new ndarray object to the buffer in-place and ensure it is not destroyed
    buf_obj[0] = <PyObject*>ndarray
    Py_INCREF(ndarray)
    # NOTE(review): this extra dtype reference is kept from the original
    # code; neither constructor above visibly consumes one - confirm whether
    # it is still required.
    Py_INCREF(elem_dtype)
    return 0
739
cdef herr_t ndarray2vlen(hid_t src_id,
                         hid_t dst_id,
                         H5T_cdata_t *cdata,
                         size_t nl,
                         size_t buf_stride,
                         size_t bkg_stride,
                         void *buf_i,
                         void *bkg_i,
                         hid_t dxpl) except -1 with gil:
    """Convert NumPy array objects to variable-length sequences, in place.

    :param src_id: Identifier for the source datatype (must equal H5PY_OBJ).
    :param dst_id: Identifier for the destination datatype (must be VLEN).
    :param cdata: Conversion data; ``command`` selects init/convert/free.
    :param nl: Number of elements.
    :param buf_stride: Byte distance between elements, 0 for a packed buffer.
    :param bkg_stride: Not used.
    :param buf_i: Buffer holding the values before and after conversion.
    :param bkg_i: Not used.
    :param dxpl: Dataset transfer property list identifier (not used).
    :return: 0 on success, -2 for an unsupported type pair or an
        unrecognised command (which is NOT an exception)
    """
    cdef:
        int command = cdata[0].command
        size_t src_size, dst_size
        TypeID supertype
        TypeID outtype
        int i
        PyObject **pdata = <PyObject **> buf_i
        PyObject *pdata_elem
        char* buf = <char*>buf_i

    if command == H5T_CONV_INIT:
        cdata[0].need_bkg = H5T_BKG_NO
        if not H5Tequal(src_id, H5PY_OBJ) or H5Tget_class(dst_id) != H5T_VLEN:
            return -2
        supertype = typewrap(H5Tget_super(dst_id))
        # Reject the conversion unless every array passed at init time is
        # 1-D and its dtype maps onto the vlen base type.
        for i in range(nl):
            # smells a lot
            memcpy(&pdata_elem, pdata+i, sizeof(pdata_elem))
            if supertype != py_create((<cnp.ndarray> pdata_elem).dtype, 1):
                return -2
            if (<cnp.ndarray> pdata_elem).ndim != 1:
                return -2
        log_convert_registered(src_id, dst_id)

    elif command == H5T_CONV_FREE:
        # Nothing was allocated on init.
        pass

    elif command == H5T_CONV_CONV:
        # If there are no elements to convert, pdata will not point to
        # a valid PyObject*, so bail here to prevent accessing the dtype below
        if nl == 0:
            return 0

        # need to pass element dtype to converter
        # Only the first array is inspected; the same input type is then
        # used for every element.
        pdata_elem = pdata[0]
        supertype = py_create((<cnp.ndarray> pdata_elem).dtype)
        outtype = typewrap(H5Tget_super(dst_id))

        if buf_stride == 0:
            # No explicit stride seems to mean that the elements are packed
            # contiguously in the buffer.  In this case we must be careful
            # not to "stomp on" input elements if the output elements are
            # of a larger size.

            src_size = H5Tget_size(src_id)
            dst_size = H5Tget_size(dst_id)

            if src_size >= dst_size:
                for i in range(nl):
                    conv_ndarray2vlen(buf + (i*src_size), buf + (i*dst_size),
                                      supertype, outtype)
            else:
                # Walk backwards so growing outputs never overwrite inputs
                # that have not been read yet.
                for i in range(nl-1, -1, -1):
                    conv_ndarray2vlen(buf + (i*src_size), buf + (i*dst_size),
                                      supertype, outtype)
        else:
            # With explicit strides, we assume that the library knows the
            # alignment better than us.  Therefore we use the given stride
            # offsets exclusively.
            for i in range(nl):
                conv_ndarray2vlen(buf + (i*buf_stride), buf + (i*buf_stride),
                                  supertype, outtype)

    else:
        return -2   # Unrecognized command.  Note this is NOT an exception.

    return 0
816
817
cdef int conv_ndarray2vlen(void* ipt,
                           void* opt,
                           TypeID intype,
                           TypeID outtype) except -1:
    """Convert one NumPy array object into a variable-length element.

    The array contents are copied into a newly allocated buffer, converted
    from ``intype`` to ``outtype`` in place, and the buffer is handed over
    through the output ``vlen_t``.  If any step fails the new buffer is
    freed before the exception propagates.

    :param ipt: input pointer: points at the ``PyObject*`` of the array
    :param opt: output pointer: points at the ``vlen_t`` to fill
    :param intype: HDF5 type matching the array's dtype
    :param outtype: HDF5 base type of the destination vlen
    :return: always 0
    """
    cdef:
        PyObject** buf_obj = <PyObject**>ipt
        vlen_t* out_vlen = <vlen_t*>opt
        void* data
        cnp.ndarray ndarray
        size_t n_elem, nbytes
        PyObject* buf_obj0
        Py_buffer view

    # Read the input first: ipt and opt may refer to the same memory.
    buf_obj0 = buf_obj[0]
    ndarray = <cnp.ndarray> buf_obj0
    n_elem = ndarray.shape[0]
    # Sized for the larger item size because H5Tconvert works in place.
    nbytes = n_elem * max(H5Tget_size(outtype.id), H5Tget_size(intype.id))

    data = emalloc(nbytes)
    try:
        PyObject_GetBuffer(ndarray, &view, PyBUF_INDIRECT)
        try:
            PyBuffer_ToContiguous(data, &view, view.len, b'C')
        finally:
            # The view is released whether or not the copy succeeded.
            PyBuffer_Release(&view)

        H5Tconvert(intype.id, outtype.id, n_elem, data, NULL, H5P_DEFAULT)
    except BaseException:
        # Ownership has not been handed over yet, so free the buffer here.
        efree(data)
        raise

    out_vlen[0].len = n_elem
    out_vlen[0].ptr = data

    return 0
848
849# =============================================================================
850# B8 to enum bool routines
851
cdef herr_t b82boolenum(hid_t src_id,
                        hid_t dst_id,
                        H5T_cdata_t *cdata,
                        size_t nl,
                        size_t buf_stride,
                        size_t bkg_stride,
                        void *buf_i,
                        void *bkg_i,
                        hid_t dxpl) except -1:
    # Conversion callback registered for H5T_NATIVE_B8 -> boolean enum.
    #
    # None of the arguments is inspected and the buffer is never modified:
    # every invocation just reports success by returning 0.
    # NOTE(review): presumably a no-op because both types use the same
    # one-byte in-memory representation -- confirm.
    return 0
856
cdef herr_t boolenum2b8(hid_t src_id,
                        hid_t dst_id,
                        H5T_cdata_t *cdata,
                        size_t nl,
                        size_t buf_stride,
                        size_t bkg_stride,
                        void *buf_i,
                        void *bkg_i,
                        hid_t dxpl) except -1:
    # Conversion callback registered for boolean enum -> H5T_NATIVE_B8.
    #
    # None of the arguments is inspected and the buffer is never modified:
    # every invocation just reports success by returning 0.
    # NOTE(review): presumably a no-op because both types use the same
    # one-byte in-memory representation -- confirm.
    return 0
861
862# =============================================================================
863# BITFIELD to UINT routines
864
cdef herr_t bitfield2uint(hid_t src_id,
                          hid_t dst_id,
                          H5T_cdata_t *cdata,
                          size_t nl,
                          size_t buf_stride,
                          size_t bkg_stride,
                          void *buf_i,
                          void *bkg_i,
                          hid_t dxpl) except -1:
    # Conversion callback shared by all bitfield -> unsigned integer pairs
    # registered in this module (8/16/32/64 bit, big and little endian).
    #
    # None of the arguments is inspected and the buffer is never modified:
    # every invocation just reports success by returning 0.
    # NOTE(review): presumably a no-op because each registered pair has the
    # same width and byte order -- confirm.
    return 0
869
cdef herr_t uint2bitfield(hid_t src_id,
                          hid_t dst_id,
                          H5T_cdata_t *cdata,
                          size_t nl,
                          size_t buf_stride,
                          size_t bkg_stride,
                          void *buf_i,
                          void *bkg_i,
                          hid_t dxpl) except -1:
    # Conversion callback shared by all unsigned integer -> bitfield pairs
    # registered in this module (8/16/32/64 bit, big and little endian).
    #
    # None of the arguments is inspected and the buffer is never modified:
    # every invocation just reports success by returning 0.
    # NOTE(review): presumably a no-op because each registered pair has the
    # same width and byte order -- confirm.
    return 0
874
875# =============================================================================
876
cpdef int register_converters() except -1:
    """Register this module's conversion callbacks with the HDF5 library.

    A few temporary datatypes (variable-length string, int32 enum, int32
    vlen and the int8 boolean enum built from ``cfg._f_name`` /
    ``cfg._t_name``) are created only to describe the source/destination
    types passed to ``H5Tregister``.  They are closed again before
    returning, including when one of the HDF5 calls raises.

    Returns 0 on success; errors propagate as exceptions.
    """
    cdef:
        # Handles start out invalid so the cleanup code can tell which of
        # them were actually created.
        hid_t vlstring = -1
        hid_t vlentype = -1
        hid_t pyobj
        hid_t enumtype = -1
        hid_t boolenum = -1
        int8_t f_value = 0
        int8_t t_value = 1

    try:
        vlstring = H5Tcopy(H5T_C_S1)
        H5Tset_size(vlstring, H5T_VARIABLE)

        enumtype = H5Tenum_create(H5T_STD_I32LE)

        vlentype = H5Tvlen_create(H5T_STD_I32LE)

        # Module-level handle; it is not owned by this function and must
        # not be closed here.
        pyobj = H5PY_OBJ

        boolenum = H5Tenum_create(H5T_NATIVE_INT8)
        H5Tenum_insert(boolenum, cfg._f_name, &f_value)
        H5Tenum_insert(boolenum, cfg._t_name, &t_value)

        # The registration order below is kept exactly as it was.
        H5Tregister(H5T_PERS_SOFT, "vlen2fixed", vlstring, H5T_C_S1, vlen2fixed)
        H5Tregister(H5T_PERS_SOFT, "fixed2vlen", H5T_C_S1, vlstring, fixed2vlen)

        H5Tregister(H5T_PERS_HARD, "objref2pyref", H5T_STD_REF_OBJ, pyobj, objref2pyref)
        H5Tregister(H5T_PERS_HARD, "pyref2objref", pyobj, H5T_STD_REF_OBJ, pyref2objref)

        H5Tregister(H5T_PERS_HARD, "regref2pyref", H5T_STD_REF_DSETREG, pyobj, regref2pyref)
        H5Tregister(H5T_PERS_HARD, "pyref2regref", pyobj, H5T_STD_REF_DSETREG, pyref2regref)

        H5Tregister(H5T_PERS_SOFT, "enum2int", enumtype, H5T_STD_I32LE, enum2int)
        H5Tregister(H5T_PERS_SOFT, "int2enum", H5T_STD_I32LE, enumtype, int2enum)

        H5Tregister(H5T_PERS_SOFT, "vlen2ndarray", vlentype, pyobj, vlen2ndarray)
        H5Tregister(H5T_PERS_SOFT, "ndarray2vlen", pyobj, vlentype, ndarray2vlen)

        H5Tregister(H5T_PERS_HARD, "boolenum2b8", boolenum, H5T_NATIVE_B8, boolenum2b8)
        H5Tregister(H5T_PERS_HARD, "b82boolenum", H5T_NATIVE_B8, boolenum, b82boolenum)

        H5Tregister(H5T_PERS_HARD, "uint82b8", H5T_STD_U8BE, H5T_STD_B8BE, uint2bitfield)
        H5Tregister(H5T_PERS_HARD, "b82uint8", H5T_STD_B8BE, H5T_STD_U8BE, bitfield2uint)

        H5Tregister(H5T_PERS_HARD, "uint82b8", H5T_STD_U8LE, H5T_STD_B8LE, uint2bitfield)
        H5Tregister(H5T_PERS_HARD, "b82uint8", H5T_STD_B8LE, H5T_STD_U8LE, bitfield2uint)

        H5Tregister(H5T_PERS_HARD, "uint162b16", H5T_STD_U16BE, H5T_STD_B16BE, uint2bitfield)
        H5Tregister(H5T_PERS_HARD, "b162uint16", H5T_STD_B16BE, H5T_STD_U16BE, bitfield2uint)

        H5Tregister(H5T_PERS_HARD, "uint162b16", H5T_STD_U16LE, H5T_STD_B16LE, uint2bitfield)
        H5Tregister(H5T_PERS_HARD, "b162uint16", H5T_STD_B16LE, H5T_STD_U16LE, bitfield2uint)

        H5Tregister(H5T_PERS_HARD, "uint322b32", H5T_STD_U32BE, H5T_STD_B32BE, uint2bitfield)
        H5Tregister(H5T_PERS_HARD, "b322uint32", H5T_STD_B32BE, H5T_STD_U32BE, bitfield2uint)

        H5Tregister(H5T_PERS_HARD, "uint322b32", H5T_STD_U32LE, H5T_STD_B32LE, uint2bitfield)
        H5Tregister(H5T_PERS_HARD, "b322uint32", H5T_STD_B32LE, H5T_STD_U32LE, bitfield2uint)

        H5Tregister(H5T_PERS_HARD, "uint642b64", H5T_STD_U64BE, H5T_STD_B64BE, uint2bitfield)
        H5Tregister(H5T_PERS_HARD, "b642uint64", H5T_STD_B64BE, H5T_STD_U64BE, bitfield2uint)

        H5Tregister(H5T_PERS_HARD, "uint642b64", H5T_STD_U64LE, H5T_STD_B64LE, uint2bitfield)
        H5Tregister(H5T_PERS_HARD, "b642uint64", H5T_STD_B64LE, H5T_STD_U64LE, bitfield2uint)

        H5Tregister(H5T_PERS_SOFT, "vlen2str", vlstring, pyobj, vlen2str)
        H5Tregister(H5T_PERS_SOFT, "str2vlen", pyobj, vlstring, str2vlen)
    finally:
        # Release the temporary type handles whether or not registration
        # succeeded, so a failure part-way through does not leak them.
        if vlstring >= 0:
            H5Tclose(vlstring)
        if vlentype >= 0:
            H5Tclose(vlentype)
        if enumtype >= 0:
            H5Tclose(enumtype)
        if boolenum >= 0:
            H5Tclose(boolenum)

    return 0
951
cpdef int unregister_converters() except -1:
    """Unregister the conversion callbacks set up by register_converters().

    Each callback is removed with ``H5Tunregister``.  Returns 0 on success;
    errors propagate as exceptions.
    """
    # No specific source or destination type is supplied to H5Tunregister;
    # each call identifies its target by name and/or callback only.
    cdef hid_t no_type = -1

    # Variable-length string <-> Python object
    H5Tunregister(H5T_PERS_SOFT, "vlen2str", no_type, no_type, vlen2str)
    H5Tunregister(H5T_PERS_SOFT, "str2vlen", no_type, no_type, str2vlen)

    # Variable-length string <-> fixed-length string
    H5Tunregister(H5T_PERS_SOFT, "vlen2fixed", no_type, no_type, vlen2fixed)
    H5Tunregister(H5T_PERS_SOFT, "fixed2vlen", no_type, no_type, fixed2vlen)

    # Object references
    H5Tunregister(H5T_PERS_HARD, "objref2pyref", no_type, no_type, objref2pyref)
    H5Tunregister(H5T_PERS_HARD, "pyref2objref", no_type, no_type, pyref2objref)

    # Region references
    H5Tunregister(H5T_PERS_HARD, "regref2pyref", no_type, no_type, regref2pyref)
    H5Tunregister(H5T_PERS_HARD, "pyref2regref", no_type, no_type, pyref2regref)

    # Enum <-> integer
    H5Tunregister(H5T_PERS_SOFT, "enum2int", no_type, no_type, enum2int)
    H5Tunregister(H5T_PERS_SOFT, "int2enum", no_type, no_type, int2enum)

    # Vlen <-> ndarray
    H5Tunregister(H5T_PERS_SOFT, "vlen2ndarray", no_type, no_type, vlen2ndarray)
    H5Tunregister(H5T_PERS_SOFT, "ndarray2vlen", no_type, no_type, ndarray2vlen)

    # Boolean enum <-> 8-bit bitfield
    H5Tunregister(H5T_PERS_HARD, "boolenum2b8", no_type, no_type, boolenum2b8)
    H5Tunregister(H5T_PERS_HARD, "b82boolenum", no_type, no_type, b82boolenum)

    # Bitfield <-> unsigned integer: these two callbacks were registered
    # under several names, so pass an empty string to unregister all methods
    # that use these functions.
    H5Tunregister(H5T_PERS_HARD, "", no_type, no_type, uint2bitfield)
    H5Tunregister(H5T_PERS_HARD, "", no_type, no_type, bitfield2uint)

    return 0
980