1# This file is part of h5py, a Python interface to the HDF5 library.
2#
3# http://www.h5py.org
4#
5# Copyright 2008-2013 Andrew Collette and contributors
6#
7# License:  Standard 3-clause BSD; see "license.txt" for full license terms
8#           and contributor agreement.
9
10"""
11    Low-level operations on HDF5 file objects.
12"""
13
14include "config.pxi"
15
16# C level imports
17from cpython.buffer cimport PyObject_CheckBuffer, \
18                            PyObject_GetBuffer, PyBuffer_Release, \
19                            PyBUF_SIMPLE
20from ._objects cimport pdefault
21from .h5p cimport propwrap, PropFAID, PropFCID
22from .h5i cimport wrap_identifier
23from .h5ac cimport CacheConfig
24from .utils cimport emalloc, efree
25
26# Python level imports
27from collections import namedtuple
28import gc
29from . import _objects
30from ._objects import phil, with_phil
31
32from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AsString
33
34# Initialization
35
36# === Public constants and data structures ====================================
37
# File access flags, as accepted by open() and create() in this module.
ACC_TRUNC = H5F_ACC_TRUNC
ACC_EXCL = H5F_ACC_EXCL
ACC_RDWR = H5F_ACC_RDWR
ACC_RDONLY = H5F_ACC_RDONLY
# The SWMR flags are only compiled in for sufficiently recent HDF5.
IF HDF5_VERSION >= SWMR_MIN_HDF5_VERSION:
    ACC_SWMR_WRITE = H5F_ACC_SWMR_WRITE
    ACC_SWMR_READ = H5F_ACC_SWMR_READ


# Flush scopes, as accepted by flush().
SCOPE_LOCAL = H5F_SCOPE_LOCAL
SCOPE_GLOBAL = H5F_SCOPE_GLOBAL

# Python-level names for the HDF5 H5F_CLOSE_* values.
CLOSE_WEAK = H5F_CLOSE_WEAK
CLOSE_SEMI = H5F_CLOSE_SEMI
CLOSE_STRONG = H5F_CLOSE_STRONG
CLOSE_DEFAULT = H5F_CLOSE_DEFAULT

# Object-type selectors, as accepted by get_obj_count() and get_obj_ids().
OBJ_FILE = H5F_OBJ_FILE
OBJ_DATASET = H5F_OBJ_DATASET
OBJ_GROUP = H5F_OBJ_GROUP
OBJ_DATATYPE = H5F_OBJ_DATATYPE
OBJ_ATTR = H5F_OBJ_ATTR
OBJ_ALL = H5F_OBJ_ALL
OBJ_LOCAL = H5F_OBJ_LOCAL
UNLIMITED = H5F_UNLIMITED

# Python-level names for the HDF5 H5F_LIBVER_* values; the newer ones are
# only compiled in when the HDF5 version is recent enough.
LIBVER_EARLIEST = H5F_LIBVER_EARLIEST
LIBVER_LATEST = H5F_LIBVER_LATEST
IF HDF5_VERSION >= (1, 10, 2):
    LIBVER_V18 = H5F_LIBVER_V18
    LIBVER_V110 = H5F_LIBVER_V110

IF HDF5_VERSION >= VOL_MIN_HDF5_VERSION:
    LIBVER_V112 = H5F_LIBVER_V112

IF HDF5_VERSION >= (1, 13, 0):
    LIBVER_V114 = H5F_LIBVER_V114
75
# Flag for open_file_image().  Guarded with a compile-time IF, like every
# other version check in this file, so that H5LT_FILE_IMAGE_OPEN_RW is only
# referenced when building against HDF5 1.8.9 or later.
IF HDF5_VERSION >= (1, 8, 9):
    FILE_IMAGE_OPEN_RW = H5LT_FILE_IMAGE_OPEN_RW
78
IF HDF5_VERSION >= (1, 10, 1):
    # Python-level names for the HDF5 H5F_FSPACE_STRATEGY_* values.
    FSPACE_STRATEGY_FSM_AGGR = H5F_FSPACE_STRATEGY_FSM_AGGR
    FSPACE_STRATEGY_PAGE = H5F_FSPACE_STRATEGY_PAGE
    FSPACE_STRATEGY_AGGR = H5F_FSPACE_STRATEGY_AGGR
    FSPACE_STRATEGY_NONE = H5F_FSPACE_STRATEGY_NONE

    # Result containers for FileID.get_page_buffering_stats().
    # NOTE(review): the typename 'PageBufferStats' differs from the name
    # this class is bound to (PageBufStats) -- confirm whether intentional.
    PageBufStats = namedtuple('PageBufferStats', ('meta', 'raw'))
    PageStats = namedtuple(
        'PageStats',
        ('accesses', 'hits', 'misses', 'evictions', 'bypasses'),
    )
88
89
90# === File operations =========================================================
91
@with_phil
def open(char* name, unsigned int flags=H5F_ACC_RDWR, PropFAID fapl=None):
    """(STRING name, UINT flags=ACC_RDWR, PropFAID fapl=None) => FileID

    Open an HDF5 file which already exists, returning a FileID wrapping
    the new identifier.  Keyword "flags" may be:

    ACC_RDWR
        Open in read-write mode

    ACC_RDONLY
        Open in readonly mode

    Keyword "fapl" may be a file access property list; it is passed
    through pdefault() before being handed to H5Fopen.
    """
    cdef hid_t fapl_id = pdefault(fapl)
    cdef hid_t file_id = H5Fopen(name, flags, fapl_id)
    return FileID(file_id)
107
108
@with_phil
def create(char* name, int flags=H5F_ACC_TRUNC, PropFCID fcpl=None,
           PropFAID fapl=None):
    """(STRING name, INT flags=ACC_TRUNC, PropFCID fcpl=None,
    PropFAID fapl=None) => FileID

    Create a new HDF5 file and return a FileID wrapping its identifier.
    Keyword "flags" may be:

    ACC_TRUNC
        Truncate an existing file, discarding its data

    ACC_EXCL
        Fail if a conflicting file exists

    The default is ACC_TRUNC, chosen to keep the behavior in line with
    that of Python's built-in functions.  Be careful: an existing file
    is overwritten unless ACC_EXCL is given.

    Both property lists are passed through pdefault() before being handed
    to H5Fcreate.
    """
    cdef hid_t fcpl_id = pdefault(fcpl)
    cdef hid_t fapl_id = pdefault(fapl)
    cdef hid_t file_id = H5Fcreate(name, flags, fcpl_id, fapl_id)
    return FileID(file_id)
127
IF HDF5_VERSION >= (1, 8, 9):
    @with_phil
    def open_file_image(image, flags=0):
        """(BUFFER image, INT flags=0) => FileID

        Open an HDF5 file from an in-memory image.  "image" must be an
        object supporting the buffer protocol; a TypeError is raised
        otherwise.  Keyword "flags" may be:

        FILE_IMAGE_OPEN_RW
            Specifies opening the file image in read/write mode.
        """
        cdef Py_buffer view
        cdef hid_t file_id

        if not PyObject_CheckBuffer(image):
            raise TypeError("image must support the buffer protocol")

        PyObject_GetBuffer(image, &view, PyBUF_SIMPLE)
        try:
            file_id = H5LTopen_file_image(view.buf, view.len, flags)
            return FileID(file_id)
        finally:
            # Release the buffer whether or not opening succeeded.
            PyBuffer_Release(&view)
148
149
@with_phil
def flush(ObjectID obj not None, int scope=H5F_SCOPE_LOCAL):
    """(ObjectID obj, INT scope=SCOPE_LOCAL)

    Ask the HDF5 library to flush file buffers to disk.  "obj" may be
    the file identifier itself, or the identifier of any object residing
    in the file.  Keyword "scope" may be:

    SCOPE_LOCAL
        Flush only the given file

    SCOPE_GLOBAL
        Flush the entire virtual file
    """
    cdef H5F_scope_t c_scope = <H5F_scope_t>scope
    H5Fflush(obj.id, c_scope)
165
166
@with_phil
def is_hdf5(char* name):
    """(STRING name) => BOOL

    Report whether the file called "name" is an HDF5 file.  Note that an
    exception is raised if the file doesn't exist.
    """
    cdef bint is_h5 = <bint>(H5Fis_hdf5(name))
    return is_h5
175
176
@with_phil
def mount(ObjectID loc not None, char* name, FileID fid not None):
    """(ObjectID loc, STRING name, FileID fid)

    Mount the open file "fid" on the group called "name" under "loc".
    The group "name" must already exist.  The mount is performed with
    the default property list (H5P_DEFAULT).
    """
    cdef hid_t loc_id = loc.id
    cdef hid_t child_id = fid.id
    H5Fmount(loc_id, name, child_id, H5P_DEFAULT)
185
186
@with_phil
def unmount(ObjectID loc not None, char* name):
    """(ObjectID loc, STRING name)

    Unmount the file which is mounted at "name" under the group "loc".
    """
    cdef hid_t loc_id = loc.id
    H5Funmount(loc_id, name)
194
195
@with_phil
def get_name(ObjectID obj not None):
    """(ObjectID obj) => STRING

    Return the name of the file in which the given object resides.
    """
    cdef ssize_t name_len
    cdef char* buf = NULL

    # Called with a NULL buffer first; the value returned is used as the
    # length of the name when sizing the real buffer below.
    name_len = H5Fget_name(obj.id, NULL, 0)
    assert name_len >= 0

    # One extra byte for the terminating NUL.
    buf = <char*>emalloc(sizeof(char)*(name_len+1))
    try:
        H5Fget_name(obj.id, buf, name_len+1)
        # Copy into a Python object before the C buffer is freed.
        result = buf
        return result
    finally:
        efree(buf)
215
216
@with_phil
def get_obj_count(object where=OBJ_ALL, int types=H5F_OBJ_ALL):
    """(OBJECT where=OBJ_ALL, types=OBJ_ALL) => INT

    Count the currently open objects.

    where
        Either a FileID instance representing an HDF5 file, or the
        special constant OBJ_ALL, to count objects in all files.

    types
        Specify what kinds of object to include.  May be one of OBJ*,
        or any bitwise combination (e.g. ``OBJ_FILE | OBJ_ATTR``).

        The special value OBJ_ALL matches all object types, and
        OBJ_LOCAL will only match objects opened through a specific
        identifier.

    Raises TypeError if "where" is neither a FileID nor an int.
    """
    cdef hid_t location

    if isinstance(where, int):
        location = where
    elif isinstance(where, FileID):
        location = where.id
    else:
        raise TypeError("Location must be a FileID or OBJ_ALL.")

    return H5Fget_obj_count(location, types)
244
245
@with_phil
def get_obj_ids(object where=OBJ_ALL, int types=H5F_OBJ_ALL):
    """(OBJECT where=OBJ_ALL, types=OBJ_ALL) => LIST

    Get a list of identifier instances for open objects.

    where
        Either a FileID instance representing an HDF5 file, or the
        special constant OBJ_ALL, to list objects in all files.

    types
        Specify what kinds of object to include.  May be one of OBJ*,
        or any bitwise combination (e.g. ``OBJ_FILE | OBJ_ATTR``).

        The special value OBJ_ALL matches all object types, and
        OBJ_LOCAL will only match objects opened through a specific
        identifier.

    Raises TypeError if "where" is neither a FileID nor convertible with
    int().

    The garbage collector is temporarily disabled while the identifiers
    are wrapped; its previous enabled/disabled state is restored before
    returning.
    """
    cdef int count
    cdef int i
    cdef hid_t where_id
    cdef hid_t *obj_list = NULL
    cdef list py_obj_list = []

    if isinstance(where, FileID):
        where_id = where.id
    else:
        try:
            where_id = int(where)
        except TypeError:
            raise TypeError("Location must be a FileID or OBJ_ALL.")

    try:
        count = H5Fget_obj_count(where_id, types)
        obj_list = <hid_t*>emalloc(sizeof(hid_t)*count)

        if count > 0: # HDF5 complains that obj_list is NULL, even if count==0
            # Garbage collection might dealloc a Python object & call H5Idec_ref
            # between getting an HDF5 ID and calling H5Iinc_ref, breaking it.
            # Disable GC until we have inc_ref'd the IDs to keep them alive.
            # Remember whether GC was on, so that a caller who had
            # deliberately disabled it does not get it switched back on.
            gc_was_enabled = gc.isenabled()
            gc.disable()
            try:
                H5Fget_obj_ids(where_id, types, count, obj_list)
                for i in range(count):
                    py_obj_list.append(wrap_identifier(obj_list[i]))
                    # The HDF5 function returns a borrowed reference for each hid_t.
                    H5Iinc_ref(obj_list[i])
            finally:
                if gc_was_enabled:
                    gc.enable()

        return py_obj_list

    finally:
        efree(obj_list)
301
302# === FileID implementation ===================================================
303
304cdef class FileID(GroupID):
305
306    """
307        Represents an HDF5 file identifier.
308
309        These objects wrap a small portion of the H5F interface; all the
310        H5F functions which can take arbitrary objects in addition to
311        file identifiers are provided as functions in the h5f module.
312
313        Properties:
314
315        * name:   File name on disk
316
317        Behavior:
318
319        * Hashable: Yes, unique to the file (but not the access mode)
320        * Equality: Hash comparison
321    """
322
323    property name:
324        """ File name on disk (according to h5f.get_name()) """
325        def __get__(self):
326            with phil:
327                return get_name(self)
328
329
    @with_phil
    def close(self):
        """()

        Terminate access through this identifier.  Note that depending on
        what property list settings were used to open the file, the
        physical file might not be closed until all remaining open
        identifiers are freed.
        """
        # Close this identifier first, then let the _objects module run its
        # nonlocal_close() step.
        # NOTE(review): presumably nonlocal_close() invalidates other Python
        # wrappers whose HDF5 IDs became invalid -- confirm in _objects.
        self._close()
        _objects.nonlocal_close()
341
342    @with_phil
343    def _close_open_objects(self, int types):
344        # Used by File.close(). This avoids the get_obj_ids wrapper, which
345        # creates Python objects and increments HDF5 ref counts while we're
346        # trying to clean up. E.g. that can be problematic at Python shutdown.
347        cdef int count, i
348        cdef hid_t *obj_list = NULL
349
350        count = H5Fget_obj_count(self.id, types)
351        if count == 0:
352            return
353        obj_list = <hid_t*> emalloc(sizeof(hid_t) * count)
354        try:
355            H5Fget_obj_ids(self.id, types, count, obj_list)
356            for i in range(count):
357                while H5Iis_valid(obj_list[i]):
358                    H5Idec_ref(obj_list[i])
359        finally:
360            efree(obj_list)
361
362    @with_phil
363    def reopen(self):
364        """() => FileID
365
366        Retrieve another identifier for a file (which must still be open).
367        The new identifier is guaranteed to neither be mounted nor contain
368        a mounted file.
369        """
370        return FileID(H5Freopen(self.id))
371
372
373    @with_phil
374    def get_filesize(self):
375        """() => LONG size
376
377        Determine the total size (in bytes) of the HDF5 file,
378        including any user block.
379        """
380        cdef hsize_t size
381        H5Fget_filesize(self.id, &size)
382        return size
383
384
385    @with_phil
386    def get_create_plist(self):
387        """() => PropFCID
388
389        Retrieve a copy of the file creation property list used to
390        create this file.
391        """
392        return propwrap(H5Fget_create_plist(self.id))
393
394
395    @with_phil
396    def get_access_plist(self):
397        """() => PropFAID
398
399        Retrieve a copy of the file access property list which manages access
400        to this file.
401        """
402        return propwrap(H5Fget_access_plist(self.id))
403
404
405    @with_phil
406    def get_freespace(self):
407        """() => LONG freespace
408
409        Determine the amount of free space in this file.  Note that this
410        only tracks free space until the file is closed.
411        """
412        return H5Fget_freespace(self.id)
413
414
415    @with_phil
416    def get_intent(self):
417        """ () => INT
418
419        Determine the file's write intent, either of:
420        - H5F_ACC_RDONLY
421        - H5F_ACC_RDWR
422        """
423        cdef unsigned int mode
424        H5Fget_intent(self.id, &mode)
425        return mode
426
427
428    @with_phil
429    def get_vfd_handle(self, fapl=None):
430        """ (PropFAID) => INT
431
432        Retrieve the file handle used by the virtual file driver.
433
434        This may not be supported for all file drivers, and the meaning of the
435        return value may depend on the file driver.
436
437        The 'family' and 'multi' drivers access multiple files, and a file
438        access property list (fapl) can be used to indicate which to access,
439        with H5Pset_family_offset or H5Pset_multi_type.
440        """
441        cdef int *handle
442        H5Fget_vfd_handle(self.id, pdefault(fapl), <void**>&handle)
443        return handle[0]
444
445    IF HDF5_VERSION >= (1, 8, 9):
446
447        @with_phil
448        def get_file_image(self):
449            """ () => BYTES
450
451            Retrieves a copy of the image of an existing, open file.
452
453            Feature requires: 1.8.9
454            """
455
456            cdef ssize_t size
457
458            size = H5Fget_file_image(self.id, NULL, 0)
459            image = PyBytes_FromStringAndSize(NULL, size)
460
461            H5Fget_file_image(self.id, PyBytes_AsString(image), size)
462
463            return image
464
465    IF MPI and HDF5_VERSION >= (1, 8, 9):
466
467        @with_phil
468        def set_mpi_atomicity(self, bint atomicity):
469            """ (BOOL atomicity)
470
471            For MPI-IO driver, set to atomic (True), which guarantees sequential
472            I/O semantics, or non-atomic (False), which improves  performance.
473
474            Default is False.
475
476            Feature requires: 1.8.9 and Parallel HDF5
477            """
478            H5Fset_mpi_atomicity(self.id, <hbool_t>atomicity)
479
480
481        @with_phil
482        def get_mpi_atomicity(self):
483            """ () => BOOL
484
485            Return atomicity setting for MPI-IO driver.
486
487            Feature requires: 1.8.9 and Parallel HDF5
488            """
489            cdef hbool_t atom
490
491            H5Fget_mpi_atomicity(self.id, &atom)
492            return <bint>atom
493
494
495    @with_phil
496    def get_mdc_hit_rate(self):
497        """() => DOUBLE
498
499        Retrieve the cache hit rate
500
501        """
502        cdef double hit_rate
503        H5Fget_mdc_hit_rate(self.id, &hit_rate)
504        return hit_rate
505
506
507    @with_phil
508    def get_mdc_size(self):
509        """() => (max_size, min_clean_size, cur_size, cur_num_entries) [SIZE_T, SIZE_T, SIZE_T, INT]
510
511        Obtain current metadata cache size data for specified file.
512
513        """
514        cdef size_t max_size
515        cdef size_t min_clean_size
516        cdef size_t cur_size
517        cdef int cur_num_entries
518
519
520        H5Fget_mdc_size(self.id, &max_size, &min_clean_size, &cur_size, &cur_num_entries)
521
522        return (max_size, min_clean_size, cur_size, cur_num_entries)
523
524
525    @with_phil
526    def reset_mdc_hit_rate_stats(self):
527        """no return
528
529        rests the hit-rate statistics
530
531        """
532        H5Freset_mdc_hit_rate_stats(self.id)
533
534
535    @with_phil
536    def get_mdc_config(self):
537        """() => CacheConfig
538        Returns an object that stores all the information about the meta-data cache
539        configuration. This config is created for every file in-memory with the default
540        cache config values, it is not saved to the hdf5 file.
541        """
542
543        cdef CacheConfig config = CacheConfig()
544
545        H5Fget_mdc_config(self.id, &config.cache_config)
546
547        return config
548
    @with_phil
    def set_mdc_config(self, CacheConfig config not None):
        """(CacheConfig) => None
        Sets the meta-data cache configuration for a file. This config is created for every file
        in-memory with the default config values, it is not saved to the hdf5 file. Any change to
        the configuration lives until the hdf5 file is closed.
        """
        # NOTE: no sanity checking is performed here; the struct held by
        # "config" is handed to H5Fset_mdc_config exactly as given.
        H5Fset_mdc_config(self.id, &config.cache_config)
558
559    IF HDF5_VERSION >= SWMR_MIN_HDF5_VERSION:
560
561        @with_phil
562        def start_swmr_write(self):
563            """ no return
564
565            Enables SWMR writing mode for a file.
566
567            This function will activate SWMR writing mode for a file associated
568            with file_id. This routine will prepare and ensure the file is safe
569            for SWMR writing as follows:
570
571                * Check that the file is opened with write access (H5F_ACC_RDWR).
572                * Check that the file is opened with the latest library format
573                  to ensure data structures with check-summed metadata are used.
574                * Check that the file is not already marked in SWMR writing mode.
575                * Enable reading retries for check-summed metadata to remedy
576                  possible checksum failures from reading inconsistent metadata
577                  on a system that is not atomic.
578                * Turn off usage of the library’s accumulator to avoid possible
579                  ordering problem on a system that is not atomic.
580                * Perform a flush of the file’s data buffers and metadata to set
581                  a consistent state for starting SWMR write operations.
582
583            Library objects are groups, datasets, and committed datatypes. For
584            the current implementation, groups and datasets can remain open when
585            activating SWMR writing mode, but not committed datatypes. Attributes
586            attached to objects cannot remain open.
587
588            Feature requires: 1.9.178 HDF5
589            """
590            H5Fstart_swmr_write(self.id)
591
592    IF HDF5_VERSION >= (1, 10, 1):
593
594        @with_phil
595        def reset_page_buffering_stats(self):
596            """ ()
597
598            Reset page buffer statistics for the file.
599            """
600            H5Freset_page_buffering_stats(self.id)
601
602        @with_phil
603        def get_page_buffering_stats(self):
604            """ () -> NAMEDTUPLE PageBufStats(NAMEDTUPLE meta=PageStats, NAMEDTUPLE raw=PageStats)
605
606            Retrieve page buffering statistics for the file as the number of
607            metadata and raw data accesses, hits, misses, evictions, and
608            accesses that bypass the page buffer (bypasses).
609            """
610            cdef:
611                unsigned int accesses[2]
612                unsigned int hits[2]
613                unsigned int misses[2]
614                unsigned int evictions[2]
615                unsigned int bypasses[2]
616
617            H5Fget_page_buffering_stats(self.id, &accesses[0], &hits[0],
618                                        &misses[0], &evictions[0], &bypasses[0])
619            meta = PageStats(int(accesses[0]), int(hits[0]), int(misses[0]),
620                             int(evictions[0]), int(bypasses[0]))
621            raw = PageStats(int(accesses[1]), int(hits[1]), int(misses[1]),
622                            int(evictions[1]), int(bypasses[1]))
623
624            return PageBufStats(meta, raw)
625