# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License:  Standard 3-clause BSD; see "license.txt" for full license terms
#           and contributor agreement.

"""
    Low-level operations on HDF5 file objects.
"""

include "config.pxi"

# C level imports
from cpython.buffer cimport PyObject_CheckBuffer, \
                            PyObject_GetBuffer, PyBuffer_Release, \
                            PyBUF_SIMPLE
from ._objects cimport pdefault
from .h5p cimport propwrap, PropFAID, PropFCID
from .h5i cimport wrap_identifier
from .h5ac cimport CacheConfig
from .utils cimport emalloc, efree

# Python level imports
from collections import namedtuple
import gc
from . import _objects
from ._objects import phil, with_phil

from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AsString

# Initialization

# === Public constants and data structures ====================================

# File access flags accepted by open() / create().
ACC_TRUNC = H5F_ACC_TRUNC
ACC_EXCL = H5F_ACC_EXCL
ACC_RDWR = H5F_ACC_RDWR
ACC_RDONLY = H5F_ACC_RDONLY
IF HDF5_VERSION >= SWMR_MIN_HDF5_VERSION:
    ACC_SWMR_WRITE = H5F_ACC_SWMR_WRITE
    ACC_SWMR_READ = H5F_ACC_SWMR_READ


# Scope values accepted by flush().
SCOPE_LOCAL = H5F_SCOPE_LOCAL
SCOPE_GLOBAL = H5F_SCOPE_GLOBAL

CLOSE_WEAK = H5F_CLOSE_WEAK
CLOSE_SEMI = H5F_CLOSE_SEMI
CLOSE_STRONG = H5F_CLOSE_STRONG
CLOSE_DEFAULT = H5F_CLOSE_DEFAULT

# Object-type selectors accepted by get_obj_count() / get_obj_ids().
OBJ_FILE = H5F_OBJ_FILE
OBJ_DATASET = H5F_OBJ_DATASET
OBJ_GROUP = H5F_OBJ_GROUP
OBJ_DATATYPE = H5F_OBJ_DATATYPE
OBJ_ATTR = H5F_OBJ_ATTR
OBJ_ALL = H5F_OBJ_ALL
OBJ_LOCAL = H5F_OBJ_LOCAL
UNLIMITED = H5F_UNLIMITED

LIBVER_EARLIEST = H5F_LIBVER_EARLIEST
LIBVER_LATEST = H5F_LIBVER_LATEST
IF HDF5_VERSION >= (1, 10, 2):
    LIBVER_V18 = H5F_LIBVER_V18
    LIBVER_V110 = H5F_LIBVER_V110

IF HDF5_VERSION >= VOL_MIN_HDF5_VERSION:
    LIBVER_V112 = H5F_LIBVER_V112

IF HDF5_VERSION >= (1, 13, 0):
    LIBVER_V114 = H5F_LIBVER_V114

# NOTE(review): this is a lowercase (run-time) ``if``, unlike the
# compile-time ``IF`` guards used elsewhere in this file -- confirm that
# this is intentional.
if HDF5_VERSION >= (1, 8, 9):
    FILE_IMAGE_OPEN_RW = H5LT_FILE_IMAGE_OPEN_RW

IF HDF5_VERSION >= (1, 10, 1):
    FSPACE_STRATEGY_FSM_AGGR = H5F_FSPACE_STRATEGY_FSM_AGGR
    FSPACE_STRATEGY_PAGE = H5F_FSPACE_STRATEGY_PAGE
    FSPACE_STRATEGY_AGGR = H5F_FSPACE_STRATEGY_AGGR
    FSPACE_STRATEGY_NONE = H5F_FSPACE_STRATEGY_NONE

    # Used in FileID.get_page_buffering_stats()
    PageBufStats = namedtuple('PageBufferStats', ['meta', 'raw'])
    PageStats = namedtuple('PageStats', ['accesses', 'hits', 'misses', 'evictions', 'bypasses'])


# === File operations =========================================================

@with_phil
def open(char* name, unsigned int flags=H5F_ACC_RDWR, PropFAID fapl=None):
    """(STRING name, UINT flags=ACC_RDWR, PropFAID fapl=None) => FileID

    Open an existing HDF5 file.  Keyword "flags" may be:

    ACC_RDWR
        Open in read-write mode

    ACC_RDONLY
        Open in readonly mode

    Keyword fapl may be a file access property list.
    """
    return FileID(H5Fopen(name, flags, pdefault(fapl)))


@with_phil
def create(char* name, int flags=H5F_ACC_TRUNC, PropFCID fcpl=None,
                                                PropFAID fapl=None):
    """(STRING name, INT flags=ACC_TRUNC, PropFCID fcpl=None,
    PropFAID fapl=None) => FileID

    Create a new HDF5 file.  Keyword "flags" may be:

    ACC_TRUNC
        Truncate an existing file, discarding its data

    ACC_EXCL
        Fail if a conflicting file exists

    To keep the behavior in line with that of Python's built-in functions,
    the default is ACC_TRUNC.  Be careful!

    Keywords fcpl and fapl may be a file creation and a file access
    property list, respectively.
    """
    return FileID(H5Fcreate(name, flags, pdefault(fcpl), pdefault(fapl)))

IF HDF5_VERSION >= (1, 8, 9):
    @with_phil
    def open_file_image(image, flags=0):
        """(STRING image, INT flags=0) => FileID

        Load a new HDF5 file into memory.  Keyword "flags" may be:

        FILE_IMAGE_OPEN_RW
            Specifies opening the file image in read/write mode.

        "image" must be an object supporting the buffer protocol;
        otherwise TypeError is raised.
        """
        cdef Py_buffer buf

        if not PyObject_CheckBuffer(image):
            raise TypeError("image must support the buffer protocol")

        PyObject_GetBuffer(image, &buf, PyBUF_SIMPLE)
        try:
            return FileID(H5LTopen_file_image(buf.buf, buf.len, flags))
        finally:
            # Always hand the buffer back, whether or not the open succeeded.
            PyBuffer_Release(&buf)


@with_phil
def flush(ObjectID obj not None, int scope=H5F_SCOPE_LOCAL):
    """(ObjectID obj, INT scope=SCOPE_LOCAL)

    Tell the HDF5 library to flush file buffers to disk.  "obj" may
    be the file identifier, or the identifier of any object residing in
    the file.  Keyword "scope" may be:

    SCOPE_LOCAL
        Flush only the given file

    SCOPE_GLOBAL
        Flush the entire virtual file
    """
    H5Fflush(obj.id, <H5F_scope_t>scope)


@with_phil
def is_hdf5(char* name):
    """(STRING name) => BOOL

    Determine if a given file is an HDF5 file.  Note this raises an
    exception if the file doesn't exist.
    """
    return <bint>(H5Fis_hdf5(name))


@with_phil
def mount(ObjectID loc not None, char* name, FileID fid not None):
    """(ObjectID loc, STRING name, FileID fid)

    Mount an open file on the group "name" under group loc_id.  Note that
    "name" must already exist.
    """
    H5Fmount(loc.id, name, fid.id, H5P_DEFAULT)


@with_phil
def unmount(ObjectID loc not None, char* name):
    """(ObjectID loc, STRING name)

    Unmount a file, mounted at "name" under group loc_id.
    """
    H5Funmount(loc.id, name)


@with_phil
def get_name(ObjectID obj not None):
    """(ObjectID obj) => STRING

    Determine the name of the file in which the specified object resides.
    """
    cdef ssize_t size
    cdef char* name
    name = NULL

    # First call, with a NULL buffer, only reports the length of the name.
    size = H5Fget_name(obj.id, NULL, 0)
    assert size >= 0
    # Reserve one byte more than the reported length (presumably for the
    # terminating NUL -- confirm against the HDF5 documentation).
    name = <char*>emalloc(sizeof(char)*(size+1))
    try:
        H5Fget_name(obj.id, name, size+1)
        # Convert to a Python object before the C buffer is freed below.
        pname = name
        return pname
    finally:
        efree(name)


@with_phil
def get_obj_count(object where=OBJ_ALL, int types=H5F_OBJ_ALL):
    """(OBJECT where=OBJ_ALL, types=OBJ_ALL) => INT

    Get the number of open objects.

    where
        Either a FileID instance representing an HDF5 file, or the
        special constant OBJ_ALL, to count objects in all files.

    types
        Specify what kinds of object to include.  May be one of OBJ*,
        or any bitwise combination (e.g. ``OBJ_FILE | OBJ_ATTR``).

        The special value OBJ_ALL matches all object types, and
        OBJ_LOCAL will only match objects opened through a specific
        identifier.

    Raises TypeError if "where" is neither a FileID nor an int.
    """
    cdef hid_t where_id
    if isinstance(where, FileID):
        where_id = where.id
    elif isinstance(where, int):
        where_id = where
    else:
        raise TypeError("Location must be a FileID or OBJ_ALL.")

    return H5Fget_obj_count(where_id, types)


@with_phil
def get_obj_ids(object where=OBJ_ALL, int types=H5F_OBJ_ALL):
    """(OBJECT where=OBJ_ALL, types=OBJ_ALL) => LIST

    Get a list of identifier instances for open objects.

    where
        Either a FileID instance representing an HDF5 file, or the
        special constant OBJ_ALL, to list objects in all files.

    types
        Specify what kinds of object to include.  May be one of OBJ*,
        or any bitwise combination (e.g. ``OBJ_FILE | OBJ_ATTR``).

        The special value OBJ_ALL matches all object types, and
        OBJ_LOCAL will only match objects opened through a specific
        identifier.

    Raises TypeError if "where" is not a FileID and int(where) raises
    TypeError.
    """
    cdef int count
    cdef int i
    cdef hid_t where_id
    cdef hid_t *obj_list = NULL
    cdef list py_obj_list = []

    if isinstance(where, FileID):
        where_id = where.id
    else:
        try:
            where_id = int(where)
        except TypeError:
            raise TypeError("Location must be a FileID or OBJ_ALL.")

    try:
        count = H5Fget_obj_count(where_id, types)
        obj_list = <hid_t*>emalloc(sizeof(hid_t)*count)

        if count > 0:  # HDF5 complains that obj_list is NULL, even if count==0
            # Garbage collection might dealloc a Python object & call H5Idec_ref
            # between getting an HDF5 ID and calling H5Iinc_ref, breaking it.
            # Disable GC until we have inc_ref'd the IDs to keep them alive.
            # NOTE(review): gc.enable() below runs unconditionally, so GC is
            # switched on afterwards even if the caller had it disabled.
            gc.disable()
            try:
                H5Fget_obj_ids(where_id, types, count, obj_list)
                for i in range(count):
                    py_obj_list.append(wrap_identifier(obj_list[i]))
                    # The HDF5 function returns a borrowed reference for each hid_t.
                    H5Iinc_ref(obj_list[i])
            finally:
                gc.enable()

        return py_obj_list

    finally:
        # Free the temporary C array on every exit path.
        efree(obj_list)


# === FileID implementation ===================================================

cdef class FileID(GroupID):

    """
        Represents an HDF5 file identifier.

        These objects wrap a small portion of the H5F interface; all the
        H5F functions which can take arbitrary objects in addition to
        file identifiers are provided as functions in the h5f module.

        Properties:

        * name:   File name on disk

        Behavior:

        * Hashable: Yes, unique to the file (but not the access mode)
        * Equality: Hash comparison
    """

    property name:
        """ File name on disk (according to h5f.get_name()) """
        def __get__(self):
            with phil:
                return get_name(self)


    @with_phil
    def close(self):
        """()

        Terminate access through this identifier.  Note that depending on
        what property list settings were used to open the file, the
        physical file might not be closed until all remaining open
        identifiers are freed.
        """
        self._close()
        _objects.nonlocal_close()

    @with_phil
    def _close_open_objects(self, int types):
        # Used by File.close(). This avoids the get_obj_ids wrapper, which
        # creates Python objects and increments HDF5 ref counts while we're
        # trying to clean up. E.g. that can be problematic at Python shutdown.
        #
        # "types" is passed straight through to H5Fget_obj_count and
        # H5Fget_obj_ids to select which open objects are affected.
        cdef int count, i
        cdef hid_t *obj_list = NULL

        count = H5Fget_obj_count(self.id, types)
        if count == 0:
            return
        obj_list = <hid_t*> emalloc(sizeof(hid_t) * count)
        try:
            H5Fget_obj_ids(self.id, types, count, obj_list)
            for i in range(count):
                # Keep dropping references until the identifier is no
                # longer reported as valid.
                while H5Iis_valid(obj_list[i]):
                    H5Idec_ref(obj_list[i])
        finally:
            efree(obj_list)

    @with_phil
    def reopen(self):
        """() => FileID

        Retrieve another identifier for a file (which must still be open).
        The new identifier is guaranteed to neither be mounted nor contain
        a mounted file.
        """
        return FileID(H5Freopen(self.id))


    @with_phil
    def get_filesize(self):
        """() => LONG size

        Determine the total size (in bytes) of the HDF5 file,
        including any user block.
        """
        cdef hsize_t size
        H5Fget_filesize(self.id, &size)
        return size


    @with_phil
    def get_create_plist(self):
        """() => PropFCID

        Retrieve a copy of the file creation property list used to
        create this file.
        """
        return propwrap(H5Fget_create_plist(self.id))


    @with_phil
    def get_access_plist(self):
        """() => PropFAID

        Retrieve a copy of the file access property list which manages access
        to this file.
        """
        return propwrap(H5Fget_access_plist(self.id))


    @with_phil
    def get_freespace(self):
        """() => LONG freespace

        Determine the amount of free space in this file.  Note that this
        only tracks free space until the file is closed.
        """
        return H5Fget_freespace(self.id)


    @with_phil
    def get_intent(self):
        """ () => INT

        Determine the file's write intent, either of:
        - H5F_ACC_RDONLY
        - H5F_ACC_RDWR
        """
        cdef unsigned int mode
        H5Fget_intent(self.id, &mode)
        return mode


    @with_phil
    def get_vfd_handle(self, fapl=None):
        """ (PropFAID) => INT

        Retrieve the file handle used by the virtual file driver.

        This may not be supported for all file drivers, and the meaning of the
        return value may depend on the file driver.

        The 'family' and 'multi' drivers access multiple files, and a file
        access property list (fapl) can be used to indicate which to access,
        with H5Pset_family_offset or H5Pset_multi_type.
        """
        cdef int *handle
        H5Fget_vfd_handle(self.id, pdefault(fapl), <void**>&handle)
        # The pointer filled in by HDF5 is read as a single C int.
        return handle[0]

    IF HDF5_VERSION >= (1, 8, 9):

        @with_phil
        def get_file_image(self):
            """ () => BYTES

            Retrieves a copy of the image of an existing, open file.

            Feature requires: 1.8.9
            """

            cdef ssize_t size

            # First call, with a NULL buffer, only reports the image size.
            size = H5Fget_file_image(self.id, NULL, 0)
            # Allocate a bytes object of that size with no initial data ...
            image = PyBytes_FromStringAndSize(NULL, size)

            # ... and let HDF5 write the image directly into its storage.
            H5Fget_file_image(self.id, PyBytes_AsString(image), size)

            return image

    IF MPI and HDF5_VERSION >= (1, 8, 9):

        @with_phil
        def set_mpi_atomicity(self, bint atomicity):
            """ (BOOL atomicity)

            For MPI-IO driver, set to atomic (True), which guarantees sequential
            I/O semantics, or non-atomic (False), which improves performance.

            Default is False.

            Feature requires: 1.8.9 and Parallel HDF5
            """
            H5Fset_mpi_atomicity(self.id, <hbool_t>atomicity)


        @with_phil
        def get_mpi_atomicity(self):
            """ () => BOOL

            Return atomicity setting for MPI-IO driver.

            Feature requires: 1.8.9 and Parallel HDF5
            """
            cdef hbool_t atom

            H5Fget_mpi_atomicity(self.id, &atom)
            return <bint>atom


    @with_phil
    def get_mdc_hit_rate(self):
        """() => DOUBLE

        Retrieve the cache hit rate

        """
        cdef double hit_rate
        H5Fget_mdc_hit_rate(self.id, &hit_rate)
        return hit_rate


    @with_phil
    def get_mdc_size(self):
        """() => (max_size, min_clean_size, cur_size, cur_num_entries) [SIZE_T, SIZE_T, SIZE_T, INT]

        Obtain current metadata cache size data for specified file.

        """
        cdef size_t max_size
        cdef size_t min_clean_size
        cdef size_t cur_size
        cdef int cur_num_entries


        H5Fget_mdc_size(self.id, &max_size, &min_clean_size, &cur_size, &cur_num_entries)

        return (max_size, min_clean_size, cur_size, cur_num_entries)


    @with_phil
    def reset_mdc_hit_rate_stats(self):
        """no return

        Resets the hit-rate statistics

        """
        H5Freset_mdc_hit_rate_stats(self.id)


    @with_phil
    def get_mdc_config(self):
        """() => CacheConfig
        Returns an object that stores all the information about the meta-data cache
        configuration. This config is created for every file in-memory with the default
        cache config values, it is not saved to the hdf5 file.
        """

        cdef CacheConfig config = CacheConfig()

        # HDF5 fills in the C struct embedded in the CacheConfig wrapper.
        H5Fget_mdc_config(self.id, &config.cache_config)

        return config

    @with_phil
    def set_mdc_config(self, CacheConfig config not None):
        """(CacheConfig) => None
        Sets the meta-data cache configuration for a file. This config is created for every file
        in-memory with the default config values, it is not saved to the hdf5 file. Any change to
        the configuration lives until the hdf5 file is closed.
        """
        # NOTE(review): the original author felt this should have some sanity
        # checking; none is performed here before the configuration is applied.
        H5Fset_mdc_config(self.id, &config.cache_config)

    IF HDF5_VERSION >= SWMR_MIN_HDF5_VERSION:

        @with_phil
        def start_swmr_write(self):
            """ no return

            Enables SWMR writing mode for a file.

            This function will activate SWMR writing mode for a file associated
            with file_id. This routine will prepare and ensure the file is safe
            for SWMR writing as follows:

                * Check that the file is opened with write access (H5F_ACC_RDWR).
                * Check that the file is opened with the latest library format
                  to ensure data structures with check-summed metadata are used.
                * Check that the file is not already marked in SWMR writing mode.
                * Enable reading retries for check-summed metadata to remedy
                  possible checksum failures from reading inconsistent metadata
                  on a system that is not atomic.
                * Turn off usage of the library’s accumulator to avoid possible
                  ordering problem on a system that is not atomic.
                * Perform a flush of the file’s data buffers and metadata to set
                  a consistent state for starting SWMR write operations.

            Library objects are groups, datasets, and committed datatypes. For
            the current implementation, groups and datasets can remain open when
            activating SWMR writing mode, but not committed datatypes. Attributes
            attached to objects cannot remain open.

            Feature requires: 1.9.178 HDF5
            """
            H5Fstart_swmr_write(self.id)

    IF HDF5_VERSION >= (1, 10, 1):

        @with_phil
        def reset_page_buffering_stats(self):
            """ ()

            Reset page buffer statistics for the file.
            """
            H5Freset_page_buffering_stats(self.id)

        @with_phil
        def get_page_buffering_stats(self):
            """ () -> NAMEDTUPLE PageBufStats(NAMEDTUPLE meta=PageStats, NAMEDTUPLE raw=PageStats)

            Retrieve page buffering statistics for the file as the number of
            metadata and raw data accesses, hits, misses, evictions, and
            accesses that bypass the page buffer (bypasses).
            """
            cdef:
                unsigned int accesses[2]
                unsigned int hits[2]
                unsigned int misses[2]
                unsigned int evictions[2]
                unsigned int bypasses[2]

            H5Fget_page_buffering_stats(self.id, &accesses[0], &hits[0],
                                        &misses[0], &evictions[0], &bypasses[0])
            # Element 0 of each array is reported as the metadata figure,
            # element 1 as the raw data figure.
            meta = PageStats(int(accesses[0]), int(hits[0]), int(misses[0]),
                             int(evictions[0]), int(bypasses[0]))
            raw = PageStats(int(accesses[1]), int(hits[1]), int(misses[1]),
                            int(evictions[1]), int(bypasses[1]))

            return PageBufStats(meta, raw)