1"""Write compressed chunks directly, bypassing HDF5's filters 2""" 3import h5py 4import numpy as np 5import zlib 6 7f = h5py.File("direct_chunk.h5", "w") 8 9block_size = 2048 10dataset = f.create_dataset( 11 "data", (256, 1024, 1024), dtype="uint16", chunks=(64, 128, 128), 12 compression="gzip", compression_opts=4, 13) 14# h5py's compression='gzip' is actually a misnomer: gzip does the same 15# compression, but adds some extra metadata before & after the compressed data. 16# This won't work if you use gzip.compress() instead of zlib! 17 18# Random numbers with only a few possibilities, so some compression is possible. 19array = np.random.randint(0, 10, size=(64, 128, 128), dtype=np.uint16) 20 21# Compress the data, and write it into the dataset. (0, 0, 128) are coordinates 22# for the start of a chunk. Equivalent to: 23# dataset[0:64, 0:128, 128:256] = array 24compressed = zlib.compress(array, level=4) 25dataset.id.write_direct_chunk((0, 0, 128), compressed) 26print(f"Written {len(compressed)} bytes compressed data") 27 28# Read the chunk back (HDF5 will decompress it) and check the data is the same. 29read_data = dataset[:64, :128, 128:256] 30np.testing.assert_array_equal(read_data, array) 31print(f"Verified array of {read_data.size} elements ({read_data.nbytes} bytes)") 32 33f.close() 34