"""A simple example of building a virtual dataset.

This makes four 'source' HDF5 files (0.h5 to 3.h5), each with a 1D dataset
of 100 numbers. Then it makes a single 4x100 virtual dataset in a separate
file (VDS.h5), exposing the four sources as one dataset. An ordinary dataset
holding the same values is stored alongside it so the two can be compared
when the file is read back.
"""

import h5py
import numpy as np

# Create some sample data: broadcasting a (1, 100) row against a (4, 1)
# column gives a (4, 100) array in which row n holds n + 1 ... n + 100.
data = np.arange(0, 100).reshape(1, 100) + np.arange(1, 5).reshape(4, 1)

# Create source files (0.h5 to 3.h5), one row of the sample data per file.
for n in range(4):
    with h5py.File(f"{n}.h5", "w") as f:
        f.create_dataset("data", shape=(100,), dtype="i4", data=data[n])

# Assemble the virtual dataset: row n of the layout is mapped onto the
# "data" dataset of source file n.
layout = h5py.VirtualLayout(shape=(4, 100), dtype="i4")
for n in range(4):
    layout[n] = h5py.VirtualSource(f"{n}.h5", "data", shape=(100,))

# Add the virtual dataset to the output file, next to a normal dataset
# containing the same values.
# NOTE(review): fillvalue is presumably what a reader sees for any part of
# the layout whose source data is unavailable -- confirm against h5py docs.
with h5py.File("VDS.h5", "w", libver="latest") as f:
    f.create_virtual_dataset("vdata", layout, fillvalue=-5)
    f.create_dataset("data", data=data, dtype="i4")

# Read the data back.
# The virtual dataset is transparent for the reader: it is indexed exactly
# like the normal dataset.
with h5py.File("VDS.h5", "r") as f:
    print("Virtual dataset:")
    print(f["vdata"][:, :10])
    print("Normal dataset:")
    print(f["data"][:, :10])