"""A simple example of building a virtual dataset.

This makes four 'source' HDF5 files, each with a 1D dataset of 100 numbers.
Then it makes a single 4x100 virtual dataset in a separate file, exposing
the four sources as one dataset.
"""

import h5py
import numpy as np

# Create some sample data: a 4x100 array in which row n holds the values
# n+1, n+2, ..., n+100, so each source file gets distinguishable contents.
data = np.arange(0, 100).reshape(1, 100) + np.arange(1, 5).reshape(4, 1)

# Create source files (0.h5 to 3.h5); each stores one row of `data` as a
# 1D "i4" dataset named "data".
for n in range(4):
    with h5py.File(f"{n}.h5", "w") as f:
        f.create_dataset("data", (100,), "i4", data[n])

# Assemble virtual dataset: row n of the 4x100 layout is mapped onto the
# "data" dataset of source file n.h5.
layout = h5py.VirtualLayout(shape=(4, 100), dtype="i4")
for n in range(4):
    filename = f"{n}.h5"
    vsource = h5py.VirtualSource(filename, "data", shape=(100,))
    layout[n] = vsource

# Add virtual dataset to output file, next to an ordinary dataset holding
# the same numbers so the two can be compared when read back.
with h5py.File("VDS.h5", "w", libver="latest") as f:
    f.create_virtual_dataset("vdata", layout, fillvalue=-5)
    f.create_dataset("data", data=data, dtype="i4")


# Read data back.
# The virtual dataset is transparent for the reader: it is indexed exactly
# like the normal dataset.
with h5py.File("VDS.h5", "r") as f:
    print("Virtual dataset:")
    print(f["vdata"][:, :10])
    print("Normal dataset:")
    print(f["data"][:, :10])