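'''Concatenate multiple HDF5 files into a single virtual dataset (VDS).

The 'data' dataset of every input file is stacked along a new first axis
and the result is written to "VDS.h5".  When no input files are given on
the command line, a few random example files are generated instead.
'''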
3import h5py
4import numpy as np
5import sys
6import os
7
8
def concatenate(file_names_to_concatenate):
    """Stack the 'data' dataset of several HDF5 files into one virtual dataset.

    A new leading axis indexes the source files: N sources of shape ``sh``
    yield a virtual dataset of shape ``(N,) + sh``.  The result is written
    under the key 'data' to the file "VDS.h5" (relative to the current
    working directory), overwriting any existing file of that name.

    Parameters
    ----------
    file_names_to_concatenate : sequence
        Names of the source files.  The shape is read from the 'data'
        dataset of the first file only and is assumed for all the others.

    Raises
    ------
    IndexError
        If ``file_names_to_concatenate`` is an empty list.
    """
    entry_key = 'data'  # where the data is inside of the source files.
    # Read the shape inside a context manager so the first source file is
    # closed again right away instead of staying open for the whole call.
    with h5py.File(file_names_to_concatenate[0], 'r') as first_file:
        sh = first_file[entry_key].shape
    # NOTE: the virtual dataset is declared float64 whatever the dtype of
    # the sources is; this is kept as-is for backward compatibility.
    layout = h5py.VirtualLayout(shape=(len(file_names_to_concatenate),) + sh,
                                dtype=np.float64)
    with h5py.File("VDS.h5", 'w', libver='latest') as f:
        for i, filename in enumerate(file_names_to_concatenate):
            vsource = h5py.VirtualSource(filename, entry_key, shape=sh)
            # Index the leading axis only, so that sources of any rank
            # (not just 3-D ones) can be mapped into the layout.
            layout[i] = vsource

        f.create_virtual_dataset(entry_key, layout, fillvalue=0)
20
21
def create_random_file(folder, index):
    """Write one file filled with random integers and return its path.

    The file is called 'myfile_<index>' inside ``folder``.  It holds a single
    'i4' dataset named 'data' of shape (5, 10, 20) whose values are drawn
    from the half-open range [0, 100).
    """
    shape = (5, 10, 20)
    path = os.path.join(folder, 'myfile_' + str(index))
    with h5py.File(path, 'w') as h5_file:
        dataset = h5_file.create_dataset('data', shape, 'i4')
        # Draw the values directly in the target shape.
        dataset[:] = np.random.randint(0, 100, size=shape)
    return path
31
32
def main(argv):
    """Concatenate the files named in ``argv[1:]`` into a virtual dataset.

    When no file names are given, five random example files are first
    created in a fresh temporary directory and those are concatenated.
    The temporary directory is not removed afterwards.
    """
    files = argv[1:]
    if not files:
        import tempfile
        tmp_dir = tempfile.mkdtemp()
        # Fall back to generated example input.
        files.extend(create_random_file(tmp_dir, index=i_file)
                     for i_file in range(5))
    concatenate(files)
41
42
# Script entry point: hand the raw command line over to main().
if __name__ == '__main__':
    main(argv=sys.argv)
45