"""Concatenate multiple files into a single virtual dataset."""
import os
import sys
import tempfile

import h5py
import numpy as np


def concatenate(file_names_to_concatenate, output_path="VDS.h5",
                entry_key='data', dtype=np.float64):
    """Stack one dataset from several HDF5 files into a virtual dataset.

    The dataset ``entry_key`` of every source file becomes one slice along
    a new leading axis of a virtual dataset (also named ``entry_key``)
    written to ``output_path``.

    Only the first file is opened, to read the source shape. The other
    files are mapped with that same shape and are not opened or validated
    here. The source file names are passed to ``h5py.VirtualSource`` exactly
    as given.

    Args:
        file_names_to_concatenate: Iterable of paths to the source files.
        output_path: File that receives the virtual dataset. It is opened
            in ``'w'`` mode, so an existing file is overwritten.
        entry_key: Name of the dataset inside each source file; also used
            as the name of the virtual dataset.
        dtype: dtype of the virtual dataset. Defaults to ``np.float64``,
            the value this function always used before it was a parameter.

    Raises:
        IndexError: If no source file is given.
    """
    file_names = list(file_names_to_concatenate)
    if not file_names:
        raise IndexError("concatenate() needs at least one source file")

    # Close the first file again as soon as its shape is known.
    with h5py.File(file_names[0], 'r') as first_file:
        source_shape = first_file[entry_key].shape

    layout = h5py.VirtualLayout(shape=(len(file_names),) + source_shape,
                                dtype=dtype)
    # One full slice per source axis, so sources of any rank are supported
    # (for 3-D sources this is exactly ``layout[i, :, :, :]``).
    whole_source = (slice(None),) * len(source_shape)
    for i, filename in enumerate(file_names):
        layout[(i,) + whole_source] = h5py.VirtualSource(
            filename, entry_key, shape=source_shape)

    with h5py.File(output_path, 'w', libver='latest') as f:
        # fillvalue is what readers see where a source file is unavailable.
        f.create_virtual_dataset(entry_key, layout, fillvalue=0)


def create_random_file(folder, index, shape=(5, 10, 20)):
    """Create one HDF5 file holding a random integer dataset.

    The file is written to ``<folder>/myfile_<index>`` and contains a
    single ``'i4'`` dataset named ``'data'`` filled with random integers
    in ``[0, 100)``.

    Args:
        folder: Directory in which the file is created.
        index: Value appended to the file name to make it unique.
        shape: Shape of the dataset to create.

    Returns:
        The path of the file that was written.
    """
    name = os.path.join(folder, 'myfile_' + str(index))
    with h5py.File(name=name, mode='w') as f:
        d = f.create_dataset('data', shape, 'i4')
        # Draw directly in the target shape; no flat array + reshape needed.
        d[...] = np.random.randint(low=0, high=100, size=shape)
    return name


def main(argv):
    """Concatenate the files named on the command line.

    ``argv[1:]`` is taken as the list of source files. When it is empty,
    five random example files are generated in a fresh temporary directory
    and concatenated instead.

    Args:
        argv: Full argument vector, including the program name at index 0.
    """
    files = argv[1:]
    if not files:
        # The directory is intentionally left in place: the virtual dataset
        # only references these files, so deleting them would leave it
        # showing nothing but the fill value.
        tmp_dir = tempfile.mkdtemp()
        for i_file in range(5):
            files.append(create_random_file(tmp_dir, index=i_file))
    concatenate(files)


if __name__ == '__main__':
    main(sys.argv)