1#!/usr/local/bin/python3.8
2
3# This script merges an arbitrary number of epsmat.h5 files (in HDF5 format).
4# The default output is epsmat_merged.h5, and it can be configured.
5# The script requires h5py and numpy to work. On hopper, you should type:
6# module load python h5py
7
8# Felipe H. da Jornada <jornada@berkeley.edu> (2013)
9
10import h5py
11import sys
12import os
13from optparse import OptionParser
14import numpy as np
15
16usage = '%prog: [options] epsmat_1 [epsmat_2] [...]'
17parser = OptionParser(usage=usage)
18parser.add_option('--take-header-from', metavar='F_NUM', type='int',
19                  help="don't check for consistency of the headers."
20                       "Instead, use the header from the argument number F_NUM.")
21parser.add_option('-o', '--output', default='epsmat_merged.h5', type='str',
22                  help="output file. Defaults to 'epsmat_merged.h5'.")
23parser.add_option('-f', '--force', default=False, action='store_true',
24                  help="don't ask any questions, and overwrite output file if"
25                       "it exists.")
26(options, args) = parser.parse_args()
27if len(args)<2:
28    parser.error("wrong number of arguments")
29
30print('Output file:\n> %s\n'%(options.output))
31print('Input files:')
32for fname in args:
33    print('< %s'%(fname))
34    if not os.path.exists(fname):
35        raise IOError("File %s doesn't exist."%(fname))
36
37fs_in = [h5py.File(fname, 'r') for fname in args]
38if options.take_header_from is None:
39    ind = 0
40else:
41    assert options.take_header_from >=1 and options.take_header_from <= len(args), \
42        'invalid value (%d) for option --take-header-from.\n'%(options.take_header_from) + \
43        'Expecting and integer between %d and %d'%(1,len(args))
44    ind = options.take_header_from-1
45f1 = fs_in[ind]
46
47if (not options.force) and os.path.exists(options.output):
48    # Fix Python 2.x.
49    try: input = raw_input
50    except NameError: pass
51    ans = input('\nOutput file %s already exists. Would you '
52                    'like to overwrite it? (y,N) '%(options.output))
53    if len(ans)==0 or ans[0].lower()!='y':
54        exit(0)
55    print("Overwriting output file %s"%(options.output))
56
57###############################################################################
58print("\nAnalyzing files and checking consistency against %s\n"%(args[ind]))
59###############################################################################
60
61#FHJ: these dsets *must* match between different files
62dsets = ['eps_header/versionnumber', 'eps_header/flavor', \
63    'mf_header/gspace/ng', 'mf_header/gspace/components', \
64    'eps_header/freqs/nfreq', 'eps_header/freqs/freqs']
65dsets += [v.name for v in f1['eps_header/params'].values()
66    if v.name.split('/')[-1] not in ('ecuts','nband','efermi',)]
67
68#FHJ: match these dsets unless we use the --take-header-from option
69if options.take_header_from is None:
70    dsets += ['eps_header/qpoints/qgrid', 'mf_header/flavor']
71    def parse_optional(name, obj):
72        if isinstance(obj, h5py.Dataset):
73            dsets.append(obj.name)
74    f1['mf_header'].visititems(parse_optional)
75else:
76    print('  Note: skipping extra consistency checks because of --take-header-from')
77
78#Checking consistency among files wrt f1
79for f2 in fs_in:
80    if f2==f1: continue
81    print('  Checking %s'%(f2.filename))
82    for dname in dsets:
83        if not (f1[dname].shape==f2[dname].shape and
84            np.allclose(f1[dname][()], f2[dname][()])):
85            raise AttributeError('Incompatible values for dataset %s'%(dname))
86print('  All files are consistent!')
87
88flavor = f1['eps_header/flavor'][()]
89ng = f1['mf_header/gspace/ng'][()]
90nfreq = f1['eps_header/freqs/nfreq'][()]
91freq_dep = f1['eps_header/freqs/freq_dep'][()]
92ecuts = f1['eps_header/params/ecuts'][()]
93print("\nSummary:\n")
94print("  Flavor: %s"%((flavor==2 and 'Complex') or 'Real'))
95print("  Frequency dependency: %d"%(freq_dep))
96print("  Number of frequencies: %d"%(nfreq))
97print("  Number of G-vectors: %d"%(ng))
98print("  Epsilon cutoff: %f"%(ecuts))
99nmtx_max = np.amax([f['eps_header/gspace/nmtx_max'][()] for f in fs_in])
100print("  Maximum rank of the merged epsilon matrix (nmtx): %d"%(nmtx_max))
101nq_tot = np.sum([f['eps_header/qpoints/nq'][()] for f in fs_in])
102print("  Total number of q-points after merging: %d"%(nq_tot))
103
104###############################################################################
105print("\nWriting output parameters\n")
106###############################################################################
107
108f_out = h5py.File(options.output, 'w')
109f_out.copy(f1['mf_header'], 'mf_header')
110f_out.copy(f1['eps_header'], 'eps_header')
111f_out['eps_header/qpoints/nq'][()] = nq_tot
112f_out['eps_header/gspace/nmtx_max'][()] = nmtx_max
113
114#Delete and merge all datasets related to nq
115dsets_merge = {
116    'eps_header/qpoints/qpts': (nq_tot,3),
117    'eps_header/qpoints/qpt_done': (nq_tot,),
118    'eps_header/gspace/nmtx': (nq_tot,),
119    'eps_header/gspace/ekin': (nq_tot,ng),
120    'eps_header/gspace/gind_eps2rho': (nq_tot,ng),
121    'eps_header/gspace/gind_rho2eps': (nq_tot,ng)}
122for name, shape in dsets_merge.items():
123    del f_out[name]
124    f_out.create_dataset(name, shape, dtype=f1[name].dtype)
125    f_out[name][()] = np.concatenate([f[name][()] for f in fs_in], axis=0)
126
127print("  Ok!")
128
129###############################################################################
130print("\nWriting output matrices\n")
131###############################################################################
132
133f_out.create_group('mats')
134name = 'mats/matrix-diagonal'
135shape = np.array(f1[name].shape)
136shape[[0,1]] = [nq_tot, nmtx_max] #shape = (nq, nmtx_max, 1|2)
137f_out.create_dataset(name, shape, f1[name].dtype)
138name = 'mats/matrix'
139shape = np.array(f1[name].shape)
140shape[[0,3,4]] = [nq_tot, nmtx_max, nmtx_max] #shape = (nq, 1|2|3|4, nfreq, nmtx_max, nmtx_max, 1|2)
141f_out.create_dataset(name, shape, f1[name].dtype)
142name = 'eps_header/gspace/vcoul'
143del f_out[name]
144shape = [nq_tot, nmtx_max] #shape = (nq, nmtx_max)
145f_out.create_dataset(name, shape, f1[name].dtype)
146
147iq_glob = 0
148for f in fs_in:
149    nq = f['eps_header/qpoints/nq'][()]
150    for iq in xrange(nq):
151        nmtx_q = f['eps_header/gspace/nmtx'][iq]
152        print("  Dealing with q=%d/%d (rank=%d)"%(iq_glob+1, nq_tot, nmtx_q))
153        name = 'mats/matrix-diagonal'
154        f_out[name][iq_glob,:nmtx_q,:] = (
155            f[name][iq,:nmtx_q,:] )
156        name = 'mats/matrix'
157        f_out[name][iq_glob,:,:,:nmtx_q,:nmtx_q,:] = (
158            f[name][iq,:,:,:nmtx_q,:nmtx_q,:] )
159        name = 'eps_header/gspace/vcoul'
160        f_out[name][iq_glob,:nmtx_q] = (
161            f[name][iq,:nmtx_q] )
162        iq_glob += 1
163f_out.close()
164
165print('\nAll done!\n')
166