1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2020, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
7
8"""Parser for Formatted Checkpoint files"""
9
10
11from __future__ import print_function
12
13import re
14
15import numpy
16
17from cclib.parser import data
18from cclib.parser import logfileparser
19from cclib.parser import utils
20
21SHELL_ORBITALS = {
22    0: ['S'],
23    1: ['PX', 'PY', 'PZ'],
24    -1: ['S', 'PX', 'PY', 'PZ'],
25    2: ['D1', 'D2', 'D3', 'D4', 'D5', 'D6'],
26    -2: ['D1', 'D2', 'D3', 'D4', 'D5'],
27    3:  ['F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10'],
28    -3: ['F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7'],
29    4: ['G1', 'G2', 'G3', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9', 'G10', 'G11', 'G12','G13'],
30    -4: ['G1', 'G2', 'G3', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9']
31
32}
33
34SHELL_START = {
35    0: 1,
36    1: 2,
37    -1: 2,
38    2: 3,
39    -2: 3,
40    3: 4,
41    -3: 4
42}
43
44
45def _shell_to_orbitals(type, offset):
46    """Convert a Fchk shell type and offset to a list of string representations.
47
48    For example, shell type = -2 corresponds to d orbitals (spherical basis) with
49    an offset = 1 would correspond to the 4d orbitals, so this function returns
50    `['4D1', '4D2', '4D3', '4D4', '4D5']`.
51    """
52
53    return ['{}{}'.format(SHELL_START[type] + offset, x) for x in SHELL_ORBITALS[type]]
54
55
56class FChk(logfileparser.Logfile):
57    """A Formatted checkpoint file, which contains molecular and wavefunction information.
58
59    These files are produced by Gaussian and Q-Chem.
60    """
61
62    def __init__(self, *args, **kwargs):
63
64        # Call the __init__ method of the superclass
65        super(FChk, self).__init__(logname="FChk", *args, **kwargs)
66        self.start = True
67
68    def __str__(self):
69        """Return a string representation of the object."""
70        return "Formatted checkpoint file %s" % self.filename
71
72    def __repr__(self):
73        """Return a representation of the object."""
74        return 'FCHK("%s")' % self.filename
75
76    def normalisesym(self, symlabel):
77        """Just return label"""
78        return symlabel
79
80    def extract(self, inputfile, line):
81
82        # just opened file, skip first line to get basis
83        if self.start:
84            method = next(inputfile)
85            self.metadata['basis_set'] = method.split()[-1]
86            self.start = False
87
88        if line[0:6] == 'Charge':
89            self.set_attribute('charge', int(line.split()[-1]))
90
91        if line[0:12] == 'Multiplicity':
92            self.set_attribute('mult', int(line.split()[-1]))
93
94        if line[0:14] == 'Atomic numbers':
95            self.natom = int(line.split()[-1])
96            atomnos = self._parse_block(inputfile, self.natom, int, 'Basic Information')
97            self.set_attribute('atomnos', atomnos)
98
99        if line[0:19] == 'Number of electrons':
100            alpha = next(inputfile)
101            alpha_homo = int(alpha.split()[-1]) - 1
102
103            beta = next(inputfile)
104            beta_homo = int(beta.split()[-1]) - 1
105
106            self.set_attribute('homos', [alpha_homo, beta_homo])
107
108        if line[0:29] == 'Current cartesian coordinates':
109            count = int(line.split()[-1])
110            assert count % 3 == 0
111
112            coords = numpy.array(self._parse_block(inputfile, count, float, 'Coordinates'))
113            coords.shape = (1, int(count / 3), 3)
114            self.set_attribute('atomcoords', utils.convertor(coords, 'bohr', 'Angstrom'))
115
116        if line[0:25] == 'Number of basis functions':
117            self.set_attribute('nbasis', int(line.split()[-1]))
118
119        if line[0:14] == 'Overlap Matrix':
120            count = int(line.split()[-1])
121
122            # triangle matrix, with number of elements in a row:
123            # 1 + 2 + 3 + .... + self.nbasis
124            assert count == (self.nbasis + 1) * self.nbasis / 2
125            raw_overlaps = self._parse_block(inputfile, count, float, 'Overlap Matrix')
126
127            # now turn into matrix
128            overlaps = numpy.zeros((self.nbasis, self.nbasis))
129            raw_index = 0
130            for row in range(self.nbasis):
131                for col in range(row + 1):
132                    overlaps[row, col] = raw_overlaps[raw_index]
133                    overlaps[col, row] = raw_overlaps[raw_index]
134                    raw_index += 1
135
136            self.set_attribute('aooverlaps', overlaps)
137
138        if line[0:31] == 'Number of independent functions':
139            self.set_attribute('nmo', int(line.split()[-1]))
140
141        if line[0:21] == 'Alpha MO coefficients':
142            count = int(line.split()[-1])
143            assert count == self.nbasis * self.nmo
144
145            coeffs = numpy.array(self._parse_block(inputfile, count, float, 'Alpha Coefficients'))
146            coeffs.shape = (self.nmo, self.nbasis)
147            self.set_attribute('mocoeffs', [coeffs])
148
149        if line[0:22] == 'Alpha Orbital Energies':
150            count = int(line.split()[-1])
151            assert count == self.nmo
152
153            energies = numpy.array(self._parse_block(inputfile, count, float, 'Alpha MO Energies'))
154            self.set_attribute('moenergies', [energies])
155
156        if line[0:20] == 'Beta MO coefficients':
157            count = int(line.split()[-1])
158            assert count == self.nbasis * self.nmo
159
160            coeffs = numpy.array(self._parse_block(inputfile, count, float, 'Beta Coefficients'))
161            coeffs.shape = (self.nmo, self.nbasis)
162            self.append_attribute('mocoeffs', coeffs)
163
164        if line[0:21] == 'Beta Orbital Energies':
165            count = int(line.split()[-1])
166            assert count == self.nmo
167
168            energies = numpy.array(self._parse_block(inputfile, count, float, 'Alpha MO Energies'))
169            self.append_attribute('moenergies', energies)
170
171        if line[0:11] == 'Shell types':
172            self.parse_aonames(line, inputfile)
173
174        if line[0:19] == 'Real atomic weights':
175            count = int(line.split()[-1])
176            assert count == self.natom
177
178            atommasses = numpy.array(self._parse_block(inputfile, count, float, 'Atomic Masses'))
179
180            self.set_attribute('atommasses', atommasses)
181
182        if line[0:10] == 'SCF Energy':
183            self.scfenergy = float(line.split()[-1])
184
185            self.set_attribute('scfenergies', [utils.convertor(self.scfenergy,'hartree','eV')])
186
187        if line[0:18] == 'Cartesian Gradient':
188            count = int(line.split()[-1])
189            assert count == self.natom*3
190
191            gradient = numpy.array(self._parse_block(inputfile, count, float, 'Gradient'))
192
193            self.set_attribute('grads', gradient)
194
195        if line[0:25] == 'Cartesian Force Constants':
196            count = int(line.split()[-1])
197            assert count == (3*self.natom*(3*self.natom+1))/2
198
199            hessian = numpy.array(self._parse_block(inputfile, count, float, 'Hessian'))
200
201            self.set_attribute('hessian', hessian)
202
203        if line[0:13] == 'ETran scalars':
204            count = int(line.split()[-1])
205
206            etscalars = self._parse_block(inputfile, count, int, 'ET Scalars')
207
208            # Set attribute: self.netroot (number of excited estates)
209            self.netroot = etscalars[4]
210
211        if line[0:10] == 'ETran spin':
212            count = int(line.split()[-1])
213
214            etspin = self._parse_block(inputfile, count, int, 'ET Spin')
215
216            spin_labels = { 0:'Singlet',
217                            2:'Triplet',
218                           -1:'Unknown'}
219            etsyms = []
220            for i in etspin:
221                if i in spin_labels:
222                    etsyms.append(spin_labels[i])
223                else:
224                    etsyms.append(spin_labels[-1])
225
226            # The extracted property does not contain the actual irrep label
227            # (contrarily to that extracted from the Gaussian log)
228            # After this, 'Etran sym' appears (and would need to be parsed),
229            # but at least in Gaussian this contains only zeroes regardless of the irrep.
230
231            self.set_attribute('etsyms', etsyms)
232
233        if line[0:18] == 'ETran state values':
234            # This section is organized as follows:
235            # ·First the properties of each excited state (up to net):
236            # E, {muNx,muNy,muNz,muvelNx,muvelNy,muvelNz,mmagNx,mmagNy,mmagNz,unkX,unkY,unkZ,unkX,unkY,unkZ}_N=1,net
237            # ·Then come 48 items (only if Freq is requested)
238            # They were all 0.000 in G09, but get an actual value in G16
239            # ·Then, the derivates of each property with respect to Cartesian coordiates only for target state (netroot)
240            # For each Cartesian coordiate, all derivatives wrt to it are included:
241            #  dE/dx1 dmux/dx1 dmuy/dx1 ... unkZ/dx1
242            #  dE/dy1 dmux/dy1 dmuy/dy1 ... unkZ/dy1
243            #  ...
244            #  dE/dzN dmux/dzN dmuy/dzN ... unkZ/dzN
245            # The number of items is therefore:
246            ### 16*net (no Freq jobs)
247            ### 16*net + 48 + 3*self.natom*16 (Freq jobs)
248            count = int(line.split()[-1])
249            if hasattr(self,'etsyms'):
250                net = len(self.etsyms)
251            else:
252                net = 0 # This forces an AssertionError below
253            assert count in [16*net, 16*net+48+3*self.natom*16]
254
255            etvalues = self._parse_block(inputfile, count, float, 'ET Values')
256
257            # ETr energies (1/cm)
258            etenergies_au = [ e_es-self.scfenergy for e_es in etvalues[0:net*16:16] ]
259            etenergies = [ utils.convertor(etr,'hartree','wavenumber') for etr in etenergies_au ]
260            self.set_attribute('etenergies', etenergies)
261
262            # ETr dipoles (length-gauge)
263            etdips = []
264            for k in range(1,16*net,16):
265                etdips.append(etvalues[k:k+3])
266            self.set_attribute('etdips',etdips)
267
268            # Osc. Strength from Etr dipoles
269            # oscs = 2/3 * Etr(au) * dip²(au)
270            etoscs = [ 2.0/3.0*e*numpy.linalg.norm(numpy.array(dip))**2 for e,dip in zip(etenergies_au,etdips) ]
271            self.set_attribute('etoscs', etoscs)
272
273            # ETr dipoles (velocity-gauge)
274            etveldips = []
275            for k in range(4,16*net,16):
276                etveldips.append(etvalues[k:k+3])
277            self.set_attribute('etveldips',etveldips)
278
279            # ETr magnetic dipoles
280            etmagdips = []
281            for k in range(7,16*net,16):
282                etmagdips.append(etvalues[k:k+3])
283            self.set_attribute('etmagdips',etmagdips)
284
285    def parse_aonames(self, line, inputfile):
286        # e.g.: Shell types                                I   N=          28
287        count = int(line.split()[-1])
288        shell_types = self._parse_block(inputfile, count, int, 'Atomic Orbital Names')
289
290        # e.g.: Number of primitives per shell             I   N=          28
291        next(inputfile)
292        self._parse_block(inputfile, count, int, 'Atomic Orbital Names')
293
294        # e.g. Shell to atom map                          I   N=          28
295        next(inputfile)
296        shell_map = self._parse_block(inputfile, count, int, 'Atomic Orbital Names')
297
298        elements = (self.table.element[x] for x in self.atomnos)
299        atom_labels = ["{}{}".format(y, x) for x, y in enumerate(elements, 1)]
300
301        # get orbitals for first atom and start aonames and atombasis lists
302        atom = shell_map[0] - 1
303        shell_offset = 0
304        orbitals = _shell_to_orbitals(shell_types[0], shell_offset)
305        aonames = ["{}_{}".format(atom_labels[atom], x) for x in orbitals]
306        atombasis = [list(range(len(orbitals)))]
307
308        # get rest
309        for i in range(1, len(shell_types)):
310            _type = shell_types[i]
311            atom = shell_map[i] - 1
312            shell_offset += 1
313            basis_offset = atombasis[-1][-1] + 1 # atombasis is increasing numbers, so just grab last
314
315            # if we've move to next atom, need to update offset of shells (e.g. start at 1S)
316            # and start new list for atom basis
317            if atom != shell_map[i - 1] - 1:
318                shell_offset = 0
319                atombasis.append([])
320
321            # determine if we've changed shell type (e.g. from S to P)
322            if _type != shell_types[i - 1]:
323                shell_offset = 0
324
325            orbitals = _shell_to_orbitals(_type, shell_offset)
326            aonames.extend(["{}_{}".format(atom_labels[atom], x) for x in orbitals])
327            atombasis[-1].extend(list(range(basis_offset, basis_offset + len(orbitals))))
328
329        assert len(aonames) == self.nbasis, 'Length of aonames != nbasis: {} != {}'.format(len(aonames), self.nbasis)
330        self.set_attribute('aonames', aonames)
331
332        assert len(atombasis) == self.natom, 'Length of atombasis != natom: {} != {}'.format(len(atombasis), self.natom)
333        self.set_attribute('atombasis', atombasis)
334
335    def after_parsing(self):
336        """Correct data or do parser-specific validation after parsing is finished."""
337
338        # If restricted calculation, need to remove beta homo
339        if len(self.moenergies) == len(self.homos) - 1:
340            self.homos.pop()
341
342    def _parse_block(self, inputfile, count, type, msg):
343        atomnos = []
344        while len(atomnos) < count :
345            self.updateprogress(inputfile, msg, self.fupdate)
346            line = next(inputfile)
347            atomnos.extend([ type(x) for x in line.split()])
348        return atomnos
349