1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2017, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
7
8"""Parser for Psi4 output files."""
9
10from collections import namedtuple
11
12import numpy
13
14from cclib.parser import data
15from cclib.parser import logfileparser
16from cclib.parser import utils
17
18
19class Psi4(logfileparser.Logfile):
20    """A Psi4 log file."""
21
22    def __init__(self, *args, **kwargs):
23
24        # Call the __init__ method of the superclass
25        super(Psi4, self).__init__(logname="Psi4", *args, **kwargs)
26
27    def __str__(self):
28        """Return a string representation of the object."""
29        return "Psi4 log file %s" % (self.filename)
30
31    def __repr__(self):
32        """Return a representation of the object."""
33        return 'Psi4("%s")' % (self.filename)
34
35    def before_parsing(self):
36
37        # Early beta versions of Psi4 normalize basis function
38        # coefficients when printing.
39        self.version_4_beta = False
40
41        # This is just used to track which part of the output we are in, with
42        # changes triggered by ==> things like this <==.
43        self.section = None
44
45        # There are also sometimes subsections within each section, denoted
46        # with =>/<= rather than ==>/<==.
47        self.subsection = None
48
49    def after_parsing(self):
50
51        # Newer versions of Psi4 don't explicitly print the number of atoms.
52        if not hasattr(self, 'natom'):
53            if hasattr(self, 'atomnos'):
54                self.set_attribute('natom', len(self.atomnos))
55
56    def normalisesym(self, label):
57        """Use standard symmetry labels instead of Psi4 labels.
58
59        To normalise:
60        (1) `App` -> `A"`
61        (2) `Ap` -> `A'`
62        """
63        return label.replace("pp", '"').replace("p", "'")
64
65    # Match the number of skipped lines required based on the type of
66    # gradient present (determined from the header), as otherwise the
67    # parsing is identical.
68    GradientInfo = namedtuple('GradientInfo', ['gradient_type', 'header', 'skip_lines'])
69    GRADIENT_TYPES = {
70        'analytic': GradientInfo('analytic',
71                                 '-Total Gradient:',
72                                 ['header', 'dash header']),
73        'numerical': GradientInfo('numerical',
74                                  '## F-D gradient (Symmetry 0) ##',
75                                  ['Irrep num and total size', 'b', '123', 'b']),
76    }
77    GRADIENT_HEADERS = set([gradient_type.header
78                            for gradient_type in GRADIENT_TYPES.values()])
79
80    def extract(self, inputfile, line):
81        """Extract information from the file object inputfile."""
82
83        # Extract the version number and the version control
84        # information, if it exists.
85        if "An Open-Source Ab Initio Electronic Structure Package" in line:
86            version_line = next(inputfile)
87            tokens = version_line.split()
88            package_version = tokens[1].split("-")[-1]
89            self.metadata["legacy_package_version"] = package_version
90            # Keep track of early versions of Psi4.
91            if "beta" in package_version:
92                self.version_4_beta = True
93                # `beta2+` -> `0!0.beta2`
94                package_version = "0!0.{}".format(package_version)
95                if package_version[-1] == "+":
96                    # There is no good way to keep the bare plus sign around,
97                    # but this version is so old...
98                    package_version = package_version[:-1]
99            else:
100                package_version = "1!{}".format(package_version)
101            self.skip_line(inputfile, "blank")
102            line = next(inputfile)
103            if "Git:" in line:
104                tokens = line.split()
105                assert tokens[1] == "Rev"
106                revision = '-'.join(tokens[2:]).replace("{", "").replace("}", "")
107                dev_flag = "" if "dev" in package_version else ".dev"
108                package_version = "{}{}+{}".format(
109                    package_version, dev_flag, revision
110                )
111            self.metadata["package_version"] = package_version
112
113        # This will automatically change the section attribute for Psi4, when encountering
114        # a line that <== looks like this ==>, to whatever is in between.
115        if (line.strip()[:3] == "==>") and (line.strip()[-3:] == "<=="):
116            self.section = line.strip()[4:-4]
117            if self.section == "DFT Potential":
118                self.metadata["methods"].append("DFT")
119
120        # There is also the possibility of subsections.
121        if (line.strip()[:2] == "=>") and (line.strip()[-2:] == "<="):
122            self.subsection = line.strip()[3:-3]
123
124        # Determine whether or not the reference wavefunction is
125        # restricted, unrestricted, or restricted open-shell.
126        if line.strip() == "SCF":
127            while "Reference" not in line:
128                line = next(inputfile)
129            self.reference = line.split()[0]
130            # Work with a complex reference as if it's real.
131            if self.reference[0] == 'C':
132                self.reference = self.reference[1:]
133
134        # Parse the XC density functional
135        # => Composite Functional: B3LYP <=
136        if self.section == "DFT Potential" and "composite functional" in line.lower():
137            chomp = line.split()
138            functional = chomp[-2]
139            self.metadata["functional"] = functional
140
141        #  ==> Geometry <==
142        #
143        #    Molecular point group: c2h
144        #    Full point group: C2h
145        #
146        #    Geometry (in Angstrom), charge = 0, multiplicity = 1:
147        #
148        #       Center              X                  Y                   Z
149        #    ------------   -----------------  -----------------  -----------------
150        #           C         -1.415253322400     0.230221785400     0.000000000000
151        #           C          1.415253322400    -0.230221785400     0.000000000000
152        # ...
153        #
154        if (self.section == "Geometry") and ("Geometry (in Angstrom), charge" in line):
155
156            assert line.split()[3] == "charge"
157            charge = int(line.split()[5].strip(','))
158            self.set_attribute('charge', charge)
159
160            assert line.split()[6] == "multiplicity"
161            mult = int(line.split()[8].strip(':'))
162            self.set_attribute('mult', mult)
163
164            self.skip_line(inputfile, "blank")
165            line = next(inputfile)
166
167            # Usually there is the header and dashes, but, for example, the coordinates
168            # printed when a geometry optimization finishes do not have it.
169            if line.split()[0] == "Center":
170                self.skip_line(inputfile, "dashes")
171                line = next(inputfile)
172
173            elements = []
174            coords = []
175            atommasses = []
176            while line.strip():
177                chomp = line.split()
178                el, x, y, z = chomp[:4]
179                if len(el) > 1:
180                    el = el[0] + el[1:].lower()
181                elements.append(el)
182                coords.append([float(x), float(y), float(z)])
183                # Newer versions of Psi4 print atomic masses.
184                if len(chomp) == 5:
185                    atommasses.append(float(chomp[4]))
186                line = next(inputfile)
187
188            # The 0 is to handle the presence of ghost atoms.
189            self.set_attribute('atomnos', [self.table.number.get(el, 0) for el in elements])
190
191            if not hasattr(self, 'atomcoords'):
192                self.atomcoords = []
193
194            # This condition discards any repeated coordinates that Psi print. For example,
195            # geometry optimizations will print the coordinates at the beginning of and SCF
196            # section and also at the start of the gradient calculation.
197            if len(self.atomcoords) == 0 \
198                or (self.atomcoords[-1] != coords and not hasattr(self, 'finite_difference')):
199                self.atomcoords.append(coords)
200
201            if len(atommasses) > 0:
202                if not hasattr(self, 'atommasses'):
203                    self.atommasses = atommasses
204
205        # Psi4 repeats the charge and multiplicity after the geometry.
206        if (self.section == "Geometry") and (line[2:16].lower() == "charge       ="):
207            charge = int(line.split()[-1])
208            self.set_attribute('charge', charge)
209        if (self.section == "Geometry") and (line[2:16].lower() == "multiplicity ="):
210            mult = int(line.split()[-1])
211            self.set_attribute('mult', mult)
212
213        # The printout for Psi4 has a more obvious trigger for the SCF parameter printout.
214        if (self.section == "Algorithm") and (line.strip() == "==> Algorithm <==") \
215            and not hasattr(self, 'finite_difference'):
216
217            self.skip_line(inputfile, 'blank')
218
219            line = next(inputfile)
220            while line.strip():
221                if "Energy threshold" in line:
222                    etarget = float(line.split()[-1])
223                if "Density threshold" in line:
224                    dtarget = float(line.split()[-1])
225                line = next(inputfile)
226
227            if not hasattr(self, "scftargets"):
228                self.scftargets = []
229            self.scftargets.append([etarget, dtarget])
230
231        # This section prints contraction information before the atomic basis set functions and
232        # is a good place to parse atombasis indices as well as atomnos. However, the section this line
233        # is in differs between HF and DFT outputs.
234        #
235        #  -Contraction Scheme:
236        #    Atom   Type   All Primitives // Shells:
237        #   ------ ------ --------------------------
238        #       1     C     6s 3p // 2s 1p
239        #       2     C     6s 3p // 2s 1p
240        #       3     C     6s 3p // 2s 1p
241        # ...
242        if self.section == "Primary Basis":
243            if line[2:12] == "Basis Set:":
244                self.metadata["basis_set"] = line.split()[2]
245
246        if (self.section == "Primary Basis" or self.section == "DFT Potential") and line.strip() == "-Contraction Scheme:":
247
248            self.skip_lines(inputfile, ['headers', 'd'])
249
250            atomnos = []
251            atombasis = []
252            atombasis_pos = 0
253            line = next(inputfile)
254            while line.strip():
255
256                element = line.split()[1]
257                if len(element) > 1:
258                    element = element[0] + element[1:].lower()
259                atomnos.append(self.table.number[element])
260
261                # To count the number of atomic orbitals for the atom, sum up the orbitals
262                # in each type of shell, times the numbers of shells. Currently, we assume
263                # the multiplier is a single digit and that there are only s and p shells,
264                # which will need to be extended later when considering larger basis sets,
265                # with corrections for the cartesian/spherical cases.
266                ao_count = 0
267                shells = line.split('//')[1].split()
268                for s in shells:
269                    count, type = s
270                    multiplier = 3*(type == 'p') or 1
271                    ao_count += multiplier*int(count)
272
273                if len(atombasis) > 0:
274                    atombasis_pos = atombasis[-1][-1] + 1
275                atombasis.append(list(range(atombasis_pos, atombasis_pos+ao_count)))
276
277                line = next(inputfile)
278
279            self.set_attribute('natom', len(atomnos))
280            self.set_attribute('atomnos', atomnos)
281            self.set_attribute('atombasis', atombasis)
282
283        # The atomic basis set is straightforward to parse, but there are some complications
284        # when symmetry is used, because in that case Psi4 only print the symmetry-unique atoms,
285        # and the list of symmetry-equivalent ones is not printed. Therefore, for simplicity here
286        # when an atomic is missing (atom indices are printed) assume the atomic orbitals of the
287        # last atom of the same element before it. This might not work if a mixture of basis sets
288        # is used somehow... but it should cover almost all cases for now.
289        #
290        # Note that Psi also print normalized coefficients (details below).
291        #
292        #  ==> AO Basis Functions <==
293        #
294        #    [ STO-3G ]
295        #    spherical
296        #    ****
297        #    C   1
298        #    S   3 1.00
299        #                        71.61683700           2.70781445
300        #                        13.04509600           2.61888016
301        # ...
302        if (self.section == "AO Basis Functions") and (line.strip() == "==> AO Basis Functions <=="):
303
304            def get_symmetry_atom_basis(gbasis):
305                """Get symmetry atom by replicating the last atom in gbasis of the same element."""
306
307                missing_index = len(gbasis)
308                missing_atomno = self.atomnos[missing_index]
309
310                ngbasis = len(gbasis)
311                last_same = ngbasis - self.atomnos[:ngbasis][::-1].index(missing_atomno) - 1
312                return gbasis[last_same]
313
314            dfact = lambda n: (n <= 0) or n * dfact(n-2)
315
316            # Early beta versions of Psi4 normalize basis function
317            # coefficients when printing.
318            if self.version_4_beta:
319                def get_normalization_factor(exp, lx, ly, lz):
320                    norm_s = (2*exp/numpy.pi)**0.75
321                    if lx + ly + lz > 0:
322                        nom = (4*exp)**((lx+ly+lz)/2.0)
323                        den = numpy.sqrt(dfact(2*lx-1) * dfact(2*ly-1) * dfact(2*lz-1))
324                        return norm_s * nom / den
325                    else:
326                        return norm_s
327            else:
328                get_normalization_factor = lambda exp, lx, ly, lz: 1
329
330            self.skip_lines(inputfile, ['b', 'basisname'])
331
332            line = next(inputfile)
333            spherical = line.strip() == "spherical"
334            if hasattr(self, 'spherical_basis'):
335                assert self.spherical_basis == spherical
336            else:
337                self.spherical_basis = spherical
338
339            gbasis = []
340            self.skip_line(inputfile, 'stars')
341            line = next(inputfile)
342            while line.strip():
343
344                element, index = line.split()
345                if len(element) > 1:
346                    element = element[0] + element[1:].lower()
347                index = int(index)
348
349                # This is the code that adds missing atoms when symmetry atoms are excluded
350                # from the basis set printout. Again, this will work only if all atoms of
351                # the same element use the same basis set.
352                while index > len(gbasis) + 1:
353                    gbasis.append(get_symmetry_atom_basis(gbasis))
354
355                gbasis.append([])
356                line = next(inputfile)
357                while line.find("*") == -1:
358
359                    # The shell type and primitive count is in the first line.
360                    shell_type, nprimitives, _ = line.split()
361                    nprimitives = int(nprimitives)
362
363                    # Get the angular momentum for this shell type.
364                    momentum = {'S': 0, 'P': 1, 'D': 2, 'F': 3, 'G': 4, 'H': 5, 'I': 6}[shell_type.upper()]
365
366                    # Read in the primitives.
367                    primitives_lines = [next(inputfile) for i in range(nprimitives)]
368                    primitives = [list(map(float, pl.split())) for pl in primitives_lines]
369
370                    # Un-normalize the coefficients. Psi prints the normalized coefficient
371                    # of the highest polynomial, namely XX for D orbitals, XXX for F, and so on.
372                    for iprim, prim in enumerate(primitives):
373                        exp, coef = prim
374                        coef = coef / get_normalization_factor(exp, momentum, 0, 0)
375                        primitives[iprim] = [exp, coef]
376
377                    primitives = [tuple(p) for p in primitives]
378                    shell = [shell_type, primitives]
379                    gbasis[-1].append(shell)
380
381                    line = next(inputfile)
382
383                line = next(inputfile)
384
385            # We will also need to add symmetry atoms that are missing from the input
386            # at the end of this block, if the symmetry atoms are last.
387            while len(gbasis) < self.natom:
388                gbasis.append(get_symmetry_atom_basis(gbasis))
389
390            self.gbasis = gbasis
391
392        # A block called 'Calculation Information' prints these before starting the SCF.
393        if (self.section == "Pre-Iterations") and ("Number of atoms" in line):
394            natom = int(line.split()[-1])
395            self.set_attribute('natom', natom)
396        if (self.section == "Pre-Iterations") and ("Number of atomic orbitals" in line):
397            nbasis = int(line.split()[-1])
398            self.set_attribute('nbasis', nbasis)
399        if (self.section == "Pre-Iterations") and ("Total" in line):
400            chomp = line.split()
401            nbasis = int(chomp[1])
402            self.set_attribute('nbasis', nbasis)
403
404        #  ==> Iterations <==
405
406        # Psi4 converges both the SCF energy and density elements and reports both in the
407        # iterations printout. However, the default convergence scheme involves a density-fitted
408        # algorithm for efficiency, and this is often followed by a something with exact electron
409        # repulsion integrals. In that case, there are actually two convergence cycles performed,
410        # one for the density-fitted algorithm and one for the exact one, and the iterations are
411        # printed in two blocks separated by some set-up information.
412        if (self.section == "Iterations") and (line.strip() == "==> Iterations <==") \
413            and not hasattr(self, 'finite_difference'):
414
415            if not hasattr(self, 'scfvalues'):
416                self.scfvalues = []
417            scfvals = []
418
419            self.skip_lines(inputfile, ['b', 'header', 'b'])
420            line = next(inputfile)
421            # Read each SCF iteration.
422            while line.strip() != "==> Post-Iterations <==":
423                if line.strip() and line.split()[0][0] == '@':
424                    denergy = float(line.split()[4])
425                    ddensity = float(line.split()[5])
426                    scfvals.append([denergy, ddensity])
427                try:
428                    line = next(inputfile)
429                except StopIteration:
430                    self.logger.warning('File terminated before end of last SCF! Last density err: {}'.format(ddensity))
431                    break
432            self.section = "Post-Iterations"
433            self.scfvalues.append(scfvals)
434
435        # This section, from which we parse molecular orbital symmetries and
436        # orbital energies, is quite similar for both Psi3 and Psi4, and in fact
437        # the format for orbtials is the same, although the headers and spacers
438        # are a bit different. Let's try to get both parsed with one code block.
439        #
440        # Here is how the block looks like for Psi4:
441        #
442        #	Orbital Energies (a.u.)
443        #	-----------------------
444        #
445        #	Doubly Occupied:
446        #
447        #	   1Bu   -11.040586     1Ag   -11.040524     2Bu   -11.031589
448        #	   2Ag   -11.031589     3Bu   -11.028950     3Ag   -11.028820
449        # (...)
450        #	  15Ag    -0.415620     1Bg    -0.376962     2Au    -0.315126
451        #	   2Bg    -0.278361     3Bg    -0.222189
452        #
453        #	Virtual:
454        #
455        #	   3Au     0.198995     4Au     0.268517     4Bg     0.308826
456        #	   5Au     0.397078     5Bg     0.521759    16Ag     0.565017
457        # (...)
458        #	  24Ag     0.990287    24Bu     1.027266    25Ag     1.107702
459        #	  25Bu     1.124938
460        #
461        # The case is different in the trigger string.
462        if ("orbital energies (a.u.)" in line.lower()  or "orbital energies [eh]" in line.lower()) \
463            and not hasattr(self, 'finite_difference'):
464
465            # If this is Psi4, we will be in the appropriate section.
466            assert self.section == "Post-Iterations"
467
468            self.moenergies = [[]]
469            self.mosyms = [[]]
470
471            # Psi4 has dashes under the trigger line, but Psi3 did not.
472            self.skip_line(inputfile, 'dashes')
473            self.skip_line(inputfile, 'blank')
474
475            # Both versions have this case-insensitive substring.
476            occupied = next(inputfile)
477            if self.reference[0:2] == 'RO' or self.reference[0:1] == 'R':
478                assert 'doubly occupied' in occupied.lower()
479            elif self.reference[0:1] == 'U':
480                assert 'alpha occupied' in occupied.lower()
481
482            self.skip_line(inputfile, 'blank')
483
484            # Parse the occupied MO symmetries and energies.
485            self._parse_mosyms_moenergies(inputfile, 0)
486
487            # The last orbital energy here represents the HOMO.
488            self.homos = [len(self.moenergies[0])-1]
489            # For a restricted open-shell calculation, this is the
490            # beta HOMO, and we assume the singly-occupied orbitals
491            # are all alpha, which are handled next.
492            if self.reference[0:2] == 'RO':
493                self.homos.append(self.homos[0])
494
495            unoccupied = next(inputfile)
496            if self.reference[0:2] == 'RO':
497                assert unoccupied.strip() == 'Singly Occupied:'
498            elif self.reference[0:1] == 'R':
499                assert unoccupied.strip() == 'Virtual:'
500            elif self.reference[0:1] == 'U':
501                assert unoccupied.strip() == 'Alpha Virtual:'
502
503            # Psi4 now has a blank line, Psi3 does not.
504            self.skip_line(inputfile, 'blank')
505
506            # Parse the unoccupied MO symmetries and energies.
507            self._parse_mosyms_moenergies(inputfile, 0)
508
509            # Here is where we handle the Beta or Singly occupied orbitals.
510            if self.reference[0:1] == 'U':
511                self.mosyms.append([])
512                self.moenergies.append([])
513                line = next(inputfile)
514                assert line.strip() == 'Beta Occupied:'
515                self.skip_line(inputfile, 'blank')
516                self._parse_mosyms_moenergies(inputfile, 1)
517                self.homos.append(len(self.moenergies[1])-1)
518                line = next(inputfile)
519                assert line.strip() == 'Beta Virtual:'
520                self.skip_line(inputfile, 'blank')
521                self._parse_mosyms_moenergies(inputfile, 1)
522            elif self.reference[0:2] == 'RO':
523                line = next(inputfile)
524                assert line.strip() == 'Virtual:'
525                self.skip_line(inputfile, 'blank')
526                self._parse_mosyms_moenergies(inputfile, 0)
527
528            line = next(inputfile)
529            assert line.strip() == 'Final Occupation by Irrep:'
530            line = next(inputfile)
531            irreps = line.split()
532            line = next(inputfile)
533            tokens = line.split()
534            assert tokens[0] == 'DOCC'
535            docc = sum([int(x.replace(',', '')) for x in tokens[2:-1]])
536            line = next(inputfile)
537            if line.strip():
538                tokens = line.split()
539                assert tokens[0] in ('SOCC', 'NA')
540                socc = sum([int(x.replace(',', '')) for x in tokens[2:-1]])
541                # Fix up the restricted open-shell alpha HOMO.
542                if self.reference[0:2] == 'RO':
543                    self.homos[0] += socc
544
545        # Both Psi3 and Psi4 print the final SCF energy right after the orbital energies,
546        # but the label is different. Psi4 also does DFT, and the label is also different in that case.
547        if self.section == "Post-Iterations" and "Final Energy:" in line \
548            and not hasattr(self, 'finite_difference'):
549            e = float(line.split()[3])
550            if not hasattr(self, 'scfenergies'):
551                self.scfenergies = []
552            self.scfenergies.append(utils.convertor(e, 'hartree', 'eV'))
553
554        if self.subsection == "Energetics":
555            if "Empirical Dispersion Energy" in line:
556                dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV")
557                self.append_attribute("dispersionenergies", dispersion)
558
559        #   ==> Molecular Orbitals <==
560        #
561        #                     1            2            3            4            5
562        #
563        # 1    H1 s0         0.1610392    0.1040990    0.0453848    0.0978665    1.0863246
564        # 2    H1 s0         0.3066996    0.0742959    0.8227318    1.3460922   -1.6429494
565        # 3    H1 s0         0.1669296    1.5494169   -0.8885631   -1.8689490    1.0473633
566        # 4    H2 s0         0.1610392   -0.1040990    0.0453848   -0.0978665   -1.0863246
567        # 5    H2 s0         0.3066996   -0.0742959    0.8227318   -1.3460922    1.6429494
568        # 6    H2 s0         0.1669296   -1.5494169   -0.8885631    1.8689490   -1.0473633
569        #
570        #             Ene    -0.5279195    0.1235556    0.3277474    0.5523654    2.5371710
571        #             Sym            Ag          B3u           Ag          B3u          B3u
572        #             Occ             2            0            0            0            0
573        #
574        #
575        #                         6
576        #
577        # 1    H1 s0         1.1331221
578        # 2    H1 s0        -1.2163107
579        # 3    H1 s0         0.4695317
580        # 4    H2 s0         1.1331221
581        # 5    H2 s0        -1.2163107
582        # 6    H2 s0         0.4695317
583        #
584        #            Ene     2.6515637
585        #            Sym            Ag
586        #            Occ             0
587
588        if (self.section) and ("Molecular Orbitals" in self.section) \
589           and ("Molecular Orbitals" in line) and not hasattr(self, 'finite_difference'):
590
591            self.skip_line(inputfile, 'blank')
592
593            mocoeffs = []
594            indices = next(inputfile)
595            while indices.strip():
596
597                if indices[:3] == '***':
598                    break
599
600                indices = [int(i) for i in indices.split()]
601
602                if len(mocoeffs) < indices[-1]:
603                    for i in range(len(indices)):
604                        mocoeffs.append([])
605                else:
606                    assert len(mocoeffs) == indices[-1]
607
608                self.skip_line(inputfile, 'blank')
609
610                n = len(indices)
611                line = next(inputfile)
612                while line.strip():
613                    chomp = line.split()
614                    m = len(chomp)
615                    iao = int(chomp[0])
616                    coeffs = [float(c) for c in chomp[m - n:]]
617                    for i, c in enumerate(coeffs):
618                        mocoeffs[indices[i]-1].append(c)
619                    line = next(inputfile)
620
621                energies = next(inputfile)
622                symmetries = next(inputfile)
623                occupancies = next(inputfile)
624
625                self.skip_lines(inputfile, ['b', 'b'])
626                indices = next(inputfile)
627
628            if not hasattr(self, 'mocoeffs'):
629                self.mocoeffs = []
630            self.mocoeffs.append(mocoeffs)
631
632        # The formats for Mulliken and Lowdin atomic charges are the same, just with
633        # the name changes, so use the same code for both.
634        #
635        # Properties computed using the SCF density density matrix
636        #   Mulliken Charges: (a.u.)
637        #    Center  Symbol    Alpha    Beta     Spin     Total
638        #        1     C     2.99909  2.99909  0.00000  0.00182
639        #        2     C     2.99909  2.99909  0.00000  0.00182
640        # ...
641        for pop_type in ["Mulliken", "Lowdin"]:
642            if line.strip() == "%s Charges: (a.u.)" % pop_type:
643                if not hasattr(self, 'atomcharges'):
644                    self.atomcharges = {}
645                header = next(inputfile)
646
647                line = next(inputfile)
648                while not line.strip():
649                    line = next(inputfile)
650
651                charges = []
652                while line.strip():
653                    ch = float(line.split()[-1])
654                    charges.append(ch)
655                    line = next(inputfile)
656                self.atomcharges[pop_type.lower()] = charges
657
658        # This is for the older conventional MP2 code in 4.0b5.
659        mp_trigger = "MP2 Total Energy (a.u.)"
660        if line.strip()[:len(mp_trigger)] == mp_trigger:
661            self.metadata["methods"].append("MP2")
662            mpenergy = utils.convertor(float(line.split()[-1]), 'hartree', 'eV')
663            if not hasattr(self, 'mpenergies'):
664                self.mpenergies = []
665            self.mpenergies.append([mpenergy])
666        # This is for the newer DF-MP2 code in 4.0.
667        if 'DF-MP2 Energies' in line:
668            self.metadata["methods"].append("DF-MP2")
669            while 'Total Energy' not in line:
670                line = next(inputfile)
671            mpenergy = utils.convertor(float(line.split()[3]), 'hartree', 'eV')
672            if not hasattr(self, 'mpenergies'):
673                self.mpenergies = []
674            self.mpenergies.append([mpenergy])
675
676        # Note this is just a start and needs to be modified for CCSD(T), etc.
677        ccsd_trigger = "* CCSD total energy"
678        if line.strip()[:len(ccsd_trigger)] == ccsd_trigger:
679            self.metadata["methods"].append("CCSD")
680            ccsd_energy = utils.convertor(float(line.split()[-1]), 'hartree', 'eV')
681            if not hasattr(self, "ccenergis"):
682                self.ccenergies = []
683            self.ccenergies.append(ccsd_energy)
684
685        # The geometry convergence targets and values are printed in a table, with the legends
686        # describing the convergence annotation. Probably exact slicing of the line needs
687        # to be done in order to extract the numbers correctly. If there are no values for
688        # a paritcular target it means they are not used (marked also with an 'o'), and in this case
689        # we will set a value of numpy.inf so that any value will be smaller.
690        #
691        #  ==> Convergence Check <==
692        #
693        #  Measures of convergence in internal coordinates in au.
694        #  Criteria marked as inactive (o), active & met (*), and active & unmet ( ).
695        #  ---------------------------------------------------------------------------------------------
696        #   Step     Total Energy     Delta E     MAX Force     RMS Force      MAX Disp      RMS Disp
697        #  ---------------------------------------------------------------------------------------------
698        #    Convergence Criteria    1.00e-06 *    3.00e-04 *             o    1.20e-03 *             o
699        #  ---------------------------------------------------------------------------------------------
700        #      2    -379.77675264   -7.79e-03      1.88e-02      4.37e-03 o    2.29e-02      6.76e-03 o  ~
701        #  ---------------------------------------------------------------------------------------------
702        #
703        if (self.section == "Convergence Check") and line.strip() == "==> Convergence Check <==" \
704            and not hasattr(self, 'finite_difference'):
705
706            if not hasattr(self, "optstatus"):
707                self.optstatus = []
708            self.optstatus.append(data.ccData.OPT_UNKNOWN)
709
710            self.skip_lines(inputfile, ['b', 'units', 'comment', 'dash+tilde', 'header', 'dash+tilde'])
711
712            # These are the position in the line at which numbers should start.
713            starts = [27, 41, 55, 69, 83]
714
715            criteria = next(inputfile)
716            geotargets = []
717            for istart in starts:
718                if criteria[istart:istart+9].strip():
719                    geotargets.append(float(criteria[istart:istart+9]))
720                else:
721                    geotargets.append(numpy.inf)
722
723            self.skip_line(inputfile, 'dashes')
724
725            values = next(inputfile)
726            step = int(values.split()[0])
727            geovalues = []
728            for istart in starts:
729                if values[istart:istart+9].strip():
730                    geovalues.append(float(values[istart:istart+9]))
731
732            if step == 1:
733                self.optstatus[-1] += data.ccData.OPT_NEW
734
735            # This assertion may be too restrictive, but we haven't seen the geotargets change.
736            # If such an example comes up, update the value since we're interested in the last ones.
737            if not hasattr(self, 'geotargets'):
738                self.geotargets = geotargets
739            else:
740                assert self.geotargets == geotargets
741
742            if not hasattr(self, 'geovalues'):
743                self.geovalues = []
744            self.geovalues.append(geovalues)
745
746        # This message signals a converged optimization, in which case we want
747        # to append the index for this step to optdone, which should be equal
748        # to the number of geovalues gathered so far.
749        if "Optimization is complete!" in line:
750
751            # This is a workaround for Psi4.0/sample_opt-irc-2.out;
752            # IRC calculations currently aren't parsed properly for
753            # optimization parameters.
754            if hasattr(self, 'geovalues'):
755
756                if not hasattr(self, 'optdone'):
757                    self.optdone = []
758                self.optdone.append(len(self.geovalues))
759
760                assert hasattr(self, "optstatus") and len(self.optstatus) > 0
761                self.optstatus[-1] += data.ccData.OPT_DONE
762
763        # This message means that optimization has stopped for some reason, but we
764        # still want optdone to exist in this case, although it will be an empty list.
765        if line.strip() == "Optimizer: Did not converge!":
766            if not hasattr(self, 'optdone'):
767                self.optdone = []
768
769            assert hasattr(self, "optstatus") and len(self.optstatus) > 0
770            self.optstatus[-1] += data.ccData.OPT_UNCONVERGED
771
772        # The reference point at which properties are evaluated in Psi4 is explicitely stated,
773        # so we can save it for later. It is not, however, a part of the Properties section,
774        # but it appears before it and also in other places where properies that might depend
775        # on it are printed.
776        #
777        # Properties will be evaluated at   0.000000,   0.000000,   0.000000 Bohr
778        #
779        # OR
780        #
781        # Properties will be evaluated at   0.000000,   0.000000,   0.000000 [a0]
782        #
783        if "Properties will be evaluated at" in line.strip():
784            self.origin = numpy.array([float(x.strip(',')) for x in line.split()[-4:-1]])
785            assert line.split()[-1] in ["Bohr", "[a0]"]
786            self.origin = utils.convertor(self.origin, 'bohr', 'Angstrom')
787
788        # The properties section print the molecular dipole moment:
789        #
790        #  ==> Properties <==
791        #
792        #
793        #Properties computed using the SCF density density matrix
794        #  Nuclear Dipole Moment: (a.u.)
795        #     X:     0.0000      Y:     0.0000      Z:     0.0000
796        #
797        #  Electronic Dipole Moment: (a.u.)
798        #     X:     0.0000      Y:     0.0000      Z:     0.0000
799        #
800        #  Dipole Moment: (a.u.)
801        #     X:     0.0000      Y:     0.0000      Z:     0.0000     Total:     0.0000
802        #
803        if (self.section == "Properties") and line.strip() == "Dipole Moment: (a.u.)":
804
805            line = next(inputfile)
806            dipole = numpy.array([float(line.split()[1]), float(line.split()[3]), float(line.split()[5])])
807            dipole = utils.convertor(dipole, "ebohr", "Debye")
808
809            if not hasattr(self, 'moments'):
810                # Old versions of Psi4 don't print the origin; assume
811                # it's at zero.
812                if not hasattr(self, 'origin'):
813                    self.origin = numpy.array([0.0, 0.0, 0.0])
814                self.moments = [self.origin, dipole]
815            else:
816                try:
817                    assert numpy.all(self.moments[1] == dipole)
818                except AssertionError:
819                    self.logger.warning('Overwriting previous multipole moments with new values')
820                    self.logger.warning('This could be from post-HF properties or geometry optimization')
821                    self.moments = [self.origin, dipole]
822
823        # Higher multipole moments are printed separately, on demand, in lexicographical order.
824        #
825        # Multipole Moments:
826        #
827        # ------------------------------------------------------------------------------------
828        #     Multipole             Electric (a.u.)       Nuclear  (a.u.)        Total (a.u.)
829        # ------------------------------------------------------------------------------------
830        #
831        # L = 1.  Multiply by 2.5417462300 to convert to Debye
832        # Dipole X            :          0.0000000            0.0000000            0.0000000
833        # Dipole Y            :          0.0000000            0.0000000            0.0000000
834        # Dipole Z            :          0.0000000            0.0000000            0.0000000
835        #
836        # L = 2.  Multiply by 1.3450341749 to convert to Debye.ang
837        # Quadrupole XX       :      -1535.8888701         1496.8839996          -39.0048704
838        # Quadrupole XY       :        -11.5262958           11.4580038           -0.0682920
839        # ...
840        #
841        if line.strip() == "Multipole Moments:":
842
843            self.skip_lines(inputfile, ['b', 'd', 'header', 'd', 'b'])
844
845            # The reference used here should have been printed somewhere
846            # before the properties and parsed above.
847            moments = [self.origin]
848
849            line = next(inputfile)
850            while "----------" not in line.strip():
851
852                rank = int(line.split()[2].strip('.'))
853
854                multipole = []
855                line = next(inputfile)
856                while line.strip():
857
858                    value = float(line.split()[-1])
859                    fromunits = "ebohr" + (rank > 1)*("%i" % rank)
860                    tounits = "Debye" + (rank > 1)*".ang" + (rank > 2)*("%i" % (rank-1))
861                    value = utils.convertor(value, fromunits, tounits)
862                    multipole.append(value)
863
864                    line = next(inputfile)
865
866                multipole = numpy.array(multipole)
867                moments.append(multipole)
868                line = next(inputfile)
869
870            if not hasattr(self, 'moments'):
871                self.moments = moments
872            else:
873                for im, m in enumerate(moments):
874                    if len(self.moments) <= im:
875                        self.moments.append(m)
876                    else:
877                        assert numpy.allclose(self.moments[im], m, atol=1.0e4)
878
879        ## Analytic Gradient
880        #        -Total Gradient:
881        #   Atom            X                  Y                   Z
882        #  ------   -----------------  -----------------  -----------------
883        #     1       -0.000000000000     0.000000000000    -0.064527252292
884        #     2        0.000000000000    -0.028380539652     0.032263626146
885        #     3       -0.000000000000     0.028380539652     0.032263626146
886
887        ## Finite Differences Gradient
888        # -------------------------------------------------------------
889        #   ## F-D gradient (Symmetry 0) ##
890        #   Irrep: 1 Size: 3 x 3
891        #
892        #                  1                   2                   3
893        #
894        #     1     0.00000000000000     0.00000000000000    -0.02921303282515
895        #     2     0.00000000000000    -0.00979709321487     0.01460651641258
896        #     3     0.00000000000000     0.00979709321487     0.01460651641258
897        if line.strip() in Psi4.GRADIENT_HEADERS:
898
899            # Handle the different header lines between analytic and
900            # numerical gradients.
901            gradient_skip_lines = [
902                info.skip_lines
903                for info in Psi4.GRADIENT_TYPES.values()
904                if info.header == line.strip()
905            ][0]
906            gradient = self.parse_gradient(inputfile, gradient_skip_lines)
907
908            if not hasattr(self, 'grads'):
909                self.grads = []
910            self.grads.append(gradient)
911
912        # OLD Normal mode output parser (PSI4 < 1)
913
914        ## Harmonic frequencies.
915
916        # -------------------------------------------------------------
917
918        #   Computing second-derivative from gradients using projected,
919        #   symmetry-adapted, cartesian coordinates (fd_freq_1).
920
921        #   74 gradients passed in, including the reference geometry.
922        #   Generating complete list of displacements from unique ones.
923
924        #       Operation 2 takes plus displacements of irrep Bg to minus ones.
925        #       Operation 3 takes plus displacements of irrep Au to minus ones.
926        #       Operation 2 takes plus displacements of irrep Bu to minus ones.
927
928        #         Irrep      Harmonic Frequency
929        #                         (cm-1)
930        #       -----------------------------------------------
931        #            Au          137.2883
932        if line.strip() == 'Irrep      Harmonic Frequency':
933
934            vibsyms = []
935            vibfreqs = []
936
937            self.skip_lines(inputfile, ['(cm-1)', 'dashes'])
938
939            ## The first section contains the symmetry of each normal
940            ## mode and its frequency.
941            line = next(inputfile)
942            while '---' not in line:
943                chomp = line.split()
944                vibsym = chomp[0]
945                vibfreq = Psi4.parse_vibfreq(chomp[1])
946                vibsyms.append(vibsym)
947                vibfreqs.append(vibfreq)
948                line = next(inputfile)
949
950            self.set_attribute('vibsyms', vibsyms)
951            self.set_attribute('vibfreqs', vibfreqs)
952
953            line = next(inputfile)
954            assert line.strip() == ''
955            line = next(inputfile)
956            assert 'Normal Modes' in line
957            line = next(inputfile)
958            assert 'Molecular mass is' in line
959            if hasattr(self, 'atommasses'):
960                assert abs(float(line.split()[3]) - sum(self.atommasses)) < 1.0e-4
961            line = next(inputfile)
962            assert line.strip() == 'Frequencies in cm^-1; force constants in au.'
963            line = next(inputfile)
964            assert line.strip() == ''
965            line = next(inputfile)
966
967            ## The second section contains the frequency, force
968            ## constant, and displacement for each normal mode, along
969            ## with the atomic masses.
970
971            #       Normal Modes (non-mass-weighted).
972            #       Molecular mass is  130.07825 amu.
973            #       Frequencies in cm^-1; force constants in au.
974
975            #  Frequency:        137.29
976            #  Force constant:   0.0007
977            #            X       Y       Z           mass
978            # C      0.000   0.000   0.050      12.000000
979            # C      0.000   0.000   0.050      12.000000
980            for vibfreq in self.vibfreqs:
981                _vibfreq = Psi4.parse_vibfreq(line[13:].strip())
982                assert abs(vibfreq - _vibfreq) < 1.0e-2
983                line = next(inputfile)
984                assert 'Force constant:' in line
985                if not hasattr(self, "vibfconsts"):
986                    self.vibfconsts = []
987                self.vibfconsts.append(
988                    utils.convertor(float(line.split()[2]), "hartree/bohr2", "mDyne/angstrom")
989                )
990                line = next(inputfile)
991                assert 'X       Y       Z           mass' in line
992                line = next(inputfile)
993                if not hasattr(self, 'vibdisps'):
994                    self.vibdisps = []
995                normal_mode_disps = []
996                # for k in range(self.natom):
997                while line.strip():
998                    chomp = line.split()
999                    # Do nothing with this for now.
1000                    atomsym = chomp[0]
1001                    atomcoords = [float(x) for x in chomp[1:4]]
1002                    # Do nothing with this for now.
1003                    atommass = float(chomp[4])
1004                    normal_mode_disps.append(atomcoords)
1005                    line = next(inputfile)
1006                self.vibdisps.append(normal_mode_disps)
1007                line = next(inputfile)
1008
1009        # NEW Normal mode output parser (PSI4 >= 1)
1010
1011        #   ==> Harmonic Vibrational Analysis <==
1012        #   ...
1013        #   Vibration                       7                   8                   9
1014        #   ...
1015        #
1016        #   Vibration                       10                  11                  12
1017        #   ...
1018
1019        if line.strip() == '==> Harmonic Vibrational Analysis <==':
1020
1021            vibsyms = []
1022            vibfreqs = []
1023            vibdisps = []
1024            vibrmasses = []
1025            vibfconsts = []
1026
1027            # Skip lines till the first Vibration block
1028            while not line.strip().startswith('Vibration'):
1029                line = next(inputfile)
1030
1031            n_modes = 0
1032            # Parse all the Vibration blocks
1033            while line.strip().startswith('Vibration'):
1034                n = len(line.split()) - 1
1035                n_modes += n
1036                vibfreqs_, vibsyms_, vibdisps_, vibrmasses_, vibfconsts_ = self.parse_vibration(n, inputfile)
1037                vibfreqs.extend(vibfreqs_)
1038                vibsyms.extend(vibsyms_)
1039                vibdisps.extend(vibdisps_)
1040                vibrmasses.extend(vibrmasses_)
1041                vibfconsts.extend(vibfconsts_)
1042                line = next(inputfile)
1043
1044            # It looks like the symmetry of the normal mode may be missing
1045            # from some / most. Only include them if they are there for all
1046
1047            if len(vibfreqs) == n_modes:
1048                self.set_attribute('vibfreqs', vibfreqs)
1049
1050            if len(vibsyms) == n_modes:
1051                self.set_attribute('vibsyms', vibsyms)
1052
1053            if len(vibdisps) == n_modes:
1054                self.set_attribute('vibdisps', vibdisps)
1055
1056            if len(vibdisps) == n_modes:
1057                self.set_attribute('vibrmasses', vibrmasses)
1058
1059            if len(vibdisps) == n_modes:
1060                self.set_attribute('vibfconsts', vibfconsts)
1061
1062        # Second one is 1.0, first one is 1.2 and newer
1063        if (self.section == "Thermochemistry Energy Analysis" and "Thermochemistry Energy Analysis" in line) \
1064           or (self.section == "Energy Analysis" and "Energy Analysis" in line):
1065
1066            self.skip_lines(
1067                inputfile,
1068                [
1069                    "b",
1070                    "Raw electronic energy",
1071                    "Total E0",
1072                    "b",
1073                    "Zero-point energy, ZPE_vib = Sum_i nu_i / 2",
1074                    "Electronic ZPE",
1075                    "Translational ZPE",
1076                    "Rotational ZPE"
1077                ]
1078            )
1079            line = next(inputfile)
1080            assert "Vibrational ZPE" in line
1081            self.set_attribute("zpve", float(line.split()[6]))
1082
1083        # If finite difference is used to compute forces (i.e. by displacing
1084        # slightly all the atoms), a series of additional scf calculations is
1085        # performed. Orbitals, geometries, energies, etc. for these shouln't be
1086        # included in the parsed data.
1087
1088        if line.strip().startswith('Using finite-differences of gradients'):
1089            self.set_attribute('finite_difference', True)
1090
1091        # This is the result of calling `print_variables()` and contains all
1092        # current inner variables known to Psi4.
1093        if line.strip() == "Variable Map:":
1094            self.skip_line(inputfile, "d")
1095            line = next(inputfile)
1096            while line.strip():
1097                tokens = line.split()
1098                # Remove double quotation marks
1099                name = " ".join(tokens[:-2])[1:-1]
1100                value = float(tokens[-1])
1101                if name == "CC T1 DIAGNOSTIC":
1102                    self.metadata["t1_diagnostic"] = value
1103                line = next(inputfile)
1104
1105        if line[:54] == '*** Psi4 exiting successfully. Buy a developer a beer!'\
1106                or line[:54] == '*** PSI4 exiting successfully. Buy a developer a beer!':
1107            self.metadata['success'] = True
1108
1109    def _parse_mosyms_moenergies(self, inputfile, spinidx):
1110        """Parse molecular orbital symmetries and energies from the
1111        'Post-Iterations' section.
1112        """
1113        line = next(inputfile)
1114        while line.strip():
1115            for i in range(len(line.split()) // 2):
1116                self.mosyms[spinidx].append(line.split()[i*2][-2:])
1117                moenergy = utils.convertor(float(line.split()[i*2+1]), "hartree", "eV")
1118                self.moenergies[spinidx].append(moenergy)
1119            line = next(inputfile)
1120        return
1121
1122    def parse_gradient(self, inputfile, skip_lines):
1123        """Parse the nuclear gradient section into a list of lists with shape
1124        [natom, 3].
1125        """
1126        self.skip_lines(inputfile, skip_lines)
1127        line = next(inputfile)
1128        gradient = []
1129
1130        while line.strip():
1131            idx, x, y, z = line.split()
1132            gradient.append((float(x), float(y), float(z)))
1133            line = next(inputfile)
1134        return gradient
1135
1136    @staticmethod
1137    def parse_vibration(n, inputfile):
1138
1139        #   Freq [cm^-1]                1501.9533           1501.9533           1501.9533
1140        #   Irrep
1141        #   Reduced mass [u]              1.1820              1.1820              1.1820
1142        #   Force const [mDyne/A]         1.5710              1.5710              1.5710
1143        #   Turning point v=0 [a0]        0.2604              0.2604              0.2604
1144        #   RMS dev v=0 [a0 u^1/2]        0.2002              0.2002              0.2002
1145        #   Char temp [K]               2160.9731           2160.9731           2160.9731
1146        #   ----------------------------------------------------------------------------------
1147        #       1   C               -0.00  0.01  0.13   -0.00 -0.13  0.01   -0.13  0.00 -0.00
1148        #       2   H                0.33 -0.03 -0.38    0.02  0.60 -0.02    0.14 -0.01 -0.32
1149        #       3   H               -0.32 -0.03 -0.37   -0.01  0.60 -0.01    0.15 -0.01  0.33
1150        #       4   H                0.02  0.32 -0.36    0.01  0.16 -0.34    0.60 -0.01  0.01
1151        #       5   H                0.02 -0.33 -0.39    0.01  0.13  0.31    0.60  0.01  0.01
1152
1153        line = next(inputfile)
1154        assert 'Freq' in line
1155        chomp = line.split()
1156        vibfreqs = [Psi4.parse_vibfreq(x) for x in chomp[-n:]]
1157
1158        line = next(inputfile)
1159        assert 'Irrep' in line
1160        chomp = line.split()
1161        vibsyms = [irrep for irrep in chomp[1:]]
1162
1163        line = next(inputfile)
1164        assert 'Reduced mass' in line
1165        chomp = line.split()
1166        vibrmasses = [utils.float(x) for x in chomp[3:]]
1167
1168        line = next(inputfile)
1169        assert 'Force const' in line
1170        chomp = line.split()
1171        vibfconsts = [utils.float(x) for x in chomp[3:]]
1172
1173        line = next(inputfile)
1174        assert 'Turning point' in line
1175
1176        line = next(inputfile)
1177        assert 'RMS dev' in line
1178
1179        line = next(inputfile)
1180        assert 'Char temp' in line
1181
1182        line = next(inputfile)
1183        assert '---' in line
1184
1185        line = next(inputfile)
1186        vibdisps = [ [] for i in range(n)]
1187        while len(line.strip()) > 0:
1188            chomp = line.split()
1189            for i in range(n):
1190                start = len(chomp) - (n - i) * 3
1191                stop = start + 3
1192                mode_disps = [float(c) for c in chomp[start:stop]]
1193                vibdisps[i].append(mode_disps)
1194
1195            line = next(inputfile)
1196
1197        return vibfreqs, vibsyms, vibdisps, vibrmasses, vibfconsts
1198
1199    @staticmethod
1200    def parse_vibfreq(vibfreq):
1201        """Imaginary frequencies are printed as '12.34i', rather than
1202        '-12.34'.
1203        """
1204        is_imag = vibfreq[-1] == 'i'
1205        if is_imag:
1206            return -float(vibfreq[:-1])
1207        else:
1208            return float(vibfreq)
1209