1# -*- coding: utf-8 -*- 2# 3# Copyright (c) 2017, the cclib development team 4# 5# This file is part of cclib (http://cclib.github.io) and is distributed under 6# the terms of the BSD 3-Clause License. 7 8"""Parser for Psi4 output files.""" 9 10from collections import namedtuple 11 12import numpy 13 14from cclib.parser import data 15from cclib.parser import logfileparser 16from cclib.parser import utils 17 18 19class Psi4(logfileparser.Logfile): 20 """A Psi4 log file.""" 21 22 def __init__(self, *args, **kwargs): 23 24 # Call the __init__ method of the superclass 25 super(Psi4, self).__init__(logname="Psi4", *args, **kwargs) 26 27 def __str__(self): 28 """Return a string representation of the object.""" 29 return "Psi4 log file %s" % (self.filename) 30 31 def __repr__(self): 32 """Return a representation of the object.""" 33 return 'Psi4("%s")' % (self.filename) 34 35 def before_parsing(self): 36 37 # Early beta versions of Psi4 normalize basis function 38 # coefficients when printing. 39 self.version_4_beta = False 40 41 # This is just used to track which part of the output we are in, with 42 # changes triggered by ==> things like this <==. 43 self.section = None 44 45 # There are also sometimes subsections within each section, denoted 46 # with =>/<= rather than ==>/<==. 47 self.subsection = None 48 49 def after_parsing(self): 50 51 # Newer versions of Psi4 don't explicitly print the number of atoms. 52 if not hasattr(self, 'natom'): 53 if hasattr(self, 'atomnos'): 54 self.set_attribute('natom', len(self.atomnos)) 55 56 def normalisesym(self, label): 57 """Use standard symmetry labels instead of Psi4 labels. 58 59 To normalise: 60 (1) `App` -> `A"` 61 (2) `Ap` -> `A'` 62 """ 63 return label.replace("pp", '"').replace("p", "'") 64 65 # Match the number of skipped lines required based on the type of 66 # gradient present (determined from the header), as otherwise the 67 # parsing is identical. 68 GradientInfo = namedtuple('GradientInfo', ['gradient_type', 'header', 'skip_lines']) 69 GRADIENT_TYPES = { 70 'analytic': GradientInfo('analytic', 71 '-Total Gradient:', 72 ['header', 'dash header']), 73 'numerical': GradientInfo('numerical', 74 '## F-D gradient (Symmetry 0) ##', 75 ['Irrep num and total size', 'b', '123', 'b']), 76 } 77 GRADIENT_HEADERS = set([gradient_type.header 78 for gradient_type in GRADIENT_TYPES.values()]) 79 80 def extract(self, inputfile, line): 81 """Extract information from the file object inputfile.""" 82 83 # Extract the version number and the version control 84 # information, if it exists. 85 if "An Open-Source Ab Initio Electronic Structure Package" in line: 86 version_line = next(inputfile) 87 tokens = version_line.split() 88 package_version = tokens[1].split("-")[-1] 89 self.metadata["legacy_package_version"] = package_version 90 # Keep track of early versions of Psi4. 91 if "beta" in package_version: 92 self.version_4_beta = True 93 # `beta2+` -> `0!0.beta2` 94 package_version = "0!0.{}".format(package_version) 95 if package_version[-1] == "+": 96 # There is no good way to keep the bare plus sign around, 97 # but this version is so old... 98 package_version = package_version[:-1] 99 else: 100 package_version = "1!{}".format(package_version) 101 self.skip_line(inputfile, "blank") 102 line = next(inputfile) 103 if "Git:" in line: 104 tokens = line.split() 105 assert tokens[1] == "Rev" 106 revision = '-'.join(tokens[2:]).replace("{", "").replace("}", "") 107 dev_flag = "" if "dev" in package_version else ".dev" 108 package_version = "{}{}+{}".format( 109 package_version, dev_flag, revision 110 ) 111 self.metadata["package_version"] = package_version 112 113 # This will automatically change the section attribute for Psi4, when encountering 114 # a line that <== looks like this ==>, to whatever is in between. 115 if (line.strip()[:3] == "==>") and (line.strip()[-3:] == "<=="): 116 self.section = line.strip()[4:-4] 117 if self.section == "DFT Potential": 118 self.metadata["methods"].append("DFT") 119 120 # There is also the possibility of subsections. 121 if (line.strip()[:2] == "=>") and (line.strip()[-2:] == "<="): 122 self.subsection = line.strip()[3:-3] 123 124 # Determine whether or not the reference wavefunction is 125 # restricted, unrestricted, or restricted open-shell. 126 if line.strip() == "SCF": 127 while "Reference" not in line: 128 line = next(inputfile) 129 self.reference = line.split()[0] 130 # Work with a complex reference as if it's real. 131 if self.reference[0] == 'C': 132 self.reference = self.reference[1:] 133 134 # Parse the XC density functional 135 # => Composite Functional: B3LYP <= 136 if self.section == "DFT Potential" and "composite functional" in line.lower(): 137 chomp = line.split() 138 functional = chomp[-2] 139 self.metadata["functional"] = functional 140 141 # ==> Geometry <== 142 # 143 # Molecular point group: c2h 144 # Full point group: C2h 145 # 146 # Geometry (in Angstrom), charge = 0, multiplicity = 1: 147 # 148 # Center X Y Z 149 # ------------ ----------------- ----------------- ----------------- 150 # C -1.415253322400 0.230221785400 0.000000000000 151 # C 1.415253322400 -0.230221785400 0.000000000000 152 # ... 153 # 154 if (self.section == "Geometry") and ("Geometry (in Angstrom), charge" in line): 155 156 assert line.split()[3] == "charge" 157 charge = int(line.split()[5].strip(',')) 158 self.set_attribute('charge', charge) 159 160 assert line.split()[6] == "multiplicity" 161 mult = int(line.split()[8].strip(':')) 162 self.set_attribute('mult', mult) 163 164 self.skip_line(inputfile, "blank") 165 line = next(inputfile) 166 167 # Usually there is the header and dashes, but, for example, the coordinates 168 # printed when a geometry optimization finishes do not have it. 169 if line.split()[0] == "Center": 170 self.skip_line(inputfile, "dashes") 171 line = next(inputfile) 172 173 elements = [] 174 coords = [] 175 atommasses = [] 176 while line.strip(): 177 chomp = line.split() 178 el, x, y, z = chomp[:4] 179 if len(el) > 1: 180 el = el[0] + el[1:].lower() 181 elements.append(el) 182 coords.append([float(x), float(y), float(z)]) 183 # Newer versions of Psi4 print atomic masses. 184 if len(chomp) == 5: 185 atommasses.append(float(chomp[4])) 186 line = next(inputfile) 187 188 # The 0 is to handle the presence of ghost atoms. 189 self.set_attribute('atomnos', [self.table.number.get(el, 0) for el in elements]) 190 191 if not hasattr(self, 'atomcoords'): 192 self.atomcoords = [] 193 194 # This condition discards any repeated coordinates that Psi print. For example, 195 # geometry optimizations will print the coordinates at the beginning of and SCF 196 # section and also at the start of the gradient calculation. 197 if len(self.atomcoords) == 0 \ 198 or (self.atomcoords[-1] != coords and not hasattr(self, 'finite_difference')): 199 self.atomcoords.append(coords) 200 201 if len(atommasses) > 0: 202 if not hasattr(self, 'atommasses'): 203 self.atommasses = atommasses 204 205 # Psi4 repeats the charge and multiplicity after the geometry. 206 if (self.section == "Geometry") and (line[2:16].lower() == "charge ="): 207 charge = int(line.split()[-1]) 208 self.set_attribute('charge', charge) 209 if (self.section == "Geometry") and (line[2:16].lower() == "multiplicity ="): 210 mult = int(line.split()[-1]) 211 self.set_attribute('mult', mult) 212 213 # The printout for Psi4 has a more obvious trigger for the SCF parameter printout. 214 if (self.section == "Algorithm") and (line.strip() == "==> Algorithm <==") \ 215 and not hasattr(self, 'finite_difference'): 216 217 self.skip_line(inputfile, 'blank') 218 219 line = next(inputfile) 220 while line.strip(): 221 if "Energy threshold" in line: 222 etarget = float(line.split()[-1]) 223 if "Density threshold" in line: 224 dtarget = float(line.split()[-1]) 225 line = next(inputfile) 226 227 if not hasattr(self, "scftargets"): 228 self.scftargets = [] 229 self.scftargets.append([etarget, dtarget]) 230 231 # This section prints contraction information before the atomic basis set functions and 232 # is a good place to parse atombasis indices as well as atomnos. However, the section this line 233 # is in differs between HF and DFT outputs. 234 # 235 # -Contraction Scheme: 236 # Atom Type All Primitives // Shells: 237 # ------ ------ -------------------------- 238 # 1 C 6s 3p // 2s 1p 239 # 2 C 6s 3p // 2s 1p 240 # 3 C 6s 3p // 2s 1p 241 # ... 242 if self.section == "Primary Basis": 243 if line[2:12] == "Basis Set:": 244 self.metadata["basis_set"] = line.split()[2] 245 246 if (self.section == "Primary Basis" or self.section == "DFT Potential") and line.strip() == "-Contraction Scheme:": 247 248 self.skip_lines(inputfile, ['headers', 'd']) 249 250 atomnos = [] 251 atombasis = [] 252 atombasis_pos = 0 253 line = next(inputfile) 254 while line.strip(): 255 256 element = line.split()[1] 257 if len(element) > 1: 258 element = element[0] + element[1:].lower() 259 atomnos.append(self.table.number[element]) 260 261 # To count the number of atomic orbitals for the atom, sum up the orbitals 262 # in each type of shell, times the numbers of shells. Currently, we assume 263 # the multiplier is a single digit and that there are only s and p shells, 264 # which will need to be extended later when considering larger basis sets, 265 # with corrections for the cartesian/spherical cases. 266 ao_count = 0 267 shells = line.split('//')[1].split() 268 for s in shells: 269 count, type = s 270 multiplier = 3*(type == 'p') or 1 271 ao_count += multiplier*int(count) 272 273 if len(atombasis) > 0: 274 atombasis_pos = atombasis[-1][-1] + 1 275 atombasis.append(list(range(atombasis_pos, atombasis_pos+ao_count))) 276 277 line = next(inputfile) 278 279 self.set_attribute('natom', len(atomnos)) 280 self.set_attribute('atomnos', atomnos) 281 self.set_attribute('atombasis', atombasis) 282 283 # The atomic basis set is straightforward to parse, but there are some complications 284 # when symmetry is used, because in that case Psi4 only print the symmetry-unique atoms, 285 # and the list of symmetry-equivalent ones is not printed. Therefore, for simplicity here 286 # when an atomic is missing (atom indices are printed) assume the atomic orbitals of the 287 # last atom of the same element before it. This might not work if a mixture of basis sets 288 # is used somehow... but it should cover almost all cases for now. 289 # 290 # Note that Psi also print normalized coefficients (details below). 291 # 292 # ==> AO Basis Functions <== 293 # 294 # [ STO-3G ] 295 # spherical 296 # **** 297 # C 1 298 # S 3 1.00 299 # 71.61683700 2.70781445 300 # 13.04509600 2.61888016 301 # ... 302 if (self.section == "AO Basis Functions") and (line.strip() == "==> AO Basis Functions <=="): 303 304 def get_symmetry_atom_basis(gbasis): 305 """Get symmetry atom by replicating the last atom in gbasis of the same element.""" 306 307 missing_index = len(gbasis) 308 missing_atomno = self.atomnos[missing_index] 309 310 ngbasis = len(gbasis) 311 last_same = ngbasis - self.atomnos[:ngbasis][::-1].index(missing_atomno) - 1 312 return gbasis[last_same] 313 314 dfact = lambda n: (n <= 0) or n * dfact(n-2) 315 316 # Early beta versions of Psi4 normalize basis function 317 # coefficients when printing. 318 if self.version_4_beta: 319 def get_normalization_factor(exp, lx, ly, lz): 320 norm_s = (2*exp/numpy.pi)**0.75 321 if lx + ly + lz > 0: 322 nom = (4*exp)**((lx+ly+lz)/2.0) 323 den = numpy.sqrt(dfact(2*lx-1) * dfact(2*ly-1) * dfact(2*lz-1)) 324 return norm_s * nom / den 325 else: 326 return norm_s 327 else: 328 get_normalization_factor = lambda exp, lx, ly, lz: 1 329 330 self.skip_lines(inputfile, ['b', 'basisname']) 331 332 line = next(inputfile) 333 spherical = line.strip() == "spherical" 334 if hasattr(self, 'spherical_basis'): 335 assert self.spherical_basis == spherical 336 else: 337 self.spherical_basis = spherical 338 339 gbasis = [] 340 self.skip_line(inputfile, 'stars') 341 line = next(inputfile) 342 while line.strip(): 343 344 element, index = line.split() 345 if len(element) > 1: 346 element = element[0] + element[1:].lower() 347 index = int(index) 348 349 # This is the code that adds missing atoms when symmetry atoms are excluded 350 # from the basis set printout. Again, this will work only if all atoms of 351 # the same element use the same basis set. 352 while index > len(gbasis) + 1: 353 gbasis.append(get_symmetry_atom_basis(gbasis)) 354 355 gbasis.append([]) 356 line = next(inputfile) 357 while line.find("*") == -1: 358 359 # The shell type and primitive count is in the first line. 360 shell_type, nprimitives, _ = line.split() 361 nprimitives = int(nprimitives) 362 363 # Get the angular momentum for this shell type. 364 momentum = {'S': 0, 'P': 1, 'D': 2, 'F': 3, 'G': 4, 'H': 5, 'I': 6}[shell_type.upper()] 365 366 # Read in the primitives. 367 primitives_lines = [next(inputfile) for i in range(nprimitives)] 368 primitives = [list(map(float, pl.split())) for pl in primitives_lines] 369 370 # Un-normalize the coefficients. Psi prints the normalized coefficient 371 # of the highest polynomial, namely XX for D orbitals, XXX for F, and so on. 372 for iprim, prim in enumerate(primitives): 373 exp, coef = prim 374 coef = coef / get_normalization_factor(exp, momentum, 0, 0) 375 primitives[iprim] = [exp, coef] 376 377 primitives = [tuple(p) for p in primitives] 378 shell = [shell_type, primitives] 379 gbasis[-1].append(shell) 380 381 line = next(inputfile) 382 383 line = next(inputfile) 384 385 # We will also need to add symmetry atoms that are missing from the input 386 # at the end of this block, if the symmetry atoms are last. 387 while len(gbasis) < self.natom: 388 gbasis.append(get_symmetry_atom_basis(gbasis)) 389 390 self.gbasis = gbasis 391 392 # A block called 'Calculation Information' prints these before starting the SCF. 393 if (self.section == "Pre-Iterations") and ("Number of atoms" in line): 394 natom = int(line.split()[-1]) 395 self.set_attribute('natom', natom) 396 if (self.section == "Pre-Iterations") and ("Number of atomic orbitals" in line): 397 nbasis = int(line.split()[-1]) 398 self.set_attribute('nbasis', nbasis) 399 if (self.section == "Pre-Iterations") and ("Total" in line): 400 chomp = line.split() 401 nbasis = int(chomp[1]) 402 self.set_attribute('nbasis', nbasis) 403 404 # ==> Iterations <== 405 406 # Psi4 converges both the SCF energy and density elements and reports both in the 407 # iterations printout. However, the default convergence scheme involves a density-fitted 408 # algorithm for efficiency, and this is often followed by a something with exact electron 409 # repulsion integrals. In that case, there are actually two convergence cycles performed, 410 # one for the density-fitted algorithm and one for the exact one, and the iterations are 411 # printed in two blocks separated by some set-up information. 412 if (self.section == "Iterations") and (line.strip() == "==> Iterations <==") \ 413 and not hasattr(self, 'finite_difference'): 414 415 if not hasattr(self, 'scfvalues'): 416 self.scfvalues = [] 417 scfvals = [] 418 419 self.skip_lines(inputfile, ['b', 'header', 'b']) 420 line = next(inputfile) 421 # Read each SCF iteration. 422 while line.strip() != "==> Post-Iterations <==": 423 if line.strip() and line.split()[0][0] == '@': 424 denergy = float(line.split()[4]) 425 ddensity = float(line.split()[5]) 426 scfvals.append([denergy, ddensity]) 427 try: 428 line = next(inputfile) 429 except StopIteration: 430 self.logger.warning('File terminated before end of last SCF! Last density err: {}'.format(ddensity)) 431 break 432 self.section = "Post-Iterations" 433 self.scfvalues.append(scfvals) 434 435 # This section, from which we parse molecular orbital symmetries and 436 # orbital energies, is quite similar for both Psi3 and Psi4, and in fact 437 # the format for orbtials is the same, although the headers and spacers 438 # are a bit different. Let's try to get both parsed with one code block. 439 # 440 # Here is how the block looks like for Psi4: 441 # 442 # Orbital Energies (a.u.) 443 # ----------------------- 444 # 445 # Doubly Occupied: 446 # 447 # 1Bu -11.040586 1Ag -11.040524 2Bu -11.031589 448 # 2Ag -11.031589 3Bu -11.028950 3Ag -11.028820 449 # (...) 450 # 15Ag -0.415620 1Bg -0.376962 2Au -0.315126 451 # 2Bg -0.278361 3Bg -0.222189 452 # 453 # Virtual: 454 # 455 # 3Au 0.198995 4Au 0.268517 4Bg 0.308826 456 # 5Au 0.397078 5Bg 0.521759 16Ag 0.565017 457 # (...) 458 # 24Ag 0.990287 24Bu 1.027266 25Ag 1.107702 459 # 25Bu 1.124938 460 # 461 # The case is different in the trigger string. 462 if ("orbital energies (a.u.)" in line.lower() or "orbital energies [eh]" in line.lower()) \ 463 and not hasattr(self, 'finite_difference'): 464 465 # If this is Psi4, we will be in the appropriate section. 466 assert self.section == "Post-Iterations" 467 468 self.moenergies = [[]] 469 self.mosyms = [[]] 470 471 # Psi4 has dashes under the trigger line, but Psi3 did not. 472 self.skip_line(inputfile, 'dashes') 473 self.skip_line(inputfile, 'blank') 474 475 # Both versions have this case-insensitive substring. 476 occupied = next(inputfile) 477 if self.reference[0:2] == 'RO' or self.reference[0:1] == 'R': 478 assert 'doubly occupied' in occupied.lower() 479 elif self.reference[0:1] == 'U': 480 assert 'alpha occupied' in occupied.lower() 481 482 self.skip_line(inputfile, 'blank') 483 484 # Parse the occupied MO symmetries and energies. 485 self._parse_mosyms_moenergies(inputfile, 0) 486 487 # The last orbital energy here represents the HOMO. 488 self.homos = [len(self.moenergies[0])-1] 489 # For a restricted open-shell calculation, this is the 490 # beta HOMO, and we assume the singly-occupied orbitals 491 # are all alpha, which are handled next. 492 if self.reference[0:2] == 'RO': 493 self.homos.append(self.homos[0]) 494 495 unoccupied = next(inputfile) 496 if self.reference[0:2] == 'RO': 497 assert unoccupied.strip() == 'Singly Occupied:' 498 elif self.reference[0:1] == 'R': 499 assert unoccupied.strip() == 'Virtual:' 500 elif self.reference[0:1] == 'U': 501 assert unoccupied.strip() == 'Alpha Virtual:' 502 503 # Psi4 now has a blank line, Psi3 does not. 504 self.skip_line(inputfile, 'blank') 505 506 # Parse the unoccupied MO symmetries and energies. 507 self._parse_mosyms_moenergies(inputfile, 0) 508 509 # Here is where we handle the Beta or Singly occupied orbitals. 510 if self.reference[0:1] == 'U': 511 self.mosyms.append([]) 512 self.moenergies.append([]) 513 line = next(inputfile) 514 assert line.strip() == 'Beta Occupied:' 515 self.skip_line(inputfile, 'blank') 516 self._parse_mosyms_moenergies(inputfile, 1) 517 self.homos.append(len(self.moenergies[1])-1) 518 line = next(inputfile) 519 assert line.strip() == 'Beta Virtual:' 520 self.skip_line(inputfile, 'blank') 521 self._parse_mosyms_moenergies(inputfile, 1) 522 elif self.reference[0:2] == 'RO': 523 line = next(inputfile) 524 assert line.strip() == 'Virtual:' 525 self.skip_line(inputfile, 'blank') 526 self._parse_mosyms_moenergies(inputfile, 0) 527 528 line = next(inputfile) 529 assert line.strip() == 'Final Occupation by Irrep:' 530 line = next(inputfile) 531 irreps = line.split() 532 line = next(inputfile) 533 tokens = line.split() 534 assert tokens[0] == 'DOCC' 535 docc = sum([int(x.replace(',', '')) for x in tokens[2:-1]]) 536 line = next(inputfile) 537 if line.strip(): 538 tokens = line.split() 539 assert tokens[0] in ('SOCC', 'NA') 540 socc = sum([int(x.replace(',', '')) for x in tokens[2:-1]]) 541 # Fix up the restricted open-shell alpha HOMO. 542 if self.reference[0:2] == 'RO': 543 self.homos[0] += socc 544 545 # Both Psi3 and Psi4 print the final SCF energy right after the orbital energies, 546 # but the label is different. Psi4 also does DFT, and the label is also different in that case. 547 if self.section == "Post-Iterations" and "Final Energy:" in line \ 548 and not hasattr(self, 'finite_difference'): 549 e = float(line.split()[3]) 550 if not hasattr(self, 'scfenergies'): 551 self.scfenergies = [] 552 self.scfenergies.append(utils.convertor(e, 'hartree', 'eV')) 553 554 if self.subsection == "Energetics": 555 if "Empirical Dispersion Energy" in line: 556 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV") 557 self.append_attribute("dispersionenergies", dispersion) 558 559 # ==> Molecular Orbitals <== 560 # 561 # 1 2 3 4 5 562 # 563 # 1 H1 s0 0.1610392 0.1040990 0.0453848 0.0978665 1.0863246 564 # 2 H1 s0 0.3066996 0.0742959 0.8227318 1.3460922 -1.6429494 565 # 3 H1 s0 0.1669296 1.5494169 -0.8885631 -1.8689490 1.0473633 566 # 4 H2 s0 0.1610392 -0.1040990 0.0453848 -0.0978665 -1.0863246 567 # 5 H2 s0 0.3066996 -0.0742959 0.8227318 -1.3460922 1.6429494 568 # 6 H2 s0 0.1669296 -1.5494169 -0.8885631 1.8689490 -1.0473633 569 # 570 # Ene -0.5279195 0.1235556 0.3277474 0.5523654 2.5371710 571 # Sym Ag B3u Ag B3u B3u 572 # Occ 2 0 0 0 0 573 # 574 # 575 # 6 576 # 577 # 1 H1 s0 1.1331221 578 # 2 H1 s0 -1.2163107 579 # 3 H1 s0 0.4695317 580 # 4 H2 s0 1.1331221 581 # 5 H2 s0 -1.2163107 582 # 6 H2 s0 0.4695317 583 # 584 # Ene 2.6515637 585 # Sym Ag 586 # Occ 0 587 588 if (self.section) and ("Molecular Orbitals" in self.section) \ 589 and ("Molecular Orbitals" in line) and not hasattr(self, 'finite_difference'): 590 591 self.skip_line(inputfile, 'blank') 592 593 mocoeffs = [] 594 indices = next(inputfile) 595 while indices.strip(): 596 597 if indices[:3] == '***': 598 break 599 600 indices = [int(i) for i in indices.split()] 601 602 if len(mocoeffs) < indices[-1]: 603 for i in range(len(indices)): 604 mocoeffs.append([]) 605 else: 606 assert len(mocoeffs) == indices[-1] 607 608 self.skip_line(inputfile, 'blank') 609 610 n = len(indices) 611 line = next(inputfile) 612 while line.strip(): 613 chomp = line.split() 614 m = len(chomp) 615 iao = int(chomp[0]) 616 coeffs = [float(c) for c in chomp[m - n:]] 617 for i, c in enumerate(coeffs): 618 mocoeffs[indices[i]-1].append(c) 619 line = next(inputfile) 620 621 energies = next(inputfile) 622 symmetries = next(inputfile) 623 occupancies = next(inputfile) 624 625 self.skip_lines(inputfile, ['b', 'b']) 626 indices = next(inputfile) 627 628 if not hasattr(self, 'mocoeffs'): 629 self.mocoeffs = [] 630 self.mocoeffs.append(mocoeffs) 631 632 # The formats for Mulliken and Lowdin atomic charges are the same, just with 633 # the name changes, so use the same code for both. 634 # 635 # Properties computed using the SCF density density matrix 636 # Mulliken Charges: (a.u.) 637 # Center Symbol Alpha Beta Spin Total 638 # 1 C 2.99909 2.99909 0.00000 0.00182 639 # 2 C 2.99909 2.99909 0.00000 0.00182 640 # ... 641 for pop_type in ["Mulliken", "Lowdin"]: 642 if line.strip() == "%s Charges: (a.u.)" % pop_type: 643 if not hasattr(self, 'atomcharges'): 644 self.atomcharges = {} 645 header = next(inputfile) 646 647 line = next(inputfile) 648 while not line.strip(): 649 line = next(inputfile) 650 651 charges = [] 652 while line.strip(): 653 ch = float(line.split()[-1]) 654 charges.append(ch) 655 line = next(inputfile) 656 self.atomcharges[pop_type.lower()] = charges 657 658 # This is for the older conventional MP2 code in 4.0b5. 659 mp_trigger = "MP2 Total Energy (a.u.)" 660 if line.strip()[:len(mp_trigger)] == mp_trigger: 661 self.metadata["methods"].append("MP2") 662 mpenergy = utils.convertor(float(line.split()[-1]), 'hartree', 'eV') 663 if not hasattr(self, 'mpenergies'): 664 self.mpenergies = [] 665 self.mpenergies.append([mpenergy]) 666 # This is for the newer DF-MP2 code in 4.0. 667 if 'DF-MP2 Energies' in line: 668 self.metadata["methods"].append("DF-MP2") 669 while 'Total Energy' not in line: 670 line = next(inputfile) 671 mpenergy = utils.convertor(float(line.split()[3]), 'hartree', 'eV') 672 if not hasattr(self, 'mpenergies'): 673 self.mpenergies = [] 674 self.mpenergies.append([mpenergy]) 675 676 # Note this is just a start and needs to be modified for CCSD(T), etc. 677 ccsd_trigger = "* CCSD total energy" 678 if line.strip()[:len(ccsd_trigger)] == ccsd_trigger: 679 self.metadata["methods"].append("CCSD") 680 ccsd_energy = utils.convertor(float(line.split()[-1]), 'hartree', 'eV') 681 if not hasattr(self, "ccenergis"): 682 self.ccenergies = [] 683 self.ccenergies.append(ccsd_energy) 684 685 # The geometry convergence targets and values are printed in a table, with the legends 686 # describing the convergence annotation. Probably exact slicing of the line needs 687 # to be done in order to extract the numbers correctly. If there are no values for 688 # a paritcular target it means they are not used (marked also with an 'o'), and in this case 689 # we will set a value of numpy.inf so that any value will be smaller. 690 # 691 # ==> Convergence Check <== 692 # 693 # Measures of convergence in internal coordinates in au. 694 # Criteria marked as inactive (o), active & met (*), and active & unmet ( ). 695 # --------------------------------------------------------------------------------------------- 696 # Step Total Energy Delta E MAX Force RMS Force MAX Disp RMS Disp 697 # --------------------------------------------------------------------------------------------- 698 # Convergence Criteria 1.00e-06 * 3.00e-04 * o 1.20e-03 * o 699 # --------------------------------------------------------------------------------------------- 700 # 2 -379.77675264 -7.79e-03 1.88e-02 4.37e-03 o 2.29e-02 6.76e-03 o ~ 701 # --------------------------------------------------------------------------------------------- 702 # 703 if (self.section == "Convergence Check") and line.strip() == "==> Convergence Check <==" \ 704 and not hasattr(self, 'finite_difference'): 705 706 if not hasattr(self, "optstatus"): 707 self.optstatus = [] 708 self.optstatus.append(data.ccData.OPT_UNKNOWN) 709 710 self.skip_lines(inputfile, ['b', 'units', 'comment', 'dash+tilde', 'header', 'dash+tilde']) 711 712 # These are the position in the line at which numbers should start. 713 starts = [27, 41, 55, 69, 83] 714 715 criteria = next(inputfile) 716 geotargets = [] 717 for istart in starts: 718 if criteria[istart:istart+9].strip(): 719 geotargets.append(float(criteria[istart:istart+9])) 720 else: 721 geotargets.append(numpy.inf) 722 723 self.skip_line(inputfile, 'dashes') 724 725 values = next(inputfile) 726 step = int(values.split()[0]) 727 geovalues = [] 728 for istart in starts: 729 if values[istart:istart+9].strip(): 730 geovalues.append(float(values[istart:istart+9])) 731 732 if step == 1: 733 self.optstatus[-1] += data.ccData.OPT_NEW 734 735 # This assertion may be too restrictive, but we haven't seen the geotargets change. 736 # If such an example comes up, update the value since we're interested in the last ones. 737 if not hasattr(self, 'geotargets'): 738 self.geotargets = geotargets 739 else: 740 assert self.geotargets == geotargets 741 742 if not hasattr(self, 'geovalues'): 743 self.geovalues = [] 744 self.geovalues.append(geovalues) 745 746 # This message signals a converged optimization, in which case we want 747 # to append the index for this step to optdone, which should be equal 748 # to the number of geovalues gathered so far. 749 if "Optimization is complete!" in line: 750 751 # This is a workaround for Psi4.0/sample_opt-irc-2.out; 752 # IRC calculations currently aren't parsed properly for 753 # optimization parameters. 754 if hasattr(self, 'geovalues'): 755 756 if not hasattr(self, 'optdone'): 757 self.optdone = [] 758 self.optdone.append(len(self.geovalues)) 759 760 assert hasattr(self, "optstatus") and len(self.optstatus) > 0 761 self.optstatus[-1] += data.ccData.OPT_DONE 762 763 # This message means that optimization has stopped for some reason, but we 764 # still want optdone to exist in this case, although it will be an empty list. 765 if line.strip() == "Optimizer: Did not converge!": 766 if not hasattr(self, 'optdone'): 767 self.optdone = [] 768 769 assert hasattr(self, "optstatus") and len(self.optstatus) > 0 770 self.optstatus[-1] += data.ccData.OPT_UNCONVERGED 771 772 # The reference point at which properties are evaluated in Psi4 is explicitely stated, 773 # so we can save it for later. It is not, however, a part of the Properties section, 774 # but it appears before it and also in other places where properies that might depend 775 # on it are printed. 776 # 777 # Properties will be evaluated at 0.000000, 0.000000, 0.000000 Bohr 778 # 779 # OR 780 # 781 # Properties will be evaluated at 0.000000, 0.000000, 0.000000 [a0] 782 # 783 if "Properties will be evaluated at" in line.strip(): 784 self.origin = numpy.array([float(x.strip(',')) for x in line.split()[-4:-1]]) 785 assert line.split()[-1] in ["Bohr", "[a0]"] 786 self.origin = utils.convertor(self.origin, 'bohr', 'Angstrom') 787 788 # The properties section print the molecular dipole moment: 789 # 790 # ==> Properties <== 791 # 792 # 793 #Properties computed using the SCF density density matrix 794 # Nuclear Dipole Moment: (a.u.) 795 # X: 0.0000 Y: 0.0000 Z: 0.0000 796 # 797 # Electronic Dipole Moment: (a.u.) 798 # X: 0.0000 Y: 0.0000 Z: 0.0000 799 # 800 # Dipole Moment: (a.u.) 801 # X: 0.0000 Y: 0.0000 Z: 0.0000 Total: 0.0000 802 # 803 if (self.section == "Properties") and line.strip() == "Dipole Moment: (a.u.)": 804 805 line = next(inputfile) 806 dipole = numpy.array([float(line.split()[1]), float(line.split()[3]), float(line.split()[5])]) 807 dipole = utils.convertor(dipole, "ebohr", "Debye") 808 809 if not hasattr(self, 'moments'): 810 # Old versions of Psi4 don't print the origin; assume 811 # it's at zero. 812 if not hasattr(self, 'origin'): 813 self.origin = numpy.array([0.0, 0.0, 0.0]) 814 self.moments = [self.origin, dipole] 815 else: 816 try: 817 assert numpy.all(self.moments[1] == dipole) 818 except AssertionError: 819 self.logger.warning('Overwriting previous multipole moments with new values') 820 self.logger.warning('This could be from post-HF properties or geometry optimization') 821 self.moments = [self.origin, dipole] 822 823 # Higher multipole moments are printed separately, on demand, in lexicographical order. 824 # 825 # Multipole Moments: 826 # 827 # ------------------------------------------------------------------------------------ 828 # Multipole Electric (a.u.) Nuclear (a.u.) Total (a.u.) 829 # ------------------------------------------------------------------------------------ 830 # 831 # L = 1. Multiply by 2.5417462300 to convert to Debye 832 # Dipole X : 0.0000000 0.0000000 0.0000000 833 # Dipole Y : 0.0000000 0.0000000 0.0000000 834 # Dipole Z : 0.0000000 0.0000000 0.0000000 835 # 836 # L = 2. Multiply by 1.3450341749 to convert to Debye.ang 837 # Quadrupole XX : -1535.8888701 1496.8839996 -39.0048704 838 # Quadrupole XY : -11.5262958 11.4580038 -0.0682920 839 # ... 840 # 841 if line.strip() == "Multipole Moments:": 842 843 self.skip_lines(inputfile, ['b', 'd', 'header', 'd', 'b']) 844 845 # The reference used here should have been printed somewhere 846 # before the properties and parsed above. 847 moments = [self.origin] 848 849 line = next(inputfile) 850 while "----------" not in line.strip(): 851 852 rank = int(line.split()[2].strip('.')) 853 854 multipole = [] 855 line = next(inputfile) 856 while line.strip(): 857 858 value = float(line.split()[-1]) 859 fromunits = "ebohr" + (rank > 1)*("%i" % rank) 860 tounits = "Debye" + (rank > 1)*".ang" + (rank > 2)*("%i" % (rank-1)) 861 value = utils.convertor(value, fromunits, tounits) 862 multipole.append(value) 863 864 line = next(inputfile) 865 866 multipole = numpy.array(multipole) 867 moments.append(multipole) 868 line = next(inputfile) 869 870 if not hasattr(self, 'moments'): 871 self.moments = moments 872 else: 873 for im, m in enumerate(moments): 874 if len(self.moments) <= im: 875 self.moments.append(m) 876 else: 877 assert numpy.allclose(self.moments[im], m, atol=1.0e4) 878 879 ## Analytic Gradient 880 # -Total Gradient: 881 # Atom X Y Z 882 # ------ ----------------- ----------------- ----------------- 883 # 1 -0.000000000000 0.000000000000 -0.064527252292 884 # 2 0.000000000000 -0.028380539652 0.032263626146 885 # 3 -0.000000000000 0.028380539652 0.032263626146 886 887 ## Finite Differences Gradient 888 # ------------------------------------------------------------- 889 # ## F-D gradient (Symmetry 0) ## 890 # Irrep: 1 Size: 3 x 3 891 # 892 # 1 2 3 893 # 894 # 1 0.00000000000000 0.00000000000000 -0.02921303282515 895 # 2 0.00000000000000 -0.00979709321487 0.01460651641258 896 # 3 0.00000000000000 0.00979709321487 0.01460651641258 897 if line.strip() in Psi4.GRADIENT_HEADERS: 898 899 # Handle the different header lines between analytic and 900 # numerical gradients. 901 gradient_skip_lines = [ 902 info.skip_lines 903 for info in Psi4.GRADIENT_TYPES.values() 904 if info.header == line.strip() 905 ][0] 906 gradient = self.parse_gradient(inputfile, gradient_skip_lines) 907 908 if not hasattr(self, 'grads'): 909 self.grads = [] 910 self.grads.append(gradient) 911 912 # OLD Normal mode output parser (PSI4 < 1) 913 914 ## Harmonic frequencies. 915 916 # ------------------------------------------------------------- 917 918 # Computing second-derivative from gradients using projected, 919 # symmetry-adapted, cartesian coordinates (fd_freq_1). 920 921 # 74 gradients passed in, including the reference geometry. 922 # Generating complete list of displacements from unique ones. 923 924 # Operation 2 takes plus displacements of irrep Bg to minus ones. 925 # Operation 3 takes plus displacements of irrep Au to minus ones. 926 # Operation 2 takes plus displacements of irrep Bu to minus ones. 927 928 # Irrep Harmonic Frequency 929 # (cm-1) 930 # ----------------------------------------------- 931 # Au 137.2883 932 if line.strip() == 'Irrep Harmonic Frequency': 933 934 vibsyms = [] 935 vibfreqs = [] 936 937 self.skip_lines(inputfile, ['(cm-1)', 'dashes']) 938 939 ## The first section contains the symmetry of each normal 940 ## mode and its frequency. 941 line = next(inputfile) 942 while '---' not in line: 943 chomp = line.split() 944 vibsym = chomp[0] 945 vibfreq = Psi4.parse_vibfreq(chomp[1]) 946 vibsyms.append(vibsym) 947 vibfreqs.append(vibfreq) 948 line = next(inputfile) 949 950 self.set_attribute('vibsyms', vibsyms) 951 self.set_attribute('vibfreqs', vibfreqs) 952 953 line = next(inputfile) 954 assert line.strip() == '' 955 line = next(inputfile) 956 assert 'Normal Modes' in line 957 line = next(inputfile) 958 assert 'Molecular mass is' in line 959 if hasattr(self, 'atommasses'): 960 assert abs(float(line.split()[3]) - sum(self.atommasses)) < 1.0e-4 961 line = next(inputfile) 962 assert line.strip() == 'Frequencies in cm^-1; force constants in au.' 963 line = next(inputfile) 964 assert line.strip() == '' 965 line = next(inputfile) 966 967 ## The second section contains the frequency, force 968 ## constant, and displacement for each normal mode, along 969 ## with the atomic masses. 970 971 # Normal Modes (non-mass-weighted). 972 # Molecular mass is 130.07825 amu. 973 # Frequencies in cm^-1; force constants in au. 974 975 # Frequency: 137.29 976 # Force constant: 0.0007 977 # X Y Z mass 978 # C 0.000 0.000 0.050 12.000000 979 # C 0.000 0.000 0.050 12.000000 980 for vibfreq in self.vibfreqs: 981 _vibfreq = Psi4.parse_vibfreq(line[13:].strip()) 982 assert abs(vibfreq - _vibfreq) < 1.0e-2 983 line = next(inputfile) 984 assert 'Force constant:' in line 985 if not hasattr(self, "vibfconsts"): 986 self.vibfconsts = [] 987 self.vibfconsts.append( 988 utils.convertor(float(line.split()[2]), "hartree/bohr2", "mDyne/angstrom") 989 ) 990 line = next(inputfile) 991 assert 'X Y Z mass' in line 992 line = next(inputfile) 993 if not hasattr(self, 'vibdisps'): 994 self.vibdisps = [] 995 normal_mode_disps = [] 996 # for k in range(self.natom): 997 while line.strip(): 998 chomp = line.split() 999 # Do nothing with this for now. 1000 atomsym = chomp[0] 1001 atomcoords = [float(x) for x in chomp[1:4]] 1002 # Do nothing with this for now. 1003 atommass = float(chomp[4]) 1004 normal_mode_disps.append(atomcoords) 1005 line = next(inputfile) 1006 self.vibdisps.append(normal_mode_disps) 1007 line = next(inputfile) 1008 1009 # NEW Normal mode output parser (PSI4 >= 1) 1010 1011 # ==> Harmonic Vibrational Analysis <== 1012 # ... 1013 # Vibration 7 8 9 1014 # ... 1015 # 1016 # Vibration 10 11 12 1017 # ... 1018 1019 if line.strip() == '==> Harmonic Vibrational Analysis <==': 1020 1021 vibsyms = [] 1022 vibfreqs = [] 1023 vibdisps = [] 1024 vibrmasses = [] 1025 vibfconsts = [] 1026 1027 # Skip lines till the first Vibration block 1028 while not line.strip().startswith('Vibration'): 1029 line = next(inputfile) 1030 1031 n_modes = 0 1032 # Parse all the Vibration blocks 1033 while line.strip().startswith('Vibration'): 1034 n = len(line.split()) - 1 1035 n_modes += n 1036 vibfreqs_, vibsyms_, vibdisps_, vibrmasses_, vibfconsts_ = self.parse_vibration(n, inputfile) 1037 vibfreqs.extend(vibfreqs_) 1038 vibsyms.extend(vibsyms_) 1039 vibdisps.extend(vibdisps_) 1040 vibrmasses.extend(vibrmasses_) 1041 vibfconsts.extend(vibfconsts_) 1042 line = next(inputfile) 1043 1044 # It looks like the symmetry of the normal mode may be missing 1045 # from some / most. Only include them if they are there for all 1046 1047 if len(vibfreqs) == n_modes: 1048 self.set_attribute('vibfreqs', vibfreqs) 1049 1050 if len(vibsyms) == n_modes: 1051 self.set_attribute('vibsyms', vibsyms) 1052 1053 if len(vibdisps) == n_modes: 1054 self.set_attribute('vibdisps', vibdisps) 1055 1056 if len(vibdisps) == n_modes: 1057 self.set_attribute('vibrmasses', vibrmasses) 1058 1059 if len(vibdisps) == n_modes: 1060 self.set_attribute('vibfconsts', vibfconsts) 1061 1062 # Second one is 1.0, first one is 1.2 and newer 1063 if (self.section == "Thermochemistry Energy Analysis" and "Thermochemistry Energy Analysis" in line) \ 1064 or (self.section == "Energy Analysis" and "Energy Analysis" in line): 1065 1066 self.skip_lines( 1067 inputfile, 1068 [ 1069 "b", 1070 "Raw electronic energy", 1071 "Total E0", 1072 "b", 1073 "Zero-point energy, ZPE_vib = Sum_i nu_i / 2", 1074 "Electronic ZPE", 1075 "Translational ZPE", 1076 "Rotational ZPE" 1077 ] 1078 ) 1079 line = next(inputfile) 1080 assert "Vibrational ZPE" in line 1081 self.set_attribute("zpve", float(line.split()[6])) 1082 1083 # If finite difference is used to compute forces (i.e. by displacing 1084 # slightly all the atoms), a series of additional scf calculations is 1085 # performed. Orbitals, geometries, energies, etc. for these shouln't be 1086 # included in the parsed data. 1087 1088 if line.strip().startswith('Using finite-differences of gradients'): 1089 self.set_attribute('finite_difference', True) 1090 1091 # This is the result of calling `print_variables()` and contains all 1092 # current inner variables known to Psi4. 1093 if line.strip() == "Variable Map:": 1094 self.skip_line(inputfile, "d") 1095 line = next(inputfile) 1096 while line.strip(): 1097 tokens = line.split() 1098 # Remove double quotation marks 1099 name = " ".join(tokens[:-2])[1:-1] 1100 value = float(tokens[-1]) 1101 if name == "CC T1 DIAGNOSTIC": 1102 self.metadata["t1_diagnostic"] = value 1103 line = next(inputfile) 1104 1105 if line[:54] == '*** Psi4 exiting successfully. Buy a developer a beer!'\ 1106 or line[:54] == '*** PSI4 exiting successfully. Buy a developer a beer!': 1107 self.metadata['success'] = True 1108 1109 def _parse_mosyms_moenergies(self, inputfile, spinidx): 1110 """Parse molecular orbital symmetries and energies from the 1111 'Post-Iterations' section. 1112 """ 1113 line = next(inputfile) 1114 while line.strip(): 1115 for i in range(len(line.split()) // 2): 1116 self.mosyms[spinidx].append(line.split()[i*2][-2:]) 1117 moenergy = utils.convertor(float(line.split()[i*2+1]), "hartree", "eV") 1118 self.moenergies[spinidx].append(moenergy) 1119 line = next(inputfile) 1120 return 1121 1122 def parse_gradient(self, inputfile, skip_lines): 1123 """Parse the nuclear gradient section into a list of lists with shape 1124 [natom, 3]. 1125 """ 1126 self.skip_lines(inputfile, skip_lines) 1127 line = next(inputfile) 1128 gradient = [] 1129 1130 while line.strip(): 1131 idx, x, y, z = line.split() 1132 gradient.append((float(x), float(y), float(z))) 1133 line = next(inputfile) 1134 return gradient 1135 1136 @staticmethod 1137 def parse_vibration(n, inputfile): 1138 1139 # Freq [cm^-1] 1501.9533 1501.9533 1501.9533 1140 # Irrep 1141 # Reduced mass [u] 1.1820 1.1820 1.1820 1142 # Force const [mDyne/A] 1.5710 1.5710 1.5710 1143 # Turning point v=0 [a0] 0.2604 0.2604 0.2604 1144 # RMS dev v=0 [a0 u^1/2] 0.2002 0.2002 0.2002 1145 # Char temp [K] 2160.9731 2160.9731 2160.9731 1146 # ---------------------------------------------------------------------------------- 1147 # 1 C -0.00 0.01 0.13 -0.00 -0.13 0.01 -0.13 0.00 -0.00 1148 # 2 H 0.33 -0.03 -0.38 0.02 0.60 -0.02 0.14 -0.01 -0.32 1149 # 3 H -0.32 -0.03 -0.37 -0.01 0.60 -0.01 0.15 -0.01 0.33 1150 # 4 H 0.02 0.32 -0.36 0.01 0.16 -0.34 0.60 -0.01 0.01 1151 # 5 H 0.02 -0.33 -0.39 0.01 0.13 0.31 0.60 0.01 0.01 1152 1153 line = next(inputfile) 1154 assert 'Freq' in line 1155 chomp = line.split() 1156 vibfreqs = [Psi4.parse_vibfreq(x) for x in chomp[-n:]] 1157 1158 line = next(inputfile) 1159 assert 'Irrep' in line 1160 chomp = line.split() 1161 vibsyms = [irrep for irrep in chomp[1:]] 1162 1163 line = next(inputfile) 1164 assert 'Reduced mass' in line 1165 chomp = line.split() 1166 vibrmasses = [utils.float(x) for x in chomp[3:]] 1167 1168 line = next(inputfile) 1169 assert 'Force const' in line 1170 chomp = line.split() 1171 vibfconsts = [utils.float(x) for x in chomp[3:]] 1172 1173 line = next(inputfile) 1174 assert 'Turning point' in line 1175 1176 line = next(inputfile) 1177 assert 'RMS dev' in line 1178 1179 line = next(inputfile) 1180 assert 'Char temp' in line 1181 1182 line = next(inputfile) 1183 assert '---' in line 1184 1185 line = next(inputfile) 1186 vibdisps = [ [] for i in range(n)] 1187 while len(line.strip()) > 0: 1188 chomp = line.split() 1189 for i in range(n): 1190 start = len(chomp) - (n - i) * 3 1191 stop = start + 3 1192 mode_disps = [float(c) for c in chomp[start:stop]] 1193 vibdisps[i].append(mode_disps) 1194 1195 line = next(inputfile) 1196 1197 return vibfreqs, vibsyms, vibdisps, vibrmasses, vibfconsts 1198 1199 @staticmethod 1200 def parse_vibfreq(vibfreq): 1201 """Imaginary frequencies are printed as '12.34i', rather than 1202 '-12.34'. 1203 """ 1204 is_imag = vibfreq[-1] == 'i' 1205 if is_imag: 1206 return -float(vibfreq[:-1]) 1207 else: 1208 return float(vibfreq) 1209