1import re
2from collections import OrderedDict
3import numpy as np
4
5from ase import Atoms
6from ase.units import Hartree, Bohr
7from ase.calculators.singlepoint import (SinglePointDFTCalculator,
8                                         SinglePointKPoint)
9from .parser import _define_pattern
10
11# Note to the reader of this code: Here and below we use the function
12# _define_pattern from parser.py in this same directory to compile
13# regular expressions. These compiled expressions are stored along with
14# an example string that the expression should match in a list that
15# is used during tests (test/nwchem/nwchem_parser.py) to ensure that
16# the regular expressions are still working correctly.
17
18# Matches the beginning of a GTO calculation
19_gauss_block = _define_pattern(
20    r'^[\s]+NWChem (?:SCF|DFT) Module\n$',
21    "                                 NWChem SCF Module\n",
22)
23
24
25# Matches the beginning of a plane wave calculation
26_pw_block = _define_pattern(
27    r'^[\s]+\*[\s]+NWPW (?:PSPW|BAND|PAW|Band Structure) Calculation'
28    r'[\s]+\*[\s]*\n$',
29    "          *               NWPW PSPW Calculation              *\n",
30)
31
32
33# Top-level parser
34def read_nwchem_out(fobj, index=-1):
35    """Splits an NWChem output file into chunks corresponding to
36    individual single point calculations."""
37    lines = fobj.readlines()
38
39    if index == slice(-1, None, None):
40        for line in lines:
41            if _gauss_block.match(line):
42                return [parse_gto_chunk(''.join(lines))]
43            if _pw_block.match(line):
44                return [parse_pw_chunk(''.join(lines))]
45        else:
46            raise ValueError('This does not appear to be a valid NWChem '
47                             'output file.')
48
49    # First, find each SCF block
50    group = []
51    atomslist = []
52    header = True
53    lastgroup = []
54    lastparser = None
55    parser = None
56    for line in lines:
57        group.append(line)
58        if _gauss_block.match(line):
59            next_parser = parse_gto_chunk
60        elif _pw_block.match(line):
61            next_parser = parse_pw_chunk
62        else:
63            continue
64
65        if header:
66            header = False
67        else:
68            atoms = parser(''.join(group))
69            if atoms is None and parser is lastparser:
70                atoms = parser(''.join(lastgroup + group))
71                if atoms is not None:
72                    atomslist[-1] = atoms
73                    lastgroup += group
74            else:
75                atomslist.append(atoms)
76                lastgroup = group
77                lastparser = parser
78            group = []
79        parser = next_parser
80    else:
81        if not header:
82            atoms = parser(''.join(group))
83            if atoms is not None:
84                atomslist.append(atoms)
85
86    return atomslist[index]
87
88
89# Matches a geometry block and returns the geometry specification lines
90_geom = _define_pattern(
91    r'\n[ \t]+Geometry \"[ \t\S]+\" -> \"[ \t\S]*\"[ \t]*\n'
92    r'^[ \t-]+\n'
93    r'(?:^[ \t\S]*\n){3}'
94    r'^[ \t]+No\.[ \t]+Tag[ \t]+Charge[ \t]+X[ \t]+Y[ \t]+Z\n'
95    r'^[ \t-]+\n'
96    r'((?:^(?:[ \t]+[\S]+){6}[ \t]*\n)+)',
97    """\
98
99                             Geometry "geometry" -> ""
100                             -------------------------
101
102 Output coordinates in angstroms (scale by  1.889725989 to convert to a.u.)
103
104  No.       Tag          Charge          X              Y              Z
105 ---- ---------------- ---------- -------------- -------------- --------------
106    1 C                    6.0000     0.00000000     0.00000000     0.00000000
107    2 H                    1.0000     0.62911800     0.62911800     0.62911800
108    3 H                    1.0000    -0.62911800    -0.62911800     0.62911800
109    4 H                    1.0000     0.62911800    -0.62911800    -0.62911800
110""", re.M)
111
112# Unit cell parser
113_cell_block = _define_pattern(r'^[ \t]+Lattice Parameters[ \t]*\n'
114                              r'^(?:[ \t\S]*\n){4}'
115                              r'((?:^(?:[ \t]+[\S]+){5}\n){3})',
116                              """\
117      Lattice Parameters
118      ------------------
119
120      lattice vectors in angstroms (scale by  1.889725989 to convert to a.u.)
121
122      a1=<   4.000   0.000   0.000 >
123      a2=<   0.000   5.526   0.000 >
124      a3=<   0.000   0.000   4.596 >
125      a=       4.000 b=      5.526 c=       4.596
126      alpha=  90.000 beta=  90.000 gamma=  90.000
127      omega=   101.6
128""", re.M)
129
130
131# Parses the geometry and returns the corresponding Atoms object
132def _parse_geomblock(chunk):
133    geomblocks = _geom.findall(chunk)
134    if not geomblocks:
135        return None
136    geomblock = geomblocks[-1].strip().split('\n')
137    natoms = len(geomblock)
138    symbols = []
139    pos = np.zeros((natoms, 3))
140    for i, line in enumerate(geomblock):
141        line = line.strip().split()
142        symbols.append(line[1])
143        pos[i] = [float(x) for x in line[3:6]]
144
145    cellblocks = _cell_block.findall(chunk)
146    if cellblocks:
147        cellblock = cellblocks[-1].strip().split('\n')
148        cell = np.zeros((3, 3))
149        for i, line in enumerate(cellblock):
150            line = line.strip().split()
151            cell[i] = [float(x) for x in line[1:4]]
152    else:
153        cell = None
154    return Atoms(symbols, positions=pos, cell=cell)
155
156
157# GTO-specific parser stuff
158
159# Matches gradient block from a GTO calculation
160_gto_grad = _define_pattern(
161    r'^[ \t]+[\S]+[ \t]+ENERGY GRADIENTS[ \t]*[\n]+'
162    r'^[ \t]+atom[ \t]+coordinates[ \t]+gradient[ \t]*\n'
163    r'^(?:[ \t]+x[ \t]+y[ \t]+z){2}[ \t]*\n'
164    r'((?:^(?:[ \t]+[\S]+){8}\n)+)[ \t]*\n',
165    """\
166                         UHF ENERGY GRADIENTS
167
168    atom               coordinates                        gradient
169                 x          y          z           x          y          z
170   1 C       0.293457  -0.293457   0.293457   -0.000083   0.000083  -0.000083
171   2 H       1.125380   1.355351   1.125380    0.000086   0.000089   0.000086
172   3 H      -1.355351  -1.125380   1.125380   -0.000089  -0.000086   0.000086
173   4 H       1.125380  -1.125380  -1.355351    0.000086  -0.000086  -0.000089
174
175""", re.M)
176
177# Energy parsers for a variety of different GTO calculations
178_e_gto = OrderedDict()
179_e_gto['tce'] = _define_pattern(
180    r'^[\s]+[\S]+[\s]+total energy \/ hartree[\s]+'
181    r'=[\s]+([\S]+)[\s]*\n',
182    " CCD total energy / hartree       "
183    "=       -75.715332545665888\n", re.M,
184)
185_e_gto['ccsd'] = _define_pattern(
186    r'^[\s]+Total CCSD energy:[\s]+([\S]+)[\s]*\n',
187    " Total CCSD energy:            -75.716168566598569\n",
188    re.M,
189)
190_e_gto['tddft'] = _define_pattern(
191    r'^[\s]+Excited state energy =[\s]+([\S]+)[\s]*\n',
192    "     Excited state energy =    -75.130134499965\n",
193    re.M,
194)
195_e_gto['mp2'] = _define_pattern(
196    r'^[\s]+Total MP2 energy[\s]+([\S]+)[\s]*\n',
197    "          Total MP2 energy           -75.708800087578\n",
198    re.M,
199)
200_e_gto['mf'] = _define_pattern(
201    r'^[\s]+Total (?:DFT|SCF) energy =[\s]+([\S]+)[\s]*\n',
202    "         Total SCF energy =    -75.585555997789\n",
203    re.M,
204)
205
206
207# GTO parser
208def parse_gto_chunk(chunk):
209    atoms = None
210    forces = None
211    energy = None
212    dipole = None
213    quadrupole = None
214    for theory, pattern in _e_gto.items():
215        matches = pattern.findall(chunk)
216        if matches:
217            energy = float(matches[-1].replace('D', 'E')) * Hartree
218            break
219
220    gradblocks = _gto_grad.findall(chunk)
221    if gradblocks:
222        gradblock = gradblocks[-1].strip().split('\n')
223        natoms = len(gradblock)
224        symbols = []
225        pos = np.zeros((natoms, 3))
226        forces = np.zeros((natoms, 3))
227        for i, line in enumerate(gradblock):
228            line = line.strip().split()
229            symbols.append(line[1])
230            pos[i] = [float(x) for x in line[2:5]]
231            forces[i] = [-float(x) for x in line[5:8]]
232        pos *= Bohr
233        forces *= Hartree / Bohr
234        atoms = Atoms(symbols, positions=pos)
235
236    dipole, quadrupole = _get_multipole(chunk)
237
238    kpts = _get_gto_kpts(chunk)
239
240    if atoms is None:
241        atoms = _parse_geomblock(chunk)
242
243    if atoms is None:
244        return
245
246    # SinglePointDFTCalculator doesn't support quadrupole moment currently
247    calc = SinglePointDFTCalculator(atoms=atoms,
248                                    energy=energy,
249                                    free_energy=energy,  # XXX Is this right?
250                                    forces=forces,
251                                    dipole=dipole,
252                                    # quadrupole=quadrupole,
253                                    )
254    calc.kpts = kpts
255    atoms.calc = calc
256    return atoms
257
258
259# Extracts dipole and quadrupole moment for a GTO calculation
260_multipole = _define_pattern(
261    r'^[ \t]+Multipole analysis of the density[ \t\S]*\n'
262    r'^[ \t-]+\n\n^[ \t\S]+\n^[ \t-]+\n'
263    r'((?:(?:(?:[ \t]+[\S]+){7,8}\n)|[ \t]*\n){12})',
264    """\
265     Multipole analysis of the density
266     ---------------------------------
267
268     L   x y z        total         alpha         beta         nuclear
269     -   - - -        -----         -----         ----         -------
270     0   0 0 0     -0.000000     -5.000000     -5.000000     10.000000
271
272     1   1 0 0      0.000000      0.000000      0.000000      0.000000
273     1   0 1 0     -0.000001     -0.000017     -0.000017      0.000034
274     1   0 0 1     -0.902084     -0.559881     -0.559881      0.217679
275
276     2   2 0 0     -5.142958     -2.571479     -2.571479      0.000000
277     2   1 1 0     -0.000000     -0.000000     -0.000000      0.000000
278     2   1 0 1      0.000000      0.000000      0.000000      0.000000
279     2   0 2 0     -3.153324     -3.807308     -3.807308      4.461291
280     2   0 1 1      0.000001     -0.000009     -0.000009      0.000020
281     2   0 0 2     -4.384288     -3.296205     -3.296205      2.208122
282""", re.M)
283
284
285# Parses the dipole and quadrupole moment from a GTO calculation
286def _get_multipole(chunk):
287    matches = _multipole.findall(chunk)
288    if not matches:
289        return None, None
290    # This pulls the 5th column out of the multipole moments block;
291    # this column contains the actual moments.
292    moments = [float(x.split()[4]) for x in matches[-1].split('\n') if x]
293    dipole = np.array(moments[1:4]) * Bohr
294    quadrupole = np.zeros(9)
295    quadrupole[[0, 1, 2, 4, 5, 8]] = [moments[4:]]
296    quadrupole[[3, 6, 7]] = quadrupole[[1, 2, 5]]
297    return dipole, quadrupole.reshape((3, 3)) * Bohr**2
298
299
300# MO eigenvalue and occupancy parser for GTO calculations
301_eval_block = _define_pattern(
302        r'^[ \t]+[\S]+ Final (?:Alpha |Beta )?Molecular Orbital Analysis[ \t]*'
303        r'\n^[ \t-]+\n\n'
304        r'(?:^[ \t]+Vector [ \t\S]+\n(?:^[ \t\S]+\n){3}'
305        r'(?:^(?:(?:[ \t]+[\S]+){5}){1,2}[ \t]*\n)+\n)+',
306        """\
307                       ROHF Final Molecular Orbital Analysis
308                       -------------------------------------
309
310 Vector    1  Occ=2.000000D+00  E=-2.043101D+01
311              MO Center=  1.1D-20,  1.5D-18,  1.2D-01, r^2= 1.5D-02
312   Bfn.  Coefficient  Atom+Function         Bfn.  Coefficient  Atom+Function
313  ----- ------------  ---------------      ----- ------------  ---------------
314     1      0.983233  1 O  s
315
316 Vector    2  Occ=2.000000D+00  E=-1.324439D+00
317              MO Center= -2.1D-18, -8.6D-17, -7.1D-02, r^2= 5.1D-01
318   Bfn.  Coefficient  Atom+Function         Bfn.  Coefficient  Atom+Function
319  ----- ------------  ---------------      ----- ------------  ---------------
320     6      0.708998  1 O  s                  1     -0.229426  1 O  s
321     2      0.217752  1 O  s
322     """, re.M)  # noqa: W291
323
324
325# Parses the eigenvalues and occupations from a GTO calculation
326def _get_gto_kpts(chunk):
327    eval_blocks = _eval_block.findall(chunk)
328    if not eval_blocks:
329        return []
330    kpts = []
331    kpt = _get_gto_evals(eval_blocks[-1])
332    if kpt.s == 1:
333        kpts.append(_get_gto_evals(eval_blocks[-2]))
334    kpts.append(kpt)
335    return kpts
336
337
338# Extracts MO eigenvalue and occupancy for a GTO calculation
339_extract_vector = _define_pattern(
340    r'^[ \t]+Vector[ \t]+([\S])+[ \t]+Occ=([\S]+)[ \t]+E=[ \t]*([\S]+)[ \t]*\n',
341    " Vector    1  Occ=2.000000D+00  E=-2.043101D+01\n", re.M,
342)
343
344
345# Extracts the eigenvalues and occupations from a GTO calculation
346def _get_gto_evals(chunk):
347    spin = 1 if re.match(r'[ \t\S]+Beta', chunk) else 0
348    data = []
349    for vector in _extract_vector.finditer(chunk):
350        data.append([float(x.replace('D', 'E')) for x in vector.groups()[1:]])
351    data = np.array(data)
352    occ = data[:, 0]
353    energies = data[:, 1] * Hartree
354
355    return SinglePointKPoint(1., spin, 0, energies, occ)
356
357
358# Plane wave specific parsing stuff
359
360# Matches the gradient block from a plane wave calculation
361_nwpw_grad = _define_pattern(
362    r'^[ \t]+[=]+[ \t]+Ion Gradients[ \t]+[=]+[ \t]*\n'
363    r'^[ \t]+Ion Forces:[ \t]*\n'
364    r'((?:^(?:[ \t]+[\S]+){7}\n)+)',
365    """\
366          =============  Ion Gradients =================
367 Ion Forces:
368        1 O    (   -0.000012    0.000027   -0.005199 )
369        2 H    (    0.000047   -0.013082    0.020790 )
370        3 H    (    0.000047    0.012863    0.020786 )
371        C.O.M. (   -0.000000   -0.000000   -0.000000 )
372          ===============================================
373""", re.M)
374
375# Matches the gradient block from a PAW calculation
376_paw_grad = _define_pattern(
377    r'^[ \t]+[=]+[ \t]+Ion Gradients[ \t]+[=]+[ \t]*\n'
378    r'^[ \t]+Ion Positions:[ \t]*\n'
379    r'((?:^(?:[ \t]+[\S]+){7}\n)+)'
380    r'^[ \t]+Ion Forces:[ \t]*\n'
381    r'((?:^(?:[ \t]+[\S]+){7}\n)+)',
382    """\
383          =============  Ion Gradients =================
384 Ion Positions:
385        1 O    (   -3.77945   -5.22176   -3.77945 )
386        2 H    (   -3.77945   -3.77945    3.77945 )
387        3 H    (   -3.77945    3.77945    3.77945 )
388 Ion Forces:
389        1 O    (   -0.00001   -0.00000    0.00081 )
390        2 H    (    0.00005   -0.00026   -0.00322 )
391        3 H    (    0.00005    0.00030   -0.00322 )
392        C.O.M. (   -0.00000   -0.00000   -0.00000 )
393          ===============================================
394""", re.M)
395
396# Energy parser for plane wave calculations
397_nwpw_energy = _define_pattern(
398    r'^[\s]+Total (?:PSPW|BAND|PAW) energy'
399    r'[\s]+:[\s]+([\S]+)[\s]*\n',
400    " Total PSPW energy     :  -0.1709317826E+02\n",
401    re.M,
402)
403
404# Parser for the fermi energy in a plane wave calculation
405_fermi_energy = _define_pattern(
406    r'^[ \t]+Fermi energy =[ \t]+([\S]+) \([ \t]+[\S]+[ \t]*\n',
407    "  Fermi energy =    -0.5585062E-01 (  -1.520eV)\n", re.M,
408)
409
410
411# Plane wave parser
412def parse_pw_chunk(chunk):
413    atoms = _parse_geomblock(chunk)
414    if atoms is None:
415        return
416
417    energy = None
418    efermi = None
419    forces = None
420    stress = None
421
422    matches = _nwpw_energy.findall(chunk)
423    if matches:
424        energy = float(matches[-1].replace('D', 'E')) * Hartree
425
426    matches = _fermi_energy.findall(chunk)
427    if matches:
428        efermi = float(matches[-1].replace('D', 'E')) * Hartree
429
430    gradblocks = _nwpw_grad.findall(chunk)
431    if not gradblocks:
432        gradblocks = _paw_grad.findall(chunk)
433    if gradblocks:
434        gradblock = gradblocks[-1].strip().split('\n')
435        natoms = len(gradblock)
436        symbols = []
437        forces = np.zeros((natoms, 3))
438        for i, line in enumerate(gradblock):
439            line = line.strip().split()
440            symbols.append(line[1])
441            forces[i] = [float(x) for x in line[3:6]]
442        forces *= Hartree / Bohr
443
444    if atoms.cell:
445        stress = _get_stress(chunk, atoms.cell)
446
447    ibz_kpts, kpts = _get_pw_kpts(chunk)
448
449    # NWChem does not calculate an energy extrapolated to the 0K limit,
450    # so right now, energy and free_energy will be the same.
451    calc = SinglePointDFTCalculator(atoms=atoms,
452                                    energy=energy,
453                                    efermi=efermi,
454                                    free_energy=energy,
455                                    forces=forces,
456                                    stress=stress,
457                                    ibzkpts=ibz_kpts)
458    calc.kpts = kpts
459    atoms.calc = calc
460    return atoms
461
462
463# Extracts stress tensor from a plane wave calculation
464_stress = _define_pattern(
465    r'[ \t]+[=]+[ \t]+(?:total gradient|E all FD)[ \t]+[=]+[ \t]*\n'
466    r'^[ \t]+S =((?:(?:[ \t]+[\S]+){5}\n){3})[ \t=]+\n',
467    """\
468          ============= total gradient ==============
469      S =  (   -0.22668    0.27174    0.19134 )
470           (    0.23150   -0.26760    0.23226 )
471           (    0.19090    0.27206   -0.22700 )
472          ===================================================
473""", re.M)
474
475
476# Extract stress tensor from a plane wave calculation
477def _get_stress(chunk, cell):
478    stress_blocks = _stress.findall(chunk)
479    if not stress_blocks:
480        return None
481    stress_block = stress_blocks[-1]
482    stress = np.zeros((3, 3))
483    for i, row in enumerate(stress_block.strip().split('\n')):
484        stress[i] = [float(x) for x in row.split()[1:4]]
485    stress = (stress @ cell) * Hartree / Bohr / cell.volume
486    stress = 0.5 * (stress + stress.T)
487    # convert from 3x3 array to Voigt form
488    return stress.ravel()[[0, 4, 8, 5, 2, 1]]
489
490
491# MO/band eigenvalue and occupancy parser for plane wave calculations
492_nwpw_eval_block = _define_pattern(
493        r'(?:(?:^[ \t]+Brillouin zone point:[ \t]+[\S]+[ \t]*\n'
494        r'(?:[ \t\S]*\n){3,4})?'
495        r'^[ \t]+(?:virtual )?orbital energies:\n'
496        r'(?:^(?:(?:[ \t]+[\S]+){3,4}){1,2}[ \t]*\n)+\n{,3})+',
497        """\
498 Brillouin zone point:      1
499    weight=  0.074074
500    k     =<   0.333   0.333   0.333> . <b1,b2,b3>
501          =<   0.307   0.307   0.307>
502
503 orbital energies:
504     0.3919370E+00 (  10.665eV) occ=1.000
505     0.3908827E+00 (  10.637eV) occ=1.000     0.4155535E+00 (  11.308eV) occ=1.000
506     0.3607689E+00 (   9.817eV) occ=1.000     0.3827820E+00 (  10.416eV) occ=1.000
507     0.3544000E+00 (   9.644eV) occ=1.000     0.3782641E+00 (  10.293eV) occ=1.000
508     0.3531137E+00 (   9.609eV) occ=1.000     0.3778819E+00 (  10.283eV) occ=1.000
509     0.2596367E+00 (   7.065eV) occ=1.000     0.2820723E+00 (   7.676eV) occ=1.000
510
511 Brillouin zone point:      2
512    weight=  0.074074
513    k     =<  -0.000   0.333   0.333> . <b1,b2,b3>
514          =<   0.614   0.000   0.000>
515
516 orbital energies:
517     0.3967132E+00 (  10.795eV) occ=1.000
518     0.3920006E+00 (  10.667eV) occ=1.000     0.4197952E+00 (  11.423eV) occ=1.000
519     0.3912442E+00 (  10.646eV) occ=1.000     0.4125086E+00 (  11.225eV) occ=1.000
520     0.3910472E+00 (  10.641eV) occ=1.000     0.4124238E+00 (  11.223eV) occ=1.000
521     0.3153977E+00 (   8.582eV) occ=1.000     0.3379797E+00 (   9.197eV) occ=1.000
522     0.2801606E+00 (   7.624eV) occ=1.000     0.3052478E+00 (   8.306eV) occ=1.000
523""", re.M)  # noqa: E501, W291
524
525# Parser for kpoint weights for a plane wave calculation
526_kpt_weight = _define_pattern(
527    r'^[ \t]+Brillouin zone point:[ \t]+([\S]+)[ \t]*\n'
528    r'^[ \t]+weight=[ \t]+([\S]+)[ \t]*\n',
529    """\
530 Brillouin zone point:      1
531    weight=  0.074074
532""", re.M)  # noqa: W291
533
534
535# Parse eigenvalues and occupancies from a plane wave calculation
536def _get_pw_kpts(chunk):
537    eval_blocks = []
538    for block in _nwpw_eval_block.findall(chunk):
539        if 'pathlength' not in block:
540            eval_blocks.append(block)
541    if not eval_blocks:
542        return []
543    if 'virtual' in eval_blocks[-1]:
544        occ_block = eval_blocks[-2]
545        virt_block = eval_blocks[-1]
546    else:
547        occ_block = eval_blocks[-1]
548        virt_block = ''
549    kpts = NWChemKpts()
550    _extract_pw_kpts(occ_block, kpts, 1.)
551    _extract_pw_kpts(virt_block, kpts, 0.)
552    for match in _kpt_weight.finditer(occ_block):
553        index, weight = match.groups()
554        kpts.set_weight(index, float(weight))
555    return kpts.to_ibz_kpts(), kpts.to_singlepointkpts()
556
557
558# Helper class for keeping track of kpoints and converting to
559# SinglePointKPoint objects.
560class NWChemKpts:
561    def __init__(self):
562        self.data = dict()
563        self.ibz_kpts = dict()
564        self.weights = dict()
565
566    def add_ibz_kpt(self, index, raw_kpt):
567        kpt = np.array([float(x.strip('>')) for x in raw_kpt.split()[1:4]])
568        self.ibz_kpts[index] = kpt
569
570    def add_eval(self, index, spin, energy, occ):
571        if index not in self.data:
572            self.data[index] = dict()
573        if spin not in self.data[index]:
574            self.data[index][spin] = []
575        self.data[index][spin].append((energy, occ))
576
577    def set_weight(self, index, weight):
578        self.weights[index] = weight
579
580    def to_ibz_kpts(self):
581        if not self.ibz_kpts:
582            return np.array([[0., 0., 0.]])
583        sorted_kpts = sorted(list(self.ibz_kpts.items()), key=lambda x: x[0])
584        return np.array(list(zip(*sorted_kpts))[1])
585
586    def to_singlepointkpts(self):
587        kpts = []
588        for i, (index, spins) in enumerate(self.data.items()):
589            weight = self.weights[index]
590            for spin, (_, data) in enumerate(spins.items()):
591                energies, occs = np.array(sorted(data, key=lambda x: x[0])).T
592                kpts.append(SinglePointKPoint(weight, spin, i, energies, occs))
593        return kpts
594
595
596# Extracts MO/band data from a pattern matched by _nwpw_eval_block above
597_kpt = _define_pattern(
598    r'^[ \t]+Brillouin zone point:[ \t]+([\S]+)[ \t]*\n'
599    r'^[ \t]+weight=[ \t]+([\S])+[ \t]*\n'
600    r'^[ \t]+k[ \t]+([ \t\S]+)\n'
601    r'(?:^[ \t\S]*\n){1,2}'
602    r'^[ \t]+(?:virtual )?orbital energies:\n'
603    r'((?:^(?:(?:[ \t]+[\S]+){3,4}){1,2}[ \t]*\n)+)',
604    """\
605 Brillouin zone point:      1
606    weight=  0.074074
607    k     =<   0.333   0.333   0.333> . <b1,b2,b3>
608          =<   0.307   0.307   0.307>
609
610 orbital energies:
611     0.3919370E+00 (  10.665eV) occ=1.000
612     0.3908827E+00 (  10.637eV) occ=1.000     0.4155535E+00 (  11.308eV) occ=1.000
613     0.3607689E+00 (   9.817eV) occ=1.000     0.3827820E+00 (  10.416eV) occ=1.000
614     0.3544000E+00 (   9.644eV) occ=1.000     0.3782641E+00 (  10.293eV) occ=1.000
615     0.3531137E+00 (   9.609eV) occ=1.000     0.3778819E+00 (  10.283eV) occ=1.000
616     0.2596367E+00 (   7.065eV) occ=1.000     0.2820723E+00 (   7.676eV) occ=1.000
617""", re.M)  # noqa: E501, W291
618
619
620# Extracts kpoints from a plane wave calculation
621def _extract_pw_kpts(chunk, kpts, default_occ):
622    for match in _kpt.finditer(chunk):
623        point, weight, raw_kpt, orbitals = match.groups()
624        index = int(point) - 1
625        for line in orbitals.split('\n'):
626            tokens = line.strip().split()
627            if not tokens:
628                continue
629            ntokens = len(tokens)
630            a_e = float(tokens[0]) * Hartree
631            if ntokens % 3 == 0:
632                a_o = default_occ
633            else:
634                a_o = float(tokens[3].split('=')[1])
635            kpts.add_eval(index, 0, a_e, a_o)
636
637            if ntokens <= 4:
638                continue
639            if ntokens == 6:
640                b_e = float(tokens[3]) * Hartree
641                b_o = default_occ
642            elif ntokens == 8:
643                b_e = float(tokens[4]) * Hartree
644                b_o = float(tokens[7].split('=')[1])
645            kpts.add_eval(index, 1, b_e, b_o)
646        kpts.set_weight(index, float(weight))
647        kpts.add_ibz_kpt(index, raw_kpt)
648