1#!/usr/bin/env python
2# Copyright 2014-2020 The PySCF Developers. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# Author: Qiming Sun <osirpt.sun@gmail.com>
17#
18
19'''
20Parses for basis set in the Molpro format
21'''
22
23__all__ = ['parse', 'load']
24
25import re
26import numpy
27
28try:
29    from pyscf.gto.basis.parse_nwchem import optimize_contraction
30    from pyscf.gto.basis.parse_nwchem import remove_zero
31except ImportError:
32    optimize_contraction = lambda basis: basis
33    remove_zero = lambda basis: basis
34
35from pyscf import __config__
36DISABLE_EVAL = getattr(__config__, 'DISABLE_EVAL', False)
37
38MAXL = 12
39SPDF = 'SPDFGHIKLMNO'
40MAPSPDF = {key: l for l, key in enumerate(SPDF)}
41COMMENT_KEYWORDS = '!*#'
42
43# parse the basis text which is in Molpro format, return an internal basis
44# format which can be assigned to gto.mole.basis
45def parse(string, optimize=True):
46    raw_basis = []
47    for x in string.splitlines():
48        x = x.strip()
49        if x and x[0] not in COMMENT_KEYWORDS:
50            raw_basis.append(x)
51    return _parse(raw_basis, optimize)
52
53def load(basisfile, symb, optimize=True):
54    raw_basis = search_seg(basisfile, symb)
55    #if not raw_basis:
56    #    raise BasisNotFoundError('Basis not found for  %s  in  %s' % (symb, basisfile))
57    return _parse(raw_basis, optimize)
58
59def search_seg(basisfile, symb):
60    raw_basis = []
61    with open(basisfile, 'r') as fin:
62        dat = fin.readline()
63        while dat:
64            if dat[0] in COMMENT_KEYWORDS:
65                dat = fin.readline()
66                continue
67            elif dat[0].isalpha():
68                if dat.startswith(symb+' '):
69                    raw_basis.append(dat.splitlines()[0])
70                elif raw_basis:
71                    return raw_basis
72                fin.readline()  # line for references
73            elif raw_basis:
74                raw_basis.append(dat.splitlines()[0])
75            dat = fin.readline()
76    return raw_basis
77
78
79def _parse(raw_basis, optimize=True):
80    # pass 1
81    basis_add = []
82    for dat in raw_basis:
83        dat = dat.upper()
84        if dat[0].isalpha():
85            if ' ' not in dat:
86                # Skip the line of comments
87                continue
88            status = dat
89            val = []
90            basis_add.append([status, val])
91        else:
92            val.append(dat)
93    raw_basis = [[k, ' '.join(v)] for k,v in basis_add]
94
95    # pass 2
96    basis_add = []
97    for status, valstring in raw_basis:
98        tmp = status.split(':')
99        key = tmp[0].split()
100        l = MAPSPDF[key[1].upper()]
101        #TODO if key[-1] == 'SV'
102        val = tmp[1].split()
103        np = int(val[0])
104        nc = int(val[1])
105
106        rawd = [float(x) for x in valstring.replace('D','e').split()]
107        if nc == 0:
108            for e in rawd:
109                basis_add.append([l, [e, 1.]])
110        else:
111            exps = numpy.array(rawd[:np])
112            coeff = numpy.zeros((np,nc))
113            p1 = np
114            for i in range(nc):
115                start, end = val[2+i].split('.')
116                start, end = int(start), int(end)
117                nd = end - start + 1
118                p0, p1 = p1, p1 + nd
119                coeff[start-1:end,i] = rawd[p0:p1]
120
121            bval = numpy.hstack((exps[:,None], coeff))
122            basis_add.append([l] + bval.tolist())
123
124    basis_sorted = []
125    for l in range(MAXL):
126        basis_sorted.extend([b for b in basis_add if b[0] == l])
127
128    if optimize:
129        basis_sorted = optimize_contraction(basis_sorted)
130
131    basis_sorted = remove_zero(basis_sorted)
132    return basis_sorted
133
134def parse_ecp(string):
135    ecptxt = []
136    for x in string.splitlines():
137        x = x.strip()
138        if x and x[0] not in COMMENT_KEYWORDS:
139            ecptxt.append(x)
140    return _parse_ecp(ecptxt)
141
142def _parse_ecp(raw_ecp):
143    symb, nelec, nshell, nso = re.split(',|;', raw_ecp[0])[1:5]
144    nelec = int(nelec)
145    nshell = int(nshell)
146    nso = int(nso)
147    assert len(raw_ecp) == (nshell + nso + 2), "ecp info doesn't match with data"
148
149    def parse_terms(terms):
150        r_orders = [[] for i in range(7)]  # up to r^6
151        for term in terms:
152            line = term.split(',')
153            order = int(line[0])
154            try:
155                coef = [float(x) for x in line[1:]]
156            except ValueError:
157                if DISABLE_EVAL:
158                    raise ValueError('Failed to parse ecp %s' % line)
159                else:
160                    coef = list(eval(','.join(line[1:])))
161            r_orders[order].append(coef)
162        return r_orders
163
164    ecp_add = {}
165    ul = [x.strip() for x in raw_ecp[1].replace('D', 'e').split(';') if x.strip()]
166    assert int(ul[0]) + 1 == len(ul), "UL doesn't match data"
167    ecp_add[-1] = parse_terms(ul[1:])
168
169    for i, sf_terms in enumerate(raw_ecp[2:2+nshell]):
170        terms = [x.strip() for x in sf_terms.replace('D', 'e').split(';') if x.strip()]
171        assert int(terms[0]) + 1 == len(terms), \
172                "ECP %s Shell doesn't match data" % SPDF[i]
173        ecp_add[i] = parse_terms(terms[1:])
174
175    if nso > 0:
176        for i, so_terms in enumerate(raw_ecp[2+nshell:]):
177            terms = [x.strip() for x in so_terms.replace('D', 'e').split(';') if x.strip()]
178            assert int(terms[0]) + 1 == len(terms), \
179                    "ECP-SOC Shell %s doesn't match data" % SPDF[i+1]
180            soc_data = parse_terms(terms[1:])
181            for order, coefs in enumerate(soc_data):
182                if not coefs:
183                    continue
184                sf_coefs = ecp_add[i+1][order]
185                assert ([x[0] for x in sf_coefs] == [x[0] for x in coefs]), \
186                        "In ECP Shell %s order %d, SF and SOC do not match" % (SPDF[i+1], order)
187                for j, c in enumerate(coefs):
188                    sf_coefs[j].append(c[1])
189
190    bsort = []
191    for l in range(-1, MAXL):
192        if l in ecp_add:
193            bsort.append([l, ecp_add[l]])
194    return [nelec, bsort]
195
196if __name__ == '__main__':
197    #print(search_seg('minao.libmol', 'C'))
198    print(load('cc_pvdz.libmol', 'C'))
199