1#!/usr/bin/env python 2# Copyright 2014-2020 The PySCF Developers. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15# 16# Author: Qiming Sun <osirpt.sun@gmail.com> 17# 18 19''' 20Parses for basis set in the Molpro format 21''' 22 23__all__ = ['parse', 'load'] 24 25import re 26import numpy 27 28try: 29 from pyscf.gto.basis.parse_nwchem import optimize_contraction 30 from pyscf.gto.basis.parse_nwchem import remove_zero 31except ImportError: 32 optimize_contraction = lambda basis: basis 33 remove_zero = lambda basis: basis 34 35from pyscf import __config__ 36DISABLE_EVAL = getattr(__config__, 'DISABLE_EVAL', False) 37 38MAXL = 12 39SPDF = 'SPDFGHIKLMNO' 40MAPSPDF = {key: l for l, key in enumerate(SPDF)} 41COMMENT_KEYWORDS = '!*#' 42 43# parse the basis text which is in Molpro format, return an internal basis 44# format which can be assigned to gto.mole.basis 45def parse(string, optimize=True): 46 raw_basis = [] 47 for x in string.splitlines(): 48 x = x.strip() 49 if x and x[0] not in COMMENT_KEYWORDS: 50 raw_basis.append(x) 51 return _parse(raw_basis, optimize) 52 53def load(basisfile, symb, optimize=True): 54 raw_basis = search_seg(basisfile, symb) 55 #if not raw_basis: 56 # raise BasisNotFoundError('Basis not found for %s in %s' % (symb, basisfile)) 57 return _parse(raw_basis, optimize) 58 59def search_seg(basisfile, symb): 60 raw_basis = [] 61 with open(basisfile, 'r') as fin: 62 dat = fin.readline() 63 while dat: 64 if dat[0] in COMMENT_KEYWORDS: 65 dat = fin.readline() 66 continue 67 elif dat[0].isalpha(): 68 if dat.startswith(symb+' '): 69 raw_basis.append(dat.splitlines()[0]) 70 elif raw_basis: 71 return raw_basis 72 fin.readline() # line for references 73 elif raw_basis: 74 raw_basis.append(dat.splitlines()[0]) 75 dat = fin.readline() 76 return raw_basis 77 78 79def _parse(raw_basis, optimize=True): 80 # pass 1 81 basis_add = [] 82 for dat in raw_basis: 83 dat = dat.upper() 84 if dat[0].isalpha(): 85 if ' ' not in dat: 86 # Skip the line of comments 87 continue 88 status = dat 89 val = [] 90 basis_add.append([status, val]) 91 else: 92 val.append(dat) 93 raw_basis = [[k, ' '.join(v)] for k,v in basis_add] 94 95 # pass 2 96 basis_add = [] 97 for status, valstring in raw_basis: 98 tmp = status.split(':') 99 key = tmp[0].split() 100 l = MAPSPDF[key[1].upper()] 101 #TODO if key[-1] == 'SV' 102 val = tmp[1].split() 103 np = int(val[0]) 104 nc = int(val[1]) 105 106 rawd = [float(x) for x in valstring.replace('D','e').split()] 107 if nc == 0: 108 for e in rawd: 109 basis_add.append([l, [e, 1.]]) 110 else: 111 exps = numpy.array(rawd[:np]) 112 coeff = numpy.zeros((np,nc)) 113 p1 = np 114 for i in range(nc): 115 start, end = val[2+i].split('.') 116 start, end = int(start), int(end) 117 nd = end - start + 1 118 p0, p1 = p1, p1 + nd 119 coeff[start-1:end,i] = rawd[p0:p1] 120 121 bval = numpy.hstack((exps[:,None], coeff)) 122 basis_add.append([l] + bval.tolist()) 123 124 basis_sorted = [] 125 for l in range(MAXL): 126 basis_sorted.extend([b for b in basis_add if b[0] == l]) 127 128 if optimize: 129 basis_sorted = optimize_contraction(basis_sorted) 130 131 basis_sorted = remove_zero(basis_sorted) 132 return basis_sorted 133 134def parse_ecp(string): 135 ecptxt = [] 136 for x in string.splitlines(): 137 x = x.strip() 138 if x and x[0] not in COMMENT_KEYWORDS: 139 ecptxt.append(x) 140 return _parse_ecp(ecptxt) 141 142def _parse_ecp(raw_ecp): 143 symb, nelec, nshell, nso = re.split(',|;', raw_ecp[0])[1:5] 144 nelec = int(nelec) 145 nshell = int(nshell) 146 nso = int(nso) 147 assert len(raw_ecp) == (nshell + nso + 2), "ecp info doesn't match with data" 148 149 def parse_terms(terms): 150 r_orders = [[] for i in range(7)] # up to r^6 151 for term in terms: 152 line = term.split(',') 153 order = int(line[0]) 154 try: 155 coef = [float(x) for x in line[1:]] 156 except ValueError: 157 if DISABLE_EVAL: 158 raise ValueError('Failed to parse ecp %s' % line) 159 else: 160 coef = list(eval(','.join(line[1:]))) 161 r_orders[order].append(coef) 162 return r_orders 163 164 ecp_add = {} 165 ul = [x.strip() for x in raw_ecp[1].replace('D', 'e').split(';') if x.strip()] 166 assert int(ul[0]) + 1 == len(ul), "UL doesn't match data" 167 ecp_add[-1] = parse_terms(ul[1:]) 168 169 for i, sf_terms in enumerate(raw_ecp[2:2+nshell]): 170 terms = [x.strip() for x in sf_terms.replace('D', 'e').split(';') if x.strip()] 171 assert int(terms[0]) + 1 == len(terms), \ 172 "ECP %s Shell doesn't match data" % SPDF[i] 173 ecp_add[i] = parse_terms(terms[1:]) 174 175 if nso > 0: 176 for i, so_terms in enumerate(raw_ecp[2+nshell:]): 177 terms = [x.strip() for x in so_terms.replace('D', 'e').split(';') if x.strip()] 178 assert int(terms[0]) + 1 == len(terms), \ 179 "ECP-SOC Shell %s doesn't match data" % SPDF[i+1] 180 soc_data = parse_terms(terms[1:]) 181 for order, coefs in enumerate(soc_data): 182 if not coefs: 183 continue 184 sf_coefs = ecp_add[i+1][order] 185 assert ([x[0] for x in sf_coefs] == [x[0] for x in coefs]), \ 186 "In ECP Shell %s order %d, SF and SOC do not match" % (SPDF[i+1], order) 187 for j, c in enumerate(coefs): 188 sf_coefs[j].append(c[1]) 189 190 bsort = [] 191 for l in range(-1, MAXL): 192 if l in ecp_add: 193 bsort.append([l, ecp_add[l]]) 194 return [nelec, bsort] 195 196if __name__ == '__main__': 197 #print(search_seg('minao.libmol', 'C')) 198 print(load('cc_pvdz.libmol', 'C')) 199