1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /** @file basisset.cc
31
32 \brief Code for representing basis set information for Gaussian
33 basis sets, and for parsing a text file specifying such a
34 basisset.
35
36 @author: Elias Rudberg <em>responsible</em>.
37 */
38
39 /* -*-mode:c; indent-tabs-mode: nil -*- */
40 /* basisset.c: provides read_basisset_file() which creates a
41 basisset_struct from a data contained in a file */
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <memory.h>
47 #include <string.h>
48 #include <stdexcept>
49
50 #include "basisset.h"
51 #include "output.h"
52 #include "memorymanag.h"
53
54
basisset_info()55 basisset_info::basisset_info() {
56 atoms.resize(MAX_NO_OF_ATOM_TYPES);
57 clear();
58 }
59
clear()60 void basisset_info::clear() {
61 memset(&atoms[0], 0x00, MAX_NO_OF_ATOM_TYPES*sizeof(basisset_atom_struct));
62 }
63
write_to_buffer(char * dataBuffer,size_t const bufferSize) const64 void basisset_info::write_to_buffer ( char * dataBuffer, size_t const bufferSize ) const {
65 if(bufferSize < get_size())
66 throw std::runtime_error("Error in basisset_info::write_to_buffer: bufferSize too small.");
67 memcpy(dataBuffer, &atoms[0], MAX_NO_OF_ATOM_TYPES*sizeof(basisset_atom_struct));
68 }
69
get_size() const70 size_t basisset_info::get_size() const {
71 return MAX_NO_OF_ATOM_TYPES*sizeof(basisset_atom_struct);
72 }
73
assign_from_buffer(char const * dataBuffer,size_t const bufferSize)74 void basisset_info::assign_from_buffer ( char const * dataBuffer, size_t const bufferSize) {
75 if(bufferSize != get_size())
76 throw std::runtime_error("Error in basisset_info::assign_from_buffer: wrong bufferSize.");
77 memcpy(&atoms[0], dataBuffer, MAX_NO_OF_ATOM_TYPES*sizeof(basisset_atom_struct));
78 }
79
80
81 static void
remove_zeros(basisset_atom_struct * currAtom,int shellBaseIndex,int noOfShellsCurrBatch)82 remove_zeros(basisset_atom_struct* currAtom,
83 int shellBaseIndex, int noOfShellsCurrBatch) {
84 /* Remove zero coefficients. */
85 for(int i = 0; i < noOfShellsCurrBatch; i++) {
86 int count = 0;
87 ergo_real *coeffList = currAtom->shells[shellBaseIndex+i].coeffList;
88 ergo_real *expList = currAtom->shells[shellBaseIndex+i].exponentList;
89 for(int j = 0; j < currAtom->shells[shellBaseIndex+i].contrCount; j++) {
90 ergo_real currCoeff = coeffList[j];
91 ergo_real currExponent = expList[j];
92 if(currCoeff != 0) {
93 coeffList[count] = currCoeff;
94 expList[count] = currExponent;
95 count++;
96 }
97 } /* END FOR j */
98 currAtom->shells[shellBaseIndex+i].contrCount = count;
99 } /* END FOR i */
100 }
101
102 /* read_basisset_file: reads a basis set from fileName. The basis set
103 exponents and contraction coefficients are placed in result. The
104 reading procedure is bit convoluted because the basis set file
105 follows the Fortran syntax, with wrapping and skipping empty
106 elements. We basically need to emulate fortran `format'
107 statement. There is one "but" though: AhlrichsDenFit does not
108 follow the format syntax so it will/may be misread by eg. dalton. What
109 a mess...
110
111 The parser is implemented as a state machine. It still cannot read
112 ANO-type basis sets...
113 */
114 int
read_basisset_file(basisset_info & result,const char * fileName,int dirc,const char * dirv[],int print_raw)115 read_basisset_file(basisset_info & result,
116 const char* fileName,
117 int dirc,
118 const char *dirv[],
119 int print_raw)
120 {
121 enum { END_PARSING, ATOM_EXPECTED, SHELL_EXPECTED,
122 SHELL_OR_ATOM_EXPECTED, CONTRACTION_BLOCK_EXPECTED } state;
123 int uncontracted = 0;
124 char line[512];
125 basisset_atom_struct* currAtom = NULL;
126 int spdf = -1;
127 int shellBaseIndex = -1;
128 int expNo = -1;
129 FILE* f = NULL;
130
131 if(!fileName) {
132 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: fileName == NULL.");
133 return -1;
134 }
135 if(fileName[0] == '/')
136 f = fopen(fileName, "rt");
137 else {
138 for(int i = 0; i < dirc; i++) {
139 char ffname[256];
140 int len = strlen(dirv[i]);
141 strncpy(ffname, dirv[i], sizeof(ffname));
142 if(len>0 && ffname[len-1] != '/')
143 strncat(ffname, "/", sizeof(ffname)-1-len);
144 strncat(ffname, fileName, sizeof(ffname)-2-len);
145 do_output(LOG_CAT_WARNING, LOG_AREA_INTEGRALS,
146 "Trying basis set file '%s'...", ffname);
147 if( (f=fopen(ffname, "rt")) != NULL)
148 break;
149 }
150 }
151
152 if(f == NULL)
153 {
154 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error opening file '%s'", fileName);
155 return -1;
156 }
157
158 /* now create basis set by reading buf2 */
159 result.clear();
160 int noOfAtomTypes = 0;
161 state = ATOM_EXPECTED;
162 int lineNo = 0;
163 int lineConsumed = 1;
164 ergo_real currExponent = 0;
165 /* start global parsing loop */
166 do {
167 int dummy;
168 if(lineConsumed) {
169 for(unsigned int cc = 0; cc < sizeof(line); cc++)
170 line[cc] = '\0';
171 if(fgets(line, sizeof(line), f) == NULL) {
172 state = END_PARSING;
173 break;
174 }
175 // This only works if sizeof(line) is large enough. Here we check that sizeof(line) is really large enough.
176 if(strlen(line) >= sizeof(line)-1) {
177 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "Error in read_basisset_file: (strlen(line) >= sizeof(line)-1). "
178 "This indicates that the basis set file '%s' included a line that was too long, longer than %d characters.", fileName, (int)(sizeof(line)-1));
179 return -1;
180 }
181 lineConsumed = 0;
182 lineNo++;
183 }
184
185 for(int cc = strlen(line)-1; cc>=0 && isspace(line[cc]); cc--)
186 line[cc] = '\0';
187
188 if(line[0] == '$' || line[0] == '!' || line[0] == '#'||
189 line[0] == '*' || line[0] == '\0'|| line[0] == '\n') {
190 lineConsumed = 1; /* skip the comment and move on */
191 continue;
192 }
193 switch(state) {
194 case ATOM_EXPECTED:
195 if(line[0] == 'a' || line[0] == 'A') {
196 noOfAtomTypes++;
197 int atomType = atoi(line+1);
198 if(print_raw)
199 do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS, "Basis set for atom of Z=%d", atomType);
200 state = SHELL_EXPECTED;
201 if(atomType <= 0)
202 {
203 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (atomType <= 0) "
204 " in line %d %s", lineNo, fileName);
205 return -1;
206 }
207 if(atomType >= MAX_NO_OF_ATOM_TYPES)
208 {
209 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
210 "(atomType >= MAX_NO_OF_ATOM_TYPES) in line %d %s",
211 lineNo, fileName);
212 return -1;
213 }
214 currAtom = &result.atoms[atomType];
215 spdf = 0;
216 shellBaseIndex = 0;
217 }
218 lineConsumed = 1;
219 break;
220
221 case SHELL_OR_ATOM_EXPECTED:
222 if(line[0] == 'a' || line[0] == 'A') {
223 state = ATOM_EXPECTED;
224 /* fininalize current atom data */
225 if(currAtom == NULL || shellBaseIndex < 0) {
226 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (currAtom == NULL || shellBaseIndex < 0) in line %d %s", lineNo, fileName);
227 return -1;
228 }
229 currAtom->noOfShells = shellBaseIndex;
230 break;
231 } /* else fall through */
232
233 case SHELL_EXPECTED:
234 if(shellBaseIndex < 0) {
235 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (shellBaseIndex < 0) in line %d %s", lineNo, fileName);
236 return -1;
237 }
238 int noOfExponents, noOfShellsCurrBatch;
239 if(sscanf(line, "%d %d %d",
240 &noOfExponents, &noOfShellsCurrBatch, &dummy ) != 3)
241 {
242 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
243 "Shell data expected in line %d:\n%s", lineNo, line);
244 return -1;
245 }
246 if(noOfExponents <= 0)
247 {
248 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
249 "(noOfExponents <= 0) in line %d %s", lineNo, fileName);
250 return -1;
251 }
252 if(noOfExponents >= MAX_NO_OF_CONTR)
253 {
254 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
255 "(noOfExponents >= MAX_NO_OF_CONTR) in line %d",
256 lineNo);
257 return -1;
258 }
259 if(noOfShellsCurrBatch < 0)
260 {
261 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
262 "(noOfShellsCurrBatch < 0) in line %d", lineNo);
263 return -1;
264 }
265 if(noOfShellsCurrBatch == 0) {
266 /* special case: uncontracted, expect only one column */
267 noOfShellsCurrBatch = noOfExponents;
268 uncontracted = 1;
269 } else uncontracted = 0;
270
271 if(shellBaseIndex + noOfShellsCurrBatch >= MAX_NO_OF_SHELLS_PER_ATOM)
272 {
273 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: too many shells.");
274 return -1;
275 }
276 /* initialize shell data. Set the contraction count to its upper
277 limit. remove_zeros() will later check for a better, lower
278 value. */
279 for(int i = 0; i < noOfShellsCurrBatch; i++) {
280 if(currAtom == NULL || spdf < 0) {
281 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (currAtom == NULL || spdf < 0) in line %d %s", lineNo, fileName);
282 return -1;
283 }
284 currAtom->shells[shellBaseIndex+i].type = spdf;
285 currAtom->shells[shellBaseIndex+i].contrCount = noOfExponents;
286 }
287 expNo = 0;
288 state = CONTRACTION_BLOCK_EXPECTED;
289 if(print_raw)
290 do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS,
291 "Block for L=%d primitives: %d contracted: %d",
292 spdf, noOfExponents, noOfShellsCurrBatch);
293
294 lineConsumed = 1;
295 break;
296
297 case CONTRACTION_BLOCK_EXPECTED:
298 currExponent = atof(line);
299 if(currExponent <= 0) {
300 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error: (currExponent <= 0) in line %d", lineNo);
301 return -1;
302 }
303 if(currAtom == NULL || shellBaseIndex < 0 || expNo < 0) {
304 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (currAtom == NULL || shellBaseIndex < 0 || expNo < 0) in line %d %s", lineNo, fileName);
305 return -1;
306 }
307 if(uncontracted) {
308 for(int i = 0; i < noOfShellsCurrBatch; i++) {
309 currAtom->shells[shellBaseIndex+i].exponentList[expNo] =
310 currExponent;
311 currAtom->shells[shellBaseIndex+i].coeffList[expNo] =
312 i == expNo ? 1.0 : 0.0;
313 }
314 } else {
315 int idx = 0;
316 /* skip exponent */
317 while(line[idx] && isspace(line[idx])) idx++;
318 for(int i = 0; i < noOfShellsCurrBatch; i++) {
319 currAtom->shells[shellBaseIndex+i].exponentList[expNo] =
320 currExponent;
321 while(line[idx] && !isspace(line[idx])) idx++;
322 while(line[idx] && isspace(line[idx])) idx++;
323 if( !line[idx] ) {
324 /* Second line begins when we are about to read 7th
325 contraction coefficient (i=6), third line for 14th
326 (i=13), fourth for i=20 etc. If this pattern does not
327 match, warn the user. */
328 if( i != 6 && (i+1) % 7 != 0 )
329 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "WARN: line %d has trailing data: '%s'"
330 "non-conformant basis set file for i=%d.",
331 lineNo, line + idx, i);
332 if(fgets(line, sizeof(line), f) == NULL) {
333 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "reading error when continuing shell data.");
334 return -1;
335 }
336 lineNo++;
337 idx = 0;
338 while(line[idx] && isspace(line[idx])) idx++;
339 }
340 currAtom->shells[shellBaseIndex+i].coeffList[expNo] =
341 atof(line + idx);
342 } /* END FOR i */
343 }
344 if(print_raw) {
345 char line[256], eee[20];
346 line[0] = '\0';
347 for(int i = 0; i<noOfShellsCurrBatch; i++) {
348 sprintf(eee, "%10.5f",
349 (double)currAtom->shells[shellBaseIndex+i].coeffList[expNo]);
350 strcat(line, eee);
351 }
352 do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS,
353 "%d %12.6f: %s", expNo, (double)currExponent, line);
354 }
355 if(++expNo == noOfExponents) {
356 remove_zeros(currAtom, shellBaseIndex, noOfShellsCurrBatch);
357 shellBaseIndex += noOfShellsCurrBatch;
358 spdf++;
359 state = SHELL_OR_ATOM_EXPECTED;
360 }
361 lineConsumed = 1;
362 break;
363 case END_PARSING:
364 /* do nothing */
365 break;
366 }
367 } while(state != END_PARSING);
368 fclose(f);
369
370 /* fininalize current atom data */
371 if(currAtom == NULL || shellBaseIndex < 0) {
372 do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (currAtom == NULL || shellBaseIndex < 0) in line %d %s", lineNo, fileName);
373 return -1;
374 }
375 currAtom->noOfShells = shellBaseIndex;
376
377 /* Postprocessing... */
378 /* set shell ID for each shell in basis set */
379 int currShellID = 0;
380 for(int i = 0; i < MAX_NO_OF_ATOM_TYPES; i++) {
381 int noOfShells = result.atoms[i].noOfShells;
382 for(int j = 0; j < noOfShells; j++) {
383 basisset_shell_struct* currShell = &result.atoms[i].shells[j];
384 currShellID++;
385 currShell->shell_ID = currShellID;
386 } /* END FOR j */
387 } /* END FOR i */
388 do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS, "total number of shells in basis set: %i", currShellID);
389 do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS, "Basis set file '%s' processed OK, noOfAtomTypes = %i",
390 fileName, noOfAtomTypes);
391
392 return 0;
393 }
394