1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30 /** @file basisset.cc
31 
32     \brief Code for representing basis set information for Gaussian
33     basis sets, and for parsing a text file specifying such a
34     basisset.
35 
36     @author: Elias Rudberg <em>responsible</em>.
37 */
38 
39 /* -*-mode:c; indent-tabs-mode: nil -*- */
40 /* basisset.c: provides read_basisset_file() which creates a
41    basisset_struct from a data contained in a file */
42 
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <memory.h>
47 #include <string.h>
48 #include <stdexcept>
49 
50 #include "basisset.h"
51 #include "output.h"
52 #include "memorymanag.h"
53 
54 
basisset_info()55 basisset_info::basisset_info() {
56   atoms.resize(MAX_NO_OF_ATOM_TYPES);
57   clear();
58 }
59 
clear()60 void basisset_info::clear() {
61   memset(&atoms[0], 0x00, MAX_NO_OF_ATOM_TYPES*sizeof(basisset_atom_struct));
62 }
63 
write_to_buffer(char * dataBuffer,size_t const bufferSize) const64 void basisset_info::write_to_buffer ( char * dataBuffer, size_t const bufferSize ) const {
65   if(bufferSize < get_size())
66     throw std::runtime_error("Error in basisset_info::write_to_buffer: bufferSize too small.");
67   memcpy(dataBuffer, &atoms[0], MAX_NO_OF_ATOM_TYPES*sizeof(basisset_atom_struct));
68 }
69 
get_size() const70 size_t basisset_info::get_size() const {
71   return MAX_NO_OF_ATOM_TYPES*sizeof(basisset_atom_struct);
72 }
73 
assign_from_buffer(char const * dataBuffer,size_t const bufferSize)74 void basisset_info::assign_from_buffer ( char const * dataBuffer, size_t const bufferSize) {
75   if(bufferSize != get_size())
76     throw std::runtime_error("Error in basisset_info::assign_from_buffer: wrong bufferSize.");
77   memcpy(&atoms[0], dataBuffer, MAX_NO_OF_ATOM_TYPES*sizeof(basisset_atom_struct));
78 }
79 
80 
81 static void
remove_zeros(basisset_atom_struct * currAtom,int shellBaseIndex,int noOfShellsCurrBatch)82 remove_zeros(basisset_atom_struct* currAtom,
83              int shellBaseIndex, int noOfShellsCurrBatch) {
84   /*  Remove zero coefficients. */
85   for(int i = 0; i < noOfShellsCurrBatch; i++) {
86     int count = 0;
87     ergo_real *coeffList = currAtom->shells[shellBaseIndex+i].coeffList;
88     ergo_real *expList   = currAtom->shells[shellBaseIndex+i].exponentList;
89     for(int j = 0; j < currAtom->shells[shellBaseIndex+i].contrCount; j++) {
90       ergo_real currCoeff    = coeffList[j];
91       ergo_real currExponent = expList[j];
92       if(currCoeff != 0) {
93 	coeffList[count] = currCoeff;
94 	expList[count] = currExponent;
95 	count++;
96       }
97     } /*  END FOR j */
98     currAtom->shells[shellBaseIndex+i].contrCount = count;
99   } /*  END FOR i         */
100 }
101 
102 /* read_basisset_file: reads a basis set from fileName. The basis set
103    exponents and contraction coefficients are placed in result.  The
104    reading procedure is bit convoluted because the basis set file
105    follows the Fortran syntax, with wrapping and skipping empty
106    elements. We basically need to emulate fortran `format'
107    statement. There is one "but" though: AhlrichsDenFit does not
108    follow the format syntax so it will/may be misread by eg. dalton. What
109    a mess...
110 
111    The parser is implemented as a state machine. It still cannot read
112    ANO-type basis sets...
113 */
114 int
read_basisset_file(basisset_info & result,const char * fileName,int dirc,const char * dirv[],int print_raw)115 read_basisset_file(basisset_info & result,
116 		   const char* fileName,
117                    int dirc,
118 		   const char *dirv[],
119                    int print_raw)
120 {
121   enum { END_PARSING, ATOM_EXPECTED, SHELL_EXPECTED,
122          SHELL_OR_ATOM_EXPECTED, CONTRACTION_BLOCK_EXPECTED } state;
123   int uncontracted = 0;
124   char line[512];
125   basisset_atom_struct* currAtom = NULL;
126   int spdf = -1;
127   int shellBaseIndex = -1;
128   int expNo = -1;
129   FILE* f = NULL;
130 
131   if(!fileName) {
132     do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: fileName == NULL.");
133     return -1;
134   }
135   if(fileName[0] == '/')
136     f = fopen(fileName, "rt");
137   else {
138     for(int i = 0; i < dirc; i++) {
139       char ffname[256];
140       int len = strlen(dirv[i]);
141       strncpy(ffname, dirv[i], sizeof(ffname));
142       if(len>0 && ffname[len-1] != '/')
143         strncat(ffname, "/", sizeof(ffname)-1-len);
144       strncat(ffname, fileName, sizeof(ffname)-2-len);
145       do_output(LOG_CAT_WARNING, LOG_AREA_INTEGRALS,
146                 "Trying basis set file '%s'...", ffname);
147       if( (f=fopen(ffname, "rt")) != NULL)
148         break;
149     }
150   }
151 
152   if(f == NULL)
153     {
154       do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error opening file '%s'", fileName);
155       return -1;
156     }
157 
158   /*  now create basis set by reading buf2 */
159   result.clear();
160   int noOfAtomTypes = 0;
161   state = ATOM_EXPECTED;
162   int lineNo = 0;
163   int lineConsumed = 1;
164   ergo_real currExponent = 0;
165   /* start global parsing loop */
166   do {
167     int dummy;
168     if(lineConsumed) {
169       for(unsigned int cc = 0; cc < sizeof(line); cc++)
170 	line[cc] = '\0';
171       if(fgets(line, sizeof(line), f) == NULL) {
172         state = END_PARSING;
173         break;
174       }
175       // This only works if sizeof(line) is large enough. Here we check that sizeof(line) is really large enough.
176       if(strlen(line) >= sizeof(line)-1) {
177 	do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "Error in read_basisset_file: (strlen(line) >= sizeof(line)-1). "
178 		  "This indicates that the basis set file '%s' included a line that was too long, longer than %d characters.", fileName, (int)(sizeof(line)-1));
179 	return -1;
180       }
181       lineConsumed = 0;
182       lineNo++;
183     }
184 
185     for(int cc = strlen(line)-1; cc>=0 && isspace(line[cc]); cc--)
186       line[cc] = '\0';
187 
188     if(line[0] == '$' || line[0] == '!' || line[0] == '#'||
189        line[0] == '*' || line[0] == '\0'|| line[0] == '\n') {
190       lineConsumed = 1; /* skip the comment and move on */
191       continue;
192     }
193     switch(state) {
194     case ATOM_EXPECTED:
195       if(line[0] == 'a' || line[0] == 'A') {
196         noOfAtomTypes++;
197         int atomType = atoi(line+1);
198         if(print_raw)
199           do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS, "Basis set for atom of Z=%d", atomType);
200         state = SHELL_EXPECTED;
201         if(atomType <= 0)
202           {
203             do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (atomType <= 0) "
204                       " in line %d %s", lineNo, fileName);
205             return -1;
206           }
207         if(atomType >= MAX_NO_OF_ATOM_TYPES)
208           {
209             do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
210                       "(atomType >= MAX_NO_OF_ATOM_TYPES) in line %d %s",
211                       lineNo, fileName);
212             return -1;
213           }
214         currAtom = &result.atoms[atomType];
215         spdf = 0;
216         shellBaseIndex = 0;
217       }
218       lineConsumed = 1;
219       break;
220 
221     case SHELL_OR_ATOM_EXPECTED:
222       if(line[0] == 'a' || line[0] == 'A') {
223         state = ATOM_EXPECTED;
224         /* fininalize current atom data */
225         if(currAtom == NULL || shellBaseIndex < 0) {
226 	  do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (currAtom == NULL || shellBaseIndex < 0) in line %d %s", lineNo, fileName);
227           return -1;
228 	}
229         currAtom->noOfShells = shellBaseIndex;
230         break;
231       } /* else fall through */
232 
233     case SHELL_EXPECTED:
234       if(shellBaseIndex < 0) {
235 	do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (shellBaseIndex < 0) in line %d %s", lineNo, fileName);
236         return -1;
237       }
238       int noOfExponents, noOfShellsCurrBatch;
239       if(sscanf(line, "%d %d %d",
240                 &noOfExponents, &noOfShellsCurrBatch, &dummy ) != 3)
241         {
242           do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
243                     "Shell data expected in line %d:\n%s", lineNo, line);
244           return -1;
245         }
246       if(noOfExponents <= 0)
247         {
248           do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
249                     "(noOfExponents <= 0) in line %d %s", lineNo, fileName);
250           return -1;
251         }
252       if(noOfExponents >= MAX_NO_OF_CONTR)
253         {
254           do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
255                     "(noOfExponents >= MAX_NO_OF_CONTR) in line %d",
256                     lineNo);
257           return -1;
258         }
259       if(noOfShellsCurrBatch < 0)
260         {
261           do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: "
262                     "(noOfShellsCurrBatch < 0) in line %d", lineNo);
263           return -1;
264         }
265       if(noOfShellsCurrBatch == 0) {
266         /*  special case: uncontracted, expect only one column */
267         noOfShellsCurrBatch = noOfExponents;
268         uncontracted = 1;
269       } else uncontracted = 0;
270 
271       if(shellBaseIndex + noOfShellsCurrBatch >= MAX_NO_OF_SHELLS_PER_ATOM)
272         {
273           do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: too many shells.");
274           return -1;
275         }
276       /* initialize shell data. Set the contraction count to its upper
277          limit. remove_zeros() will later check for a better, lower
278          value. */
279       for(int i = 0; i < noOfShellsCurrBatch; i++) {
280 	if(currAtom == NULL || spdf < 0) {
281 	  do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (currAtom == NULL || spdf < 0) in line %d %s", lineNo, fileName);
282 	  return -1;
283 	}
284 	currAtom->shells[shellBaseIndex+i].type = spdf;
285 	currAtom->shells[shellBaseIndex+i].contrCount = noOfExponents;
286       }
287       expNo = 0;
288       state = CONTRACTION_BLOCK_EXPECTED;
289       if(print_raw)
290         do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS,
291                   "Block for L=%d primitives: %d contracted: %d",
292                   spdf, noOfExponents, noOfShellsCurrBatch);
293 
294       lineConsumed = 1;
295       break;
296 
297     case CONTRACTION_BLOCK_EXPECTED:
298       currExponent = atof(line);
299       if(currExponent <= 0) {
300 	do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error: (currExponent <= 0) in line %d", lineNo);
301 	return -1;
302       }
303       if(currAtom == NULL || shellBaseIndex < 0 || expNo < 0) {
304 	do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (currAtom == NULL || shellBaseIndex < 0 || expNo < 0) in line %d %s", lineNo, fileName);
305         return -1;
306       }
307       if(uncontracted) {
308         for(int i = 0; i < noOfShellsCurrBatch; i++) {
309           currAtom->shells[shellBaseIndex+i].exponentList[expNo] =
310             currExponent;
311           currAtom->shells[shellBaseIndex+i].coeffList[expNo] =
312             i == expNo ? 1.0 : 0.0;
313         }
314       } else {
315         int idx = 0;
316         /* skip exponent */
317         while(line[idx] && isspace(line[idx]))  idx++;
318         for(int i = 0; i < noOfShellsCurrBatch; i++) {
319           currAtom->shells[shellBaseIndex+i].exponentList[expNo] =
320             currExponent;
321           while(line[idx] && !isspace(line[idx])) idx++;
322           while(line[idx] && isspace(line[idx]))  idx++;
323           if( !line[idx] ) {
324 	    /* Second line begins when we are about to read 7th
325 	       contraction coefficient (i=6), third line for 14th
326 	       (i=13), fourth for i=20 etc. If this pattern does not
327 	       match, warn the user. */
328 	    if( i != 6 && (i+1) % 7 != 0 )
329 	      do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "WARN: line %d has trailing data: '%s'"
330 			"non-conformant basis set file for i=%d.",
331 			lineNo, line + idx, i);
332 	    if(fgets(line, sizeof(line), f) == NULL) {
333 	      do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "reading error when continuing shell data.");
334 	      return -1;
335 	    }
336 	    lineNo++;
337 	    idx = 0;
338 	    while(line[idx] && isspace(line[idx]))  idx++;
339 	  }
340           currAtom->shells[shellBaseIndex+i].coeffList[expNo] =
341             atof(line + idx);
342         }  /*  END FOR i */
343       }
344       if(print_raw) {
345         char line[256], eee[20];
346         line[0] = '\0';
347         for(int i = 0; i<noOfShellsCurrBatch; i++) {
348           sprintf(eee, "%10.5f",
349                   (double)currAtom->shells[shellBaseIndex+i].coeffList[expNo]);
350           strcat(line, eee);
351         }
352         do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS,
353                   "%d %12.6f: %s", expNo, (double)currExponent, line);
354       }
355       if(++expNo == noOfExponents) {
356         remove_zeros(currAtom, shellBaseIndex, noOfShellsCurrBatch);
357         shellBaseIndex += noOfShellsCurrBatch;
358         spdf++;
359         state = SHELL_OR_ATOM_EXPECTED;
360       }
361       lineConsumed = 1;
362       break;
363     case END_PARSING:
364       /*  do nothing */
365       break;
366     }
367   } while(state != END_PARSING);
368   fclose(f);
369 
370   /* fininalize current atom data */
371   if(currAtom == NULL || shellBaseIndex < 0) {
372     do_output(LOG_CAT_ERROR, LOG_AREA_INTEGRALS, "error in read_basisset_file: (currAtom == NULL || shellBaseIndex < 0) in line %d %s", lineNo, fileName);
373     return -1;
374   }
375   currAtom->noOfShells = shellBaseIndex;
376 
377   /*  Postprocessing... */
378   /*  set shell ID for each shell in basis set */
379   int currShellID = 0;
380   for(int i = 0; i < MAX_NO_OF_ATOM_TYPES; i++) {
381     int noOfShells = result.atoms[i].noOfShells;
382     for(int j = 0; j < noOfShells; j++) {
383       basisset_shell_struct* currShell = &result.atoms[i].shells[j];
384       currShellID++;
385       currShell->shell_ID = currShellID;
386     } /*  END FOR j */
387   } /*  END FOR i */
388   do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS, "total number of shells in basis set: %i", currShellID);
389   do_output(LOG_CAT_INFO, LOG_AREA_INTEGRALS, "Basis set file '%s' processed OK, noOfAtomTypes = %i",
390             fileName, noOfAtomTypes);
391 
392   return 0;
393 }
394