1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30 /** @file convert_basis_set_file.cc
31 
32     \brief Program that can be used to convert a file downloaded from
33     the EMSL Basis Set Library (in SuperMolecule format) to the format
34     expected by the Ergo program.
35 
36     @author: Elias Rudberg <em>responsible</em>
37 */
38 
39 #include <cstdio>
40 #include <cstdlib>
41 #include <vector>
42 #include <cstring>
43 #include <string>
44 #include <cassert>
45 
/** Translate an upper-case element name into its nuclear charge.

    The table covers hydrogen (1) through krypton (36). Matching is
    exact and case-sensitive. Besides the spellings "ALUMINUM",
    "PHOSPHOROUS" and "SULFUR", the alternate spellings "ALUMINIUM",
    "PHOSPHORUS" and "SULPHUR" are accepted as well, so that a
    differently spelled input file is not rejected.

    @param atomName Element name in upper case, e.g. "CARBON".
    @return The nuclear charge (1..36), or -1 if the name is not in
    the table.
*/
static int getChargeForAtomName(const std::string & atomName) {
  struct ElementEntry {
    const char* name;
    int charge;
  };
  static const ElementEntry elementTable[] = {
    {"HYDROGEN",     1},
    {"HELIUM",       2},
    {"LITHIUM",      3},
    {"BERYLLIUM",    4},
    {"BORON",        5},
    {"CARBON",       6},
    {"NITROGEN",     7},
    {"OXYGEN",       8},
    {"FLUORINE",     9},
    {"NEON",        10},
    {"SODIUM",      11},
    {"MAGNESIUM",   12},
    {"ALUMINUM",    13},
    {"ALUMINIUM",   13},  // alternate spelling
    {"SILICON",     14},
    {"PHOSPHOROUS", 15},
    {"PHOSPHORUS",  15},  // alternate (standard) spelling
    {"SULFUR",      16},
    {"SULPHUR",     16},  // alternate spelling
    {"CHLORINE",    17},
    {"ARGON",       18},
    {"POTASSIUM",   19},
    {"CALCIUM",     20},
    {"SCANDIUM",    21},
    {"TITANIUM",    22},
    {"VANADIUM",    23},
    {"CHROMIUM",    24},
    {"MANGANESE",   25},
    {"IRON",        26},
    {"COBALT",      27},
    {"NICKEL",      28},
    {"COPPER",      29},
    {"ZINC",        30},
    {"GALLIUM",     31},
    {"GERMANIUM",   32},
    {"ARSENIC",     33},
    {"SELENIUM",    34},
    {"BROMINE",     35},
    {"KRYPTON",     36}
  };
  const int nEntries = sizeof(elementTable) / sizeof(elementTable[0]);
  for(int i = 0; i < nEntries; i++) {
    if(atomName == elementTable[i].name)
      return elementTable[i].charge;
  }
  // Unknown name: callers treat any value <= 0 as failure.
  return -1;
}
85 
/** Extract the word that follows the leading "$ " marker of a line.

    The line is expected to look like "$ WORD ..." : a '$' character,
    exactly one space, and then a word that runs up to the next space
    or to the end of the line. These expectations are checked with
    assert() before any character is copied.

    @param str The line to parse; must be longer than 3 characters,
    start with "$ " and have a non-space character at index 2.
    @return The characters from index 2 up to (not including) the next
    space, or up to the end of the line if there is no further space.
*/
static std::string getSecondWordFromLine(const std::string & str) {
  assert(str.length() > 3);
  assert(str[0] == '$');
  assert(str[1] == ' ');
  const std::string::size_type wordStart = 2;
  assert(str[wordStart] != ' ');
  // Work on the std::string directly; no fixed or variable-length
  // char arrays are needed to cut out the word.
  const std::string::size_type wordEnd = str.find(' ', wordStart);
  if(wordEnd == std::string::npos)
    return str.substr(wordStart);
  return str.substr(wordStart, wordEnd - wordStart);
}
107 
/** Tell whether a character is one of the decimal digits '0'..'9'.

    @param c The character to classify.
    @return true if c lies in the range '0' to '9' inclusive.
*/
static bool is_digit(char c) {
  return !(c < '0' || c > '9');
}
113 
checkIfLineHasThreeNumbers(const std::string & str)114 static bool checkIfLineHasThreeNumbers(const std::string & str) {
115   int len = str.length();
116   char s[len+1];
117   strcpy(s, str.c_str());
118   int nDigitsFound = 0;
119   int idx = 0;
120   while(idx < len) {
121     if(s[idx] == ' ') {
122       idx++;
123       continue;
124     }
125     if(is_digit(s[idx])) {
126       // Digit found. Check how many digits follow.
127       int nDigits = 1;
128       for(int k = 1; k < len; k++) {
129 	if(is_digit(s[idx+k]))
130 	  nDigits++;
131 	else
132 	  break;
133       }
134       idx += nDigits;
135       nDigitsFound++;
136     }
137     else
138       return false;
139   }
140   if(nDigitsFound == 3)
141     return true;
142   return false;
143 }
144 
main(int argc,char * argv[])145 int main(int argc, char* argv[])
146 {
147   printf("convert_basis_set_file 1.0\n");
148   printf("Written by Elias Rudberg\n");
149   printf("Source modified on Mon  9 Nov 13:22:39 CET 2015\n");
150   if(argc != 3) {
151     printf("usage: convert_basis_set_file infile outfile\n");
152     return -1;
153   }
154 
155   const char* inFileName = argv[1];
156   const char* outFileName = argv[2];
157 
158   printf("inFileName = '%s', outFileName = '%s'\n", inFileName, outFileName);
159 
160   FILE* inFile = fopen(inFileName, "rb");
161   if(!inFile) {
162     printf("Error opening inFile '%s' for reading.\n", inFileName);
163     return -1;
164   }
165   FILE* outFile = fopen(outFileName, "wb");
166   if(!outFile) {
167     printf("Error opening outFile '%s' for writing.\n", outFileName);
168     return -1;
169   }
170 
171   const int MAXFILESIZE = 8888888;
172   std::vector<char> buf(MAXFILESIZE);
173   memset(&buf[0], 0x00, MAXFILESIZE);
174 
175   if(fread(&buf[0], 1, MAXFILESIZE, inFile) <= 0) {
176     printf("Error reading inFile\n");
177     return -1;
178   }
179   if(buf[MAXFILESIZE-1] != '\0') {
180     printf("Error: zero not found at end of buffer. File too large?\n");
181     return -1;
182   }
183 
184   // Count number of lines in file
185   int nLines = 0;
186   for(int i = 0; i < MAXFILESIZE; i++) {
187     if(buf[i] == '\n')
188       nLines++;
189   }
190   nLines++;
191 
192   std::vector<std::string> lines(nLines);
193 
194   const char* p = &buf[0];
195   int lineCount = 0;
196   while(*p != '\0') {
197     // Find end of line
198     const char* q = p;
199     while(*q != '\n' && *q != '\0')
200       q++;
201     int nChars = q - p;
202     char lineStr[nChars+1];
203     memcpy(lineStr, p, nChars);
204     lineStr[nChars] = '\0';
205     lines[lineCount] = lineStr;
206     lineCount++;
207     p = q;
208     if(*q == '\n')
209       p++;
210   }
211   printf("lineCount = %d\n", lineCount);
212 
213   const std::string str_s = "$ S-TYPE FUNCTIONS";
214   const std::string str_p = "$ P-TYPE FUNCTIONS";
215   const std::string str_d = "$ D-TYPE FUNCTIONS";
216   const std::string str_f = "$ F-TYPE FUNCTIONS";
217   const std::string str_g = "$ G-TYPE FUNCTIONS";
218   const std::string str_h = "$ H-TYPE FUNCTIONS";
219   const std::string str_i = "$ I-TYPE FUNCTIONS";
220 
221   // Check how many atom types there are
222   int nAtomTypes = 0;
223   for(int i = 0; i < lineCount; i++) {
224     std::string & currLine = lines[i];
225     if(currLine == str_s)
226       nAtomTypes++;
227   }
228   printf("nAtomTypes = %d\n", nAtomTypes);
229   assert(nAtomTypes >= 1);
230 
231   std::vector<std::string> linesToInsert(nAtomTypes);
232   int linesToInsertCount = 0;
233 
234   // OK, now we have extracted the lines.
235   // Look for lines containing three integer numbers.
236   int lineIdx = 0;
237   while(lineIdx < lineCount) {
238     std::string & currLine = lines[lineIdx];
239     if(checkIfLineHasThreeNumbers(currLine)) {
240       // Now previous line must be one of the following strings:
241       assert(lineIdx > 5);
242       std::string & prevLine = lines[lineIdx-1];
243       if(prevLine != str_s &&
244 	 prevLine != str_p &&
245 	 prevLine != str_d &&
246 	 prevLine != str_f &&
247 	 prevLine != str_g &&
248 	 prevLine != str_h &&
249 	 prevLine != str_i) {
250 	printf("ERROR: string like 'X-TYPE FUNCTIONS' not found where expected.\n");
251 	return -1;
252       }
253       if(prevLine == str_s) {
254 	// Now we found a place where info about nuclear charge should be inserted.
255 	std::string & prevLine2 = lines[lineIdx-2];
256 	std::string atomName = getSecondWordFromLine(prevLine2);
257 	int charge = getChargeForAtomName(atomName);
258 	if(charge <= 0) {
259 	  printf("ERROR: getChargeForAtomName failed for atomName = '%s'\n", atomName.c_str());
260 	  return -1;
261 	}
262 	printf("atomName = '%s', charge = %d\n", atomName.c_str(), charge);
263 	char s[88];
264 	sprintf(s, "a %d", charge);
265 	std::string lineToInsert = s;
266 	linesToInsert[linesToInsertCount] = lineToInsert;
267 	linesToInsertCount++;
268       }
269     }
270     lineIdx++;
271   }
272   assert(linesToInsertCount == nAtomTypes);
273 
274   int nLinesFinal = nLines + nAtomTypes;
275   std::vector<std::string> linesFinal(nLinesFinal);
276   // No go through all lines again, creating linesFinal.
277   lineIdx = 0;
278   int lineIdx2 = 0;
279   int atomTypeCounter = 0;
280   while(lineIdx < lineCount) {
281     // Find next str_s line
282     int foundIdx = -1;
283     for(int idxTmp = lineIdx; idxTmp < lineCount; idxTmp++) {
284       if(lines[idxTmp] == str_s) {
285 	foundIdx = idxTmp;
286 	break;
287       }
288     }
289     assert(foundIdx >= 0);
290     while(lineIdx < foundIdx)
291       linesFinal[lineIdx2++] = lines[lineIdx++];
292     linesFinal[lineIdx2++] = linesToInsert[atomTypeCounter++];
293     linesFinal[lineIdx2++] = lines[lineIdx++];
294     if(atomTypeCounter == nAtomTypes) {
295       while(lineIdx < lineCount)
296 	linesFinal[lineIdx2++] = lines[lineIdx++];
297     }
298   }
299 
300   for(int i = 0; i < nLinesFinal; i++)
301     fprintf(outFile, "%s\n", linesFinal[i].c_str());
302   fclose(inFile);
303   fclose(outFile);
304 
305   printf("Done, file '%s' created OK, nLinesFinal = %d.\n", outFileName, nLinesFinal);
306 
307   return 0;
308 }
309