1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /** @file convert_basis_set_file.cc
31
32 \brief Program that can be used to convert a file downloaded from
33 the EMSL Basis Set Library (in SuperMolecule format) to the format
34 expected by the Ergo program.
35
36 @author: Elias Rudberg <em>responsible</em>
37 */
38
39 #include <cstdio>
40 #include <cstdlib>
41 #include <vector>
42 #include <cstring>
43 #include <string>
44 #include <cassert>
45
/** Look up the nuclear charge for an element name.

    The name is compared exactly (case-sensitive) against a fixed list
    of upper-case element names covering charges 1 (HYDROGEN) through
    36 (KRYPTON). Note that element 15 is matched by the spelling
    "PHOSPHOROUS".

    @param atomName element name, e.g. "CARBON".
    @return the nuclear charge, or -1 if the name is not in the list.
*/
static int getChargeForAtomName(const std::string & atomName) {
  // Ordered by nuclear charge: the entry at index k has charge k+1.
  static const char* const elementNames[] = {
    "HYDROGEN",  "HELIUM",    "LITHIUM",     "BERYLLIUM", "BORON",
    "CARBON",    "NITROGEN",  "OXYGEN",      "FLUORINE",  "NEON",
    "SODIUM",    "MAGNESIUM", "ALUMINUM",    "SILICON",   "PHOSPHOROUS",
    "SULFUR",    "CHLORINE",  "ARGON",       "POTASSIUM", "CALCIUM",
    "SCANDIUM",  "TITANIUM",  "VANADIUM",    "CHROMIUM",  "MANGANESE",
    "IRON",      "COBALT",    "NICKEL",      "COPPER",    "ZINC",
    "GALLIUM",   "GERMANIUM", "ARSENIC",     "SELENIUM",  "BROMINE",
    "KRYPTON"
  };
  const int nElements = sizeof(elementNames) / sizeof(elementNames[0]);
  for(int k = 0; k < nElements; k++) {
    if(atomName == elementNames[k])
      return k + 1;
  }
  return -1;
}
85
/** Extract the word that follows the leading "$ " of a line.

    The line is expected to be longer than three characters, to start
    with '$' followed by exactly one space, and to have a non-space
    character at index 2. These expectations are checked with assert().
    The returned word starts at index 2 and ends just before the next
    space character, or at the end of the line if there is no further
    space.

    @param str line of the form "$ WORD ...".
    @return the word starting at index 2.
*/
static std::string getSecondWordFromLine(const std::string & str) {
  assert(str.length() > 3);
  assert(str[0] == '$');
  assert(str[1] == ' ');
  assert(str[2] != ' ');
  const std::string::size_type wordStart = 2;
  // Only reachable when the asserts are compiled out (NDEBUG): return
  // an empty word rather than indexing past the end of the string.
  if(str.length() <= wordStart)
    return std::string();
  // Work directly on the std::string; no variable-length stack arrays
  // (a non-standard extension in C++) and no intermediate copies.
  const std::string::size_type wordEnd = str.find(' ', wordStart);
  if(wordEnd == std::string::npos)
    return str.substr(wordStart);
  return str.substr(wordStart, wordEnd - wordStart);
}
107
/** Check whether a character is a decimal digit.
    @param c character to test.
    @return true if c lies in the range '0' through '9', false otherwise.
*/
static bool is_digit(char c) {
  return !(c < '0' || c > '9');
}
113
checkIfLineHasThreeNumbers(const std::string & str)114 static bool checkIfLineHasThreeNumbers(const std::string & str) {
115 int len = str.length();
116 char s[len+1];
117 strcpy(s, str.c_str());
118 int nDigitsFound = 0;
119 int idx = 0;
120 while(idx < len) {
121 if(s[idx] == ' ') {
122 idx++;
123 continue;
124 }
125 if(is_digit(s[idx])) {
126 // Digit found. Check how many digits follow.
127 int nDigits = 1;
128 for(int k = 1; k < len; k++) {
129 if(is_digit(s[idx+k]))
130 nDigits++;
131 else
132 break;
133 }
134 idx += nDigits;
135 nDigitsFound++;
136 }
137 else
138 return false;
139 }
140 if(nDigitsFound == 3)
141 return true;
142 return false;
143 }
144
main(int argc,char * argv[])145 int main(int argc, char* argv[])
146 {
147 printf("convert_basis_set_file 1.0\n");
148 printf("Written by Elias Rudberg\n");
149 printf("Source modified on Mon 9 Nov 13:22:39 CET 2015\n");
150 if(argc != 3) {
151 printf("usage: convert_basis_set_file infile outfile\n");
152 return -1;
153 }
154
155 const char* inFileName = argv[1];
156 const char* outFileName = argv[2];
157
158 printf("inFileName = '%s', outFileName = '%s'\n", inFileName, outFileName);
159
160 FILE* inFile = fopen(inFileName, "rb");
161 if(!inFile) {
162 printf("Error opening inFile '%s' for reading.\n", inFileName);
163 return -1;
164 }
165 FILE* outFile = fopen(outFileName, "wb");
166 if(!outFile) {
167 printf("Error opening outFile '%s' for writing.\n", outFileName);
168 return -1;
169 }
170
171 const int MAXFILESIZE = 8888888;
172 std::vector<char> buf(MAXFILESIZE);
173 memset(&buf[0], 0x00, MAXFILESIZE);
174
175 if(fread(&buf[0], 1, MAXFILESIZE, inFile) <= 0) {
176 printf("Error reading inFile\n");
177 return -1;
178 }
179 if(buf[MAXFILESIZE-1] != '\0') {
180 printf("Error: zero not found at end of buffer. File too large?\n");
181 return -1;
182 }
183
184 // Count number of lines in file
185 int nLines = 0;
186 for(int i = 0; i < MAXFILESIZE; i++) {
187 if(buf[i] == '\n')
188 nLines++;
189 }
190 nLines++;
191
192 std::vector<std::string> lines(nLines);
193
194 const char* p = &buf[0];
195 int lineCount = 0;
196 while(*p != '\0') {
197 // Find end of line
198 const char* q = p;
199 while(*q != '\n' && *q != '\0')
200 q++;
201 int nChars = q - p;
202 char lineStr[nChars+1];
203 memcpy(lineStr, p, nChars);
204 lineStr[nChars] = '\0';
205 lines[lineCount] = lineStr;
206 lineCount++;
207 p = q;
208 if(*q == '\n')
209 p++;
210 }
211 printf("lineCount = %d\n", lineCount);
212
213 const std::string str_s = "$ S-TYPE FUNCTIONS";
214 const std::string str_p = "$ P-TYPE FUNCTIONS";
215 const std::string str_d = "$ D-TYPE FUNCTIONS";
216 const std::string str_f = "$ F-TYPE FUNCTIONS";
217 const std::string str_g = "$ G-TYPE FUNCTIONS";
218 const std::string str_h = "$ H-TYPE FUNCTIONS";
219 const std::string str_i = "$ I-TYPE FUNCTIONS";
220
221 // Check how many atom types there are
222 int nAtomTypes = 0;
223 for(int i = 0; i < lineCount; i++) {
224 std::string & currLine = lines[i];
225 if(currLine == str_s)
226 nAtomTypes++;
227 }
228 printf("nAtomTypes = %d\n", nAtomTypes);
229 assert(nAtomTypes >= 1);
230
231 std::vector<std::string> linesToInsert(nAtomTypes);
232 int linesToInsertCount = 0;
233
234 // OK, now we have extracted the lines.
235 // Look for lines containing three integer numbers.
236 int lineIdx = 0;
237 while(lineIdx < lineCount) {
238 std::string & currLine = lines[lineIdx];
239 if(checkIfLineHasThreeNumbers(currLine)) {
240 // Now previous line must be one of the following strings:
241 assert(lineIdx > 5);
242 std::string & prevLine = lines[lineIdx-1];
243 if(prevLine != str_s &&
244 prevLine != str_p &&
245 prevLine != str_d &&
246 prevLine != str_f &&
247 prevLine != str_g &&
248 prevLine != str_h &&
249 prevLine != str_i) {
250 printf("ERROR: string like 'X-TYPE FUNCTIONS' not found where expected.\n");
251 return -1;
252 }
253 if(prevLine == str_s) {
254 // Now we found a place where info about nuclear charge should be inserted.
255 std::string & prevLine2 = lines[lineIdx-2];
256 std::string atomName = getSecondWordFromLine(prevLine2);
257 int charge = getChargeForAtomName(atomName);
258 if(charge <= 0) {
259 printf("ERROR: getChargeForAtomName failed for atomName = '%s'\n", atomName.c_str());
260 return -1;
261 }
262 printf("atomName = '%s', charge = %d\n", atomName.c_str(), charge);
263 char s[88];
264 sprintf(s, "a %d", charge);
265 std::string lineToInsert = s;
266 linesToInsert[linesToInsertCount] = lineToInsert;
267 linesToInsertCount++;
268 }
269 }
270 lineIdx++;
271 }
272 assert(linesToInsertCount == nAtomTypes);
273
274 int nLinesFinal = nLines + nAtomTypes;
275 std::vector<std::string> linesFinal(nLinesFinal);
276 // No go through all lines again, creating linesFinal.
277 lineIdx = 0;
278 int lineIdx2 = 0;
279 int atomTypeCounter = 0;
280 while(lineIdx < lineCount) {
281 // Find next str_s line
282 int foundIdx = -1;
283 for(int idxTmp = lineIdx; idxTmp < lineCount; idxTmp++) {
284 if(lines[idxTmp] == str_s) {
285 foundIdx = idxTmp;
286 break;
287 }
288 }
289 assert(foundIdx >= 0);
290 while(lineIdx < foundIdx)
291 linesFinal[lineIdx2++] = lines[lineIdx++];
292 linesFinal[lineIdx2++] = linesToInsert[atomTypeCounter++];
293 linesFinal[lineIdx2++] = lines[lineIdx++];
294 if(atomTypeCounter == nAtomTypes) {
295 while(lineIdx < lineCount)
296 linesFinal[lineIdx2++] = lines[lineIdx++];
297 }
298 }
299
300 for(int i = 0; i < nLinesFinal; i++)
301 fprintf(outFile, "%s\n", linesFinal[i].c_str());
302 fclose(inFile);
303 fclose(outFile);
304
305 printf("Done, file '%s' created OK, nLinesFinal = %d.\n", outFileName, nLinesFinal);
306
307 return 0;
308 }
309