1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /** @file molecule.cc
31
32 @brief Class representing a molecule as a set of atoms with
33 assiciated coordinates and charges of the atomic nuclei.
34
35 @author: Elias Rudberg <em>responsible</em>
36 */
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <memory.h>
41 #include <math.h>
42 #include <string.h>
43 #include <cassert>
44 #include "molecule.h"
45 #include "xyz_file_parser.h"
46 #include "output.h"
47 #include "memorymanag.h"
48 #include "units.h"
49 #include "utilities.h"
50
51
52 static ergo_real
get_distance_between_atoms(const Atom & atomA,const Atom & atomB)53 get_distance_between_atoms(const Atom& atomA, const Atom& atomB)
54 {
55 int i;
56 ergo_real sum = 0;
57 for(i = 0; i < 3; i++)
58 {
59 ergo_real dx = atomB.coords[i] - atomA.coords[i];
60 sum += dx*dx;
61 }
62 return template_blas_sqrt(sum);
63 }
64
65 void
getExtremeInternuclearDistancesQuadratic(ergo_real & minDist,ergo_real & maxDist) const66 Molecule::getExtremeInternuclearDistancesQuadratic(ergo_real & minDist, ergo_real & maxDist) const
67 {
68 minDist = maxDist = 0; // This matters if there is only one atom.
69 bool firstTime = true;
70 for(int A = 0; A < noOfAtoms; A++)
71 for(int B = A+1; B < noOfAtoms; B++) {
72 const Atom& atomA = atoms[A];
73 const Atom& atomB = atoms[B];
74 ergo_real RAB = get_distance_between_atoms(atomA, atomB);
75 if(firstTime) {
76 minDist = maxDist = RAB;
77 firstTime = false;
78 }
79 else {
80 minDist = RAB < minDist ? RAB : minDist;
81 maxDist = RAB > maxDist ? RAB : maxDist;
82 }
83 }
84 }
85
86 ergo_real
getNuclearRepulsionEnergyQuadratic() const87 Molecule::getNuclearRepulsionEnergyQuadratic() const
88 {
89 int A, B;
90 ergo_real sum;
91 int N = noOfAtoms;
92 do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
93 "get_nuclear_repulsion_energy, N = %i", N);
94 sum = 0;
95 for(A = 0; A < N; A++)
96 for(B = A+1; B < N; B++)
97 {
98 const Atom& atomA = atoms[A];
99 const Atom& atomB = atoms[B];
100 ergo_real ZA = atomA.charge;
101 ergo_real ZB = atomB.charge;
102 ergo_real RAB = get_distance_between_atoms(atomA, atomB);
103 sum += ZA * ZB / RAB;
104 }
105 do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
106 "get_nuclear_repulsion_energy returning energy %33.22f", (double)sum);
107 return sum;
108 }
109
110 void
getNuclearRepulsionEnergyGradientContribQuadratic(ergo_real * resultGradient) const111 Molecule::getNuclearRepulsionEnergyGradientContribQuadratic(ergo_real* resultGradient) const
112 {
113 int N = noOfAtoms;
114 int A, B;
115 for(A = 0; A < N; A++)
116 for(B = A+1; B < N; B++) {
117 const Atom& atomA = atoms[A];
118 const Atom& atomB = atoms[B];
119 ergo_real ZA = atomA.charge;
120 ergo_real ZB = atomB.charge;
121 ergo_real dx = atomB.coords[0] - atomA.coords[0];
122 ergo_real dy = atomB.coords[1] - atomA.coords[1];
123 ergo_real dz = atomB.coords[2] - atomA.coords[2];
124 ergo_real r = template_blas_sqrt(dx*dx + dy*dy + dz*dz);
125 ergo_real derivative_x = ZA * ZB * (-0.5) * 2.0 * dx / (r*r*r);
126 ergo_real derivative_y = ZA * ZB * (-0.5) * 2.0 * dy / (r*r*r);
127 ergo_real derivative_z = ZA * ZB * (-0.5) * 2.0 * dz / (r*r*r);
128 resultGradient[A*3+0] += -1 * derivative_x;
129 resultGradient[A*3+1] += -1 * derivative_y;
130 resultGradient[A*3+2] += -1 * derivative_z;
131 resultGradient[B*3+0] += 1 * derivative_x;
132 resultGradient[B*3+1] += 1 * derivative_y;
133 resultGradient[B*3+2] += 1 * derivative_z;
134 }
135 }
136
137 ergo_real
getNuclearElectricFieldEnergy(const Vector3D & electricField) const138 Molecule::getNuclearElectricFieldEnergy(const Vector3D& electricField) const
139 {
140 int N = noOfAtoms;
141 do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
142 "get_nuclear_electric_field_energy, N = %i", N);
143 ergo_real sum = 0;
144 int A;
145 for(A = 0; A < N; A++)
146 {
147 const Atom* atom = &atoms[A];
148 sum += atom->charge * atom->coords[0] * electricField.v[0];
149 sum += atom->charge * atom->coords[1] * electricField.v[1];
150 sum += atom->charge * atom->coords[2] * electricField.v[2];
151 } // END FOR A
152 do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
153 "get_nuclear_electric_field_energy returning energy %33.22f", (double)sum);
154 return sum;
155 }
156
157 int
getNumberOfElectrons() const158 Molecule::getNumberOfElectrons() const
159 {
160 int i;
161 ergo_real noOfElectrons;
162 ergo_real sum = 0;
163 for(i = 0; i < noOfAtoms; i++)
164 sum += atoms[i].charge;
165 noOfElectrons = sum - netCharge;
166 return (int)noOfElectrons;
167 }
168
169
170 static int
readMoleculeFileInMolFormat(Molecule * result,const char * fileName,int netCharge,char ** basisfilename)171 readMoleculeFileInMolFormat(Molecule* result, const char* fileName,
172 int netCharge, char **basisfilename)
173 {
174 int nAtoms, nAtomsCurrType;
175 int i, j, ii, lineLen, noOfAtomTypes, atomTypeNo, can_read_basset;
176 char* p;
177 ergo_real unitFactor, atomCharge;
178 long int fileSize = get_file_size(fileName);
179 if(fileSize < 0) {
180 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error getting file size for file '%s'", fileName);
181 return -1;
182 }
183 FILE* f = fopen(fileName, "rt");
184 if(f == NULL)
185 {
186 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error opening file '%s'", fileName);
187 return -1;
188 }
189 size_t bufSize = fileSize + 10000;
190 std::vector<char> buf(bufSize);
191 memset(&buf[0], 0, bufSize);
192
193 int nBytes = (int)fread(&buf[0], sizeof(char), bufSize, f);
194 fclose(f);
195 if(nBytes <= 0) {
196 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error reading file '%s'", fileName);
197 return -1;
198 }
199
200 do_output(LOG_CAT_INFO, LOG_AREA_MAIN, "File '%s' read OK, nBytes = %i", fileName, nBytes);
201
202 nAtoms = 0;
203
204 p = &buf[0];
205
206 // skip 3 or 4 lines
207 if(strncmp(p, "BASIS", 5) == 0) { i = 3; can_read_basset = 1; }
208 else if(strncmp(p, "ATOMDF", 6) == 0 ||
209 strncmp(p, "ATOMBASIS", 9) == 0) { i = 2; can_read_basset = 0; }
210 else
211 {
212 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "Unsupported molecule input file type");
213 return -1;
214 }
215 while((*p != '\n') && (*p != '\0')) p++; /* skip first line */
216 p++;
217 // if basisfilename is NULL we skip getting basis set string.
218 if(basisfilename)
219 {
220 if(can_read_basset && !*basisfilename) {
221 char *eol = strchr(p, '\n');
222 if(eol) {
223 while (--eol>p && *eol == ' ')
224 ;
225 if (eol>p) {
226 size_t len = eol-p+1;
227 *basisfilename = (char*)ergo_malloc(len+1);
228 strncpy(*basisfilename, p, len);
229 (*basisfilename)[len] = '\0';
230 }
231 }
232 }
233 }
234 while(i--) {
235 while((*p != '\n') && (*p != '\0')) p++;
236 p++;
237 }
238 if(*p == '\0') return -2;
239
240 // the molecule input can follow in a fixed or a free format.
241 unitFactor = 1.0;
242 if( sscanf(p, "%d", &noOfAtomTypes) == 1)
243 {
244 // check if Angstrom flag is set
245 lineLen = 0;
246 for(ii = 0; ii <= 19; ii++)
247 {
248 if((p[ii] == '\n') || (p[ii] == '\0'))
249 break;
250 lineLen++;
251 }
252 if(lineLen > 19)
253 {
254 if(p[19] != ' ')
255 unitFactor = UNIT_one_Angstrom;
256 }
257 // skip till end of line
258 while(*p && *p != '\n') p++;
259 }
260 else
261 { /* "free" format */
262 do {
263 if(strncasecmp(p, "Atomtypes=", 10) == 0)
264 sscanf(p+10, "%d", &noOfAtomTypes);
265 else if(strncmp(p, "Angstrom", 8) == 0)
266 unitFactor = UNIT_one_Angstrom;
267 while(*p && *p != '\n' && *p != ' ') p++;
268 if(*p != ' ') break;
269 p++;
270 } while(1);
271 }
272 if(*p == '\n') p++;
273 do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
274 "noOfAtomTypes = %i dist. unit=%f au", noOfAtomTypes, unitFactor);
275 if(noOfAtomTypes <= 0)
276 {
277 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "Error: (noOfAtomTypes <= 0)");
278 return -3;
279 }
280
281 for(atomTypeNo = 0; atomTypeNo < noOfAtomTypes; atomTypeNo++)
282 {
283 // now p should point to beginning of new atom type
284 float charge;
285 if(*p == '\0')
286 {
287 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error in readMoleculeFile for atom type %i",
288 atomTypeNo+1);
289 return -4;
290 }
291
292 do_output(LOG_CAT_INFO, LOG_AREA_MAIN, "atomTypeNo = %i", atomTypeNo);
293
294 // skip blanks
295 while(*p == ' ') p++;
296 if(*p == '\0') return -5;
297 // if the first character on the line is a digit, we assume fixed format
298 if(*p >= '0' && *p <= '9')
299 {
300 charge = (float)atof(p);
301 while(*p != ' ' && *p != '\n' && *p != '\0') p++;
302 // skip blanks
303 while(*p == ' ') p++;
304 // now we expect another digit
305 if(*p < '0' || *p > '9')
306 return -6;
307 nAtomsCurrType = atoi(p);
308 while(*p != ' ' && *p != '\n' && *p != '\0') p++;
309 // skip blanks
310 while(*p == ' ') p++;
311 // now we expect end of line
312 if(*p != '\n')
313 return -7;
314 p++;
315 }
316 else
317 { /* "free" format */
318 charge = -1;
319 nAtomsCurrType = -1;
320 do {
321 if(strncmp(p, "Charge=", 7) == 0)
322 sscanf(p+7, "%f", &charge);
323 else if(strncmp(p, "Atoms=", 6) == 0)
324 sscanf(p+6, "%d", &nAtomsCurrType);
325 while(*p && *p != '\n' && *p != ' ') p++;
326 if(*p != ' ') break;
327 p++;
328 } while(1);
329 if(charge < 0) {
330 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN,
331 "Invalid charge = %5.2f", (double)charge);
332 return -8;
333 }
334 if(nAtomsCurrType < 0) {
335 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN,
336 "Invalid number of atoms = %i", nAtomsCurrType);
337 return -8;
338 }
339 }
340
341 do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
342 "charge = %5.2f, nAtomsCurrType = %i", (double)charge, nAtomsCurrType);
343
344 atomCharge = charge;
345 if(atomCharge <= 0)
346 return -9;
347
348 for(i = 0; i < nAtomsCurrType; i++)
349 {
350 // skip atom label
351 while(*p == ' ') p++;
352 while((*p != ' ') && (*p != '\0')) p++;
353 if(*p == '\0') return -10;
354 while(*p == ' ') p++;
355 // read coordinates
356 ergo_real coords[3];
357 for(j = 0; j < 3; j++)
358 {
359 // skip blanks
360 while(*p == ' ') p++;
361 if(*p == '\0') return -11;
362 coords[j] = atof(p) * unitFactor;
363 while((*p != ' ') && (*p != '\n') && (*p != '\0')) p++;
364 if(*p == '\0') return -12;
365 }
366
367 // skip rest of line
368 while((*p != '\n') && (*p != '\0'))
369 p++;
370 p++;
371
372 result->addAtom(atomCharge, coords[0], coords[1], coords[2]);
373 nAtoms++;
374 } // END FOR i
375 } // END FOR atomTypeNo
376
377 // done. now there should be nothing more in the file.
378 while(*p != '\0')
379 {
380 if(*p != ' ' && *p != '\n')
381 {
382 do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error in readMoleculeFile: "
383 "non-blank character found after last atom.");
384 return -14;
385 }
386 p++;
387 }
388
389 #if 0
390 for(i = 0; i < nAtoms; i++)
391 {
392 printf("%5.2f %22.11f %22.11f %22.11f\n",
393 result->atoms[i].charge,
394 result->atoms[i].coords[0],
395 result->atoms[i].coords[1],
396 result->atoms[i].coords[2]);
397 }
398 #endif
399
400 assert(result->getNoOfAtoms() == nAtoms);
401 result->setNetCharge(netCharge);
402
403 return 0;
404 }
405
406
407
408 int
setFromMoleculeFile(const char * fileName,int netCharge,char ** basisfilename)409 Molecule::setFromMoleculeFile(const char* fileName,
410 int netCharge, char **basisfilename)
411 {
412 // Check filename extension
413 int len = strlen(fileName);
414 if(len < 5)
415 {
416 // Too short to have a 3-letter extension after dot.
417 // Assume mol format.
418 return readMoleculeFileInMolFormat(this, fileName, netCharge,
419 basisfilename);
420 }
421 const char* extensionPtr = &fileName[len-4];
422 if(strcasecmp(extensionPtr, ".xyz") == 0)
423 return readMoleculeFileInXyzFormat(*this, fileName, netCharge, false);
424
425 // Not xyz, assume mol format.
426 return readMoleculeFileInMolFormat(this, fileName, netCharge,
427 basisfilename);
428 }
429
430
431