1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30 /** @file molecule.cc
31 
32     @brief Class representing a molecule as a set of atoms with
33     assiciated coordinates and charges of the atomic nuclei.
34 
35     @author: Elias Rudberg <em>responsible</em>
36 */
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <memory.h>
41 #include <math.h>
42 #include <string.h>
43 #include <cassert>
44 #include "molecule.h"
45 #include "xyz_file_parser.h"
46 #include "output.h"
47 #include "memorymanag.h"
48 #include "units.h"
49 #include "utilities.h"
50 
51 
52 static ergo_real
get_distance_between_atoms(const Atom & atomA,const Atom & atomB)53 get_distance_between_atoms(const Atom& atomA, const Atom& atomB)
54 {
55   int i;
56   ergo_real sum = 0;
57   for(i = 0; i < 3; i++)
58     {
59       ergo_real dx = atomB.coords[i] - atomA.coords[i];
60       sum += dx*dx;
61     }
62   return template_blas_sqrt(sum);
63 }
64 
65 void
getExtremeInternuclearDistancesQuadratic(ergo_real & minDist,ergo_real & maxDist) const66 Molecule::getExtremeInternuclearDistancesQuadratic(ergo_real & minDist, ergo_real & maxDist) const
67 {
68   minDist = maxDist = 0; // This matters if there is only one atom.
69   bool firstTime = true;
70   for(int A = 0; A < noOfAtoms; A++)
71     for(int B = A+1; B < noOfAtoms; B++) {
72       const Atom& atomA = atoms[A];
73       const Atom& atomB = atoms[B];
74       ergo_real RAB = get_distance_between_atoms(atomA, atomB);
75       if(firstTime) {
76 	minDist = maxDist = RAB;
77 	firstTime = false;
78       }
79       else {
80 	minDist = RAB < minDist ? RAB : minDist;
81 	maxDist = RAB > maxDist ? RAB : maxDist;
82       }
83     }
84 }
85 
86 ergo_real
getNuclearRepulsionEnergyQuadratic() const87 Molecule::getNuclearRepulsionEnergyQuadratic() const
88 {
89   int A, B;
90   ergo_real sum;
91   int N = noOfAtoms;
92   do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
93 	    "get_nuclear_repulsion_energy, N = %i", N);
94   sum = 0;
95   for(A = 0; A < N; A++)
96     for(B = A+1; B < N; B++)
97       {
98 	const Atom& atomA = atoms[A];
99 	const Atom& atomB = atoms[B];
100 	ergo_real ZA = atomA.charge;
101 	ergo_real ZB = atomB.charge;
102 	ergo_real RAB = get_distance_between_atoms(atomA, atomB);
103 	sum += ZA * ZB / RAB;
104       }
105   do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
106 	    "get_nuclear_repulsion_energy returning energy %33.22f", (double)sum);
107   return sum;
108 }
109 
110 void
getNuclearRepulsionEnergyGradientContribQuadratic(ergo_real * resultGradient) const111 Molecule::getNuclearRepulsionEnergyGradientContribQuadratic(ergo_real* resultGradient) const
112 {
113   int N = noOfAtoms;
114   int A, B;
115   for(A = 0; A < N; A++)
116     for(B = A+1; B < N; B++) {
117       const Atom& atomA = atoms[A];
118       const Atom& atomB = atoms[B];
119       ergo_real ZA = atomA.charge;
120       ergo_real ZB = atomB.charge;
121       ergo_real dx = atomB.coords[0] - atomA.coords[0];
122       ergo_real dy = atomB.coords[1] - atomA.coords[1];
123       ergo_real dz = atomB.coords[2] - atomA.coords[2];
124       ergo_real r = template_blas_sqrt(dx*dx + dy*dy + dz*dz);
125       ergo_real derivative_x = ZA * ZB * (-0.5) * 2.0 * dx / (r*r*r);
126       ergo_real derivative_y = ZA * ZB * (-0.5) * 2.0 * dy / (r*r*r);
127       ergo_real derivative_z = ZA * ZB * (-0.5) * 2.0 * dz / (r*r*r);
128       resultGradient[A*3+0] += -1 * derivative_x;
129       resultGradient[A*3+1] += -1 * derivative_y;
130       resultGradient[A*3+2] += -1 * derivative_z;
131       resultGradient[B*3+0] +=  1 * derivative_x;
132       resultGradient[B*3+1] +=  1 * derivative_y;
133       resultGradient[B*3+2] +=  1 * derivative_z;
134     }
135 }
136 
137 ergo_real
getNuclearElectricFieldEnergy(const Vector3D & electricField) const138 Molecule::getNuclearElectricFieldEnergy(const Vector3D& electricField) const
139 {
140   int N = noOfAtoms;
141   do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
142 	    "get_nuclear_electric_field_energy, N = %i", N);
143   ergo_real sum = 0;
144   int A;
145   for(A = 0; A < N; A++)
146     {
147       const Atom* atom = &atoms[A];
148       sum += atom->charge * atom->coords[0] * electricField.v[0];
149       sum += atom->charge * atom->coords[1] * electricField.v[1];
150       sum += atom->charge * atom->coords[2] * electricField.v[2];
151     } // END FOR A
152   do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
153 	    "get_nuclear_electric_field_energy returning energy %33.22f", (double)sum);
154   return sum;
155 }
156 
157 int
getNumberOfElectrons() const158 Molecule::getNumberOfElectrons() const
159 {
160   int i;
161   ergo_real noOfElectrons;
162   ergo_real sum = 0;
163   for(i = 0; i < noOfAtoms; i++)
164     sum += atoms[i].charge;
165   noOfElectrons = sum - netCharge;
166   return (int)noOfElectrons;
167 }
168 
169 
170 static int
readMoleculeFileInMolFormat(Molecule * result,const char * fileName,int netCharge,char ** basisfilename)171 readMoleculeFileInMolFormat(Molecule* result, const char* fileName,
172                             int netCharge, char **basisfilename)
173 {
174   int nAtoms, nAtomsCurrType;
175   int i, j, ii, lineLen, noOfAtomTypes, atomTypeNo, can_read_basset;
176   char* p;
177   ergo_real unitFactor, atomCharge;
178   long int  fileSize = get_file_size(fileName);
179   if(fileSize < 0) {
180     do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error getting file size for file '%s'", fileName);
181     return -1;
182   }
183   FILE* f = fopen(fileName, "rt");
184   if(f == NULL)
185     {
186       do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error opening file '%s'", fileName);
187       return -1;
188     }
189   size_t bufSize = fileSize + 10000;
190   std::vector<char> buf(bufSize);
191   memset(&buf[0], 0, bufSize);
192 
193   int nBytes = (int)fread(&buf[0], sizeof(char), bufSize, f);
194   fclose(f);
195   if(nBytes <= 0) {
196     do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error reading file '%s'", fileName);
197     return -1;
198   }
199 
200   do_output(LOG_CAT_INFO, LOG_AREA_MAIN, "File '%s' read OK, nBytes = %i", fileName, nBytes);
201 
202   nAtoms = 0;
203 
204   p = &buf[0];
205 
206   // skip 3 or 4 lines
207   if(strncmp(p, "BASIS", 5) == 0) { i = 3; can_read_basset = 1; }
208   else if(strncmp(p, "ATOMDF", 6) == 0 ||
209           strncmp(p, "ATOMBASIS", 9) == 0) { i = 2; can_read_basset = 0; }
210   else
211     {
212       do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "Unsupported molecule input file type");
213       return -1;
214     }
215   while((*p != '\n') && (*p != '\0')) p++; /* skip first line */
216   p++;
217   // if basisfilename is NULL we skip getting basis set string.
218   if(basisfilename)
219     {
220       if(can_read_basset && !*basisfilename) {
221 	char *eol = strchr(p, '\n');
222 	if(eol) {
223 	  while (--eol>p && *eol == ' ')
224 	    ;
225 	  if (eol>p) {
226 	    size_t len = eol-p+1;
227 	    *basisfilename = (char*)ergo_malloc(len+1);
228 	    strncpy(*basisfilename, p, len);
229 	    (*basisfilename)[len] = '\0';
230 	  }
231 	}
232       }
233     }
234   while(i--) {
235     while((*p != '\n') && (*p != '\0')) p++;
236     p++;
237   }
238   if(*p == '\0') return -2;
239 
240   // the molecule input can follow in a fixed or a free format.
241   unitFactor = 1.0;
242   if( sscanf(p, "%d", &noOfAtomTypes) == 1)
243     {
244       // check if Angstrom flag is set
245       lineLen = 0;
246       for(ii = 0; ii <= 19; ii++)
247         {
248           if((p[ii] == '\n') || (p[ii] == '\0'))
249             break;
250           lineLen++;
251         }
252       if(lineLen > 19)
253         {
254           if(p[19] != ' ')
255             unitFactor = UNIT_one_Angstrom;
256         }
257       // skip till end of line
258       while(*p && *p != '\n') p++;
259     }
260   else
261     { /* "free" format */
262       do {
263         if(strncasecmp(p, "Atomtypes=", 10) == 0)
264           sscanf(p+10, "%d", &noOfAtomTypes);
265         else if(strncmp(p, "Angstrom", 8) == 0)
266           unitFactor = UNIT_one_Angstrom;
267         while(*p && *p != '\n' && *p != ' ') p++;
268         if(*p != ' ') break;
269         p++;
270       } while(1);
271     }
272   if(*p == '\n') p++;
273   do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
274 	    "noOfAtomTypes = %i dist. unit=%f au", noOfAtomTypes, unitFactor);
275   if(noOfAtomTypes <= 0)
276     {
277       do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "Error: (noOfAtomTypes <= 0)");
278       return -3;
279     }
280 
281   for(atomTypeNo = 0; atomTypeNo < noOfAtomTypes; atomTypeNo++)
282     {
283       // now p should point to beginning of new atom type
284       float charge;
285       if(*p == '\0')
286 	{
287 	  do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error in readMoleculeFile for atom type %i",
288 		    atomTypeNo+1);
289 	  return -4;
290 	}
291 
292       do_output(LOG_CAT_INFO, LOG_AREA_MAIN, "atomTypeNo = %i", atomTypeNo);
293 
294       // skip blanks
295       while(*p == ' ') p++;
296       if(*p == '\0') return -5;
297       // if the first character on the line is a digit, we assume fixed format
298       if(*p >= '0' && *p <= '9')
299 	{
300 	  charge = (float)atof(p);
301 	  while(*p != ' ' && *p != '\n' && *p != '\0') p++;
302 	  // skip blanks
303 	  while(*p == ' ') p++;
304 	  // now we expect another digit
305 	  if(*p < '0' || *p > '9')
306 	    return -6;
307 	  nAtomsCurrType = atoi(p);
308 	  while(*p != ' ' && *p != '\n' && *p != '\0') p++;
309 	  // skip blanks
310 	  while(*p == ' ') p++;
311 	  // now we expect end of line
312 	  if(*p != '\n')
313 	    return -7;
314 	  p++;
315 	}
316       else
317         { /* "free" format */
318 	  charge = -1;
319 	  nAtomsCurrType = -1;
320           do {
321             if(strncmp(p, "Charge=", 7) == 0)
322               sscanf(p+7, "%f", &charge);
323             else if(strncmp(p, "Atoms=", 6) == 0)
324               sscanf(p+6, "%d", &nAtomsCurrType);
325             while(*p && *p != '\n' && *p != ' ') p++;
326             if(*p != ' ') break;
327             p++;
328           } while(1);
329 	  if(charge < 0) {
330 	    do_output(LOG_CAT_ERROR, LOG_AREA_MAIN,
331 		      "Invalid charge = %5.2f", (double)charge);
332             return -8;
333 	  }
334 	  if(nAtomsCurrType < 0) {
335 	    do_output(LOG_CAT_ERROR, LOG_AREA_MAIN,
336 		      "Invalid number of atoms = %i", nAtomsCurrType);
337 		       return -8;
338 	  }
339         }
340 
341       do_output(LOG_CAT_INFO, LOG_AREA_MAIN,
342 		"charge = %5.2f, nAtomsCurrType = %i", (double)charge, nAtomsCurrType);
343 
344       atomCharge = charge;
345       if(atomCharge <= 0)
346         return -9;
347 
348       for(i = 0; i < nAtomsCurrType; i++)
349 	{
350 	  // skip atom label
351 	  while(*p == ' ') p++;
352 	  while((*p != ' ') && (*p != '\0')) p++;
353 	  if(*p == '\0') return -10;
354 	  while(*p == ' ') p++;
355 	  // read coordinates
356 	  ergo_real coords[3];
357 	  for(j = 0; j < 3; j++)
358 	    {
359 	      // skip blanks
360 	      while(*p == ' ') p++;
361 	      if(*p == '\0') return -11;
362 	      coords[j] = atof(p) * unitFactor;
363 	      while((*p != ' ') && (*p != '\n') && (*p != '\0')) p++;
364 	      if(*p == '\0') return -12;
365 	    }
366 
367 	  // skip rest of line
368 	  while((*p != '\n') && (*p != '\0'))
369 	    p++;
370 	  p++;
371 
372 	  result->addAtom(atomCharge, coords[0], coords[1], coords[2]);
373 	  nAtoms++;
374 	} // END FOR i
375     } // END FOR atomTypeNo
376 
377   // done. now there should be nothing more in the file.
378   while(*p != '\0')
379     {
380       if(*p != ' ' && *p != '\n')
381 	{
382 	  do_output(LOG_CAT_ERROR, LOG_AREA_MAIN, "error in readMoleculeFile: "
383 		    "non-blank character found after last atom.");
384 	  return -14;
385 	}
386       p++;
387     }
388 
389 #if 0
390   for(i = 0; i < nAtoms; i++)
391     {
392       printf("%5.2f %22.11f %22.11f %22.11f\n",
393 	     result->atoms[i].charge,
394 	     result->atoms[i].coords[0],
395 	     result->atoms[i].coords[1],
396 	     result->atoms[i].coords[2]);
397     }
398 #endif
399 
400   assert(result->getNoOfAtoms() == nAtoms);
401   result->setNetCharge(netCharge);
402 
403   return 0;
404 }
405 
406 
407 
408 int
setFromMoleculeFile(const char * fileName,int netCharge,char ** basisfilename)409 Molecule::setFromMoleculeFile(const char* fileName,
410                               int netCharge, char **basisfilename)
411 {
412   // Check filename extension
413   int len = strlen(fileName);
414   if(len < 5)
415     {
416       // Too short to have a 3-letter extension after dot.
417       // Assume mol format.
418       return readMoleculeFileInMolFormat(this, fileName, netCharge,
419                                          basisfilename);
420     }
421   const char* extensionPtr = &fileName[len-4];
422   if(strcasecmp(extensionPtr, ".xyz") == 0)
423     return readMoleculeFileInXyzFormat(*this, fileName, netCharge, false);
424 
425   // Not xyz, assume mol format.
426   return readMoleculeFileInMolFormat(this, fileName, netCharge,
427                                      basisfilename);
428 }
429 
430 
431