1 // EMT.cpp  --  Implements the EMT potential for multiple elements.
2 
3 // Copyright (C) 2008-2011 Jakob Schiotz and Center for Individual
4 // Nanoparticle Functionality, Department of Physics, Technical
5 // University of Denmark.  Email: schiotz@fysik.dtu.dk
6 //
7 // This file is part of Asap version 3.
8 // Asap is released under the GNU Lesser Public License (LGPL) version 3.
9 // However, the parts of Asap distributed within the OpenKIM project
10 // (including this file) are also released under the Common Development
11 // and Distribution License (CDDL) version 1.0.
12 //
13 // This program is free software: you can redistribute it and/or
14 // modify it under the terms of the GNU Lesser General Public License
15 // version 3 as published by the Free Software Foundation.  Permission
16 // to use other versions of the GNU Lesser General Public License may
17 // granted by Jakob Schiotz or the head of department of the
18 // Department of Physics, Technical University of Denmark, as
19 // described in section 14 of the GNU General Public License.
20 //
21 // This program is distributed in the hope that it will be useful,
22 // but WITHOUT ANY WARRANTY; without even the implied warranty of
23 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24 // GNU General Public License for more details.
25 //
26 // You should have received a copy of the GNU General Public License
27 // and the GNU Lesser Public License along with this program.  If not,
28 // see <http://www.gnu.org/licenses/>.
29 
30 // Note: The macro ASAP_FOR_KIM is undefined when this file is
31 // compiled as part of Asap, but is defined when it is part of the
32 // OpenKIM model driver.
33 
34 #include "EMT.h"
35 #include "Asap.h"
36 #include "EMTParameterProvider.h"
37 #include "Atoms.h"
38 #include "Vec.h"
39 #include "NeighborLocator.h"
40 #ifndef ASAP_FOR_KIM
41 #include "NeighborList.h"  // Not present in OpenKIM
42 #endif
43 #include "Exception.h"
44 #include "Timing.h"
45 #include "mass.h"
46 #include "Debug.h"
47 #include <math.h>
48 #include <vector>
49 #include <set>
50 #include <cstdio>
51 #include <iostream>
52 #include <algorithm>
53 #ifdef ASAP_MKL
54 #include "mkl.h"
55 #endif //ASAP_MKL
56 using std::vector;
57 using std::set;
58 using std::cerr;
59 using std::endl;
60 using std::flush;
61 using std::sort;
62 
63 #undef INTERLEAVETHREADS
64 
65 
66 #if 1
67 #define VERB(x) if (verbose == 1) cerr << x
68 #else
69 #define VERB(x)
70 #endif
71 
72 using namespace std;
73 
74 // Standard mapping of the six independent parts of the stress tensor to
75 // vector notation
76 const static int stresscomp[3][3] = {{0, 5, 4}, {5, 1, 3}, {4, 3, 2}};
77 
EMT(PyObject * self,PyObject * prov,int verbose)78 EMT::EMT(PyObject *self, PyObject *prov, int verbose) : Potential(self, verbose)
79 {
80   CONSTRUCTOR;
81   // Extract the parameter provider from the Python object, and store
82   // the Python object.
83   if (prov != NULL)
84     {
85       provider = ((PyAsap_EMTParamProvObject *) prov)->cobj;
86       provider_obj = prov;
87       Py_INCREF(provider_obj);
88     }
89   else
90     {
91       provider_obj = NULL;
92       provider = NULL;
93     }
94   DEBUGPRINT;
95   nblist = NULL;
96   nblist_obj = NULL;
97   driftfactor = 0.05;  // Drift factor for the neighbor list.
98   initialized = false;
99   always_fullnblist = false;
100   atoms = NULL;
101   ghostatoms = false;
102   nAtoms = 0;
103   nSize = 0;
104 #ifdef ASAP_FOR_KIM
105   // Compiling EMT as an OpenKIM model
106   // OpenKIM assumes that the potential energy is zero at infinite separation.
107   // subtractE0 = false;
108   subtractE0 = false;
109 #else
110   // Asap defaults to using the fcc crystal as zero for the potential energy.
111   subtractE0 = true;
112 #endif
113   counters.ids = counters.nblist = counters.sigma1 = counters.sigma2 =
114     counters.energies = counters.forces = counters.virials =
115     counters.beforeforces = 0;
116   recalc.ids = recalc.nblist = recalc.sigma1 = recalc.sigma2 =
117     recalc.energies = recalc.forces = recalc.virials =
118     recalc.beforeforces = false;
119   skip_begin = false;
120   DEBUGPRINT;
121 }
122 
123 /// If an EMTParameterProvider was given when constructing the EMT
124 /// object, it will NOT be deleted, but any default parameter provider
125 /// and the automatically generated neigbor list will be deleted.
126 /// XXXX: Handle this with Python refcounting.
~EMT()127 EMT::~EMT()
128 {
129   DESTRUCTOR;
130   Py_XDECREF(provider_obj);
131   Py_XDECREF(nblist_obj);
132   if (atoms != NULL)
133     AsapAtoms_DECREF(atoms);
134 }
135 
GetRepresentation() const136 string EMT::GetRepresentation() const
137 {
138   char buffer[50];
139   sprintf(buffer, "0x%p", this);
140   return "<asap." + GetName() + "(" + provider->GetName() +
141     ") potential object at " + buffer + ">";
142 }
143 
PrintMemory() const144 long EMT::PrintMemory() const
145 {
146   long atomsmem = 0;
147   if (atoms != NULL)
148     atomsmem = atoms->PrintMemory();
149   long mem = 0;  // Count the big stuff.
150   for (vector<vector<double> >::const_iterator i = sigma1.begin();
151        i != sigma1.end(); i++)
152     mem += i->size() * sizeof(int);
153   for (vector<vector<double> >::const_iterator i = sigma2.begin();
154        i != sigma2.end(); i++)
155     mem += i->size() * sizeof(int);
156   mem += Ec.size() * sizeof(double);
157   mem += Eas.size() * sizeof(double);
158   mem += Epot.size() * sizeof(double);
159   mem += radius.size() * sizeof(double);
160   mem += dEds.size() * sizeof(double);
161   mem += force.size() * sizeof(Vec);
162   mem += virials.size() * sizeof(SymTensor);
163   mem += tmp_double.size() * sizeof(double);
164   mem += id.size() * sizeof(int);
165   mem = (mem + 512*1024) / (1024*1024);
166   char buffer[500];
167   snprintf(buffer, 500,
168 	   "*MEM* EMT %ld MB.  [ sizeof(int)=%ld  sizeof(double)=%ld ]",
169 	   mem, (long) sizeof(int), (long) sizeof(double));
170   cerr << buffer << endl;
171   if (nblist != NULL)
172     mem += nblist->PrintMemory();
173   return mem + atomsmem;
174 }
175 
SetAtoms(PyObject * pyatoms,Atoms * accessobj)176 void EMT::SetAtoms(PyObject *pyatoms, Atoms* accessobj /* = NULL */)
177 {
178   DEBUGPRINT;
179   VERB("SetAtoms ");
180   if (atoms != NULL)
181     {
182       // SetAtoms should only do anything the first time it is called.
183       // Subsequent calls should just check for accessobj being NULL.
184       if (accessobj != NULL && accessobj != atoms)
185 	throw AsapError("EMT::SetAtoms called multiple times with accessobj != NULL");
186       // If SetAtoms is called multiple times, it should only check that no new element
187       // is introduced.
188       set<int> elements_set;
189       atoms->Begin(pyatoms);
190       atoms->GetListOfElements(elements_set);
191       atoms->End();
192       set<int> parameters_set;
193       for (int i = 0; i < parameters.size(); i++)
194       {
195 	parameters_set.insert(parameters[i]->Z);
196       }
197       for (set<int>::iterator i = elements_set.begin(); i != elements_set.end(); i++)
198 	if (parameters_set.find(*i) == parameters_set.end())
199 	  throw AsapError("You cannot introduce a new element after initializing EMT calculator: Z=") << *i;
200       return;
201     }
202 
203   // The first time SetAtoms is being called some initialization is done.
204   if (accessobj != NULL)
205     {
206       atoms = accessobj;
207       AsapAtoms_INCREF(atoms);
208     }
209   else
210     atoms = new NormalAtoms();
211   ASSERT(atoms != NULL);
212   atoms->Begin(pyatoms);
213   nAtoms = atoms->GetNumberOfAtoms();
214   nSize = nAtoms;
215 
216   InitParameters();
217   initialized = true;
218   if (nelements == 1)
219     {
220       singleelement = parameters[0];
221     }
222   else
223     {
224       singleelement = 0;
225     }
226   atoms->End();
227   DEBUGPRINT;
228 }
229 
Allocate()230 void EMT::Allocate()
231 {
232   RETURNIFASAPERROR;
233   USETIMER("EMT::Allocate");
234   DEBUGPRINT;
235   VERB(" Allocate[" << nAtoms << "," << nSize << "]" << flush);
236   // WARNING: Resizing the vector may allocate way too much memory.  It
237   // appears that calling reserve solves this problem.  For efficiency,
238   // reserve is called with 5% extra space.  This is only necessary if the
239   // atoms have ghosts, otherwise no reallocation will happen.
240 
241   // First, check if reallocation is necessary.
242   if (nSize != force.size() || nAtoms != Eas.size())
243     {
244       DEBUGPRINT;
245       /* Resize/intialize the internal variables. */
246       sigma1.resize(nelements);
247       sigma2.resize(nelements);
248       // Do the reserve trick if the atoms have ghosts.
249       if (ghostatoms)
250         {
251           if (force.capacity() < nSize)
252             {
253               nSizeRes = nSize + nSize/20;
254               for (int i = 0; i < nelements; i++) {
255                 sigma1[i].reserve(nSizeRes);
256                 sigma2[i].reserve(nSizeRes);
257               }
258               id.reserve(nSizeRes);
259               force.reserve(nSizeRes);
260               dEds.reserve(nSizeRes);
261             }
262           if (Eas.capacity() < nAtoms)
263             {
264               nAtomsRes = nAtoms + nAtoms/20;
265               Eas.reserve(nAtomsRes);
266 	      Epot.reserve(nAtomsRes);
267 	      Ec.reserve(nAtomsRes);
268 	      radius.reserve(nAtomsRes);
269             }
270         }
271 
272       for (int i = 0; i < nelements; i++) {
273         sigma1[i].resize(nAtoms);
274         sigma2[i].resize(nAtoms);
275       }
276       DEBUGPRINT;
277       Ec.resize(nAtoms);
278       Eas.resize(nAtoms);
279       Epot.resize(nAtoms);
280       radius.resize(nAtoms);
281       dEds.resize(nSize);
282       id.resize(nSize);
283       force.resize(nSize);
284       tmp_double.resize(nSize);
285       ex2.resize(nAtoms);
286       if (virials.size())
287         AllocateStress();
288       // The IDs are set to zero, then they do not have to be calculated if
289       // there is only one element.
290       if (nelements == 1)
291         for (int i = 0; i < nSize; i++)
292           id[i] = 0;
293     }
294   DEBUGPRINT;
295 }
296 
297 // Stresses are reallocated if they exist when Allocate is called, or if
298 // they do not exist, and GetStress is called.
AllocateStress()299 void EMT::AllocateStress()
300 {
301   DEBUGPRINT;
302   if (ghostatoms && (virials.capacity() < nSize))
303     virials.reserve(nSizeRes);
304   VERB(" AllocStr[" << nAtoms << "," << nSize << "]"<< flush);
305   virials.resize(nSize);
306   DEBUGPRINT;
307 }
308 
InitParameters()309 void EMT::InitParameters()
310 {
311   DEBUGPRINT;
312   // Extract the elements from the list of atoms, and set up an array
313   // of EMT parameters.
314   set<int> elements_set;
315   vector<int> elements;
316 
317   // Extract the elements from the list of atoms.
318   // elements.clear();
319   atoms->GetListOfElements(elements_set);
320   for (set<int>::iterator i = elements_set.begin(); i != elements_set.end();
321        ++i)
322     elements.push_back(*i);
323   nelements = elements.size();
324   ASSERT(nelements == elements_set.size());
325   sort(elements.begin(), elements.end());
326 
327   // Get the EMT parameters
328   parameters.clear();
329   for (vector<int>::iterator i = elements.begin(); i != elements.end(); ++i)
330     parameters.push_back(provider->GetParameters(*i));
331 
332   //ASSERT(nelements == provider->GetNumberOfElements());
333 
334   // Calculate the quantities that can only be calculated, once the
335   // elements in the simulations are known.
336   provider->CalcGammaEtc();
337   rFermi = provider->GetCutoffDistance();
338   //rNbCut = 1.04500185048 * rFermi;
339   rNbCut = provider->GetListCutoffDistance();
340   cutoffslope = provider->GetCutoffSlope();
341   chi = provider->GetChi();
342   if (verbose)
343     cerr << "EMT::InitParameters:  rFermi = " << rFermi << "  rNbCut = "
344          << rNbCut << "  cutoffslope = " << cutoffslope << endl;
345   DEBUGPRINT;
346 }
347 
CalcReq_Forces(PyObject * pyatoms)348 bool EMT::CalcReq_Forces(PyObject *pyatoms)
349 {
350   atoms->Begin(pyatoms);
351   bool required =  (counters.forces != atoms->GetPositionsCounter());
352   atoms->End();
353   return required;
354 }
355 
GetForces(PyObject * pyatoms)356 const vector<Vec> &EMT::GetForces(PyObject *pyatoms)
357 {
358   USETIMER("EMT::GetForces")
359   DEBUGPRINT;
360   VERB(" Force[");
361   ASSERT(atoms != NULL);
362   atoms->Begin(pyatoms);
363   recalc.nblist = CheckNeighborList();
364   recalc.forces = (counters.forces != atoms->GetPositionsCounter());
365   if (recalc.forces)
366     {
367       recalc.ids = (counters.ids != atoms->GetPositionsCounter());
368       recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter());
369       recalc.beforeforces = (counters.beforeforces !=
370 			     atoms->GetPositionsCounter());
371       DEBUGPRINT;
372       CalculateForces();
373       counters.beforeforces = atoms->GetPositionsCounter();
374       counters.forces = atoms->GetPositionsCounter();
375       VERB("]" << flush);
376     }
377   else
378     {
379       VERB("-]");
380       ASSERT(recalc.nblist == false);
381     }
382   DEBUGPRINT;
383   atoms->End();
384   MEMORY;
385   DEBUGPRINT;
386   return force;
387 }
388 
CalculateForces()389 void EMT::CalculateForces()
390 {
391   CHECKNOASAPERROR;
392 #ifdef _OPENMP
393 #pragma omp parallel
394 #endif // _OPENMP
395   {
396     if (recalc.nblist)
397       UpdateNeighborList();
398     CalculateIDs();
399     CalculateSigmas(false);              /* Skip sigma2 */
400     CalculateEnergiesAfterSigmas(false); /* Skip actual energy calc. */
401     if (nelements > 1)
402       CalculateForcesAfterEnergies();
403     else
404       CalculateForcesAfterEnergiesSingle();
405   }
406   PROPAGATEASAPERROR;
407 }
408 
CalcReq_Virials(PyObject * pyatoms)409 bool EMT::CalcReq_Virials(PyObject *pyatoms)
410 {
411   atoms->Begin(pyatoms);
412   bool required = (counters.virials != atoms->GetPositionsCounter());
413   atoms->End();
414   return required;
415 }
416 
CalcReq_Virial(PyObject * pyatoms)417 bool EMT::CalcReq_Virial(PyObject *pyatoms)
418 {
419   return CalcReq_Virials(pyatoms);
420 }
421 
GetVirials(PyObject * pyatoms)422 const vector<SymTensor> &EMT::GetVirials(PyObject *pyatoms)
423 {
424   USETIMER("EMT::GetVirials")
425   DEBUGPRINT;
426   VERB(" Virials[");
427   ASSERT(atoms != NULL);
428   atoms->Begin(pyatoms);
429 
430   recalc.nblist = CheckNeighborList();
431   recalc.virials = (counters.virials != atoms->GetPositionsCounter());
432   if (recalc.virials)
433     {
434       recalc.ids = (counters.ids != atoms->GetPositionsCounter());
435       recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter());
436       recalc.beforeforces = (counters.beforeforces !=
437 			     atoms->GetPositionsCounter());
438       recalc.forces = (counters.forces != atoms->GetPositionsCounter());
439       DEBUGPRINT;
440       if (this->virials.size() == 0)
441         AllocateStress();
442       CalculateVirials();
443     }
444   else
445     {
446       ASSERT(recalc.nblist == false);
447     }
448   VERB("]" << flush);
449   DEBUGPRINT;
450   counters.virials = atoms->GetPositionsCounter();
451   counters.beforeforces = atoms->GetPositionsCounter();
452   counters.forces = atoms->GetPositionsCounter();
453   atoms->End();
454   return virials;
455 }
456 
CalculateVirials()457 void EMT::CalculateVirials()
458 {
459   if (recalc.virials)
460     {
461       CHECKNOASAPERROR;
462 #ifdef _OPENMP
463 #pragma omp parallel
464 #endif // _OPENMP
465       {
466         if (recalc.nblist)
467           UpdateNeighborList();
468         CalculateIDs();
469         CalculateSigmas(false);                  // Skip sigma2
470         CalculateEnergiesAfterSigmas(false); // Skip actual energy calc.
471         if (nelements > 1)
472           CalculateForcesAfterEnergies();
473         else
474           CalculateForcesAfterEnergiesSingle();
475       }
476       PROPAGATEASAPERROR;
477     }
478 }
479 
GetAtomicVolumes(vector<double> & v)480 void EMT::GetAtomicVolumes(vector<double> &v)
481 {
482   v.resize(nAtoms);
483   const double fourpithird = 4.1887902048;  // 4 * PI / 3
484   for (int i = 0; i < nAtoms; i++)
485     {
486       double s0 = parameters[id[i]]->seq;
487       v[i] = fourpithird * s0 * s0 * s0;
488     }
489 }
490 
CalcReq_Energy(PyObject * pyatoms)491 bool EMT::CalcReq_Energy(PyObject *pyatoms)
492 {
493   atoms->Begin(pyatoms);
494   bool required = (counters.energies != atoms->GetPositionsCounter());
495   atoms->End();
496   return required;
497 }
498 
GetPotentialEnergies(PyObject * pyatoms)499 const vector<double> &EMT::GetPotentialEnergies(PyObject *pyatoms)
500 {
501   USETIMER("EMT::GetPotentialEnergies");
502   DEBUGPRINT;
503   VERB(" Energies[");
504   ASSERT(atoms != NULL);
505   // Atoms may already be open if called from MonteCarloEMT object.
506   if (!skip_begin)
507     atoms->Begin(pyatoms);
508   else
509     skip_begin = false;
510   recalc.nblist = CheckNeighborList();
511   recalc.energies = (counters.energies != atoms->GetPositionsCounter());
512   if (recalc.energies)
513     {
514       recalc.ids = (counters.ids != atoms->GetPositionsCounter());
515       recalc.sigma1 = (counters.sigma1 != atoms->GetPositionsCounter());
516       recalc.sigma2 = (counters.sigma2 != atoms->GetPositionsCounter());
517       recalc.beforeforces = (counters.beforeforces !=
518 			     atoms->GetPositionsCounter());
519       DEBUGPRINT;
520       CalculateEnergies();
521       counters.energies = atoms->GetPositionsCounter();
522       counters.beforeforces = atoms->GetPositionsCounter();
523     }
524   else
525     {
526       ASSERT(counters.beforeforces == atoms->GetPositionsCounter());
527       ASSERT(recalc.nblist == false);
528 
529       if(subtractE0)
530 	for (int i = 0; i < nAtoms; i++)
531 	  Epot[i] = Ec[i] + Eas[i] - parameters[id[i]]->e0;
532       else
533 	for (int i = 0; i < nAtoms; i++)
534 	  Epot[i] = Ec[i] + Eas[i];
535 
536       VERB("-");
537     }
538   ASSERT(Epot.size() == nAtoms);
539   DEBUGPRINT;
540   VERB("]" << flush);
541   atoms->End();
542   return Epot;
543 }
544 
CalculateEnergies()545 void EMT::CalculateEnergies()
546 {
547   CHECKNOASAPERROR;
548 #ifdef _OPENMP
549 #pragma omp parallel
550 #endif // _OPENMP
551   {
552     if (recalc.nblist)
553       UpdateNeighborList();
554     CalculateIDs();
555     CalculateSigmas(true);
556     CalculateEnergiesAfterSigmas(true);
557   }
558   PROPAGATEASAPERROR;
559 }
560 
GetPotentialEnergy(PyObject * pyatoms)561 double EMT::GetPotentialEnergy(PyObject *pyatoms)
562 {
563   USETIMER("EMT::GetPotentialEnergy");
564   VERB(" Energy[");
565   DEBUGPRINT;
566   double etot = 0.0;
567 
568   const vector<double> &e = GetPotentialEnergies(pyatoms);
569   for (int i = 0; i < nAtoms; i++)
570     etot += e[i];
571   DEBUGPRINT;
572   VERB("]" << flush);
573   return etot;
574 }
575 
CheckNeighborList()576 bool EMT::CheckNeighborList()
577 {
578   RETURNIFASAPERROR2(false);
579   USETIMER("EMT::CheckNeighborList");
580   DEBUGPRINT;
581   ASSERT(atoms != NULL);
582   bool update = (nblist == NULL) || nblist->IsInvalid();  // Update if invalid
583   if (!update && (counters.nblist != atoms->GetPositionsCounter()))
584     {
585       VERB("n");
586       update = nblist->CheckNeighborList();
587     }
588   // May communicate
589   update = atoms->UpdateBeforeCalculation(update,
590 					  rNbCut * (1 + driftfactor));
591   counters.nblist = atoms->GetPositionsCounter();
592   return update;
593 }
594 
UpdateNeighborList()595 void EMT::UpdateNeighborList()
596 {
597   RETURNIFASAPERROR;
598   USETIMER("EMT::UpdateNeighborList");
599   DEBUGPRINT;
600   VERB("N");
601   DEBUGPRINT;
602   RETURNIFASAPERROR;
603   if (nblist)
604     {
605       DEBUGPRINT;
606       nblist->UpdateNeighborList();
607       RETURNIFASAPERROR;
608 #ifdef _OPENMP
609 #pragma omp single
610 #endif // _OPENMP
611       {
612         if ((nAtoms != atoms->GetNumberOfAtoms())
613             || (nSize - nAtoms != atoms->GetNumberOfGhostAtoms()))
614           {
615             nAtoms = atoms->GetNumberOfAtoms();
616             nSize = nAtoms + atoms->GetNumberOfGhostAtoms();
617             ghostatoms = atoms->HasGhostAtoms();
618             Allocate();
619           }
620       }
621     }
622   else
623     {
624 #ifdef _OPENMP
625 #pragma omp barrier
626 #endif // _OPENMP
627       // First call, create the neighbor list.
628       DEBUGPRINT;
629       CreateNeighborList();
630       RETURNIFASAPERROR;
631 #ifdef _OPENMP
632 #pragma omp single
633 #endif // _OPENMP
634       {
635         nAtoms = atoms->GetNumberOfAtoms();
636         nSize = nAtoms + atoms->GetNumberOfGhostAtoms();
637         ghostatoms = atoms->HasGhostAtoms();
638         Allocate();
639       }
640     }
641   DEBUGPRINT;
642 }
643 
644 // Will be replaced in MonteCarloEMT
CreateNeighborList()645 void EMT::CreateNeighborList()
646 {
647 #ifdef ASAP_FOR_KIM
648   // This function is never called when used as an OpenKIM potential
649   // In that case, the NeighborList object does not exist, and this
650   // function does not compile.
651   throw AsapError("Internal inconsistency: EMT::CreateNeighborList called in KIM calculator.");
652 #else
653   RETURNIFASAPERROR;
654   MEMORY;
655   USETIMER("EMT::CreateNeighborList");
656   if (!initialized)
657     THROW_RETURN( AsapError("EMT object has not been initialized!") );
658 #ifdef _OPENMP
659 #pragma omp single
660 #endif // _OPENMP
661   {
662       PyAsap_NeighborLocatorObject *nbl;
663       nbl = PyAsap_NewNeighborList(atoms, rNbCut, driftfactor);
664       nblist = nbl->cobj;
665       nblist->verbose = verbose;
666       nblist_obj = (PyObject *) nbl;
667       MEMORY;
668   }
669   nblist->UpdateNeighborList();
670   MEMORY;
671 #endif // not ASAP_FOR_KIM
672 }
673 
674 
675 /// IDs are a recoding of the atomic numbers used as indices into
676 /// various arrays.  Atomic numbers are not practical for this as they
677 /// are not contiguous.
CalculateIDs()678 void EMT::CalculateIDs()
679 {
680   RETURNIFASAPERROR;
681   USETIMER("EMT::CalculateIDs");
682   DEBUGPRINT;
683   if (!recalc.ids || nelements == 1)
684     return;
685   VERB("i");
686   DEBUGPRINT;
687   // If there is only one element, all IDs are 0 and do not have to be
688   // calculated.  Otherwise the ID is the offset into the list of elements.
689   const asap_z_int *RESTRICT z = atoms->GetAtomicNumbers();
690 
691   int nSize = this->nSize;    // These two lines help optimization.
692   int *RESTRICT id = &(this->id)[0];
693   for (int i = 0; i < nelements; i++)
694     {
695       int zcand = parameters[i]->Z;
696 #ifdef _OPENMP
697 #pragma omp for
698 #endif // _OPENMP
699       for (int j = 0; j < nSize; j++)
700 	if (z[j] == zcand)
701 	    id[j] = i;
702     }
703 #ifdef _OPENMP
704 #pragma omp master
705 #endif // _OPENMP
706   counters.ids = atoms->GetPositionsCounter();
707   DEBUGPRINT;
708 }
709 
CalculateSigmas(bool calculatesigma2)710 void EMT::CalculateSigmas(bool calculatesigma2)
711 {
712   RETURNIFASAPERROR;
713   USETIMER("EMT::CalculateSigmas");
714   DEBUGPRINT;
715   if (!(recalc.sigma1 || (calculatesigma2 && recalc.sigma2)))
716     return;
717 #ifdef _OPENMP
718 #pragma omp master
719 #endif // _OPENMP
720   {
721       if (calculatesigma2)
722         {
723           VERB("2");
724         }
725       else
726         {
727           VERB("1");
728         }
729   }
730 
731   DEBUGPRINT;
732   int maxnblen = nblist->MaxNeighborListLength();
733   if (maxnblen > BUFLEN)
734     {
735       const Vec *cell = atoms->GetCell();
736       THROW_RETURN( AsapError("Neighborlist overrun (did you squeeze your atoms?).\n")
737 		    << "  Longest neighbor list is " << maxnblen << ".\n"
738 		    << "  Cell is " << cell[0] << ", " << cell[1] << ", " << cell[2] );
739     }
740 #ifdef ASAP_FOR_KIM
741   const int *contributes = atoms->GetParticleContributes();
742   ASSERT(nAtoms == nSize);  // In OpenKIM, non-contributing atoms are not always at the end.
743 #endif
744   // Buffer data:
745   TinyMatrix<int> nbatch(nelements,nelements);
746   TinyMatrix<int[BUFLEN]> self(nelements,nelements);
747   TinyMatrix<int[BUFLEN]> other(nelements,nelements);
748   TinyMatrix<Vec[BUFLEN]> rnb(nelements,nelements);
749   TinyMatrix<double[BUFLEN]> sqdist(nelements,nelements);
750   vector<int> other_buf(BUFLEN);
751   vector<Vec> rnb_buf(BUFLEN);
752   vector<double> sqdist_buf(BUFLEN);
753 
754   int nSize = this->nSize;  // Helps optimization
755   DEBUGPRINT;
756   /* Set sigmas to zero */
757   for (int i = 0; i < nelements; i++)
758     {
759       double *RESTRICT s1 = &sigma1[i][0];
760       double *RESTRICT s2 = &sigma2[i][0];
761 #ifdef _OPENMP
762 #pragma omp for nowait
763 #endif // _OPENMP
764       for (int j = 0; j < nSize; j++)
765         {
766           s1[j] = 0.0;
767           s2[j] = 0.0;
768         }
769     }
770 #ifdef _OPENMP
771 #pragma omp barrier
772 #endif // _OPENMP
773 
774   /* No atoms in batch pools */
775   for (int i = 0; i < nelements; i++)
776     for (int j = 0; j < nelements; j++)
777       nbatch[i][j] = 0;
778 
779   // Loop over atoms
780 #ifdef _OPENMP
781 #pragma omp for
782 #endif // _OPENMP
783   for (int atom = 0; atom < nAtoms; atom++)
784     {
785 #ifdef ASAP_FOR_KIM
786       if (!contributes[atom]) continue;   // Skip non-contributing atoms.
787 #endif
788       int zself = id[atom];
789       // Get neighbors and loop over them.  Simplest if only one element
790       if (nelements == 1)
791         {
792           int nbat = nbatch[0][0];  // only element
793           int size = BUFLEN-nbat;
794           int n;
795           if (always_fullnblist)
796             n = nblist->GetFullNeighbors(atom, other[0][0]+nbat,
797                                          rnb[0][0]+nbat, sqdist[0][0]+nbat,
798                                          size);
799           else
800             n = nblist->GetNeighbors(atom, other[0][0]+nbat,
801                                      rnb[0][0]+nbat, sqdist[0][0]+nbat,
802                                      size);
803           ASSERT(size >= 0);    // REMOVE LATER !!!
804           for (int i = nbat; i < nbat+n; i++)
805             self[0][0][i] = atom;
806           nbatch[0][0] += n;
807         }
808       else
809         {
810           int size = BUFLEN;
811           int n;
812           if (always_fullnblist)
813             n = nblist->GetFullNeighbors(atom, &other_buf[0], &rnb_buf[0],
814                                          &sqdist_buf[0], size);
815           else
816             n = nblist->GetNeighbors(atom, &other_buf[0], &rnb_buf[0],
817                                      &sqdist_buf[0], size);
818           ASSERT(size >= 0);     // REMOVE LATER !!!
819 	  int *RESTRICT id = &(this->id)[0];
820           for (int i = 0; i < n; i++)
821             {
822               int zother = id[other_buf[i]];
823               int nbat = nbatch[zself][zother]++;  // Count this atom
824               self[zself][zother][nbat] = atom;
825               other[zself][zother][nbat] = other_buf[i];
826               sqdist[zself][zother][nbat] = sqdist_buf[i];
827             }
828         }
829       // Now process any full batch
830 
831       for (int zo = 0; zo < nelements; zo++)
832         if (nbatch[zself][zo] >= BUFLEN - maxnblen)
833           {
834             sigma_batch(self[zself][zo], other[zself][zo],
835                         sqdist[zself][zo], zself, zo, nbatch[zself][zo],
836                         calculatesigma2);
837             nbatch[zself][zo] = 0;
838           }
839     }  // Loop over atoms
840   /* Process the remaining incomplete batches */
841   for (int zs = 0; zs < nelements; zs++)
842     for (int zo = 0; zo < nelements; zo++)
843       if (nbatch[zs][zo])
844         sigma_batch(self[zs][zo], other[zs][zo],
845                     sqdist[zs][zo], zs, zo, nbatch[zs][zo],
846                     calculatesigma2);
847 
848 #ifdef _OPENMP
849 #pragma omp single
850 #endif // _OPENMP
851   {
852     sigma2isvalid = calculatesigma2;  /* Remember if it was calculated. */
853     counters.sigma1 = atoms->GetPositionsCounter();
854     if (calculatesigma2)
855       counters.sigma2 = atoms->GetPositionsCounter();
856   }
857 
858 }
859 
sigma_batch(int * RESTRICT self,int * RESTRICT other,double * RESTRICT sq_dist,int zs,int zo,int n,bool calculatesigma2,bool partialupdate)860 void EMT::sigma_batch(int *RESTRICT self, int *RESTRICT other,
861                       double *RESTRICT sq_dist, int zs, int zo, int n,
862                       bool calculatesigma2, bool partialupdate /* = false */)
863 {
864   USETIMER("EMT::sigma_batch");
865 #ifdef ASAP_MKL
866   double *temporary = (double *) mkl_malloc(10 * BUFLEN * sizeof(double), ASAP_MKL_ALIGN);
867 #else // ASAP_MKL
868   double *temporary = new double[4*BUFLEN];
869 #endif //ASAP_MKL
870   double *RESTRICT dsigma1s = &temporary[0];
871   double *RESTRICT dsigma2s = dsigma1s + BUFLEN;
872   double *RESTRICT dsigma1o = dsigma2s + BUFLEN;
873   double *RESTRICT dsigma2o = dsigma1o + BUFLEN;
874   //ASSERT(dsigma2o - temporary == (4 - 1) * BUFLEN);
875 #ifdef ASAP_MKL
876   double *RESTRICT tmp_a = dsigma2o + BUFLEN;
877   double *RESTRICT tmp_b = tmp_a + BUFLEN;
878   double *RESTRICT tmp_c = tmp_b + BUFLEN;
879   double *RESTRICT tmp_d = tmp_c + BUFLEN;
880   double *RESTRICT tmp_e = tmp_d + BUFLEN;
881   double *RESTRICT tmp_f = tmp_e + BUFLEN;
882   //ASSERT(tmp_f - temporary == (10 - 1) * BUFLEN);
883 #endif //ASAP_MKL
884   double cutslopecutdist;
885   double other_eta2betaseq, self_eta2betaseq;
886   double other_kappaoverbeta, self_kappaoverbeta;
887   double other_kappaseq, self_kappaseq;
888   double *s1s, *s1o, *s2s, *s2o;
889   const emt_parameters *emtself, *emtother;
890 
891   ASSERT(n <= BUFLEN);
892 
893   /* Get EMT parameters   REWRITE !!! XXXX */
894   emtself = parameters[zs];
895   emtother = parameters[zo];
896   double cutoffslope = this->cutoffslope;
897   cutslopecutdist = cutoffslope * rFermi;
898   other_eta2betaseq = emtother->eta2 * Beta * emtother->seq;
899   self_eta2betaseq = emtself->eta2 * Beta * emtself->seq;
900   other_kappaoverbeta = emtother->kappa / Beta;
901   self_kappaoverbeta = emtself->kappa / Beta;
902   other_kappaseq = emtother->kappa * emtother->seq;
903   self_kappaseq = emtself->kappa * emtself->seq;
904   double other_eta2 = emtother->eta2;
905   double self_eta2 = emtself->eta2;
906 
907   ASSERT(n <= BUFLEN);
908   // We have four cases to handled, controlled by two booleans.
909   // 1) Whether sigma2 needs to be calculated,  2) Whether we need a
910   // different calculation for the two atoms interacting.  The latter is
911   // the case if the atomic numbers are different, unless operating in
912   // full neighborlist mode or doing a partial update for MonteCarloEMT,
913   // in these cases calculations are not reused.
914   bool calculateother = (zs != zo) && !always_fullnblist && !partialupdate;
915   if (!calculateother && !calculatesigma2)
916     {
917 #ifdef ASAP_MKL
918       // dist[] = tmp_a[] = sqrt(sq_dist[])
919       vdSqrt(n, sq_dist, tmp_a);
920       for (int i = 0; i < n; i++)
921 	{
922 	  tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
923 	  tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
924 	}
925       // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
926       vdExp(n, tmp_b, tmp_a);
927       // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
928       vdExp(n, tmp_c, tmp_b);
929       for (int i = 0; i < n; i++)
930 	{
931 	  double wght = 1.0 / (1.0 + tmp_a[i]);
932 	  dsigma1s[i] = wght * tmp_b[i];
933 	}
934 #else  // ASAP_MKL
935       for (int i = 0; i < n; i++)
936 	{
937 	  /* dist = sqrt(sq_dist),  distances between atoms */
938 	  double dist = sqrt(sq_dist[i]);
939 	  /* Calculate weight factor (cutoff function) */
940 	  double wght = 1.0 / (1.0 + exp(cutoffslope * dist
941 					 - cutslopecutdist));
942 	  /* Contribution to sigma1 */
943 	  dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq);
944 	}
945 #endif // ASAP_MKL
946     }
947   else if (calculateother && !calculatesigma2)
948     {
949 #ifdef ASAP_MKL
950       // dist[] = tmp_a[] = sqrt(sq_dist[])
951       vdSqrt(n, sq_dist, tmp_a);
952       for (int i = 0; i < n; i++)
953 	{
954 	  tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
955 	  tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
956 	  tmp_d[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq;
957 	}
958       // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
959       vdExp(n, tmp_b, tmp_a);
960       // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
961       vdExp(n, tmp_c, tmp_b);
962       // tmp_c[] = exp(tmp_d[]) = exp(-self_eta2 * dist[] + self_eta2betaseq)
963       vdExp(n, tmp_d, tmp_c);
964       for (int i = 0; i < n; i++)
965 	{
966 	  double wght = 1.0 / (1.0 + tmp_a[i]);
967 	  dsigma1s[i] = wght * tmp_b[i];
968 	  dsigma1o[i] = wght * tmp_c[i];
969 	}
970 #else // ASAP_MKL
971       for (int i = 0; i < n; i++)
972 	{
973 	  /* dist = sqrt(sq_dist),  distances between atoms */
974 	  double dist = sqrt(sq_dist[i]);
975 	  /* Calculate weight factor (cutoff function) */
976 	  double wght = 1.0 / (1.0 + exp(cutoffslope * dist
977 					 - cutslopecutdist));
978 	  /* Contributions to sigma1 */
979 	  dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq);
980 	  dsigma1o[i] = wght * exp(-self_eta2 * dist + self_eta2betaseq);
981 	}
982 #endif //ASAP_MKL
983     }
984   else if (!calculateother && calculatesigma2)
985     {
986 #ifdef ASAP_MKL
987       // dist[] = tmp_a[] = sqrt(sq_dist[])
988       vdSqrt(n, sq_dist, tmp_a);
989       for (int i = 0; i < n; i++)
990 	{
991 	  tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
992 	  tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
993 	  tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
994 	}
995       // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
996       vdExp(n, tmp_b, tmp_a);
997       // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
998       vdExp(n, tmp_c, tmp_b);
999       // tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
1000       vdExp(n, tmp_d, tmp_c);
1001       for (int i = 0; i < n; i++)
1002 	{
1003 	  double wght = 1.0 / (1.0 + tmp_a[i]);
1004 	  dsigma1s[i] = wght * tmp_b[i];
1005 	  dsigma2s[i] = wght * tmp_c[i];
1006 	}
1007 #else // ASAP_MKL
1008       for (int i = 0; i < n; i++)
1009 	{
1010 	  /* dist = sqrt(sq_dist),  distances between atoms */
1011 	  double dist = sqrt(sq_dist[i]);
1012 
1013 	  /* Calculate weight factor (cutoff function) */
1014 	  double wght = 1.0 / (1.0 + exp(cutoffslope * dist
1015 					 - cutslopecutdist));
1016 	  /* Contribution to sigma1 */
1017 	  dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq);
1018 	  dsigma2s[i] = wght * exp(-other_kappaoverbeta * dist
1019 					+ other_kappaseq);
1020 
1021 	}
1022 #endif //ASAP_MKL
1023    }
1024   else // calculateother && calculatesigma2
1025     {
1026 #ifdef ASAP_MKL
1027       // dist[] = tmp_a[] = sqrt(sq_dist[])
1028       vdSqrt(n, sq_dist, tmp_a);
1029       for (int i = 0; i < n; i++)
1030 	{
1031 	  tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
1032 	  tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
1033 	  tmp_d[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq;
1034 	  tmp_e[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
1035 	  tmp_f[i] = -self_kappaoverbeta * tmp_a[i] + self_kappaseq;
1036 	}
1037       // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
1038       vdExp(n, tmp_b, tmp_a);
1039       // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
1040       vdExp(n, tmp_c, tmp_b);
1041       // tmp_c[] = exp(tmp_d[]) = exp(-self_eta2 * dist[] + self_eta2betaseq)
1042       vdExp(n, tmp_d, tmp_c);
1043       // tmp_d[] = exp(tmp_e[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
1044       vdExp(n, tmp_e, tmp_d);
1045       // tmp_e[] = exp(tmp_f[]) = exp(-self_kappaoverbeta * dist[] + self_kappaseq)
1046       vdExp(n, tmp_f, tmp_e);
1047       for (int i = 0; i < n; i++)
1048 	{
1049 	  double wght = 1.0 / (1.0 + tmp_a[i]);
1050 	  dsigma1s[i] = wght * tmp_b[i];
1051 	  dsigma1o[i] = wght * tmp_c[i];
1052 	  dsigma2s[i] = wght * tmp_d[i];
1053 	  dsigma2o[i] = wght * tmp_e[i];
1054 	}
1055 #else // ASAP_MKL
1056       for (int i = 0; i < n; i++)
1057 	{
1058 	  /* dist = sqrt(sq_dist),  distances between atoms */
1059 	  double dist = sqrt(sq_dist[i]);
1060 	  /* Calculate weight factor (cutoff function) */
1061 	  double wght = 1.0 / (1.0 + exp(cutoffslope * dist
1062 					 - cutslopecutdist));
1063 	  /* Contributions to sigma1 */
1064 	  dsigma1s[i] = wght * exp(-other_eta2 * dist + other_eta2betaseq);
1065 	  dsigma1o[i] = wght * exp(-self_eta2 * dist + self_eta2betaseq);
1066 	  dsigma2s[i] = wght * exp(-other_kappaoverbeta * dist
1067 				   + other_kappaseq);
1068 	  dsigma2o[i] = wght * exp(-self_kappaoverbeta * dist + self_kappaseq);
1069 	}
1070 #endif // ASAP_MKL
1071     }
1072 
1073   // Distribute the results to the atoms.
1074   if (!partialupdate && !always_fullnblist)
1075     {
1076       // This is the normal branch, both atoms need to be updated.
1077       if (calculatesigma2)
1078 	{
1079 	  /* Distribute contributions to sigma1 and sigma2 */
1080 	  s1o = &sigma1[zo][0];
1081 	  s1s = &sigma1[zs][0];
1082 	  s2o = &sigma2[zo][0];
1083 	  s2s = &sigma2[zs][0];
1084 #ifdef _OPENMP
1085 #pragma omp critical
1086 #endif // _OPENMP
1087 	  {
1088 	    if (calculateother)
1089 	      {
1090 		for (int i = 0; i < n; i++)
1091 		  {
1092 		    int s = self[i];
1093 		    int o = other[i];
1094 		    s1o[s] += dsigma1s[i];
1095 		    s2o[s] += dsigma2s[i];
1096 		    if (o < nAtoms)        // Dont add to ghost atoms
1097 		      {
1098 			s1s[o] += dsigma1o[i];
1099 			s2s[o] += dsigma2o[i];
1100 		      }
1101 		  }
1102 	      }
1103 	    else
1104 	      {
1105 		for (int i = 0; i < n; i++)
1106 		  {
1107 		    int s = self[i];
1108 		    int o = other[i];
1109 		    s1o[s] += dsigma1s[i];
1110 		    s2o[s] += dsigma2s[i];
1111 		    if (o < nAtoms)        // Dont add to ghost atoms
1112 		      {
1113 			s1s[o] += dsigma1s[i];
1114 			s2s[o] += dsigma2s[i];
1115 		      }
1116 		  }
1117 	      }
1118 
1119 	  }
1120 	}
1121       else
1122 	{
1123 	  /* Distribute contributions to sigma1. */
1124 	  s1o = &sigma1[zo][0];
1125 	  s1s = &sigma1[zs][0];
1126 #ifdef _OPENMP
1127 #pragma omp critical
1128 #endif // _OPENMP
1129 	  {
1130 	    if (calculateother)
1131 	      {
1132 		for (int i = 0; i < n; i++)
1133 		  {
1134 		    int s = self[i];
1135 		    int o = other[i];
1136 		    s1o[s] += dsigma1s[i];
1137 		    if (o < nAtoms)
1138 		      s1s[o] += dsigma1o[i];
1139 		  }
1140 	      }
1141 	    else
1142 	      {
1143 		for (int i = 0; i < n; i++)
1144 		  {
1145 		    int s = self[i];
1146 		    int o = other[i];
1147 		    s1o[s] += dsigma1s[i];
1148 		    if (o < nAtoms)
1149 		      s1s[o] += dsigma1s[i];
1150 		  }
1151 	      }
1152 	  }
1153 	}
1154     }
1155   else  // partialupdate OR always_fullnblist
1156     {
1157       // This branch may be taken by MonteCarloEMT during a partial update,
1158       // or by the normal EMT when not using Minimum Image Convention.  Since
1159       // full neighbor lists are used, only update the
1160       // 'self' atom, not the 'other' atom.
1161       // In OpenKIM, this branch is always taken.
1162       if (calculatesigma2)
1163         {
1164           /* Distribute contributions to sigma1 and sigma2 */
1165           s1o = &sigma1[zo][0];
1166           s2o = &sigma2[zo][0];
1167     #ifdef _OPENMP
1168     #pragma omp critical
1169     #endif // _OPENMP
1170           {
1171             for (int i = 0; i < n; i++)
1172               {
1173                 int s = self[i];
1174                 s1o[s] += dsigma1s[i];
1175                 s2o[s] += dsigma2s[i];
1176               }
1177           }
1178         }
1179       else
1180         {
1181           /* Distribute contributions to sigma1 only */
1182           s1o = &sigma1[zo][0];
1183     #ifdef _OPENMP
1184     #pragma omp critical
1185     #endif // _OPENMP
1186           {
1187             for (int i = 0; i < n; i++)
1188               {
1189                 int s = self[i];
1190                 s1o[s] += dsigma1s[i];
1191               }
1192           }
1193         }
1194     }
1195 #ifdef ASAP_MKL
1196   mkl_free(temporary);
1197 #else
1198   delete[] temporary;
1199 #endif
1200 }
1201 
1202 
1203 // Calculate the energies if calc_Epot is true, otherwise just calculate the
1204 // derivatives needed for the forces.
CalculateEnergiesAfterSigmas(bool calc_Epot)1205 void EMT::CalculateEnergiesAfterSigmas(bool calc_Epot)
1206 {
1207   RETURNIFASAPERROR;
1208   USETIMER("EMT::CalculateEnergiesAfterSigmas");
1209   DEBUGPRINT;
1210 
1211   //vector<double> sigma(nSize);
1212   double *RESTRICT sigma = &tmp_double[0];
1213   bool dosigmapart = (recalc.beforeforces || (calc_Epot && recalc.energies));
1214   bool doepotpart = (calc_Epot && recalc.energies);
1215 
1216   int zs, zo;
1217   double s;
1218   // Better performance if static ???
1219   ASSERT(nelements < NMAXELEMENTS);
1220   double inv12gamma1[NMAXELEMENTS];
1221   double neginvbetaeta2[NMAXELEMENTS];
1222   double neglambda[NMAXELEMENTS];
1223   double lambdaseq[NMAXELEMENTS];
1224   double negkappa[NMAXELEMENTS];
1225   double kappaseq[NMAXELEMENTS];
1226   double nege0lambdalambda[NMAXELEMENTS];
1227   double e0lambdalambdaseq[NMAXELEMENTS];
1228   double neg6v0kappa[NMAXELEMENTS];
1229   double e0lambda[NMAXELEMENTS];
1230   double eccnst[NMAXELEMENTS];
1231   double sixv0[NMAXELEMENTS];
1232   double neghalfv0overgamma2[NMAXELEMENTS];
1233   double seq[NMAXELEMENTS];
1234   int *id = &(this->id)[0];
1235 
1236   if (dosigmapart)
1237     {
1238 #ifdef _OPENMP
1239 #pragma omp master
1240 #endif // _OPENMP
1241       {
1242         VERB("b");
1243         DEBUGPRINT;
1244       }
1245       /* Calculate total sigma1 */
1246 #ifdef _OPENMP
1247 #pragma omp for
1248 #endif // _OPENMP
1249       for (int i = 0; i < nAtoms; i++)
1250         {
1251           double s = 0.0;
1252           int zs = id[i];
1253           for (int zo = 0; zo < nelements; zo++)
1254             s += (*chi)[zs][zo] * sigma1[zo][i];
1255           if (s < 1.0e-40)
1256             s = 1.0e-40;
1257           sigma[i] = s;
1258         }
1259       ASSERT(nAtoms == radius.size() && nAtoms == Ec.size() &&
1260              nSize == dEds.size());
1261     }
1262 
1263   /* Calculate conbinations of EMT parameters */
1264   for (int i = 0; i < nelements; i++)
1265     {
1266      inv12gamma1[i] = 1.0 / (12.0 * parameters[i]->gamma1);
1267       neginvbetaeta2[i] = -1.0 / (Beta * parameters[i]->eta2);
1268       neglambda[i] = - parameters[i]->lambda;
1269       lambdaseq[i] = parameters[i]->lambda * parameters[i]->seq;
1270       negkappa[i] = - parameters[i]->kappa;
1271       kappaseq[i] = parameters[i]->kappa * parameters[i]->seq;
1272       nege0lambdalambda[i] = - parameters[i]->e0 * parameters[i]->lambda *
1273         parameters[i]->lambda;
1274       e0lambdalambdaseq[i] = parameters[i]->e0 * parameters[i]->lambda *
1275         parameters[i]->lambda * parameters[i]->seq;
1276       neg6v0kappa[i] = - 6.0 * parameters[i]->V0 * parameters[i]->kappa;
1277       e0lambda[i] = parameters[i]->e0 * parameters[i]->lambda;
1278       eccnst[i] = parameters[i]->e0 * (1.0 - parameters[i]->lambda *
1279                                        parameters[i]->seq);
1280       sixv0[i] = 6.0 * parameters[i]->V0;
1281       neghalfv0overgamma2[i] = -0.5 * parameters[i]->V0 /
1282         parameters[i]->gamma2;
1283       seq[i] = parameters[i]->seq;
1284     }
1285 
1286   int nAtoms = this->nAtoms;  // Helps optimization.
1287 #define LOCALPTR(XX) double *RESTRICT XX = &(this->XX)[0]
1288   LOCALPTR(Ec);
1289   LOCALPTR(Eas);
1290   LOCALPTR(Epot);
1291   LOCALPTR(radius);
1292   LOCALPTR(dEds);
1293   LOCALPTR(ex2);
1294 
1295   if (dosigmapart)
1296   {
1297 #ifdef _OPENMP
1298 #pragma omp for
1299 #endif // _OPENMP
1300     for (int i = 0; i < nAtoms; i++)
1301     {
1302       asap_z_int z = id[i];  // Actually not Z (atomic no) but ID no.
1303       double r = seq[z] + neginvbetaeta2[z] * log(sigma[i] * inv12gamma1[z]);
1304       radius[i] = r;
1305       double ex1 = exp(neglambda[z] * r + lambdaseq[z]);
1306       double e2 = exp(negkappa[z] * r + kappaseq[z]);
1307       ex2[i] = e2;
1308       double tmp = (nege0lambdalambda[z] * r + e0lambdalambdaseq[z]) * ex1
1309            + neg6v0kappa[z] * e2;
1310       dEds[i] = tmp * neginvbetaeta2[z] / sigma[i];
1311       Ec[i] = (e0lambda[z] * r + eccnst[z]) * ex1;
1312     }
1313 #ifdef _OPENMP
1314 #pragma omp for
1315 #endif // _OPENMP
1316     for (int i = nAtoms; i < nSize; i++)
1317       dEds[i] = 0.0;
1318   }
1319   if (calc_Epot)
1320     {
1321       DEBUGPRINT;
1322       if (doepotpart)
1323         {
1324 #ifdef _OPENMP
1325 #pragma omp master
1326 #endif // _OPENMP
1327           {
1328             VERB("e");
1329             DEBUGPRINT;
1330           }
1331           /* We also need Eas, but only for the real atoms */
1332           ASSERT(sigma2isvalid);
1333           ASSERT(counters.sigma2 == atoms->GetPositionsCounter());
1334           /* Calculate total sigma2 */
1335 #ifdef _OPENMP
1336 #pragma omp for
1337 #endif // _OPENMP
1338           for (int i = 0; i < nAtoms; i++)
1339             {
1340               s = 0.0;
1341               zs = id[i];
1342               for (zo = 0; zo < nelements; zo++)
1343                 s += (*chi)[zs][zo] * sigma2[zo][i];
1344               Eas[i] = sixv0[zs] * ex2[i] + neghalfv0overgamma2[zs] * s;
1345             }
1346         }
1347       /* Add the energies */
1348       DEBUGPRINT;
1349       if(subtractE0)
1350         {
1351 #ifdef _OPENMP
1352 #pragma omp for
1353 #endif // _OPENMP
1354           for (int i = 0; i < nAtoms; i++)
1355             Epot[i] = Ec[i] + Eas[i] - parameters[id[i]]->e0;
1356         }
1357       else
1358         {
1359 #ifdef _OPENMP
1360 #pragma omp for
1361 #endif // _OPENMP
1362           for (int i = 0; i < nAtoms; i++)
1363             Epot[i] = Ec[i] + Eas[i];
1364         }
1365     } // if (calc_Epot)
1366 
1367 #ifdef ASAP_FOR_KIM
1368   // In OpenKIM, nAtoms == nSize and the non-contributing atoms had
1369   // their energies calculates.  Zap them.
1370   const int *contributes = atoms->GetParticleContributes();
1371   for (int i = 0; i < nAtoms; i++)
1372     if (!contributes[i])
1373       Epot[i] = Ec[i] = Eas[i] = dEds[i] = 0;
1374 #endif // ASAP_FOR_KIM
1375 
1376   DEBUGPRINT;
1377 }
1378 
CalculateForcesAfterEnergies()1379 void EMT::CalculateForcesAfterEnergies()
1380 {
1381   RETURNIFASAPERROR;
1382   if (!(recalc.forces || (virials.size() && recalc.virials)))
1383     return;
1384 #ifdef _OPENMP
1385 #pragma omp master
1386 #endif // _OPENMP
1387   {
1388       VERB("f");
1389       if (virials.size())
1390         {
1391           VERB("s");
1392         }
1393   }
1394 #ifdef _OPENMP
1395 #pragma omp barrier
1396 #endif // _OPENMP
1397   DEBUGPRINT;
1398   int maxnblen = nblist->MaxNeighborListLength();
1399   // Buffer data:
1400   TinyMatrix<int> nbatch(nelements,nelements);
1401   TinyMatrixOfVector<vector<int> > self(nelements,nelements, BUFLEN);
1402   TinyMatrixOfVector<vector<int> > other(nelements,nelements, BUFLEN);
1403   TinyMatrixOfVector<vector<Vec> > rnb(nelements,nelements, BUFLEN);
1404   TinyMatrixOfVector<vector<double> > sqdist(nelements,nelements, BUFLEN);
1405   TinyMatrixOfVector<vector<double> > dEdss(nelements,nelements, BUFLEN);
1406   TinyMatrixOfVector<vector<double> > dEdso(nelements,nelements, BUFLEN);
1407   vector<int> other_buf(BUFLEN);
1408   vector<Vec> rnb_buf(BUFLEN);
1409   vector<double> sqdist_buf(BUFLEN);
1410 
1411 #ifdef ASAP_FOR_KIM
1412   const int *contributes = atoms->GetParticleContributes();
1413 #endif
1414 
1415   // If there is only one element, CalculateForcesAfterEnergiesSingle should
1416   // be used.
1417   ASSERT(nelements > 1);
1418 
1419   int nSize = this->nSize;  // Helps optimization.
1420   Vec *RESTRICT force = &(this->force)[0];
1421 
1422   /* Set forces and virials to zero */
1423   if (virials.size())
1424     {
1425       ASSERT(virials.size() == nSize);
1426 #ifdef _OPENMP
1427 #pragma omp for nowait
1428 #endif // _OPENMP
1429       for (int i = 0; i < nSize; i++)
1430         for (int j = 0; j < 6; j++)
1431           virials[i][j] = 0.0;
1432     }
1433 #ifdef _OPENMP
1434 #pragma omp for
1435 #endif // _OPENMP
1436   for (int i = 0; i < nSize; i++)
1437     force[i].x = force[i].y = force[i].z = 0.0;
1438 
1439   /* Calculate forces */
1440 
1441   /* No atoms in batch pools */
1442   for (int i = 0; i < nelements; i++)
1443     for (int j = 0; j < nelements; j++)
1444       nbatch[i][j] = 0;
1445 
1446   // Loop over atoms
1447 #ifdef _OPENMP
1448 #pragma omp for
1449 #endif // _OPENMP
1450   for (int atom = 0; atom < nAtoms; atom++) {
1451 #ifdef ASAP_FOR_KIM
1452     if (!contributes[atom]) continue;  // Skip non-contributing atom in OpenKIM, where nAtoms == nSize.
1453 #endif
1454     int zself = id[atom];
1455     // Get neighbors and loop over them.
1456     int size = BUFLEN;
1457     int n;
1458     if (always_fullnblist)
1459       n = nblist->GetFullNeighbors(atom, &other_buf[0], &rnb_buf[0],
1460                                    &sqdist_buf[0], size);
1461     else
1462       n = nblist->GetNeighbors(atom, &other_buf[0], &rnb_buf[0],
1463                                &sqdist_buf[0], size);
1464     ASSERT(size >= 0);    // REMOVE LATER
1465     for (int i = 0; i < n; i++) {
1466       int zother = id[other_buf[i]];
1467       int nbat = nbatch[zself][zother]++;  // Count this atom
1468       self[zself][zother][nbat] = atom;
1469       other[zself][zother][nbat] = other_buf[i];
1470       rnb[zself][zother][nbat][0] = rnb_buf[i][0];
1471       rnb[zself][zother][nbat][1] = rnb_buf[i][1];
1472       rnb[zself][zother][nbat][2] = rnb_buf[i][2];
1473       sqdist[zself][zother][nbat] = sqdist_buf[i];
1474       dEdss[zself][zother][nbat] = dEds[atom];
1475       dEdso[zself][zother][nbat] = dEds[other_buf[i]];
1476     }
1477     // Now process any full batch
1478     for (int zo = 0; zo < nelements; zo++)
1479       if (nbatch[zself][zo] >= BUFLEN - maxnblen) {
1480         force_batch(&self[zself][zo][0], &other[zself][zo][0],
1481 		    &rnb[zself][zo][0],
1482                     &sqdist[zself][zo][0], &dEdss[zself][zo][0],
1483                     &dEdso[zself][zo][0], zself, zo, nbatch[zself][zo]);
1484         nbatch[zself][zo] = 0;
1485       }
1486   }  // Loop over atoms
1487   /* Process the remaining incomplete batches */
1488   for (int zs = 0; zs < nelements; zs++)
1489     for (int zo = 0; zo < nelements; zo++)
1490       if (nbatch[zs][zo])
1491         force_batch(&self[zs][zo][0], &other[zs][zo][0], &rnb[zs][zo][0],
1492                     &sqdist[zs][zo][0], &dEdss[zs][zo][0],
1493                     &dEdso[zs][zo][0], zs, zo, nbatch[zs][zo]);
1494   DEBUGPRINT;
1495 #ifdef _OPENMP
1496 #pragma omp barrier
1497 #endif // _OPENMP
1498 }
1499 
CalculateForcesAfterEnergiesSingle()1500 void EMT::CalculateForcesAfterEnergiesSingle()
1501 {
1502   RETURNIFASAPERROR;
1503   USETIMER("EMT::CalculateForcesAfterEnergiesSingle");
1504   if (!(recalc.forces || (virials.size() && recalc.virials)))
1505     return;
1506 #ifdef _OPENMP
1507 #pragma omp master
1508 #endif // _OPENMP
1509   {
1510       VERB("f");
1511       if (virials.size())
1512         {
1513           VERB("s");
1514         }
1515   }
1516 #ifdef _OPENMP
1517 #pragma omp barrier
1518 #endif // _OPENMP
1519   int maxnblen = nblist->MaxNeighborListLength();
1520   DEBUGPRINT;
1521   // Buffer data:
1522   int nbatch;
1523   vector<int> self(BUFLEN);
1524   vector<int> other(BUFLEN);
1525   vector<Vec> rnb(BUFLEN);
1526   vector<double> sqdist(BUFLEN);
1527   vector<double> dEdss(BUFLEN);
1528   vector<double> dEdso(BUFLEN);
1529 
1530 #ifdef ASAP_FOR_KIM
1531   const int *contributes = atoms->GetParticleContributes();
1532 #endif
1533 
1534   int nSize = this->nSize;   // These three lines help optimization.
1535   int nAtoms = this->nAtoms;
1536   Vec *RESTRICT force = &(this->force)[0];
1537 
1538   DEBUGPRINT;
1539   // If there is more than one element, CalculateForcesAfterEnergies should
1540   // be used.
1541   ASSERT(nelements == 1);
1542 
1543   /* Set forces and virials to zero */
1544   ASSERT(this->force.size() >= nSize);
1545   if (virials.size())
1546     {
1547       ASSERT(virials.size() == nSize);
1548 #ifdef _OPENMP
1549 #pragma omp for nowait
1550 #endif // _OPENMP
1551       for (int i = 0; i < nSize; i++)
1552         for (int j = 0; j < 6; j++)
1553           virials[i][j] = 0.0;
1554     }
1555 #ifdef _OPENMP
1556 #pragma omp for
1557 #endif // _OPENMP
1558   for (int i = 0; i < nSize; i++)
1559     force[i].x = force[i].y = force[i].z = 0.0;
1560 
1561   /* Calculate forces */
1562 
1563   /* No atoms in batch pool */
1564   DEBUGPRINT;
1565   nbatch = 0;
1566 
1567   // Loop over atoms
1568   DEBUGPRINT;
1569 #ifdef _OPENMP
1570 #pragma omp for
1571 #endif // _OPENMP
1572   for (int atom = 0; atom < nAtoms; atom++) {
1573 
1574 #ifdef ASAP_FOR_KIM
1575     if (!contributes[atom]) continue;  // Skip non-contributing atom in OpenKIM, where nAtoms == nSize.
1576 #endif
1577 
1578     // Get neighbors and loop over them.
1579     int size = BUFLEN-nbatch;
1580     int n;
1581     if (always_fullnblist)
1582       n = nblist->GetFullNeighbors(atom, &other[nbatch], &rnb[nbatch],
1583                                    &sqdist[nbatch], size);
1584     else
1585       n = nblist->GetNeighbors(atom, &other[nbatch], &rnb[nbatch],
1586                                &sqdist[nbatch], size);
1587     double dEdsatom = dEds[atom];
1588     for (int i = nbatch; i < nbatch+n; i++) {
1589       self[i] = atom;
1590       dEdss[i] = dEdsatom;
1591       dEdso[i] = dEds[other[i]];
1592     }
1593     nbatch += n;
1594     // Now process any full batch
1595     if (nbatch >= BUFLEN - maxnblen) {
1596       force_batch(&self[0], &other[0], &rnb[0], &sqdist[0], &dEdss[0],
1597 		  &dEdso[0], 0, 0, nbatch);
1598       nbatch = 0;
1599     }
1600   }  // Loop over atoms
1601   /* Process the remaining incomplete batch */
1602   DEBUGPRINT;
1603   if (nbatch)
1604     force_batch(&self[0], &other[0], &rnb[0], &sqdist[0], &dEdss[0],
1605 		&dEdso[0], 0, 0, nbatch);
1606   DEBUGPRINT;
1607 #ifdef _OPENMP
1608 #pragma omp barrier
1609 #endif // _OPENMP
1610 }
1611 
1612 // It would be natural to transfer the arrays as vector<> instead of
1613 // as pointes, but that confuses the compiler so it cannot discover
1614 // that various arrays are disjunct, ruining optimization.  For the
1615 // same reason, the temporary array is not a vector<>.
force_batch(const int * RESTRICT self,const int * RESTRICT other,const Vec * RESTRICT rnb,const double * RESTRICT sq_dist,const double * RESTRICT dEdss,const double * RESTRICT dEdso,int zs,int zo,int n)1616 void EMT::force_batch(const int *RESTRICT self, const int *RESTRICT other,
1617                       const Vec *RESTRICT rnb, const double *RESTRICT sq_dist,
1618                       const double *RESTRICT dEdss, const double *RESTRICT dEdso,
1619                       int zs, int zo, int n)
1620 {
1621   USETIMER("EMT::force_batch");
1622 #ifdef ASAP_MKL
1623   double *temporary = (double *) mkl_malloc(8 * BUFLEN * sizeof(double), ASAP_MKL_ALIGN);
1624   double *RESTRICT df = temporary;
1625   double *RESTRICT tmp_a = df + BUFLEN;
1626   double *RESTRICT tmp_b = tmp_a + BUFLEN;
1627   double *RESTRICT tmp_c = tmp_b + BUFLEN;
1628   double *RESTRICT tmp_d = tmp_c + BUFLEN;
1629   double *RESTRICT tmp_e = tmp_d + BUFLEN;
1630   double *RESTRICT tmp_f = tmp_e + BUFLEN;
1631   double *RESTRICT tmp_g = tmp_f + BUFLEN;
1632   //ASSERT(tmp_g - temporary == (8 - 1) * BUFLEN);
1633 #else // ASAP_MKL
1634   double *temporary = new double[BUFLEN];
1635   double *RESTRICT df = temporary;
1636 #endif // ASAP_MKL
1637   double cutslopecutdist, other_eta2betaseq, other_kappaoverbeta;
1638   double other_kappaseq, self_eta2betaseq, self_kappaoverbeta;
1639   double self_kappaseq, cnst_s, cnst_o;
1640   const emt_parameters *emtself, *emtother;
1641   //double pairA, exprcut, pairD;
1642   //double *tmp;
1643 
1644   ASSERT(n <= BUFLEN);
1645 
1646   /* Get EMT parameters */
1647   emtself = parameters[zs];
1648   emtother = parameters[zo];
1649   cutslopecutdist = cutoffslope * rFermi;
1650   other_eta2betaseq = emtother->eta2 * Beta * emtother->seq;
1651   other_kappaoverbeta = emtother->kappa / Beta;
1652   other_kappaseq = emtother->kappa * emtother->seq;
1653   self_eta2betaseq = emtself->eta2 * Beta * emtself->seq;
1654   self_kappaoverbeta = emtself->kappa / Beta;
1655   self_kappaseq = emtself->kappa * emtself->seq;
1656   double other_eta2 = emtother->eta2;
1657   double self_eta2 = emtself->eta2;
1658 
1659   cnst_s = -0.5 * emtself->V0 * (*chi)[zs][zo] / emtself->gamma2;
1660   cnst_o = -0.5 * emtother->V0 * (*chi)[zo][zs] / emtother->gamma2;
1661   double chi_zs_zo = (*chi)[zs][zo];
1662   double chi_zo_zs = (*chi)[zo][zs];
1663   // Three cases: 1) The two atoms have the same atomic number,
1664   // 2) they have different atomic number (less reuse of calculations)
1665   // 3) we use full neighbor lists (no reuse).
1666   if ((zs == zo) && !always_fullnblist)
1667     {
1668 #ifdef ASAP_MKL
1669       /* Get the distances from their squares */
1670       // dist[] = tmp_a[] = sqrt(sq_dist[])
1671       vdSqrt(n, sq_dist, tmp_a);
1672       // invdist[] = tmp_e[] = 1.0 / dist[]
1673       vdInv(n, tmp_a, tmp_e);
1674       for (int i = 0; i < n; i++)
1675         {
1676 	  tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
1677 	  tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
1678 	  tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
1679 	}
1680       // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
1681       vdExp(n, tmp_b, tmp_a);
1682       // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
1683       vdExp(n, tmp_c, tmp_b);
1684       // tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
1685       vdExp(n, tmp_d, tmp_c);
1686       for (int i = 0; i < n; i++)
1687 	{
1688 	  /* Calculate cutoff function (weight factor) */
1689 	  double wght = 1.0 / (1.0 + tmp_a[i]);
1690 	  /* Calculate derivative of the cutoff function. */
1691 	  double dwdr = -cutoffslope * wght * (1 - wght);
1692 	  double dsigma1dr = (dwdr - wght * other_eta2) * tmp_b[i];
1693 	  double dsigma2dr = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i];
1694 	  df[i] = tmp_e[i] * (dsigma1dr * dEdss[i] * chi_zs_zo
1695 			      + cnst_s * dsigma2dr
1696 			      + dsigma1dr * dEdso[i] * chi_zo_zs
1697 			      + cnst_o * dsigma2dr * (other[i] < nAtoms));
1698 	}
1699 #else //ASAP_MKL
1700       for (int i = 0; i < n; i++)
1701 	{
1702 	  /* Get the distances from their squares */
1703 	  double dist = sqrt(sq_dist[i]);
1704 	  double inv_dist = 1.0 / dist;
1705 	  /* Calculate cutoff function (weight factor) */
1706 	  double wght = 1.0 / (1.0 + exp(cutoffslope * dist
1707 					 - cutslopecutdist));
1708 	  /* Calculate derivative of the cutoff function. */
1709 	  double dwdr = -cutoffslope * wght * (1 - wght);
1710 	  double dsigma1dr = (dwdr - wght * other_eta2) *
1711 	    exp(-other_eta2 * dist + other_eta2betaseq);
1712 	  double dsigma2dr = (dwdr + wght * -other_kappaoverbeta) *
1713 	    exp(-other_kappaoverbeta * dist + other_kappaseq);
1714 	  df[i] = inv_dist * (dsigma1dr * dEdss[i] * chi_zs_zo
1715 			      + cnst_s * dsigma2dr // * (self[i] < nAtoms)   always true
1716 			      + dsigma1dr * dEdso[i] * chi_zo_zs
1717 			      + cnst_o * dsigma2dr * (other[i] < nAtoms));
1718 	}
1719 #endif //ASAP_MKL
1720     }
1721   else if (!always_fullnblist)
1722     {
1723       // zs != zo.
1724 #ifdef ASAP_MKL
1725       /* Get the distances from their squares */
1726       // dist[] = tmp_a[] = sqrt(sq_dist[])
1727       vdSqrt(n, sq_dist, tmp_a);
1728       // invdist[] = tmp_g[] = 1.0 / dist[]
1729       vdInv(n, tmp_a, tmp_g);
1730       for (int i = 0; i < n; i++)
1731         {
1732 	  tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
1733 	  tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
1734 	  tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
1735 	  tmp_e[i] = -self_eta2 * tmp_a[i] + self_eta2betaseq;
1736 	  tmp_f[i] = -self_kappaoverbeta * tmp_a[i] + self_kappaseq;
1737 	}
1738       // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
1739       vdExp(n, tmp_b, tmp_a);
1740       // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
1741       vdExp(n, tmp_c, tmp_b);
1742       // tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
1743       vdExp(n, tmp_d, tmp_c);
1744       // tmp_d[] = exp(tmp_e[]) = exp(-self_eta2 * dist[] + self_eta2betaseq)
1745       vdExp(n, tmp_e, tmp_d);
1746       // tmp_e[] = exp(tmp_f[]) = exp(-self_kappaoverbeta * dist[] + self_kappaseq)
1747       vdExp(n, tmp_f, tmp_e);
1748       for (int i = 0; i < n; i++)
1749 	{
1750 	  /* Calculate cutoff function (weight factor) */
1751 	  double wght = 1.0 / (1.0 + tmp_a[i]);
1752 	  /* Calculate derivative of the cutoff function. */
1753 	  double dwdr = -cutoffslope * wght * (1 - wght);
1754 	  double dsigma1dr_o = (dwdr - wght * other_eta2) * tmp_b[i];
1755 	  double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i];
1756 	  double dsigma1dr_s = (dwdr - wght * self_eta2) * tmp_d[i];
1757 	  double dsigma2dr_s = (dwdr + wght * -self_kappaoverbeta) * tmp_e[i];
1758 	  df[i] = tmp_g[i] * (dsigma1dr_o * dEdss[i] * chi_zs_zo
1759 			      + cnst_s * dsigma2dr_o
1760 			      + dsigma1dr_s * dEdso[i] * chi_zo_zs
1761 			      + cnst_o * dsigma2dr_s * (other[i] < nAtoms));
1762 	}
1763 #else // ASAP_MKL
1764       for (int i = 0; i < n; i++)
1765 	{
1766 	  /* Get the distances from their squares */
1767 	  double dist = sqrt(sq_dist[i]);
1768 	  double inv_dist = 1.0 / dist;
1769 	  /* Calculate cutoff function (weight factor) */
1770 	  double wght = 1.0 / (1.0 + exp(cutoffslope * dist -
1771 					 cutslopecutdist));
1772 	  /* Calculate derivative of the cutoff function. */
1773 	  double dwdr = -cutoffslope * wght * (1 - wght);
1774 	  double dsigma1dr_o = (dwdr - wght * other_eta2) *
1775 	    exp(-other_eta2 * dist + other_eta2betaseq);
1776 	  double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) *
1777 	    exp(-other_kappaoverbeta * dist + other_kappaseq);
1778 	  double dsigma1dr_s = (dwdr - wght * self_eta2) *
1779 	    exp(-self_eta2 * dist + self_eta2betaseq);
1780 	  double dsigma2dr_s = (dwdr + wght * -self_kappaoverbeta) *
1781 	    exp(-self_kappaoverbeta * dist + self_kappaseq);
1782 	  df[i] = inv_dist * (dsigma1dr_o * dEdss[i] * chi_zs_zo
1783 			      + cnst_s * dsigma2dr_o // * (self[i] < nAtoms)  always true!
1784 			      + dsigma1dr_s * dEdso[i] * chi_zo_zs
1785 			      + cnst_o * dsigma2dr_s * (other[i] < nAtoms));
1786 	}
1787 #endif //ASAP_MKL
1788     }
1789   else
1790     {
1791       // Using full neighbor lists
1792 #ifdef ASAP_MKL
1793       /* Get the distances from their squares */
1794       // dist[] = tmp_a[] = sqrt(sq_dist[])
1795       vdSqrt(n, sq_dist, tmp_a);
1796       // invdist[] = tmp_e[] = 1.0 / dist[]
1797       vdInv(n, tmp_a, tmp_e);
1798       for (int i = 0; i < n; i++)
1799         {
1800 	  tmp_b[i] = cutoffslope * tmp_a[i] - cutslopecutdist;
1801 	  tmp_c[i] = -other_eta2 * tmp_a[i] + other_eta2betaseq;
1802 	  tmp_d[i] = -other_kappaoverbeta * tmp_a[i] + other_kappaseq;
1803 	}
1804       // tmp_a[] = exp(tmp_b[]) = exp(cutoffslope * dist[] - cutslopecutdist)
1805       vdExp(n, tmp_b, tmp_a);
1806       // tmp_b[] = exp(tmp_c[]) = exp(-other_eta2 * dist[] + other_eta2betaseq)
1807       vdExp(n, tmp_c, tmp_b);
1808       // tmp_c[] = exp(tmp_d[]) = exp(-other_kappaoverbeta * dist[] + other_kappaseq)
1809       vdExp(n, tmp_d, tmp_c);
1810       for (int i = 0; i < n; i++)
1811         {
1812           /* Calculate cutoff function (weight factor) */
1813           double wght = 1.0 / (1.0 + tmp_a[i]);
1814           /* Calculate derivative of the cutoff function. */
1815           double dwdr = -cutoffslope * wght * (1 - wght);
1816           double dsigma1dr_o = (dwdr - wght * other_eta2) * tmp_b[i];
1817           double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) * tmp_c[i];
1818           df[i] = tmp_e[i] * (dsigma1dr_o * dEdss[i] * chi_zs_zo
1819                               + cnst_s * dsigma2dr_o);
1820 	}
1821 #else // ASAP_MKL
1822       for (int i = 0; i < n; i++)
1823         {
1824           /* Get the distances from their squares */
1825           double dist = sqrt(sq_dist[i]);
1826           double inv_dist = 1.0 / dist;
1827           /* Calculate cutoff function (weight factor) */
1828           double wght = 1.0 / (1.0 + exp(cutoffslope * dist -
1829                                          cutslopecutdist));
1830           /* Calculate derivative of the cutoff function. */
1831           double dwdr = -cutoffslope * wght * (1 - wght);
1832           double dsigma1dr_o = (dwdr - wght * other_eta2) *
1833             exp(-other_eta2 * dist + other_eta2betaseq);
1834           double dsigma2dr_o = (dwdr + wght * -other_kappaoverbeta) *
1835             exp(-other_kappaoverbeta * dist + other_kappaseq);
1836           df[i] = inv_dist * (dsigma1dr_o * dEdss[i] * chi_zs_zo
1837                               + cnst_s * dsigma2dr_o);
1838         }
1839 #endif // ASAP_MKL
1840     }
1841 
1842   distribute_force(self, other, df, rnb, n);
1843 
1844 #ifdef ASAP_MKL
1845   mkl_free(temporary);
1846 #else
1847   delete[] temporary;
1848 #endif
1849 }
1850 
distribute_force(const int * RESTRICT self,const int * RESTRICT other,const double * RESTRICT df,const Vec * RESTRICT rnb,int n)1851 void EMT::distribute_force(const int *RESTRICT self, const int *RESTRICT other,
1852     const double *RESTRICT df, const Vec *RESTRICT rnb, int n)
1853 {
1854   /* Distribute force contributions */
1855 #ifdef _OPENMP
1856 #pragma omp critical
1857 #endif // _OPENMP
1858   {
1859     for (int i = 0; i < n; i++)
1860       for (int j = 0; j < 3; j++)
1861         {
1862           double dfx = df[i] * rnb[i][j];
1863           force[self[i]][j] += dfx;
1864           force[other[i]][j] -= dfx;  // force has been extended to nSize.
1865         }
1866 
1867     /* Distribute virials contributions */
1868     if (virials.size())
1869       {
1870         for (int i = 0; i < n; i++)
1871           for (int alpha = 0; alpha < 3; alpha++)
1872             for (int beta = alpha; beta < 3; beta++)
1873               {
1874                 double dsx = 0.5 * df[i] * rnb[i][alpha] * rnb[i][beta];
1875                 int ii = stresscomp[alpha][beta];
1876                 virials[self[i]][ii] += dsx;
1877                 virials[other[i]][ii] += dsx;
1878               }
1879       }
1880   }
1881 }
1882 
GetLatticeConstant() const1883 double EMT::GetLatticeConstant() const
1884 {
1885     ASSERT(singleelement != 0);
1886     return Beta * singleelement->seq * sqrt(2.0);
1887 }
1888 
PrintParameters()1889 void EMT::PrintParameters()
1890 {
1891   for (int i = 0; i < nelements; i++)
1892     {
1893       const emt_parameters *p = parameters[i];
1894       cerr << endl;
1895       cerr << "Parameters for element " << i << " (" << p->name << ", Z=" << p->Z << ")" << endl;
1896       cerr << "E0:" << p->e0 << "  s0:" << p->seq << "  V0:" << p->V0 <<
1897         "  eta2:" << p->eta2 << "  kappa:" << p->kappa << "  lambda:" << p->lambda <<
1898         "  rFermi:" << rFermi << "  cutSlope" << cutoffslope <<
1899         "  gamma1:" << p->gamma1 << "  gamma2:" <<  p->gamma2 << endl <<
1900         endl;
1901       cerr << "Chi:";
1902       for (int j = 0; j < nelements; j++)
1903 	cerr << " " << (*chi)[i][j];
1904       cerr << endl;
1905     }
1906 }
1907 
BoundaryConditionsChanged()1908 void EMT::BoundaryConditionsChanged()
1909 {
1910   if (nblist)
1911     nblist->Invalidate();
1912 }
1913