1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Stan Moore (SNL)
17 ------------------------------------------------------------------------- */
18 
19 #include "min_linesearch_kokkos.h"
20 
21 #include "atom_kokkos.h"
22 #include "atom_masks.h"
23 #include "error.h"
24 #include "fix_minimize_kokkos.h"
25 #include "output.h"
26 #include "pair.h"
27 #include "thermo.h"
28 #include "modify.h"
29 
30 #include <cmath>
31 
32 using namespace LAMMPS_NS;
33 
34 // ALPHA_MAX = max alpha allowed to avoid long backtracks
35 // ALPHA_REDUCE = reduction ratio, should be in range [0.5,1)
36 // BACKTRACK_SLOPE, should be in range (0,0.5]
37 // QUADRATIC_TOL = tolerance on alpha0, should be in range [0.1,1)
38 // EMACH = machine accuracy limit of energy changes (1.0e-8)
39 // EPS_QUAD = tolerance for quadratic projection
40 
41 #define ALPHA_MAX 1.0
42 #define ALPHA_REDUCE 0.5
43 #define BACKTRACK_SLOPE 0.4
44 #define QUADRATIC_TOL 0.1
45 //#define EMACH 1.0e-8
46 #define EMACH 1.0e-8
47 #define EPS_QUAD 1.0e-28
48 
49 /* ---------------------------------------------------------------------- */
50 
MinLineSearchKokkos(LAMMPS * lmp)51 MinLineSearchKokkos::MinLineSearchKokkos(LAMMPS *lmp) : MinKokkos(lmp)
52 {
53   searchflag = 1;
54   atomKK = (AtomKokkos *) atom;
55   gextra = hextra = nullptr;
56 }
57 
58 /* ---------------------------------------------------------------------- */
59 
~MinLineSearchKokkos()60 MinLineSearchKokkos::~MinLineSearchKokkos()
61 {
62   delete [] gextra;
63   delete [] hextra;
64 }
65 
66 /* ---------------------------------------------------------------------- */
67 
init()68 void MinLineSearchKokkos::init()
69 {
70   MinKokkos::init();
71 
72   if (linestyle == 1) linemin = &MinLineSearchKokkos::linemin_quadratic;
73   else error->all(FLERR,"Kokkos minimize only supports the 'min_modify line "
74    "quadratic' option");
75 }
76 
77 /* ---------------------------------------------------------------------- */
78 
setup_style()79 void MinLineSearchKokkos::setup_style()
80 {
81   // memory for x0,g,h for atomic dof
82 
83   fix_minimize_kk->add_vector_kokkos();
84   fix_minimize_kk->add_vector_kokkos();
85   fix_minimize_kk->add_vector_kokkos();
86 
87   // memory for g,h for extra global dof, fix stores x0
88 
89   if (nextra_global) {
90     gextra = new double[nextra_global];
91     hextra = new double[nextra_global];
92   }
93 }
94 
95 /* ----------------------------------------------------------------------
96    set current vector lengths and pointers
97    called after atoms have migrated
98 ------------------------------------------------------------------------- */
99 
reset_vectors()100 void MinLineSearchKokkos::reset_vectors()
101 {
102   // atomic dof
103 
104   nvec = 3 * atom->nlocal;
105   atomKK->sync(Device,F_MASK|X_MASK);
106   auto d_x = atomKK->k_x.d_view;
107   auto d_f = atomKK->k_f.d_view;
108 
109   if (nvec) xvec = DAT::t_ffloat_1d(d_x.data(),d_x.size());
110   if (nvec) fvec = DAT::t_ffloat_1d(d_f.data(),d_f.size());
111   x0 = fix_minimize_kk->request_vector_kokkos(0);
112   g = fix_minimize_kk->request_vector_kokkos(1);
113   h = fix_minimize_kk->request_vector_kokkos(2);
114 
115   auto h_fvec = Kokkos::create_mirror_view(fvec);
116   Kokkos::deep_copy(h_fvec,fvec);
117 }
118 
119 /* ----------------------------------------------------------------------
120    line minimization methods
121    find minimum-energy starting at x along h direction
122    input args:   eoriginal = energy at initial x
123    input extra:  n,x,x0,f,h for atomic, extra global, extra per-atom dof
124    output args:  return 0 if successful move, non-zero alpha
125                  return non-zero if failed
126                  alpha = distance moved along h for x at min eng config
127                  update neval counter of eng/force function evaluations
128                  output extra: if fail, energy_force() of original x
129                  if succeed, energy_force() at x + alpha*h
130                  atom->x = coords at new configuration
131                  atom->f = force at new configuration
132                  ecurrent = energy of new configuration
133 ------------------------------------------------------------------------- */
134 
135 /* ----------------------------------------------------------------------
136     // linemin: quadratic line search (adapted from Dennis and Schnabel)
137     // The objective function is approximated by a quadratic
138     // function in alpha, for sufficiently small alpha.
139     // This idea is the same as that used in the well-known secant
140     // method. However, since the change in the objective function
141     // (difference of two finite numbers) is not known as accurately
142     // as the gradient (which is close to zero), all the expressions
143     // are written in terms of gradients. In this way, we can converge
144     // the LAMMPS forces much closer to zero.
145     //
146     // We know E,Eprev,fh,fhprev. The Taylor series about alpha_prev
147     // truncated at the quadratic term is:
148     //
149     //     E = Eprev - del_alpha*fhprev + (1/2)del_alpha^2*Hprev
150     //
151     // and
152     //
153     //     fh = fhprev - del_alpha*Hprev
154     //
155     // where del_alpha = alpha-alpha_prev
156     //
157     // We solve these two equations for Hprev and E=Esolve, giving:
158     //
159     //     Esolve = Eprev - del_alpha*(f+fprev)/2
160     //
161     // We define relerr to be:
162     //
163     //      relerr = |(Esolve-E)/Eprev|
164     //             = |1.0 - (0.5*del_alpha*(f+fprev)+E)/Eprev|
165     //
166     // If this is accurate to within a reasonable tolerance, then
167     // we go ahead and use a secant step to fh = 0:
168     //
169     //      alpha0 = alpha - (alpha-alphaprev)*fh/delfh;
170     //
171 ------------------------------------------------------------------------- */
172 
linemin_quadratic(double eoriginal,double & alpha)173 int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha)
174 {
175   double fdothall,fdothme,hme,hmaxall;
176   double de_ideal,de;
177   double delfh,engprev,relerr,alphaprev,fhprev,ff,fh,alpha0;
178   double dot[2],dotall[2];
179   double alphamax;
180 
181   fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
182   fix_minimize_kk->k_vectors.modify<LMPDeviceType>();
183 
184   // fdothall = projection of search dir along downhill gradient
185   // if search direction is not downhill, exit with error
186 
187   fdothme = 0.0;
188   {
189     // local variables for lambda capture
190 
191     auto l_fvec = fvec;
192     auto l_h = h;
193 
194     Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& fdothme) {
195       fdothme += l_fvec[i]*l_h[i];
196     },fdothme);
197   }
198   MPI_Allreduce(&fdothme,&fdothall,1,MPI_DOUBLE,MPI_SUM,world);
199   if (nextra_global)
200     for (int i = 0; i < nextra_global; i++) fdothall += fextra[i]*hextra[i];
201   if (output->thermo->normflag) fdothall /= atom->natoms;
202 
203   if (fdothall <= 0.0) return DOWNHILL;
204 
205   // set alphamax so no dof is changed by more than max allowed amount
206   // for atom coords, max amount = dmax
207   // for extra per-atom dof, max amount = extra_max[]
208   // for extra global dof, max amount is set by fix
209   // also insure alphamax <= ALPHA_MAX
210   // else will have to backtrack from huge value when forces are tiny
211   // if all search dir components are already 0.0, exit with error
212 
213 
214   hme = 0.0;
215   {
216     // local variables for lambda capture
217 
218     auto l_h = h;
219 
220     Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& hme) {
221       hme = MAX(hme,fabs(l_h[i]));
222     },Kokkos::Max<double>(hme));
223   }
224   MPI_Allreduce(&hme,&hmaxall,1,MPI_DOUBLE,MPI_MAX,world);
225   alphamax = MIN(ALPHA_MAX,dmax/hmaxall);
226   if (nextra_global) {
227     double alpha_extra = modify->max_alpha(hextra);
228     alphamax = MIN(alphamax,alpha_extra);
229     for (int i = 0; i < nextra_global; i++)
230       hmaxall = MAX(hmaxall,fabs(hextra[i]));
231   }
232 
233   if (hmaxall == 0.0) return ZEROFORCE;
234 
235   // store box and values of all dof at start of linesearch
236 
237   {
238     // local variables for lambda capture
239 
240     auto l_xvec = xvec;
241     auto l_x0 = x0;
242 
243     fix_minimize_kk->store_box();
244     Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
245       l_x0[i] = l_xvec[i];
246     });
247   }
248   if (nextra_global) modify->min_store();
249 
250   // backtrack with alpha until energy decrease is sufficient
251   // or until get to small energy change, then perform quadratic projection
252 
253   alpha = alphamax;
254   fhprev = fdothall;
255   engprev = eoriginal;
256   alphaprev = 0.0;
257 
258   // // important diagnostic: test the gradient against energy
259   // double etmp;
260   // double alphatmp = alphamax*1.0e-4;
261   // etmp = alpha_step(alphatmp,1);
262   // printf("alpha = %g dele = %g dele_force = %g err = %g\n",
263   //        alphatmp,etmp-eoriginal,-alphatmp*fdothall,
264   //        etmp-eoriginal+alphatmp*fdothall);
265   // alpha_step(0.0,1);
266 
267   while (1) {
268     ecurrent = alpha_step(alpha,1);
269 
270     // compute new fh, alpha, delfh
271 
272     s_double2 sdot;
273     {
274       // local variables for lambda capture
275 
276       auto l_fvec = fvec;
277       auto l_h = h;
278 
279       Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
280         sdot.d0 += l_fvec[i]*l_fvec[i];
281         sdot.d1 += l_fvec[i]*l_h[i];
282       },sdot);
283     }
284     dot[0] = sdot.d0;
285     dot[1] = sdot.d1;
286 
287     MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
288     if (nextra_global) {
289       for (int i = 0; i < nextra_global; i++) {
290         dotall[0] += fextra[i]*fextra[i];
291         dotall[1] += fextra[i]*hextra[i];
292       }
293     }
294     ff = dotall[0];
295     fh = dotall[1];
296     if (output->thermo->normflag) {
297       ff /= atom->natoms;
298       fh /= atom->natoms;
299     }
300 
301     delfh = fh - fhprev;
302 
303     // if fh or delfh is epsilon, reset to starting point, exit with error
304 
305     if (fabs(fh) < EPS_QUAD || fabs(delfh) < EPS_QUAD) {
306       ecurrent = alpha_step(0.0,0);
307       return ZEROQUAD;
308     }
309 
310     // Check if ready for quadratic projection, equivalent to secant method
311     // alpha0 = projected alpha
312 
313     relerr = fabs(1.0-(0.5*(alpha-alphaprev)*(fh+fhprev)+ecurrent)/engprev);
314     alpha0 = alpha - (alpha-alphaprev)*fh/delfh;
315 
316     if (relerr <= QUADRATIC_TOL && alpha0 > 0.0 && alpha0 < alphamax) {
317       ecurrent = alpha_step(alpha0,1);
318       if (ecurrent - eoriginal < EMACH) {
319         if (nextra_global) {
320           int itmp = modify->min_reset_ref();
321           if (itmp) ecurrent = energy_force(1);
322         }
323         return 0;
324       }
325     }
326 
327     // if backtracking energy change is better than ideal, exit with success
328 
329     de_ideal = -BACKTRACK_SLOPE*alpha*fdothall;
330     de = ecurrent - eoriginal;
331 
332     if (de <= de_ideal) {
333       if (nextra_global) {
334         int itmp = modify->min_reset_ref();
335         if (itmp) ecurrent = energy_force(1);
336       }
337       return 0;
338     }
339 
340     // save previous state
341 
342     fhprev = fh;
343     engprev = ecurrent;
344     alphaprev = alpha;
345 
346     // reduce alpha
347 
348     alpha *= ALPHA_REDUCE;
349 
350     // backtracked all the way to 0.0
351     // reset to starting point, exit with error
352 
353     if (alpha <= 0.0 || de_ideal >= -EMACH) {
354       ecurrent = alpha_step(0.0,0);
355       return ZEROALPHA;
356     }
357   }
358 }
359 
360 /* ---------------------------------------------------------------------- */
361 
alpha_step(double alpha,int resetflag)362 double MinLineSearchKokkos::alpha_step(double alpha, int resetflag)
363 {
364   // reset to starting point
365 
366   if (nextra_global) modify->min_step(0.0,hextra);
367   atomKK->k_x.clear_sync_state(); // ignore if host positions since device
368                                   //  positions will be reset below
369   {
370     // local variables for lambda capture
371 
372     auto l_xvec = xvec;
373     auto l_x0 = x0;
374 
375     Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
376       l_xvec[i] = l_x0[i];
377     });
378   }
379 
380   // step forward along h
381 
382   if (alpha > 0.0) {
383     if (nextra_global) modify->min_step(alpha,hextra);
384 
385     // local variables for lambda capture
386 
387     auto l_xvec = xvec;
388     auto l_h = h;
389 
390     Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
391       l_xvec[i] += alpha*l_h[i];
392     });
393   }
394 
395   atomKK->modified(Device,X_MASK);
396 
397   // compute and return new energy
398 
399   neval++;
400   return energy_force(resetflag);
401 }
402 
403 /* ---------------------------------------------------------------------- */
404 
405 // compute projection of force on: itself and the search direction
406 
compute_dir_deriv(double & ff)407 double MinLineSearchKokkos::compute_dir_deriv(double &ff)
408 {
409   double dot[2],dotall[2];
410   double fh;
411 
412   // compute new fh, alpha, delfh
413 
414   s_double2 sdot;
415   {
416     // local variables for lambda capture
417 
418     auto l_fvec = fvec;
419     auto l_h = h;
420 
421     Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
422       sdot.d0 += l_fvec[i]*l_fvec[i];
423       sdot.d1 += l_fvec[i]*l_h[i];
424     },sdot);
425   }
426   dot[0] = sdot.d0;
427   dot[1] = sdot.d1;
428 
429   MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
430   if (nextra_global) {
431     for (int i = 0; i < nextra_global; i++) {
432       dotall[0] += fextra[i]*fextra[i];
433       dotall[1] += fextra[i]*hextra[i];
434     }
435   }
436 
437   ff = dotall[0];
438   fh = dotall[1];
439   if (output->thermo->normflag) {
440     ff /= atom->natoms;
441     fh /= atom->natoms;
442   }
443 
444   return fh;
445 }
446