1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 #include "min_cg_kokkos.h"
16 #include <mpi.h>
17 #include <cmath>
18 #include "update.h"
19 #include "output.h"
20 #include "timer.h"
21 #include "atom_kokkos.h"
22 #include "atom_masks.h"
23 #include "error.h"
24 #include "fix_minimize_kokkos.h"
25 
26 using namespace LAMMPS_NS;
27 
28 // EPS_ENERGY = minimum normalization for energy tolerance
29 
30 #define EPS_ENERGY 1.0e-8
31 
32 /* ---------------------------------------------------------------------- */
33 
MinCGKokkos(LAMMPS * lmp)34 MinCGKokkos::MinCGKokkos(LAMMPS *lmp) : MinLineSearchKokkos(lmp)
35 {
36   atomKK = (AtomKokkos *) atom;
37   kokkosable = 1;
38 }
39 
40 /* ----------------------------------------------------------------------
41    minimization via conjugate gradient iterations
42 ------------------------------------------------------------------------- */
43 
iterate(int maxiter)44 int MinCGKokkos::iterate(int maxiter)
45 {
46   int fail,ntimestep;
47   double beta,gg,dot[2],dotall[2],fdotf;
48 
49   fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
50   fix_minimize_kk->k_vectors.modify<LMPDeviceType>();
51 
52   // nlimit = max # of CG iterations before restarting
53   // set to ndoftotal unless too big
54 
55   int nlimit = static_cast<int> (MIN(MAXSMALLINT,ndoftotal));
56 
57   // initialize working vectors
58 
59   {
60     // local variables for lambda capture
61 
62     auto l_h = h;
63     auto l_g = g;
64     auto l_fvec = fvec;
65 
66     Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
67       l_h[i] = l_fvec[i];
68       l_g[i] = l_fvec[i];
69     });
70   }
71   if (nextra_global)
72     for (int i = 0; i < nextra_global; i++) hextra[i] = gextra[i] = fextra[i];
73 
74   gg = fnorm_sqr();
75 
76   for (int iter = 0; iter < maxiter; iter++) {
77 
78     if (timer->check_timeout(niter))
79       return TIMEOUT;
80 
81     ntimestep = ++update->ntimestep;
82     niter++;
83 
84     // line minimization along direction h from current atom->x
85 
86     eprevious = ecurrent;
87     fail = (this->*linemin)(ecurrent,alpha_final);
88     if (fail) return fail;
89 
90     // function evaluation criterion
91 
92     if (neval >= update->max_eval) return MAXEVAL;
93 
94     // energy tolerance criterion
95 
96     if (fabs(ecurrent-eprevious) <
97         update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY))
98       return ETOL;
99 
100     // force tolerance criterion
101 
102     s_double2 sdot;
103     {
104       // local variables for lambda capture
105 
106       auto l_g = g;
107       auto l_fvec = fvec;
108 
109       Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
110         sdot.d0 += l_fvec[i]*l_fvec[i];
111         sdot.d1 += l_fvec[i]*l_g[i];
112       },sdot);
113     }
114     dot[0] = sdot.d0;
115     dot[1] = sdot.d1;
116     MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
117     if (nextra_global)
118       for (int i = 0; i < nextra_global; i++) {
119         dotall[0] += fextra[i]*fextra[i];
120         dotall[1] += fextra[i]*gextra[i];
121       }
122 
123     fdotf = 0.0;
124     if (update->ftol > 0.0) {
125       if (normstyle == MAX) fdotf = fnorm_max();        // max force norm
126       else if (normstyle == INF) fdotf = fnorm_inf();   // infinite force norm
127       else if (normstyle == TWO) fdotf = dotall[0];     // same as fnorm_sqr(), Euclidean force 2-norm
128       else error->all(FLERR,"Illegal min_modify command");
129       if (fdotf < update->ftol*update->ftol) return FTOL;
130     }
131 
132     // update new search direction h from new f = -Grad(x) and old g
133     // this is Polak-Ribieri formulation
134     // beta = dotall[0]/gg would be Fletcher-Reeves
135     // reinitialize CG every ndof iterations by setting beta = 0.0
136 
137     beta = MAX(0.0,(dotall[0] - dotall[1])/gg);
138     if ((niter+1) % nlimit == 0) beta = 0.0;
139     gg = dotall[0];
140 
141     {
142       // local variables for lambda capture
143 
144       auto l_h = h;
145       auto l_g = g;
146       auto l_fvec = fvec;
147 
148       Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
149         l_g[i] = l_fvec[i];
150         l_h[i] = l_g[i] + beta*l_h[i];
151       });
152     }
153     if (nextra_global)
154       for (int i = 0; i < nextra_global; i++) {
155         gextra[i] = fextra[i];
156         hextra[i] = gextra[i] + beta*hextra[i];
157       }
158 
159     // reinitialize CG if new search direction h is not downhill
160 
161     double dot_0 = 0.0;
162 
163     {
164       // local variables for lambda capture
165 
166       auto l_h = h;
167       auto l_g = g;
168 
169       Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& dot_0) {
170         dot_0 += l_g[i]*l_h[i];
171       },dot_0);
172     }
173     dot[0] = dot_0;
174     MPI_Allreduce(dot,dotall,1,MPI_DOUBLE,MPI_SUM,world);
175     if (nextra_global)
176       for (int i = 0; i < nextra_global; i++)
177         dotall[0] += gextra[i]*hextra[i];
178 
179     if (dotall[0] <= 0.0) {
180       // local variables for lambda capture
181 
182       auto l_h = h;
183       auto l_g = g;
184 
185       Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
186          l_h[i] = l_g[i];
187       });
188       if (nextra_global)
189         for (int i = 0; i < nextra_global; i++) hextra[i] = gextra[i];
190     }
191 
192     // output for thermo, dump, restart files
193 
194     if (output->next == ntimestep) {
195       atomKK->sync(Host,ALL_MASK);
196 
197       timer->stamp();
198       output->write(ntimestep);
199       timer->stamp(Timer::OUTPUT);
200     }
201   }
202 
203   return MAXITER;
204 }
205