1 /* ----------------------------------------------------------------------
2    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3    https://www.lammps.org/ Sandia National Laboratories
4    Steve Plimpton, sjplimp@sandia.gov
5 
6    Copyright (2003) Sandia Corporation.  Under the terms of Contract
7    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
8    certain rights in this software.  This software is distributed under
9    the GNU General Public License.
10 
11    See the README file in the top-level LAMMPS directory.
12 ------------------------------------------------------------------------- */
13 
14 /* ----------------------------------------------------------------------
15    Contributing authors:
16      Trung Nguyen and Monica Olvera de la Cruz (Northwestern)
17      based on the original implementation by Honghao Li (Northwestern)
18    Reference:
19      Jadhao, Solis, Olvera de la Cruz, J. Chem. Phys. 138, 054119, 2013
20 
21    Solve the following eq. for induced charges on fixed sharp interfaces:
22      (Rww + Rww^T) w = q Rwq
23    at every time step, the vector (q Rwq) is computed, and so
24        w = [Rww + Rww^T)^(-1)] (q Rwq)
25    NOTE: Oct 7, 2019: switch to using a conjugate gradient solver
26 ------------------------------------------------------------------------- */
27 
28 #include "fix_polarize_functional.h"
29 
30 #include "atom.h"
31 #include "atom_vec_dielectric.h"
32 #include "comm.h"
33 #include "domain.h"
34 #include "error.h"
35 #include "force.h"
36 #include "kspace.h"
37 #include "math_const.h"
38 #include "math_extra.h"
39 #include "math_special.h"
40 #include "memory.h"
41 #include "msm_dielectric.h"
42 #include "neigh_list.h"
43 #include "neigh_request.h"
44 #include "neighbor.h"
45 #include "pair_coul_cut_dielectric.h"
46 #include "pair_coul_long_dielectric.h"
47 #include "pair_lj_cut_coul_cut_dielectric.h"
48 #include "pair_lj_cut_coul_long_dielectric.h"
49 #include "pair_lj_cut_coul_msm_dielectric.h"
50 #include "pppm_dielectric.h"
51 #include "update.h"
52 
53 #include <cmath>
54 #include <cstring>
55 
56 using namespace LAMMPS_NS;
57 using namespace FixConst;
58 using namespace MathExtra;
59 using namespace MathConst;
60 using namespace MathSpecial;
61 
62 enum { REAL2SCALED = 0, SCALED2REAL = 1 };
63 
64 #define EPSILON 1e-6
65 
66 //#define _POLARIZE_DEBUG
67 
68 /* ---------------------------------------------------------------------- */
69 
FixPolarizeFunctional(LAMMPS * lmp,int narg,char ** arg)70 FixPolarizeFunctional::FixPolarizeFunctional(LAMMPS *lmp, int narg, char **arg) :
71     Fix(lmp, narg, arg)
72 {
73   if (narg < 4) error->all(FLERR, "Illegal fix polarize/functional command");
74 
75   avec = (AtomVecDielectric *) atom->style_match("dielectric");
76   if (!avec) error->all(FLERR, "Fix polarize/functional requires atom style dielectric");
77 
78   nevery = utils::inumeric(FLERR, arg[3], false, lmp);
79   if (nevery < 0) error->all(FLERR, "Illegal fix polarize/functional command");
80 
81   tolerance = EPSILON;
82   if (narg == 5) tolerance = utils::numeric(FLERR, arg[4], false, lmp);
83 
84   comm_forward = 1;
85   nmax = 0;
86   allocated = 0;
87 
88   induced_charge_idx = nullptr;
89   induced_charges = nullptr;
90   tag2mat = nullptr;
91   mat2tag = nullptr;
92   tag2mat_ions = nullptr;
93   mat2tag_ions = nullptr;
94   ion_idx = nullptr;
95 
96   rhs1 = nullptr;
97   rhs2 = nullptr;
98   buffer1 = nullptr;
99   buffer2 = nullptr;
100 
101   // set flags for arrays to clear in force_clear()
102 
103   torqueflag = extraflag = 0;
104   if (atom->torque_flag) torqueflag = 1;
105   if (atom->avec->forceclearflag) extraflag = 1;
106 
107   Rww = nullptr;
108   inverse_matrix = nullptr;
109   G1ww = nullptr;
110   G2ww = nullptr;
111   G3ww = nullptr;
112   ndotGww = nullptr;
113 
114   qiRqwVector = nullptr;
115   G1qw_real = nullptr;
116   sum2G2wq = nullptr;
117 
118   sum1G2qw = nullptr;
119   sum1G1qw_epsilon = nullptr;
120   sum2ndotGwq_epsilon = nullptr;
121 
122   efield_pair = nullptr;
123   efield_kspace = nullptr;
124 
125   includingG3ww = 1;
126 
127   cg_r = cg_p = cg_Ap = nullptr;
128   cg_A = nullptr;
129 
130   FixPolarizeFunctional::grow_arrays(atom->nmax);
131   atom->add_callback(0);    // to ensure to work with atom->sort()
132 }
133 
134 /* ---------------------------------------------------------------------- */
135 
~FixPolarizeFunctional()136 FixPolarizeFunctional::~FixPolarizeFunctional()
137 {
138   memory->destroy(mat2tag);
139   memory->destroy(tag2mat);
140   memory->destroy(tag2mat_ions);
141   memory->destroy(mat2tag_ions);
142   memory->destroy(ion_idx);
143   memory->destroy(induced_charge_idx);
144   memory->destroy(induced_charges);
145   memory->destroy(rhs1);
146   memory->destroy(rhs2);
147   memory->destroy(buffer1);
148   memory->destroy(buffer2);
149 
150   if (allocated) deallocate();
151   atom->delete_callback(id, 0);
152 }
153 
154 /* ---------------------------------------------------------------------- */
155 
setmask()156 int FixPolarizeFunctional::setmask()
157 {
158   int mask = 0;
159   mask |= PRE_FORCE;
160   return mask;
161 }
162 
163 /* ---------------------------------------------------------------------- */
164 
init()165 void FixPolarizeFunctional::init()
166 {
167   // mapping induced charge matrix/vector to atom tags and vice versa
168 
169   int i, maxtag;
170   int *mask = atom->mask;
171   tagint *tag = atom->tag;
172   int nlocal = atom->nlocal;
173   tagint max_tag;
174   tagint itmp;
175   int *ncount = nullptr;
176 
177   // induced charge arrays setup
178 
179   max_tag = -1;
180   for (i = 0; i < nlocal; i++)
181     if (mask[i] & groupbit) max_tag = MAX(max_tag, tag[i]);
182 
183   MPI_Allreduce(&max_tag, &itmp, 1, MPI_LMP_TAGINT, MPI_MAX, world);
184   maxtag = (int) itmp;
185 
186   memory->create(ncount, maxtag + 1, "polarize:ncount");
187   for (i = 0; i <= maxtag; i++) ncount[i] = 0;
188 
189   for (i = 0; i < nlocal; i++)
190     if (mask[i] & groupbit) ncount[tag[i]]++;
191 
192   memory->create(tag2mat, maxtag + 1, "polarize:tag2mat");
193   MPI_Allreduce(ncount, tag2mat, maxtag + 1, MPI_INT, MPI_SUM, world);
194 
195   num_induced_charges = 0;
196   for (i = 0; i <= maxtag; i++)
197     if (tag2mat[i])
198       tag2mat[i] = num_induced_charges++;
199     else
200       tag2mat[i] = -1;
201 
202   memory->create(mat2tag, num_induced_charges, "polarize:mat2tag");
203 
204   num_induced_charges = 0;
205   for (i = 0; i <= maxtag; i++)
206     if (tag2mat[i] >= 0) mat2tag[num_induced_charges++] = i;
207 
208   for (i = 0; i < nlocal; i++) {
209     induced_charge_idx[i] = -1;
210     if (mask[i] & groupbit) induced_charge_idx[i] = tag2mat[tag[i]];
211   }
212 
213   memory->destroy(ncount);
214 
215   // ion arrays setup
216 
217   max_tag = -1;
218   for (i = 0; i < nlocal; i++)
219     if (!(mask[i] & groupbit)) max_tag = MAX(max_tag, tag[i]);
220 
221   MPI_Allreduce(&max_tag, &itmp, 1, MPI_LMP_TAGINT, MPI_MAX, world);
222   maxtag = (int) itmp;
223 
224   memory->create(ncount, maxtag + 1, "polarize:ncount");
225   for (i = 0; i <= maxtag; i++) ncount[i] = 0;
226 
227   for (i = 0; i < nlocal; i++)
228     if (!(mask[i] & groupbit)) ncount[tag[i]]++;
229 
230   memory->create(tag2mat_ions, maxtag + 1, "polarize:tag2mat_ions");
231   MPI_Allreduce(ncount, tag2mat_ions, maxtag + 1, MPI_INT, MPI_SUM, world);
232 
233   num_ions = 0;
234   for (i = 0; i <= maxtag; i++)
235     if (tag2mat_ions[i])
236       tag2mat_ions[i] = num_ions++;
237     else
238       tag2mat_ions[i] = -1;
239 
240   memory->create(mat2tag_ions, num_ions, "polarize:mat2tag_ions");
241   memory->create(rhs1, num_induced_charges, "polarize:rhs1");
242   memory->create(rhs2, num_induced_charges, "polarize:rhs2");
243   int buffer_size = (num_induced_charges > num_ions) ? num_induced_charges : num_ions;
244   memory->create(buffer1, buffer_size, num_induced_charges, "polarize:buffer1");
245   memory->create(buffer2, num_induced_charges, num_induced_charges, "polarize:buffer2");
246   memory->create(induced_charges, num_induced_charges, "polarize:induced_charges");
247 
248   num_ions = 0;
249   for (i = 0; i <= maxtag; i++)
250     if (tag2mat_ions[i] >= 0) mat2tag_ions[num_ions++] = i;
251 
252   for (i = 0; i < nlocal; i++) {
253     ion_idx[i] = -1;
254     if (!(mask[i] & groupbit)) ion_idx[i] = tag2mat_ions[tag[i]];
255   }
256 
257   memory->destroy(ncount);
258 
259   if (allocated == 0) {
260     nmax = atom->nmax;
261     allocate();
262     allocated = 1;
263   }
264 
265   // need a full neighbor list w/ Newton off and ghost neighbors
266   // built whenever re-neighboring occurs
267 
268   int irequest = neighbor->request(this, instance_me);
269   neighbor->requests[irequest]->pair = 0;
270   neighbor->requests[irequest]->fix = 1;
271   neighbor->requests[irequest]->half = 0;
272   neighbor->requests[irequest]->full = 1;
273   neighbor->requests[irequest]->occasional = 0;
274 
275   if (force->kspace)
276     g_ewald = force->kspace->g_ewald;
277   else
278     g_ewald = 0.01;
279 
280   if (comm->me == 0)
281     utils::logmesg(lmp, "Direct solver using a variational approach for {} induced charges\n",
282                    num_induced_charges);
283 }
284 
285 /* ---------------------------------------------------------------------- */
286 
init_list(int,NeighList * ptr)287 void FixPolarizeFunctional::init_list(int /*id*/, NeighList *ptr)
288 {
289   list = ptr;
290 }
291 
292 /* ---------------------------------------------------------------------- */
293 
setup(int)294 void FixPolarizeFunctional::setup(int /*vflag*/)
295 {
296   // check if the pair styles in use are compatible
297 
298   if (strcmp(force->pair_style, "lj/cut/coul/long/dielectric") == 0)
299     efield_pair = ((PairLJCutCoulLongDielectric *) force->pair)->efield;
300   else if (strcmp(force->pair_style, "lj/cut/coul/long/dielectric/omp") == 0)
301     efield_pair = ((PairLJCutCoulLongDielectric *) force->pair)->efield;
302   else if (strcmp(force->pair_style, "lj/cut/coul/msm/dielectric") == 0)
303     efield_pair = ((PairLJCutCoulMSMDielectric *) force->pair)->efield;
304   else if (strcmp(force->pair_style, "lj/cut/coul/cut/dielectric") == 0)
305     efield_pair = ((PairLJCutCoulCutDielectric *) force->pair)->efield;
306   else if (strcmp(force->pair_style, "lj/cut/coul/cut/dielectric/omp") == 0)
307     efield_pair = ((PairLJCutCoulCutDielectric *) force->pair)->efield;
308   else if (strcmp(force->pair_style, "coul/long/dielectric") == 0)
309     efield_pair = ((PairCoulLongDielectric *) force->pair)->efield;
310   else if (strcmp(force->pair_style, "coul/cut/dielectric") == 0)
311     efield_pair = ((PairCoulCutDielectric *) force->pair)->efield;
312   else
313     error->all(FLERR, "Pair style not compatible with fix polarize/functional");
314 
315   if (force->kspace) {
316 
317     kspaceflag = 1;
318     if (strcmp(force->kspace_style, "pppm/dielectric") == 0)
319       efield_kspace = ((PPPMDielectric *) force->kspace)->efield;
320     else if (strcmp(force->kspace_style, "msm/dielectric") == 0)
321       efield_kspace = ((MSMDielectric *) force->kspace)->efield;
322     else
323       error->all(FLERR, "Kspace style not compatible with fix polarize/functional");
324 
325   } else {
326 
327     if (kspaceflag == 1) {    // users specified kspace yes
328       error->warning(FLERR, "No Kspace style available for fix polarize/functional");
329       kspaceflag = 0;
330     }
331   }
332 
333   update_induced_charges();
334 }
335 
336 /* ---------------------------------------------------------------------- */
337 
setup_pre_force(int)338 void FixPolarizeFunctional::setup_pre_force(int /*vflag*/)
339 {
340   // calculate Rww before the run (assuming that the interface is fixed for now)
341   // otherwise this should be done every time step in pre_force()
342 
343   calculate_Rww_cutoff();
344 }
345 
346 /* ---------------------------------------------------------------------- */
347 
pre_force(int)348 void FixPolarizeFunctional::pre_force(int)
349 {
350   if (nevery == 0) return;
351   if (update->ntimestep % nevery) return;
352 
353   // solve for the induced charges
354 
355   update_induced_charges();
356 }
357 
358 /* ---------------------------------------------------------------------- */
359 
update_induced_charges()360 void FixPolarizeFunctional::update_induced_charges()
361 {
362   // convert all ions from scaled charges (q) to real q by multiplying with epsilon
363 
364   charge_rescaled(SCALED2REAL);
365 
366   // compute the right hand side vector qiRwVector
367 
368   calculate_qiRqw_cutoff();
369 
370   // conjugate gradient solver for w from Rww * w = -qRqw
371 
372   for (int i = 0; i < num_induced_charges; i++)
373     for (int j = 0; j < num_induced_charges; j++) cg_A[i][j] = Rww[i][j] + Rww[j][i];
374 
375   for (int i = 0; i < num_induced_charges; i++) induced_charges[i] = 0;
376 
377   cg_solver(cg_A, qiRqwVector, induced_charges, num_induced_charges);
378 
379   // assign charges to the particles in the group
380 
381   double *q = atom->q;
382   int nlocal = atom->nlocal;
383 
384   for (int i = 0; i < nlocal; i++) {
385     if (induced_charge_idx[i] < 0) continue;
386     int idx = induced_charge_idx[i];
387     q[i] = -induced_charges[idx] / (4 * MY_PI);
388   }
389 
390   // revert to scaled charges to calculate forces
391 
392   charge_rescaled(REAL2SCALED);
393 }
394 
395 /* ----------------------------------------------------------------------
396   scaled2real = 1: convert ion charges from scaled values (divided by epsilon) to real values
397               = 0:                          real values to scaled values
398 ------------------------------------------------------------------------- */
399 
charge_rescaled(int scaled2real)400 void FixPolarizeFunctional::charge_rescaled(int scaled2real)
401 {
402   double *q = atom->q;
403   double *q_real = atom->q_unscaled;
404   double *epsilon = atom->epsilon;
405   int nlocal = atom->nlocal;
406 
407   if (scaled2real) {
408     for (int i = 0; i < nlocal; i++)
409       if (induced_charge_idx[i] < 0) q[i] = q_real[i];
410   } else {
411     for (int i = 0; i < nlocal; i++)
412       if (induced_charge_idx[i] < 0) q[i] = q_real[i] / epsilon[i];
413   }
414 
415   comm->forward_comm_fix(this);
416 }
417 
418 /* ---------------------------------------------------------------------- */
419 
allocate()420 void FixPolarizeFunctional::allocate()
421 {
422   // initialize all data
423   // interface terms, all matrix of M*M
424   memory->create(inverse_matrix, num_induced_charges, num_induced_charges, "fix:inverse_matrix");
425   memory->create(Rww, num_induced_charges, num_induced_charges, "fix:Rww");
426   memory->create(G1ww, num_induced_charges, num_induced_charges, "fix:G1ww");
427   memory->create(ndotGww, num_induced_charges, num_induced_charges, "fix:ndotGww");
428   memory->create(G2ww, num_induced_charges, num_induced_charges, "fix:G2ww");
429   memory->create(G3ww, num_induced_charges, num_induced_charges, "fix:G3ww");
430 
431   // each step, qw, qq terms, temp data
432 
433   memory->create(qiRqwVector, num_induced_charges, "fix:qiRqwVector");
434   memory->create(sum2G2wq, num_induced_charges, "fix:sum2G2wq");
435   memory->create(G1qw_real, num_ions, num_induced_charges, "fix:G1qw_real");
436 
437   // arrays of M
438   memory->create(sum1G2qw, num_induced_charges, "fix:sum1G2qw");
439   memory->create(sum1G1qw_epsilon, num_induced_charges, "fix:sum1G1qw_epsilon");
440   memory->create(sum2ndotGwq_epsilon, num_induced_charges, "fix:sum2ndotGwq_epsilon");
441 
442   memory->create(cg_r, num_induced_charges, "polarize:cg_r");
443   memory->create(cg_p, num_induced_charges, "polarize:cg_p");
444   memory->create(cg_Ap, num_induced_charges, "polarize:cg_Ap");
445   memory->create(cg_A, num_induced_charges, num_induced_charges, "polarize:cg_A");
446 }
447 
448 /* ---------------------------------------------------------------------- */
449 
deallocate()450 void FixPolarizeFunctional::deallocate()
451 {
452   memory->destroy(inverse_matrix);
453   memory->destroy(Rww);
454   memory->destroy(G1ww);
455   memory->destroy(G2ww);
456   memory->destroy(G3ww);
457   memory->destroy(ndotGww);
458 
459   memory->destroy(qiRqwVector);
460   memory->destroy(sum2G2wq);
461   memory->destroy(G1qw_real);
462 
463   memory->destroy(sum1G2qw);
464   memory->destroy(sum1G1qw_epsilon);
465   memory->destroy(sum2ndotGwq_epsilon);
466 
467   memory->destroy(cg_r);
468   memory->destroy(cg_p);
469   memory->destroy(cg_Ap);
470   memory->destroy(cg_A);
471 }
472 
473 /* ---------------------------------------------------------------------- */
474 
modify_param(int narg,char ** arg)475 int FixPolarizeFunctional::modify_param(int narg, char **arg)
476 {
477   int iarg = 0;
478   while (iarg < narg) {
479     if (strcmp(arg[iarg], "kspace") == 0) {
480       if (iarg + 2 > narg) error->all(FLERR, "Illegal fix_modify command");
481       if (strcmp(arg[iarg + 1], "yes") == 0)
482         kspaceflag = 1;
483       else if (strcmp(arg[iarg + 1], "no") == 0)
484         kspaceflag = 0;
485       else
486         error->all(FLERR, "Illegal fix_modify command for fix polarize/functional");
487       iarg += 2;
488     } else if (strcmp(arg[iarg], "dielectrics") == 0) {
489       if (iarg + 6 > narg) error->all(FLERR, "Illegal fix_modify command");
490       double epsiloni = -1, areai = -1;
491       double q_unscaled = 0;
492       int set_charge = 0;
493       double ediff = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
494       double emean = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
495       if (strcmp(arg[iarg + 3], "nullptr") != 0)
496         epsiloni = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
497       if (strcmp(arg[iarg + 4], "nullptr") != 0)
498         areai = utils::numeric(FLERR, arg[iarg + 4], false, lmp);
499       if (strcmp(arg[iarg + 5], "nullptr") != 0) {
500         q_unscaled = utils::numeric(FLERR, arg[iarg + 5], false, lmp);
501         set_charge = 1;
502       }
503       set_dielectric_params(ediff, emean, epsiloni, areai, set_charge, q_unscaled);
504 
505       iarg += 6;
506     } else
507       error->all(FLERR, "Illegal fix_modify command");
508   }
509 
510   return iarg;
511 }
512 
513 /* ----------------------------------------------------------------------
514    pack values in local atom-based arrays for exchange with another proc
515 ------------------------------------------------------------------------- */
516 
pack_exchange(int i,double * buf)517 int FixPolarizeFunctional::pack_exchange(int i, double *buf)
518 {
519   buf[0] = ubuf(induced_charge_idx[i]).d;
520   buf[1] = ubuf(ion_idx[i]).d;
521   return 2;
522 }
523 
524 /* ----------------------------------------------------------------------
525    unpack values in local atom-based arrays from exchange with another proc
526 ------------------------------------------------------------------------- */
527 
unpack_exchange(int nlocal,double * buf)528 int FixPolarizeFunctional::unpack_exchange(int nlocal, double *buf)
529 {
530   induced_charge_idx[nlocal] = (int) ubuf(buf[0]).i;
531   ion_idx[nlocal] = (int) ubuf(buf[1]).i;
532   return 2;
533 }
534 
535 /* ---------------------------------------------------------------------- */
536 
pack_forward_comm(int n,int * list,double * buf,int,int *)537 int FixPolarizeFunctional::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/,
538                                              int * /*pbc*/)
539 {
540   int m;
541   for (m = 0; m < n; m++) buf[m] = atom->q[list[m]];
542   return n;
543 }
544 
545 /* ---------------------------------------------------------------------- */
546 
unpack_forward_comm(int n,int first,double * buf)547 void FixPolarizeFunctional::unpack_forward_comm(int n, int first, double *buf)
548 {
549   int i, m;
550   for (m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
551 }
552 
553 /* ----------------------------------------------------------------------
554    allocate local atom-based arrays
555 ------------------------------------------------------------------------- */
556 
grow_arrays(int n)557 void FixPolarizeFunctional::grow_arrays(int n)
558 {
559   if (n > nmax) nmax = n;
560   memory->grow(induced_charge_idx, nmax, "fix:induced_charge_idx");
561   memory->grow(ion_idx, nmax, "fix:ion_idx");
562 }
563 
564 /* ----------------------------------------------------------------------
565    copy values within local atom-based arrays
566 ------------------------------------------------------------------------- */
567 
copy_arrays(int i,int j,int)568 void FixPolarizeFunctional::copy_arrays(int i, int j, int /*delflag*/)
569 {
570   induced_charge_idx[j] = induced_charge_idx[i];
571   ion_idx[j] = ion_idx[i];
572 }
573 
574 /* ----------------------------------------------------------------------
575    initialize one atom's array values, called when atom is created
576 ------------------------------------------------------------------------- */
577 
set_arrays(int i)578 void FixPolarizeFunctional::set_arrays(int i)
579 {
580   induced_charge_idx[i] = -1;
581   ion_idx[i] = 0;
582 }
583 
584 /* ----------------------------------------------------------------------
585    return # of bytes of allocated memory
586 ------------------------------------------------------------------------- */
587 
memory_usage()588 double FixPolarizeFunctional::memory_usage()
589 {
590   double bytes = 0;
591   bytes += square(num_induced_charges) * sizeof(double);                // inverse_matrix
592   bytes += square(num_induced_charges) * sizeof(double);                // Rww
593   bytes += square(num_induced_charges) * sizeof(double);                // G1ww
594   bytes += square(num_induced_charges) * sizeof(double);                // ndotGww
595   bytes += square(num_induced_charges) * sizeof(double);                // G2ww
596   bytes += square(num_induced_charges) * sizeof(double);                // G3ww
597   bytes += num_induced_charges * sizeof(double);                        // qiRqwVector
598   bytes += num_induced_charges * sizeof(double);                        // sum2G2wq
599   bytes += num_induced_charges * sizeof(double);                        // sum1G2qw
600   bytes += num_induced_charges * sizeof(double);                        // sum1G1qw_epsilon
601   bytes += num_induced_charges * sizeof(double);                        // sum2ndotGwq_epsilon
602   bytes += (double) num_ions * num_induced_charges * sizeof(double);    // G1qw_real
603   bytes += nmax * sizeof(int);                                          // induced_charge_idx
604   bytes += nmax * sizeof(int);                                          // ion_idx
605   bytes += num_induced_charges * sizeof(double);                        // induced_charges
606   return bytes;
607 }
608 
609 /* ---------------------------------------------------------------------- */
610 
calculate_Rww_cutoff()611 void FixPolarizeFunctional::calculate_Rww_cutoff()
612 {
613   int *mask = atom->mask;
614   tagint *tag = atom->tag;
615   double **x = atom->x;
616   double *area = atom->area;
617   double *curvature = atom->curvature;
618   double **norm = atom->mu;
619   double *ed = atom->ed;
620   double *em = atom->em;
621 
622   // invoke full neighbor list
623 
624   int inum, jnum, *ilist, *jlist, *numneigh, **firstneigh;
625   inum = list->inum;
626   ilist = list->ilist;
627   numneigh = list->numneigh;
628   firstneigh = list->firstneigh;
629 
630   // calculate G1ww, gradG1ww, ndotG1ww
631   // fill up buffer1 with local G1ww and buffer2 with local ndotGww
632   // seperate into two loops to let the compiler optimize/or later vectorization
633 
634   for (int i = 0; i < num_induced_charges; i++)
635     for (int j = 0; j < num_induced_charges; j++) buffer1[i][j] = 0;
636 
637   for (int i = 0; i < num_induced_charges; i++)
638     for (int j = 0; j < num_induced_charges; j++) buffer2[i][j] = 0;
639 
640   for (int ii = 0; ii < inum; ii++) {
641     int i = ilist[ii];
642     if (mask[i] & groupbit) {
643       // interface particles
644       int mi = induced_charge_idx[i];
645       double xtmp = x[i][0];
646       double ytmp = x[i][1];
647       double ztmp = x[i][2];
648       jlist = firstneigh[i];
649       jnum = numneigh[i];
650 
651       for (int kk = 0; kk < jnum; kk++) {
652         int k = jlist[kk] & NEIGHMASK;
653         if (mask[k] & groupbit) {
654 
655           // interface particles: k can be ghost atoms
656           double delx = xtmp - x[k][0];
657           double dely = ytmp - x[k][1];
658           double delz = ztmp - x[k][2];
659           domain->minimum_image(delx, dely, delz);
660           int mk = tag2mat[tag[k]];
661 
662           // G1ww[mi][mk] = calculate_greens_ewald(delx, dely, delz);
663           buffer1[mi][mk] = calculate_greens_ewald(delx, dely, delz);
664 
665           // gradG1ww is vector, directly change it in the function
666           double gradG1ww[3];
667           calculate_grad_greens_ewald(gradG1ww, delx, dely, delz);
668 
669           // use mu to store the normal vector of interface vertex
670           buffer2[mi][mk] = MathExtra::dot3(norm[i], gradG1ww) / (4 * MY_PI);
671         }
672       }
673 
674       // special treatment for the diagonal terms,
675       // even though in the above loop there is mk == mi
676 
677       buffer1[mi][mi] = calculate_greens_ewald_self_vertex(area[i]);
678       buffer2[mi][mi] = calculate_ndotgreens_ewald_self_vertex(area[i], curvature[i]) / (4 * MY_PI);
679     }
680   }
681 
682   MPI_Allreduce(buffer1[0], G1ww[0], num_induced_charges * num_induced_charges, MPI_DOUBLE, MPI_SUM,
683                 world);
684   MPI_Allreduce(buffer2[0], ndotGww[0], num_induced_charges * num_induced_charges, MPI_DOUBLE,
685                 MPI_SUM, world);
686 
687   // calculate G2ww
688   // fill up buffer1 with local G2ww
689 
690   for (int i = 0; i < num_induced_charges; i++)
691     for (int j = 0; j < num_induced_charges; j++) buffer1[i][j] = 0;
692 
693   for (int ii = 0; ii < inum; ii++) {
694     int i = ilist[ii];
695     if (mask[i] & groupbit) {
696       // interface particles
697       int mi = induced_charge_idx[i];
698       jlist = firstneigh[i];
699       jnum = numneigh[i];
700 
701       for (int kk = 0; kk < jnum; kk++) {
702         int k = jlist[kk] & NEIGHMASK;
703 
704         if (mask[k] & groupbit) {
705           // interface particles: k can be ghost atoms
706           int mk = tag2mat[tag[k]];
707           double temp = 0;
708           for (int ll = 0; ll < jnum; ll++) {
709             int l = jlist[ll] & NEIGHMASK;
710             if (mask[l] & groupbit) {
711               // interface particles: l can be ghost atoms
712               int ml = tag2mat[tag[l]];
713               temp += G1ww[mi][ml] * ndotGww[ml][mk] * area[l] * ed[l];
714             }
715           }
716           //G2ww[mi][mk] = temp;
717           buffer1[mi][mk] = temp;
718         }
719       }
720 
721       double temp = 0;
722       for (int kk = 0; kk < jnum; kk++) {
723         int k = jlist[kk] & NEIGHMASK;
724         if (mask[k] & groupbit) {
725           // interface particles: k can be ghost atoms
726           int mk = tag2mat[tag[k]];
727           temp += G1ww[mi][mk] * ndotGww[mk][mi] * area[k] * ed[k];
728         }
729       }
730       //G2ww[mi][mi] = temp;
731       buffer1[mi][mi] = temp;
732     }
733   }
734 
735   MPI_Allreduce(buffer1[0], G2ww[0], num_induced_charges * num_induced_charges, MPI_DOUBLE, MPI_SUM,
736                 world);
737 
738   // calculate G3ww and Rww
739   // G3ww is implemented as in _exact(), but can be optionally excluded
740   // due to its minor contribution
741   // fill up buffer1 with local G3ww
742 
743   for (int i = 0; i < num_induced_charges; i++)
744     for (int j = 0; j < num_induced_charges; j++) buffer1[i][j] = 0;
745 
746   for (int ii = 0; ii < inum; ii++) {
747     int i = ilist[ii];
748     if (mask[i] & groupbit) {
749       // interface particles
750       int mi = induced_charge_idx[i];
751       jlist = firstneigh[i];
752       jnum = numneigh[i];
753 
754       for (int kk = 0; kk < jnum; kk++) {
755         int k = jlist[kk] & NEIGHMASK;
756 
757         if (mask[k] & groupbit) {
758 
759           // interface particles: k can be ghost atoms
760 
761           int mk = tag2mat[tag[k]];
762 
763           double a1 = em[i] * (em[k] - 1.0);
764           double a2 = 1.0 - em[i] - em[k];
765 
766           // The first term (w/ G1ww) contributes the most to Rww
767           // the second term (w/ G2ww) includes certain correction
768 
769           //Rww[mi][mk] = a1 * G1ww[mi][mk] + a2 * G2ww[mi][mk];
770           buffer1[mi][mk] = a1 * G1ww[mi][mk] + a2 * G2ww[mi][mk];
771 
772           if (includingG3ww) {
773             double temp = 0;
774             for (int ll = 0; ll < jnum; ll++) {
775               int l = jlist[ll] & NEIGHMASK;
776               if (mask[l] & groupbit) {
777                 // interface particles: l can be ghost atoms
778                 int ml = tag2mat[tag[l]];
779                 temp += (ndotGww[ml][mi]) * G2ww[ml][mk] * area[l] * ed[l];
780               }
781             }
782             G3ww[mi][mk] = temp;
783             //Rww[mi][mk] += G3ww[mi][mk];
784             buffer1[mi][mk] += G3ww[mi][mk];
785           }
786         }
787       }
788 
789       if (includingG3ww) {
790         double temp = 0;
791         for (int ll = 0; ll < jnum; ll++) {
792           int l = jlist[ll] & NEIGHMASK;
793           if (mask[l] & groupbit) {
794             // interface particles: l can be ghost atoms
795             int ml = tag2mat[tag[l]];
796             temp += (ndotGww[ml][mi]) * G2ww[ml][mi] * area[l] * ed[l];
797           }
798         }
799         G3ww[mi][mi] = temp;
800         //Rww[mi][mi] += G3ww[mi][mi];
801         buffer1[mi][mi] += G3ww[mi][mi];
802       }
803 
804       // including the diagonal term
805       double a1 = em[i] * (em[i] - 1.0);
806       double a2 = 1.0 - em[i] - em[i];
807 
808       // The first term (w/ G1ww) contributes the most to Rww
809       // the second term (w/ G2ww) includes certain correction
810       //Rww[mi][mi] = a1 * G1ww[mi][mi] + a2 * G2ww[mi][mi];
811 
812       buffer1[mi][mi] = a1 * G1ww[mi][mi] + a2 * G2ww[mi][mi];
813     }
814   }
815 
816   MPI_Allreduce(buffer1[0], Rww[0], num_induced_charges * num_induced_charges, MPI_DOUBLE, MPI_SUM,
817                 world);
818 
819 #ifdef _POLARIZE_DEBUG
820   if (comm->me == 0) {
821     FILE *fp = fopen("Rww-functional.txt", "w");
822     for (int i = 0; i < num_induced_charges; i++)
823       fprintf(fp, "%d %g %g %g\n", i, Rww[i][i], Rww[i][num_induced_charges / 2],
824               Rww[num_induced_charges / 2][i]);
825     fclose(fp);
826   }
827 #endif
828 }
829 
830 /* ---------------------------------------------------------------------- */
831 
calculate_qiRqw_cutoff()832 void FixPolarizeFunctional::calculate_qiRqw_cutoff()
833 {
834   int ii, i, k, kk, jnum;
835   double xtmp, ytmp, ztmp, delx, dely, delz, r;
836   int *mask = atom->mask;
837   tagint *tag = atom->tag;
838   double **x = atom->x;
839   double *q = atom->q_unscaled;
840   double *epsilon = atom->epsilon;
841   double *area = atom->area;
842   double **norm = atom->mu;
843   double *ed = atom->ed;
844   double *em = atom->em;
845 
846   // invoke full neighbor list
847 
848   int inum, *ilist, *jlist, *numneigh, **firstneigh;
849   inum = list->inum;
850   ilist = list->ilist;
851   numneigh = list->numneigh;
852   firstneigh = list->firstneigh;
853 
854   // calculate G1qw_real
855   // fill up buffer1 with local G1qw_real
856 
857   for (int i = 0; i < num_ions; i++)
858     for (int j = 0; j < num_induced_charges; j++) buffer1[i][j] = 0;
859 
860   for (ii = 0; ii < inum; ii++) {
861     i = ilist[ii];
862     if (!(mask[i] & groupbit)) {
863       // ion particles
864       int mi = ion_idx[i];
865       xtmp = x[i][0];
866       ytmp = x[i][1];
867       ztmp = x[i][2];
868       jlist = firstneigh[i];
869       jnum = numneigh[i];
870 
871       for (kk = 0; kk < jnum; kk++) {
872         k = jlist[kk] & NEIGHMASK;
873         if (mask[k] & groupbit) {
874           // interface particles: k can be ghost atoms
875           delx = xtmp - x[k][0];
876           dely = ytmp - x[k][1];
877           delz = ztmp - x[k][2];
878           domain->minimum_image(delx, dely, delz);
879           r = sqrt(delx * delx + dely * dely + delz * delz);
880 
881           int mk = tag2mat[tag[k]];
882           //G1qw_real[mi][mk] = greens_real(r);
883           buffer1[mi][mk] = greens_real(r);
884         }
885       }
886     }
887   }
888 
889   MPI_Allreduce(&buffer1[0][0], &G1qw_real[0][0], num_ions * num_induced_charges, MPI_DOUBLE,
890                 MPI_SUM, world);
891 
892   // the following loop need the above results: gradG1wq_real
893   // calculate sum1G1qw_epsilon and sum2ndotGwq_epsilon
894   // fill up rhs1 with local sum1G1qw_epsilon and rhs2 with local sum2ndotGwq_epsilon
895 
896   memset(rhs1, 0, num_induced_charges * sizeof(double));
897   memset(rhs2, 0, num_induced_charges * sizeof(double));
898 
899   for (kk = 0; kk < inum; kk++) {
900     k = ilist[kk];    // k is local index
901     if (mask[k] & groupbit) {
902       // interface particles
903       int mk = induced_charge_idx[k];
904       xtmp = x[k][0];
905       ytmp = x[k][1];
906       ztmp = x[k][2];
907       jlist = firstneigh[k];
908       jnum = numneigh[k];
909 
910       double tempndotG[3] = {0.0, 0.0, 0.0};
911       double temp_sum1 = 0;
912       for (ii = 0; ii < jnum; ii++) {
913         i = jlist[ii] & NEIGHMASK;
914         if (!(mask[i] & groupbit)) {
915           // ions particles: i can be ghost atoms
916           delx = x[i][0] - xtmp;
917           dely = x[i][1] - ytmp;
918           delz = x[i][2] - ztmp;
919           domain->minimum_image(delx, dely, delz);
920 
921           int mi = tag2mat_ions[tag[i]];    //ion_idx[i];
922 
923           //calculate_grad_greens_real(gradG1wq_real[mk][mi], delx, dely, delz);
924           double gradG1wq[3];
925           calculate_grad_greens_real(gradG1wq, delx, dely, delz);
926           MathExtra::scale3(-1.0, gradG1wq);
927 
928           tempndotG[0] += gradG1wq[0] * (q[i] / epsilon[i]);
929           tempndotG[1] += gradG1wq[1] * (q[i] / epsilon[i]);
930           tempndotG[2] += gradG1wq[2] * (q[i] / epsilon[i]);
931           temp_sum1 += G1qw_real[mi][mk] * q[i] / epsilon[i];
932         }
933       }
934 
935       //sum1G1qw_epsilon[mk] = temp_sum1;// + ewaldDielectric->sum1G1qw_k_epsilon[mk];
936       rhs1[mk] = temp_sum1;
937 
938       //sum2ndotGwq_epsilon[mk] = MathExtra::dot3(norm[k], tempndotG);
939       rhs2[mk] = MathExtra::dot3(norm[k], tempndotG);
940     }
941   }
942 
943   MPI_Allreduce(rhs1, sum1G1qw_epsilon, num_induced_charges, MPI_DOUBLE, MPI_SUM, world);
944   MPI_Allreduce(rhs2, sum2ndotGwq_epsilon, num_induced_charges, MPI_DOUBLE, MPI_SUM, world);
945 
946   // calculate G2, gradient G2
947   // sum2G2wq and sum1G2qw
948 
949   //  for (int i = 0; i < num_induced_charges; i++) rhs1[i] = rhs2[i] = 0;
950   memset(rhs1, 0, num_induced_charges * sizeof(double));
951   memset(rhs2, 0, num_induced_charges * sizeof(double));
952 
953   for (kk = 0; kk < inum; kk++) {
954     k = ilist[kk];    // k is local index
955     if (mask[k] & groupbit) {
956       // interface particles
957       int mk = induced_charge_idx[k];
958       jlist = firstneigh[k];
959       jnum = numneigh[k];
960 
961       double tempwq = 0;
962       double temp = 0;
963       for (ii = 0; ii < jnum; ii++) {
964         i = jlist[ii] & NEIGHMASK;
965         if (mask[i] & groupbit) {
966           // interface particles: i can be ghost atoms
967           int mi = tag2mat[tag[i]];
968           tempwq += G1ww[mk][mi] * (sum2ndotGwq_epsilon[mi]) * area[i] * ed[i];
969           temp += sum1G1qw_epsilon[mi] * (ndotGww[mi][mk]) * area[i] * ed[i];
970         }
971       }
972 
973       // add the corresponding self terms
974       tempwq += G1ww[mk][mk] * (sum2ndotGwq_epsilon[mk]) * area[k] * ed[k];
975       temp += sum1G1qw_epsilon[mk] * (ndotGww[mk][mk]) * area[k] * ed[k];
976 
977       //sum2G2wq[mk] = tempwq;
978       rhs1[mk] = tempwq;
979       //sum1G2qw[mk] = temp;
980       rhs2[mk] = temp;
981     }
982   }
983 
984   MPI_Allreduce(rhs1, sum2G2wq, num_induced_charges, MPI_DOUBLE, MPI_SUM, world);
985   MPI_Allreduce(rhs2, sum1G2qw, num_induced_charges, MPI_DOUBLE, MPI_SUM, world);
986 
987   // calculate G3, gradient G3
988   // fill up rhs1 with local qiRqwVector
989 
990   memset(rhs1, 0, num_induced_charges * sizeof(double));
991 
992   for (kk = 0; kk < inum; kk++) {
993     k = ilist[kk];    // k is local index
994     if (mask[k] & groupbit) {
995       // interface particles
996       int mk = induced_charge_idx[k];
997       jlist = firstneigh[k];
998       jnum = numneigh[k];
999 
1000       double sum1G3qw = 0;
1001       double qiRwwVectorTemp1 = 0;
1002       for (ii = 0; ii < jnum; ii++) {
1003         i = jlist[ii] & NEIGHMASK;
1004         if (mask[i] & groupbit) {
1005           // interface particles: i can be ghost atoms
1006           int mi = tag2mat[tag[i]];
1007           sum1G3qw += sum2ndotGwq_epsilon[mi] * G2ww[mi][mk] * area[i] * ed[i];
1008         } else {
1009           // ions particles: i can be ghost atoms
1010           int mi = tag2mat_ions[tag[i]];    //ion_idx[i];
1011           qiRwwVectorTemp1 += q[i] * (1.0 - em[k] / epsilon[i]) * G1qw_real[mi][mk];
1012         }
1013       }
1014 
1015       // add the diagonal term
1016 
1017       sum1G3qw += sum2ndotGwq_epsilon[mk] * G2ww[mk][mk] * area[k] * ed[k];
1018 
1019       // qiRwwVectorTemp2 is a significant contribution, of which sum2G2wq is significant
1020       double qiRwwVectorTemp2 = (1.0 - 2.0 * em[k]) * sum2G2wq[mk] + sum1G2qw[mk] + 2.0 * sum1G3qw;
1021 
1022       // qiRqwVector[mk] = qiRwwVectorTemp1 + qiRwwVectorTemp2;
1023       rhs1[mk] = qiRwwVectorTemp1 + qiRwwVectorTemp2;
1024     }
1025   }
1026 
1027   MPI_Allreduce(rhs1, qiRqwVector, num_induced_charges, MPI_DOUBLE, MPI_SUM, world);
1028 
1029 #ifdef _POLARIZE_DEBUG
1030   if (comm->me == 0) {
1031     FILE *fp = fopen("qRqw-functional.txt", "w");
1032     for (int i = 0; i < num_induced_charges; i++) fprintf(fp, "%d %g\n", i, qiRqwVector[i]);
1033     fclose(fp);
1034   }
1035 #endif
1036 }
1037 
1038 /* ----------------------------------------------------------------------
1039    set dielectric params for the atom in the group
1040 ------------------------------------------------------------------------- */
1041 
set_dielectric_params(double ediff,double emean,double epsiloni,double areai,int set_charge,double qvalue)1042 void FixPolarizeFunctional::set_dielectric_params(double ediff, double emean, double epsiloni,
1043                                                   double areai, int set_charge, double qvalue)
1044 {
1045   double *area = atom->area;
1046   double *ed = atom->ed;
1047   double *em = atom->em;
1048   double *q_unscaled = atom->q_unscaled;
1049   double *epsilon = atom->epsilon;
1050   int *mask = atom->mask;
1051   int nlocal = atom->nlocal;
1052 
1053   for (int i = 0; i < nlocal; i++) {
1054     if (mask[i] & groupbit) {
1055       ed[i] = ediff;
1056       em[i] = emean;
1057       if (areai > 0) area[i] = areai;
1058       if (epsiloni > 0) epsilon[i] = epsiloni;
1059       if (set_charge) q_unscaled[i] = qvalue;
1060     }
1061   }
1062 }
1063 
1064 /* ----------------------------------------------------------------------
1065    real Green's function
1066 ------------------------------------------------------------------------ */
1067 
greens_real(double r)1068 double FixPolarizeFunctional::greens_real(double r)
1069 {
1070   return erfc(g_ewald * r) / r;
1071 }
1072 
1073 /* ---------------------------------------------------------------------- */
1074 
grad_greens_real_factor(double r)1075 double FixPolarizeFunctional::grad_greens_real_factor(double r)
1076 {
1077   double alpharij = g_ewald * r;
1078   double factor = erfc(alpharij) + 2.0 * alpharij / MY_PIS * exp(-(alpharij * alpharij));
1079   double r3 = r * r * r;
1080   return (factor * (-1.0 / r3));
1081 }
1082 
1083 /* ---------------------------------------------------------------------- */
1084 
calculate_grad_greens_real(double * vec,double dx,double dy,double dz)1085 void FixPolarizeFunctional::calculate_grad_greens_real(double *vec, double dx, double dy, double dz)
1086 {
1087   double r = sqrt(dx * dx + dy * dy + dz * dz);
1088   double real = grad_greens_real_factor(r);
1089   vec[0] = real * dx;
1090   vec[1] = real * dy;
1091   vec[2] = real * dz;
1092 }
1093 
1094 /* ---------------------------------------------------------------------- */
1095 
calculate_greens_ewald(double dx,double dy,double dz)1096 double FixPolarizeFunctional::calculate_greens_ewald(double dx, double dy, double dz)
1097 {
1098   // excluding the reciprocal term
1099   double r = sqrt(dx * dx + dy * dy + dz * dz);
1100   return greens_real(r);
1101 }
1102 
1103 /* ---------------------------------------------------------------------- */
1104 
calculate_grad_greens_ewald(double * vec,double dx,double dy,double dz)1105 void FixPolarizeFunctional::calculate_grad_greens_ewald(double *vec, double dx, double dy,
1106                                                         double dz)
1107 {
1108   // real part of grad greens, excluding the reciprocal term
1109   calculate_grad_greens_real(vec, dx, dy, dz);
1110 }
1111 
1112 /* ---------------------------------------------------------------------- */
1113 
calculate_matrix_multiply_vector(double ** matrix,double * in_vec,double * out_vec,int M)1114 void FixPolarizeFunctional::calculate_matrix_multiply_vector(double **matrix, double *in_vec,
1115                                                              double *out_vec, int M)
1116 {
1117   for (int k = 0; k < M; ++k) {
1118     double temp = 0.0;
1119     for (int l = 0; l < M; ++l) { temp += matrix[k][l] * in_vec[l]; }
1120     out_vec[k] = temp;
1121   }
1122 }
1123 
1124 /* ---------------------------------------------------------------------- */
1125 
calculate_greens_ewald_self_vertex(double area)1126 double FixPolarizeFunctional::calculate_greens_ewald_self_vertex(double area)
1127 {
1128   // excluding the reciprocal term
1129   double corr = 2.0 * MY_PIS / sqrt(area);
1130   double self_energy = -2.0 * g_ewald / MY_PIS;
1131   return corr + self_energy;
1132 }
1133 
1134 /* ---------------------------------------------------------------------- */
1135 
calculate_ndotgreens_ewald_self_vertex(double area,double curvature)1136 double FixPolarizeFunctional::calculate_ndotgreens_ewald_self_vertex(double area, double curvature)
1137 {
1138   // this term is important, cannot be set to zero
1139   // curvature = 1 / R, minus if norm is inverse of R to center.
1140 
1141   return curvature * MY_PIS / sqrt(area);
1142 }
1143 
1144 /* ----------------------------------------------------------------------
1145   compute the inner product between two vectors x and y: x^t * y
1146     where ^t is the transpose operator
1147 -- ---------------------------------------------------------------------- */
1148 
inner_product(double * x,double * y,int N)1149 double FixPolarizeFunctional::inner_product(double *x, double *y, int N)
1150 {
1151   double t = 0;
1152   for (int i = 0; i < N; i++) t += x[i] * y[i];
1153   return t;
1154 }
1155 
1156 /* ---------------------------------------------------------------------- */
1157 
cg_solver(double ** A,double * b,double * x,int N)1158 void FixPolarizeFunctional::cg_solver(double **A, double *b, double *x, int N)
1159 {
1160   calculate_matrix_multiply_vector(A, x, cg_p, N);
1161   for (int i = 0; i < N; i++) {
1162     cg_r[i] = b[i] - cg_p[i];
1163     cg_p[i] = cg_r[i];
1164   }
1165   double rsq = inner_product(cg_r, cg_r, N);
1166 
1167   // maximum number of iterations do not exceed N
1168   for (int k = 0; k < N; k++) {
1169 
1170     // Ap = A * p
1171     calculate_matrix_multiply_vector(A, cg_p, cg_Ap, N);
1172 
1173     // pAp = p^t * Ap
1174     double pAp = inner_product(cg_p, cg_Ap, N);
1175 
1176     // alpha = r^t * r / pAp
1177     double alpha = rsq / pAp;
1178 
1179     // x = x + alpha * p
1180     // r = r - alpha * Ap
1181     for (int i = 0; i < N; i++) {
1182       x[i] = x[i] + alpha * cg_p[i];
1183       cg_r[i] = cg_r[i] - alpha * cg_Ap[i];
1184     }
1185 
1186     double rsq_new = inner_product(cg_r, cg_r, N);
1187     if (rsq_new < tolerance) break;
1188 
1189     // beta = rsq_new / rsq
1190     double beta = rsq_new / rsq;
1191     for (int i = 0; i < N; i++) cg_p[i] = cg_r[i] + beta * cg_p[i];
1192     rsq = rsq_new;
1193   }
1194 }
1195