1 /* ----------------------------------------------------------------------
2    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3    http://lammps.sandia.gov, Sandia National Laboratories
4    Steve Plimpton, sjplimp@sandia.gov
5    Copyright (2003) Sandia Corporation.  Under the terms of Contract
6    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
7    certain rights in this software.  This software is distributed under
8    the GNU General Public License.
9    See the README file in the top-level LAMMPS directory.
10 ------------------------------------------------------------------------- */
11 
12 /* ----------------------------------------------------------------------
13    Contributing authors: Aidan Thompson, Christian Trott, SNL
14 ------------------------------------------------------------------------- */
15 
16 //
17 // SNA.cpp
18 //
19 // LGPL Version 2.1 HEADER START
20 //
21 // This library is free software; you can redistribute it and/or
22 // modify it under the terms of the GNU Lesser General Public
23 //
24 // License as published by the Free Software Foundation; either
25 // version 2.1 of the License, or (at your option) any later version.
26 //
27 // This library is distributed in the hope that it will be useful,
28 // but WITHOUT ANY WARRANTY; without even the implied warranty of
29 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
30 // Lesser General Public License for more details.
31 //
32 // You should have received a copy of the GNU Lesser General Public
33 // License along with this library; if not, write to the Free Software
34 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
35 // MA 02110-1301  USA
36 //
37 // LGPL Version 2.1 HEADER END
38 //
39 
40 //
41 // Copyright (c) 2019, Regents of the University of Minnesota.
42 // All rights reserved.
43 //
44 // Contributors:
45 //    Yaser Afshar
46 //
47 // Brief: This file is adapted from the LAMMPS software package
48 //        `lammps/src/SNAP/sna.cpp` and amended and updated by
49 //        Yaser Afshar
50 //
51 
52 
53 #include "SNA.hpp"
54 
55 #include <cmath>
56 
57 #include <algorithm>
58 
59 #ifdef MY_PI
60 #undef MY_PI
61 #endif
62 
63 #define MY_PI 3.14159265358979323846 // pi
64 
SNA(double const rfac0_in,int const twojmax_in,double const rmin0_in,int const switchflag_in,int const bzeroflag_in)65 SNA::SNA(double const rfac0_in,
66          int const twojmax_in,
67          double const rmin0_in,
68          int const switchflag_in,
69          int const bzeroflag_in) : twojmax(twojmax_in),
70                                    rmin0(rmin0_in),
71                                    rfac0(rfac0_in),
72                                    switchflag(switchflag_in),
73                                    bzeroflag(bzeroflag_in),
74                                    wself(1.0)
75 {
76   ncoeff = compute_ncoeff();
77 
78   build_indexlist();
79 
80   create_twojmax_arrays();
81 
82   if (bzeroflag)
83   {
84     double const www = wself * wself * wself;
85     for (int j = 0; j <= twojmax; ++j)
86     {
87       bzero[j] = www * (j + 1);
88     }
89   }
90 
91   init();
92 }
93 
~SNA()94 SNA::~SNA() {}
95 
build_indexlist()96 void SNA::build_indexlist()
97 {
98   // index list for cglist
99   int const jdim = twojmax + 1;
100 
101   idxcg_block.resize(jdim, jdim, jdim, 0);
102 
103   int counter = 0;
104   for (int j1 = 0; j1 < jdim; ++j1)
105   {
106     for (int j2 = 0; j2 <= j1; ++j2)
107     {
108       for (int j = j1 - j2; j <= std::min(twojmax, j1 + j2); j += 2)
109       {
110         idxcg_block(j1, j2, j) = counter;
111         counter += (j1 + 1) * (j2 + 1);
112       }
113     }
114   }
115   idxcg_max = counter;
116 
117   // index list for uarray need to include both halves
118   idxu_block.resize(jdim);
119 
120   counter = 0;
121   for (int j = 0; j < jdim; ++j)
122   {
123     idxu_block[j] = counter;
124     counter += (j + 1) * (j + 1);
125   }
126   idxu_max = counter;
127 
128   // Index list for beta and B
129   counter = 0;
130   for (int j1 = 0; j1 < jdim; ++j1)
131   {
132     for (int j2 = 0; j2 <= j1; ++j2)
133     {
134       for (int j = j1 - j2; j <= std::min(twojmax, j1 + j2); j += 2)
135       {
136         if (j >= j1)
137         {
138           ++counter;
139         }
140       }
141     }
142   }
143   idxb_max = counter;
144 
145   idxb.resize(idxb_max);
146 
147   // Reverse index list for beta and b
148   idxb_block.resize(jdim, jdim, jdim, 0);
149 
150   counter = 0;
151   for (int j1 = 0; j1 < jdim; ++j1)
152   {
153     for (int j2 = 0; j2 <= j1; ++j2)
154     {
155       for (int j = j1 - j2; j <= std::min(twojmax, j1 + j2); j += 2)
156       {
157         if (j >= j1)
158         {
159           idxb[counter].j1 = j1;
160           idxb[counter].j2 = j2;
161           idxb[counter].j = j;
162 
163           idxb_block(j1, j2, j) = counter++;
164         }
165       }
166     }
167   }
168 
169   // Index list for zlist
170   counter = 0;
171   for (int j1 = 0; j1 < jdim; ++j1)
172   {
173     for (int j2 = 0; j2 <= j1; ++j2)
174     {
175       for (int j = j1 - j2; j <= std::min(twojmax, j1 + j2); j += 2)
176       {
177         for (int mb = 0; 2 * mb <= j; ++mb)
178         {
179           counter += j + 1;
180         }
181       }
182     }
183   }
184   idxz_max = counter;
185 
186   idxz.resize(idxz_max);
187 
188   idxz_block.resize(jdim, jdim, jdim, 0);
189 
190   counter = 0;
191   for (int j1 = 0; j1 < jdim; ++j1)
192   {
193     for (int j2 = 0; j2 <= j1; ++j2)
194     {
195       for (int j = j1 - j2; j <= std::min(twojmax, j1 + j2); j += 2)
196       {
197         idxz_block(j1, j2, j) = counter;
198 
199         // find right beta[jjb] entry multiply and divide by j+1 factors
200         // account for multiplicity of 1, 2, or 3
201         for (int mb = 0; 2 * mb <= j; ++mb)
202         {
203           for (int ma = 0; ma <= j; ++ma, ++counter)
204           {
205             idxz[counter].j1 = j1;
206             idxz[counter].j2 = j2;
207             idxz[counter].j = j;
208 
209             idxz[counter].ma1min = std::max(0, (2 * ma - j - j2 + j1) / 2);
210             idxz[counter].ma2max = (2 * ma - j - (2 * idxz[counter].ma1min - j1) + j2) / 2;
211 
212             idxz[counter].na = std::min(j1, (2 * ma - j + j2 + j1) / 2) - idxz[counter].ma1min + 1;
213 
214             idxz[counter].mb1min = std::max(0, (2 * mb - j - j2 + j1) / 2);
215             idxz[counter].mb2max = (2 * mb - j - (2 * idxz[counter].mb1min - j1) + j2) / 2;
216 
217             idxz[counter].nb = std::min(j1, (2 * mb - j + j2 + j1) / 2) - idxz[counter].mb1min + 1;
218 
219             // apply to z(j1,j2,j,ma,mb) to unique element of y(j)
220 
221             idxz[counter].jju = idxu_block[j] + (j + 1) * mb + ma;
222           }
223         }
224       }
225     }
226   }
227 }
228 
init()229 void SNA::init()
230 {
231   init_clebsch_gordan();
232   init_rootpqarray();
233 }
234 
grow_rij(int newnmax)235 void SNA::grow_rij(int newnmax)
236 {
237   if (newnmax <= (int)rcutij.size())
238   {
239     return;
240   }
241 
242   rij.resize(newnmax, 3);
243   inside.resize(newnmax);
244   wj.resize(newnmax);
245   rcutij.resize(newnmax);
246 
247   ulist_r_ij.resize(newnmax, idxu_max, 0.0);
248   ulist_i_ij.resize(newnmax, idxu_max, 0.0);
249 }
250 
compute_ui(int const jnum)251 void SNA::compute_ui(int const jnum)
252 {
253   zero_uarraytot();
254 
255   addself_uarraytot(wself);
256 
257   // Loop over jnum neighbors
258   for (int j = 0; j < jnum; ++j)
259   {
260     double const dx = rij(j, 0);
261     double const dy = rij(j, 1);
262     double const dz = rij(j, 2);
263     double const rsq = dx * dx + dy * dy + dz * dz;
264     double const r = std::sqrt(rsq);
265 
266     double const theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0);
267     double const z0 = r / std::tan(theta0);
268 
269     compute_uarray(dx, dy, dz, z0, r, j);
270 
271     add_uarraytot(r, wj[j], rcutij[j], j);
272   }
273 }
274 
compute_zi()275 void SNA::compute_zi()
276 {
277   for (int jjz = 0; jjz < idxz_max; ++jjz)
278   {
279     int const j1 = idxz[jjz].j1;
280     int const j2 = idxz[jjz].j2;
281     int const j = idxz[jjz].j;
282 
283     int const ma1min = idxz[jjz].ma1min;
284     int const ma2max = idxz[jjz].ma2max;
285     int const na = idxz[jjz].na;
286 
287     int const mb1min = idxz[jjz].mb1min;
288     int const mb2max = idxz[jjz].mb2max;
289     int const nb = idxz[jjz].nb;
290 
291     double const *cgblock = cglist.data() + idxcg_block(j1, j2, j);
292 
293     zlist_r[jjz] = 0.0;
294     zlist_i[jjz] = 0.0;
295 
296     int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
297     int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
298 
299     int icgb = mb1min * (j2 + 1) + mb2max;
300 
301     for (int ib = 0; ib < nb; ++ib)
302     {
303       double suma1_r = 0.0;
304       double suma1_i = 0.0;
305 
306       double const *u1_r = &ulisttot_r[jju1];
307       double const *u1_i = &ulisttot_i[jju1];
308       double const *u2_r = &ulisttot_r[jju2];
309       double const *u2_i = &ulisttot_i[jju2];
310 
311       int icga = ma1min * (j2 + 1) + ma2max;
312 
313       for (int ia = 0, ma1 = ma1min, ma2 = ma2max; ia < na; ++ia, ++ma1, --ma2)
314       {
315         suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]);
316         suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]);
317         icga += j2;
318       }
319 
320       zlist_r[jjz] += cgblock[icgb] * suma1_r;
321       zlist_i[jjz] += cgblock[icgb] * suma1_i;
322 
323       jju1 += j1 + 1;
324       jju2 -= j2 + 1;
325       icgb += j2;
326     }
327   }
328 }
329 
compute_yi(double const * const beta)330 void SNA::compute_yi(double const *const beta)
331 {
332   // Zero yarray
333   for (int j = 0; j <= twojmax; ++j)
334   {
335     int jju = idxu_block[j];
336 
337     for (int mb = 0; 2 * mb <= j; ++mb)
338     {
339       for (int ma = 0; ma <= j; ++ma, ++jju)
340       {
341         ylist_r[jju] = 0.0;
342         ylist_i[jju] = 0.0;
343       }
344     }
345   }
346 
347   double betaj;
348   for (int jjz = 0; jjz < idxz_max; ++jjz)
349   {
350     int const j1 = idxz[jjz].j1;
351     int const j2 = idxz[jjz].j2;
352     int const j = idxz[jjz].j;
353 
354     int const ma1min = idxz[jjz].ma1min;
355     int const ma2max = idxz[jjz].ma2max;
356     int const na = idxz[jjz].na;
357 
358     int const mb1min = idxz[jjz].mb1min;
359     int const mb2max = idxz[jjz].mb2max;
360     int const nb = idxz[jjz].nb;
361 
362     double const *cgblock = cglist.data() + idxcg_block(j1, j2, j);
363 
364     double ztmp_r = 0.0;
365     double ztmp_i = 0.0;
366 
367     int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
368     int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
369     int icgb = mb1min * (j2 + 1) + mb2max;
370 
371     for (int ib = 0; ib < nb; ++ib)
372     {
373       double suma1_r = 0.0;
374       double suma1_i = 0.0;
375 
376       double const *u1_r = &ulisttot_r[jju1];
377       double const *u1_i = &ulisttot_i[jju1];
378       double const *u2_r = &ulisttot_r[jju2];
379       double const *u2_i = &ulisttot_i[jju2];
380 
381       int icga = ma1min * (j2 + 1) + ma2max;
382 
383       for (int ia = 0, ma1 = ma1min, ma2 = ma2max; ia < na; ++ia, ++ma1, --ma2)
384       {
385         suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]);
386         suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]);
387         icga += j2;
388       }
389 
390       ztmp_r += cgblock[icgb] * suma1_r;
391       ztmp_i += cgblock[icgb] * suma1_i;
392 
393       jju1 += j1 + 1;
394       jju2 -= j2 + 1;
395       icgb += j2;
396     }
397 
398     // apply to z(j1,j2,j,ma,mb) to unique element of y(j) find the right y_list[jju] and beta[jjb] entries
399     // multiply and divide by j+1 factors account for multiplicity of 1, 2, or 3
400 
401     int const jju = idxz[jjz].jju;
402 
403     // pick out right beta value
404 
405     if (j >= j1)
406     {
407       int const jjb = idxb_block(j1, j2, j);
408       if (j1 == j)
409       {
410         betaj = ((j2 == j) ? 3 : 2) * beta[jjb];
411       }
412       else
413       {
414         betaj = beta[jjb];
415       }
416     }
417     else if (j >= j2)
418     {
419       int const jjb = idxb_block(j, j2, j1);
420       if (j2 == j)
421       {
422         betaj = 2 * beta[jjb] * (j1 + 1) / (j + 1.0);
423       }
424       else
425       {
426         betaj = beta[jjb] * (j1 + 1) / (j + 1.0);
427       }
428     }
429     else
430     {
431       int const jjb = idxb_block(j2, j, j1);
432       betaj = beta[jjb] * (j1 + 1) / (j + 1.0);
433     }
434 
435     ylist_r[jju] += betaj * ztmp_r;
436     ylist_i[jju] += betaj * ztmp_i;
437   }
438 }
439 
compute_deidrj(double * const dedr)440 void SNA::compute_deidrj(double *const dedr)
441 {
442   dedr[0] = 0.0;
443   dedr[1] = 0.0;
444   dedr[2] = 0.0;
445 
446   for (int j = 0; j <= twojmax; ++j)
447   {
448     int jju = idxu_block[j];
449 
450     for (int mb = 0; 2 * mb < j; ++mb)
451     {
452       for (int ma = 0; ma <= j; ++ma, ++jju)
453       {
454         double const jjjmambyarray_r = ylist_r[jju];
455         double const jjjmambyarray_i = ylist_i[jju];
456 
457         auto dudr_r = dulist_r.data_1D(jju);
458         auto dudr_i = dulist_i.data_1D(jju);
459 
460         dedr[0] += dudr_r[0] * jjjmambyarray_r + dudr_i[0] * jjjmambyarray_i;
461         dedr[1] += dudr_r[1] * jjjmambyarray_r + dudr_i[1] * jjjmambyarray_i;
462         dedr[2] += dudr_r[2] * jjjmambyarray_r + dudr_i[2] * jjjmambyarray_i;
463       }
464     }
465 
466     // For j even, handle middle column
467 
468     if (j % 2 == 0)
469     {
470       int const mb = j / 2;
471       for (int ma = 0; ma < mb; ++ma, ++jju)
472       {
473         double const jjjmambyarray_r = ylist_r[jju];
474         double const jjjmambyarray_i = ylist_i[jju];
475 
476         auto dudr_r = dulist_r.data_1D(jju);
477         auto dudr_i = dulist_i.data_1D(jju);
478 
479         dedr[0] += dudr_r[0] * jjjmambyarray_r + dudr_i[0] * jjjmambyarray_i;
480         dedr[1] += dudr_r[1] * jjjmambyarray_r + dudr_i[1] * jjjmambyarray_i;
481         dedr[2] += dudr_r[2] * jjjmambyarray_r + dudr_i[2] * jjjmambyarray_i;
482       }
483 
484       {
485         double const jjjmambyarray_r = ylist_r[jju];
486         double const jjjmambyarray_i = ylist_i[jju];
487 
488         auto dudr_r = dulist_r.data_1D(jju);
489         auto dudr_i = dulist_i.data_1D(jju);
490 
491         dedr[0] += (dudr_r[0] * jjjmambyarray_r + dudr_i[0] * jjjmambyarray_i) * 0.5;
492         dedr[1] += (dudr_r[1] * jjjmambyarray_r + dudr_i[1] * jjjmambyarray_i) * 0.5;
493         dedr[2] += (dudr_r[2] * jjjmambyarray_r + dudr_i[2] * jjjmambyarray_i) * 0.5;
494       }
495     }
496   }
497 
498   dedr[0] *= 2.0;
499   dedr[1] *= 2.0;
500   dedr[2] *= 2.0;
501 }
502 
compute_bi()503 void SNA::compute_bi()
504 {
505   for (int jjb = 0; jjb < idxb_max; ++jjb)
506   {
507     int const j1 = idxb[jjb].j1;
508     int const j2 = idxb[jjb].j2;
509     int const j = idxb[jjb].j;
510 
511     int jjz = idxz_block(j1, j2, j);
512     int jju = idxu_block[j];
513 
514     double sumzu = 0.0;
515     for (int mb = 0; 2 * mb < j; ++mb)
516     {
517       for (int ma = 0; ma <= j; ++ma, ++jjz, ++jju)
518       {
519         sumzu += ulisttot_r[jju] * zlist_r[jjz] + ulisttot_i[jju] * zlist_i[jjz];
520       }
521     }
522 
523     // For j even, handle middle column
524     if (j % 2 == 0)
525     {
526       int const mb = j / 2;
527       for (int ma = 0; ma < mb; ++ma, ++jjz, ++jju)
528       {
529         sumzu += ulisttot_r[jju] * zlist_r[jjz] + ulisttot_i[jju] * zlist_i[jjz];
530       }
531 
532       sumzu += 0.5 * (ulisttot_r[jju] * zlist_r[jjz] + ulisttot_i[jju] * zlist_i[jjz]);
533     }
534 
535     blist[jjb] = 2.0 * sumzu;
536 
537     // apply bzero shift
538     if (bzeroflag)
539     {
540       blist[jjb] -= bzero[j];
541     }
542   }
543 }
544 
compute_dbidrj()545 void SNA::compute_dbidrj()
546 {
547   for (int jjb = 0; jjb < idxb_max; ++jjb)
548   {
549     int const j1 = idxb[jjb].j1;
550     int const j2 = idxb[jjb].j2;
551     int const j = idxb[jjb].j;
552 
553     auto dbdr = dblist.data_1D(jjb);
554 
555     dbdr[0] = 0.0;
556     dbdr[1] = 0.0;
557     dbdr[2] = 0.0;
558 
559     // Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb)
560 
561     int jjz = idxz_block(j1, j2, j);
562     int jju = idxu_block[j];
563 
564     VectorOfSizeDIM sumzdu_r;
565 
566     sumzdu_r[0] = 0.0;
567     sumzdu_r[1] = 0.0;
568     sumzdu_r[2] = 0.0;
569 
570     for (int mb = 0; 2 * mb < j; ++mb)
571     {
572       for (int ma = 0; ma <= j; ++ma, ++jjz, ++jju)
573       {
574         auto dudr_r = dulist_r.data_1D(jju);
575         auto dudr_i = dulist_i.data_1D(jju);
576 
577         sumzdu_r[0] += dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz];
578         sumzdu_r[1] += dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz];
579         sumzdu_r[2] += dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz];
580       }
581     }
582 
583     // For j even, handle middle column
584     if (j % 2 == 0)
585     {
586       int const mb = j / 2;
587       for (int ma = 0; ma < mb; ++ma, ++jjz, ++jju)
588       {
589         auto dudr_r = dulist_r.data_1D(jju);
590         auto dudr_i = dulist_i.data_1D(jju);
591 
592         sumzdu_r[0] += dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz];
593         sumzdu_r[1] += dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz];
594         sumzdu_r[2] += dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz];
595       }
596 
597       {
598         auto dudr_r = dulist_r.data_1D(jju);
599         auto dudr_i = dulist_i.data_1D(jju);
600 
601         sumzdu_r[0] += (dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz]) * 0.5;
602         sumzdu_r[1] += (dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz]) * 0.5;
603         sumzdu_r[2] += (dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz]) * 0.5;
604       }
605     } // end if jeven
606 
607     dbdr[0] += 2.0 * sumzdu_r[0];
608     dbdr[1] += 2.0 * sumzdu_r[1];
609     dbdr[2] += 2.0 * sumzdu_r[2];
610 
611     // Sum over Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1)
612 
613     jjz = idxz_block(j, j2, j1);
614     jju = idxu_block[j1];
615 
616     sumzdu_r[0] = 0.0;
617     sumzdu_r[1] = 0.0;
618     sumzdu_r[2] = 0.0;
619 
620     for (int mb = 0; 2 * mb < j1; ++mb)
621     {
622       for (int ma = 0; ma <= j1; ++ma, ++jjz, ++jju)
623       {
624         auto dudr_r = dulist_r.data_1D(jju);
625         auto dudr_i = dulist_i.data_1D(jju);
626 
627         sumzdu_r[0] += dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz];
628         sumzdu_r[1] += dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz];
629         sumzdu_r[2] += dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz];
630       }
631     }
632 
633     // For j1 even, handle middle column
634     if (j1 % 2 == 0)
635     {
636       int mb = j1 / 2;
637       for (int ma = 0; ma < mb; ++ma, ++jjz, ++jju)
638       {
639         auto dudr_r = dulist_r.data_1D(jju);
640         auto dudr_i = dulist_i.data_1D(jju);
641 
642         sumzdu_r[0] += dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz];
643         sumzdu_r[1] += dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz];
644         sumzdu_r[2] += dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz];
645       }
646 
647       {
648         auto dudr_r = dulist_r.data_1D(jju);
649         auto dudr_i = dulist_i.data_1D(jju);
650 
651         sumzdu_r[0] += (dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz]) * 0.5;
652         sumzdu_r[1] += (dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz]) * 0.5;
653         sumzdu_r[2] += (dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz]) * 0.5;
654       }
655     } // end if j1even
656 
657     double const j1fac = (j + 1) / (j1 + 1.0);
658 
659     dbdr[0] += 2.0 * sumzdu_r[0] * j1fac;
660     dbdr[1] += 2.0 * sumzdu_r[1] * j1fac;
661     dbdr[2] += 2.0 * sumzdu_r[2] * j1fac;
662 
663     //
664 
665     jjz = idxz_block(j, j1, j2);
666     jju = idxu_block[j2];
667 
668     sumzdu_r[0] = 0.0;
669     sumzdu_r[1] = 0.0;
670     sumzdu_r[2] = 0.0;
671 
672     for (int mb = 0; 2 * mb < j2; ++mb)
673     {
674       for (int ma = 0; ma <= j2; ++ma, ++jjz, ++jju)
675       {
676         auto dudr_r = dulist_r.data_1D(jju);
677         auto dudr_i = dulist_i.data_1D(jju);
678 
679         sumzdu_r[0] += dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz];
680         sumzdu_r[1] += dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz];
681         sumzdu_r[2] += dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz];
682       }
683     }
684 
685     // For j2 even, handle middle column
686     if (j2 % 2 == 0)
687     {
688       int const mb = j2 / 2;
689       for (int ma = 0; ma < mb; ++ma, ++jjz, ++jju)
690       {
691         auto dudr_r = dulist_r.data_1D(jju);
692         auto dudr_i = dulist_i.data_1D(jju);
693 
694         sumzdu_r[0] += dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz];
695         sumzdu_r[1] += dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz];
696         sumzdu_r[2] += dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz];
697       }
698 
699       {
700         auto dudr_r = dulist_r.data_1D(jju);
701         auto dudr_i = dulist_i.data_1D(jju);
702 
703         sumzdu_r[0] += (dudr_r[0] * zlist_r[jjz] + dudr_i[0] * zlist_i[jjz]) * 0.5;
704         sumzdu_r[1] += (dudr_r[1] * zlist_r[jjz] + dudr_i[1] * zlist_i[jjz]) * 0.5;
705         sumzdu_r[2] += (dudr_r[2] * zlist_r[jjz] + dudr_i[2] * zlist_i[jjz]) * 0.5;
706       }
707     } // end if j2even
708 
709     double const j2fac = (j + 1) / (j2 + 1.0);
710 
711     dbdr[0] += 2.0 * sumzdu_r[0] * j2fac;
712     dbdr[1] += 2.0 * sumzdu_r[1] * j2fac;
713     dbdr[2] += 2.0 * sumzdu_r[2] * j2fac;
714   } //end loop over j1 j2 j
715 }
716 
compute_duidrj(double const * const rij_in,double const wj_in,double const rcut,int const neigh_j)717 void SNA::compute_duidrj(double const *const rij_in, double const wj_in, double const rcut, int const neigh_j)
718 {
719   double const dx = rij_in[0];
720   double const dy = rij_in[1];
721   double const dz = rij_in[2];
722 
723   double const rsq = dx * dx + dy * dy + dz * dz;
724   double const r = std::sqrt(rsq);
725 
726   double const rscale0 = rfac0 * MY_PI / (rcut - rmin0);
727   double const theta0 = (r - rmin0) * rscale0;
728 
729   double const cs = std::cos(theta0);
730   double const sn = std::sin(theta0);
731 
732   double const z0 = r * cs / sn;
733 
734   double const dz0dr = z0 / r - (r * rscale0) * (rsq + z0 * z0) / rsq;
735 
736   compute_duarray(dx, dy, dz, z0, r, dz0dr, wj_in, rcut, neigh_j);
737 }
738 
zero_uarraytot()739 void SNA::zero_uarraytot()
740 {
741   std::for_each(ulisttot_r.begin(), ulisttot_r.end(), [](double &u) { u = 0.0; });
742   std::for_each(ulisttot_i.begin(), ulisttot_i.end(), [](double &u) { u = 0.0; });
743 }
744 
addself_uarraytot(double const wself_in)745 void SNA::addself_uarraytot(double const wself_in)
746 {
747   for (int j = 0; j <= twojmax; ++j)
748   {
749     int jju = idxu_block[j];
750     for (int ma = 0; ma <= j; ++ma)
751     {
752       ulisttot_r[jju] = wself_in;
753       ulisttot_i[jju] = 0.0;
754       jju += j + 2;
755     }
756   }
757 }
758 
add_uarraytot(double const r,double const wj_in,double const rcut,int const neigh_j)759 void SNA::add_uarraytot(double const r, double const wj_in, double const rcut, int const neigh_j)
760 {
761   double const sfac = compute_sfac(r, rcut) * wj_in;
762 
763   double *ulist_r = ulist_r_ij.data_1D(neigh_j).data();
764   double *ulist_i = ulist_i_ij.data_1D(neigh_j).data();
765 
766   for (int j = 0; j <= twojmax; ++j)
767   {
768     int const jju_b = idxu_block[j];
769     int const jju_e = jju_b + (j + 1) * (j + 1);
770 
771     for (int m = jju_b; m < jju_e; ++m)
772     {
773       ulisttot_r[m] += sfac * ulist_r[m];
774     }
775 
776     for (int m = jju_b; m < jju_e; ++m)
777     {
778       ulisttot_i[m] += sfac * ulist_i[m];
779     }
780   }
781 }
782 
compute_uarray(double const dx,double const dy,double const dz,double const z0,double const r,int const neigh_j)783 void SNA::compute_uarray(double const dx, double const dy, double const dz, double const z0, double const r, int const neigh_j)
784 {
785   // Compute Cayley-Klein parameters for unit quaternion
786 
787   double const r0inv = 1.0 / std::sqrt(r * r + z0 * z0);
788 
789   double const a_r = r0inv * z0;
790   double const a_i = -r0inv * dz;
791   double const b_r = r0inv * dy;
792   double const b_i = -r0inv * dx;
793 
794   // VMK Section 4.8.2
795 
796   double *ulist_r = ulist_r_ij.data_1D(neigh_j).data();
797   double *ulist_i = ulist_i_ij.data_1D(neigh_j).data();
798 
799   ulist_r[0] = 1.0;
800   ulist_i[0] = 0.0;
801 
802   double rootpq;
803 
804   for (int j = 1; j <= twojmax; ++j)
805   {
806     int jju = idxu_block[j];
807     int jjup = idxu_block[j - 1];
808 
809     // Fill in left side of matrix layer from previous layer
810 
811     for (int mb = 0; 2 * mb <= j; ++mb, ++jju)
812     {
813       ulist_r[jju] = 0.0;
814       ulist_i[jju] = 0.0;
815 
816       for (int ma = 0; ma < j; ++ma, ++jjup)
817       {
818         rootpq = rootpqarray(j - ma, j - mb);
819 
820         ulist_r[jju] += rootpq * (a_r * ulist_r[jjup] + a_i * ulist_i[jjup]);
821         ulist_i[jju] += rootpq * (a_r * ulist_i[jjup] - a_i * ulist_r[jjup]);
822 
823         rootpq = rootpqarray(ma + 1, j - mb);
824 
825         ++jju;
826 
827         ulist_r[jju] = -rootpq * (b_r * ulist_r[jjup] + b_i * ulist_i[jjup]);
828         ulist_i[jju] = -rootpq * (b_r * ulist_i[jjup] - b_i * ulist_r[jjup]);
829       }
830     }
831 
832     // Copy the left side to the right side with inversion symmetry VMK 4.4(2)
833     // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb])
834 
835     jju = idxu_block[j];
836     jjup = jju + (j + 1) * (j + 1) - 1;
837 
838     for (int mb = 0, mbpar = 1; 2 * mb <= j; ++mb)
839     {
840       for (int ma = 0, mapar = mbpar; ma <= j; ++ma, ++jju, --jjup)
841       {
842         if (mapar)
843         {
844           ulist_r[jjup] = ulist_r[jju];
845           ulist_i[jjup] = -ulist_i[jju];
846         }
847         else
848         {
849           ulist_r[jjup] = -ulist_r[jju];
850           ulist_i[jjup] = ulist_i[jju];
851         }
852         mapar = !mapar;
853       }
854       mbpar = !mbpar;
855     }
856   }
857 }
compute_duarray(double const dx,double const dy,double const dz,double const z0,double const r,double const dz0dr,double const wj_in,double const rcut,int const neigh_j)858 void SNA::compute_duarray(double const dx, double const dy, double const dz,
859                           double const z0, double const r, double const dz0dr,
860                           double const wj_in, double const rcut, int const neigh_j)
861 {
862   VectorOfSizeDIM da_r;
863   VectorOfSizeDIM da_i;
864   VectorOfSizeDIM db_r;
865   VectorOfSizeDIM db_i;
866   VectorOfSizeDIM dz0;
867   VectorOfSizeDIM dr0inv;
868 
869   double rootpq;
870 
871   double const rinv = 1.0 / r;
872 
873   double const ux = dx * rinv;
874   double const uy = dy * rinv;
875   double const uz = dz * rinv;
876 
877   double const r0inv = 1.0 / std::sqrt(r * r + z0 * z0);
878 
879   double const a_r = z0 * r0inv;
880   double const a_i = -dz * r0inv;
881   double const b_r = dy * r0inv;
882   double const b_i = -dx * r0inv;
883 
884   double const dr0invdr = -r0inv * r0inv * r0inv * (r + z0 * dz0dr);
885 
886   dr0inv[0] = dr0invdr * ux;
887   dr0inv[1] = dr0invdr * uy;
888   dr0inv[2] = dr0invdr * uz;
889 
890   dz0[0] = dz0dr * ux;
891   dz0[1] = dz0dr * uy;
892   dz0[2] = dz0dr * uz;
893 
894   da_r[0] = dz0[0] * r0inv + z0 * dr0inv[0];
895   da_r[1] = dz0[1] * r0inv + z0 * dr0inv[1];
896   da_r[2] = dz0[2] * r0inv + z0 * dr0inv[2];
897 
898   da_i[0] = -dz * dr0inv[0];
899   da_i[1] = -dz * dr0inv[1];
900   da_i[2] = -dz * dr0inv[2] - r0inv;
901 
902   db_r[0] = dy * dr0inv[0];
903   db_r[1] = dy * dr0inv[1] + r0inv;
904   db_r[2] = dy * dr0inv[2];
905 
906   db_i[0] = -dx * dr0inv[0] - r0inv;
907   db_i[1] = -dx * dr0inv[1];
908   db_i[2] = -dx * dr0inv[2];
909 
910   double *ulist_r = ulist_r_ij.data_1D(neigh_j).data();
911   double *ulist_i = ulist_i_ij.data_1D(neigh_j).data();
912 
913   dulist_r(0, 0) = 0.0;
914   dulist_r(0, 1) = 0.0;
915   dulist_r(0, 2) = 0.0;
916 
917   dulist_i(0, 0) = 0.0;
918   dulist_i(0, 1) = 0.0;
919   dulist_i(0, 2) = 0.0;
920 
921   for (int j = 1; j <= twojmax; ++j)
922   {
923     int jju = idxu_block[j];
924     int jjup = idxu_block[j - 1];
925 
926     for (int mb = 0; 2 * mb <= j; ++mb, ++jju)
927     {
928       dulist_r(jju, 0) = 0.0;
929       dulist_r(jju, 1) = 0.0;
930       dulist_r(jju, 2) = 0.0;
931 
932       dulist_i(jju, 0) = 0.0;
933       dulist_i(jju, 1) = 0.0;
934       dulist_i(jju, 2) = 0.0;
935 
936       for (int ma = 0; ma < j; ++ma, ++jjup)
937       {
938         rootpq = rootpqarray(j - ma, j - mb);
939 
940         dulist_r(jju, 0) += rootpq *
941                             (da_r[0] * ulist_r[jjup] + da_i[0] * ulist_i[jjup] +
942                              a_r * dulist_r(jjup, 0) + a_i * dulist_i(jjup, 0));
943         dulist_r(jju, 1) += rootpq *
944                             (da_r[1] * ulist_r[jjup] + da_i[1] * ulist_i[jjup] +
945                              a_r * dulist_r(jjup, 1) + a_i * dulist_i(jjup, 1));
946         dulist_r(jju, 2) += rootpq *
947                             (da_r[2] * ulist_r[jjup] + da_i[2] * ulist_i[jjup] +
948                              a_r * dulist_r(jjup, 2) + a_i * dulist_i(jjup, 2));
949 
950         dulist_i(jju, 0) += rootpq *
951                             (da_r[0] * ulist_i[jjup] - da_i[0] * ulist_r[jjup] +
952                              a_r * dulist_i(jjup, 0) - a_i * dulist_r(jjup, 0));
953         dulist_i(jju, 1) += rootpq *
954                             (da_r[1] * ulist_i[jjup] - da_i[1] * ulist_r[jjup] +
955                              a_r * dulist_i(jjup, 1) - a_i * dulist_r(jjup, 1));
956         dulist_i(jju, 2) += rootpq *
957                             (da_r[2] * ulist_i[jjup] - da_i[2] * ulist_r[jjup] +
958                              a_r * dulist_i(jjup, 2) - a_i * dulist_r(jjup, 2));
959 
960         ++jju;
961 
962         rootpq = rootpqarray(ma + 1, j - mb);
963 
964         dulist_r(jju, 0) = -rootpq *
965                            (db_r[0] * ulist_r[jjup] + db_i[0] * ulist_i[jjup] +
966                             b_r * dulist_r(jjup, 0) + b_i * dulist_i(jjup, 0));
967         dulist_r(jju, 1) = -rootpq *
968                            (db_r[1] * ulist_r[jjup] + db_i[1] * ulist_i[jjup] +
969                             b_r * dulist_r(jjup, 1) + b_i * dulist_i(jjup, 1));
970         dulist_r(jju, 2) = -rootpq *
971                            (db_r[2] * ulist_r[jjup] + db_i[2] * ulist_i[jjup] +
972                             b_r * dulist_r(jjup, 2) + b_i * dulist_i(jjup, 2));
973 
974         dulist_i(jju, 0) = -rootpq *
975                            (db_r[0] * ulist_i[jjup] - db_i[0] * ulist_r[jjup] +
976                             b_r * dulist_i(jjup, 0) - b_i * dulist_r(jjup, 0));
977         dulist_i(jju, 1) = -rootpq *
978                            (db_r[1] * ulist_i[jjup] - db_i[1] * ulist_r[jjup] +
979                             b_r * dulist_i(jjup, 1) - b_i * dulist_r(jjup, 1));
980         dulist_i(jju, 2) = -rootpq *
981                            (db_r[2] * ulist_i[jjup] - db_i[2] * ulist_r[jjup] +
982                             b_r * dulist_i(jjup, 2) - b_i * dulist_r(jjup, 2));
983       }
984     }
985 
986     // copy left side to right side with inversion symmetry VMK 4.4(2)
987     // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb])
988 
989     jju = idxu_block[j];
990     jjup = jju + (j + 1) * (j + 1) - 1;
991 
992     for (int mb = 0, mbpar = 1; 2 * mb <= j; ++mb)
993     {
994       for (int ma = 0, mapar = mbpar; ma <= j; ++ma, ++jju, --jjup)
995       {
996         if (mapar)
997         {
998           dulist_r(jjup, 0) = dulist_r(jju, 0);
999           dulist_r(jjup, 1) = dulist_r(jju, 1);
1000           dulist_r(jjup, 2) = dulist_r(jju, 2);
1001 
1002           dulist_i(jjup, 0) = -dulist_i(jju, 0);
1003           dulist_i(jjup, 1) = -dulist_i(jju, 1);
1004           dulist_i(jjup, 2) = -dulist_i(jju, 2);
1005         }
1006         else
1007         {
1008           dulist_r(jjup, 0) = -dulist_r(jju, 0);
1009           dulist_r(jjup, 1) = -dulist_r(jju, 1);
1010           dulist_r(jjup, 2) = -dulist_r(jju, 2);
1011 
1012           dulist_i(jjup, 0) = dulist_i(jju, 0);
1013           dulist_i(jjup, 1) = dulist_i(jju, 1);
1014           dulist_i(jjup, 2) = dulist_i(jju, 2);
1015         }
1016         mapar = !mapar;
1017       }
1018       mbpar = !mbpar;
1019     }
1020   }
1021 
1022   double const sfac = compute_sfac(r, rcut) * wj_in;
1023 
1024   double const dsfac = compute_dsfac(r, rcut) * wj_in;
1025 
1026   for (int j = 0; j <= twojmax; ++j)
1027   {
1028     int jju = idxu_block[j];
1029 
1030     for (int mb = 0; 2 * mb <= j; ++mb)
1031     {
1032       for (int ma = 0; ma <= j; ++ma, ++jju)
1033       {
1034         dulist_r(jju, 0) = dsfac * ulist_r[jju] * ux + sfac * dulist_r(jju, 0);
1035         dulist_r(jju, 1) = dsfac * ulist_r[jju] * uy + sfac * dulist_r(jju, 1);
1036         dulist_r(jju, 2) = dsfac * ulist_r[jju] * uz + sfac * dulist_r(jju, 2);
1037 
1038         dulist_i(jju, 0) = dsfac * ulist_i[jju] * ux + sfac * dulist_i(jju, 0);
1039         dulist_i(jju, 1) = dsfac * ulist_i[jju] * uy + sfac * dulist_i(jju, 1);
1040         dulist_i(jju, 2) = dsfac * ulist_i[jju] * uz + sfac * dulist_i(jju, 2);
1041       }
1042     }
1043   }
1044 }
1045 
create_twojmax_arrays()1046 void SNA::create_twojmax_arrays()
1047 {
1048   rootpqarray.resize(twojmax + 2, twojmax + 2, 0.0);
1049 
1050   cglist.resize(idxcg_max, 0.0);
1051 
1052   ulisttot_r.resize(idxu_max, 0.0);
1053   ulisttot_i.resize(idxu_max, 0.0);
1054 
1055   dulist_r.resize(idxu_max, 3, 0.0);
1056   dulist_i.resize(idxu_max, 3, 0.0);
1057 
1058   zlist_r.resize(idxz_max);
1059   zlist_i.resize(idxz_max);
1060 
1061   blist.resize(idxb_max);
1062 
1063   dblist.resize(idxb_max, 3);
1064 
1065   ylist_r.resize(idxu_max, 0.0);
1066   ylist_i.resize(idxu_max, 0.0);
1067 
1068   if (bzeroflag)
1069   {
1070     bzero.resize(twojmax + 1);
1071   }
1072 }
1073 
factorial(int const n)1074 inline double SNA::factorial(int const n)
1075 {
1076   return std::tgamma(n + 1);
1077 }
1078 
deltacg(int const j1,int const j2,int const j)1079 double SNA::deltacg(int const j1, int const j2, int const j)
1080 {
1081   double const factorial_1 = factorial((j1 + j2 - j) / 2);
1082   double const factorial_2 = factorial((j1 - j2 + j) / 2);
1083   double const factorial_3 = factorial((-j1 + j2 + j) / 2);
1084   double const sfaccg = factorial((j1 + j2 + j) / 2 + 1);
1085 
1086   return std::sqrt(factorial_1 * factorial_2 * factorial_3 / sfaccg);
1087 }
1088 
init_clebsch_gordan()1089 void SNA::init_clebsch_gordan()
1090 {
1091   for (int j1 = 0, counter = 0; j1 <= twojmax; ++j1)
1092   {
1093     for (int j2 = 0; j2 <= j1; ++j2)
1094     {
1095       for (int j = j1 - j2; j <= std::min(twojmax, j1 + j2); j += 2)
1096       {
1097         for (int m1 = 0; m1 <= j1; ++m1)
1098         {
1099           int const aa2 = 2 * m1 - j1;
1100 
1101           for (int m2 = 0; m2 <= j2; ++m2, ++counter)
1102           {
1103             // -c <= cc <= c
1104 
1105             int const bb2 = 2 * m2 - j2;
1106             int const m = (aa2 + bb2 + j) / 2;
1107 
1108             if (m < 0 || m > j)
1109             {
1110               cglist[counter] = 0.0;
1111               continue;
1112             }
1113 
1114             double sum = 0.0;
1115 
1116             for (int z = std::max(0, std::max(-(j - j2 + aa2) / 2, -(j - j1 - bb2) / 2));
1117                  z <= std::min((j1 + j2 - j) / 2, std::min((j1 - aa2) / 2, (j2 + bb2) / 2));
1118                  ++z)
1119             {
1120               double const factorial_1 = factorial(z);
1121               double const factorial_2 = factorial((j1 + j2 - j) / 2 - z);
1122               double const factorial_3 = factorial((j1 - aa2) / 2 - z);
1123               double const factorial_4 = factorial((j2 + bb2) / 2 - z);
1124               double const factorial_5 = factorial((j - j2 + aa2) / 2 + z);
1125               double const factorial_6 = factorial((j - j1 - bb2) / 2 + z);
1126 
1127               double const ifac = (z % 2) ? -1.0 : 1.0;
1128 
1129               sum += ifac / (factorial_1 * factorial_2 * factorial_3 * factorial_4 * factorial_5 * factorial_6);
1130             }
1131 
1132             double const dcg = deltacg(j1, j2, j);
1133 
1134             double sfaccg;
1135             {
1136               double const factorial_1 = factorial((j1 + aa2) / 2);
1137               double const factorial_2 = factorial((j1 - aa2) / 2);
1138               double const factorial_3 = factorial((j2 + bb2) / 2);
1139               double const factorial_4 = factorial((j2 - bb2) / 2);
1140 
1141               int const cc2 = 2 * m - j;
1142 
1143               double const factorial_5 = factorial((j + cc2) / 2);
1144               double const factorial_6 = factorial((j - cc2) / 2);
1145 
1146               sfaccg = factorial_1 * factorial_2 * factorial_3 * factorial_4 * factorial_5 * factorial_6 * (j + 1);
1147             }
1148 
1149             cglist[counter] = sum * dcg * std::sqrt(sfaccg);
1150           }
1151         }
1152       }
1153     }
1154   }
1155 }
1156 
init_rootpqarray()1157 void SNA::init_rootpqarray()
1158 {
1159   for (int p = 1; p <= twojmax; ++p)
1160   {
1161     for (int q = 1; q <= twojmax; ++q)
1162     {
1163       rootpqarray(p, q) = std::sqrt(static_cast<double>(p) / q);
1164     }
1165   }
1166 }
1167 
compute_ncoeff()1168 int SNA::compute_ncoeff()
1169 {
1170   int counter = 0;
1171   for (int j1 = 0; j1 <= twojmax; ++j1)
1172   {
1173     for (int j2 = 0; j2 <= j1; ++j2)
1174     {
1175       for (int j = j1 - j2; j <= std::min(twojmax, j1 + j2); j += 2)
1176       {
1177         if (j >= j1)
1178         {
1179           ++counter;
1180         }
1181       }
1182     }
1183   }
1184   return counter;
1185 }
1186 
compute_sfac(double const r,double const rcut)1187 double SNA::compute_sfac(double const r, double const rcut)
1188 {
1189   if (!switchflag)
1190   {
1191     return 1.0;
1192   }
1193 
1194   if (r <= rmin0)
1195   {
1196     return 1.0;
1197   }
1198   else if (r > rcut)
1199   {
1200     return 0.0;
1201   }
1202   else
1203   {
1204     double const rcutfac = MY_PI / (rcut - rmin0);
1205     return 0.5 * (std::cos((r - rmin0) * rcutfac) + 1.0);
1206   }
1207 }
1208 
compute_dsfac(double const r,double const rcut)1209 double SNA::compute_dsfac(double const r, double const rcut)
1210 {
1211   if (!switchflag)
1212   {
1213     return 0.0;
1214   }
1215 
1216   if (r <= rmin0 || r > rcut)
1217   {
1218     return 0.0;
1219   }
1220   else
1221   {
1222     double const rcutfac = MY_PI / (rcut - rmin0);
1223     return -0.5 * std::sin((r - rmin0) * rcutfac) * rcutfac;
1224   }
1225 }
1226 
1227 #undef MY_PI
1228 #undef HELPER_LOG_ERROR
1229