1 /*-
2 * Copyright (c) 2012-2017 Ilya Kaliman
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "clapack.h"
31 #include "private.h"
32
33 #define INTEGRAL_THRESHOLD 1.0e-7
34
static inline size_t
fock_idx(size_t i, size_t j)
{
	/* Index into a symmetric matrix stored in packed triangular form. */
	size_t lo = i;
	size_t hi = j;

	if (hi < lo) {
		lo = j;
		hi = i;
	}
	return hi * (hi + 1) / 2 + lo;
}
40
41 static double
charge_penetration_energy(double s_ij,double r_ij)42 charge_penetration_energy(double s_ij, double r_ij)
43 {
44 double ln_s;
45
46 if (fabs(s_ij) < INTEGRAL_THRESHOLD)
47 return 0.0;
48
49 ln_s = log(fabs(s_ij));
50
51 return -s_ij * s_ij / r_ij / sqrt(-2.0 * ln_s);
52 }
53
/*
 * Accumulate the gradient of the overlap-based charge-penetration energy
 * for a single pair of LMOs into the total gradient.
 *
 * fr_i_idx, fr_j_idx     indices of the two fragments
 * lmo_i_idx, lmo_j_idx   LMO indices within each fragment
 * s_ij                   LMO-LMO overlap integral
 * ds_ij                  derivatives of s_ij; x/y/z and a/b/c components
 *                        (a/b/c enter only the torques, so they are
 *                        presumably rotational derivatives — confirm the
 *                        six_t convention in private.h)
 * swf                    switching-function data for this fragment pair
 *
 * Adds force to fragment i, subtracts it from fragment j, updates both
 * torques and the stress tensor.
 */
static void
charge_penetration_grad(struct efp *efp, size_t fr_i_idx, size_t fr_j_idx,
	size_t lmo_i_idx, size_t lmo_j_idx, double s_ij, const six_t ds_ij,
	const struct swf *swf)
{
	/* energy is zero below the threshold, so the gradient is too */
	if (fabs(s_ij) < INTEGRAL_THRESHOLD)
		return;

	const struct frag *fr_i = efp->frags + fr_i_idx;
	const struct frag *fr_j = efp->frags + fr_j_idx;
	const vec_t *ct_i = fr_i->lmo_centroids + lmo_i_idx;
	const vec_t *ct_j = fr_j->lmo_centroids + lmo_j_idx;

	/* centroid-to-centroid vector, shifted by the periodic cell offset */
	vec_t dr = {
		ct_j->x - ct_i->x - swf->cell.x,
		ct_j->y - ct_i->y - swf->cell.y,
		ct_j->z - ct_i->z - swf->cell.z
	};

	double r_ij = vec_len(&dr);
	double ln_s = log(fabs(s_ij));
	/* t1: coefficient of dr (dE/dR / R); t2: coefficient of ds_ij (dE/ds) */
	double t1 = s_ij * s_ij / (r_ij * r_ij * r_ij) / sqrt(-2.0 * ln_s);
	double t2 = -s_ij / r_ij / sqrt(2.0) * (2.0 / sqrt(-ln_s) +
	    0.5 / sqrt(-ln_s * ln_s * ln_s));

	vec_t force, torque_i, torque_j;

	/* everything is weighted by the switching function value */
	force.x = (t2 * ds_ij.x - t1 * dr.x) * swf->swf;
	force.y = (t2 * ds_ij.y - t1 * dr.y) * swf->swf;
	force.z = (t2 * ds_ij.z - t1 * dr.z) * swf->swf;

	/* torque on fragment i about its own reference point (fr_i->x,y,z) */
	torque_i.x = swf->swf * (-t2 * ds_ij.a +
	    t1 * (dr.y * (ct_i->z - fr_i->z) - dr.z * (ct_i->y - fr_i->y)));
	torque_i.y = swf->swf * (-t2 * ds_ij.b +
	    t1 * (dr.z * (ct_i->x - fr_i->x) - dr.x * (ct_i->z - fr_i->z)));
	torque_i.z = swf->swf * (-t2 * ds_ij.c +
	    t1 * (dr.x * (ct_i->y - fr_i->y) - dr.y * (ct_i->x - fr_i->x)));

	/* transfer the torque to fragment j's reference point: add r x F */
	torque_j.x = torque_i.x + force.y * (fr_j->z - fr_i->z - swf->cell.z) -
	    force.z * (fr_j->y - fr_i->y - swf->cell.y);
	torque_j.y = torque_i.y + force.z * (fr_j->x - fr_i->x - swf->cell.x) -
	    force.x * (fr_j->z - fr_i->z - swf->cell.z);
	torque_j.z = torque_i.z + force.x * (fr_j->y - fr_i->y - swf->cell.y) -
	    force.y * (fr_j->x - fr_i->x - swf->cell.x);

	/* Newton's third law: equal and opposite on the two fragments */
	six_atomic_add_xyz(efp->grad + fr_i_idx, &force);
	six_atomic_sub_xyz(efp->grad + fr_j_idx, &force);
	six_atomic_add_abc(efp->grad + fr_i_idx, &torque_i);
	six_atomic_sub_abc(efp->grad + fr_j_idx, &torque_j);

	efp_add_stress(&swf->dr, &force, &efp->stress);
}
106
/*
 * Transform one-electron integrals from the AO basis to the LMO basis:
 *
 *     lmo_s = wf_j^T * (s * wf_i)
 *
 * computed as two matrix multiplications with tmp holding the half
 * transformed intermediate (wf_size_j x n_lmo_i).
 *
 * NOTE(review): dimensions/leading-dimensions follow the Fortran
 * column-major convention of efp_dgemm — verify against clapack.h if
 * changing anything here.
 */
static void
transform_integrals(size_t n_lmo_i, size_t n_lmo_j, size_t wf_size_i,
	size_t wf_size_j, double *wf_i, double *wf_j, double *s, double *lmo_s,
	double *tmp)
{
	/* tmp = s * wf_i */
	efp_dgemm('N', 'N', (fortranint_t)wf_size_j, (fortranint_t)n_lmo_i,
	    (fortranint_t)wf_size_i, 1.0, s, (fortranint_t)wf_size_j, wf_i,
	    (fortranint_t)wf_size_i, 0.0, tmp, (fortranint_t)wf_size_j);
	/* lmo_s = wf_j^T * tmp */
	efp_dgemm('T', 'N', (fortranint_t)n_lmo_j, (fortranint_t)n_lmo_i,
	    (fortranint_t)wf_size_j, 1.0, wf_j, (fortranint_t)wf_size_j, tmp,
	    (fortranint_t)wf_size_j, 0.0, lmo_s, (fortranint_t)n_lmo_j);
}
119
120 static void
transform_integral_derivatives(size_t n_lmo_i,size_t n_lmo_j,size_t wf_size_i,size_t wf_size_j,const double * wf_i,const double * wf_j,const six_t * ds,six_t * lmo_ds,six_t * tmp)121 transform_integral_derivatives(size_t n_lmo_i, size_t n_lmo_j, size_t wf_size_i,
122 size_t wf_size_j, const double *wf_i, const double *wf_j, const six_t *ds,
123 six_t *lmo_ds, six_t *tmp)
124 {
125 const six_t *p_ds;
126 six_t *p_tmp, *p_lmo_ds;
127
128 p_tmp = tmp;
129 for (size_t i = 0; i < n_lmo_i; i++) {
130 for (size_t j = 0; j < wf_size_j; j++, p_tmp++) {
131 six_t sum = six_zero;
132 p_ds = ds + j;
133
134 for (size_t k = 0; k < wf_size_i; k++, p_ds += wf_size_j) {
135 double w = wf_i[wf_size_i * i + k];
136
137 sum.x += p_ds->x * w;
138 sum.y += p_ds->y * w;
139 sum.z += p_ds->z * w;
140 sum.a += p_ds->a * w;
141 sum.b += p_ds->b * w;
142 sum.c += p_ds->c * w;
143 }
144 *p_tmp = sum;
145 } }
146
147 p_lmo_ds = lmo_ds;
148 for (size_t i = 0; i < n_lmo_i; i++) {
149 for (size_t j = 0; j < n_lmo_j; j++, p_lmo_ds++) {
150 six_t sum = six_zero;
151 p_tmp = tmp + i * wf_size_j;
152
153 for (size_t k = 0; k < wf_size_j; k++, p_tmp++) {
154 double w = wf_j[wf_size_j * j + k];
155
156 sum.x += p_tmp->x * w;
157 sum.y += p_tmp->y * w;
158 sum.z += p_tmp->z * w;
159 sum.a += p_tmp->a * w;
160 sum.b += p_tmp->b * w;
161 sum.c += p_tmp->c * w;
162 }
163 *p_lmo_ds = sum;
164 } }
165 }
166
167 static void
add_six_vec(size_t el,size_t size,const double * vec,six_t * six)168 add_six_vec(size_t el, size_t size, const double *vec, six_t *six)
169 {
170 double *ptr = (double *)six + el;
171
172 for (size_t i = 0; i < size; i++, vec++, ptr += 6)
173 *ptr += *vec;
174 }
175
176 /*
177 * Reference:
178 *
179 * Hui Li, Mark Gordon
180 *
181 * Gradients of the exchange-repulsion energy in the general effective fragment
182 * potential method
183 *
184 * Theor. Chem. Acc. 115, 385 (2006)
185 */
/*
 * Accumulate the exchange-repulsion gradient for one LMO pair: LMO i on
 * fragment fr_i_idx and LMO j on fragment fr_j_idx. The three "parts"
 * below correspond to the three terms of the XR energy expression; see
 * the Li & Gordon reference above for the derivation.
 *
 * lmo_s, lmo_t            overlap/kinetic integrals in the LMO basis
 * lmo_ds, lmo_dt          their six-component derivatives
 *
 * Forces, torques and the stress tensor are updated in efp->grad /
 * efp->stress.
 */
static void
lmo_lmo_xr_grad(struct efp *efp, size_t fr_i_idx, size_t fr_j_idx,
	size_t i, size_t j, const double *lmo_s, const double *lmo_t,
	const six_t *lmo_ds, const six_t *lmo_dt, const struct swf *swf)
{
	const struct frag *fr_i = efp->frags + fr_i_idx;
	const struct frag *fr_j = efp->frags + fr_j_idx;
	const vec_t *ct_i = fr_i->lmo_centroids + i;
	const vec_t *ct_j = fr_j->lmo_centroids + j;

	/* row-major (i,j) element of the n_lmo_i x n_lmo_j LMO matrices */
	size_t ij = i * fr_j->n_lmo + j;

	/* centroid separation including the periodic cell offset */
	vec_t dr = {
		ct_j->x - ct_i->x - swf->cell.x,
		ct_j->y - ct_i->y - swf->cell.y,
		ct_j->z - ct_i->z - swf->cell.z
	};

	double s_ij = lmo_s[ij];
	double t_ij = lmo_t[ij];
	double r_ij = vec_len(&dr);
	double r_ij3 = r_ij * r_ij * r_ij;

	six_t ds_ij = lmo_ds[ij];
	six_t dt_ij = lmo_dt[ij];

	double t1, t2;
	vec_t force = vec_zero, torque_i = vec_zero;

	/* first part: overlap-only term, skipped for negligible overlap */
	if (fabs(s_ij) > INTEGRAL_THRESHOLD) {
		double ln_s = log(fabs(s_ij));

		t1 = s_ij / r_ij * (-sqrt(-2.0 / PI / ln_s) +
		    4.0 * sqrt(-2.0 / PI * ln_s));
		t2 = 2.0 * sqrt(-2.0 / PI * ln_s) * s_ij * s_ij / r_ij3;

		force.x += -t1 * ds_ij.x - t2 * dr.x;
		force.y += -t1 * ds_ij.y - t2 * dr.y;
		force.z += -t1 * ds_ij.z - t2 * dr.z;

		torque_i.x += t1 * ds_ij.a + t2 * (dr.y * (ct_i->z - fr_i->z) -
		    dr.z * (ct_i->y - fr_i->y));
		torque_i.y += t1 * ds_ij.b + t2 * (dr.z * (ct_i->x - fr_i->x) -
		    dr.x * (ct_i->z - fr_i->z));
		torque_i.z += t1 * ds_ij.c + t2 * (dr.x * (ct_i->y - fr_i->y) -
		    dr.y * (ct_i->x - fr_i->x));
	}

	/* second part: Fock-matrix/kinetic-energy term */
	double fij = 0.0, fji = 0.0;
	six_t dfij = six_zero, dfji = six_zero;

	/* contract row i of fragment i's Fock matrix with column j of s */
	for (size_t k = 0; k < fr_i->n_lmo; k++) {
		double fe = fr_i->xr_fock_mat[fock_idx(i, k)];
		const six_t *ds = lmo_ds + k * fr_j->n_lmo + j;

		fij += fe * lmo_s[k * fr_j->n_lmo + j];

		dfij.x += fe * ds->x;
		dfij.y += fe * ds->y;
		dfij.z += fe * ds->z;
		dfij.a += fe * ds->a;
		dfij.b += fe * ds->b;
		dfij.c += fe * ds->c;
	}

	/* same with row j of fragment j's Fock matrix and row i of s */
	for (size_t l = 0; l < fr_j->n_lmo; l++) {
		double fe = fr_j->xr_fock_mat[fock_idx(j, l)];
		const six_t *ds = lmo_ds + i * fr_j->n_lmo + l;

		fji += fe * lmo_s[i * fr_j->n_lmo + l];

		dfji.x += fe * ds->x;
		dfji.y += fe * ds->y;
		dfji.z += fe * ds->z;
		dfji.a += fe * ds->a;
		dfji.b += fe * ds->b;
		dfji.c += fe * ds->c;
	}

	t1 = (fij + fji - 2.0 * t_ij);

	force.x += -t1 * ds_ij.x - s_ij * (dfij.x + dfji.x - 2.0 * dt_ij.x);
	force.y += -t1 * ds_ij.y - s_ij * (dfij.y + dfji.y - 2.0 * dt_ij.y);
	force.z += -t1 * ds_ij.z - s_ij * (dfij.z + dfji.z - 2.0 * dt_ij.z);

	torque_i.x += t1 * ds_ij.a + s_ij * (dfij.a + dfji.a - 2.0 * dt_ij.a);
	torque_i.y += t1 * ds_ij.b + s_ij * (dfij.b + dfji.b - 2.0 * dt_ij.b);
	torque_i.z += t1 * ds_ij.c + s_ij * (dfij.c + dfji.c - 2.0 * dt_ij.c);

	/* third part: electrostatic-like term; vib/vja collect nuclear
	 * attraction (-Z/r) and LMO repulsion (+2/r) potentials, with
	 * dvib/dvja their derivatives */
	double vib = 0.0, vja = 0.0;
	six_t dvib = six_zero, dvja = six_zero;

	/* nuclei of fragment j seen from centroid i */
	for (size_t l = 0; l < fr_j->n_xr_atoms; l++) {
		struct xr_atom *at_j = fr_j->xr_atoms + l;

		vec_t dr_a = {
			at_j->x - ct_i->x - swf->cell.x,
			at_j->y - ct_i->y - swf->cell.y,
			at_j->z - ct_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		double tmp = at_j->znuc / (r * r * r);

		vib -= at_j->znuc / r;

		dvib.x -= tmp * dr_a.x;
		dvib.y -= tmp * dr_a.y;
		dvib.z -= tmp * dr_a.z;

		dvib.a -= tmp * (dr_a.y * (ct_i->z - fr_i->z) -
		    dr_a.z * (ct_i->y - fr_i->y));
		dvib.b -= tmp * (dr_a.z * (ct_i->x - fr_i->x) -
		    dr_a.x * (ct_i->z - fr_i->z));
		dvib.c -= tmp * (dr_a.x * (ct_i->y - fr_i->y) -
		    dr_a.y * (ct_i->x - fr_i->x));
	}

	/* LMO centroids of fragment j seen from centroid i */
	for (size_t l = 0; l < fr_j->n_lmo; l++) {
		vec_t *ct_jj = fr_j->lmo_centroids + l;

		vec_t dr_a = {
			ct_jj->x - ct_i->x - swf->cell.x,
			ct_jj->y - ct_i->y - swf->cell.y,
			ct_jj->z - ct_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		double tmp = 2.0 / (r * r * r);

		vib += 2.0 / r;

		dvib.x += tmp * dr_a.x;
		dvib.y += tmp * dr_a.y;
		dvib.z += tmp * dr_a.z;

		dvib.a += tmp * (dr_a.y * (ct_i->z - fr_i->z) -
		    dr_a.z * (ct_i->y - fr_i->y));
		dvib.b += tmp * (dr_a.z * (ct_i->x - fr_i->x) -
		    dr_a.x * (ct_i->z - fr_i->z));
		dvib.c += tmp * (dr_a.x * (ct_i->y - fr_i->y) -
		    dr_a.y * (ct_i->x - fr_i->x));
	}

	/* nuclei of fragment i seen from centroid j */
	for (size_t k = 0; k < fr_i->n_xr_atoms; k++) {
		struct xr_atom *at_i = fr_i->xr_atoms + k;

		vec_t dr_a = {
			ct_j->x - at_i->x - swf->cell.x,
			ct_j->y - at_i->y - swf->cell.y,
			ct_j->z - at_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		double tmp = at_i->znuc / (r * r * r);

		vja -= at_i->znuc / r;

		dvja.x -= tmp * dr_a.x;
		dvja.y -= tmp * dr_a.y;
		dvja.z -= tmp * dr_a.z;

		dvja.a -= tmp * (dr_a.y * (at_i->z - fr_i->z) -
		    dr_a.z * (at_i->y - fr_i->y));
		dvja.b -= tmp * (dr_a.z * (at_i->x - fr_i->x) -
		    dr_a.x * (at_i->z - fr_i->z));
		dvja.c -= tmp * (dr_a.x * (at_i->y - fr_i->y) -
		    dr_a.y * (at_i->x - fr_i->x));
	}

	/* LMO centroids of fragment i seen from centroid j */
	for (size_t k = 0; k < fr_i->n_lmo; k++) {
		vec_t *ct_ii = fr_i->lmo_centroids + k;

		vec_t dr_a = {
			ct_j->x - ct_ii->x - swf->cell.x,
			ct_j->y - ct_ii->y - swf->cell.y,
			ct_j->z - ct_ii->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		double tmp = 2.0 / (r * r * r);

		vja += 2.0 / r;

		dvja.x += tmp * dr_a.x;
		dvja.y += tmp * dr_a.y;
		dvja.z += tmp * dr_a.z;

		dvja.a += tmp * (dr_a.y * (ct_ii->z - fr_i->z) -
		    dr_a.z * (ct_ii->y - fr_i->y));
		dvja.b += tmp * (dr_a.z * (ct_ii->x - fr_i->x) -
		    dr_a.x * (ct_ii->z - fr_i->z));
		dvja.c += tmp * (dr_a.x * (ct_ii->y - fr_i->y) -
		    dr_a.y * (ct_ii->x - fr_i->x));
	}

	t1 = 2.0 * s_ij * (vib + vja - 1.0 / r_ij);

	force.x += t1 * ds_ij.x +
	    s_ij * s_ij * (dvib.x + dvja.x - dr.x / r_ij3);
	force.y += t1 * ds_ij.y +
	    s_ij * s_ij * (dvib.y + dvja.y - dr.y / r_ij3);
	force.z += t1 * ds_ij.z +
	    s_ij * s_ij * (dvib.z + dvja.z - dr.z / r_ij3);

	torque_i.x += -t1 * ds_ij.a - s_ij * s_ij * (dvib.a + dvja.a -
	    (dr.y * (ct_i->z - fr_i->z) - dr.z * (ct_i->y - fr_i->y)) / r_ij3);
	torque_i.y += -t1 * ds_ij.b - s_ij * s_ij * (dvib.b + dvja.b -
	    (dr.z * (ct_i->x - fr_i->x) - dr.x * (ct_i->z - fr_i->z)) / r_ij3);
	torque_i.z += -t1 * ds_ij.c - s_ij * s_ij * (dvib.c + dvja.c -
	    (dr.x * (ct_i->y - fr_i->y) - dr.y * (ct_i->x - fr_i->x)) / r_ij3);

	/* overall factor of 2 (matches lmo_lmo_xr_energy) and switching
	 * function weight */
	force.x *= 2.0 * swf->swf;
	force.y *= 2.0 * swf->swf;
	force.z *= 2.0 * swf->swf;

	torque_i.x *= 2.0 * swf->swf;
	torque_i.y *= 2.0 * swf->swf;
	torque_i.z *= 2.0 * swf->swf;

	/* transfer the torque to fragment j's reference point: add r x F */
	vec_t torque_j = {
		torque_i.x + force.y * (fr_j->z - fr_i->z - swf->cell.z) -
		    force.z * (fr_j->y - fr_i->y - swf->cell.y),
		torque_i.y + force.z * (fr_j->x - fr_i->x - swf->cell.x) -
		    force.x * (fr_j->z - fr_i->z - swf->cell.z),
		torque_i.z + force.x * (fr_j->y - fr_i->y - swf->cell.y) -
		    force.y * (fr_j->x - fr_i->x - swf->cell.x)
	};

	six_atomic_add_xyz(efp->grad + fr_i_idx, &force);
	six_atomic_sub_xyz(efp->grad + fr_j_idx, &force);
	six_atomic_add_abc(efp->grad + fr_i_idx, &torque_i);
	six_atomic_sub_abc(efp->grad + fr_j_idx, &torque_j);

	efp_add_stress(&swf->dr, &force, &efp->stress);
}
425
/*
 * Exchange-repulsion energy contribution of one LMO pair: LMO i on
 * fragment fr_i and LMO j on fragment fr_j. The three parts mirror
 * the three terms differentiated in lmo_lmo_xr_grad; see the Li &
 * Gordon reference above.
 *
 * lmo_s, lmo_t   overlap/kinetic integrals in the LMO basis
 *                (row-major, n_lmo_i x n_lmo_j)
 *
 * Returns the (unswitched) pair energy; the caller applies swf->swf.
 */
static double
lmo_lmo_xr_energy(struct frag *fr_i, struct frag *fr_j, size_t i, size_t j,
	const double *lmo_s, const double *lmo_t, const struct swf *swf)
{
	double s_ij = lmo_s[i * fr_j->n_lmo + j];
	double t_ij = lmo_t[i * fr_j->n_lmo + j];

	const vec_t *ct_i = fr_i->lmo_centroids + i;
	const vec_t *ct_j = fr_j->lmo_centroids + j;

	/* centroid separation including the periodic cell offset */
	vec_t dr = {
		ct_j->x - ct_i->x - swf->cell.x,
		ct_j->y - ct_i->y - swf->cell.y,
		ct_j->z - ct_i->z - swf->cell.z
	};

	double r_ij = vec_len(&dr);
	double exr = 0.0;

	/* xr - first part: overlap-only term, skipped below threshold
	 * where the logarithm would diverge */
	if (fabs(s_ij) > INTEGRAL_THRESHOLD) {
		exr += -2.0 * sqrt(-2.0 * log(fabs(s_ij)) / PI) *
		    s_ij * s_ij / r_ij;
	}

	/* xr - second part: Fock-matrix/kinetic-energy term */
	for (size_t k = 0; k < fr_i->n_lmo; k++) {
		exr -= s_ij * lmo_s[k * fr_j->n_lmo + j] *
		    fr_i->xr_fock_mat[fock_idx(i, k)];
	}
	for (size_t l = 0; l < fr_j->n_lmo; l++) {
		exr -= s_ij * lmo_s[i * fr_j->n_lmo + l] *
		    fr_j->xr_fock_mat[fock_idx(j, l)];
	}
	exr += 2.0 * s_ij * t_ij;

	/* xr - third part: electrostatic-like term; nuclear attraction
	 * (-Z/r) and LMO repulsion (+2/r) on both fragments */
	for (size_t jj = 0; jj < fr_j->n_xr_atoms; jj++) {
		struct xr_atom *at_j = fr_j->xr_atoms + jj;

		vec_t dr_a = {
			at_j->x - ct_i->x - swf->cell.x,
			at_j->y - ct_i->y - swf->cell.y,
			at_j->z - ct_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);

		exr -= s_ij * s_ij * at_j->znuc / r;
	}
	for (size_t jj = 0; jj < fr_j->n_lmo; jj++) {
		const vec_t *ct_jj = fr_j->lmo_centroids + jj;

		vec_t dr_a = {
			ct_jj->x - ct_i->x - swf->cell.x,
			ct_jj->y - ct_i->y - swf->cell.y,
			ct_jj->z - ct_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		exr += 2.0 * s_ij * s_ij / r;
	}
	for (size_t ii = 0; ii < fr_i->n_xr_atoms; ii++) {
		struct xr_atom *at_i = fr_i->xr_atoms + ii;

		vec_t dr_a = {
			ct_j->x - at_i->x - swf->cell.x,
			ct_j->y - at_i->y - swf->cell.y,
			ct_j->z - at_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		exr -= s_ij * s_ij * at_i->znuc / r;
	}
	for (size_t ii = 0; ii < fr_i->n_lmo; ii++) {
		const vec_t *ct_ii = fr_i->lmo_centroids + ii;

		vec_t dr_a = {
			ct_j->x - ct_ii->x - swf->cell.x,
			ct_j->y - ct_ii->y - swf->cell.y,
			ct_j->z - ct_ii->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		exr += 2.0 * s_ij * s_ij / r;
	}
	exr -= s_ij * s_ij / r_ij;

	/* overall factor of 2 (matches lmo_lmo_xr_grad) */
	return 2.0 * exr;
}
516
/*
 * Compute the exchange-repulsion (and, with overlap damping, the charge
 * penetration) interaction between fragments frag_i and frag_j.
 *
 * lmo_s, lmo_ds  caller-provided buffers; filled with the LMO-basis
 *                overlap integrals and (when gradients are requested)
 *                their derivatives
 * exr_out        receives the switched exchange-repulsion energy
 * ecp_out        receives the switched charge-penetration energy
 *
 * When efp->do_gradient is set, fragment forces/torques and the stress
 * tensor are also accumulated.
 *
 * NOTE(review): the malloc results below are used unchecked; with a
 * void return there is no error path here — confirm this matches the
 * library's out-of-memory policy.
 */
void
efp_frag_frag_xr(struct efp *efp, size_t frag_i, size_t frag_j, double *lmo_s,
	six_t *lmo_ds, double *exr_out, double *ecp_out)
{
	struct frag *fr_i = efp->frags + frag_i;
	struct frag *fr_j = efp->frags + frag_j;

	size_t ij_wf_size = fr_i->xr_wf_size * fr_j->xr_wf_size;
	size_t ij_nlmo = fr_i->n_lmo * fr_j->n_lmo;
	size_t ij_nlmo_wf_size = fr_i->n_lmo * fr_j->xr_wf_size;
	double *s = (double *)malloc(ij_wf_size * sizeof(double));
	double *t = (double *)malloc(ij_wf_size * sizeof(double));
	double *lmo_t = (double *)malloc(ij_nlmo * sizeof(double));
	double *tmp = (double *)malloc(ij_nlmo_wf_size * sizeof(double));
	struct xr_atom *atoms_j = (struct xr_atom *)malloc(
	    fr_j->n_xr_atoms * sizeof(struct xr_atom));
	struct swf swf = efp_make_swf(efp, fr_i, fr_j);

	/* work on a copy of fragment j's atoms shifted into the periodic
	 * image selected by the switching function */
	for (size_t j = 0; j < fr_j->n_xr_atoms; j++) {
		atoms_j[j] = fr_j->xr_atoms[j];
		atoms_j[j].x -= swf.cell.x;
		atoms_j[j].y -= swf.cell.y;
		atoms_j[j].z -= swf.cell.z;
	}

	/* AO-basis overlap (s) and kinetic energy (t) integrals */
	efp_st_int(fr_i->n_xr_atoms, fr_i->xr_atoms,
	    fr_j->n_xr_atoms, atoms_j,
	    fr_j->xr_wf_size, s, t);

	/* transform both to the LMO basis */
	transform_integrals(fr_i->n_lmo, fr_j->n_lmo,
	    fr_i->xr_wf_size, fr_j->xr_wf_size,
	    fr_i->xr_wf, fr_j->xr_wf,
	    s, lmo_s, tmp);
	transform_integrals(fr_i->n_lmo, fr_j->n_lmo,
	    fr_i->xr_wf_size, fr_j->xr_wf_size,
	    fr_i->xr_wf, fr_j->xr_wf,
	    t, lmo_t, tmp);

	double exr = 0.0;
	double ecp = 0.0;

	/* sum pair energies over all LMO pairs */
	for (size_t i = 0, idx = 0; i < fr_i->n_lmo; i++) {
		for (size_t j = 0; j < fr_j->n_lmo; j++, idx++) {
			double s_ij = lmo_s[i * fr_j->n_lmo + j];

			vec_t dr = {
				fr_j->lmo_centroids[j].x -
				    fr_i->lmo_centroids[i].x - swf.cell.x,
				fr_j->lmo_centroids[j].y -
				    fr_i->lmo_centroids[i].y - swf.cell.y,
				fr_j->lmo_centroids[j].z -
				    fr_i->lmo_centroids[i].z - swf.cell.z
			};

			double r_ij = vec_len(&dr);

			if ((efp->opts.terms & EFP_TERM_ELEC) &&
			    (efp->opts.elec_damp == EFP_ELEC_DAMP_OVERLAP))
				ecp += charge_penetration_energy(s_ij, r_ij);
			if (efp->opts.terms & EFP_TERM_XR)
				exr += lmo_lmo_xr_energy(fr_i, fr_j, i, j,
				    lmo_s, lmo_t, &swf);
		}
	}

	/* apply the switching function */
	*exr_out = exr * swf.swf;
	*ecp_out = ecp * swf.swf;

	if (!efp->do_gradient) {
		free(s);
		free(t);
		free(lmo_t);
		free(tmp);
		free(atoms_j);
		return;
	}

	/* compute gradient */

	six_t *ds = (six_t *)malloc(ij_wf_size * sizeof(six_t));
	six_t *dt = (six_t *)malloc(ij_wf_size * sizeof(six_t));
	six_t *lmo_dt = (six_t *)malloc(ij_nlmo * sizeof(six_t));
	six_t *sixtmp = (six_t *)malloc(ij_nlmo_wf_size * sizeof(six_t));
	double *lmo_tmp = (double *)malloc(ij_nlmo * sizeof(double));

	/* AO-basis derivatives of the s and t integrals */
	efp_st_int_deriv(fr_i->n_xr_atoms, fr_i->xr_atoms,
	    fr_j->n_xr_atoms, atoms_j,
	    VEC(fr_i->x), fr_i->xr_wf_size, fr_j->xr_wf_size,
	    ds, dt);

	transform_integral_derivatives(fr_i->n_lmo, fr_j->n_lmo,
	    fr_i->xr_wf_size, fr_j->xr_wf_size,
	    fr_i->xr_wf, fr_j->xr_wf,
	    ds, lmo_ds, sixtmp);
	transform_integral_derivatives(fr_i->n_lmo, fr_j->n_lmo,
	    fr_i->xr_wf_size, fr_j->xr_wf_size,
	    fr_i->xr_wf, fr_j->xr_wf,
	    dt, lmo_dt, sixtmp);

	/* fold the rotational derivative of the wavefunction coefficients
	 * (xr_wf_deriv) into components 3..5 (a/b/c) of lmo_ds and lmo_dt */
	for (size_t a = 0; a < 3; a++) {
		transform_integrals(fr_i->n_lmo, fr_j->n_lmo,
		    fr_i->xr_wf_size, fr_j->xr_wf_size,
		    fr_i->xr_wf_deriv[a], fr_j->xr_wf,
		    s, lmo_tmp, tmp);
		add_six_vec(3 + a, fr_i->n_lmo * fr_j->n_lmo, lmo_tmp, lmo_ds);

		transform_integrals(fr_i->n_lmo, fr_j->n_lmo,
		    fr_i->xr_wf_size, fr_j->xr_wf_size,
		    fr_i->xr_wf_deriv[a], fr_j->xr_wf,
		    t, lmo_tmp, tmp);
		add_six_vec(3 + a, fr_i->n_lmo * fr_j->n_lmo, lmo_tmp, lmo_dt);
	}

	/* accumulate per-pair gradient contributions */
	for (size_t i = 0, idx = 0; i < fr_i->n_lmo; i++) {
		for (size_t j = 0; j < fr_j->n_lmo; j++, idx++) {
			size_t ij = i * fr_j->n_lmo + j;

			if ((efp->opts.terms & EFP_TERM_ELEC) &&
			    (efp->opts.elec_damp == EFP_ELEC_DAMP_OVERLAP))
				charge_penetration_grad(efp, frag_i, frag_j,
				    i, j, lmo_s[ij], lmo_ds[ij], &swf);
			if (efp->opts.terms & EFP_TERM_XR)
				lmo_lmo_xr_grad(efp, frag_i, frag_j, i, j,
				    lmo_s, lmo_t, lmo_ds, lmo_dt, &swf);
		}
	}

	/* gradient of the switching function itself */
	vec_t force = {
		swf.dswf.x * (exr + ecp),
		swf.dswf.y * (exr + ecp),
		swf.dswf.z * (exr + ecp)
	};

	six_atomic_add_xyz(efp->grad + frag_i, &force);
	six_atomic_sub_xyz(efp->grad + frag_j, &force);
	efp_add_stress(&swf.dr, &force, &efp->stress);

	free(s);
	free(ds);
	free(t);
	free(dt);
	free(lmo_t);
	free(lmo_dt);
	free(lmo_tmp);
	free(tmp);
	free(sixtmp);
	free(atoms_j);
}
665
static inline size_t
func_d_idx(size_t a, size_t b)
{
	/* order in which GAMESS stores D functions */
	enum { XX = 0, YY, ZZ, XY, XZ, YZ };

	/* map the symmetric Cartesian pair (a,b) to the packed position */
	static const size_t map[3][3] = {
		{ XX, XY, XZ },
		{ XY, YY, YZ },
		{ XZ, YZ, ZZ }
	};

	return map[a][b];
}
678
static inline size_t
func_f_idx(size_t a, size_t b, size_t c)
{
	/* order in which GAMESS stores F functions */
	enum { XXX = 0, YYY, ZZZ, XXY, XXZ, XYY, YYZ, XZZ, YZZ, XYZ };

	/* map the symmetric Cartesian triple (a,b,c) to the packed position */
	static const size_t map[3][3][3] = {
		{ { XXX, XXY, XXZ }, { XXY, XYY, XYZ }, { XXZ, XYZ, XZZ } },
		{ { XXY, XYY, XYZ }, { XYY, YYY, YYZ }, { XYZ, YYZ, YZZ } },
		{ { XXZ, XYZ, XZZ }, { XYZ, YYZ, YZZ }, { XZZ, YZZ, ZZZ } }
	};

	return map[a][b][c];
}
693
694 static void
rotate_func_d(const mat_t * rotmat,const double * in,double * out)695 rotate_func_d(const mat_t *rotmat, const double *in, double *out)
696 {
697 const double norm = sqrt(3.0) / 2.0;
698
699 double full_in[9], full_out[9];
700
701 for (size_t a = 0; a < 3; a++) {
702 for (size_t b = 0; b < 3; b++) {
703 full_in[3 * a + b] = in[func_d_idx(a, b)];
704 if (a != b)
705 full_in[3 * a + b] *= norm;
706 }
707 }
708
709 efp_rotate_t2(rotmat, full_in, full_out);
710
711 for (size_t a = 0; a < 3; a++) {
712 for (size_t b = 0; b < 3; b++) {
713 if (a != b)
714 full_out[3 * a + b] /= norm;
715 out[func_d_idx(a, b)] = full_out[3 * a + b];
716 }
717 }
718 }
719
720 static void
rotate_func_f(const mat_t * rotmat,const double * in,double * out)721 rotate_func_f(const mat_t *rotmat, const double *in, double *out)
722 {
723 const double norm1 = sqrt(5.0) / 3.0;
724 const double norm2 = sqrt(3.0) / 2.0;
725
726 double full_in[27], full_out[27];
727
728 for (size_t a = 0; a < 3; a++)
729 for (size_t b = 0; b < 3; b++)
730 for (size_t c = 0; c < 3; c++) {
731 size_t full_idx = 9 * a + 3 * b + c;
732 size_t in_idx = func_f_idx(a, b, c);
733
734 full_in[full_idx] = in[in_idx];
735
736 if (a != b || a != c)
737 full_in[full_idx] *= norm1;
738
739 if (a != b && a != c && b != c)
740 full_in[full_idx] *= norm2;
741 }
742
743 efp_rotate_t3(rotmat, full_in, full_out);
744
745 for (size_t a = 0; a < 3; a++)
746 for (size_t b = 0; b < 3; b++)
747 for (size_t c = 0; c < 3; c++) {
748 size_t full_idx = 9 * a + 3 * b + c;
749 size_t out_idx = func_f_idx(a, b, c);
750
751 if (a != b || a != c)
752 full_out[full_idx] /= norm1;
753
754 if (a != b && a != c && b != c)
755 full_out[full_idx] /= norm2;
756
757 out[out_idx] = full_out[full_idx];
758 }
759 }
760
/*
 * Derivative of P-shell coefficients with respect to rotation about the
 * given axis (0 = x, 1 = y, 2 = z). Writes three values into der.
 */
static void
coef_deriv_p(size_t axis, const double *coef, double *der)
{
	if (axis == 0) {
		der[0] = 0.0;
		der[1] = coef[2];
		der[2] = -coef[1];
	} else if (axis == 1) {
		der[0] = -coef[2];
		der[1] = 0.0;
		der[2] = coef[0];
	} else if (axis == 2) {
		der[0] = coef[1];
		der[1] = -coef[0];
		der[2] = 0.0;
	}
}
782
/*
 * Derivative of D-shell coefficients with respect to rotation about the
 * given axis (0 = x, 1 = y, 2 = z). Writes six values into der.
 */
static void
coef_deriv_d(size_t axis, const double *coef, double *der)
{
	const double rt3 = sqrt(3.0);
	const double k = 2.0 / rt3;

	if (axis == 0) {
		der[0] = 0.0;
		der[1] = rt3 * coef[5];
		der[2] = -rt3 * coef[5];
		der[3] = coef[4];
		der[4] = -coef[3];
		der[5] = k * (coef[2] - coef[1]);
	} else if (axis == 1) {
		der[0] = -rt3 * coef[4];
		der[1] = 0.0;
		der[2] = rt3 * coef[4];
		der[3] = -coef[5];
		der[4] = k * (coef[0] - coef[2]);
		der[5] = coef[3];
	} else if (axis == 2) {
		der[0] = rt3 * coef[3];
		der[1] = -rt3 * coef[3];
		der[2] = 0.0;
		der[3] = k * (coef[1] - coef[0]);
		der[4] = coef[5];
		der[5] = -coef[4];
	}
}
815
/*
 * Derivative of F-shell coefficients with respect to rotation about the
 * given axis (0 = x, 1 = y, 2 = z). Writes ten values into der.
 */
static void
coef_deriv_f(size_t axis, const double *coef, double *der)
{
	const double rt3 = sqrt(3.0);
	const double rt5 = sqrt(5.0);
	const double k35 = 3.0 / rt5;
	const double k23 = 2.0 / rt3;

	if (axis == 0) {
		der[0] = 0.0;
		der[1] = rt5 * coef[6];
		der[2] = -rt5 * coef[8];
		der[3] = coef[4];
		der[4] = -coef[3];
		der[5] = rt3 * coef[9];
		der[6] = -k35 * coef[1] + 2.0 * coef[8];
		der[7] = -rt3 * coef[9];
		der[8] = k35 * coef[2] - 2.0 * coef[6];
		der[9] = k23 * (coef[7] - coef[5]);
	} else if (axis == 1) {
		der[0] = -rt5 * coef[4];
		der[1] = 0.0;
		der[2] = rt5 * coef[7];
		der[3] = -rt3 * coef[9];
		der[4] = k35 * coef[0] - 2.0 * coef[7];
		der[5] = -coef[6];
		der[6] = coef[5];
		der[7] = -k35 * coef[2] + 2.0 * coef[4];
		der[8] = rt3 * coef[9];
		der[9] = k23 * (coef[3] - coef[8]);
	} else if (axis == 2) {
		der[0] = rt5 * coef[3];
		der[1] = -rt5 * coef[5];
		der[2] = 0.0;
		der[3] = -k35 * coef[0] + 2.0 * coef[5];
		der[4] = rt3 * coef[9];
		der[5] = k35 * coef[1] - 2.0 * coef[3];
		der[6] = -rt3 * coef[9];
		der[7] = coef[8];
		der[8] = -coef[7];
		der[9] = k23 * (coef[6] - coef[4]);
	}
}
861
/*
 * Refresh all rotation-dependent exchange-repulsion data of a fragment
 * after its position/orientation changed: LMO centroids, XR atom
 * coordinates, and the wavefunction coefficients (rotated from the
 * library copy in frag->lib, with rotational derivatives stored in
 * frag->xr_wf_deriv[0..2]).
 */
void
efp_update_xr(struct frag *frag)
{
	const mat_t *rotmat = &frag->rotmat;

	/* update LMO centroids */
	for (size_t i = 0; i < frag->n_lmo; i++) {
		efp_move_pt(CVEC(frag->x), rotmat,
		    frag->lib->lmo_centroids + i, frag->lmo_centroids + i);
	}
	/* update xr atoms */
	for (size_t i = 0; i < frag->n_xr_atoms; i++) {
		efp_move_pt(CVEC(frag->x), rotmat,
		    CVEC(frag->lib->xr_atoms[i].x), VEC(frag->xr_atoms[i].x));
	}
	/* rotate wavefunction */
	for (size_t k = 0; k < frag->n_lmo; k++) {
		double *deriv[3];

		/* per-axis rotational derivative buffers for this LMO */
		for (size_t a = 0; a < 3; a++)
			deriv[a] = frag->xr_wf_deriv[a] + k * frag->xr_wf_size;

		const double *in = frag->lib->xr_wf + k * frag->xr_wf_size;
		double *out = frag->xr_wf + k * frag->xr_wf_size;

		/* walk the basis shells; func tracks the running offset
		 * into the coefficient arrays */
		for (size_t j = 0, func = 0; j < frag->n_xr_atoms; j++) {
			const struct xr_atom *atom = frag->xr_atoms + j;

			for (size_t i = 0; i < atom->n_shells; i++) {
				switch (atom->shells[i].type) {
				case 'S':
					/* S coefficients are rotation invariant */
					func++;
					break;
				case 'L':
					/* L = combined S+P: skip the S part,
					 * then handle P below */
					func++;
					/* fall through */
				case 'P': {
					vec_t r = mat_vec(rotmat,
					    (const vec_t *)(in + func));
					out[func + 0] = r.x;
					out[func + 1] = r.y;
					out[func + 2] = r.z;
					for (size_t a = 0; a < 3; a++) {
						coef_deriv_p(a, out + func,
						    deriv[a] + func);
					}
					func += 3;
					break;
				}
				case 'D':
					rotate_func_d(rotmat, in + func,
					    out + func);
					for (size_t a = 0; a < 3; a++) {
						coef_deriv_d(a, out + func,
						    deriv[a] + func);
					}
					func += 6;
					break;
				case 'F':
					rotate_func_f(rotmat, in + func,
					    out + func);
					for (size_t a = 0; a < 3; a++) {
						coef_deriv_f(a, out + func,
						    deriv[a] + func);
					}
					func += 10;
					break;
				}
			}
		}
	}
}
934