1 /*-
2  * Copyright (c) 2012-2017 Ilya Kaliman
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "clapack.h"
31 #include "private.h"
32 
33 #define INTEGRAL_THRESHOLD 1.0e-7
34 
/*
 * Index into a symmetric matrix stored in packed (lower-triangular)
 * form: element (i, j) and (j, i) map to the same slot.
 */
static inline size_t
fock_idx(size_t i, size_t j)
{
	size_t lo = i, hi = j;

	if (hi < lo) {
		lo = j;
		hi = i;
	}
	return hi * (hi + 1) / 2 + lo;
}
40 
41 static double
charge_penetration_energy(double s_ij,double r_ij)42 charge_penetration_energy(double s_ij, double r_ij)
43 {
44 	double ln_s;
45 
46 	if (fabs(s_ij) < INTEGRAL_THRESHOLD)
47 		return 0.0;
48 
49 	ln_s = log(fabs(s_ij));
50 
51 	return -s_ij * s_ij / r_ij / sqrt(-2.0 * ln_s);
52 }
53 
/*
 * Accumulate the gradient of the overlap-based charge-penetration
 * energy term for one pair of LMOs.
 *
 * efp        - driver state; forces/torques are added into efp->grad
 *              and the virial contribution into efp->stress
 * fr_i_idx,
 * fr_j_idx   - indices of the two interacting fragments
 * lmo_i_idx,
 * lmo_j_idx  - LMO indices within each fragment
 * s_ij       - overlap integral between the two LMOs
 * ds_ij      - derivatives of s_ij: x/y/z translational and a/b/c
 *              rotational components
 * swf        - switching function data (includes periodic cell offset)
 */
static void
charge_penetration_grad(struct efp *efp, size_t fr_i_idx, size_t fr_j_idx,
    size_t lmo_i_idx, size_t lmo_j_idx, double s_ij, const six_t ds_ij,
    const struct swf *swf)
{
	/* negligible overlap contributes nothing; also guards log(0) */
	if (fabs(s_ij) < INTEGRAL_THRESHOLD)
		return;

	const struct frag *fr_i = efp->frags + fr_i_idx;
	const struct frag *fr_j = efp->frags + fr_j_idx;
	const vec_t *ct_i = fr_i->lmo_centroids + lmo_i_idx;
	const vec_t *ct_j = fr_j->lmo_centroids + lmo_j_idx;

	/* centroid-to-centroid vector, shifted by the periodic cell offset */
	vec_t dr = {
		ct_j->x - ct_i->x - swf->cell.x,
		ct_j->y - ct_i->y - swf->cell.y,
		ct_j->z - ct_i->z - swf->cell.z
	};

	double r_ij = vec_len(&dr);
	double ln_s = log(fabs(s_ij));
	/* t1 multiplies dE/dr terms, t2 multiplies dE/ds terms of
	 * E = -s^2 / (r * sqrt(-2 ln|s|)) (see charge_penetration_energy) */
	double t1 = s_ij * s_ij / (r_ij * r_ij * r_ij) / sqrt(-2.0 * ln_s);
	double t2 = -s_ij / r_ij / sqrt(2.0) * (2.0 / sqrt(-ln_s) +
	    0.5 / sqrt(-ln_s * ln_s * ln_s));

	vec_t force, torque_i, torque_j;

	/* force on fragment i, scaled by the switching function value */
	force.x = (t2 * ds_ij.x - t1 * dr.x) * swf->swf;
	force.y = (t2 * ds_ij.y - t1 * dr.y) * swf->swf;
	force.z = (t2 * ds_ij.z - t1 * dr.z) * swf->swf;

	/* torque on fragment i about its reference point (fr_i->x,y,z) */
	torque_i.x = swf->swf * (-t2 * ds_ij.a +
	    t1 * (dr.y * (ct_i->z - fr_i->z) - dr.z * (ct_i->y - fr_i->y)));
	torque_i.y = swf->swf * (-t2 * ds_ij.b +
	    t1 * (dr.z * (ct_i->x - fr_i->x) - dr.x * (ct_i->z - fr_i->z)));
	torque_i.z = swf->swf * (-t2 * ds_ij.c +
	    t1 * (dr.x * (ct_i->y - fr_i->y) - dr.y * (ct_i->x - fr_i->x)));

	/* translate the torque to fragment j's reference point */
	torque_j.x = torque_i.x + force.y * (fr_j->z - fr_i->z - swf->cell.z) -
				  force.z * (fr_j->y - fr_i->y - swf->cell.y);
	torque_j.y = torque_i.y + force.z * (fr_j->x - fr_i->x - swf->cell.x) -
				  force.x * (fr_j->z - fr_i->z - swf->cell.z);
	torque_j.z = torque_i.z + force.x * (fr_j->y - fr_i->y - swf->cell.y) -
				  force.y * (fr_j->x - fr_i->x - swf->cell.x);

	/* equal and opposite accumulation on the two fragments */
	six_atomic_add_xyz(efp->grad + fr_i_idx, &force);
	six_atomic_sub_xyz(efp->grad + fr_j_idx, &force);
	six_atomic_add_abc(efp->grad + fr_i_idx, &torque_i);
	six_atomic_sub_abc(efp->grad + fr_j_idx, &torque_j);

	efp_add_stress(&swf->dr, &force, &efp->stress);
}
106 
/*
 * Transform a matrix of AO-basis integrals into the LMO basis:
 *
 *     lmo_s(i,j) = sum_{p,q} wf_i(i,p) * s(p,q) * wf_j(j,q)
 *
 * where s is wf_size_i x wf_size_j (row-major), wf_i / wf_j hold the
 * LMO expansion coefficients of the two fragments, and tmp is scratch
 * of at least n_lmo_i * wf_size_j doubles.
 */
static void
transform_integrals(size_t n_lmo_i, size_t n_lmo_j, size_t wf_size_i,
    size_t wf_size_j, double *wf_i, double *wf_j, double *s, double *lmo_s,
    double *tmp)
{
	/* half transform over the first AO index: tmp = s^T * wf_i^T
	 * in column-major dgemm terms */
	efp_dgemm('N', 'N', (fortranint_t)wf_size_j, (fortranint_t)n_lmo_i,
	    (fortranint_t)wf_size_i, 1.0, s, (fortranint_t)wf_size_j, wf_i,
	    (fortranint_t)wf_size_i, 0.0, tmp, (fortranint_t)wf_size_j);
	/* half transform over the second AO index: lmo_s = wf_j^T * tmp */
	efp_dgemm('T', 'N', (fortranint_t)n_lmo_j, (fortranint_t)n_lmo_i,
	    (fortranint_t)wf_size_j, 1.0, wf_j, (fortranint_t)wf_size_j, tmp,
	    (fortranint_t)wf_size_j, 0.0, lmo_s, (fortranint_t)n_lmo_j);
}
119 
120 static void
transform_integral_derivatives(size_t n_lmo_i,size_t n_lmo_j,size_t wf_size_i,size_t wf_size_j,const double * wf_i,const double * wf_j,const six_t * ds,six_t * lmo_ds,six_t * tmp)121 transform_integral_derivatives(size_t n_lmo_i, size_t n_lmo_j, size_t wf_size_i,
122     size_t wf_size_j, const double *wf_i, const double *wf_j, const six_t *ds,
123     six_t *lmo_ds, six_t *tmp)
124 {
125 	const six_t *p_ds;
126 	six_t *p_tmp, *p_lmo_ds;
127 
128 	p_tmp = tmp;
129 	for (size_t i = 0; i < n_lmo_i; i++) {
130 	for (size_t j = 0; j < wf_size_j; j++, p_tmp++) {
131 		six_t sum = six_zero;
132 		p_ds = ds + j;
133 
134 		for (size_t k = 0; k < wf_size_i; k++, p_ds += wf_size_j) {
135 			double w = wf_i[wf_size_i * i + k];
136 
137 			sum.x += p_ds->x * w;
138 			sum.y += p_ds->y * w;
139 			sum.z += p_ds->z * w;
140 			sum.a += p_ds->a * w;
141 			sum.b += p_ds->b * w;
142 			sum.c += p_ds->c * w;
143 		}
144 		*p_tmp = sum;
145 	} }
146 
147 	p_lmo_ds = lmo_ds;
148 	for (size_t i = 0; i < n_lmo_i; i++) {
149 	for (size_t j = 0; j < n_lmo_j; j++, p_lmo_ds++) {
150 		six_t sum = six_zero;
151 		p_tmp = tmp + i * wf_size_j;
152 
153 		for (size_t k = 0; k < wf_size_j; k++, p_tmp++) {
154 			double w = wf_j[wf_size_j * j + k];
155 
156 			sum.x += p_tmp->x * w;
157 			sum.y += p_tmp->y * w;
158 			sum.z += p_tmp->z * w;
159 			sum.a += p_tmp->a * w;
160 			sum.b += p_tmp->b * w;
161 			sum.c += p_tmp->c * w;
162 		}
163 		*p_lmo_ds = sum;
164 	} }
165 }
166 
167 static void
add_six_vec(size_t el,size_t size,const double * vec,six_t * six)168 add_six_vec(size_t el, size_t size, const double *vec, six_t *six)
169 {
170 	double *ptr = (double *)six + el;
171 
172 	for (size_t i = 0; i < size; i++, vec++, ptr += 6)
173 		*ptr += *vec;
174 }
175 
176 /*
177  * Reference:
178  *
179  * Hui Li, Mark Gordon
180  *
181  * Gradients of the exchange-repulsion energy in the general effective fragment
182  * potential method
183  *
184  * Theor. Chem. Acc. 115, 385 (2006)
185  */
/*
 * Accumulate the exchange-repulsion gradient contribution from one
 * pair of LMOs: i on fragment fr_i_idx, j on fragment fr_j_idx.
 * The three "parts" below differentiate the three corresponding terms
 * of lmo_lmo_xr_energy().  Forces and torques go into efp->grad, the
 * virial contribution into efp->stress.
 */
static void
lmo_lmo_xr_grad(struct efp *efp, size_t fr_i_idx, size_t fr_j_idx,
    size_t i, size_t j, const double *lmo_s, const double *lmo_t,
    const six_t *lmo_ds, const six_t *lmo_dt, const struct swf *swf)
{
	const struct frag *fr_i = efp->frags + fr_i_idx;
	const struct frag *fr_j = efp->frags + fr_j_idx;
	const vec_t *ct_i = fr_i->lmo_centroids + i;
	const vec_t *ct_j = fr_j->lmo_centroids + j;

	/* row-major index of the (i, j) pair in the LMO matrices */
	size_t ij = i * fr_j->n_lmo + j;

	/* centroid-to-centroid vector with the periodic cell offset */
	vec_t dr = {
		ct_j->x - ct_i->x - swf->cell.x,
		ct_j->y - ct_i->y - swf->cell.y,
		ct_j->z - ct_i->z - swf->cell.z
	};

	double s_ij = lmo_s[ij];	/* overlap integral */
	double t_ij = lmo_t[ij];	/* kinetic energy integral */
	double r_ij = vec_len(&dr);
	double r_ij3 = r_ij * r_ij * r_ij;

	six_t ds_ij = lmo_ds[ij];
	six_t dt_ij = lmo_dt[ij];

	double t1, t2;
	vec_t force = vec_zero, torque_i = vec_zero;

	/* first part: overlap-dependent term; threshold guards log(0) */
	if (fabs(s_ij) > INTEGRAL_THRESHOLD) {
		double ln_s = log(fabs(s_ij));

		t1 = s_ij / r_ij * (-sqrt(-2.0 / PI / ln_s) +
		    4.0 * sqrt(-2.0 / PI * ln_s));
		t2 = 2.0 * sqrt(-2.0 / PI * ln_s) * s_ij * s_ij / r_ij3;

		force.x += -t1 * ds_ij.x - t2 * dr.x;
		force.y += -t1 * ds_ij.y - t2 * dr.y;
		force.z += -t1 * ds_ij.z - t2 * dr.z;

		torque_i.x += t1 * ds_ij.a + t2 * (dr.y * (ct_i->z - fr_i->z) -
						   dr.z * (ct_i->y - fr_i->y));
		torque_i.y += t1 * ds_ij.b + t2 * (dr.z * (ct_i->x - fr_i->x) -
						   dr.x * (ct_i->z - fr_i->z));
		torque_i.z += t1 * ds_ij.c + t2 * (dr.x * (ct_i->y - fr_i->y) -
						   dr.y * (ct_i->x - fr_i->x));
	}

	/* second part: Fock-matrix and kinetic-energy terms.
	 * fij = sum_k F_i(i,k) S(k,j) and fji = sum_l F_j(j,l) S(i,l),
	 * with their derivatives accumulated alongside */
	double fij = 0.0, fji = 0.0;
	six_t dfij = six_zero, dfji = six_zero;

	for (size_t k = 0; k < fr_i->n_lmo; k++) {
		double fe = fr_i->xr_fock_mat[fock_idx(i, k)];
		const six_t *ds = lmo_ds + k * fr_j->n_lmo + j;

		fij += fe * lmo_s[k * fr_j->n_lmo + j];

		dfij.x += fe * ds->x;
		dfij.y += fe * ds->y;
		dfij.z += fe * ds->z;
		dfij.a += fe * ds->a;
		dfij.b += fe * ds->b;
		dfij.c += fe * ds->c;
	}

	for (size_t l = 0; l < fr_j->n_lmo; l++) {
		double fe = fr_j->xr_fock_mat[fock_idx(j, l)];
		const six_t *ds = lmo_ds + i * fr_j->n_lmo + l;

		fji += fe * lmo_s[i * fr_j->n_lmo + l];

		dfji.x += fe * ds->x;
		dfji.y += fe * ds->y;
		dfji.z += fe * ds->z;
		dfji.a += fe * ds->a;
		dfji.b += fe * ds->b;
		dfji.c += fe * ds->c;
	}

	t1 = (fij + fji - 2.0 * t_ij);

	force.x += -t1 * ds_ij.x - s_ij * (dfij.x + dfji.x - 2.0 * dt_ij.x);
	force.y += -t1 * ds_ij.y - s_ij * (dfij.y + dfji.y - 2.0 * dt_ij.y);
	force.z += -t1 * ds_ij.z - s_ij * (dfij.z + dfji.z - 2.0 * dt_ij.z);

	torque_i.x += t1 * ds_ij.a + s_ij * (dfij.a + dfji.a - 2.0 * dt_ij.a);
	torque_i.y += t1 * ds_ij.b + s_ij * (dfij.b + dfji.b - 2.0 * dt_ij.b);
	torque_i.z += t1 * ds_ij.c + s_ij * (dfij.c + dfji.c - 2.0 * dt_ij.c);

	/* third part: electrostatic-like terms.  vib collects potentials
	 * at LMO i from fragment j, vja the same at LMO j from fragment i;
	 * derivatives are accumulated in parallel */
	double vib = 0.0, vja = 0.0;
	six_t dvib = six_zero, dvja = six_zero;

	/* nuclear (-znuc/r) contributions from fragment j's atoms */
	for (size_t l = 0; l < fr_j->n_xr_atoms; l++) {
		struct xr_atom *at_j = fr_j->xr_atoms + l;

		vec_t dr_a = {
			at_j->x - ct_i->x - swf->cell.x,
			at_j->y - ct_i->y - swf->cell.y,
			at_j->z - ct_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		double tmp = at_j->znuc / (r * r * r);

		vib -= at_j->znuc / r;

		dvib.x -= tmp * dr_a.x;
		dvib.y -= tmp * dr_a.y;
		dvib.z -= tmp * dr_a.z;

		dvib.a -= tmp * (dr_a.y * (ct_i->z - fr_i->z) -
				 dr_a.z * (ct_i->y - fr_i->y));
		dvib.b -= tmp * (dr_a.z * (ct_i->x - fr_i->x) -
				 dr_a.x * (ct_i->z - fr_i->z));
		dvib.c -= tmp * (dr_a.x * (ct_i->y - fr_i->y) -
				 dr_a.y * (ct_i->x - fr_i->x));
	}

	/* +2/r contributions from fragment j's LMO centroids
	 * (factor 2 presumably per doubly-occupied orbital — see paper) */
	for (size_t l = 0; l < fr_j->n_lmo; l++) {
		vec_t *ct_jj = fr_j->lmo_centroids + l;

		vec_t dr_a = {
			ct_jj->x - ct_i->x - swf->cell.x,
			ct_jj->y - ct_i->y - swf->cell.y,
			ct_jj->z - ct_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		double tmp = 2.0 / (r * r * r);

		vib += 2.0 / r;

		dvib.x += tmp * dr_a.x;
		dvib.y += tmp * dr_a.y;
		dvib.z += tmp * dr_a.z;

		dvib.a += tmp * (dr_a.y * (ct_i->z - fr_i->z) -
				 dr_a.z * (ct_i->y - fr_i->y));
		dvib.b += tmp * (dr_a.z * (ct_i->x - fr_i->x) -
				 dr_a.x * (ct_i->z - fr_i->z));
		dvib.c += tmp * (dr_a.x * (ct_i->y - fr_i->y) -
				 dr_a.y * (ct_i->x - fr_i->x));
	}

	/* nuclear contributions from fragment i's atoms at LMO j */
	for (size_t k = 0; k < fr_i->n_xr_atoms; k++) {
		struct xr_atom *at_i = fr_i->xr_atoms + k;

		vec_t dr_a = {
			ct_j->x - at_i->x - swf->cell.x,
			ct_j->y - at_i->y - swf->cell.y,
			ct_j->z - at_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		double tmp = at_i->znuc / (r * r * r);

		vja -= at_i->znuc / r;

		dvja.x -= tmp * dr_a.x;
		dvja.y -= tmp * dr_a.y;
		dvja.z -= tmp * dr_a.z;

		dvja.a -= tmp * (dr_a.y * (at_i->z - fr_i->z) -
				 dr_a.z * (at_i->y - fr_i->y));
		dvja.b -= tmp * (dr_a.z * (at_i->x - fr_i->x) -
				 dr_a.x * (at_i->z - fr_i->z));
		dvja.c -= tmp * (dr_a.x * (at_i->y - fr_i->y) -
				 dr_a.y * (at_i->x - fr_i->x));
	}

	/* +2/r contributions from fragment i's LMO centroids at LMO j */
	for (size_t k = 0; k < fr_i->n_lmo; k++) {
		vec_t *ct_ii = fr_i->lmo_centroids + k;

		vec_t dr_a = {
			ct_j->x - ct_ii->x - swf->cell.x,
			ct_j->y - ct_ii->y - swf->cell.y,
			ct_j->z - ct_ii->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		double tmp = 2.0 / (r * r * r);

		vja += 2.0 / r;

		dvja.x += tmp * dr_a.x;
		dvja.y += tmp * dr_a.y;
		dvja.z += tmp * dr_a.z;

		dvja.a += tmp * (dr_a.y * (ct_ii->z - fr_i->z) -
				 dr_a.z * (ct_ii->y - fr_i->y));
		dvja.b += tmp * (dr_a.z * (ct_ii->x - fr_i->x) -
				 dr_a.x * (ct_ii->z - fr_i->z));
		dvja.c += tmp * (dr_a.x * (ct_ii->y - fr_i->y) -
				 dr_a.y * (ct_ii->x - fr_i->x));
	}

	t1 = 2.0 * s_ij * (vib + vja - 1.0 / r_ij);

	force.x += t1 * ds_ij.x +
	    s_ij * s_ij * (dvib.x + dvja.x - dr.x / r_ij3);
	force.y += t1 * ds_ij.y +
	    s_ij * s_ij * (dvib.y + dvja.y - dr.y / r_ij3);
	force.z += t1 * ds_ij.z +
	    s_ij * s_ij * (dvib.z + dvja.z - dr.z / r_ij3);

	torque_i.x += -t1 * ds_ij.a - s_ij * s_ij * (dvib.a + dvja.a -
	    (dr.y * (ct_i->z - fr_i->z) - dr.z * (ct_i->y - fr_i->y)) / r_ij3);
	torque_i.y += -t1 * ds_ij.b - s_ij * s_ij * (dvib.b + dvja.b -
	    (dr.z * (ct_i->x - fr_i->x) - dr.x * (ct_i->z - fr_i->z)) / r_ij3);
	torque_i.z += -t1 * ds_ij.c - s_ij * s_ij * (dvib.c + dvja.c -
	    (dr.x * (ct_i->y - fr_i->y) - dr.y * (ct_i->x - fr_i->x)) / r_ij3);

	/* overall factor 2 (matching lmo_lmo_xr_energy's return) and
	 * switching function scaling */
	force.x *= 2.0 * swf->swf;
	force.y *= 2.0 * swf->swf;
	force.z *= 2.0 * swf->swf;

	torque_i.x *= 2.0 * swf->swf;
	torque_i.y *= 2.0 * swf->swf;
	torque_i.z *= 2.0 * swf->swf;

	/* translate the torque to fragment j's reference point */
	vec_t torque_j = {
		torque_i.x + force.y * (fr_j->z - fr_i->z - swf->cell.z) -
		    force.z * (fr_j->y - fr_i->y - swf->cell.y),
		torque_i.y + force.z * (fr_j->x - fr_i->x - swf->cell.x) -
		    force.x * (fr_j->z - fr_i->z - swf->cell.z),
		torque_i.z + force.x * (fr_j->y - fr_i->y - swf->cell.y) -
		    force.y * (fr_j->x - fr_i->x - swf->cell.x)
	};

	/* equal and opposite accumulation on the two fragments */
	six_atomic_add_xyz(efp->grad + fr_i_idx, &force);
	six_atomic_sub_xyz(efp->grad + fr_j_idx, &force);
	six_atomic_add_abc(efp->grad + fr_i_idx, &torque_i);
	six_atomic_sub_abc(efp->grad + fr_j_idx, &torque_j);

	efp_add_stress(&swf->dr, &force, &efp->stress);
}
425 
/*
 * Exchange-repulsion energy contribution from one pair of LMOs
 * (i on fragment fr_i, j on fragment fr_j).  The three parts below
 * are the terms differentiated in lmo_lmo_xr_grad(); lmo_s and lmo_t
 * are overlap and kinetic-energy integrals in the LMO basis.  Returns
 * the energy before switching-function scaling.
 */
static double
lmo_lmo_xr_energy(struct frag *fr_i, struct frag *fr_j, size_t i, size_t j,
    const double *lmo_s, const double *lmo_t, const struct swf *swf)
{
	double s_ij = lmo_s[i * fr_j->n_lmo + j];
	double t_ij = lmo_t[i * fr_j->n_lmo + j];

	const vec_t *ct_i = fr_i->lmo_centroids + i;
	const vec_t *ct_j = fr_j->lmo_centroids + j;

	/* centroid-to-centroid vector with the periodic cell offset */
	vec_t dr = {
		ct_j->x - ct_i->x - swf->cell.x,
		ct_j->y - ct_i->y - swf->cell.y,
		ct_j->z - ct_i->z - swf->cell.z
	};

	double r_ij = vec_len(&dr);
	double exr = 0.0;

	/* xr - first part; threshold guards log(0) for tiny overlaps */
	if (fabs(s_ij) > INTEGRAL_THRESHOLD) {
		exr += -2.0 * sqrt(-2.0 * log(fabs(s_ij)) / PI) *
		    s_ij * s_ij / r_ij;
	}

	/* xr - second part: Fock-matrix couplings of s_ij with the
	 * overlaps of all other LMOs, plus the kinetic-energy term */
	for (size_t k = 0; k < fr_i->n_lmo; k++) {
		exr -= s_ij * lmo_s[k * fr_j->n_lmo + j] *
		    fr_i->xr_fock_mat[fock_idx(i, k)];
	}
	for (size_t l = 0; l < fr_j->n_lmo; l++) {
		exr -= s_ij * lmo_s[i * fr_j->n_lmo + l] *
		    fr_j->xr_fock_mat[fock_idx(j, l)];
	}
	exr += 2.0 * s_ij * t_ij;

	/* xr - third part: electrostatic-like -znuc/r and +2/r terms
	 * from both fragments' nuclei and LMO centroids */
	for (size_t jj = 0; jj < fr_j->n_xr_atoms; jj++) {
		struct xr_atom *at_j = fr_j->xr_atoms + jj;

		vec_t dr_a = {
			at_j->x - ct_i->x - swf->cell.x,
			at_j->y - ct_i->y - swf->cell.y,
			at_j->z - ct_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);

		exr -= s_ij * s_ij * at_j->znuc / r;
	}
	for (size_t jj = 0; jj < fr_j->n_lmo; jj++) {
		const vec_t *ct_jj = fr_j->lmo_centroids + jj;

		vec_t dr_a = {
			ct_jj->x - ct_i->x - swf->cell.x,
			ct_jj->y - ct_i->y - swf->cell.y,
			ct_jj->z - ct_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		exr += 2.0 * s_ij * s_ij / r;
	}
	for (size_t ii = 0; ii < fr_i->n_xr_atoms; ii++) {
		struct xr_atom *at_i = fr_i->xr_atoms + ii;

		vec_t dr_a = {
			ct_j->x - at_i->x - swf->cell.x,
			ct_j->y - at_i->y - swf->cell.y,
			ct_j->z - at_i->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		exr -= s_ij * s_ij * at_i->znuc / r;
	}
	for (size_t ii = 0; ii < fr_i->n_lmo; ii++) {
		const vec_t *ct_ii = fr_i->lmo_centroids + ii;

		vec_t dr_a = {
			ct_j->x - ct_ii->x - swf->cell.x,
			ct_j->y - ct_ii->y - swf->cell.y,
			ct_j->z - ct_ii->z - swf->cell.z
		};

		double r = vec_len(&dr_a);
		exr += 2.0 * s_ij * s_ij / r;
	}
	exr -= s_ij * s_ij / r_ij;

	/* overall factor 2 (matches the scaling in lmo_lmo_xr_grad) */
	return 2.0 * exr;
}
516 
517 void
efp_frag_frag_xr(struct efp * efp,size_t frag_i,size_t frag_j,double * lmo_s,six_t * lmo_ds,double * exr_out,double * ecp_out)518 efp_frag_frag_xr(struct efp *efp, size_t frag_i, size_t frag_j, double *lmo_s,
519     six_t *lmo_ds, double *exr_out, double *ecp_out)
520 {
521 	struct frag *fr_i = efp->frags + frag_i;
522 	struct frag *fr_j = efp->frags + frag_j;
523 
524 	size_t ij_wf_size = fr_i->xr_wf_size * fr_j->xr_wf_size;
525 	size_t ij_nlmo = fr_i->n_lmo * fr_j->n_lmo;
526 	size_t ij_nlmo_wf_size = fr_i->n_lmo * fr_j->xr_wf_size;
527 	double *s = (double *)malloc(ij_wf_size * sizeof(double));
528 	double *t = (double *)malloc(ij_wf_size * sizeof(double));
529 	double *lmo_t = (double *)malloc(ij_nlmo * sizeof(double));
530 	double *tmp = (double *)malloc(ij_nlmo_wf_size * sizeof(double));
531 	struct xr_atom *atoms_j = (struct xr_atom *)malloc(
532 	    fr_j->n_xr_atoms * sizeof(struct xr_atom));
533 	struct swf swf = efp_make_swf(efp, fr_i, fr_j);
534 
535 	for (size_t j = 0; j < fr_j->n_xr_atoms; j++) {
536 		atoms_j[j] = fr_j->xr_atoms[j];
537 		atoms_j[j].x -= swf.cell.x;
538 		atoms_j[j].y -= swf.cell.y;
539 		atoms_j[j].z -= swf.cell.z;
540 	}
541 
542 	efp_st_int(fr_i->n_xr_atoms, fr_i->xr_atoms,
543 		   fr_j->n_xr_atoms, atoms_j,
544 		   fr_j->xr_wf_size, s, t);
545 
546 	transform_integrals(fr_i->n_lmo, fr_j->n_lmo,
547 			    fr_i->xr_wf_size, fr_j->xr_wf_size,
548 			    fr_i->xr_wf, fr_j->xr_wf,
549 			    s, lmo_s, tmp);
550 	transform_integrals(fr_i->n_lmo, fr_j->n_lmo,
551 			    fr_i->xr_wf_size, fr_j->xr_wf_size,
552 			    fr_i->xr_wf, fr_j->xr_wf,
553 			    t, lmo_t, tmp);
554 
555 	double exr = 0.0;
556 	double ecp = 0.0;
557 
558 	for (size_t i = 0, idx = 0; i < fr_i->n_lmo; i++) {
559 		for (size_t j = 0; j < fr_j->n_lmo; j++, idx++) {
560 			double s_ij = lmo_s[i * fr_j->n_lmo + j];
561 
562 			vec_t dr = {
563 				fr_j->lmo_centroids[j].x -
564 				    fr_i->lmo_centroids[i].x - swf.cell.x,
565 				fr_j->lmo_centroids[j].y -
566 				    fr_i->lmo_centroids[i].y - swf.cell.y,
567 				fr_j->lmo_centroids[j].z -
568 				    fr_i->lmo_centroids[i].z - swf.cell.z
569 			};
570 
571 			double r_ij = vec_len(&dr);
572 
573 			if ((efp->opts.terms & EFP_TERM_ELEC) &&
574 			    (efp->opts.elec_damp == EFP_ELEC_DAMP_OVERLAP))
575 				ecp += charge_penetration_energy(s_ij, r_ij);
576 			if (efp->opts.terms & EFP_TERM_XR)
577 				exr += lmo_lmo_xr_energy(fr_i, fr_j, i, j,
578 				    lmo_s, lmo_t, &swf);
579 		}
580 	}
581 
582 	*exr_out = exr * swf.swf;
583 	*ecp_out = ecp * swf.swf;
584 
585 	if (!efp->do_gradient) {
586 		free(s);
587 		free(t);
588 		free(lmo_t);
589 		free(tmp);
590 		free(atoms_j);
591 		return;
592 	}
593 
594 	/* compute gradient */
595 
596 	six_t *ds = (six_t *)malloc(ij_wf_size * sizeof(six_t));
597 	six_t *dt = (six_t *)malloc(ij_wf_size * sizeof(six_t));
598 	six_t *lmo_dt = (six_t *)malloc(ij_nlmo * sizeof(six_t));
599 	six_t *sixtmp = (six_t *)malloc(ij_nlmo_wf_size * sizeof(six_t));
600 	double *lmo_tmp = (double *)malloc(ij_nlmo * sizeof(double));
601 
602 	efp_st_int_deriv(fr_i->n_xr_atoms, fr_i->xr_atoms,
603 			 fr_j->n_xr_atoms, atoms_j,
604 			 VEC(fr_i->x), fr_i->xr_wf_size, fr_j->xr_wf_size,
605 			 ds, dt);
606 
607 	transform_integral_derivatives(fr_i->n_lmo, fr_j->n_lmo,
608 				       fr_i->xr_wf_size, fr_j->xr_wf_size,
609 				       fr_i->xr_wf, fr_j->xr_wf,
610 				       ds, lmo_ds, sixtmp);
611 	transform_integral_derivatives(fr_i->n_lmo, fr_j->n_lmo,
612 				       fr_i->xr_wf_size, fr_j->xr_wf_size,
613 				       fr_i->xr_wf, fr_j->xr_wf,
614 				       dt, lmo_dt, sixtmp);
615 
616 	for (size_t a = 0; a < 3; a++) {
617 		transform_integrals(fr_i->n_lmo, fr_j->n_lmo,
618 				    fr_i->xr_wf_size, fr_j->xr_wf_size,
619 				    fr_i->xr_wf_deriv[a], fr_j->xr_wf,
620 				    s, lmo_tmp, tmp);
621 		add_six_vec(3 + a, fr_i->n_lmo * fr_j->n_lmo, lmo_tmp, lmo_ds);
622 
623 		transform_integrals(fr_i->n_lmo, fr_j->n_lmo,
624 				    fr_i->xr_wf_size, fr_j->xr_wf_size,
625 				    fr_i->xr_wf_deriv[a], fr_j->xr_wf,
626 				    t, lmo_tmp, tmp);
627 		add_six_vec(3 + a, fr_i->n_lmo * fr_j->n_lmo, lmo_tmp, lmo_dt);
628 	}
629 
630 	for (size_t i = 0, idx = 0; i < fr_i->n_lmo; i++) {
631 		for (size_t j = 0; j < fr_j->n_lmo; j++, idx++) {
632 			size_t ij = i * fr_j->n_lmo + j;
633 
634 			if ((efp->opts.terms & EFP_TERM_ELEC) &&
635 			    (efp->opts.elec_damp == EFP_ELEC_DAMP_OVERLAP))
636 				charge_penetration_grad(efp, frag_i, frag_j,
637 				    i, j, lmo_s[ij], lmo_ds[ij], &swf);
638 			if (efp->opts.terms & EFP_TERM_XR)
639 				lmo_lmo_xr_grad(efp, frag_i, frag_j, i, j,
640 				    lmo_s, lmo_t, lmo_ds, lmo_dt, &swf);
641 		}
642 	}
643 
644 	vec_t force = {
645 		swf.dswf.x * (exr + ecp),
646 		swf.dswf.y * (exr + ecp),
647 		swf.dswf.z * (exr + ecp)
648 	};
649 
650 	six_atomic_add_xyz(efp->grad + frag_i, &force);
651 	six_atomic_sub_xyz(efp->grad + frag_j, &force);
652 	efp_add_stress(&swf.dr, &force, &efp->stress);
653 
654 	free(s);
655 	free(ds);
656 	free(t);
657 	free(dt);
658 	free(lmo_t);
659 	free(lmo_dt);
660 	free(lmo_tmp);
661 	free(tmp);
662 	free(sixtmp);
663 	free(atoms_j);
664 }
665 
/*
 * Map a full Cartesian index pair (a, b) to the position of the
 * corresponding unique d-function component in GAMESS storage order.
 */
static inline size_t
func_d_idx(size_t a, size_t b)
{
	/* order in which GAMESS stores D functions */
	enum { xx = 0, yy, zz, xy, xz, yz };

	static const size_t idx[3][3] = {
		{ xx, xy, xz },
		{ xy, yy, yz },
		{ xz, yz, zz }
	};

	return idx[a][b];
}
678 
/*
 * Map a full Cartesian index triple (a, b, c) to the position of the
 * corresponding unique f-function component in GAMESS storage order.
 */
static inline size_t
func_f_idx(size_t a, size_t b, size_t c)
{
	/* order in which GAMESS stores F functions */
	enum { xxx = 0, yyy, zzz, xxy, xxz, xyy, yyz, xzz, yzz, xyz };

	static const size_t idx[3][3][3] = {
		{ { xxx, xxy, xxz }, { xxy, xyy, xyz }, { xxz, xyz, xzz } },
		{ { xxy, xyy, xyz }, { xyy, yyy, yyz }, { xyz, yyz, yzz } },
		{ { xxz, xyz, xzz }, { xyz, yyz, yzz }, { xzz, yzz, zzz } }
	};

	return idx[a][b][c];
}
693 
694 static void
rotate_func_d(const mat_t * rotmat,const double * in,double * out)695 rotate_func_d(const mat_t *rotmat, const double *in, double *out)
696 {
697 	const double norm = sqrt(3.0) / 2.0;
698 
699 	double full_in[9], full_out[9];
700 
701 	for (size_t a = 0; a < 3; a++) {
702 		for (size_t b = 0; b < 3; b++) {
703 			full_in[3 * a + b] = in[func_d_idx(a, b)];
704 			if (a != b)
705 				full_in[3 * a + b] *= norm;
706 		}
707 	}
708 
709 	efp_rotate_t2(rotmat, full_in, full_out);
710 
711 	for (size_t a = 0; a < 3; a++) {
712 		for (size_t b = 0; b < 3; b++) {
713 			if (a != b)
714 				full_out[3 * a + b] /= norm;
715 			out[func_d_idx(a, b)] = full_out[3 * a + b];
716 		}
717 	}
718 }
719 
/*
 * Rotate the ten unique Cartesian f-function coefficients by rotmat.
 * Packed components carry normalization factors relative to the full
 * rank-3 tensor: norm1 for any component that is not xxx/yyy/zzz, and
 * additionally norm2 for the all-distinct (xyz-type) component.  The
 * coefficients are scaled to full tensor form, rotated, and scaled
 * back.
 */
static void
rotate_func_f(const mat_t *rotmat, const double *in, double *out)
{
	const double norm1 = sqrt(5.0) / 3.0;
	const double norm2 = sqrt(3.0) / 2.0;

	double full_in[27], full_out[27];

	/* expand the 10 packed components into the full 3x3x3 tensor */
	for (size_t a = 0; a < 3; a++)
		for (size_t b = 0; b < 3; b++)
			for (size_t c = 0; c < 3; c++) {
				size_t full_idx = 9 * a + 3 * b + c;
				size_t in_idx = func_f_idx(a, b, c);

				full_in[full_idx] = in[in_idx];

				/* not all three indices equal */
				if (a != b || a != c)
					full_in[full_idx] *= norm1;

				/* all three indices distinct */
				if (a != b && a != c && b != c)
					full_in[full_idx] *= norm2;
			}

	efp_rotate_t3(rotmat, full_in, full_out);

	/* pack the rotated tensor back, undoing the normalization */
	for (size_t a = 0; a < 3; a++)
		for (size_t b = 0; b < 3; b++)
			for (size_t c = 0; c < 3; c++) {
				size_t full_idx = 9 * a + 3 * b + c;
				size_t out_idx = func_f_idx(a, b, c);

				if (a != b || a != c)
					full_out[full_idx] /= norm1;

				if (a != b && a != c && b != c)
					full_out[full_idx] /= norm2;

				out[out_idx] = full_out[full_idx];
			}
}
760 
/*
 * Derivative of p-shell wavefunction coefficients with respect to an
 * infinitesimal rotation about the given axis (0 = x, 1 = y, 2 = z).
 */
static void
coef_deriv_p(size_t axis, const double *coef, double *der)
{
	if (axis == 0) {
		der[0] = 0.0;
		der[1] = coef[2];
		der[2] = -coef[1];
	} else if (axis == 1) {
		der[0] = -coef[2];
		der[1] = 0.0;
		der[2] = coef[0];
	} else if (axis == 2) {
		der[0] = coef[1];
		der[1] = -coef[0];
		der[2] = 0.0;
	}
}
782 
/*
 * Derivative of d-shell wavefunction coefficients with respect to an
 * infinitesimal rotation about the given axis (0 = x, 1 = y, 2 = z).
 * Coefficient ordering matches func_d_idx: xx, yy, zz, xy, xz, yz.
 */
static void
coef_deriv_d(size_t axis, const double *coef, double *der)
{
	const double sqrt3 = sqrt(3.0);

	if (axis == 0) {
		der[0] = 0.0;
		der[1] = sqrt3 * coef[5];
		der[2] = -sqrt3 * coef[5];
		der[3] = coef[4];
		der[4] = -coef[3];
		der[5] = 2.0 / sqrt3 * (coef[2] - coef[1]);
	} else if (axis == 1) {
		der[0] = -sqrt3 * coef[4];
		der[1] = 0.0;
		der[2] = sqrt3 * coef[4];
		der[3] = -coef[5];
		der[4] = 2.0 / sqrt3 * (coef[0] - coef[2]);
		der[5] = coef[3];
	} else if (axis == 2) {
		der[0] = sqrt3 * coef[3];
		der[1] = -sqrt3 * coef[3];
		der[2] = 0.0;
		der[3] = 2.0 / sqrt3 * (coef[1] - coef[0]);
		der[4] = coef[5];
		der[5] = -coef[4];
	}
}
815 
/*
 * Derivative of f-shell wavefunction coefficients with respect to an
 * infinitesimal rotation about the given axis (0 = x, 1 = y, 2 = z).
 * Coefficient ordering matches func_f_idx:
 * xxx, yyy, zzz, xxy, xxz, xyy, yyz, xzz, yzz, xyz.
 */
static void
coef_deriv_f(size_t axis, const double *coef, double *der)
{
	const double sqrt3 = sqrt(3.0);
	const double sqrt5 = sqrt(5.0);

	switch (axis) {
	case 0:
		der[0] = 0.0;
		der[1] = sqrt5 * coef[6];
		der[2] = -sqrt5 * coef[8];
		der[3] = coef[4];
		der[4] = -coef[3];
		der[5] = sqrt3 * coef[9];
		der[6] = -3.0 / sqrt5 * coef[1] + 2.0 * coef[8];
		der[7] = -sqrt3 * coef[9];
		der[8] = 3.0 / sqrt5 * coef[2] - 2.0 * coef[6];
		der[9] = 2.0 / sqrt3 * (coef[7] - coef[5]);
		break;
	case 1:
		der[0] = -sqrt5 * coef[4];
		der[1] = 0.0;
		der[2] = sqrt5 * coef[7];
		der[3] = -sqrt3 * coef[9];
		der[4] = 3.0 / sqrt5 * coef[0] - 2.0 * coef[7];
		der[5] = -coef[6];
		der[6] = coef[5];
		der[7] = -3.0 / sqrt5 * coef[2] + 2.0 * coef[4];
		der[8] = sqrt3 * coef[9];
		der[9] = 2.0 / sqrt3 * (coef[3] - coef[8]);
		break;
	case 2:
		der[0] = sqrt5 * coef[3];
		der[1] = -sqrt5 * coef[5];
		der[2] = 0.0;
		der[3] = -3.0 / sqrt5 * coef[0] + 2.0 * coef[5];
		der[4] = sqrt3 * coef[9];
		der[5] = 3.0 / sqrt5 * coef[1] - 2.0 * coef[3];
		der[6] = -sqrt3 * coef[9];
		der[7] = coef[8];
		der[8] = -coef[7];
		der[9] = 2.0 / sqrt3 * (coef[6] - coef[4]);
		break;
	};
}
861 
/*
 * Refresh all exchange-repulsion data of a fragment after it has been
 * translated/rotated: LMO centroids, XR atom positions, and the
 * wavefunction coefficients (rotated shell by shell).  Also fills
 * frag->xr_wf_deriv with derivatives of the rotated coefficients with
 * respect to infinitesimal rotations about the three axes.
 */
void
efp_update_xr(struct frag *frag)
{
	const mat_t *rotmat = &frag->rotmat;

	/* update LMO centroids */
	for (size_t i = 0; i < frag->n_lmo; i++) {
		efp_move_pt(CVEC(frag->x), rotmat,
		    frag->lib->lmo_centroids + i, frag->lmo_centroids + i);
	}
	/* update xr atoms */
	for (size_t i = 0; i < frag->n_xr_atoms; i++) {
		efp_move_pt(CVEC(frag->x), rotmat,
		    CVEC(frag->lib->xr_atoms[i].x), VEC(frag->xr_atoms[i].x));
	}
	/* rotate wavefunction */
	for (size_t k = 0; k < frag->n_lmo; k++) {
		double *deriv[3];

		/* per-axis rotational derivative rows for this LMO */
		for (size_t a = 0; a < 3; a++)
			deriv[a] = frag->xr_wf_deriv[a] + k * frag->xr_wf_size;

		/* library holds the reference-frame coefficients */
		const double *in = frag->lib->xr_wf + k * frag->xr_wf_size;
		double *out = frag->xr_wf + k * frag->xr_wf_size;

		/* 'func' tracks the running offset into the coefficient
		 * vector as shells of different sizes are consumed */
		for (size_t j = 0, func = 0; j < frag->n_xr_atoms; j++) {
			const struct xr_atom *atom = frag->xr_atoms + j;

			for (size_t i = 0; i < atom->n_shells; i++) {
				switch (atom->shells[i].type) {
				case 'S':
					/* single s function: invariant
					 * under rotation, just skip it */
					func++;
					break;
				case 'L':
					/* L shell: an s function followed
					 * by a p set; skip the s part and
					 * handle the rest as 'P' */
					func++;
					/* fall through */
				case 'P': {
					/* p functions transform as a vector */
					vec_t r = mat_vec(rotmat,
					    (const vec_t *)(in + func));
					out[func + 0] = r.x;
					out[func + 1] = r.y;
					out[func + 2] = r.z;
					for (size_t a = 0; a < 3; a++) {
						coef_deriv_p(a, out + func,
						    deriv[a] + func);
					}
					func += 3;
					break;
				}
				case 'D':
					/* 6 Cartesian d components */
					rotate_func_d(rotmat, in + func,
					    out + func);
					for (size_t a = 0; a < 3; a++) {
						coef_deriv_d(a, out + func,
						    deriv[a] + func);
					}
					func += 6;
					break;
				case 'F':
					/* 10 Cartesian f components */
					rotate_func_f(rotmat, in + func,
					    out + func);
					for (size_t a = 0; a < 3; a++) {
						coef_deriv_f(a, out + func,
						    deriv[a] + func);
					}
					func += 10;
					break;
				/* NOTE(review): no default case - an
				 * unrecognized shell type would silently
				 * leave 'func' unadvanced and desync the
				 * offsets; presumably shell types are
				 * validated when the fragment is loaded */
				}
			}
		}
	}
}
934