1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_d_f_f_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_f_f_s)8 int ostei_d_f_f_s(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__d_f_f_s)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__d_f_f_s);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int iket;
26
27 // partition workspace
28 double * const INT__s_f_f_s = work + (SIMINT_NSHELL_SIMD * 0);
29 double * const INT__s_g_f_s = work + (SIMINT_NSHELL_SIMD * 100);
30 double * const INT__s_h_f_s = work + (SIMINT_NSHELL_SIMD * 250);
31 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*460);
32 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
33 SIMINT_DBLTYPE * const restrict PRIM_INT__s_p_s_s = primwork + 9;
34 SIMINT_DBLTYPE * const restrict PRIM_INT__s_p_p_s = primwork + 33;
35 SIMINT_DBLTYPE * const restrict PRIM_INT__s_d_s_s = primwork + 60;
36 SIMINT_DBLTYPE * const restrict PRIM_INT__s_d_p_s = primwork + 102;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__s_d_d_s = primwork + 156;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__s_f_s_s = primwork + 228;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__s_f_p_s = primwork + 288;
40 SIMINT_DBLTYPE * const restrict PRIM_INT__s_f_d_s = primwork + 378;
41 SIMINT_DBLTYPE * const restrict PRIM_INT__s_f_f_s = primwork + 498;
42 SIMINT_DBLTYPE * const restrict PRIM_INT__s_g_s_s = primwork + 598;
43 SIMINT_DBLTYPE * const restrict PRIM_INT__s_g_p_s = primwork + 673;
44 SIMINT_DBLTYPE * const restrict PRIM_INT__s_g_d_s = primwork + 808;
45 SIMINT_DBLTYPE * const restrict PRIM_INT__s_g_f_s = primwork + 988;
46 SIMINT_DBLTYPE * const restrict PRIM_INT__s_h_s_s = primwork + 1138;
47 SIMINT_DBLTYPE * const restrict PRIM_INT__s_h_p_s = primwork + 1222;
48 SIMINT_DBLTYPE * const restrict PRIM_INT__s_h_d_s = primwork + 1411;
49 SIMINT_DBLTYPE * const restrict PRIM_INT__s_h_f_s = primwork + 1663;
50 double * const hrrwork = (double *)(primwork + 1873);
51 double * const HRR_INT__p_f_f_s = hrrwork + 0;
52 double * const HRR_INT__p_g_f_s = hrrwork + 300;
53
54
55 // Create constants
56 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
57 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
58 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
59 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
60 const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
61 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
62
63
64 ////////////////////////////////////////
65 // Loop over shells and primitives
66 ////////////////////////////////////////
67
68 real_abcd = 0;
69 istart = 0;
70 for(ab = 0; ab < P.nshell12_clip; ++ab)
71 {
72 const int iend = istart + P.nprim12[ab];
73
74 cd = 0;
75 jstart = 0;
76
77 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
78 {
79 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
80 int jend = jstart;
81 for(i = 0; i < nshellbatch; i++)
82 jend += Q.nprim12[cd+i];
83
84 // Clear the beginning of the workspace (where we are accumulating integrals)
85 memset(work, 0, SIMINT_NSHELL_SIMD * 460 * sizeof(double));
86 abcd = 0;
87
88
89 for(i = istart; i < iend; ++i)
90 {
91 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
92
93 if(check_screen)
94 {
95 // Skip this whole thing if always insignificant
96 if((P.screen[i] * Q.screen_max) < screen_tol)
97 continue;
98 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
99 }
100
101 icd = 0;
102 iprimcd = 0;
103 nprim_icd = Q.nprim12[cd];
104 double * restrict PRIM_PTR_INT__s_f_f_s = INT__s_f_f_s + abcd * 100;
105 double * restrict PRIM_PTR_INT__s_g_f_s = INT__s_g_f_s + abcd * 150;
106 double * restrict PRIM_PTR_INT__s_h_f_s = INT__s_h_f_s + abcd * 210;
107
108
109
110 // Load these one per loop over i
111 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
112 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
113 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
114
115 const SIMINT_DBLTYPE P_PB[3] = { SIMINT_DBLSET1(P.PB_x[i]), SIMINT_DBLSET1(P.PB_y[i]), SIMINT_DBLSET1(P.PB_z[i]) };
116
117 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
118 {
119 // calculate the shell offsets
120 // these are the offset from the shell pointed to by cd
121 // for each element
122 int shelloffsets[SIMINT_SIMD_LEN] = {0};
123 int lastoffset = 0;
124 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
125
126 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
127 {
128 // Handle if the first element of the vector is a new shell
129 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
130 {
131 nprim_icd += Q.nprim12[cd + (++icd)];
132 PRIM_PTR_INT__s_f_f_s += 100;
133 PRIM_PTR_INT__s_g_f_s += 150;
134 PRIM_PTR_INT__s_h_f_s += 210;
135 }
136 iprimcd++;
137 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
138 {
139 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
140 {
141 shelloffsets[n] = shelloffsets[n-1] + 1;
142 lastoffset++;
143 nprim_icd += Q.nprim12[cd + (++icd)];
144 }
145 else
146 shelloffsets[n] = shelloffsets[n-1];
147 iprimcd++;
148 }
149 }
150 else
151 iprimcd += SIMINT_SIMD_LEN;
152
153 // Do we have to compute this vector (or has it been screened out)?
154 // (not_screened != 0 means we have to do this vector)
155 if(check_screen)
156 {
157 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
158 if(vmax < screen_tol)
159 {
160 PRIM_PTR_INT__s_f_f_s += lastoffset*100;
161 PRIM_PTR_INT__s_g_f_s += lastoffset*150;
162 PRIM_PTR_INT__s_h_f_s += lastoffset*210;
163 continue;
164 }
165 }
166
167 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
168 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
169 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
170 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
171
172
173 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
174 SIMINT_DBLTYPE PQ[3];
175 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
176 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
177 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
178 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
179 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
180 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
181
182 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
183 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
184 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
185 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
186 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
187 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
188 const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
189
190 // NOTE: Minus sign!
191 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
192 SIMINT_DBLTYPE aop_PQ[3];
193 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
194 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
195 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
196
197 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
198 SIMINT_DBLTYPE aoq_PQ[3];
199 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
200 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
201 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
202 // Put a minus sign here so we don't have to in RR routines
203 a_over_q = SIMINT_NEG(a_over_q);
204
205
206 //////////////////////////////////////////////
207 // Fjt function section
208 // Maximum v value: 8
209 //////////////////////////////////////////////
210 // The parameter to the Fjt function
211 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
212
213
214 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
215
216
217 boys_F_split(PRIM_INT__s_s_s_s, F_x, 8);
218 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
219 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
220 for(n = 0; n <= 8; n++)
221 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
222
223 //////////////////////////////////////////////
224 // Primitive integrals: Vertical recurrance
225 //////////////////////////////////////////////
226
227 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
228 const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
229 const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
230 const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
231 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
232 const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
233 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
234 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
235 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
236 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
237 const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
238
239
240
241 // Forming PRIM_INT__s_p_s_s[8 * 3];
242 for(n = 0; n < 8; ++n) // loop over orders of auxiliary function
243 {
244
245 PRIM_INT__s_p_s_s[n * 3 + 0] = SIMINT_MUL(P_PB[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
246 PRIM_INT__s_p_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_p_s_s[n * 3 + 0]);
247
248 PRIM_INT__s_p_s_s[n * 3 + 1] = SIMINT_MUL(P_PB[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
249 PRIM_INT__s_p_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_p_s_s[n * 3 + 1]);
250
251 PRIM_INT__s_p_s_s[n * 3 + 2] = SIMINT_MUL(P_PB[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
252 PRIM_INT__s_p_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_p_s_s[n * 3 + 2]);
253
254 }
255
256
257
258 // Forming PRIM_INT__s_d_s_s[7 * 6];
259 for(n = 0; n < 7; ++n) // loop over orders of auxiliary function
260 {
261
262 PRIM_INT__s_d_s_s[n * 6 + 0] = SIMINT_MUL(P_PB[0], PRIM_INT__s_p_s_s[n * 3 + 0]);
263 PRIM_INT__s_d_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_d_s_s[n * 6 + 0]);
264 PRIM_INT__s_d_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_d_s_s[n * 6 + 0]);
265
266 PRIM_INT__s_d_s_s[n * 6 + 1] = SIMINT_MUL(P_PB[1], PRIM_INT__s_p_s_s[n * 3 + 0]);
267 PRIM_INT__s_d_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_d_s_s[n * 6 + 1]);
268
269 PRIM_INT__s_d_s_s[n * 6 + 2] = SIMINT_MUL(P_PB[2], PRIM_INT__s_p_s_s[n * 3 + 0]);
270 PRIM_INT__s_d_s_s[n * 6 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_d_s_s[n * 6 + 2]);
271
272 PRIM_INT__s_d_s_s[n * 6 + 3] = SIMINT_MUL(P_PB[1], PRIM_INT__s_p_s_s[n * 3 + 1]);
273 PRIM_INT__s_d_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_d_s_s[n * 6 + 3]);
274 PRIM_INT__s_d_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_d_s_s[n * 6 + 3]);
275
276 PRIM_INT__s_d_s_s[n * 6 + 4] = SIMINT_MUL(P_PB[2], PRIM_INT__s_p_s_s[n * 3 + 1]);
277 PRIM_INT__s_d_s_s[n * 6 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_d_s_s[n * 6 + 4]);
278
279 PRIM_INT__s_d_s_s[n * 6 + 5] = SIMINT_MUL(P_PB[2], PRIM_INT__s_p_s_s[n * 3 + 2]);
280 PRIM_INT__s_d_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_p_s_s[(n+1) * 3 + 2], PRIM_INT__s_d_s_s[n * 6 + 5]);
281 PRIM_INT__s_d_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_d_s_s[n * 6 + 5]);
282
283 }
284
285
286
287 // Forming PRIM_INT__s_f_s_s[6 * 10];
288 for(n = 0; n < 6; ++n) // loop over orders of auxiliary function
289 {
290
291 PRIM_INT__s_f_s_s[n * 10 + 0] = SIMINT_MUL(P_PB[0], PRIM_INT__s_d_s_s[n * 6 + 0]);
292 PRIM_INT__s_f_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 0], PRIM_INT__s_f_s_s[n * 10 + 0]);
293 PRIM_INT__s_f_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_p_s_s[n * 3 + 0]), PRIM_INT__s_f_s_s[n * 10 + 0]);
294
295 PRIM_INT__s_f_s_s[n * 10 + 1] = SIMINT_MUL(P_PB[1], PRIM_INT__s_d_s_s[n * 6 + 0]);
296 PRIM_INT__s_f_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 0], PRIM_INT__s_f_s_s[n * 10 + 1]);
297
298 PRIM_INT__s_f_s_s[n * 10 + 2] = SIMINT_MUL(P_PB[2], PRIM_INT__s_d_s_s[n * 6 + 0]);
299 PRIM_INT__s_f_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 0], PRIM_INT__s_f_s_s[n * 10 + 2]);
300
301 PRIM_INT__s_f_s_s[n * 10 + 3] = SIMINT_MUL(P_PB[0], PRIM_INT__s_d_s_s[n * 6 + 3]);
302 PRIM_INT__s_f_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 3], PRIM_INT__s_f_s_s[n * 10 + 3]);
303
304 PRIM_INT__s_f_s_s[n * 10 + 4] = SIMINT_MUL(P_PB[2], PRIM_INT__s_d_s_s[n * 6 + 1]);
305 PRIM_INT__s_f_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 1], PRIM_INT__s_f_s_s[n * 10 + 4]);
306
307 PRIM_INT__s_f_s_s[n * 10 + 5] = SIMINT_MUL(P_PB[0], PRIM_INT__s_d_s_s[n * 6 + 5]);
308 PRIM_INT__s_f_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 5], PRIM_INT__s_f_s_s[n * 10 + 5]);
309
310 PRIM_INT__s_f_s_s[n * 10 + 6] = SIMINT_MUL(P_PB[1], PRIM_INT__s_d_s_s[n * 6 + 3]);
311 PRIM_INT__s_f_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 3], PRIM_INT__s_f_s_s[n * 10 + 6]);
312 PRIM_INT__s_f_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_p_s_s[n * 3 + 1]), PRIM_INT__s_f_s_s[n * 10 + 6]);
313
314 PRIM_INT__s_f_s_s[n * 10 + 7] = SIMINT_MUL(P_PB[2], PRIM_INT__s_d_s_s[n * 6 + 3]);
315 PRIM_INT__s_f_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 3], PRIM_INT__s_f_s_s[n * 10 + 7]);
316
317 PRIM_INT__s_f_s_s[n * 10 + 8] = SIMINT_MUL(P_PB[1], PRIM_INT__s_d_s_s[n * 6 + 5]);
318 PRIM_INT__s_f_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 5], PRIM_INT__s_f_s_s[n * 10 + 8]);
319
320 PRIM_INT__s_f_s_s[n * 10 + 9] = SIMINT_MUL(P_PB[2], PRIM_INT__s_d_s_s[n * 6 + 5]);
321 PRIM_INT__s_f_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 5], PRIM_INT__s_f_s_s[n * 10 + 9]);
322 PRIM_INT__s_f_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_p_s_s[(n+1) * 3 + 2], PRIM_INT__s_p_s_s[n * 3 + 2]), PRIM_INT__s_f_s_s[n * 10 + 9]);
323
324 }
325
326
327 VRR_K_s_f_p_s(
328 PRIM_INT__s_f_p_s,
329 PRIM_INT__s_f_s_s,
330 PRIM_INT__s_d_s_s,
331 Q_PA,
332 aoq_PQ,
333 one_over_2pq,
334 3);
335
336
337
338 // Forming PRIM_INT__s_d_p_s[3 * 18];
339 for(n = 0; n < 3; ++n) // loop over orders of auxiliary function
340 {
341
342 PRIM_INT__s_d_p_s[n * 18 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_d_s_s[n * 6 + 0]);
343 PRIM_INT__s_d_p_s[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 0], PRIM_INT__s_d_p_s[n * 18 + 0]);
344 PRIM_INT__s_d_p_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_d_p_s[n * 18 + 0]);
345
346 PRIM_INT__s_d_p_s[n * 18 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_d_s_s[n * 6 + 0]);
347 PRIM_INT__s_d_p_s[n * 18 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 0], PRIM_INT__s_d_p_s[n * 18 + 1]);
348
349 PRIM_INT__s_d_p_s[n * 18 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_d_s_s[n * 6 + 0]);
350 PRIM_INT__s_d_p_s[n * 18 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 0], PRIM_INT__s_d_p_s[n * 18 + 2]);
351
352 PRIM_INT__s_d_p_s[n * 18 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_d_s_s[n * 6 + 1]);
353 PRIM_INT__s_d_p_s[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 1], PRIM_INT__s_d_p_s[n * 18 + 3]);
354 PRIM_INT__s_d_p_s[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_d_p_s[n * 18 + 3]);
355
356 PRIM_INT__s_d_p_s[n * 18 + 4] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_d_s_s[n * 6 + 1]);
357 PRIM_INT__s_d_p_s[n * 18 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 1], PRIM_INT__s_d_p_s[n * 18 + 4]);
358 PRIM_INT__s_d_p_s[n * 18 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_d_p_s[n * 18 + 4]);
359
360 PRIM_INT__s_d_p_s[n * 18 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_d_s_s[n * 6 + 1]);
361 PRIM_INT__s_d_p_s[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 1], PRIM_INT__s_d_p_s[n * 18 + 5]);
362
363 PRIM_INT__s_d_p_s[n * 18 + 6] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_d_s_s[n * 6 + 2]);
364 PRIM_INT__s_d_p_s[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 2], PRIM_INT__s_d_p_s[n * 18 + 6]);
365 PRIM_INT__s_d_p_s[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 2], PRIM_INT__s_d_p_s[n * 18 + 6]);
366
367 PRIM_INT__s_d_p_s[n * 18 + 7] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_d_s_s[n * 6 + 2]);
368 PRIM_INT__s_d_p_s[n * 18 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 2], PRIM_INT__s_d_p_s[n * 18 + 7]);
369
370 PRIM_INT__s_d_p_s[n * 18 + 8] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_d_s_s[n * 6 + 2]);
371 PRIM_INT__s_d_p_s[n * 18 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 2], PRIM_INT__s_d_p_s[n * 18 + 8]);
372 PRIM_INT__s_d_p_s[n * 18 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_d_p_s[n * 18 + 8]);
373
374 PRIM_INT__s_d_p_s[n * 18 + 9] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_d_s_s[n * 6 + 3]);
375 PRIM_INT__s_d_p_s[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 3], PRIM_INT__s_d_p_s[n * 18 + 9]);
376
377 PRIM_INT__s_d_p_s[n * 18 + 10] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_d_s_s[n * 6 + 3]);
378 PRIM_INT__s_d_p_s[n * 18 + 10] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 3], PRIM_INT__s_d_p_s[n * 18 + 10]);
379 PRIM_INT__s_d_p_s[n * 18 + 10] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_d_p_s[n * 18 + 10]);
380
381 PRIM_INT__s_d_p_s[n * 18 + 11] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_d_s_s[n * 6 + 3]);
382 PRIM_INT__s_d_p_s[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 3], PRIM_INT__s_d_p_s[n * 18 + 11]);
383
384 PRIM_INT__s_d_p_s[n * 18 + 12] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_d_s_s[n * 6 + 4]);
385 PRIM_INT__s_d_p_s[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 4], PRIM_INT__s_d_p_s[n * 18 + 12]);
386
387 PRIM_INT__s_d_p_s[n * 18 + 13] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_d_s_s[n * 6 + 4]);
388 PRIM_INT__s_d_p_s[n * 18 + 13] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 4], PRIM_INT__s_d_p_s[n * 18 + 13]);
389 PRIM_INT__s_d_p_s[n * 18 + 13] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 2], PRIM_INT__s_d_p_s[n * 18 + 13]);
390
391 PRIM_INT__s_d_p_s[n * 18 + 14] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_d_s_s[n * 6 + 4]);
392 PRIM_INT__s_d_p_s[n * 18 + 14] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 4], PRIM_INT__s_d_p_s[n * 18 + 14]);
393 PRIM_INT__s_d_p_s[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_d_p_s[n * 18 + 14]);
394
395 PRIM_INT__s_d_p_s[n * 18 + 15] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_d_s_s[n * 6 + 5]);
396 PRIM_INT__s_d_p_s[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_d_s_s[(n+1) * 6 + 5], PRIM_INT__s_d_p_s[n * 18 + 15]);
397
398 PRIM_INT__s_d_p_s[n * 18 + 16] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_d_s_s[n * 6 + 5]);
399 PRIM_INT__s_d_p_s[n * 18 + 16] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_d_s_s[(n+1) * 6 + 5], PRIM_INT__s_d_p_s[n * 18 + 16]);
400
401 PRIM_INT__s_d_p_s[n * 18 + 17] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_d_s_s[n * 6 + 5]);
402 PRIM_INT__s_d_p_s[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_d_s_s[(n+1) * 6 + 5], PRIM_INT__s_d_p_s[n * 18 + 17]);
403 PRIM_INT__s_d_p_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_p_s_s[(n+1) * 3 + 2], PRIM_INT__s_d_p_s[n * 18 + 17]);
404
405 }
406
407
408 VRR_K_s_f_d_s(
409 PRIM_INT__s_f_d_s,
410 PRIM_INT__s_f_p_s,
411 PRIM_INT__s_f_s_s,
412 PRIM_INT__s_d_p_s,
413 Q_PA,
414 a_over_q,
415 aoq_PQ,
416 one_over_2pq,
417 one_over_2q,
418 2);
419
420
421
422 // Forming PRIM_INT__s_p_p_s[3 * 9];
423 for(n = 0; n < 3; ++n) // loop over orders of auxiliary function
424 {
425
426 PRIM_INT__s_p_p_s[n * 9 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_p_s_s[n * 3 + 0]);
427 PRIM_INT__s_p_p_s[n * 9 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_p_p_s[n * 9 + 0]);
428 PRIM_INT__s_p_p_s[n * 9 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_p_p_s[n * 9 + 0]);
429
430 PRIM_INT__s_p_p_s[n * 9 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_p_s_s[n * 3 + 0]);
431 PRIM_INT__s_p_p_s[n * 9 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_p_p_s[n * 9 + 1]);
432
433 PRIM_INT__s_p_p_s[n * 9 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_p_s_s[n * 3 + 0]);
434 PRIM_INT__s_p_p_s[n * 9 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_p_s_s[(n+1) * 3 + 0], PRIM_INT__s_p_p_s[n * 9 + 2]);
435
436 PRIM_INT__s_p_p_s[n * 9 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_p_s_s[n * 3 + 1]);
437 PRIM_INT__s_p_p_s[n * 9 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_p_p_s[n * 9 + 3]);
438
439 PRIM_INT__s_p_p_s[n * 9 + 4] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_p_s_s[n * 3 + 1]);
440 PRIM_INT__s_p_p_s[n * 9 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_p_p_s[n * 9 + 4]);
441 PRIM_INT__s_p_p_s[n * 9 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_p_p_s[n * 9 + 4]);
442
443 PRIM_INT__s_p_p_s[n * 9 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_p_s_s[n * 3 + 1]);
444 PRIM_INT__s_p_p_s[n * 9 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_p_s_s[(n+1) * 3 + 1], PRIM_INT__s_p_p_s[n * 9 + 5]);
445
446 PRIM_INT__s_p_p_s[n * 9 + 6] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_p_s_s[n * 3 + 2]);
447 PRIM_INT__s_p_p_s[n * 9 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_p_s_s[(n+1) * 3 + 2], PRIM_INT__s_p_p_s[n * 9 + 6]);
448
449 PRIM_INT__s_p_p_s[n * 9 + 7] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_p_s_s[n * 3 + 2]);
450 PRIM_INT__s_p_p_s[n * 9 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_p_s_s[(n+1) * 3 + 2], PRIM_INT__s_p_p_s[n * 9 + 7]);
451
452 PRIM_INT__s_p_p_s[n * 9 + 8] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_p_s_s[n * 3 + 2]);
453 PRIM_INT__s_p_p_s[n * 9 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_p_s_s[(n+1) * 3 + 2], PRIM_INT__s_p_p_s[n * 9 + 8]);
454 PRIM_INT__s_p_p_s[n * 9 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_p_p_s[n * 9 + 8]);
455
456 }
457
458
459 VRR_K_s_d_d_s(
460 PRIM_INT__s_d_d_s,
461 PRIM_INT__s_d_p_s,
462 PRIM_INT__s_d_s_s,
463 PRIM_INT__s_p_p_s,
464 Q_PA,
465 a_over_q,
466 aoq_PQ,
467 one_over_2pq,
468 one_over_2q,
469 2);
470
471
472 ostei_general_vrr_K(0, 3, 3, 0, 1,
473 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
474 PRIM_INT__s_f_d_s, PRIM_INT__s_f_p_s, NULL, NULL, PRIM_INT__s_d_d_s, PRIM_INT__s_f_f_s);
475
476
477 VRR_J_s_g_s_s(
478 PRIM_INT__s_g_s_s,
479 PRIM_INT__s_f_s_s,
480 PRIM_INT__s_d_s_s,
481 P_PB,
482 a_over_p,
483 aop_PQ,
484 one_over_2p,
485 5);
486
487
488 VRR_K_s_g_p_s(
489 PRIM_INT__s_g_p_s,
490 PRIM_INT__s_g_s_s,
491 PRIM_INT__s_f_s_s,
492 Q_PA,
493 aoq_PQ,
494 one_over_2pq,
495 3);
496
497
498 ostei_general_vrr_K(0, 4, 2, 0, 2,
499 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
500 PRIM_INT__s_g_p_s, PRIM_INT__s_g_s_s, NULL, NULL, PRIM_INT__s_f_p_s, PRIM_INT__s_g_d_s);
501
502
503 ostei_general_vrr_K(0, 4, 3, 0, 1,
504 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
505 PRIM_INT__s_g_d_s, PRIM_INT__s_g_p_s, NULL, NULL, PRIM_INT__s_f_d_s, PRIM_INT__s_g_f_s);
506
507
508 VRR_J_s_h_s_s(
509 PRIM_INT__s_h_s_s,
510 PRIM_INT__s_g_s_s,
511 PRIM_INT__s_f_s_s,
512 P_PB,
513 a_over_p,
514 aop_PQ,
515 one_over_2p,
516 4);
517
518
519 ostei_general_vrr_K(0, 5, 1, 0, 3,
520 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
521 PRIM_INT__s_h_s_s, NULL, NULL, NULL, PRIM_INT__s_g_s_s, PRIM_INT__s_h_p_s);
522
523
524 ostei_general_vrr_K(0, 5, 2, 0, 2,
525 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
526 PRIM_INT__s_h_p_s, PRIM_INT__s_h_s_s, NULL, NULL, PRIM_INT__s_g_p_s, PRIM_INT__s_h_d_s);
527
528
529 ostei_general_vrr_K(0, 5, 3, 0, 1,
530 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
531 PRIM_INT__s_h_d_s, PRIM_INT__s_h_p_s, NULL, NULL, PRIM_INT__s_g_d_s, PRIM_INT__s_h_f_s);
532
533
534
535
536 ////////////////////////////////////
537 // Accumulate contracted integrals
538 ////////////////////////////////////
539 if(lastoffset == 0)
540 {
541 contract_all(100, PRIM_INT__s_f_f_s, PRIM_PTR_INT__s_f_f_s);
542 contract_all(150, PRIM_INT__s_g_f_s, PRIM_PTR_INT__s_g_f_s);
543 contract_all(210, PRIM_INT__s_h_f_s, PRIM_PTR_INT__s_h_f_s);
544 }
545 else
546 {
547 contract(100, shelloffsets, PRIM_INT__s_f_f_s, PRIM_PTR_INT__s_f_f_s);
548 contract(150, shelloffsets, PRIM_INT__s_g_f_s, PRIM_PTR_INT__s_g_f_s);
549 contract(210, shelloffsets, PRIM_INT__s_h_f_s, PRIM_PTR_INT__s_h_f_s);
550 PRIM_PTR_INT__s_f_f_s += lastoffset*100;
551 PRIM_PTR_INT__s_g_f_s += lastoffset*150;
552 PRIM_PTR_INT__s_h_f_s += lastoffset*210;
553 }
554
555 } // close loop over j
556 } // close loop over i
557
558 //Advance to the next batch
559 jstart = SIMINT_SIMD_ROUND(jend);
560
561 //////////////////////////////////////////////
562 // Contracted integrals: Horizontal recurrance
563 //////////////////////////////////////////////
564
565
566 const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
567
568
569 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
570 {
571
572 // set up HRR pointers
573 double const * restrict HRR_INT__s_f_f_s = INT__s_f_f_s + abcd * 100;
574 double const * restrict HRR_INT__s_g_f_s = INT__s_g_f_s + abcd * 150;
575 double const * restrict HRR_INT__s_h_f_s = INT__s_h_f_s + abcd * 210;
576 double * restrict HRR_INT__d_f_f_s = INT__d_f_f_s + real_abcd * 600;
577
578 // form INT__p_f_f_s
579 HRR_I_p_f(
580 HRR_INT__p_f_f_s,
581 HRR_INT__s_f_f_s,
582 HRR_INT__s_g_f_s,
583 hAB, 10);
584
585 // form INT__p_g_f_s
586 HRR_I_p_g(
587 HRR_INT__p_g_f_s,
588 HRR_INT__s_g_f_s,
589 HRR_INT__s_h_f_s,
590 hAB, 10);
591
592 // form INT__d_f_f_s
593 HRR_I_d_f(
594 HRR_INT__d_f_f_s,
595 HRR_INT__p_f_f_s,
596 HRR_INT__p_g_f_s,
597 hAB, 10);
598
599
600 } // close HRR loop
601
602
603 } // close loop cdbatch
604
605 istart = iend;
606 } // close loop over ab
607
608 return P.nshell12_clip * Q.nshell12_clip;
609 }
610
611