1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_f_f_s_d(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__f_f_s_d)8 int ostei_f_f_s_d(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__f_f_s_d)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__f_f_s_d);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int iket;
26
27 // partition workspace
28 double * const INT__f_s_s_d = work + (SIMINT_NSHELL_SIMD * 0);
29 double * const INT__g_s_s_d = work + (SIMINT_NSHELL_SIMD * 60);
30 double * const INT__h_s_s_d = work + (SIMINT_NSHELL_SIMD * 150);
31 double * const INT__i_s_s_d = work + (SIMINT_NSHELL_SIMD * 276);
32 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*444);
33 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
34 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 9;
35 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 33;
36 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_p = primwork + 75;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 111;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_p = primwork + 171;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_d = primwork + 231;
40 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 291;
41 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_p = primwork + 366;
42 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_d = primwork + 456;
43 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_s = primwork + 546;
44 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_p = primwork + 630;
45 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_d = primwork + 756;
46 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_s = primwork + 882;
47 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_p = primwork + 966;
48 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_d = primwork + 1134;
49 double * const hrrwork = (double *)(primwork + 1302);
50 double * const HRR_INT__f_p_s_d = hrrwork + 0;
51 double * const HRR_INT__f_d_s_d = hrrwork + 180;
52 double * const HRR_INT__g_p_s_d = hrrwork + 540;
53 double * const HRR_INT__g_d_s_d = hrrwork + 810;
54 double * const HRR_INT__h_p_s_d = hrrwork + 1350;
55
56
57 // Create constants
58 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
59 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
60 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
61 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
62 const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
63 const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
64 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
65
66
67 ////////////////////////////////////////
68 // Loop over shells and primitives
69 ////////////////////////////////////////
70
71 real_abcd = 0;
72 istart = 0;
73 for(ab = 0; ab < P.nshell12_clip; ++ab)
74 {
75 const int iend = istart + P.nprim12[ab];
76
77 cd = 0;
78 jstart = 0;
79
80 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
81 {
82 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
83 int jend = jstart;
84 for(i = 0; i < nshellbatch; i++)
85 jend += Q.nprim12[cd+i];
86
87 // Clear the beginning of the workspace (where we are accumulating integrals)
88 memset(work, 0, SIMINT_NSHELL_SIMD * 444 * sizeof(double));
89 abcd = 0;
90
91
92 for(i = istart; i < iend; ++i)
93 {
94 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
95
96 if(check_screen)
97 {
98 // Skip this whole thing if always insignificant
99 if((P.screen[i] * Q.screen_max) < screen_tol)
100 continue;
101 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
102 }
103
104 icd = 0;
105 iprimcd = 0;
106 nprim_icd = Q.nprim12[cd];
107 double * restrict PRIM_PTR_INT__f_s_s_d = INT__f_s_s_d + abcd * 60;
108 double * restrict PRIM_PTR_INT__g_s_s_d = INT__g_s_s_d + abcd * 90;
109 double * restrict PRIM_PTR_INT__h_s_s_d = INT__h_s_s_d + abcd * 126;
110 double * restrict PRIM_PTR_INT__i_s_s_d = INT__i_s_s_d + abcd * 168;
111
112
113
114 // Load these one per loop over i
115 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
116 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
117 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
118
119 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
120
121 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
122 {
123 // calculate the shell offsets
124 // these are the offset from the shell pointed to by cd
125 // for each element
126 int shelloffsets[SIMINT_SIMD_LEN] = {0};
127 int lastoffset = 0;
128 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
129
130 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
131 {
132 // Handle if the first element of the vector is a new shell
133 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
134 {
135 nprim_icd += Q.nprim12[cd + (++icd)];
136 PRIM_PTR_INT__f_s_s_d += 60;
137 PRIM_PTR_INT__g_s_s_d += 90;
138 PRIM_PTR_INT__h_s_s_d += 126;
139 PRIM_PTR_INT__i_s_s_d += 168;
140 }
141 iprimcd++;
142 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
143 {
144 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
145 {
146 shelloffsets[n] = shelloffsets[n-1] + 1;
147 lastoffset++;
148 nprim_icd += Q.nprim12[cd + (++icd)];
149 }
150 else
151 shelloffsets[n] = shelloffsets[n-1];
152 iprimcd++;
153 }
154 }
155 else
156 iprimcd += SIMINT_SIMD_LEN;
157
158 // Do we have to compute this vector (or has it been screened out)?
159 // (not_screened != 0 means we have to do this vector)
160 if(check_screen)
161 {
162 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
163 if(vmax < screen_tol)
164 {
165 PRIM_PTR_INT__f_s_s_d += lastoffset*60;
166 PRIM_PTR_INT__g_s_s_d += lastoffset*90;
167 PRIM_PTR_INT__h_s_s_d += lastoffset*126;
168 PRIM_PTR_INT__i_s_s_d += lastoffset*168;
169 continue;
170 }
171 }
172
173 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
174 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
175 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
176 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
177
178
179 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
180 SIMINT_DBLTYPE PQ[3];
181 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
182 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
183 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
184 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
185 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
186 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
187
188 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
189 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
190 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
191 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
192 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
193 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
194 const SIMINT_DBLTYPE Q_PB[3] = { SIMINT_DBLLOAD(Q.PB_x, j), SIMINT_DBLLOAD(Q.PB_y, j), SIMINT_DBLLOAD(Q.PB_z, j) };
195
196 // NOTE: Minus sign!
197 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
198 SIMINT_DBLTYPE aop_PQ[3];
199 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
200 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
201 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
202
203 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
204 SIMINT_DBLTYPE aoq_PQ[3];
205 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
206 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
207 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
208 // Put a minus sign here so we don't have to in RR routines
209 a_over_q = SIMINT_NEG(a_over_q);
210
211
212 //////////////////////////////////////////////
213 // Fjt function section
214 // Maximum v value: 8
215 //////////////////////////////////////////////
216 // The parameter to the Fjt function
217 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
218
219
220 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
221
222
223 boys_F_split(PRIM_INT__s_s_s_s, F_x, 8);
224 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
225 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
226 for(n = 0; n <= 8; n++)
227 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
228
229 //////////////////////////////////////////////
230 // Primitive integrals: Vertical recurrance
231 //////////////////////////////////////////////
232
233 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
234 const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
235 const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
236 const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
237 const SIMINT_DBLTYPE vrr_const_5_over_2p = SIMINT_MUL(const_5, one_over_2p);
238 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
239 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
240 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
241 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
242 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
243 const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
244 const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
245
246
247
248 // Forming PRIM_INT__p_s_s_s[8 * 3];
249 for(n = 0; n < 8; ++n) // loop over orders of auxiliary function
250 {
251
252 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
253 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
254
255 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
256 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
257
258 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
259 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
260
261 }
262
263
264
265 // Forming PRIM_INT__d_s_s_s[7 * 6];
266 for(n = 0; n < 7; ++n) // loop over orders of auxiliary function
267 {
268
269 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
270 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
271 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
272
273 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
274 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
275
276 PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
277 PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 2]);
278
279 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
280 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
281 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
282
283 PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
284 PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 4]);
285
286 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
287 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
288 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
289
290 }
291
292
293
294 // Forming PRIM_INT__f_s_s_s[6 * 10];
295 for(n = 0; n < 6; ++n) // loop over orders of auxiliary function
296 {
297
298 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
299 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
300 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
301
302 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
303 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
304
305 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
306 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
307
308 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
309 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
310
311 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
312 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
313
314 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
315 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
316
317 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
318 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
319 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
320
321 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
322 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
323
324 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
325 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
326
327 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
328 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
329 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
330
331 }
332
333
334 VRR_L_f_s_s_p(
335 PRIM_INT__f_s_s_p,
336 PRIM_INT__f_s_s_s,
337 PRIM_INT__d_s_s_s,
338 Q_PB,
339 aoq_PQ,
340 one_over_2pq,
341 2);
342
343
344
345 // Forming PRIM_INT__d_s_s_p[2 * 18];
346 for(n = 0; n < 2; ++n) // loop over orders of auxiliary function
347 {
348
349 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
350 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 0]);
351 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 0]);
352
353 PRIM_INT__d_s_s_p[n * 18 + 1] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
354 PRIM_INT__d_s_s_p[n * 18 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 1]);
355
356 PRIM_INT__d_s_s_p[n * 18 + 2] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
357 PRIM_INT__d_s_s_p[n * 18 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 2]);
358
359 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 1]);
360 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 3]);
361 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 3]);
362
363 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 1]);
364 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 4]);
365 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 4]);
366
367 PRIM_INT__d_s_s_p[n * 18 + 5] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
368 PRIM_INT__d_s_s_p[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 5]);
369
370 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 2]);
371 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 6]);
372 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 6]);
373
374 PRIM_INT__d_s_s_p[n * 18 + 7] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 2]);
375 PRIM_INT__d_s_s_p[n * 18 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 7]);
376
377 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 2]);
378 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 8]);
379 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 8]);
380
381 PRIM_INT__d_s_s_p[n * 18 + 9] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
382 PRIM_INT__d_s_s_p[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 9]);
383
384 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
385 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 10]);
386 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 10]);
387
388 PRIM_INT__d_s_s_p[n * 18 + 11] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
389 PRIM_INT__d_s_s_p[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 11]);
390
391 PRIM_INT__d_s_s_p[n * 18 + 12] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 4]);
392 PRIM_INT__d_s_s_p[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 12]);
393
394 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 4]);
395 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 13]);
396 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 13]);
397
398 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 4]);
399 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 14]);
400 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 14]);
401
402 PRIM_INT__d_s_s_p[n * 18 + 15] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
403 PRIM_INT__d_s_s_p[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 15]);
404
405 PRIM_INT__d_s_s_p[n * 18 + 16] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
406 PRIM_INT__d_s_s_p[n * 18 + 16] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 16]);
407
408 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
409 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 17]);
410 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 17]);
411
412 }
413
414
415 VRR_L_f_s_s_d(
416 PRIM_INT__f_s_s_d,
417 PRIM_INT__f_s_s_p,
418 PRIM_INT__f_s_s_s,
419 PRIM_INT__d_s_s_p,
420 Q_PB,
421 a_over_q,
422 aoq_PQ,
423 one_over_2pq,
424 one_over_2q,
425 1);
426
427
428 VRR_I_g_s_s_s(
429 PRIM_INT__g_s_s_s,
430 PRIM_INT__f_s_s_s,
431 PRIM_INT__d_s_s_s,
432 P_PA,
433 a_over_p,
434 aop_PQ,
435 one_over_2p,
436 5);
437
438
439 VRR_L_g_s_s_p(
440 PRIM_INT__g_s_s_p,
441 PRIM_INT__g_s_s_s,
442 PRIM_INT__f_s_s_s,
443 Q_PB,
444 aoq_PQ,
445 one_over_2pq,
446 2);
447
448
449 ostei_general_vrr_L(4, 0, 0, 2, 1,
450 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
451 PRIM_INT__g_s_s_p, NULL, PRIM_INT__g_s_s_s, PRIM_INT__f_s_s_p, NULL, PRIM_INT__g_s_s_d);
452
453
454 VRR_I_h_s_s_s(
455 PRIM_INT__h_s_s_s,
456 PRIM_INT__g_s_s_s,
457 PRIM_INT__f_s_s_s,
458 P_PA,
459 a_over_p,
460 aop_PQ,
461 one_over_2p,
462 4);
463
464
465 ostei_general_vrr_L(5, 0, 0, 1, 2,
466 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
467 PRIM_INT__h_s_s_s, NULL, NULL, PRIM_INT__g_s_s_s, NULL, PRIM_INT__h_s_s_p);
468
469
470 ostei_general_vrr_L(5, 0, 0, 2, 1,
471 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
472 PRIM_INT__h_s_s_p, NULL, PRIM_INT__h_s_s_s, PRIM_INT__g_s_s_p, NULL, PRIM_INT__h_s_s_d);
473
474
475 ostei_general_vrr1_I(6, 3,
476 one_over_2p, a_over_p, aop_PQ, P_PA,
477 PRIM_INT__h_s_s_s, PRIM_INT__g_s_s_s, PRIM_INT__i_s_s_s);
478
479
480 ostei_general_vrr_L(6, 0, 0, 1, 2,
481 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
482 PRIM_INT__i_s_s_s, NULL, NULL, PRIM_INT__h_s_s_s, NULL, PRIM_INT__i_s_s_p);
483
484
485 ostei_general_vrr_L(6, 0, 0, 2, 1,
486 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
487 PRIM_INT__i_s_s_p, NULL, PRIM_INT__i_s_s_s, PRIM_INT__h_s_s_p, NULL, PRIM_INT__i_s_s_d);
488
489
490
491
492 ////////////////////////////////////
493 // Accumulate contracted integrals
494 ////////////////////////////////////
495 if(lastoffset == 0)
496 {
497 contract_all(60, PRIM_INT__f_s_s_d, PRIM_PTR_INT__f_s_s_d);
498 contract_all(90, PRIM_INT__g_s_s_d, PRIM_PTR_INT__g_s_s_d);
499 contract_all(126, PRIM_INT__h_s_s_d, PRIM_PTR_INT__h_s_s_d);
500 contract_all(168, PRIM_INT__i_s_s_d, PRIM_PTR_INT__i_s_s_d);
501 }
502 else
503 {
504 contract(60, shelloffsets, PRIM_INT__f_s_s_d, PRIM_PTR_INT__f_s_s_d);
505 contract(90, shelloffsets, PRIM_INT__g_s_s_d, PRIM_PTR_INT__g_s_s_d);
506 contract(126, shelloffsets, PRIM_INT__h_s_s_d, PRIM_PTR_INT__h_s_s_d);
507 contract(168, shelloffsets, PRIM_INT__i_s_s_d, PRIM_PTR_INT__i_s_s_d);
508 PRIM_PTR_INT__f_s_s_d += lastoffset*60;
509 PRIM_PTR_INT__g_s_s_d += lastoffset*90;
510 PRIM_PTR_INT__h_s_s_d += lastoffset*126;
511 PRIM_PTR_INT__i_s_s_d += lastoffset*168;
512 }
513
514 } // close loop over j
515 } // close loop over i
516
517 //Advance to the next batch
518 jstart = SIMINT_SIMD_ROUND(jend);
519
520 //////////////////////////////////////////////
521 // Contracted integrals: Horizontal recurrance
522 //////////////////////////////////////////////
523
524
525 const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
526
527
528 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
529 {
530
531 // set up HRR pointers
532 double const * restrict HRR_INT__f_s_s_d = INT__f_s_s_d + abcd * 60;
533 double const * restrict HRR_INT__g_s_s_d = INT__g_s_s_d + abcd * 90;
534 double const * restrict HRR_INT__h_s_s_d = INT__h_s_s_d + abcd * 126;
535 double const * restrict HRR_INT__i_s_s_d = INT__i_s_s_d + abcd * 168;
536 double * restrict HRR_INT__f_f_s_d = INT__f_f_s_d + real_abcd * 600;
537
538 // form INT__f_p_s_d
539 HRR_J_f_p(
540 HRR_INT__f_p_s_d,
541 HRR_INT__f_s_s_d,
542 HRR_INT__g_s_s_d,
543 hAB, 6);
544
545 // form INT__g_p_s_d
546 HRR_J_g_p(
547 HRR_INT__g_p_s_d,
548 HRR_INT__g_s_s_d,
549 HRR_INT__h_s_s_d,
550 hAB, 6);
551
552 // form INT__h_p_s_d
553 ostei_general_hrr_J(5, 1, 0, 2, hAB, HRR_INT__i_s_s_d, HRR_INT__h_s_s_d, HRR_INT__h_p_s_d);
554
555 // form INT__f_d_s_d
556 HRR_J_f_d(
557 HRR_INT__f_d_s_d,
558 HRR_INT__f_p_s_d,
559 HRR_INT__g_p_s_d,
560 hAB, 6);
561
562 // form INT__g_d_s_d
563 ostei_general_hrr_J(4, 2, 0, 2, hAB, HRR_INT__h_p_s_d, HRR_INT__g_p_s_d, HRR_INT__g_d_s_d);
564
565 // form INT__f_f_s_d
566 ostei_general_hrr_J(3, 3, 0, 2, hAB, HRR_INT__g_d_s_d, HRR_INT__f_d_s_d, HRR_INT__f_f_s_d);
567
568
569 } // close HRR loop
570
571
572 } // close loop cdbatch
573
574 istart = iend;
575 } // close loop over ab
576
577 return P.nshell12_clip * Q.nshell12_clip;
578 }
579
580