1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_d_s_d_d(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_s_d_d)8 int ostei_d_s_d_d(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__d_s_d_d)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__d_s_d_d);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int ibra;
26
27 // partition workspace
28 double * const INT__d_s_d_s = work + (SIMINT_NSHELL_SIMD * 0);
29 double * const INT__d_s_f_s = work + (SIMINT_NSHELL_SIMD * 36);
30 double * const INT__d_s_g_s = work + (SIMINT_NSHELL_SIMD * 96);
31 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*186);
32 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
33 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 7;
34 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_d_s = primwork + 25;
35 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_f_s = primwork + 55;
36 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_g_s = primwork + 95;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_p_s = primwork + 140;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_d_s = primwork + 158;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_f_s = primwork + 194;
40 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_g_s = primwork + 254;
41 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_d_s = primwork + 344;
42 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_f_s = primwork + 380;
43 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_g_s = primwork + 440;
44 double * const hrrwork = (double *)(primwork + 530);
45 double * const HRR_INT__d_s_d_p = hrrwork + 0;
46 double * const HRR_INT__d_s_f_p = hrrwork + 108;
47
48
49 // Create constants
50 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
51 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
52 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
53 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
54 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
55
56
57 ////////////////////////////////////////
58 // Loop over shells and primitives
59 ////////////////////////////////////////
60
61 real_abcd = 0;
62 istart = 0;
63 for(ab = 0; ab < P.nshell12_clip; ++ab)
64 {
65 const int iend = istart + P.nprim12[ab];
66
67 cd = 0;
68 jstart = 0;
69
70 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
71 {
72 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
73 int jend = jstart;
74 for(i = 0; i < nshellbatch; i++)
75 jend += Q.nprim12[cd+i];
76
77 // Clear the beginning of the workspace (where we are accumulating integrals)
78 memset(work, 0, SIMINT_NSHELL_SIMD * 186 * sizeof(double));
79 abcd = 0;
80
81
82 for(i = istart; i < iend; ++i)
83 {
84 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
85
86 if(check_screen)
87 {
88 // Skip this whole thing if always insignificant
89 if((P.screen[i] * Q.screen_max) < screen_tol)
90 continue;
91 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
92 }
93
94 icd = 0;
95 iprimcd = 0;
96 nprim_icd = Q.nprim12[cd];
97 double * restrict PRIM_PTR_INT__d_s_d_s = INT__d_s_d_s + abcd * 36;
98 double * restrict PRIM_PTR_INT__d_s_f_s = INT__d_s_f_s + abcd * 60;
99 double * restrict PRIM_PTR_INT__d_s_g_s = INT__d_s_g_s + abcd * 90;
100
101
102
103 // Load these one per loop over i
104 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
105 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
106 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
107
108 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
109
110 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
111 {
112 // calculate the shell offsets
113 // these are the offset from the shell pointed to by cd
114 // for each element
115 int shelloffsets[SIMINT_SIMD_LEN] = {0};
116 int lastoffset = 0;
117 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
118
119 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
120 {
121 // Handle if the first element of the vector is a new shell
122 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
123 {
124 nprim_icd += Q.nprim12[cd + (++icd)];
125 PRIM_PTR_INT__d_s_d_s += 36;
126 PRIM_PTR_INT__d_s_f_s += 60;
127 PRIM_PTR_INT__d_s_g_s += 90;
128 }
129 iprimcd++;
130 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
131 {
132 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
133 {
134 shelloffsets[n] = shelloffsets[n-1] + 1;
135 lastoffset++;
136 nprim_icd += Q.nprim12[cd + (++icd)];
137 }
138 else
139 shelloffsets[n] = shelloffsets[n-1];
140 iprimcd++;
141 }
142 }
143 else
144 iprimcd += SIMINT_SIMD_LEN;
145
146 // Do we have to compute this vector (or has it been screened out)?
147 // (not_screened != 0 means we have to do this vector)
148 if(check_screen)
149 {
150 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
151 if(vmax < screen_tol)
152 {
153 PRIM_PTR_INT__d_s_d_s += lastoffset*36;
154 PRIM_PTR_INT__d_s_f_s += lastoffset*60;
155 PRIM_PTR_INT__d_s_g_s += lastoffset*90;
156 continue;
157 }
158 }
159
160 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
161 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
162 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
163 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
164
165
166 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
167 SIMINT_DBLTYPE PQ[3];
168 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
169 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
170 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
171 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
172 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
173 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
174
175 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
176 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
177 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
178 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
179 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
180 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
181 const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
182
183 // NOTE: Minus sign!
184 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
185 SIMINT_DBLTYPE aop_PQ[3];
186 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
187 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
188 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
189
190 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
191 SIMINT_DBLTYPE aoq_PQ[3];
192 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
193 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
194 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
195 // Put a minus sign here so we don't have to in RR routines
196 a_over_q = SIMINT_NEG(a_over_q);
197
198
199 //////////////////////////////////////////////
200 // Fjt function section
201 // Maximum v value: 6
202 //////////////////////////////////////////////
203 // The parameter to the Fjt function
204 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
205
206
207 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
208
209
210 boys_F_split(PRIM_INT__s_s_s_s, F_x, 6);
211 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
212 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
213 for(n = 0; n <= 6; n++)
214 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
215
216 //////////////////////////////////////////////
217 // Primitive integrals: Vertical recurrance
218 //////////////////////////////////////////////
219
220 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
221 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
222 const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
223 const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
224 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
225 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
226 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
227 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
228
229
230
231 // Forming PRIM_INT__s_s_p_s[6 * 3];
232 for(n = 0; n < 6; ++n) // loop over orders of auxiliary function
233 {
234
235 PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
236 PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
237
238 PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
239 PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
240
241 PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
242 PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
243
244 }
245
246
247
248 // Forming PRIM_INT__s_s_d_s[5 * 6];
249 for(n = 0; n < 5; ++n) // loop over orders of auxiliary function
250 {
251
252 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
253 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 0]);
254 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 0]);
255
256 PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
257 PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 1]);
258
259 PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 0]);
260 PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 2]);
261
262 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
263 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 3]);
264 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 3]);
265
266 PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 1]);
267 PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 4]);
268
269 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
270 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_d_s[n * 6 + 5]);
271 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 5]);
272
273 }
274
275
276
277 // Forming PRIM_INT__p_s_d_s[2 * 18];
278 for(n = 0; n < 2; ++n) // loop over orders of auxiliary function
279 {
280
281 PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
282 PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
283 PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
284
285 PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 1]);
286 PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 1]);
287 PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 1]);
288
289 PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 2]);
290 PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 2]);
291 PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 2]);
292
293 PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
294 PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 3]);
295
296 PRIM_INT__p_s_d_s[n * 18 + 4] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 4]);
297 PRIM_INT__p_s_d_s[n * 18 + 4] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 4]);
298
299 PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
300 PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 5]);
301
302 PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
303 PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 6]);
304
305 PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 1]);
306 PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 7]);
307 PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 7]);
308
309 PRIM_INT__p_s_d_s[n * 18 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 2]);
310 PRIM_INT__p_s_d_s[n * 18 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 8]);
311
312 PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
313 PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 9]);
314 PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 9]);
315
316 PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 4]);
317 PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 10]);
318 PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 10]);
319
320 PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
321 PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 11]);
322
323 PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
324 PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 12]);
325
326 PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
327 PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 13]);
328
329 PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 2]);
330 PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 14]);
331 PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 14]);
332
333 PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
334 PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 15]);
335
336 PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 4]);
337 PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 16]);
338 PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 16]);
339
340 PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
341 PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 17]);
342 PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 17]);
343
344 }
345
346
347
348 // Forming PRIM_INT__p_s_p_s[2 * 9];
349 for(n = 0; n < 2; ++n) // loop over orders of auxiliary function
350 {
351
352 PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
353 PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
354 PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
355
356 PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 1]);
357 PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 1]);
358
359 PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 2]);
360 PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 2]);
361
362 PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
363 PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 3]);
364
365 PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
366 PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 4]);
367 PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 4]);
368
369 PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 2]);
370 PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 5]);
371
372 PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 0]);
373 PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 6]);
374
375 PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 1]);
376 PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 7]);
377
378 PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
379 PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 8]);
380 PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 8]);
381
382 }
383
384
385 VRR_I_d_s_d_s(
386 PRIM_INT__d_s_d_s,
387 PRIM_INT__p_s_d_s,
388 PRIM_INT__s_s_d_s,
389 PRIM_INT__p_s_p_s,
390 P_PA,
391 a_over_p,
392 aop_PQ,
393 one_over_2p,
394 one_over_2pq,
395 1);
396
397
398
399 // Forming PRIM_INT__s_s_f_s[4 * 10];
400 for(n = 0; n < 4; ++n) // loop over orders of auxiliary function
401 {
402
403 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
404 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 0]);
405 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]), PRIM_INT__s_s_f_s[n * 10 + 0]);
406
407 PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
408 PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 1]);
409
410 PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
411 PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 2]);
412
413 PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
414 PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 3]);
415
416 PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
417 PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_s_f_s[n * 10 + 4]);
418
419 PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
420 PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 5]);
421
422 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
423 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 6]);
424 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_p_s[n * 3 + 1]), PRIM_INT__s_s_f_s[n * 10 + 6]);
425
426 PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
427 PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 7]);
428
429 PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
430 PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 8]);
431
432 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
433 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 9]);
434 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_p_s[n * 3 + 2]), PRIM_INT__s_s_f_s[n * 10 + 9]);
435
436 }
437
438
439 VRR_I_p_s_f_s(
440 PRIM_INT__p_s_f_s,
441 PRIM_INT__s_s_f_s,
442 PRIM_INT__s_s_d_s,
443 P_PA,
444 aop_PQ,
445 one_over_2pq,
446 2);
447
448
449 VRR_I_d_s_f_s(
450 PRIM_INT__d_s_f_s,
451 PRIM_INT__p_s_f_s,
452 PRIM_INT__s_s_f_s,
453 PRIM_INT__p_s_d_s,
454 P_PA,
455 a_over_p,
456 aop_PQ,
457 one_over_2p,
458 one_over_2pq,
459 1);
460
461
462 VRR_K_s_s_g_s(
463 PRIM_INT__s_s_g_s,
464 PRIM_INT__s_s_f_s,
465 PRIM_INT__s_s_d_s,
466 Q_PA,
467 a_over_q,
468 aoq_PQ,
469 one_over_2q,
470 3);
471
472
473 VRR_I_p_s_g_s(
474 PRIM_INT__p_s_g_s,
475 PRIM_INT__s_s_g_s,
476 PRIM_INT__s_s_f_s,
477 P_PA,
478 aop_PQ,
479 one_over_2pq,
480 2);
481
482
483 ostei_general_vrr_I(2, 0, 4, 0, 1,
484 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
485 PRIM_INT__p_s_g_s, PRIM_INT__s_s_g_s, NULL, PRIM_INT__p_s_f_s, NULL, PRIM_INT__d_s_g_s);
486
487
488
489
490 ////////////////////////////////////
491 // Accumulate contracted integrals
492 ////////////////////////////////////
493 if(lastoffset == 0)
494 {
495 contract_all(36, PRIM_INT__d_s_d_s, PRIM_PTR_INT__d_s_d_s);
496 contract_all(60, PRIM_INT__d_s_f_s, PRIM_PTR_INT__d_s_f_s);
497 contract_all(90, PRIM_INT__d_s_g_s, PRIM_PTR_INT__d_s_g_s);
498 }
499 else
500 {
501 contract(36, shelloffsets, PRIM_INT__d_s_d_s, PRIM_PTR_INT__d_s_d_s);
502 contract(60, shelloffsets, PRIM_INT__d_s_f_s, PRIM_PTR_INT__d_s_f_s);
503 contract(90, shelloffsets, PRIM_INT__d_s_g_s, PRIM_PTR_INT__d_s_g_s);
504 PRIM_PTR_INT__d_s_d_s += lastoffset*36;
505 PRIM_PTR_INT__d_s_f_s += lastoffset*60;
506 PRIM_PTR_INT__d_s_g_s += lastoffset*90;
507 }
508
509 } // close loop over j
510 } // close loop over i
511
512 //Advance to the next batch
513 jstart = SIMINT_SIMD_ROUND(jend);
514
515 //////////////////////////////////////////////
516 // Contracted integrals: Horizontal recurrance
517 //////////////////////////////////////////////
518
519
520
521
522 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
523 {
524 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
525
526 // set up HRR pointers
527 double const * restrict HRR_INT__d_s_d_s = INT__d_s_d_s + abcd * 36;
528 double const * restrict HRR_INT__d_s_f_s = INT__d_s_f_s + abcd * 60;
529 double const * restrict HRR_INT__d_s_g_s = INT__d_s_g_s + abcd * 90;
530 double * restrict HRR_INT__d_s_d_d = INT__d_s_d_d + real_abcd * 216;
531
532 // form INT__d_s_d_p
533 for(ibra = 0; ibra < 6; ++ibra)
534 {
535 HRR_INT__d_s_d_p[ibra * 18 + 0] = HRR_INT__d_s_f_s[ibra * 10 + 0] + ( hCD[0] * HRR_INT__d_s_d_s[ibra * 6 + 0] );
536
537 HRR_INT__d_s_d_p[ibra * 18 + 1] = HRR_INT__d_s_f_s[ibra * 10 + 1] + ( hCD[1] * HRR_INT__d_s_d_s[ibra * 6 + 0] );
538
539 HRR_INT__d_s_d_p[ibra * 18 + 2] = HRR_INT__d_s_f_s[ibra * 10 + 2] + ( hCD[2] * HRR_INT__d_s_d_s[ibra * 6 + 0] );
540
541 HRR_INT__d_s_d_p[ibra * 18 + 3] = HRR_INT__d_s_f_s[ibra * 10 + 1] + ( hCD[0] * HRR_INT__d_s_d_s[ibra * 6 + 1] );
542
543 HRR_INT__d_s_d_p[ibra * 18 + 4] = HRR_INT__d_s_f_s[ibra * 10 + 3] + ( hCD[1] * HRR_INT__d_s_d_s[ibra * 6 + 1] );
544
545 HRR_INT__d_s_d_p[ibra * 18 + 5] = HRR_INT__d_s_f_s[ibra * 10 + 4] + ( hCD[2] * HRR_INT__d_s_d_s[ibra * 6 + 1] );
546
547 HRR_INT__d_s_d_p[ibra * 18 + 6] = HRR_INT__d_s_f_s[ibra * 10 + 2] + ( hCD[0] * HRR_INT__d_s_d_s[ibra * 6 + 2] );
548
549 HRR_INT__d_s_d_p[ibra * 18 + 7] = HRR_INT__d_s_f_s[ibra * 10 + 4] + ( hCD[1] * HRR_INT__d_s_d_s[ibra * 6 + 2] );
550
551 HRR_INT__d_s_d_p[ibra * 18 + 8] = HRR_INT__d_s_f_s[ibra * 10 + 5] + ( hCD[2] * HRR_INT__d_s_d_s[ibra * 6 + 2] );
552
553 HRR_INT__d_s_d_p[ibra * 18 + 9] = HRR_INT__d_s_f_s[ibra * 10 + 3] + ( hCD[0] * HRR_INT__d_s_d_s[ibra * 6 + 3] );
554
555 HRR_INT__d_s_d_p[ibra * 18 + 10] = HRR_INT__d_s_f_s[ibra * 10 + 6] + ( hCD[1] * HRR_INT__d_s_d_s[ibra * 6 + 3] );
556
557 HRR_INT__d_s_d_p[ibra * 18 + 11] = HRR_INT__d_s_f_s[ibra * 10 + 7] + ( hCD[2] * HRR_INT__d_s_d_s[ibra * 6 + 3] );
558
559 HRR_INT__d_s_d_p[ibra * 18 + 12] = HRR_INT__d_s_f_s[ibra * 10 + 4] + ( hCD[0] * HRR_INT__d_s_d_s[ibra * 6 + 4] );
560
561 HRR_INT__d_s_d_p[ibra * 18 + 13] = HRR_INT__d_s_f_s[ibra * 10 + 7] + ( hCD[1] * HRR_INT__d_s_d_s[ibra * 6 + 4] );
562
563 HRR_INT__d_s_d_p[ibra * 18 + 14] = HRR_INT__d_s_f_s[ibra * 10 + 8] + ( hCD[2] * HRR_INT__d_s_d_s[ibra * 6 + 4] );
564
565 HRR_INT__d_s_d_p[ibra * 18 + 15] = HRR_INT__d_s_f_s[ibra * 10 + 5] + ( hCD[0] * HRR_INT__d_s_d_s[ibra * 6 + 5] );
566
567 HRR_INT__d_s_d_p[ibra * 18 + 16] = HRR_INT__d_s_f_s[ibra * 10 + 8] + ( hCD[1] * HRR_INT__d_s_d_s[ibra * 6 + 5] );
568
569 HRR_INT__d_s_d_p[ibra * 18 + 17] = HRR_INT__d_s_f_s[ibra * 10 + 9] + ( hCD[2] * HRR_INT__d_s_d_s[ibra * 6 + 5] );
570
571 }
572
573 // form INT__d_s_f_p
574 HRR_L_f_p(
575 HRR_INT__d_s_f_p,
576 HRR_INT__d_s_f_s,
577 HRR_INT__d_s_g_s,
578 hCD, 6);
579
580 // form INT__d_s_d_d
581 HRR_L_d_d(
582 HRR_INT__d_s_d_d,
583 HRR_INT__d_s_d_p,
584 HRR_INT__d_s_f_p,
585 hCD, 6);
586
587
588 } // close HRR loop
589
590
591 } // close loop cdbatch
592
593 istart = iend;
594 } // close loop over ab
595
596 return P.nshell12_clip * Q.nshell12_clip;
597 }
598
599