1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_s_d_f_f(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__s_d_f_f)8 int ostei_s_d_f_f(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__s_d_f_f)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__s_d_f_f);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int ibra;
26
27 // partition workspace
28 double * const INT__s_d_f_s = work + (SIMINT_NSHELL_SIMD * 0);
29 double * const INT__s_d_g_s = work + (SIMINT_NSHELL_SIMD * 60);
30 double * const INT__s_d_h_s = work + (SIMINT_NSHELL_SIMD * 150);
31 double * const INT__s_d_i_s = work + (SIMINT_NSHELL_SIMD * 276);
32 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*444);
33 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
34 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 9;
35 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_d_s = primwork + 33;
36 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_f_s = primwork + 75;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_g_s = primwork + 135;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_h_s = primwork + 210;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_i_s = primwork + 294;
40 SIMINT_DBLTYPE * const restrict PRIM_INT__s_p_d_s = primwork + 378;
41 SIMINT_DBLTYPE * const restrict PRIM_INT__s_p_f_s = primwork + 414;
42 SIMINT_DBLTYPE * const restrict PRIM_INT__s_p_g_s = primwork + 474;
43 SIMINT_DBLTYPE * const restrict PRIM_INT__s_p_h_s = primwork + 564;
44 SIMINT_DBLTYPE * const restrict PRIM_INT__s_p_i_s = primwork + 690;
45 SIMINT_DBLTYPE * const restrict PRIM_INT__s_d_f_s = primwork + 858;
46 SIMINT_DBLTYPE * const restrict PRIM_INT__s_d_g_s = primwork + 918;
47 SIMINT_DBLTYPE * const restrict PRIM_INT__s_d_h_s = primwork + 1008;
48 SIMINT_DBLTYPE * const restrict PRIM_INT__s_d_i_s = primwork + 1134;
49 double * const hrrwork = (double *)(primwork + 1302);
50 double * const HRR_INT__s_d_f_p = hrrwork + 0;
51 double * const HRR_INT__s_d_f_d = hrrwork + 180;
52 double * const HRR_INT__s_d_g_p = hrrwork + 540;
53 double * const HRR_INT__s_d_g_d = hrrwork + 810;
54 double * const HRR_INT__s_d_h_p = hrrwork + 1350;
55
56
57 // Create constants
58 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
59 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
60 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
61 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
62 const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
63 const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
64 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
65
66
67 ////////////////////////////////////////
68 // Loop over shells and primitives
69 ////////////////////////////////////////
70
71 real_abcd = 0;
72 istart = 0;
73 for(ab = 0; ab < P.nshell12_clip; ++ab)
74 {
75 const int iend = istart + P.nprim12[ab];
76
77 cd = 0;
78 jstart = 0;
79
80 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
81 {
82 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
83 int jend = jstart;
84 for(i = 0; i < nshellbatch; i++)
85 jend += Q.nprim12[cd+i];
86
87 // Clear the beginning of the workspace (where we are accumulating integrals)
88 memset(work, 0, SIMINT_NSHELL_SIMD * 444 * sizeof(double));
89 abcd = 0;
90
91
92 for(i = istart; i < iend; ++i)
93 {
94 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
95
96 if(check_screen)
97 {
98 // Skip this whole thing if always insignificant
99 if((P.screen[i] * Q.screen_max) < screen_tol)
100 continue;
101 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
102 }
103
104 icd = 0;
105 iprimcd = 0;
106 nprim_icd = Q.nprim12[cd];
107 double * restrict PRIM_PTR_INT__s_d_f_s = INT__s_d_f_s + abcd * 60;
108 double * restrict PRIM_PTR_INT__s_d_g_s = INT__s_d_g_s + abcd * 90;
109 double * restrict PRIM_PTR_INT__s_d_h_s = INT__s_d_h_s + abcd * 126;
110 double * restrict PRIM_PTR_INT__s_d_i_s = INT__s_d_i_s + abcd * 168;
111
112
113
114 // Load these one per loop over i
115 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
116 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
117 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
118
119 const SIMINT_DBLTYPE P_PB[3] = { SIMINT_DBLSET1(P.PB_x[i]), SIMINT_DBLSET1(P.PB_y[i]), SIMINT_DBLSET1(P.PB_z[i]) };
120
121 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
122 {
123 // calculate the shell offsets
124 // these are the offset from the shell pointed to by cd
125 // for each element
126 int shelloffsets[SIMINT_SIMD_LEN] = {0};
127 int lastoffset = 0;
128 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
129
130 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
131 {
132 // Handle if the first element of the vector is a new shell
133 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
134 {
135 nprim_icd += Q.nprim12[cd + (++icd)];
136 PRIM_PTR_INT__s_d_f_s += 60;
137 PRIM_PTR_INT__s_d_g_s += 90;
138 PRIM_PTR_INT__s_d_h_s += 126;
139 PRIM_PTR_INT__s_d_i_s += 168;
140 }
141 iprimcd++;
142 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
143 {
144 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
145 {
146 shelloffsets[n] = shelloffsets[n-1] + 1;
147 lastoffset++;
148 nprim_icd += Q.nprim12[cd + (++icd)];
149 }
150 else
151 shelloffsets[n] = shelloffsets[n-1];
152 iprimcd++;
153 }
154 }
155 else
156 iprimcd += SIMINT_SIMD_LEN;
157
158 // Do we have to compute this vector (or has it been screened out)?
159 // (not_screened != 0 means we have to do this vector)
160 if(check_screen)
161 {
162 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
163 if(vmax < screen_tol)
164 {
165 PRIM_PTR_INT__s_d_f_s += lastoffset*60;
166 PRIM_PTR_INT__s_d_g_s += lastoffset*90;
167 PRIM_PTR_INT__s_d_h_s += lastoffset*126;
168 PRIM_PTR_INT__s_d_i_s += lastoffset*168;
169 continue;
170 }
171 }
172
173 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
174 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
175 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
176 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
177
178
179 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
180 SIMINT_DBLTYPE PQ[3];
181 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
182 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
183 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
184 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
185 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
186 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
187
188 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
189 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
190 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
191 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
192 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
193 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
194 const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
195
196 // NOTE: Minus sign!
197 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
198 SIMINT_DBLTYPE aop_PQ[3];
199 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
200 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
201 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
202
203 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
204 SIMINT_DBLTYPE aoq_PQ[3];
205 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
206 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
207 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
208 // Put a minus sign here so we don't have to in RR routines
209 a_over_q = SIMINT_NEG(a_over_q);
210
211
212 //////////////////////////////////////////////
213 // Fjt function section
214 // Maximum v value: 8
215 //////////////////////////////////////////////
216 // The parameter to the Fjt function
217 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
218
219
220 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
221
222
223 boys_F_split(PRIM_INT__s_s_s_s, F_x, 8);
224 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
225 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
226 for(n = 0; n <= 8; n++)
227 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
228
229 //////////////////////////////////////////////
230 // Primitive integrals: Vertical recurrance
231 //////////////////////////////////////////////
232
233 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
234 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
235 const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
236 const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
237 const SIMINT_DBLTYPE vrr_const_4_over_2q = SIMINT_MUL(const_4, one_over_2q);
238 const SIMINT_DBLTYPE vrr_const_5_over_2q = SIMINT_MUL(const_5, one_over_2q);
239 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
240 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
241 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
242 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
243 const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
244 const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
245
246
247
248 // Forming PRIM_INT__s_s_p_s[8 * 3];
249 for(n = 0; n < 8; ++n) // loop over orders of auxiliary function
250 {
251
252 PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
253 PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
254
255 PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
256 PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
257
258 PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
259 PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
260
261 }
262
263
264
265 // Forming PRIM_INT__s_s_d_s[7 * 6];
266 for(n = 0; n < 7; ++n) // loop over orders of auxiliary function
267 {
268
269 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
270 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 0]);
271 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 0]);
272
273 PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
274 PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 1]);
275
276 PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 0]);
277 PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 2]);
278
279 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
280 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 3]);
281 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 3]);
282
283 PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 1]);
284 PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 4]);
285
286 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
287 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_d_s[n * 6 + 5]);
288 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 5]);
289
290 }
291
292
293
294 // Forming PRIM_INT__s_s_f_s[6 * 10];
295 for(n = 0; n < 6; ++n) // loop over orders of auxiliary function
296 {
297
298 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
299 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 0]);
300 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]), PRIM_INT__s_s_f_s[n * 10 + 0]);
301
302 PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
303 PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 1]);
304
305 PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
306 PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 2]);
307
308 PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
309 PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 3]);
310
311 PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
312 PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_s_f_s[n * 10 + 4]);
313
314 PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
315 PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 5]);
316
317 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
318 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 6]);
319 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_p_s[n * 3 + 1]), PRIM_INT__s_s_f_s[n * 10 + 6]);
320
321 PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
322 PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 7]);
323
324 PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
325 PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 8]);
326
327 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
328 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 9]);
329 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_p_s[n * 3 + 2]), PRIM_INT__s_s_f_s[n * 10 + 9]);
330
331 }
332
333
334 VRR_J_s_p_f_s(
335 PRIM_INT__s_p_f_s,
336 PRIM_INT__s_s_f_s,
337 PRIM_INT__s_s_d_s,
338 P_PB,
339 aop_PQ,
340 one_over_2pq,
341 2);
342
343
344
345 // Forming PRIM_INT__s_p_d_s[2 * 18];
346 for(n = 0; n < 2; ++n) // loop over orders of auxiliary function
347 {
348
349 PRIM_INT__s_p_d_s[n * 18 + 0] = SIMINT_MUL(P_PB[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
350 PRIM_INT__s_p_d_s[n * 18 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_p_d_s[n * 18 + 0]);
351 PRIM_INT__s_p_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_p_d_s[n * 18 + 0]);
352
353 PRIM_INT__s_p_d_s[n * 18 + 1] = SIMINT_MUL(P_PB[0], PRIM_INT__s_s_d_s[n * 6 + 1]);
354 PRIM_INT__s_p_d_s[n * 18 + 1] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_p_d_s[n * 18 + 1]);
355 PRIM_INT__s_p_d_s[n * 18 + 1] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_p_d_s[n * 18 + 1]);
356
357 PRIM_INT__s_p_d_s[n * 18 + 2] = SIMINT_MUL(P_PB[0], PRIM_INT__s_s_d_s[n * 6 + 2]);
358 PRIM_INT__s_p_d_s[n * 18 + 2] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__s_p_d_s[n * 18 + 2]);
359 PRIM_INT__s_p_d_s[n * 18 + 2] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_p_d_s[n * 18 + 2]);
360
361 PRIM_INT__s_p_d_s[n * 18 + 3] = SIMINT_MUL(P_PB[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
362 PRIM_INT__s_p_d_s[n * 18 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_p_d_s[n * 18 + 3]);
363
364 PRIM_INT__s_p_d_s[n * 18 + 4] = SIMINT_MUL(P_PB[0], PRIM_INT__s_s_d_s[n * 6 + 4]);
365 PRIM_INT__s_p_d_s[n * 18 + 4] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__s_p_d_s[n * 18 + 4]);
366
367 PRIM_INT__s_p_d_s[n * 18 + 5] = SIMINT_MUL(P_PB[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
368 PRIM_INT__s_p_d_s[n * 18 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_p_d_s[n * 18 + 5]);
369
370 PRIM_INT__s_p_d_s[n * 18 + 6] = SIMINT_MUL(P_PB[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
371 PRIM_INT__s_p_d_s[n * 18 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_p_d_s[n * 18 + 6]);
372
373 PRIM_INT__s_p_d_s[n * 18 + 7] = SIMINT_MUL(P_PB[1], PRIM_INT__s_s_d_s[n * 6 + 1]);
374 PRIM_INT__s_p_d_s[n * 18 + 7] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_p_d_s[n * 18 + 7]);
375 PRIM_INT__s_p_d_s[n * 18 + 7] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_p_d_s[n * 18 + 7]);
376
377 PRIM_INT__s_p_d_s[n * 18 + 8] = SIMINT_MUL(P_PB[1], PRIM_INT__s_s_d_s[n * 6 + 2]);
378 PRIM_INT__s_p_d_s[n * 18 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__s_p_d_s[n * 18 + 8]);
379
380 PRIM_INT__s_p_d_s[n * 18 + 9] = SIMINT_MUL(P_PB[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
381 PRIM_INT__s_p_d_s[n * 18 + 9] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_p_d_s[n * 18 + 9]);
382 PRIM_INT__s_p_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_p_d_s[n * 18 + 9]);
383
384 PRIM_INT__s_p_d_s[n * 18 + 10] = SIMINT_MUL(P_PB[1], PRIM_INT__s_s_d_s[n * 6 + 4]);
385 PRIM_INT__s_p_d_s[n * 18 + 10] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__s_p_d_s[n * 18 + 10]);
386 PRIM_INT__s_p_d_s[n * 18 + 10] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_p_d_s[n * 18 + 10]);
387
388 PRIM_INT__s_p_d_s[n * 18 + 11] = SIMINT_MUL(P_PB[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
389 PRIM_INT__s_p_d_s[n * 18 + 11] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_p_d_s[n * 18 + 11]);
390
391 PRIM_INT__s_p_d_s[n * 18 + 12] = SIMINT_MUL(P_PB[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
392 PRIM_INT__s_p_d_s[n * 18 + 12] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_p_d_s[n * 18 + 12]);
393
394 PRIM_INT__s_p_d_s[n * 18 + 13] = SIMINT_MUL(P_PB[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
395 PRIM_INT__s_p_d_s[n * 18 + 13] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_p_d_s[n * 18 + 13]);
396
397 PRIM_INT__s_p_d_s[n * 18 + 14] = SIMINT_MUL(P_PB[2], PRIM_INT__s_s_d_s[n * 6 + 2]);
398 PRIM_INT__s_p_d_s[n * 18 + 14] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__s_p_d_s[n * 18 + 14]);
399 PRIM_INT__s_p_d_s[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_p_d_s[n * 18 + 14]);
400
401 PRIM_INT__s_p_d_s[n * 18 + 15] = SIMINT_MUL(P_PB[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
402 PRIM_INT__s_p_d_s[n * 18 + 15] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_p_d_s[n * 18 + 15]);
403
404 PRIM_INT__s_p_d_s[n * 18 + 16] = SIMINT_MUL(P_PB[2], PRIM_INT__s_s_d_s[n * 6 + 4]);
405 PRIM_INT__s_p_d_s[n * 18 + 16] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__s_p_d_s[n * 18 + 16]);
406 PRIM_INT__s_p_d_s[n * 18 + 16] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_p_d_s[n * 18 + 16]);
407
408 PRIM_INT__s_p_d_s[n * 18 + 17] = SIMINT_MUL(P_PB[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
409 PRIM_INT__s_p_d_s[n * 18 + 17] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_p_d_s[n * 18 + 17]);
410 PRIM_INT__s_p_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_p_d_s[n * 18 + 17]);
411
412 }
413
414
415 VRR_J_s_d_f_s(
416 PRIM_INT__s_d_f_s,
417 PRIM_INT__s_p_f_s,
418 PRIM_INT__s_s_f_s,
419 PRIM_INT__s_p_d_s,
420 P_PB,
421 a_over_p,
422 aop_PQ,
423 one_over_2p,
424 one_over_2pq,
425 1);
426
427
428 VRR_K_s_s_g_s(
429 PRIM_INT__s_s_g_s,
430 PRIM_INT__s_s_f_s,
431 PRIM_INT__s_s_d_s,
432 Q_PA,
433 a_over_q,
434 aoq_PQ,
435 one_over_2q,
436 5);
437
438
439 VRR_J_s_p_g_s(
440 PRIM_INT__s_p_g_s,
441 PRIM_INT__s_s_g_s,
442 PRIM_INT__s_s_f_s,
443 P_PB,
444 aop_PQ,
445 one_over_2pq,
446 2);
447
448
449 ostei_general_vrr_J(0, 2, 4, 0, 1,
450 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PB,
451 PRIM_INT__s_p_g_s, NULL, PRIM_INT__s_s_g_s, PRIM_INT__s_p_f_s, NULL, PRIM_INT__s_d_g_s);
452
453
454 VRR_K_s_s_h_s(
455 PRIM_INT__s_s_h_s,
456 PRIM_INT__s_s_g_s,
457 PRIM_INT__s_s_f_s,
458 Q_PA,
459 a_over_q,
460 aoq_PQ,
461 one_over_2q,
462 4);
463
464
465 ostei_general_vrr_J(0, 1, 5, 0, 2,
466 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PB,
467 PRIM_INT__s_s_h_s, NULL, NULL, PRIM_INT__s_s_g_s, NULL, PRIM_INT__s_p_h_s);
468
469
470 ostei_general_vrr_J(0, 2, 5, 0, 1,
471 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PB,
472 PRIM_INT__s_p_h_s, NULL, PRIM_INT__s_s_h_s, PRIM_INT__s_p_g_s, NULL, PRIM_INT__s_d_h_s);
473
474
475 ostei_general_vrr1_K(6, 3,
476 one_over_2q, a_over_q, aoq_PQ, Q_PA,
477 PRIM_INT__s_s_h_s, PRIM_INT__s_s_g_s, PRIM_INT__s_s_i_s);
478
479
480 ostei_general_vrr_J(0, 1, 6, 0, 2,
481 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PB,
482 PRIM_INT__s_s_i_s, NULL, NULL, PRIM_INT__s_s_h_s, NULL, PRIM_INT__s_p_i_s);
483
484
485 ostei_general_vrr_J(0, 2, 6, 0, 1,
486 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PB,
487 PRIM_INT__s_p_i_s, NULL, PRIM_INT__s_s_i_s, PRIM_INT__s_p_h_s, NULL, PRIM_INT__s_d_i_s);
488
489
490
491
492 ////////////////////////////////////
493 // Accumulate contracted integrals
494 ////////////////////////////////////
495 if(lastoffset == 0)
496 {
497 contract_all(60, PRIM_INT__s_d_f_s, PRIM_PTR_INT__s_d_f_s);
498 contract_all(90, PRIM_INT__s_d_g_s, PRIM_PTR_INT__s_d_g_s);
499 contract_all(126, PRIM_INT__s_d_h_s, PRIM_PTR_INT__s_d_h_s);
500 contract_all(168, PRIM_INT__s_d_i_s, PRIM_PTR_INT__s_d_i_s);
501 }
502 else
503 {
504 contract(60, shelloffsets, PRIM_INT__s_d_f_s, PRIM_PTR_INT__s_d_f_s);
505 contract(90, shelloffsets, PRIM_INT__s_d_g_s, PRIM_PTR_INT__s_d_g_s);
506 contract(126, shelloffsets, PRIM_INT__s_d_h_s, PRIM_PTR_INT__s_d_h_s);
507 contract(168, shelloffsets, PRIM_INT__s_d_i_s, PRIM_PTR_INT__s_d_i_s);
508 PRIM_PTR_INT__s_d_f_s += lastoffset*60;
509 PRIM_PTR_INT__s_d_g_s += lastoffset*90;
510 PRIM_PTR_INT__s_d_h_s += lastoffset*126;
511 PRIM_PTR_INT__s_d_i_s += lastoffset*168;
512 }
513
514 } // close loop over j
515 } // close loop over i
516
517 //Advance to the next batch
518 jstart = SIMINT_SIMD_ROUND(jend);
519
520 //////////////////////////////////////////////
521 // Contracted integrals: Horizontal recurrance
522 //////////////////////////////////////////////
523
524
525
526
527 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
528 {
529 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
530
531 // set up HRR pointers
532 double const * restrict HRR_INT__s_d_f_s = INT__s_d_f_s + abcd * 60;
533 double const * restrict HRR_INT__s_d_g_s = INT__s_d_g_s + abcd * 90;
534 double const * restrict HRR_INT__s_d_h_s = INT__s_d_h_s + abcd * 126;
535 double const * restrict HRR_INT__s_d_i_s = INT__s_d_i_s + abcd * 168;
536 double * restrict HRR_INT__s_d_f_f = INT__s_d_f_f + real_abcd * 600;
537
538 // form INT__s_d_f_p
539 HRR_L_f_p(
540 HRR_INT__s_d_f_p,
541 HRR_INT__s_d_f_s,
542 HRR_INT__s_d_g_s,
543 hCD, 6);
544
545 // form INT__s_d_g_p
546 HRR_L_g_p(
547 HRR_INT__s_d_g_p,
548 HRR_INT__s_d_g_s,
549 HRR_INT__s_d_h_s,
550 hCD, 6);
551
552 // form INT__s_d_h_p
553 ostei_general_hrr_L(0, 2, 5, 1, hCD, HRR_INT__s_d_i_s, HRR_INT__s_d_h_s, HRR_INT__s_d_h_p);
554
555 // form INT__s_d_f_d
556 HRR_L_f_d(
557 HRR_INT__s_d_f_d,
558 HRR_INT__s_d_f_p,
559 HRR_INT__s_d_g_p,
560 hCD, 6);
561
562 // form INT__s_d_g_d
563 ostei_general_hrr_L(0, 2, 4, 2, hCD, HRR_INT__s_d_h_p, HRR_INT__s_d_g_p, HRR_INT__s_d_g_d);
564
565 // form INT__s_d_f_f
566 ostei_general_hrr_L(0, 2, 3, 3, hCD, HRR_INT__s_d_g_d, HRR_INT__s_d_f_d, HRR_INT__s_d_f_f);
567
568
569 } // close HRR loop
570
571
572 } // close loop cdbatch
573
574 istart = iend;
575 } // close loop over ab
576
577 return P.nshell12_clip * Q.nshell12_clip;
578 }
579
580