1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_d_d_s_p(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_d_s_p)8 int ostei_d_d_s_p(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__d_d_s_p)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__d_d_s_p);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int iket;
26
27 // partition workspace
28 double * const INT__d_s_s_p = work + (SIMINT_NSHELL_SIMD * 0);
29 double * const INT__f_s_s_p = work + (SIMINT_NSHELL_SIMD * 18);
30 double * const INT__g_s_s_p = work + (SIMINT_NSHELL_SIMD * 48);
31 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*93);
32 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
33 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 6;
34 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 21;
35 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_p = primwork + 45;
36 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 63;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_p = primwork + 93;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 123;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_p = primwork + 153;
40 double * const hrrwork = (double *)(primwork + 198);
41 double * const HRR_INT__d_p_s_p = hrrwork + 0;
42 double * const HRR_INT__f_p_s_p = hrrwork + 54;
43
44
45 // Create constants
46 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
47 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
48 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
49 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
50 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
51
52
53 ////////////////////////////////////////
54 // Loop over shells and primitives
55 ////////////////////////////////////////
56
57 real_abcd = 0;
58 istart = 0;
59 for(ab = 0; ab < P.nshell12_clip; ++ab)
60 {
61 const int iend = istart + P.nprim12[ab];
62
63 cd = 0;
64 jstart = 0;
65
66 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
67 {
68 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
69 int jend = jstart;
70 for(i = 0; i < nshellbatch; i++)
71 jend += Q.nprim12[cd+i];
72
73 // Clear the beginning of the workspace (where we are accumulating integrals)
74 memset(work, 0, SIMINT_NSHELL_SIMD * 93 * sizeof(double));
75 abcd = 0;
76
77
78 for(i = istart; i < iend; ++i)
79 {
80 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
81
82 if(check_screen)
83 {
84 // Skip this whole thing if always insignificant
85 if((P.screen[i] * Q.screen_max) < screen_tol)
86 continue;
87 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
88 }
89
90 icd = 0;
91 iprimcd = 0;
92 nprim_icd = Q.nprim12[cd];
93 double * restrict PRIM_PTR_INT__d_s_s_p = INT__d_s_s_p + abcd * 18;
94 double * restrict PRIM_PTR_INT__f_s_s_p = INT__f_s_s_p + abcd * 30;
95 double * restrict PRIM_PTR_INT__g_s_s_p = INT__g_s_s_p + abcd * 45;
96
97
98
99 // Load these one per loop over i
100 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
101 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
102 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
103
104 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
105
106 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
107 {
108 // calculate the shell offsets
109 // these are the offset from the shell pointed to by cd
110 // for each element
111 int shelloffsets[SIMINT_SIMD_LEN] = {0};
112 int lastoffset = 0;
113 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
114
115 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
116 {
117 // Handle if the first element of the vector is a new shell
118 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
119 {
120 nprim_icd += Q.nprim12[cd + (++icd)];
121 PRIM_PTR_INT__d_s_s_p += 18;
122 PRIM_PTR_INT__f_s_s_p += 30;
123 PRIM_PTR_INT__g_s_s_p += 45;
124 }
125 iprimcd++;
126 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
127 {
128 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
129 {
130 shelloffsets[n] = shelloffsets[n-1] + 1;
131 lastoffset++;
132 nprim_icd += Q.nprim12[cd + (++icd)];
133 }
134 else
135 shelloffsets[n] = shelloffsets[n-1];
136 iprimcd++;
137 }
138 }
139 else
140 iprimcd += SIMINT_SIMD_LEN;
141
142 // Do we have to compute this vector (or has it been screened out)?
143 // (not_screened != 0 means we have to do this vector)
144 if(check_screen)
145 {
146 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
147 if(vmax < screen_tol)
148 {
149 PRIM_PTR_INT__d_s_s_p += lastoffset*18;
150 PRIM_PTR_INT__f_s_s_p += lastoffset*30;
151 PRIM_PTR_INT__g_s_s_p += lastoffset*45;
152 continue;
153 }
154 }
155
156 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
157 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
158 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
159 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
160
161
162 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
163 SIMINT_DBLTYPE PQ[3];
164 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
165 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
166 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
167 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
168 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
169 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
170
171 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
172 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
173 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
174 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
175 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
176 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
177 const SIMINT_DBLTYPE Q_PB[3] = { SIMINT_DBLLOAD(Q.PB_x, j), SIMINT_DBLLOAD(Q.PB_y, j), SIMINT_DBLLOAD(Q.PB_z, j) };
178
179 // NOTE: Minus sign!
180 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
181 SIMINT_DBLTYPE aop_PQ[3];
182 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
183 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
184 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
185
186 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
187 SIMINT_DBLTYPE aoq_PQ[3];
188 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
189 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
190 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
191 // Put a minus sign here so we don't have to in RR routines
192 a_over_q = SIMINT_NEG(a_over_q);
193
194
195 //////////////////////////////////////////////
196 // Fjt function section
197 // Maximum v value: 5
198 //////////////////////////////////////////////
199 // The parameter to the Fjt function
200 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
201
202
203 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
204
205
206 boys_F_split(PRIM_INT__s_s_s_s, F_x, 5);
207 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
208 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
209 for(n = 0; n <= 5; n++)
210 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
211
212 //////////////////////////////////////////////
213 // Primitive integrals: Vertical recurrance
214 //////////////////////////////////////////////
215
216 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
217 const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
218 const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
219 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
220 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
221 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
222 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
223
224
225
226 // Forming PRIM_INT__p_s_s_s[5 * 3];
227 for(n = 0; n < 5; ++n) // loop over orders of auxiliary function
228 {
229
230 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
231 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
232
233 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
234 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
235
236 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
237 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
238
239 }
240
241
242
243 // Forming PRIM_INT__d_s_s_s[4 * 6];
244 for(n = 0; n < 4; ++n) // loop over orders of auxiliary function
245 {
246
247 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
248 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
249 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
250
251 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
252 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
253
254 PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
255 PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 2]);
256
257 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
258 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
259 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
260
261 PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
262 PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 4]);
263
264 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
265 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
266 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
267
268 }
269
270
271
272 // Forming PRIM_INT__d_s_s_p[1 * 18];
273 for(n = 0; n < 1; ++n) // loop over orders of auxiliary function
274 {
275
276 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
277 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 0]);
278 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 0]);
279
280 PRIM_INT__d_s_s_p[n * 18 + 1] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
281 PRIM_INT__d_s_s_p[n * 18 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 1]);
282
283 PRIM_INT__d_s_s_p[n * 18 + 2] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
284 PRIM_INT__d_s_s_p[n * 18 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 2]);
285
286 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 1]);
287 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 3]);
288 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 3]);
289
290 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 1]);
291 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 4]);
292 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 4]);
293
294 PRIM_INT__d_s_s_p[n * 18 + 5] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
295 PRIM_INT__d_s_s_p[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 5]);
296
297 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 2]);
298 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 6]);
299 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 6]);
300
301 PRIM_INT__d_s_s_p[n * 18 + 7] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 2]);
302 PRIM_INT__d_s_s_p[n * 18 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 7]);
303
304 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 2]);
305 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 8]);
306 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 8]);
307
308 PRIM_INT__d_s_s_p[n * 18 + 9] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
309 PRIM_INT__d_s_s_p[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 9]);
310
311 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
312 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 10]);
313 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 10]);
314
315 PRIM_INT__d_s_s_p[n * 18 + 11] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
316 PRIM_INT__d_s_s_p[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 11]);
317
318 PRIM_INT__d_s_s_p[n * 18 + 12] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 4]);
319 PRIM_INT__d_s_s_p[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 12]);
320
321 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 4]);
322 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 13]);
323 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 13]);
324
325 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 4]);
326 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 14]);
327 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 14]);
328
329 PRIM_INT__d_s_s_p[n * 18 + 15] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
330 PRIM_INT__d_s_s_p[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 15]);
331
332 PRIM_INT__d_s_s_p[n * 18 + 16] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
333 PRIM_INT__d_s_s_p[n * 18 + 16] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 16]);
334
335 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
336 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 17]);
337 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 17]);
338
339 }
340
341
342
343 // Forming PRIM_INT__f_s_s_s[3 * 10];
344 for(n = 0; n < 3; ++n) // loop over orders of auxiliary function
345 {
346
347 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
348 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
349 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
350
351 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
352 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
353
354 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
355 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
356
357 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
358 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
359
360 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
361 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
362
363 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
364 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
365
366 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
367 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
368 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
369
370 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
371 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
372
373 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
374 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
375
376 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
377 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
378 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
379
380 }
381
382
383 VRR_L_f_s_s_p(
384 PRIM_INT__f_s_s_p,
385 PRIM_INT__f_s_s_s,
386 PRIM_INT__d_s_s_s,
387 Q_PB,
388 aoq_PQ,
389 one_over_2pq,
390 1);
391
392
393 VRR_I_g_s_s_s(
394 PRIM_INT__g_s_s_s,
395 PRIM_INT__f_s_s_s,
396 PRIM_INT__d_s_s_s,
397 P_PA,
398 a_over_p,
399 aop_PQ,
400 one_over_2p,
401 2);
402
403
404 VRR_L_g_s_s_p(
405 PRIM_INT__g_s_s_p,
406 PRIM_INT__g_s_s_s,
407 PRIM_INT__f_s_s_s,
408 Q_PB,
409 aoq_PQ,
410 one_over_2pq,
411 1);
412
413
414
415
416 ////////////////////////////////////
417 // Accumulate contracted integrals
418 ////////////////////////////////////
419 if(lastoffset == 0)
420 {
421 contract_all(18, PRIM_INT__d_s_s_p, PRIM_PTR_INT__d_s_s_p);
422 contract_all(30, PRIM_INT__f_s_s_p, PRIM_PTR_INT__f_s_s_p);
423 contract_all(45, PRIM_INT__g_s_s_p, PRIM_PTR_INT__g_s_s_p);
424 }
425 else
426 {
427 contract(18, shelloffsets, PRIM_INT__d_s_s_p, PRIM_PTR_INT__d_s_s_p);
428 contract(30, shelloffsets, PRIM_INT__f_s_s_p, PRIM_PTR_INT__f_s_s_p);
429 contract(45, shelloffsets, PRIM_INT__g_s_s_p, PRIM_PTR_INT__g_s_s_p);
430 PRIM_PTR_INT__d_s_s_p += lastoffset*18;
431 PRIM_PTR_INT__f_s_s_p += lastoffset*30;
432 PRIM_PTR_INT__g_s_s_p += lastoffset*45;
433 }
434
435 } // close loop over j
436 } // close loop over i
437
438 //Advance to the next batch
439 jstart = SIMINT_SIMD_ROUND(jend);
440
441 //////////////////////////////////////////////
442 // Contracted integrals: Horizontal recurrance
443 //////////////////////////////////////////////
444
445
446 const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
447
448
449 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
450 {
451
452 // set up HRR pointers
453 double const * restrict HRR_INT__d_s_s_p = INT__d_s_s_p + abcd * 18;
454 double const * restrict HRR_INT__f_s_s_p = INT__f_s_s_p + abcd * 30;
455 double const * restrict HRR_INT__g_s_s_p = INT__g_s_s_p + abcd * 45;
456 double * restrict HRR_INT__d_d_s_p = INT__d_d_s_p + real_abcd * 108;
457
458 // form INT__d_p_s_p
459 for(iket = 0; iket < 3; ++iket)
460 {
461 HRR_INT__d_p_s_p[0 * 3 + iket] = HRR_INT__f_s_s_p[0 * 3 + iket] + ( hAB[0] * HRR_INT__d_s_s_p[0 * 3 + iket] );
462
463 HRR_INT__d_p_s_p[1 * 3 + iket] = HRR_INT__f_s_s_p[1 * 3 + iket] + ( hAB[1] * HRR_INT__d_s_s_p[0 * 3 + iket] );
464
465 HRR_INT__d_p_s_p[2 * 3 + iket] = HRR_INT__f_s_s_p[2 * 3 + iket] + ( hAB[2] * HRR_INT__d_s_s_p[0 * 3 + iket] );
466
467 HRR_INT__d_p_s_p[3 * 3 + iket] = HRR_INT__f_s_s_p[1 * 3 + iket] + ( hAB[0] * HRR_INT__d_s_s_p[1 * 3 + iket] );
468
469 HRR_INT__d_p_s_p[4 * 3 + iket] = HRR_INT__f_s_s_p[3 * 3 + iket] + ( hAB[1] * HRR_INT__d_s_s_p[1 * 3 + iket] );
470
471 HRR_INT__d_p_s_p[5 * 3 + iket] = HRR_INT__f_s_s_p[4 * 3 + iket] + ( hAB[2] * HRR_INT__d_s_s_p[1 * 3 + iket] );
472
473 HRR_INT__d_p_s_p[6 * 3 + iket] = HRR_INT__f_s_s_p[2 * 3 + iket] + ( hAB[0] * HRR_INT__d_s_s_p[2 * 3 + iket] );
474
475 HRR_INT__d_p_s_p[7 * 3 + iket] = HRR_INT__f_s_s_p[4 * 3 + iket] + ( hAB[1] * HRR_INT__d_s_s_p[2 * 3 + iket] );
476
477 HRR_INT__d_p_s_p[8 * 3 + iket] = HRR_INT__f_s_s_p[5 * 3 + iket] + ( hAB[2] * HRR_INT__d_s_s_p[2 * 3 + iket] );
478
479 HRR_INT__d_p_s_p[9 * 3 + iket] = HRR_INT__f_s_s_p[3 * 3 + iket] + ( hAB[0] * HRR_INT__d_s_s_p[3 * 3 + iket] );
480
481 HRR_INT__d_p_s_p[10 * 3 + iket] = HRR_INT__f_s_s_p[6 * 3 + iket] + ( hAB[1] * HRR_INT__d_s_s_p[3 * 3 + iket] );
482
483 HRR_INT__d_p_s_p[11 * 3 + iket] = HRR_INT__f_s_s_p[7 * 3 + iket] + ( hAB[2] * HRR_INT__d_s_s_p[3 * 3 + iket] );
484
485 HRR_INT__d_p_s_p[12 * 3 + iket] = HRR_INT__f_s_s_p[4 * 3 + iket] + ( hAB[0] * HRR_INT__d_s_s_p[4 * 3 + iket] );
486
487 HRR_INT__d_p_s_p[13 * 3 + iket] = HRR_INT__f_s_s_p[7 * 3 + iket] + ( hAB[1] * HRR_INT__d_s_s_p[4 * 3 + iket] );
488
489 HRR_INT__d_p_s_p[14 * 3 + iket] = HRR_INT__f_s_s_p[8 * 3 + iket] + ( hAB[2] * HRR_INT__d_s_s_p[4 * 3 + iket] );
490
491 HRR_INT__d_p_s_p[15 * 3 + iket] = HRR_INT__f_s_s_p[5 * 3 + iket] + ( hAB[0] * HRR_INT__d_s_s_p[5 * 3 + iket] );
492
493 HRR_INT__d_p_s_p[16 * 3 + iket] = HRR_INT__f_s_s_p[8 * 3 + iket] + ( hAB[1] * HRR_INT__d_s_s_p[5 * 3 + iket] );
494
495 HRR_INT__d_p_s_p[17 * 3 + iket] = HRR_INT__f_s_s_p[9 * 3 + iket] + ( hAB[2] * HRR_INT__d_s_s_p[5 * 3 + iket] );
496
497 }
498
499
500 // form INT__f_p_s_p
501 HRR_J_f_p(
502 HRR_INT__f_p_s_p,
503 HRR_INT__f_s_s_p,
504 HRR_INT__g_s_s_p,
505 hAB, 3);
506
507 // form INT__d_d_s_p
508 HRR_J_d_d(
509 HRR_INT__d_d_s_p,
510 HRR_INT__d_p_s_p,
511 HRR_INT__f_p_s_p,
512 hAB, 3);
513
514
515 } // close HRR loop
516
517
518 } // close loop cdbatch
519
520 istart = iend;
521 } // close loop over ab
522
523 return P.nshell12_clip * Q.nshell12_clip;
524 }
525
526