1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_g_d_i_h(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__g_d_i_h)8 int ostei_g_d_i_h(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__g_d_i_h)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__g_d_i_h);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int iket;
26 int ibra;
27
28 // partition workspace
29 double * const INT__g_s_i_s = work + (SIMINT_NSHELL_SIMD * 0);
30 double * const INT__g_s_k_s = work + (SIMINT_NSHELL_SIMD * 420);
31 double * const INT__g_s_l_s = work + (SIMINT_NSHELL_SIMD * 960);
32 double * const INT__g_s_m_s = work + (SIMINT_NSHELL_SIMD * 1635);
33 double * const INT__g_s_n_s = work + (SIMINT_NSHELL_SIMD * 2460);
34 double * const INT__g_s_o_s = work + (SIMINT_NSHELL_SIMD * 3450);
35 double * const INT__h_s_i_s = work + (SIMINT_NSHELL_SIMD * 4620);
36 double * const INT__h_s_k_s = work + (SIMINT_NSHELL_SIMD * 5208);
37 double * const INT__h_s_l_s = work + (SIMINT_NSHELL_SIMD * 5964);
38 double * const INT__h_s_m_s = work + (SIMINT_NSHELL_SIMD * 6909);
39 double * const INT__h_s_n_s = work + (SIMINT_NSHELL_SIMD * 8064);
40 double * const INT__h_s_o_s = work + (SIMINT_NSHELL_SIMD * 9450);
41 double * const INT__i_s_i_s = work + (SIMINT_NSHELL_SIMD * 11088);
42 double * const INT__i_s_k_s = work + (SIMINT_NSHELL_SIMD * 11872);
43 double * const INT__i_s_l_s = work + (SIMINT_NSHELL_SIMD * 12880);
44 double * const INT__i_s_m_s = work + (SIMINT_NSHELL_SIMD * 14140);
45 double * const INT__i_s_n_s = work + (SIMINT_NSHELL_SIMD * 15680);
46 double * const INT__i_s_o_s = work + (SIMINT_NSHELL_SIMD * 17528);
47 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*19712);
48 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
49 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 18;
50 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_d_s = primwork + 69;
51 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_f_s = primwork + 165;
52 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_g_s = primwork + 315;
53 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_h_s = primwork + 525;
54 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_i_s = primwork + 798;
55 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_k_s = primwork + 1134;
56 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_l_s = primwork + 1530;
57 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_m_s = primwork + 1980;
58 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_n_s = primwork + 2475;
59 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_o_s = primwork + 3003;
60 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_p_s = primwork + 3549;
61 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_d_s = primwork + 3603;
62 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_f_s = primwork + 3711;
63 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_g_s = primwork + 3891;
64 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_h_s = primwork + 4161;
65 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_i_s = primwork + 4539;
66 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_k_s = primwork + 5043;
67 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_l_s = primwork + 5691;
68 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_m_s = primwork + 6501;
69 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_n_s = primwork + 7491;
70 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_o_s = primwork + 8679;
71 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_d_s = primwork + 10083;
72 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_f_s = primwork + 10263;
73 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_g_s = primwork + 10563;
74 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_h_s = primwork + 11013;
75 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_i_s = primwork + 11643;
76 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_k_s = primwork + 12483;
77 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_l_s = primwork + 13563;
78 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_m_s = primwork + 14913;
79 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_n_s = primwork + 16563;
80 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_o_s = primwork + 18543;
81 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_f_s = primwork + 20883;
82 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_g_s = primwork + 21283;
83 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_h_s = primwork + 21883;
84 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_i_s = primwork + 22723;
85 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_k_s = primwork + 23843;
86 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_l_s = primwork + 25283;
87 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_m_s = primwork + 27083;
88 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_n_s = primwork + 29283;
89 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_o_s = primwork + 31923;
90 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_g_s = primwork + 35043;
91 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_h_s = primwork + 35718;
92 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_i_s = primwork + 36663;
93 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_k_s = primwork + 37923;
94 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_l_s = primwork + 39543;
95 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_m_s = primwork + 41568;
96 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_n_s = primwork + 44043;
97 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_o_s = primwork + 47013;
98 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_h_s = primwork + 50523;
99 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_i_s = primwork + 51405;
100 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_k_s = primwork + 52581;
101 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_l_s = primwork + 54093;
102 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_m_s = primwork + 55983;
103 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_n_s = primwork + 58293;
104 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_o_s = primwork + 61065;
105 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_i_s = primwork + 64341;
106 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_k_s = primwork + 65125;
107 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_l_s = primwork + 66133;
108 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_m_s = primwork + 67393;
109 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_n_s = primwork + 68933;
110 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_o_s = primwork + 70781;
111 double * const hrrwork = (double *)(primwork + 72965);
112 double * const HRR_INT__g_p_i_s = hrrwork + 0;
113 double * const HRR_INT__g_p_k_s = hrrwork + 1260;
114 double * const HRR_INT__g_p_l_s = hrrwork + 2880;
115 double * const HRR_INT__g_p_m_s = hrrwork + 4905;
116 double * const HRR_INT__g_p_n_s = hrrwork + 7380;
117 double * const HRR_INT__g_p_o_s = hrrwork + 10350;
118 double * const HRR_INT__g_d_i_s = hrrwork + 13860;
119 double * const HRR_INT__g_d_i_p = hrrwork + 16380;
120 double * const HRR_INT__g_d_i_d = hrrwork + 23940;
121 double * const HRR_INT__g_d_i_f = hrrwork + 39060;
122 double * const HRR_INT__g_d_i_g = hrrwork + 64260;
123 double * const HRR_INT__g_d_k_s = hrrwork + 102060;
124 double * const HRR_INT__g_d_k_p = hrrwork + 105300;
125 double * const HRR_INT__g_d_k_d = hrrwork + 115020;
126 double * const HRR_INT__g_d_k_f = hrrwork + 134460;
127 double * const HRR_INT__g_d_k_g = hrrwork + 166860;
128 double * const HRR_INT__g_d_l_s = hrrwork + 215460;
129 double * const HRR_INT__g_d_l_p = hrrwork + 219510;
130 double * const HRR_INT__g_d_l_d = hrrwork + 231660;
131 double * const HRR_INT__g_d_l_f = hrrwork + 255960;
132 double * const HRR_INT__g_d_m_s = hrrwork + 296460;
133 double * const HRR_INT__g_d_m_p = hrrwork + 301410;
134 double * const HRR_INT__g_d_m_d = hrrwork + 316260;
135 double * const HRR_INT__g_d_n_s = hrrwork + 345960;
136 double * const HRR_INT__g_d_n_p = hrrwork + 351900;
137 double * const HRR_INT__g_d_o_s = hrrwork + 369720;
138 double * const HRR_INT__h_p_i_s = hrrwork + 376740;
139 double * const HRR_INT__h_p_k_s = hrrwork + 378504;
140 double * const HRR_INT__h_p_l_s = hrrwork + 380772;
141 double * const HRR_INT__h_p_m_s = hrrwork + 383607;
142 double * const HRR_INT__h_p_n_s = hrrwork + 387072;
143 double * const HRR_INT__h_p_o_s = hrrwork + 391230;
144
145
146 // Create constants
147 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
148 const SIMINT_DBLTYPE const_10 = SIMINT_DBLSET1(10);
149 const SIMINT_DBLTYPE const_11 = SIMINT_DBLSET1(11);
150 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
151 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
152 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
153 const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
154 const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
155 const SIMINT_DBLTYPE const_7 = SIMINT_DBLSET1(7);
156 const SIMINT_DBLTYPE const_8 = SIMINT_DBLSET1(8);
157 const SIMINT_DBLTYPE const_9 = SIMINT_DBLSET1(9);
158 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
159
160
161 ////////////////////////////////////////
162 // Loop over shells and primitives
163 ////////////////////////////////////////
164
165 real_abcd = 0;
166 istart = 0;
167 for(ab = 0; ab < P.nshell12_clip; ++ab)
168 {
169 const int iend = istart + P.nprim12[ab];
170
171 cd = 0;
172 jstart = 0;
173
174 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
175 {
176 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
177 int jend = jstart;
178 for(i = 0; i < nshellbatch; i++)
179 jend += Q.nprim12[cd+i];
180
181 // Clear the beginning of the workspace (where we are accumulating integrals)
182 memset(work, 0, SIMINT_NSHELL_SIMD * 19712 * sizeof(double));
183 abcd = 0;
184
185
186 for(i = istart; i < iend; ++i)
187 {
188 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
189
190 if(check_screen)
191 {
192 // Skip this whole thing if always insignificant
193 if((P.screen[i] * Q.screen_max) < screen_tol)
194 continue;
195 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
196 }
197
198 icd = 0;
199 iprimcd = 0;
200 nprim_icd = Q.nprim12[cd];
201 double * restrict PRIM_PTR_INT__g_s_i_s = INT__g_s_i_s + abcd * 420;
202 double * restrict PRIM_PTR_INT__g_s_k_s = INT__g_s_k_s + abcd * 540;
203 double * restrict PRIM_PTR_INT__g_s_l_s = INT__g_s_l_s + abcd * 675;
204 double * restrict PRIM_PTR_INT__g_s_m_s = INT__g_s_m_s + abcd * 825;
205 double * restrict PRIM_PTR_INT__g_s_n_s = INT__g_s_n_s + abcd * 990;
206 double * restrict PRIM_PTR_INT__g_s_o_s = INT__g_s_o_s + abcd * 1170;
207 double * restrict PRIM_PTR_INT__h_s_i_s = INT__h_s_i_s + abcd * 588;
208 double * restrict PRIM_PTR_INT__h_s_k_s = INT__h_s_k_s + abcd * 756;
209 double * restrict PRIM_PTR_INT__h_s_l_s = INT__h_s_l_s + abcd * 945;
210 double * restrict PRIM_PTR_INT__h_s_m_s = INT__h_s_m_s + abcd * 1155;
211 double * restrict PRIM_PTR_INT__h_s_n_s = INT__h_s_n_s + abcd * 1386;
212 double * restrict PRIM_PTR_INT__h_s_o_s = INT__h_s_o_s + abcd * 1638;
213 double * restrict PRIM_PTR_INT__i_s_i_s = INT__i_s_i_s + abcd * 784;
214 double * restrict PRIM_PTR_INT__i_s_k_s = INT__i_s_k_s + abcd * 1008;
215 double * restrict PRIM_PTR_INT__i_s_l_s = INT__i_s_l_s + abcd * 1260;
216 double * restrict PRIM_PTR_INT__i_s_m_s = INT__i_s_m_s + abcd * 1540;
217 double * restrict PRIM_PTR_INT__i_s_n_s = INT__i_s_n_s + abcd * 1848;
218 double * restrict PRIM_PTR_INT__i_s_o_s = INT__i_s_o_s + abcd * 2184;
219
220
221
222 // Load these one per loop over i
223 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
224 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
225 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
226
227 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
228
229 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
230 {
231 // calculate the shell offsets
232 // these are the offset from the shell pointed to by cd
233 // for each element
234 int shelloffsets[SIMINT_SIMD_LEN] = {0};
235 int lastoffset = 0;
236 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
237
238 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
239 {
240 // Handle if the first element of the vector is a new shell
241 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
242 {
243 nprim_icd += Q.nprim12[cd + (++icd)];
244 PRIM_PTR_INT__g_s_i_s += 420;
245 PRIM_PTR_INT__g_s_k_s += 540;
246 PRIM_PTR_INT__g_s_l_s += 675;
247 PRIM_PTR_INT__g_s_m_s += 825;
248 PRIM_PTR_INT__g_s_n_s += 990;
249 PRIM_PTR_INT__g_s_o_s += 1170;
250 PRIM_PTR_INT__h_s_i_s += 588;
251 PRIM_PTR_INT__h_s_k_s += 756;
252 PRIM_PTR_INT__h_s_l_s += 945;
253 PRIM_PTR_INT__h_s_m_s += 1155;
254 PRIM_PTR_INT__h_s_n_s += 1386;
255 PRIM_PTR_INT__h_s_o_s += 1638;
256 PRIM_PTR_INT__i_s_i_s += 784;
257 PRIM_PTR_INT__i_s_k_s += 1008;
258 PRIM_PTR_INT__i_s_l_s += 1260;
259 PRIM_PTR_INT__i_s_m_s += 1540;
260 PRIM_PTR_INT__i_s_n_s += 1848;
261 PRIM_PTR_INT__i_s_o_s += 2184;
262 }
263 iprimcd++;
264 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
265 {
266 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
267 {
268 shelloffsets[n] = shelloffsets[n-1] + 1;
269 lastoffset++;
270 nprim_icd += Q.nprim12[cd + (++icd)];
271 }
272 else
273 shelloffsets[n] = shelloffsets[n-1];
274 iprimcd++;
275 }
276 }
277 else
278 iprimcd += SIMINT_SIMD_LEN;
279
280 // Do we have to compute this vector (or has it been screened out)?
281 // (not_screened != 0 means we have to do this vector)
282 if(check_screen)
283 {
284 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
285 if(vmax < screen_tol)
286 {
287 PRIM_PTR_INT__g_s_i_s += lastoffset*420;
288 PRIM_PTR_INT__g_s_k_s += lastoffset*540;
289 PRIM_PTR_INT__g_s_l_s += lastoffset*675;
290 PRIM_PTR_INT__g_s_m_s += lastoffset*825;
291 PRIM_PTR_INT__g_s_n_s += lastoffset*990;
292 PRIM_PTR_INT__g_s_o_s += lastoffset*1170;
293 PRIM_PTR_INT__h_s_i_s += lastoffset*588;
294 PRIM_PTR_INT__h_s_k_s += lastoffset*756;
295 PRIM_PTR_INT__h_s_l_s += lastoffset*945;
296 PRIM_PTR_INT__h_s_m_s += lastoffset*1155;
297 PRIM_PTR_INT__h_s_n_s += lastoffset*1386;
298 PRIM_PTR_INT__h_s_o_s += lastoffset*1638;
299 PRIM_PTR_INT__i_s_i_s += lastoffset*784;
300 PRIM_PTR_INT__i_s_k_s += lastoffset*1008;
301 PRIM_PTR_INT__i_s_l_s += lastoffset*1260;
302 PRIM_PTR_INT__i_s_m_s += lastoffset*1540;
303 PRIM_PTR_INT__i_s_n_s += lastoffset*1848;
304 PRIM_PTR_INT__i_s_o_s += lastoffset*2184;
305 continue;
306 }
307 }
308
309 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
310 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
311 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
312 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
313
314
315 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
316 SIMINT_DBLTYPE PQ[3];
317 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
318 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
319 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
320 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
321 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
322 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
323
324 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
325 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
326 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
327 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
328 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
329 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
330 const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
331
332 // NOTE: Minus sign!
333 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
334 SIMINT_DBLTYPE aop_PQ[3];
335 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
336 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
337 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
338
339 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
340 SIMINT_DBLTYPE aoq_PQ[3];
341 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
342 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
343 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
344 // Put a minus sign here so we don't have to in RR routines
345 a_over_q = SIMINT_NEG(a_over_q);
346
347
348 //////////////////////////////////////////////
349 // Fjt function section
350 // Maximum v value: 17
351 //////////////////////////////////////////////
352 // The parameter to the Fjt function
353 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
354
355
356 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
357
358
359 boys_F_split(PRIM_INT__s_s_s_s, F_x, 17);
360 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
361 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
362 for(n = 0; n <= 17; n++)
363 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
364
365 //////////////////////////////////////////////
366 // Primitive integrals: Vertical recurrance
367 //////////////////////////////////////////////
368
369 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
370 const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
371 const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
372 const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
373 const SIMINT_DBLTYPE vrr_const_5_over_2p = SIMINT_MUL(const_5, one_over_2p);
374 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
375 const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
376 const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
377 const SIMINT_DBLTYPE vrr_const_4_over_2q = SIMINT_MUL(const_4, one_over_2q);
378 const SIMINT_DBLTYPE vrr_const_5_over_2q = SIMINT_MUL(const_5, one_over_2q);
379 const SIMINT_DBLTYPE vrr_const_6_over_2q = SIMINT_MUL(const_6, one_over_2q);
380 const SIMINT_DBLTYPE vrr_const_7_over_2q = SIMINT_MUL(const_7, one_over_2q);
381 const SIMINT_DBLTYPE vrr_const_8_over_2q = SIMINT_MUL(const_8, one_over_2q);
382 const SIMINT_DBLTYPE vrr_const_9_over_2q = SIMINT_MUL(const_9, one_over_2q);
383 const SIMINT_DBLTYPE vrr_const_10_over_2q = SIMINT_MUL(const_10, one_over_2q);
384 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
385 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
386 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
387 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
388 const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
389 const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
390 const SIMINT_DBLTYPE vrr_const_7_over_2pq = SIMINT_MUL(const_7, one_over_2pq);
391 const SIMINT_DBLTYPE vrr_const_8_over_2pq = SIMINT_MUL(const_8, one_over_2pq);
392 const SIMINT_DBLTYPE vrr_const_9_over_2pq = SIMINT_MUL(const_9, one_over_2pq);
393 const SIMINT_DBLTYPE vrr_const_10_over_2pq = SIMINT_MUL(const_10, one_over_2pq);
394 const SIMINT_DBLTYPE vrr_const_11_over_2pq = SIMINT_MUL(const_11, one_over_2pq);
395
396
397
398 // Forming PRIM_INT__s_s_p_s[17 * 3];
399 for(n = 0; n < 17; ++n) // loop over orders of auxiliary function
400 {
401
402 PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
403 PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
404
405 PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
406 PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
407
408 PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
409 PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
410
411 }
412
413
414
415 // Forming PRIM_INT__s_s_d_s[16 * 6];
416 for(n = 0; n < 16; ++n) // loop over orders of auxiliary function
417 {
418
419 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
420 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 0]);
421 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 0]);
422
423 PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
424 PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 1]);
425
426 PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 0]);
427 PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 2]);
428
429 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
430 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 3]);
431 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 3]);
432
433 PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 1]);
434 PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 4]);
435
436 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
437 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_d_s[n * 6 + 5]);
438 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 5]);
439
440 }
441
442
443
444 // Forming PRIM_INT__s_s_f_s[15 * 10];
445 for(n = 0; n < 15; ++n) // loop over orders of auxiliary function
446 {
447
448 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
449 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 0]);
450 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]), PRIM_INT__s_s_f_s[n * 10 + 0]);
451
452 PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
453 PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 1]);
454
455 PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
456 PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 2]);
457
458 PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
459 PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 3]);
460
461 PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
462 PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_s_f_s[n * 10 + 4]);
463
464 PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
465 PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 5]);
466
467 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
468 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 6]);
469 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_p_s[n * 3 + 1]), PRIM_INT__s_s_f_s[n * 10 + 6]);
470
471 PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
472 PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 7]);
473
474 PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
475 PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 8]);
476
477 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
478 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 9]);
479 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_p_s[n * 3 + 2]), PRIM_INT__s_s_f_s[n * 10 + 9]);
480
481 }
482
483
484 VRR_K_s_s_g_s(
485 PRIM_INT__s_s_g_s,
486 PRIM_INT__s_s_f_s,
487 PRIM_INT__s_s_d_s,
488 Q_PA,
489 a_over_q,
490 aoq_PQ,
491 one_over_2q,
492 14);
493
494
495 VRR_K_s_s_h_s(
496 PRIM_INT__s_s_h_s,
497 PRIM_INT__s_s_g_s,
498 PRIM_INT__s_s_f_s,
499 Q_PA,
500 a_over_q,
501 aoq_PQ,
502 one_over_2q,
503 13);
504
505
506 ostei_general_vrr1_K(6, 12,
507 one_over_2q, a_over_q, aoq_PQ, Q_PA,
508 PRIM_INT__s_s_h_s, PRIM_INT__s_s_g_s, PRIM_INT__s_s_i_s);
509
510
511 ostei_general_vrr_I(1, 0, 6, 0, 6,
512 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
513 PRIM_INT__s_s_i_s, NULL, NULL, PRIM_INT__s_s_h_s, NULL, PRIM_INT__p_s_i_s);
514
515
516 ostei_general_vrr_I(1, 0, 5, 0, 6,
517 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
518 PRIM_INT__s_s_h_s, NULL, NULL, PRIM_INT__s_s_g_s, NULL, PRIM_INT__p_s_h_s);
519
520
521 ostei_general_vrr_I(2, 0, 6, 0, 5,
522 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
523 PRIM_INT__p_s_i_s, PRIM_INT__s_s_i_s, NULL, PRIM_INT__p_s_h_s, NULL, PRIM_INT__d_s_i_s);
524
525
526 VRR_I_p_s_g_s(
527 PRIM_INT__p_s_g_s,
528 PRIM_INT__s_s_g_s,
529 PRIM_INT__s_s_f_s,
530 P_PA,
531 aop_PQ,
532 one_over_2pq,
533 6);
534
535
536 ostei_general_vrr_I(2, 0, 5, 0, 5,
537 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
538 PRIM_INT__p_s_h_s, PRIM_INT__s_s_h_s, NULL, PRIM_INT__p_s_g_s, NULL, PRIM_INT__d_s_h_s);
539
540
541 ostei_general_vrr_I(3, 0, 6, 0, 4,
542 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
543 PRIM_INT__d_s_i_s, PRIM_INT__p_s_i_s, NULL, PRIM_INT__d_s_h_s, NULL, PRIM_INT__f_s_i_s);
544
545
546 VRR_I_p_s_f_s(
547 PRIM_INT__p_s_f_s,
548 PRIM_INT__s_s_f_s,
549 PRIM_INT__s_s_d_s,
550 P_PA,
551 aop_PQ,
552 one_over_2pq,
553 6);
554
555
556 ostei_general_vrr_I(2, 0, 4, 0, 5,
557 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
558 PRIM_INT__p_s_g_s, PRIM_INT__s_s_g_s, NULL, PRIM_INT__p_s_f_s, NULL, PRIM_INT__d_s_g_s);
559
560
561 ostei_general_vrr_I(3, 0, 5, 0, 4,
562 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
563 PRIM_INT__d_s_h_s, PRIM_INT__p_s_h_s, NULL, PRIM_INT__d_s_g_s, NULL, PRIM_INT__f_s_h_s);
564
565
566 ostei_general_vrr_I(4, 0, 6, 0, 3,
567 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
568 PRIM_INT__f_s_i_s, PRIM_INT__d_s_i_s, NULL, PRIM_INT__f_s_h_s, NULL, PRIM_INT__g_s_i_s);
569
570
571 ostei_general_vrr1_K(7, 11,
572 one_over_2q, a_over_q, aoq_PQ, Q_PA,
573 PRIM_INT__s_s_i_s, PRIM_INT__s_s_h_s, PRIM_INT__s_s_k_s);
574
575
576 ostei_general_vrr_I(1, 0, 7, 0, 6,
577 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
578 PRIM_INT__s_s_k_s, NULL, NULL, PRIM_INT__s_s_i_s, NULL, PRIM_INT__p_s_k_s);
579
580
581 ostei_general_vrr_I(2, 0, 7, 0, 5,
582 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
583 PRIM_INT__p_s_k_s, PRIM_INT__s_s_k_s, NULL, PRIM_INT__p_s_i_s, NULL, PRIM_INT__d_s_k_s);
584
585
586 ostei_general_vrr_I(3, 0, 7, 0, 4,
587 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
588 PRIM_INT__d_s_k_s, PRIM_INT__p_s_k_s, NULL, PRIM_INT__d_s_i_s, NULL, PRIM_INT__f_s_k_s);
589
590
591 ostei_general_vrr_I(4, 0, 7, 0, 3,
592 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
593 PRIM_INT__f_s_k_s, PRIM_INT__d_s_k_s, NULL, PRIM_INT__f_s_i_s, NULL, PRIM_INT__g_s_k_s);
594
595
596
597 // Forming PRIM_INT__p_s_d_s[6 * 18];
598 for(n = 0; n < 6; ++n) // loop over orders of auxiliary function
599 {
600
601 PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
602 PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
603 PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
604
605 PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 1]);
606 PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 1]);
607 PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 1]);
608
609 PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 2]);
610 PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 2]);
611 PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 2]);
612
613 PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
614 PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 3]);
615
616 PRIM_INT__p_s_d_s[n * 18 + 4] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 4]);
617 PRIM_INT__p_s_d_s[n * 18 + 4] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 4]);
618
619 PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
620 PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 5]);
621
622 PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
623 PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 6]);
624
625 PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 1]);
626 PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 7]);
627 PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 7]);
628
629 PRIM_INT__p_s_d_s[n * 18 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 2]);
630 PRIM_INT__p_s_d_s[n * 18 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 8]);
631
632 PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
633 PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 9]);
634 PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 9]);
635
636 PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 4]);
637 PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 10]);
638 PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 10]);
639
640 PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
641 PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 11]);
642
643 PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
644 PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 12]);
645
646 PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
647 PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 13]);
648
649 PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 2]);
650 PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 14]);
651 PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 14]);
652
653 PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
654 PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 15]);
655
656 PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 4]);
657 PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 16]);
658 PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 16]);
659
660 PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
661 PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 17]);
662 PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 17]);
663
664 }
665
666
667 VRR_I_d_s_f_s(
668 PRIM_INT__d_s_f_s,
669 PRIM_INT__p_s_f_s,
670 PRIM_INT__s_s_f_s,
671 PRIM_INT__p_s_d_s,
672 P_PA,
673 a_over_p,
674 aop_PQ,
675 one_over_2p,
676 one_over_2pq,
677 5);
678
679
680 ostei_general_vrr_I(3, 0, 4, 0, 4,
681 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
682 PRIM_INT__d_s_g_s, PRIM_INT__p_s_g_s, NULL, PRIM_INT__d_s_f_s, NULL, PRIM_INT__f_s_g_s);
683
684
685 ostei_general_vrr_I(4, 0, 5, 0, 3,
686 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
687 PRIM_INT__f_s_h_s, PRIM_INT__d_s_h_s, NULL, PRIM_INT__f_s_g_s, NULL, PRIM_INT__g_s_h_s);
688
689
690 ostei_general_vrr_I(5, 0, 6, 0, 2,
691 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
692 PRIM_INT__g_s_i_s, PRIM_INT__f_s_i_s, NULL, PRIM_INT__g_s_h_s, NULL, PRIM_INT__h_s_i_s);
693
694
695 ostei_general_vrr1_K(8, 10,
696 one_over_2q, a_over_q, aoq_PQ, Q_PA,
697 PRIM_INT__s_s_k_s, PRIM_INT__s_s_i_s, PRIM_INT__s_s_l_s);
698
699
700 ostei_general_vrr_I(1, 0, 8, 0, 6,
701 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
702 PRIM_INT__s_s_l_s, NULL, NULL, PRIM_INT__s_s_k_s, NULL, PRIM_INT__p_s_l_s);
703
704
705 ostei_general_vrr_I(2, 0, 8, 0, 5,
706 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
707 PRIM_INT__p_s_l_s, PRIM_INT__s_s_l_s, NULL, PRIM_INT__p_s_k_s, NULL, PRIM_INT__d_s_l_s);
708
709
710 ostei_general_vrr_I(3, 0, 8, 0, 4,
711 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
712 PRIM_INT__d_s_l_s, PRIM_INT__p_s_l_s, NULL, PRIM_INT__d_s_k_s, NULL, PRIM_INT__f_s_l_s);
713
714
715 ostei_general_vrr_I(4, 0, 8, 0, 3,
716 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
717 PRIM_INT__f_s_l_s, PRIM_INT__d_s_l_s, NULL, PRIM_INT__f_s_k_s, NULL, PRIM_INT__g_s_l_s);
718
719
720 ostei_general_vrr_I(5, 0, 7, 0, 2,
721 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
722 PRIM_INT__g_s_k_s, PRIM_INT__f_s_k_s, NULL, PRIM_INT__g_s_i_s, NULL, PRIM_INT__h_s_k_s);
723
724
725
726 // Forming PRIM_INT__p_s_p_s[6 * 9];
727 for(n = 0; n < 6; ++n) // loop over orders of auxiliary function
728 {
729
730 PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
731 PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
732 PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
733
734 PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 1]);
735 PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 1]);
736
737 PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 2]);
738 PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 2]);
739
740 PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
741 PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 3]);
742
743 PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
744 PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 4]);
745 PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 4]);
746
747 PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 2]);
748 PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 5]);
749
750 PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 0]);
751 PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 6]);
752
753 PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 1]);
754 PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 7]);
755
756 PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
757 PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 8]);
758 PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 8]);
759
760 }
761
762
763 VRR_I_d_s_d_s(
764 PRIM_INT__d_s_d_s,
765 PRIM_INT__p_s_d_s,
766 PRIM_INT__s_s_d_s,
767 PRIM_INT__p_s_p_s,
768 P_PA,
769 a_over_p,
770 aop_PQ,
771 one_over_2p,
772 one_over_2pq,
773 5);
774
775
776 ostei_general_vrr_I(3, 0, 3, 0, 4,
777 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
778 PRIM_INT__d_s_f_s, PRIM_INT__p_s_f_s, NULL, PRIM_INT__d_s_d_s, NULL, PRIM_INT__f_s_f_s);
779
780
781 ostei_general_vrr_I(4, 0, 4, 0, 3,
782 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
783 PRIM_INT__f_s_g_s, PRIM_INT__d_s_g_s, NULL, PRIM_INT__f_s_f_s, NULL, PRIM_INT__g_s_g_s);
784
785
786 ostei_general_vrr_I(5, 0, 5, 0, 2,
787 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
788 PRIM_INT__g_s_h_s, PRIM_INT__f_s_h_s, NULL, PRIM_INT__g_s_g_s, NULL, PRIM_INT__h_s_h_s);
789
790
791 ostei_general_vrr_I(6, 0, 6, 0, 1,
792 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
793 PRIM_INT__h_s_i_s, PRIM_INT__g_s_i_s, NULL, PRIM_INT__h_s_h_s, NULL, PRIM_INT__i_s_i_s);
794
795
796 ostei_general_vrr1_K(9, 9,
797 one_over_2q, a_over_q, aoq_PQ, Q_PA,
798 PRIM_INT__s_s_l_s, PRIM_INT__s_s_k_s, PRIM_INT__s_s_m_s);
799
800
801 ostei_general_vrr_I(1, 0, 9, 0, 6,
802 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
803 PRIM_INT__s_s_m_s, NULL, NULL, PRIM_INT__s_s_l_s, NULL, PRIM_INT__p_s_m_s);
804
805
806 ostei_general_vrr_I(2, 0, 9, 0, 5,
807 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
808 PRIM_INT__p_s_m_s, PRIM_INT__s_s_m_s, NULL, PRIM_INT__p_s_l_s, NULL, PRIM_INT__d_s_m_s);
809
810
811 ostei_general_vrr_I(3, 0, 9, 0, 4,
812 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
813 PRIM_INT__d_s_m_s, PRIM_INT__p_s_m_s, NULL, PRIM_INT__d_s_l_s, NULL, PRIM_INT__f_s_m_s);
814
815
816 ostei_general_vrr_I(4, 0, 9, 0, 3,
817 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
818 PRIM_INT__f_s_m_s, PRIM_INT__d_s_m_s, NULL, PRIM_INT__f_s_l_s, NULL, PRIM_INT__g_s_m_s);
819
820
821 ostei_general_vrr_I(5, 0, 8, 0, 2,
822 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
823 PRIM_INT__g_s_l_s, PRIM_INT__f_s_l_s, NULL, PRIM_INT__g_s_k_s, NULL, PRIM_INT__h_s_l_s);
824
825
826 ostei_general_vrr_I(6, 0, 7, 0, 1,
827 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
828 PRIM_INT__h_s_k_s, PRIM_INT__g_s_k_s, NULL, PRIM_INT__h_s_i_s, NULL, PRIM_INT__i_s_k_s);
829
830
831 ostei_general_vrr1_K(10, 8,
832 one_over_2q, a_over_q, aoq_PQ, Q_PA,
833 PRIM_INT__s_s_m_s, PRIM_INT__s_s_l_s, PRIM_INT__s_s_n_s);
834
835
836 ostei_general_vrr_I(1, 0, 10, 0, 6,
837 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
838 PRIM_INT__s_s_n_s, NULL, NULL, PRIM_INT__s_s_m_s, NULL, PRIM_INT__p_s_n_s);
839
840
841 ostei_general_vrr_I(2, 0, 10, 0, 5,
842 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
843 PRIM_INT__p_s_n_s, PRIM_INT__s_s_n_s, NULL, PRIM_INT__p_s_m_s, NULL, PRIM_INT__d_s_n_s);
844
845
846 ostei_general_vrr_I(3, 0, 10, 0, 4,
847 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
848 PRIM_INT__d_s_n_s, PRIM_INT__p_s_n_s, NULL, PRIM_INT__d_s_m_s, NULL, PRIM_INT__f_s_n_s);
849
850
851 ostei_general_vrr_I(4, 0, 10, 0, 3,
852 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
853 PRIM_INT__f_s_n_s, PRIM_INT__d_s_n_s, NULL, PRIM_INT__f_s_m_s, NULL, PRIM_INT__g_s_n_s);
854
855
856 ostei_general_vrr_I(5, 0, 9, 0, 2,
857 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
858 PRIM_INT__g_s_m_s, PRIM_INT__f_s_m_s, NULL, PRIM_INT__g_s_l_s, NULL, PRIM_INT__h_s_m_s);
859
860
861 ostei_general_vrr_I(6, 0, 8, 0, 1,
862 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
863 PRIM_INT__h_s_l_s, PRIM_INT__g_s_l_s, NULL, PRIM_INT__h_s_k_s, NULL, PRIM_INT__i_s_l_s);
864
865
866 ostei_general_vrr1_K(11, 7,
867 one_over_2q, a_over_q, aoq_PQ, Q_PA,
868 PRIM_INT__s_s_n_s, PRIM_INT__s_s_m_s, PRIM_INT__s_s_o_s);
869
870
871 ostei_general_vrr_I(1, 0, 11, 0, 6,
872 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
873 PRIM_INT__s_s_o_s, NULL, NULL, PRIM_INT__s_s_n_s, NULL, PRIM_INT__p_s_o_s);
874
875
876 ostei_general_vrr_I(2, 0, 11, 0, 5,
877 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
878 PRIM_INT__p_s_o_s, PRIM_INT__s_s_o_s, NULL, PRIM_INT__p_s_n_s, NULL, PRIM_INT__d_s_o_s);
879
880
881 ostei_general_vrr_I(3, 0, 11, 0, 4,
882 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
883 PRIM_INT__d_s_o_s, PRIM_INT__p_s_o_s, NULL, PRIM_INT__d_s_n_s, NULL, PRIM_INT__f_s_o_s);
884
885
886 ostei_general_vrr_I(4, 0, 11, 0, 3,
887 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
888 PRIM_INT__f_s_o_s, PRIM_INT__d_s_o_s, NULL, PRIM_INT__f_s_n_s, NULL, PRIM_INT__g_s_o_s);
889
890
891 ostei_general_vrr_I(5, 0, 10, 0, 2,
892 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
893 PRIM_INT__g_s_n_s, PRIM_INT__f_s_n_s, NULL, PRIM_INT__g_s_m_s, NULL, PRIM_INT__h_s_n_s);
894
895
896 ostei_general_vrr_I(6, 0, 9, 0, 1,
897 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
898 PRIM_INT__h_s_m_s, PRIM_INT__g_s_m_s, NULL, PRIM_INT__h_s_l_s, NULL, PRIM_INT__i_s_m_s);
899
900
901 ostei_general_vrr_I(5, 0, 11, 0, 2,
902 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
903 PRIM_INT__g_s_o_s, PRIM_INT__f_s_o_s, NULL, PRIM_INT__g_s_n_s, NULL, PRIM_INT__h_s_o_s);
904
905
906 ostei_general_vrr_I(6, 0, 10, 0, 1,
907 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
908 PRIM_INT__h_s_n_s, PRIM_INT__g_s_n_s, NULL, PRIM_INT__h_s_m_s, NULL, PRIM_INT__i_s_n_s);
909
910
911 ostei_general_vrr_I(6, 0, 11, 0, 1,
912 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
913 PRIM_INT__h_s_o_s, PRIM_INT__g_s_o_s, NULL, PRIM_INT__h_s_n_s, NULL, PRIM_INT__i_s_o_s);
914
915
916
917
918 ////////////////////////////////////
919 // Accumulate contracted integrals
920 ////////////////////////////////////
921 if(lastoffset == 0)
922 {
923 contract_all(420, PRIM_INT__g_s_i_s, PRIM_PTR_INT__g_s_i_s);
924 contract_all(540, PRIM_INT__g_s_k_s, PRIM_PTR_INT__g_s_k_s);
925 contract_all(675, PRIM_INT__g_s_l_s, PRIM_PTR_INT__g_s_l_s);
926 contract_all(825, PRIM_INT__g_s_m_s, PRIM_PTR_INT__g_s_m_s);
927 contract_all(990, PRIM_INT__g_s_n_s, PRIM_PTR_INT__g_s_n_s);
928 contract_all(1170, PRIM_INT__g_s_o_s, PRIM_PTR_INT__g_s_o_s);
929 contract_all(588, PRIM_INT__h_s_i_s, PRIM_PTR_INT__h_s_i_s);
930 contract_all(756, PRIM_INT__h_s_k_s, PRIM_PTR_INT__h_s_k_s);
931 contract_all(945, PRIM_INT__h_s_l_s, PRIM_PTR_INT__h_s_l_s);
932 contract_all(1155, PRIM_INT__h_s_m_s, PRIM_PTR_INT__h_s_m_s);
933 contract_all(1386, PRIM_INT__h_s_n_s, PRIM_PTR_INT__h_s_n_s);
934 contract_all(1638, PRIM_INT__h_s_o_s, PRIM_PTR_INT__h_s_o_s);
935 contract_all(784, PRIM_INT__i_s_i_s, PRIM_PTR_INT__i_s_i_s);
936 contract_all(1008, PRIM_INT__i_s_k_s, PRIM_PTR_INT__i_s_k_s);
937 contract_all(1260, PRIM_INT__i_s_l_s, PRIM_PTR_INT__i_s_l_s);
938 contract_all(1540, PRIM_INT__i_s_m_s, PRIM_PTR_INT__i_s_m_s);
939 contract_all(1848, PRIM_INT__i_s_n_s, PRIM_PTR_INT__i_s_n_s);
940 contract_all(2184, PRIM_INT__i_s_o_s, PRIM_PTR_INT__i_s_o_s);
941 }
942 else
943 {
944 contract(420, shelloffsets, PRIM_INT__g_s_i_s, PRIM_PTR_INT__g_s_i_s);
945 contract(540, shelloffsets, PRIM_INT__g_s_k_s, PRIM_PTR_INT__g_s_k_s);
946 contract(675, shelloffsets, PRIM_INT__g_s_l_s, PRIM_PTR_INT__g_s_l_s);
947 contract(825, shelloffsets, PRIM_INT__g_s_m_s, PRIM_PTR_INT__g_s_m_s);
948 contract(990, shelloffsets, PRIM_INT__g_s_n_s, PRIM_PTR_INT__g_s_n_s);
949 contract(1170, shelloffsets, PRIM_INT__g_s_o_s, PRIM_PTR_INT__g_s_o_s);
950 contract(588, shelloffsets, PRIM_INT__h_s_i_s, PRIM_PTR_INT__h_s_i_s);
951 contract(756, shelloffsets, PRIM_INT__h_s_k_s, PRIM_PTR_INT__h_s_k_s);
952 contract(945, shelloffsets, PRIM_INT__h_s_l_s, PRIM_PTR_INT__h_s_l_s);
953 contract(1155, shelloffsets, PRIM_INT__h_s_m_s, PRIM_PTR_INT__h_s_m_s);
954 contract(1386, shelloffsets, PRIM_INT__h_s_n_s, PRIM_PTR_INT__h_s_n_s);
955 contract(1638, shelloffsets, PRIM_INT__h_s_o_s, PRIM_PTR_INT__h_s_o_s);
956 contract(784, shelloffsets, PRIM_INT__i_s_i_s, PRIM_PTR_INT__i_s_i_s);
957 contract(1008, shelloffsets, PRIM_INT__i_s_k_s, PRIM_PTR_INT__i_s_k_s);
958 contract(1260, shelloffsets, PRIM_INT__i_s_l_s, PRIM_PTR_INT__i_s_l_s);
959 contract(1540, shelloffsets, PRIM_INT__i_s_m_s, PRIM_PTR_INT__i_s_m_s);
960 contract(1848, shelloffsets, PRIM_INT__i_s_n_s, PRIM_PTR_INT__i_s_n_s);
961 contract(2184, shelloffsets, PRIM_INT__i_s_o_s, PRIM_PTR_INT__i_s_o_s);
962 PRIM_PTR_INT__g_s_i_s += lastoffset*420;
963 PRIM_PTR_INT__g_s_k_s += lastoffset*540;
964 PRIM_PTR_INT__g_s_l_s += lastoffset*675;
965 PRIM_PTR_INT__g_s_m_s += lastoffset*825;
966 PRIM_PTR_INT__g_s_n_s += lastoffset*990;
967 PRIM_PTR_INT__g_s_o_s += lastoffset*1170;
968 PRIM_PTR_INT__h_s_i_s += lastoffset*588;
969 PRIM_PTR_INT__h_s_k_s += lastoffset*756;
970 PRIM_PTR_INT__h_s_l_s += lastoffset*945;
971 PRIM_PTR_INT__h_s_m_s += lastoffset*1155;
972 PRIM_PTR_INT__h_s_n_s += lastoffset*1386;
973 PRIM_PTR_INT__h_s_o_s += lastoffset*1638;
974 PRIM_PTR_INT__i_s_i_s += lastoffset*784;
975 PRIM_PTR_INT__i_s_k_s += lastoffset*1008;
976 PRIM_PTR_INT__i_s_l_s += lastoffset*1260;
977 PRIM_PTR_INT__i_s_m_s += lastoffset*1540;
978 PRIM_PTR_INT__i_s_n_s += lastoffset*1848;
979 PRIM_PTR_INT__i_s_o_s += lastoffset*2184;
980 }
981
982 } // close loop over j
983 } // close loop over i
984
985 //Advance to the next batch
986 jstart = SIMINT_SIMD_ROUND(jend);
987
988 //////////////////////////////////////////////
        // Contracted integrals: Horizontal recurrence
990 //////////////////////////////////////////////
991
992
993 const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
994
995
996 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
997 {
998 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
999
1000 // set up HRR pointers
1001 double const * restrict HRR_INT__g_s_i_s = INT__g_s_i_s + abcd * 420;
1002 double const * restrict HRR_INT__g_s_k_s = INT__g_s_k_s + abcd * 540;
1003 double const * restrict HRR_INT__g_s_l_s = INT__g_s_l_s + abcd * 675;
1004 double const * restrict HRR_INT__g_s_m_s = INT__g_s_m_s + abcd * 825;
1005 double const * restrict HRR_INT__g_s_n_s = INT__g_s_n_s + abcd * 990;
1006 double const * restrict HRR_INT__g_s_o_s = INT__g_s_o_s + abcd * 1170;
1007 double const * restrict HRR_INT__h_s_i_s = INT__h_s_i_s + abcd * 588;
1008 double const * restrict HRR_INT__h_s_k_s = INT__h_s_k_s + abcd * 756;
1009 double const * restrict HRR_INT__h_s_l_s = INT__h_s_l_s + abcd * 945;
1010 double const * restrict HRR_INT__h_s_m_s = INT__h_s_m_s + abcd * 1155;
1011 double const * restrict HRR_INT__h_s_n_s = INT__h_s_n_s + abcd * 1386;
1012 double const * restrict HRR_INT__h_s_o_s = INT__h_s_o_s + abcd * 1638;
1013 double const * restrict HRR_INT__i_s_i_s = INT__i_s_i_s + abcd * 784;
1014 double const * restrict HRR_INT__i_s_k_s = INT__i_s_k_s + abcd * 1008;
1015 double const * restrict HRR_INT__i_s_l_s = INT__i_s_l_s + abcd * 1260;
1016 double const * restrict HRR_INT__i_s_m_s = INT__i_s_m_s + abcd * 1540;
1017 double const * restrict HRR_INT__i_s_n_s = INT__i_s_n_s + abcd * 1848;
1018 double const * restrict HRR_INT__i_s_o_s = INT__i_s_o_s + abcd * 2184;
1019 double * restrict HRR_INT__g_d_i_h = INT__g_d_i_h + real_abcd * 52920;
1020
1021 // form INT__g_p_i_s
1022 HRR_J_g_p(
1023 HRR_INT__g_p_i_s,
1024 HRR_INT__g_s_i_s,
1025 HRR_INT__h_s_i_s,
1026 hAB, 28);
1027
1028 // form INT__g_p_k_s
1029 HRR_J_g_p(
1030 HRR_INT__g_p_k_s,
1031 HRR_INT__g_s_k_s,
1032 HRR_INT__h_s_k_s,
1033 hAB, 36);
1034
1035 // form INT__g_p_l_s
1036 HRR_J_g_p(
1037 HRR_INT__g_p_l_s,
1038 HRR_INT__g_s_l_s,
1039 HRR_INT__h_s_l_s,
1040 hAB, 45);
1041
1042 // form INT__g_p_m_s
1043 HRR_J_g_p(
1044 HRR_INT__g_p_m_s,
1045 HRR_INT__g_s_m_s,
1046 HRR_INT__h_s_m_s,
1047 hAB, 55);
1048
1049 // form INT__g_p_n_s
1050 HRR_J_g_p(
1051 HRR_INT__g_p_n_s,
1052 HRR_INT__g_s_n_s,
1053 HRR_INT__h_s_n_s,
1054 hAB, 66);
1055
1056 // form INT__g_p_o_s
1057 HRR_J_g_p(
1058 HRR_INT__g_p_o_s,
1059 HRR_INT__g_s_o_s,
1060 HRR_INT__h_s_o_s,
1061 hAB, 78);
1062
1063 // form INT__h_p_i_s
1064 ostei_general_hrr_J(5, 1, 6, 0, hAB, HRR_INT__i_s_i_s, HRR_INT__h_s_i_s, HRR_INT__h_p_i_s);
1065
1066 // form INT__h_p_k_s
1067 ostei_general_hrr_J(5, 1, 7, 0, hAB, HRR_INT__i_s_k_s, HRR_INT__h_s_k_s, HRR_INT__h_p_k_s);
1068
1069 // form INT__h_p_l_s
1070 ostei_general_hrr_J(5, 1, 8, 0, hAB, HRR_INT__i_s_l_s, HRR_INT__h_s_l_s, HRR_INT__h_p_l_s);
1071
1072 // form INT__h_p_m_s
1073 ostei_general_hrr_J(5, 1, 9, 0, hAB, HRR_INT__i_s_m_s, HRR_INT__h_s_m_s, HRR_INT__h_p_m_s);
1074
1075 // form INT__h_p_n_s
1076 ostei_general_hrr_J(5, 1, 10, 0, hAB, HRR_INT__i_s_n_s, HRR_INT__h_s_n_s, HRR_INT__h_p_n_s);
1077
1078 // form INT__h_p_o_s
1079 ostei_general_hrr_J(5, 1, 11, 0, hAB, HRR_INT__i_s_o_s, HRR_INT__h_s_o_s, HRR_INT__h_p_o_s);
1080
1081 // form INT__g_d_i_s
1082 ostei_general_hrr_J(4, 2, 6, 0, hAB, HRR_INT__h_p_i_s, HRR_INT__g_p_i_s, HRR_INT__g_d_i_s);
1083
1084 // form INT__g_d_k_s
1085 ostei_general_hrr_J(4, 2, 7, 0, hAB, HRR_INT__h_p_k_s, HRR_INT__g_p_k_s, HRR_INT__g_d_k_s);
1086
1087 // form INT__g_d_l_s
1088 ostei_general_hrr_J(4, 2, 8, 0, hAB, HRR_INT__h_p_l_s, HRR_INT__g_p_l_s, HRR_INT__g_d_l_s);
1089
1090 // form INT__g_d_m_s
1091 ostei_general_hrr_J(4, 2, 9, 0, hAB, HRR_INT__h_p_m_s, HRR_INT__g_p_m_s, HRR_INT__g_d_m_s);
1092
1093 // form INT__g_d_n_s
1094 ostei_general_hrr_J(4, 2, 10, 0, hAB, HRR_INT__h_p_n_s, HRR_INT__g_p_n_s, HRR_INT__g_d_n_s);
1095
1096 // form INT__g_d_o_s
1097 ostei_general_hrr_J(4, 2, 11, 0, hAB, HRR_INT__h_p_o_s, HRR_INT__g_p_o_s, HRR_INT__g_d_o_s);
1098
1099 // form INT__g_d_i_p
1100 ostei_general_hrr_L(4, 2, 6, 1, hCD, HRR_INT__g_d_k_s, HRR_INT__g_d_i_s, HRR_INT__g_d_i_p);
1101
1102 // form INT__g_d_k_p
1103 ostei_general_hrr_L(4, 2, 7, 1, hCD, HRR_INT__g_d_l_s, HRR_INT__g_d_k_s, HRR_INT__g_d_k_p);
1104
1105 // form INT__g_d_l_p
1106 ostei_general_hrr_L(4, 2, 8, 1, hCD, HRR_INT__g_d_m_s, HRR_INT__g_d_l_s, HRR_INT__g_d_l_p);
1107
1108 // form INT__g_d_m_p
1109 ostei_general_hrr_L(4, 2, 9, 1, hCD, HRR_INT__g_d_n_s, HRR_INT__g_d_m_s, HRR_INT__g_d_m_p);
1110
1111 // form INT__g_d_n_p
1112 ostei_general_hrr_L(4, 2, 10, 1, hCD, HRR_INT__g_d_o_s, HRR_INT__g_d_n_s, HRR_INT__g_d_n_p);
1113
1114 // form INT__g_d_i_d
1115 ostei_general_hrr_L(4, 2, 6, 2, hCD, HRR_INT__g_d_k_p, HRR_INT__g_d_i_p, HRR_INT__g_d_i_d);
1116
1117 // form INT__g_d_k_d
1118 ostei_general_hrr_L(4, 2, 7, 2, hCD, HRR_INT__g_d_l_p, HRR_INT__g_d_k_p, HRR_INT__g_d_k_d);
1119
1120 // form INT__g_d_l_d
1121 ostei_general_hrr_L(4, 2, 8, 2, hCD, HRR_INT__g_d_m_p, HRR_INT__g_d_l_p, HRR_INT__g_d_l_d);
1122
1123 // form INT__g_d_m_d
1124 ostei_general_hrr_L(4, 2, 9, 2, hCD, HRR_INT__g_d_n_p, HRR_INT__g_d_m_p, HRR_INT__g_d_m_d);
1125
1126 // form INT__g_d_i_f
1127 ostei_general_hrr_L(4, 2, 6, 3, hCD, HRR_INT__g_d_k_d, HRR_INT__g_d_i_d, HRR_INT__g_d_i_f);
1128
1129 // form INT__g_d_k_f
1130 ostei_general_hrr_L(4, 2, 7, 3, hCD, HRR_INT__g_d_l_d, HRR_INT__g_d_k_d, HRR_INT__g_d_k_f);
1131
1132 // form INT__g_d_l_f
1133 ostei_general_hrr_L(4, 2, 8, 3, hCD, HRR_INT__g_d_m_d, HRR_INT__g_d_l_d, HRR_INT__g_d_l_f);
1134
1135 // form INT__g_d_i_g
1136 ostei_general_hrr_L(4, 2, 6, 4, hCD, HRR_INT__g_d_k_f, HRR_INT__g_d_i_f, HRR_INT__g_d_i_g);
1137
1138 // form INT__g_d_k_g
1139 ostei_general_hrr_L(4, 2, 7, 4, hCD, HRR_INT__g_d_l_f, HRR_INT__g_d_k_f, HRR_INT__g_d_k_g);
1140
1141 // form INT__g_d_i_h
1142 ostei_general_hrr_L(4, 2, 6, 5, hCD, HRR_INT__g_d_k_g, HRR_INT__g_d_i_g, HRR_INT__g_d_i_h);
1143
1144
1145 } // close HRR loop
1146
1147
1148 } // close loop cdbatch
1149
1150 istart = iend;
1151 } // close loop over ab
1152
1153 return P.nshell12_clip * Q.nshell12_clip;
1154 }
1155
ostei_d_g_i_h(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_g_i_h)1156 int ostei_d_g_i_h(struct simint_multi_shellpair const P,
1157 struct simint_multi_shellpair const Q,
1158 double screen_tol,
1159 double * const restrict work,
1160 double * const restrict INT__d_g_i_h)
1161 {
1162 double P_AB[3*P.nshell12];
1163 struct simint_multi_shellpair P_tmp = P;
1164 P_tmp.PA_x = P.PB_x; P_tmp.PA_y = P.PB_y; P_tmp.PA_z = P.PB_z;
1165 P_tmp.PB_x = P.PA_x; P_tmp.PB_y = P.PA_y; P_tmp.PB_z = P.PA_z;
1166 P_tmp.AB_x = P_AB;
1167 P_tmp.AB_y = P_AB + P.nshell12;
1168 P_tmp.AB_z = P_AB + 2*P.nshell12;
1169
1170 for(int i = 0; i < P.nshell12; i++)
1171 {
1172 P_tmp.AB_x[i] = -P.AB_x[i];
1173 P_tmp.AB_y[i] = -P.AB_y[i];
1174 P_tmp.AB_z[i] = -P.AB_z[i];
1175 }
1176
1177 int ret = ostei_g_d_i_h(P_tmp, Q, screen_tol, work, INT__d_g_i_h);
1178 double buffer[52920] SIMINT_ALIGN_ARRAY_DBL;
1179
1180 for(int q = 0; q < ret; q++)
1181 {
1182 int idx = 0;
1183 for(int a = 0; a < 6; ++a)
1184 for(int b = 0; b < 15; ++b)
1185 for(int c = 0; c < 28; ++c)
1186 for(int d = 0; d < 21; ++d)
1187 buffer[idx++] = INT__d_g_i_h[q*52920+b*3528+a*588+c*21+d];
1188
1189 memcpy(INT__d_g_i_h+q*52920, buffer, 52920*sizeof(double));
1190 }
1191
1192 return ret;
1193 }
1194
ostei_g_d_h_i(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__g_d_h_i)1195 int ostei_g_d_h_i(struct simint_multi_shellpair const P,
1196 struct simint_multi_shellpair const Q,
1197 double screen_tol,
1198 double * const restrict work,
1199 double * const restrict INT__g_d_h_i)
1200 {
1201 double Q_AB[3*Q.nshell12];
1202 struct simint_multi_shellpair Q_tmp = Q;
1203 Q_tmp.PA_x = Q.PB_x; Q_tmp.PA_y = Q.PB_y; Q_tmp.PA_z = Q.PB_z;
1204 Q_tmp.PB_x = Q.PA_x; Q_tmp.PB_y = Q.PA_y; Q_tmp.PB_z = Q.PA_z;
1205 Q_tmp.AB_x = Q_AB;
1206 Q_tmp.AB_y = Q_AB + Q.nshell12;
1207 Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
1208
1209 for(int i = 0; i < Q.nshell12; i++)
1210 {
1211 Q_tmp.AB_x[i] = -Q.AB_x[i];
1212 Q_tmp.AB_y[i] = -Q.AB_y[i];
1213 Q_tmp.AB_z[i] = -Q.AB_z[i];
1214 }
1215
1216 int ret = ostei_g_d_i_h(P, Q_tmp, screen_tol, work, INT__g_d_h_i);
1217 double buffer[52920] SIMINT_ALIGN_ARRAY_DBL;
1218
1219 for(int q = 0; q < ret; q++)
1220 {
1221 int idx = 0;
1222 for(int a = 0; a < 15; ++a)
1223 for(int b = 0; b < 6; ++b)
1224 for(int c = 0; c < 21; ++c)
1225 for(int d = 0; d < 28; ++d)
1226 buffer[idx++] = INT__g_d_h_i[q*52920+a*3528+b*588+d*21+c];
1227
1228 memcpy(INT__g_d_h_i+q*52920, buffer, 52920*sizeof(double));
1229 }
1230
1231 return ret;
1232 }
1233
ostei_d_g_h_i(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_g_h_i)1234 int ostei_d_g_h_i(struct simint_multi_shellpair const P,
1235 struct simint_multi_shellpair const Q,
1236 double screen_tol,
1237 double * const restrict work,
1238 double * const restrict INT__d_g_h_i)
1239 {
1240 double P_AB[3*P.nshell12];
1241 struct simint_multi_shellpair P_tmp = P;
1242 P_tmp.PA_x = P.PB_x; P_tmp.PA_y = P.PB_y; P_tmp.PA_z = P.PB_z;
1243 P_tmp.PB_x = P.PA_x; P_tmp.PB_y = P.PA_y; P_tmp.PB_z = P.PA_z;
1244 P_tmp.AB_x = P_AB;
1245 P_tmp.AB_y = P_AB + P.nshell12;
1246 P_tmp.AB_z = P_AB + 2*P.nshell12;
1247
1248 for(int i = 0; i < P.nshell12; i++)
1249 {
1250 P_tmp.AB_x[i] = -P.AB_x[i];
1251 P_tmp.AB_y[i] = -P.AB_y[i];
1252 P_tmp.AB_z[i] = -P.AB_z[i];
1253 }
1254
1255 double Q_AB[3*Q.nshell12];
1256 struct simint_multi_shellpair Q_tmp = Q;
1257 Q_tmp.PA_x = Q.PB_x; Q_tmp.PA_y = Q.PB_y; Q_tmp.PA_z = Q.PB_z;
1258 Q_tmp.PB_x = Q.PA_x; Q_tmp.PB_y = Q.PA_y; Q_tmp.PB_z = Q.PA_z;
1259 Q_tmp.AB_x = Q_AB;
1260 Q_tmp.AB_y = Q_AB + Q.nshell12;
1261 Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
1262
1263 for(int i = 0; i < Q.nshell12; i++)
1264 {
1265 Q_tmp.AB_x[i] = -Q.AB_x[i];
1266 Q_tmp.AB_y[i] = -Q.AB_y[i];
1267 Q_tmp.AB_z[i] = -Q.AB_z[i];
1268 }
1269
1270 int ret = ostei_g_d_i_h(P_tmp, Q_tmp, screen_tol, work, INT__d_g_h_i);
1271 double buffer[52920] SIMINT_ALIGN_ARRAY_DBL;
1272
1273 for(int q = 0; q < ret; q++)
1274 {
1275 int idx = 0;
1276 for(int a = 0; a < 6; ++a)
1277 for(int b = 0; b < 15; ++b)
1278 for(int c = 0; c < 21; ++c)
1279 for(int d = 0; d < 28; ++d)
1280 buffer[idx++] = INT__d_g_h_i[q*52920+b*3528+a*588+d*21+c];
1281
1282 memcpy(INT__d_g_h_i+q*52920, buffer, 52920*sizeof(double));
1283 }
1284
1285 return ret;
1286 }
1287
1288