1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6 
7 
ostei_d_p_f_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_p_f_s)8 int ostei_d_p_f_s(struct simint_multi_shellpair const P,
9                   struct simint_multi_shellpair const Q,
10                   double screen_tol,
11                   double * const restrict work,
12                   double * const restrict INT__d_p_f_s)
13 {
14 
15     SIMINT_ASSUME_ALIGN_DBL(work);
16     SIMINT_ASSUME_ALIGN_DBL(INT__d_p_f_s);
17     int ab, cd, abcd;
18     int istart, jstart;
19     int iprimcd, nprim_icd, icd;
20     const int check_screen = (screen_tol > 0.0);
21     int i, j;
22     int n;
23     int not_screened;
24     int real_abcd;
25     int iket;
26 
27     // partition workspace
28     double * const INT__d_s_f_s = work + (SIMINT_NSHELL_SIMD * 0);
29     double * const INT__f_s_f_s = work + (SIMINT_NSHELL_SIMD * 60);
30     SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*160);
31     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
32     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 7;
33     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 16;
34     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_p_s = primwork + 34;
35     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_d_s = primwork + 61;
36     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 97;
37     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_p_s = primwork + 127;
38     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_d_s = primwork + 181;
39     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_f_s = primwork + 253;
40     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 313;
41     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_p_s = primwork + 353;
42     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_d_s = primwork + 443;
43     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_f_s = primwork + 563;
44     double * const hrrwork = (double *)(primwork + 663);
45 
46 
47     // Create constants
48     const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
49     const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
50     const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
51     const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
52 
53 
54     ////////////////////////////////////////
55     // Loop over shells and primitives
56     ////////////////////////////////////////
57 
58     real_abcd = 0;
59     istart = 0;
60     for(ab = 0; ab < P.nshell12_clip; ++ab)
61     {
62         const int iend = istart + P.nprim12[ab];
63 
64         cd = 0;
65         jstart = 0;
66 
67         for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
68         {
69             const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
70             int jend = jstart;
71             for(i = 0; i < nshellbatch; i++)
72                 jend += Q.nprim12[cd+i];
73 
74             // Clear the beginning of the workspace (where we are accumulating integrals)
75             memset(work, 0, SIMINT_NSHELL_SIMD * 160 * sizeof(double));
76             abcd = 0;
77 
78 
79             for(i = istart; i < iend; ++i)
80             {
81                 SIMINT_DBLTYPE bra_screen_max;  // only used if check_screen
82 
83                 if(check_screen)
84                 {
85                     // Skip this whole thing if always insignificant
86                     if((P.screen[i] * Q.screen_max) < screen_tol)
87                         continue;
88                     bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
89                 }
90 
91                 icd = 0;
92                 iprimcd = 0;
93                 nprim_icd = Q.nprim12[cd];
94                 double * restrict PRIM_PTR_INT__d_s_f_s = INT__d_s_f_s + abcd * 60;
95                 double * restrict PRIM_PTR_INT__f_s_f_s = INT__f_s_f_s + abcd * 100;
96 
97 
98 
99                 // Load these one per loop over i
100                 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
101                 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
102                 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
103 
104                 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
105 
106                 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
107                 {
108                     // calculate the shell offsets
109                     // these are the offset from the shell pointed to by cd
110                     // for each element
111                     int shelloffsets[SIMINT_SIMD_LEN] = {0};
112                     int lastoffset = 0;
113                     const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
114 
115                     if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
116                     {
117                         // Handle if the first element of the vector is a new shell
118                         if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
119                         {
120                             nprim_icd += Q.nprim12[cd + (++icd)];
121                             PRIM_PTR_INT__d_s_f_s += 60;
122                             PRIM_PTR_INT__f_s_f_s += 100;
123                         }
124                         iprimcd++;
125                         for(n = 1; n < SIMINT_SIMD_LEN; ++n)
126                         {
127                             if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
128                             {
129                                 shelloffsets[n] = shelloffsets[n-1] + 1;
130                                 lastoffset++;
131                                 nprim_icd += Q.nprim12[cd + (++icd)];
132                             }
133                             else
134                                 shelloffsets[n] = shelloffsets[n-1];
135                             iprimcd++;
136                         }
137                     }
138                     else
139                         iprimcd += SIMINT_SIMD_LEN;
140 
141                     // Do we have to compute this vector (or has it been screened out)?
142                     // (not_screened != 0 means we have to do this vector)
143                     if(check_screen)
144                     {
145                         const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
146                         if(vmax < screen_tol)
147                         {
148                             PRIM_PTR_INT__d_s_f_s += lastoffset*60;
149                             PRIM_PTR_INT__f_s_f_s += lastoffset*100;
150                             continue;
151                         }
152                     }
153 
154                     const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
155                     const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
156                     const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
157                     const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
158 
159 
160                     /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
161                     SIMINT_DBLTYPE PQ[3];
162                     PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
163                     PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
164                     PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
165                     SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
166                     R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
167                     R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
168 
169                     const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
170                     const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
171                     const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
172                     const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
173                     const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
174                     const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
175                     const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
176 
177                     // NOTE: Minus sign!
178                     const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
179                     SIMINT_DBLTYPE aop_PQ[3];
180                     aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
181                     aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
182                     aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
183 
184                     SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
185                     SIMINT_DBLTYPE aoq_PQ[3];
186                     aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
187                     aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
188                     aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
189                     // Put a minus sign here so we don't have to in RR routines
190                     a_over_q = SIMINT_NEG(a_over_q);
191 
192 
193                     //////////////////////////////////////////////
194                     // Fjt function section
195                     // Maximum v value: 6
196                     //////////////////////////////////////////////
197                     // The parameter to the Fjt function
198                     const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
199 
200 
201                     const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
202 
203 
204                     boys_F_split(PRIM_INT__s_s_s_s, F_x, 6);
205                     SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
206                     prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
207                     for(n = 0; n <= 6; n++)
208                         PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
209 
210                     //////////////////////////////////////////////
211                     // Primitive integrals: Vertical recurrance
212                     //////////////////////////////////////////////
213 
214                     const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
215                     const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
216                     const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
217                     const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
218                     const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
219                     const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
220                     const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
221 
222 
223 
224                     // Forming PRIM_INT__p_s_s_s[6 * 3];
225                     for(n = 0; n < 6; ++n)  // loop over orders of auxiliary function
226                     {
227 
228                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
229                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
230 
231                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
232                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
233 
234                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
235                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
236 
237                     }
238 
239 
240 
241                     // Forming PRIM_INT__d_s_s_s[5 * 6];
242                     for(n = 0; n < 5; ++n)  // loop over orders of auxiliary function
243                     {
244 
245                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
246                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
247                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
248 
249                         PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
250                         PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
251 
252                         PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
253                         PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 2]);
254 
255                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
256                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
257                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
258 
259                         PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
260                         PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 4]);
261 
262                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
263                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
264                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
265 
266                     }
267 
268 
269 
270                     // Forming PRIM_INT__d_s_p_s[3 * 18];
271                     for(n = 0; n < 3; ++n)  // loop over orders of auxiliary function
272                     {
273 
274                         PRIM_INT__d_s_p_s[n * 18 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
275                         PRIM_INT__d_s_p_s[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_p_s[n * 18 + 0]);
276                         PRIM_INT__d_s_p_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_p_s[n * 18 + 0]);
277 
278                         PRIM_INT__d_s_p_s[n * 18 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
279                         PRIM_INT__d_s_p_s[n * 18 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_p_s[n * 18 + 1]);
280 
281                         PRIM_INT__d_s_p_s[n * 18 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
282                         PRIM_INT__d_s_p_s[n * 18 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_p_s[n * 18 + 2]);
283 
284                         PRIM_INT__d_s_p_s[n * 18 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 1]);
285                         PRIM_INT__d_s_p_s[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_p_s[n * 18 + 3]);
286                         PRIM_INT__d_s_p_s[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_p_s[n * 18 + 3]);
287 
288                         PRIM_INT__d_s_p_s[n * 18 + 4] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 1]);
289                         PRIM_INT__d_s_p_s[n * 18 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_p_s[n * 18 + 4]);
290                         PRIM_INT__d_s_p_s[n * 18 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_p_s[n * 18 + 4]);
291 
292                         PRIM_INT__d_s_p_s[n * 18 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
293                         PRIM_INT__d_s_p_s[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_p_s[n * 18 + 5]);
294 
295                         PRIM_INT__d_s_p_s[n * 18 + 6] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 2]);
296                         PRIM_INT__d_s_p_s[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_p_s[n * 18 + 6]);
297                         PRIM_INT__d_s_p_s[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_p_s[n * 18 + 6]);
298 
299                         PRIM_INT__d_s_p_s[n * 18 + 7] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 2]);
300                         PRIM_INT__d_s_p_s[n * 18 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_p_s[n * 18 + 7]);
301 
302                         PRIM_INT__d_s_p_s[n * 18 + 8] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 2]);
303                         PRIM_INT__d_s_p_s[n * 18 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_p_s[n * 18 + 8]);
304                         PRIM_INT__d_s_p_s[n * 18 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_p_s[n * 18 + 8]);
305 
306                         PRIM_INT__d_s_p_s[n * 18 + 9] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
307                         PRIM_INT__d_s_p_s[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_p_s[n * 18 + 9]);
308 
309                         PRIM_INT__d_s_p_s[n * 18 + 10] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
310                         PRIM_INT__d_s_p_s[n * 18 + 10] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_p_s[n * 18 + 10]);
311                         PRIM_INT__d_s_p_s[n * 18 + 10] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_p_s[n * 18 + 10]);
312 
313                         PRIM_INT__d_s_p_s[n * 18 + 11] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
314                         PRIM_INT__d_s_p_s[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_p_s[n * 18 + 11]);
315 
316                         PRIM_INT__d_s_p_s[n * 18 + 12] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 4]);
317                         PRIM_INT__d_s_p_s[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_p_s[n * 18 + 12]);
318 
319                         PRIM_INT__d_s_p_s[n * 18 + 13] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 4]);
320                         PRIM_INT__d_s_p_s[n * 18 + 13] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_p_s[n * 18 + 13]);
321                         PRIM_INT__d_s_p_s[n * 18 + 13] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_p_s[n * 18 + 13]);
322 
323                         PRIM_INT__d_s_p_s[n * 18 + 14] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 4]);
324                         PRIM_INT__d_s_p_s[n * 18 + 14] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_p_s[n * 18 + 14]);
325                         PRIM_INT__d_s_p_s[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_p_s[n * 18 + 14]);
326 
327                         PRIM_INT__d_s_p_s[n * 18 + 15] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
328                         PRIM_INT__d_s_p_s[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_p_s[n * 18 + 15]);
329 
330                         PRIM_INT__d_s_p_s[n * 18 + 16] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
331                         PRIM_INT__d_s_p_s[n * 18 + 16] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_p_s[n * 18 + 16]);
332 
333                         PRIM_INT__d_s_p_s[n * 18 + 17] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
334                         PRIM_INT__d_s_p_s[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_p_s[n * 18 + 17]);
335                         PRIM_INT__d_s_p_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_p_s[n * 18 + 17]);
336 
337                     }
338 
339 
340 
341                     // Forming PRIM_INT__p_s_p_s[3 * 9];
342                     for(n = 0; n < 3; ++n)  // loop over orders of auxiliary function
343                     {
344 
345                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
346                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
347                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
348 
349                         PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
350                         PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 1]);
351 
352                         PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
353                         PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 2]);
354 
355                         PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_s_s[n * 3 + 1]);
356                         PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 3]);
357 
358                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
359                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 4]);
360                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 4]);
361 
362                         PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
363                         PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 5]);
364 
365                         PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_s_s[n * 3 + 2]);
366                         PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 6]);
367 
368                         PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_s_s[n * 3 + 2]);
369                         PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 7]);
370 
371                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
372                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 8]);
373                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 8]);
374 
375                     }
376 
377 
378                     VRR_K_d_s_d_s(
379                             PRIM_INT__d_s_d_s,
380                             PRIM_INT__d_s_p_s,
381                             PRIM_INT__d_s_s_s,
382                             PRIM_INT__p_s_p_s,
383                             Q_PA,
384                             a_over_q,
385                             aoq_PQ,
386                             one_over_2pq,
387                             one_over_2q,
388                             2);
389 
390 
391 
392                     // Forming PRIM_INT__s_s_p_s[3 * 3];
393                     for(n = 0; n < 3; ++n)  // loop over orders of auxiliary function
394                     {
395 
396                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
397                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
398 
399                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
400                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
401 
402                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
403                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
404 
405                     }
406 
407 
408 
409                     // Forming PRIM_INT__p_s_d_s[2 * 18];
410                     for(n = 0; n < 2; ++n)  // loop over orders of auxiliary function
411                     {
412 
413                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_p_s[n * 9 + 0]);
414                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_p_s[(n+1) * 9 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
415                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_d_s[n * 18 + 0]);
416                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
417 
418                         PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 0]);
419                         PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 0], PRIM_INT__p_s_d_s[n * 18 + 1]);
420 
421                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 1]);
422                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 1], PRIM_INT__p_s_d_s[n * 18 + 3]);
423                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_d_s[n * 18 + 3]);
424 
425                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_p_s[n * 9 + 2]);
426                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_p_s[(n+1) * 9 + 2], PRIM_INT__p_s_d_s[n * 18 + 5]);
427                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_d_s[n * 18 + 5]);
428 
429                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_p_s[n * 9 + 3]);
430                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_p_s[(n+1) * 9 + 3], PRIM_INT__p_s_d_s[n * 18 + 6]);
431                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_d_s[n * 18 + 6]);
432 
433                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 3]);
434                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 3], PRIM_INT__p_s_d_s[n * 18 + 7]);
435                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 7]);
436 
437                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 4]);
438                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 4], PRIM_INT__p_s_d_s[n * 18 + 9]);
439                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_d_s[n * 18 + 9]);
440                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 9]);
441 
442                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_p_s[n * 9 + 5]);
443                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_p_s[(n+1) * 9 + 5], PRIM_INT__p_s_d_s[n * 18 + 11]);
444                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_d_s[n * 18 + 11]);
445 
446                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_p_s[n * 9 + 6]);
447                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_p_s[(n+1) * 9 + 6], PRIM_INT__p_s_d_s[n * 18 + 12]);
448                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_d_s[n * 18 + 12]);
449 
450                         PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 6]);
451                         PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 6], PRIM_INT__p_s_d_s[n * 18 + 13]);
452 
453                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 7]);
454                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 7], PRIM_INT__p_s_d_s[n * 18 + 15]);
455                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_d_s[n * 18 + 15]);
456 
457                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_p_s[n * 9 + 8]);
458                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_p_s[(n+1) * 9 + 8], PRIM_INT__p_s_d_s[n * 18 + 17]);
459                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_d_s[n * 18 + 17]);
460                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 17]);
461 
462                     }
463 
464 
465                     VRR_K_d_s_f_s(
466                             PRIM_INT__d_s_f_s,
467                             PRIM_INT__d_s_d_s,
468                             PRIM_INT__d_s_p_s,
469                             PRIM_INT__p_s_d_s,
470                             Q_PA,
471                             a_over_q,
472                             aoq_PQ,
473                             one_over_2pq,
474                             one_over_2q,
475                             1);
476 
477 
478 
479                     // Forming PRIM_INT__f_s_s_s[4 * 10];
480                     for(n = 0; n < 4; ++n)  // loop over orders of auxiliary function
481                     {
482 
483                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
484                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
485                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
486 
487                         PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
488                         PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
489 
490                         PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
491                         PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
492 
493                         PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
494                         PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
495 
496                         PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
497                         PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
498 
499                         PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
500                         PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
501 
502                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
503                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
504                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
505 
506                         PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
507                         PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
508 
509                         PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
510                         PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
511 
512                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
513                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
514                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
515 
516                     }
517 
518 
519                     VRR_K_f_s_p_s(
520                             PRIM_INT__f_s_p_s,
521                             PRIM_INT__f_s_s_s,
522                             PRIM_INT__d_s_s_s,
523                             Q_PA,
524                             aoq_PQ,
525                             one_over_2pq,
526                             3);
527 
528 
529                     VRR_K_f_s_d_s(
530                             PRIM_INT__f_s_d_s,
531                             PRIM_INT__f_s_p_s,
532                             PRIM_INT__f_s_s_s,
533                             PRIM_INT__d_s_p_s,
534                             Q_PA,
535                             a_over_q,
536                             aoq_PQ,
537                             one_over_2pq,
538                             one_over_2q,
539                             2);
540 
541 
542                     ostei_general_vrr_K(3, 0, 3, 0, 1,
543                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
544                             PRIM_INT__f_s_d_s, PRIM_INT__f_s_p_s, NULL, PRIM_INT__d_s_d_s, NULL, PRIM_INT__f_s_f_s);
545 
546 
547 
548 
549                     ////////////////////////////////////
550                     // Accumulate contracted integrals
551                     ////////////////////////////////////
552                     if(lastoffset == 0)
553                     {
554                         contract_all(60, PRIM_INT__d_s_f_s, PRIM_PTR_INT__d_s_f_s);
555                         contract_all(100, PRIM_INT__f_s_f_s, PRIM_PTR_INT__f_s_f_s);
556                     }
557                     else
558                     {
559                         contract(60, shelloffsets, PRIM_INT__d_s_f_s, PRIM_PTR_INT__d_s_f_s);
560                         contract(100, shelloffsets, PRIM_INT__f_s_f_s, PRIM_PTR_INT__f_s_f_s);
561                         PRIM_PTR_INT__d_s_f_s += lastoffset*60;
562                         PRIM_PTR_INT__f_s_f_s += lastoffset*100;
563                     }
564 
565                 }  // close loop over j
566             }  // close loop over i
567 
568             //Advance to the next batch
569             jstart = SIMINT_SIMD_ROUND(jend);
570 
571             //////////////////////////////////////////////
572             // Contracted integrals: Horizontal recurrance
573             //////////////////////////////////////////////
574 
575 
576             const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
577 
578 
579             for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
580             {
581 
582                 // set up HRR pointers
583                 double const * restrict HRR_INT__d_s_f_s = INT__d_s_f_s + abcd * 60;
584                 double const * restrict HRR_INT__f_s_f_s = INT__f_s_f_s + abcd * 100;
585                 double * restrict HRR_INT__d_p_f_s = INT__d_p_f_s + real_abcd * 180;
586 
587                 // form INT__d_p_f_s
588                 for(iket = 0; iket < 10; ++iket)
589                 {
590                     HRR_INT__d_p_f_s[0 * 10 + iket] = HRR_INT__f_s_f_s[0 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_f_s[0 * 10 + iket] );
591 
592                     HRR_INT__d_p_f_s[1 * 10 + iket] = HRR_INT__f_s_f_s[1 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_f_s[0 * 10 + iket] );
593 
594                     HRR_INT__d_p_f_s[2 * 10 + iket] = HRR_INT__f_s_f_s[2 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_f_s[0 * 10 + iket] );
595 
596                     HRR_INT__d_p_f_s[3 * 10 + iket] = HRR_INT__f_s_f_s[1 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_f_s[1 * 10 + iket] );
597 
598                     HRR_INT__d_p_f_s[4 * 10 + iket] = HRR_INT__f_s_f_s[3 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_f_s[1 * 10 + iket] );
599 
600                     HRR_INT__d_p_f_s[5 * 10 + iket] = HRR_INT__f_s_f_s[4 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_f_s[1 * 10 + iket] );
601 
602                     HRR_INT__d_p_f_s[6 * 10 + iket] = HRR_INT__f_s_f_s[2 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_f_s[2 * 10 + iket] );
603 
604                     HRR_INT__d_p_f_s[7 * 10 + iket] = HRR_INT__f_s_f_s[4 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_f_s[2 * 10 + iket] );
605 
606                     HRR_INT__d_p_f_s[8 * 10 + iket] = HRR_INT__f_s_f_s[5 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_f_s[2 * 10 + iket] );
607 
608                     HRR_INT__d_p_f_s[9 * 10 + iket] = HRR_INT__f_s_f_s[3 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_f_s[3 * 10 + iket] );
609 
610                     HRR_INT__d_p_f_s[10 * 10 + iket] = HRR_INT__f_s_f_s[6 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_f_s[3 * 10 + iket] );
611 
612                     HRR_INT__d_p_f_s[11 * 10 + iket] = HRR_INT__f_s_f_s[7 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_f_s[3 * 10 + iket] );
613 
614                     HRR_INT__d_p_f_s[12 * 10 + iket] = HRR_INT__f_s_f_s[4 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_f_s[4 * 10 + iket] );
615 
616                     HRR_INT__d_p_f_s[13 * 10 + iket] = HRR_INT__f_s_f_s[7 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_f_s[4 * 10 + iket] );
617 
618                     HRR_INT__d_p_f_s[14 * 10 + iket] = HRR_INT__f_s_f_s[8 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_f_s[4 * 10 + iket] );
619 
620                     HRR_INT__d_p_f_s[15 * 10 + iket] = HRR_INT__f_s_f_s[5 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_f_s[5 * 10 + iket] );
621 
622                     HRR_INT__d_p_f_s[16 * 10 + iket] = HRR_INT__f_s_f_s[8 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_f_s[5 * 10 + iket] );
623 
624                     HRR_INT__d_p_f_s[17 * 10 + iket] = HRR_INT__f_s_f_s[9 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_f_s[5 * 10 + iket] );
625 
626                 }
627 
628 
629 
630             }  // close HRR loop
631 
632 
633         }   // close loop cdbatch
634 
635         istart = iend;
636     }  // close loop over ab
637 
638     return P.nshell12_clip * Q.nshell12_clip;
639 }
640 
641