1 /* naive.c -- naive (quadratic) computation of theta-constants
2  *
3  * Copyright (C) 2006, 2011, 2012, 2013 INRIA
4  *
5  * This file is part of CMH.
6  *
7  * CMH is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License as published by the
9  * Free Software Foundation; either version 3 of the License, or (at your
10  * option) any later version.
11  *
12  * CMH is distributed in the hope that it will be useful, but WITHOUT ANY
13  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see http://www.gnu.org/licenses/ .
19  */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <assert.h>
24 
25 #include <mpc.h>
26 
27 #include "params.h"
28 #include "macros.h"
29 #include "borchardt.h"
30 #include "naive.h"
31 #include "misc.h"
32 
33 static int
get_bound(mpc_t * t,int prec)34 get_bound (mpc_t *t, int prec)
35 {
36    mpfr_t a, b;
37    int p, R;
38 
39    p = cprec(t[0])+SEC_MARGIN;
40    finit(a, p);
41    finit(b, p);
42    /* We first compute the smallest eigenvalue of Im(t) */
43    fsub(b, MPC_IM(t[2]), MPC_IM(t[0]));
44    fmul(b, b, b);
45    fmul_2ui(a, MPC_IM(t[1]), 1);
46    fmul(a, a, a);
47    fadd(a, a, b);
48    fsqrt(a, a);
49    fneg(a, a);
50    fadd(a, a, MPC_IM(t[0]));
51    fadd(a, a, MPC_IM(t[2]));
52    fdiv_2ui(a, a, 2);
53 
54    /* We now want R such that R^2 >= 5(prec+5)/(23a) */
55    fmul_ui(a, a, 23);
56    fui_div(a, 5*(prec+5), a);
57 
58    R = 1;
59    if (!mpfr_number_p(a)) {
60       R=-1;
61    } else {
62       while ( fcmp_ui(a, R*R) > 0 )
63          R++;
64    }
65 
66    fclear(a);
67    fclear(b);
68    return R;
69 }
70 
71 
72 #ifdef  DEBUG_THETA_NAIVE
73 #define DPRINT(var, name, ...)						\
74             mpfr_printf(name ": %.12Rg+i*%.12Rg\n",			\
75                 ##__VA_ARGS__, mpc_realref(var), mpc_imagref(var))
76 #else
77 #define DPRINT(var, name, ...)  /**/
78 #endif
79 
80 #if 0
81 /* This can be dropped now */
82 void
83 eval_4theta_naive_orig (mpc_t *th, mpc_t *tau)
84 /* Input:
85  * tau represents the matrix [tau0, tau1; tau1, tau2] in the Siegel half space
86  * Output:
87  * th containing the four fundamental theta constants in tau
88  *
89  * Beware, the notation used in Dupont's thesis is [tau1,tau3,tau3,tau2]
90  * instead.
91  *
92  * The algorithm here follows closely the expression 10.1 on page 210 of
93  * Dupont's thesis. This does three multiplications in the inner loop,
94  * which gets executed a number of times which is linear in the desired
95  * precision N. The storage is proportional to sqrt(N).
96  *
97  * It is possible to do 2 mults only, with same storage, or 4 mults with
98  * constant storage.
99  */
100 {
101    int R_bound;
102    int prec, i, n0, n1;
103    mpfr_t pi;
104    mpc_t *qa, qc[3], qb[3], qbm[3], aux1[4], aux2[4], aux;
105 
106    /*
107     * AT THIS POINT, A FUNCTION COMPUTING R_bound SHOULD BE PLUGGED
108     * R_bound should be the floor of Sqrt((N+5)/9Im(tau_1))
109     */
110    R_bound = get_bound(tau, cprec(th[0]));
111    /* fprintf(stderr, "R_bound = %d\n", R_bound); */
112    assert (R_bound >= 0);
113 
114    prec = cprec(th[0])+SEC_MARGIN;
115    finit(pi, prec);
116    mpfr_const_pi(pi, GMP_RNDN);
117 
118    /* Memory allocation */
119    qa = (mpc_t *) malloc(R_bound*sizeof(mpc_t));
120    for (i=0; i<3; i++)
121    {
122       cinit(qb[i], prec);
123       cinit(qc[i], prec);
124       cinit(qbm[i], prec);
125    }
126    for (i=0; i<4; i++)
127    {
128       cinit(aux1[i], prec);
129       cinit(aux2[i], prec);
130    }
131    cinit(aux, prec);
132    for (i=0; i<R_bound; i++)
133       cinit(qa[i], prec);
134 
135    /* Computation of qa, qb, qc */
136    cmul_by_i (qa [0], tau [0]);
137    mpc_mul_fr(qa[0], qa[0], pi, MPC_RNDNN);
138    cexp(qa[0], qa[0]);
139 
140    cmul_by_i (qc [0], tau [2]);
141    mpc_mul_fr(qc[0], qc[0], pi, MPC_RNDNN);
142    cexp(qc[0], qc[0]);
143 
144    cmul_by_i (qb [0], tau [1]);
145    mpc_mul_fr(qb[0], qb[0], pi, MPC_RNDNN);
146    cmul_2ui(qb[0], qb[0], 1);
147    cexp(qb[0], qb[0]);
148 
149    DPRINT(qa[0], "q0");
150    DPRINT(qb[0], "q1^2");
151    DPRINT(qc[0], "q2");
152    /* qa[0] = exp(i*pi*tau0) */
153    /* qb[0] = exp(2i*pi*tau1) */
154    /* qc[0] = exp(i*pi*tau2) */
155 
156    cinv(qbm[0], qb[0]);
157    /* qbm[0] = exp(-2ipi*tau1) */
158 
159    /* Computation of the sq. powers of qa */
160    /* qb[1] and qbm[1] are used as temp values here */
161    csqr(qb[1], qa[0]);
162    cset(qbm[1], qa[0]);
163    for (i=1; i<R_bound; i++)
164    {
165       cmul(qbm[1], qbm[1], qb[1]);
166       cmul(qa[i], qa[i-1], qbm[1]);
167       DPRINT(qa[i], "q0^%d", (i+1)*(i+1));
168    }
169    /* qa[k-1] = exp(i*pi*k^2*tau0) */
170 
171 
172 
173    for (i=0; i<4; i++)
174       czero(aux1[i]);
175    for (n0=1; n0<=R_bound; n0++)
176    {
177       cadd(aux1[0], aux1[0], qa[n0-1]);
178       if (n0 % 2)
179          csub(aux1[1], aux1[1], qa[n0-1]);
180       else
181          cadd(aux1[1], aux1[1], qa[n0-1]);
182    }
183    cset(aux1[2], aux1[0]);
184    cset(aux1[3], aux1[1]);
185    /* aux1[0] = \sum_{k\geq1}        exp(i*pi*k^2*tau0)*/
186    /* aux1[1] = \sum_{k\geq1} (-1)^k exp(i*pi*k^2*tau0)*/
187    /* aux1[2] = \sum_{k\geq1}        exp(i*pi*k^2*tau0)*/
188    /* aux1[3] = \sum_{k\geq1} (-1)^k exp(i*pi*k^2*tau0)*/
189 
190    /* Initializing variables for the big loop... */
191    csqr(qc[1], qc[0]);  /* exp(i*pi*2*tau2) */
192    cset(qc[2], qc[0]);  /* exp(i*pi*(2n1+1)*tau2) */
193    cone(qb[1]);
194    cone(qbm[1]);
195 
196    DPRINT(aux1[0], "(Theta0-1)/2 at n1=0");
197    DPRINT(aux1[1], "(Theta1-1)/2 at n1=0");
198    DPRINT(aux1[2], "(Theta2-1)/2 at n1=0");
199    DPRINT(aux1[3], "(Theta3-1)/2 at n1=0");
200 
201    for (n1=1; n1<=R_bound; n1++)
202    {
203       /* First, qc is updated... */
204       if (n1 > 1)
205       {
206          cmul(qc[2], qc[2], qc[1]);     /* qc[2] = exp(i*pi*(2n1-1)*tau2) */
207          cmul(qc[0], qc[0], qc[2]);     /* qc[0] = exp(i*pi*(n1^2)*tau2) */
208       }
209       DPRINT(qc[0], "q2^%d", n1*n1);
210       cmul(qb[1], qb[1], qb[0]);        /* qb[1] = exp(2n1*i*pi*tau1) */
211       cmul(qbm[1], qbm[1], qbm[0]);     /* qbm[1] = exp(-2n1*i*pi*tau1) */
212       cone(qb[2]);
213       cone(qbm[2]);
214       for (i=0; i<4; i++)
215          czero(aux2[i]);
216       for (n0=1; n0<=R_bound; n0++)
217       {
218          cmul(qb[2], qb[2], qb[1]);      /* qb[2] = exp(2n0n1*i*pi*tau1) */
219          cmul(qbm[2], qbm[2], qbm[1]);   /* qbm[2] = exp(-2n0n1*i*pi*tau1) */
220          cadd(aux, qb[2], qbm[2]);
221          DPRINT(aux, "S_{%d*%d}", n1, n0);
222          cmul(aux, aux, qa[n0-1]);
223          /* aux2[0..3]: rightmost sum in the expression on the bottom of
224           * page 210 */
225          cadd(aux2[0], aux2[0], aux);
226          if (n0 % 2)
227             csub(aux2[1], aux2[1], aux);
228          else
229             cadd(aux2[1], aux2[1], aux);
230          if (n1 % 2)
231             csub(aux2[2], aux2[2], aux);
232          else
233             cadd(aux2[2], aux2[2], aux);
234          if ((n0+n1) % 2)
235             csub(aux2[3], aux2[3], aux);
236          else
237             cadd(aux2[3], aux2[3], aux);
238       }
239       /* Multiply the sum by exp(i*pi*n1^2*tau2) */
240       /* Modify aux1 to contain this new sum. aux1[] sums are computed
241        * piecewise, the part not depending on m is computed first.
242        */
243       for (i=0; i<4; i++)
244       {
245          cmul(aux2[i], aux2[i], qc[0]);
246          cadd(aux1[i], aux1[i], aux2[i]);
247       }
248       cadd(aux1[0], aux1[0], qc[0]);
249       cadd(aux1[1], aux1[1], qc[0]);
250       if (n1 % 2)
251       {
252          csub(aux1[2], aux1[2], qc[0]);
253          csub(aux1[3], aux1[3], qc[0]);
254       }
255       else {
256          cadd(aux1[2], aux1[2], qc[0]);
257          cadd(aux1[3], aux1[3], qc[0]);
258       }
259    }
260 
261    for (i=0; i<4; i++)
262    {
263       cmul_2ui(aux1[i], aux1[i], 1);
264       cadd_ui(th[i], aux1[i], 1);
265       cclear(aux1[i]);
266       cclear(aux2[i]);
267    }
268    for (i=0; i<3; i++)
269    {
270       cclear(qb[i]);
271       cclear(qbm[i]);
272       cclear(qc[i]);
273    }
274    for (i=0; i<R_bound; i++)
275       cclear(qa[i]);
276    free(qa);
277    cclear(aux);
278    fclear(pi);
279 }
280 #endif
281 
282 void
eval_4theta_naive(mpc_t * th,mpc_t * tau)283 eval_4theta_naive (mpc_t *th, mpc_t *tau)
284 {
285     /* Input: tau represents the matrix [tau0, tau1; tau1, tau2] in the
286      * Siegel half space
287      * Output: th containing the four fundamental theta constants in tau
288      */
289     /* This computes the theta constants by summing on a square [0,R]^2 */
290     int R = get_bound(tau, cprec(th[0]));
291     /* fprintf(stderr, "R_bound = %d\n", R); */
292 
293     int prec = cprec(th[0])+SEC_MARGIN;
294 
295     /* {{{ Need q_{0,1,2} first. Note that we are not inserting a 2 in the
296      * definition for q1 here, as is done in the function above. So qj is
297      * exactly exp(i*pi*tau_j)
298      */
299     mpc_t q[3];
300     {
301         mpfr_t pi;
302         finit(pi, prec);
303         mpfr_const_pi(pi, GMP_RNDN);
304 
305         for(int j = 0 ; j < 3 ; j++) {
306             cinit(q[j], prec);
307             cmul_by_i (q[j], tau [j]);
308             mpc_mul_fr(q[j], q[j], pi, MPC_RNDNN);
309             cexp(q[j], q[j]);
310             DPRINT(q[j], "q%d", j);
311         }
312 
313         fclear(pi);
314     } /* }}} */
315 
316     /* {{{ Set the initial value for the result, and allocate temps for
317      * intermediate sums. */
318     mpc_t rh[4];
319     for(int j = 0 ; j < 4 ; j++) {
320         cinit(rh[j], prec);
321         cone(th[j]);
322     } /* }}} */
323 
324     /* {{{ macro definition for update4 */
325     mpc_t tmp;  /* used internally by update4 */
326     cinit(tmp, prec);
327     /* This macro is used to update all four theta constants at once,
328      * given a specific summand x, at a given coordinate (n0,n1). The
329      * weight w must also be provided.
330      */
331 #define update4(dst,n0,n1,w,x)      do {				\
332         cmul_2ui(tmp, x, w);						\
333         for(int j = 0 ; j < 4 ; j++) {					\
334             int s = 0;							\
335             s += (n0) & j;						\
336             s += (n1) & (j>>1);						\
337             if (s & 1)							\
338                 csub(dst[j], dst[j], tmp);				\
339             else							\
340                 cadd(dst[j], dst[j], tmp);				\
341         }								\
342     } while (0)
343     /* }}} */
344 
345     /* {{{ Precompute a sublinear number of terms.  This helps us save one
346      * multiplication in the inner loop. */
347     mpc_t * q0;
348     {
349         q0 = malloc((R + 1) * sizeof(mpc_t));
350         for(int k = 0 ; k <= R ; k++) {
351             cinit(q0[k], prec);
352         }
353         cone(q0[0]);
354         cset(q0[1], q[0]);
355         mpc_t r0;       /* exp(i*pi*(2k-1)*tau_0) */
356         mpc_t qq0;      /* q[0]^2 */
357         cinit(r0, prec);
358         cinit(qq0, prec);
359         csqr(qq0,q[0]);
360         cmul(r0, qq0, q[0]);
361         for(int k = 1 ; k < R ; k++) {
362             cmul(q0[k+1], q0[k], r0);
363             cmul(r0, r0, qq0);
364             DPRINT(q0[k+1], "q0^%d", (k+1)*(k+1));
365         }
366         cclear(qq0);
367         cclear(r0);
368     } /* }}} */
369 
370 
371     /* We're summing over rows (0,k) to (R,k), for 1<=k<=R. The first row
372      * for k=0 is different, and computed from the precomputed
373      * q[0]^(k^2), which we do now.
374      */
375     for(int k = 1 ; k <= R ; k++) {
376         update4(th,k,0,1,q0[k]);
377     }
378     DPRINT(th[0], "Theta0 at n1=0");
379     DPRINT(th[1], "Theta1 at n1=0");
380     DPRINT(th[2], "Theta2 at n1=0");
381     DPRINT(th[3], "Theta3 at n1=0");
382 
383     /* For any k (which is the loop counter below)
384      * r2 is exp(i*pi*(2k+1)*tau_2)
385      * t2 is exp(i*pi*k^2*tau_2)
386      * qq2 is exp(i*pi*2*tau_2) (needed for updating r2)
387      */
388     mpc_t r2;
389     mpc_t t2;
390     mpc_t qq2;
391     cinit(t2, prec); cset(t2, q[2]);
392     cinit(qq2, prec); csqr(qq2, q[2]);
393     cinit(r2, prec); cmul(r2, t2, qq2);
394 
395     /* {{{ sums q1^(2k) + q1^(-2k)
396      *
397      * We maintain:
398      * s1 = q1^2 + q1^-2
399      * sk = q1^(2k) + q1^(-2k)
400      * skm = q1^(2k-2) + q1^(-2k-2)
401      *
402      * These can be updated by the shallow recurrence
403      * s_{k+1} = s_k*s_1 - s_{k-1}
404      *
405      * Initial value for sk is at k=1.
406      */
407     mpc_t s1, sk, skm;
408     mpc_t v1, vi, vim;
409     cinit(s1, prec);
410     cinit(sk, prec);
411     cinit(skm, prec);
412     cinit(v1, prec);
413     cinit(vi, prec);
414     cinit(vim, prec);
415     csqr(s1, q[1]);
416     cinv(sk, s1);
417     cadd(s1, s1, sk);
418     cset(sk, s1);
419     cset_ui(skm, 2);
420     /* }}} */
421 
422     mpc_t x;
423     cinit(x, prec);
424 
425     for(int k = 1 ; k <= R ; k++) {
426         /* Row (0,k) to (R, k)
427          * Term at (i,k) is q0^(i^2)*q2^(k^2)*(q1^(2*k*i)+q1^(-2*k*i)).
428          *
429          * We're factoring out the q2^(k^2) term, and accumulate in
430          * temporary variables rh[0] to rh[3].
431          *
432          * Beyond that, let v_i = (q1^(2*k*i)+q1^(-2*k*i)). We have v0=2,
433          * v1=sk, and the typical recurrence v_{i+1} = v_i*v_1 - v_{i-1}
434          * (same as satisfied by s_k).
435          */
436         /* First term is q2^(k^2), but as said above we multiply by this
437          * only in the end */
438         for(int j = 0 ; j < 4 ; j++) czero(rh[j]);
439         cone(x);
440         update4(rh, 0, k, 1, x);
441         /* Set v1, vi, vim */
442         cset_ui(vim, 2);
443         cset(vi, sk);
444         cset(v1, sk);
445         for(int i = 1 ; i < R ; i++) { /* {{{ inner loop */
446             DPRINT(vi, "S_{%d*%d}", k, i);
447             cmul(x, q0[i], vi);
448             update4(rh, i, k, 1, x);
449             /* Update vi */
450             cmul(tmp, vi, v1);
451             csub(vim, tmp, vim);
452             cswap(vi, vim);
453         } /* }}} */
454         /* {{{ last term in column: (k,R) */
455         DPRINT(vi, "S_{%d*%d}", k, R);
456         cmul(x, q0[R], vi);
457         update4(rh, R, k, 1, x);
458         /* }}} */
459         /* {{{ Now multiply by common q2^(k^2), and accumulate */
460         DPRINT(t2, "q2^%d", k*k);
461         for(int j = 0 ; j < 4 ; j++) {
462             cmul(rh[j], rh[j], t2);
463             cadd(th[j], th[j], rh[j]);
464         } /* }}} */
465         /* {{{ Update t2 = q2^(k^2) */
466         cmul(t2, t2, r2);
467         cmul(r2, r2, qq2);
468         /* }}} */
469         /* {{{ Update sk */
470         cmul(tmp, sk, s1);
471         csub(skm, tmp, skm);
472         cswap(sk, skm);
473         /* }}} */
474     }
475     /* {{{ clean up the mess */
476     cclear(x);
477     for(int k = 0 ; k <= R ; k++) {
478         cclear(q0[k]);
479     }
480     free(q0);
481     for(int j = 0 ; j < 3 ; j++) {
482         cclear(q[j]);
483     }
484     for(int j = 0 ; j < 4 ; j++) {
485         cclear(rh[j]);
486     }
487     cclear(qq2);
488     cclear(r2);
489     cclear(t2);
490     cclear(s1); cclear(sk); cclear(skm);
491     cclear(v1); cclear(vi); cclear(vim);
492     cclear(tmp);
493     /* }}} */
494 }
495 
496 void
eval_3theta2q_naive(mpc_t * b,mpc_t * tau)497 eval_3theta2q_naive (mpc_t *b, mpc_t *tau)
498 /* Input:
499  * tau represents the matrix [tau0, tau1; tau1, tau2] in the Siegel half space
500  * Output:
501  * b containing the squares of three fundamental theta quotients in tau
502  */
503 {
504    int prec;
505    mpc_t th[4];
506    int i;
507 
508    prec = cprec(b[0])+SEC_MARGIN;
509    for (i=0; i<4; i++)
510       cinit(th[i], prec);
511    eval_4theta_naive(th, tau);
512    cinv(th[0], th[0]);
513    for (i=1; i<4; i++) {
514       cmul(th[i], th[i], th[0]);
515       csqr(b[i-1], th[i]);
516    }
517    for (i=0; i<4; i++)
518       cclear(th[i]);
519 
520    return;
521 }
522 
523 void
eval_10theta2_naive(mpc_t * th2,mpc_t * tau)524 eval_10theta2_naive (mpc_t *th2, mpc_t *tau)
525 /* Input:
526    tau represents the matrix [tau0, tau1; tau1, tau2] in the Siegel half space
527    Output:
528    th2 containing the squares of the ten even theta functions in tau
529 */
530 {
531    mpc_t th4[4];
532    mpc_t th8[4];
533    mpc_t aux;
534    int i;
535 
536    mpfr_prec_t prec = cprec (th2 [0]);
537 
538    for (i=0; i<4; i++) {
539       cinit(th4[i], prec);
540       cinit(th8[i], prec);
541    }
542    cinit(aux, prec);
543 
544    /* evaluate 4 thetas in tau/2 */
545    for (i=0; i<3; i++)
546       cdiv_2ui(th8[i], tau[i], 1);
547    eval_4theta_naive(th4, th8);
548 
549    get_10theta2_from_4thetatauhalf (th2, th4);
550 
551    for (i=0; i<4; i++) {
552       cclear(th4[i]);
553       cclear(th8[i]);
554    }
555    cclear(aux);
556 }
557 
558 
559 /* This is a helper function. In order to obtain theta^2_{0,4,8,12}(tau),
560  * it suffices to know theta^2_{0,1,2,3}(tau/2). In applications, these
561  * might happen to be known from external sources. On the other hand,
562  * theta^2_{0,1,2,3}(tau/2) are not needed for the computation of the
563  * other even theta^2 at tau. So we split the computation in two.
564  *
565  * As per the even_thetas_ix array, this function will assign to elements
566  * numbered 0,4,6,8 in the destination array.
567  *
568  * This is the generic function. It's in both cases, whether we want the
569  * partial derivatives or not. dth may be passed as NULL to indicate that
570  * derivatives are not desired.
571  */
572 static void
get_4theta2_048c_from_4theta2tauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th8[4],mpc_t dth8[4][3])573 get_4theta2_048c_from_4theta2tauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th8[4], mpc_t dth8[4][3])
574 {
575 #define T(i)    th2[even_thetas_ix[i]]
576 #define dT(i,j)    dth2[even_thetas_ix[i]][j]
577 #define DO(idx, op1, op2, op3) do {					\
578    op1(T(idx), th8[0], th8[1]);						\
579    op2(T(idx), T(idx), th8[2]);						\
580    op3(T(idx), T(idx), th8[3]);						\
581    cdiv_2ui(T(idx), T(idx), 2);						\
582    if (dth2) for(int j = 0 ; j < 3 ; j++) {				\
583        op1(dT(idx,j), dth8[0][j], dth8[1][j]);				\
584        op2(dT(idx,j), dT(idx,j), dth8[2][j]);				\
585        op3(dT(idx,j), dT(idx,j), dth8[3][j]);				\
586        cdiv_2ui(dT(idx,j), dT(idx,j), 2);				\
587    } } while (0)
588    DO(0,  cadd, cadd, cadd);
589    DO(4,  csub, cadd, csub);
590    DO(8,  cadd, csub, csub);
591    DO(12, csub, csub, cadd);
592 #undef DO
593 #undef T
594 #undef dT
595 }
596 
597 /* Input: theta_{1,2,3}(Omega/2)^2/x    (e.g. with x=theta_0(Omega/2)^2)
598  * Output: theta_{0,4,8,12}(Omega)^2/x and the derivatives
599  */
600 static void
get_4theta2x_048c_from_3theta2qtauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th8[3],mpc_t dth8[3][3])601 get_4theta2x_048c_from_3theta2qtauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th8[3], mpc_t dth8[3][3])
602 {
603 #define T(i)    th2[even_thetas_ix[i]]
604 #define dT(i,j)    dth2[even_thetas_ix[i]][j]
605 #define DO(idx, op1x, op2, op3) do {					\
606    op1x(T(idx), th8[1-1]);					        \
607    cadd_ui(T(idx), T(idx), 1);                                          \
608    op2(T(idx), T(idx), th8[2-1]);					\
609    op3(T(idx), T(idx), th8[3-1]);					\
610    cdiv_2ui(T(idx), T(idx), 2);						\
611    if (dth2) for(int j = 0 ; j < 3 ; j++) {				\
612        op1x(dT(idx,j), dth8[1-1][j]);				        \
613        op2(dT(idx,j), dT(idx,j), dth8[2-1][j]);				\
614        op3(dT(idx,j), dT(idx,j), dth8[3-1][j]);				\
615        cdiv_2ui(dT(idx,j), dT(idx,j), 2);				\
616    } } while (0)
617    DO(0,  cset, cadd, cadd);
618    DO(4,  cneg, cadd, csub);
619    DO(8,  cset, csub, csub);
620    DO(12, cneg, csub, cadd);
621 #undef DO
622 #undef T
623 #undef dT
624 }
625 
626 #if 0
627 static void
628 get_4theta2_048c_from_4theta2tauhalf(mpc_t th2[10], mpc_t th8[4])
629 {
630     get_4theta2_048c_from_4theta2tauhalf_diff(th2, NULL, th8, NULL);
631 }
632 #endif
633 
634 /* Given (theta_{1,2,3}^2(tau/2))/theta_0^2(tau/2), return
635  * (theta_{0,4,8,12}^2(tau))/theta_0^2(tau/2).
636  */
637 static void
get_4theta2x_048c_from_3theta2qtauhalf(mpc_t th2[10],mpc_t th8[3])638 get_4theta2x_048c_from_3theta2qtauhalf(mpc_t th2[10], mpc_t th8[3])
639 {
640     get_4theta2x_048c_from_3theta2qtauhalf_diff(th2, NULL, th8, NULL);
641 }
642 
643 static void
get_6theta2_12369f_from_4thetatauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th[4],mpc_t dth[4][3])644 get_6theta2_12369f_from_4thetatauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th[4], mpc_t dth[4][3])
645 {
646    mpc_t aux;
647    mpfr_prec_t prec = cprec (th2 [0]);
648    cinit(aux, prec);
649 
650 #define T(i)    th2[even_thetas_ix[i]]
651 #define dT(i,j)    dth2[even_thetas_ix[i]][j]
652 
653 #define DO(x,y,u,v,w,z)							\
654    cmul(T(x), th[u], th[v]);						\
655    cmul(aux, th[w], th[z]);						\
656    csub(T(y), T(x), aux);						\
657    cadd(T(x), T(x), aux);						\
658    cdiv_2ui(T(y), T(y), 1);						\
659    cdiv_2ui(T(x), T(x), 1);						\
660    if (dth2) for(int j=0; j<3; j++) {					\
661        cmul(dT(x,j), th[u], dth[v][j]);					\
662        cmul(aux, th[v], dth[u][j]);					\
663        cadd(dT(x,j), dT(x,j), aux);					\
664        cset(dT(y,j), dT(x,j));						\
665        cmul(aux, th[w], dth[z][j]);					\
666        cadd(dT(x,j), dT(x,j), aux);					\
667        csub(dT(y,j), dT(y,j), aux);					\
668        cmul(aux, th[z], dth[w][j]);					\
669        cadd(dT(x,j), dT(x,j), aux);					\
670        csub(dT(y,j), dT(y,j), aux);					\
671        cdiv_2ui(dT(x,j), dT(x,j), 1);					\
672        cdiv_2ui(dT(y,j), dT(y,j), 1);					\
673    }
674    DO(1,9,0,1,2,3);
675    DO(2,6,0,2,1,3);
676    DO(3,15,0,3,1,2);
677 #undef  DO
678 #undef T
679 #undef dT
680    cclear(aux);
681 }
682 
683 /* This is a hack, which saves some multiplications */
684 /* Given (theta_{1,2,3}(tau/2))/theta_0(tau/2), return
685  * (theta_{1,2,3,6,9,15}^2(tau))/theta_0^2(tau/2).
686  *      and the derivatives
687  *
688  * FIXME: I don't thing we ever use the case where this functions needs
689  * input derivatives other than with respect to its own input variables,
690  * which means dth[i][j]=(i==j). We should use it to cut down on the
691  * number of operations.
692  */
693 static void
get_6theta2x_12369f_from_3thetaqtauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th[3],mpc_t dth[3][3])694 get_6theta2x_12369f_from_3thetaqtauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th[3], mpc_t dth[3][3])
695 {
696    mpc_t aux;
697    mpfr_prec_t prec = cprec (th2 [0]);
698    cinit(aux, prec);
699 
700 #define T(i)    th2[even_thetas_ix[i]]
701 #define dT(i,j)    dth2[even_thetas_ix[i]][j]
702 
703 #define DO(x,y,u,v,w,z)							\
704    cset(T(x), th[v-1]);				        		\
705    cmul(aux, th[w-1], th[z-1]);						\
706    csub(T(y), T(x), aux);						\
707    cadd(T(x), T(x), aux);						\
708    cdiv_2ui(T(y), T(y), 1);						\
709    cdiv_2ui(T(x), T(x), 1);						\
710    if (dth2) for(int j=0; j<3; j++) {					\
711        cset(dT(x,j), dth[v-1][j]);					\
712        cset(dT(y,j), dth[v-1][j]);	                        	\
713        cmul(aux, th[w-1], dth[z-1][j]);					\
714        cadd(dT(x,j), dT(x,j), aux);					\
715        csub(dT(y,j), dT(y,j), aux);					\
716        cmul(aux, th[z-1], dth[w-1][j]);					\
717        cadd(dT(x,j), dT(x,j), aux);					\
718        csub(dT(y,j), dT(y,j), aux);					\
719        cdiv_2ui(dT(x,j), dT(x,j), 1);					\
720        cdiv_2ui(dT(y,j), dT(y,j), 1);					\
721    }
722    DO(1,9,0,1,2,3);
723    DO(2,6,0,2,1,3);
724    DO(3,15,0,3,1,2);
725 #undef  DO
726 #undef T
727 #undef dT
728    cclear(aux);
729 }
730 
731 #if 0
732 static void
733 get_6theta2_12369f_from_4thetatauhalf(mpc_t th2[10], mpc_t th[4])
734 {
735     get_6theta2_12369f_from_4thetatauhalf_diff(th2, NULL, th, NULL);
736 }
737 #endif
738 
739 /* Given (theta_{1,2,3}(tau/2))/theta_0(tau/2), return
740  * (theta_{1,2,3,6,9,15}^2(tau))/theta_0(tau/2).
741  */
742 static void
get_6theta2x_12369f_from_3thetaqtauhalf(mpc_t th2[10],mpc_t th[3])743 get_6theta2x_12369f_from_3thetaqtauhalf(mpc_t th2[10], mpc_t th[3])
744 {
745     get_6theta2x_12369f_from_3thetaqtauhalf_diff(th2, NULL, th, NULL);
746 }
747 
748 /* FIXME: I don't thing we ever use the case where this functions needs
749  * input derivatives other than with respect to its own input variables,
750  * which means dth[i][j]=(i==j). We should use it to cut down on the
751  * number of operations.
752  */
753 void
get_10theta2x_from_3thetaqtauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th[3],mpc_t dth[3][3])754 get_10theta2x_from_3thetaqtauhalf_diff(mpc_t th2[10], mpc_t dth2[10][3], mpc_t th[3], mpc_t dth[3][3])
755 {
756     mpc_t thq[3], dthq[3][3];
757     for(int i = 0 ; i < 3 ; i++) {
758         cinit(thq[i], cprec(th2[i]));
759         csqr(thq[i], th[i]);
760         if (dth2) {
761             for(int j = 0 ; j < 3 ; j++) {
762                 cinit(dthq[i][j], cprec(th2[i]));
763                 cmul(dthq[i][j], th[i], dth[i][j]);
764                 cmul_2ui(dthq[i][j], dthq[i][j], 1);
765             }
766         }
767     }
768     get_4theta2x_048c_from_3theta2qtauhalf_diff(th2, dth2, thq, dthq);
769     for(int i = 0 ; i < 3 ; i++) {
770         cclear(thq[i]);
771         if (dth2) {
772             for(int j = 0 ; j < 3 ; j++) {
773                 cclear(dthq[i][j]);
774             }
775         }
776     }
777     get_6theta2x_12369f_from_3thetaqtauhalf_diff(th2, dth2, th, dth);
778 }
779 
780 void
get_10theta2x_from_3thetaqtauhalf(mpc_t th2[10],mpc_t th[3])781 get_10theta2x_from_3thetaqtauhalf(mpc_t th2[10], mpc_t th[3])
782 {
783     mpc_t thq[3];
784     for(int i = 0 ; i < 3 ; i++) {
785         cinit(thq[i], cprec(th2[i]));
786         csqr(thq[i], th[i]);
787     }
788     get_4theta2x_048c_from_3theta2qtauhalf(th2, thq);
789     get_6theta2x_12369f_from_3thetaqtauhalf(th2, th);
790     for(int i = 0 ; i < 3 ; i++) {
791         cclear(thq[i]);
792     }
793 }
794 
795 void
get_10theta2_from_4thetatauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th[4],mpc_t dth[4][3])796 get_10theta2_from_4thetatauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th[4], mpc_t dth[4][3])
797 /* Input:
798  * theta_0, ..., theta_3 evaluated in some tau/2
799  * Their gradients with respect to three variables x1, x2, x3
800  * Output:
801  * theta_0^2, ..., theta_15^2 evaluated in tau
802  * Their gradients with respect to x1, x2, x3
803  *
804  * The even thetas are stored in th2[0..9] following the indexing table
805  * even_thetas_ix
806  *
807  * NOTE: To gain a few cycles, it may be advantageous to call the two
808  * back-end functions get_4theta2_048c_from_4theta2tauhalf_diff and
809  * get_6theta2_12369f_from_4thetatauhalf_diff separately.
810  */
811 {
812     /* th8: 4 fundamental theta-squares at tau/2 */
813    mpc_t th8[4];
814    mpc_t dth8[4][3];
815    mpfr_prec_t prec = cprec (th2 [0]);
816    for (int i=0; i<4; i++) {
817       cinit(th8[i], prec);
818       if (dth) for(int j = 0; j < 3 ; j++) {
819           cinit(dth8[i][j], prec);
820       }
821    }
822    for (int i=0; i<4; i++) {
823       csqr(th8[i], th[i]);
824       if (dth) for(int j = 0; j < 3 ; j++) {
825           cmul(dth8[i][j], th8[i], dth[i][j]);
826           cmul_2ui(dth8[i][j], dth8[i][j], 1);
827       }
828    }
829 
830    get_4theta2_048c_from_4theta2tauhalf_diff(th2, dth2, th8, dth ? dth8 : NULL);
831    get_6theta2_12369f_from_4thetatauhalf_diff(th2, dth2, th, dth);
832 
833    for (int i=0; i<4; i++)
834       cclear(th8[i]);
835 }
836 
837 void
get_10theta2_from_4thetatauhalf(mpc_t * th2,mpc_t * th)838 get_10theta2_from_4thetatauhalf (mpc_t *th2, mpc_t *th)
839 /* Input:
840  * theta_0, ..., theta_3 evaluated in some tau/2
841  * Output:
842  * theta_0^2, ..., theta_15^2 evaluated in tau
843  *
844  * The even thetas are stored in th2[0..9] following the indexing table
845  * even_thetas_ix
846  *
847  * NOTE: To gain a few cycles, it may be advantageous to call the two
848  * back-end functions get_4theta2_048c_from_4theta2tauhalf and
849  * get_6theta2_12369f_from_4thetatauhalf separately.
850  */
851 {
852     get_10theta2_from_4thetatauhalf_diff(th2, NULL, th, NULL);
853 }
854 
855