1 /* naive.c -- naive (quadratic) computation of theta-constants
2 *
3 * Copyright (C) 2006, 2011, 2012, 2013 INRIA
4 *
5 * This file is part of CMH.
6 *
7 * CMH is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 3 of the License, or (at your
10 * option) any later version.
11 *
12 * CMH is distributed in the hope that it will be useful, but WITHOUT ANY
13 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see http://www.gnu.org/licenses/ .
19 */
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <assert.h>
24
25 #include <mpc.h>
26
27 #include "params.h"
28 #include "macros.h"
29 #include "borchardt.h"
30 #include "naive.h"
31 #include "misc.h"
32
33 static int
get_bound(mpc_t * t,int prec)34 get_bound (mpc_t *t, int prec)
35 {
36 mpfr_t a, b;
37 int p, R;
38
39 p = cprec(t[0])+SEC_MARGIN;
40 finit(a, p);
41 finit(b, p);
42 /* We first compute the smallest eigenvalue of Im(t) */
43 fsub(b, MPC_IM(t[2]), MPC_IM(t[0]));
44 fmul(b, b, b);
45 fmul_2ui(a, MPC_IM(t[1]), 1);
46 fmul(a, a, a);
47 fadd(a, a, b);
48 fsqrt(a, a);
49 fneg(a, a);
50 fadd(a, a, MPC_IM(t[0]));
51 fadd(a, a, MPC_IM(t[2]));
52 fdiv_2ui(a, a, 2);
53
54 /* We now want R such that R^2 >= 5(prec+5)/(23a) */
55 fmul_ui(a, a, 23);
56 fui_div(a, 5*(prec+5), a);
57
58 R = 1;
59 if (!mpfr_number_p(a)) {
60 R=-1;
61 } else {
62 while ( fcmp_ui(a, R*R) > 0 )
63 R++;
64 }
65
66 fclear(a);
67 fclear(b);
68 return R;
69 }
70
71
72 #ifdef DEBUG_THETA_NAIVE
73 #define DPRINT(var, name, ...) \
74 mpfr_printf(name ": %.12Rg+i*%.12Rg\n", \
75 ##__VA_ARGS__, mpc_realref(var), mpc_imagref(var))
76 #else
77 #define DPRINT(var, name, ...) /**/
78 #endif
79
80 #if 0
81 /* This can be dropped now */
82 void
83 eval_4theta_naive_orig (mpc_t *th, mpc_t *tau)
84 /* Input:
85 * tau represents the matrix [tau0, tau1; tau1, tau2] in the Siegel half space
86 * Output:
87 * th containing the four fundamental theta constants in tau
88 *
89 * Beware, the notation used in Dupont's thesis is [tau1,tau3,tau3,tau2]
90 * instead.
91 *
92 * The algorithm here follows closely the expression 10.1 on page 210 of
93 * Dupont's thesis. This does three multiplications in the inner loop,
94 * which gets executed a number of times which is linear in the desired
95 * precision N. The storage is proportional to sqrt(N).
96 *
97 * It is possible to do 2 mults only, with same storage, or 4 mults with
98 * constant storage.
99 */
100 {
101 int R_bound;
102 int prec, i, n0, n1;
103 mpfr_t pi;
104 mpc_t *qa, qc[3], qb[3], qbm[3], aux1[4], aux2[4], aux;
105
106 /*
107 * AT THIS POINT, A FUNCTION COMPUTING R_bound SHOULD BE PLUGGED
108 * R_bound should be the floor of Sqrt((N+5)/9Im(tau_1))
109 */
110 R_bound = get_bound(tau, cprec(th[0]));
111 /* fprintf(stderr, "R_bound = %d\n", R_bound); */
112 assert (R_bound >= 0);
113
114 prec = cprec(th[0])+SEC_MARGIN;
115 finit(pi, prec);
116 mpfr_const_pi(pi, GMP_RNDN);
117
118 /* Memory allocation */
119 qa = (mpc_t *) malloc(R_bound*sizeof(mpc_t));
120 for (i=0; i<3; i++)
121 {
122 cinit(qb[i], prec);
123 cinit(qc[i], prec);
124 cinit(qbm[i], prec);
125 }
126 for (i=0; i<4; i++)
127 {
128 cinit(aux1[i], prec);
129 cinit(aux2[i], prec);
130 }
131 cinit(aux, prec);
132 for (i=0; i<R_bound; i++)
133 cinit(qa[i], prec);
134
135 /* Computation of qa, qb, qc */
136 cmul_by_i (qa [0], tau [0]);
137 mpc_mul_fr(qa[0], qa[0], pi, MPC_RNDNN);
138 cexp(qa[0], qa[0]);
139
140 cmul_by_i (qc [0], tau [2]);
141 mpc_mul_fr(qc[0], qc[0], pi, MPC_RNDNN);
142 cexp(qc[0], qc[0]);
143
144 cmul_by_i (qb [0], tau [1]);
145 mpc_mul_fr(qb[0], qb[0], pi, MPC_RNDNN);
146 cmul_2ui(qb[0], qb[0], 1);
147 cexp(qb[0], qb[0]);
148
149 DPRINT(qa[0], "q0");
150 DPRINT(qb[0], "q1^2");
151 DPRINT(qc[0], "q2");
152 /* qa[0] = exp(i*pi*tau0) */
153 /* qb[0] = exp(2i*pi*tau1) */
154 /* qc[0] = exp(i*pi*tau2) */
155
156 cinv(qbm[0], qb[0]);
157 /* qbm[0] = exp(-2ipi*tau1) */
158
159 /* Computation of the sq. powers of qa */
160 /* qb[1] and qbm[1] are used as temp values here */
161 csqr(qb[1], qa[0]);
162 cset(qbm[1], qa[0]);
163 for (i=1; i<R_bound; i++)
164 {
165 cmul(qbm[1], qbm[1], qb[1]);
166 cmul(qa[i], qa[i-1], qbm[1]);
167 DPRINT(qa[i], "q0^%d", (i+1)*(i+1));
168 }
169 /* qa[k-1] = exp(i*pi*k^2*tau0) */
170
171
172
173 for (i=0; i<4; i++)
174 czero(aux1[i]);
175 for (n0=1; n0<=R_bound; n0++)
176 {
177 cadd(aux1[0], aux1[0], qa[n0-1]);
178 if (n0 % 2)
179 csub(aux1[1], aux1[1], qa[n0-1]);
180 else
181 cadd(aux1[1], aux1[1], qa[n0-1]);
182 }
183 cset(aux1[2], aux1[0]);
184 cset(aux1[3], aux1[1]);
185 /* aux1[0] = \sum_{k\geq1} exp(i*pi*k^2*tau0)*/
186 /* aux1[1] = \sum_{k\geq1} (-1)^k exp(i*pi*k^2*tau0)*/
187 /* aux1[2] = \sum_{k\geq1} exp(i*pi*k^2*tau0)*/
188 /* aux1[3] = \sum_{k\geq1} (-1)^k exp(i*pi*k^2*tau0)*/
189
190 /* Initializing variables for the big loop... */
191 csqr(qc[1], qc[0]); /* exp(i*pi*2*tau2) */
192 cset(qc[2], qc[0]); /* exp(i*pi*(2n1+1)*tau2) */
193 cone(qb[1]);
194 cone(qbm[1]);
195
196 DPRINT(aux1[0], "(Theta0-1)/2 at n1=0");
197 DPRINT(aux1[1], "(Theta1-1)/2 at n1=0");
198 DPRINT(aux1[2], "(Theta2-1)/2 at n1=0");
199 DPRINT(aux1[3], "(Theta3-1)/2 at n1=0");
200
201 for (n1=1; n1<=R_bound; n1++)
202 {
203 /* First, qc is updated... */
204 if (n1 > 1)
205 {
206 cmul(qc[2], qc[2], qc[1]); /* qc[2] = exp(i*pi*(2n1-1)*tau2) */
207 cmul(qc[0], qc[0], qc[2]); /* qc[0] = exp(i*pi*(n1^2)*tau2) */
208 }
209 DPRINT(qc[0], "q2^%d", n1*n1);
210 cmul(qb[1], qb[1], qb[0]); /* qb[1] = exp(2n1*i*pi*tau1) */
211 cmul(qbm[1], qbm[1], qbm[0]); /* qbm[1] = exp(-2n1*i*pi*tau1) */
212 cone(qb[2]);
213 cone(qbm[2]);
214 for (i=0; i<4; i++)
215 czero(aux2[i]);
216 for (n0=1; n0<=R_bound; n0++)
217 {
218 cmul(qb[2], qb[2], qb[1]); /* qb[2] = exp(2n0n1*i*pi*tau1) */
219 cmul(qbm[2], qbm[2], qbm[1]); /* qbm[2] = exp(-2n0n1*i*pi*tau1) */
220 cadd(aux, qb[2], qbm[2]);
221 DPRINT(aux, "S_{%d*%d}", n1, n0);
222 cmul(aux, aux, qa[n0-1]);
223 /* aux2[0..3]: rightmost sum in the expression on the bottom of
224 * page 210 */
225 cadd(aux2[0], aux2[0], aux);
226 if (n0 % 2)
227 csub(aux2[1], aux2[1], aux);
228 else
229 cadd(aux2[1], aux2[1], aux);
230 if (n1 % 2)
231 csub(aux2[2], aux2[2], aux);
232 else
233 cadd(aux2[2], aux2[2], aux);
234 if ((n0+n1) % 2)
235 csub(aux2[3], aux2[3], aux);
236 else
237 cadd(aux2[3], aux2[3], aux);
238 }
239 /* Multiply the sum by exp(i*pi*n1^2*tau2) */
240 /* Modify aux1 to contain this new sum. aux1[] sums are computed
241 * piecewise, the part not depending on m is computed first.
242 */
243 for (i=0; i<4; i++)
244 {
245 cmul(aux2[i], aux2[i], qc[0]);
246 cadd(aux1[i], aux1[i], aux2[i]);
247 }
248 cadd(aux1[0], aux1[0], qc[0]);
249 cadd(aux1[1], aux1[1], qc[0]);
250 if (n1 % 2)
251 {
252 csub(aux1[2], aux1[2], qc[0]);
253 csub(aux1[3], aux1[3], qc[0]);
254 }
255 else {
256 cadd(aux1[2], aux1[2], qc[0]);
257 cadd(aux1[3], aux1[3], qc[0]);
258 }
259 }
260
261 for (i=0; i<4; i++)
262 {
263 cmul_2ui(aux1[i], aux1[i], 1);
264 cadd_ui(th[i], aux1[i], 1);
265 cclear(aux1[i]);
266 cclear(aux2[i]);
267 }
268 for (i=0; i<3; i++)
269 {
270 cclear(qb[i]);
271 cclear(qbm[i]);
272 cclear(qc[i]);
273 }
274 for (i=0; i<R_bound; i++)
275 cclear(qa[i]);
276 free(qa);
277 cclear(aux);
278 fclear(pi);
279 }
280 #endif
281
282 void
eval_4theta_naive(mpc_t * th,mpc_t * tau)283 eval_4theta_naive (mpc_t *th, mpc_t *tau)
284 {
285 /* Input: tau represents the matrix [tau0, tau1; tau1, tau2] in the
286 * Siegel half space
287 * Output: th containing the four fundamental theta constants in tau
288 */
289 /* This computes the theta constants by summing on a square [0,R]^2 */
290 int R = get_bound(tau, cprec(th[0]));
291 /* fprintf(stderr, "R_bound = %d\n", R); */
292
293 int prec = cprec(th[0])+SEC_MARGIN;
294
295 /* {{{ Need q_{0,1,2} first. Note that we are not inserting a 2 in the
296 * definition for q1 here, as is done in the function above. So qj is
297 * exactly exp(i*pi*tau_j)
298 */
299 mpc_t q[3];
300 {
301 mpfr_t pi;
302 finit(pi, prec);
303 mpfr_const_pi(pi, GMP_RNDN);
304
305 for(int j = 0 ; j < 3 ; j++) {
306 cinit(q[j], prec);
307 cmul_by_i (q[j], tau [j]);
308 mpc_mul_fr(q[j], q[j], pi, MPC_RNDNN);
309 cexp(q[j], q[j]);
310 DPRINT(q[j], "q%d", j);
311 }
312
313 fclear(pi);
314 } /* }}} */
315
316 /* {{{ Set the initial value for the result, and allocate temps for
317 * intermediate sums. */
318 mpc_t rh[4];
319 for(int j = 0 ; j < 4 ; j++) {
320 cinit(rh[j], prec);
321 cone(th[j]);
322 } /* }}} */
323
324 /* {{{ macro definition for update4 */
325 mpc_t tmp; /* used internally by update4 */
326 cinit(tmp, prec);
327 /* This macro is used to update all four theta constants at once,
328 * given a specific summand x, at a given coordinate (n0,n1). The
329 * weight w must also be provided.
330 */
331 #define update4(dst,n0,n1,w,x) do { \
332 cmul_2ui(tmp, x, w); \
333 for(int j = 0 ; j < 4 ; j++) { \
334 int s = 0; \
335 s += (n0) & j; \
336 s += (n1) & (j>>1); \
337 if (s & 1) \
338 csub(dst[j], dst[j], tmp); \
339 else \
340 cadd(dst[j], dst[j], tmp); \
341 } \
342 } while (0)
343 /* }}} */
344
345 /* {{{ Precompute a sublinear number of terms. This helps us save one
346 * multiplication in the inner loop. */
347 mpc_t * q0;
348 {
349 q0 = malloc((R + 1) * sizeof(mpc_t));
350 for(int k = 0 ; k <= R ; k++) {
351 cinit(q0[k], prec);
352 }
353 cone(q0[0]);
354 cset(q0[1], q[0]);
355 mpc_t r0; /* exp(i*pi*(2k-1)*tau_0) */
356 mpc_t qq0; /* q[0]^2 */
357 cinit(r0, prec);
358 cinit(qq0, prec);
359 csqr(qq0,q[0]);
360 cmul(r0, qq0, q[0]);
361 for(int k = 1 ; k < R ; k++) {
362 cmul(q0[k+1], q0[k], r0);
363 cmul(r0, r0, qq0);
364 DPRINT(q0[k+1], "q0^%d", (k+1)*(k+1));
365 }
366 cclear(qq0);
367 cclear(r0);
368 } /* }}} */
369
370
371 /* We're summing over rows (0,k) to (R,k), for 1<=k<=R. The first row
372 * for k=0 is different, and computed from the precomputed
373 * q[0]^(k^2), which we do now.
374 */
375 for(int k = 1 ; k <= R ; k++) {
376 update4(th,k,0,1,q0[k]);
377 }
378 DPRINT(th[0], "Theta0 at n1=0");
379 DPRINT(th[1], "Theta1 at n1=0");
380 DPRINT(th[2], "Theta2 at n1=0");
381 DPRINT(th[3], "Theta3 at n1=0");
382
383 /* For any k (which is the loop counter below)
384 * r2 is exp(i*pi*(2k+1)*tau_2)
385 * t2 is exp(i*pi*k^2*tau_2)
386 * qq2 is exp(i*pi*2*tau_2) (needed for updating r2)
387 */
388 mpc_t r2;
389 mpc_t t2;
390 mpc_t qq2;
391 cinit(t2, prec); cset(t2, q[2]);
392 cinit(qq2, prec); csqr(qq2, q[2]);
393 cinit(r2, prec); cmul(r2, t2, qq2);
394
395 /* {{{ sums q1^(2k) + q1^(-2k)
396 *
397 * We maintain:
398 * s1 = q1^2 + q1^-2
399 * sk = q1^(2k) + q1^(-2k)
400 * skm = q1^(2k-2) + q1^(-2k-2)
401 *
402 * These can be updated by the shallow recurrence
403 * s_{k+1} = s_k*s_1 - s_{k-1}
404 *
405 * Initial value for sk is at k=1.
406 */
407 mpc_t s1, sk, skm;
408 mpc_t v1, vi, vim;
409 cinit(s1, prec);
410 cinit(sk, prec);
411 cinit(skm, prec);
412 cinit(v1, prec);
413 cinit(vi, prec);
414 cinit(vim, prec);
415 csqr(s1, q[1]);
416 cinv(sk, s1);
417 cadd(s1, s1, sk);
418 cset(sk, s1);
419 cset_ui(skm, 2);
420 /* }}} */
421
422 mpc_t x;
423 cinit(x, prec);
424
425 for(int k = 1 ; k <= R ; k++) {
426 /* Row (0,k) to (R, k)
427 * Term at (i,k) is q0^(i^2)*q2^(k^2)*(q1^(2*k*i)+q1^(-2*k*i)).
428 *
429 * We're factoring out the q2^(k^2) term, and accumulate in
430 * temporary variables rh[0] to rh[3].
431 *
432 * Beyond that, let v_i = (q1^(2*k*i)+q1^(-2*k*i)). We have v0=2,
433 * v1=sk, and the typical recurrence v_{i+1} = v_i*v_1 - v_{i-1}
434 * (same as satisfied by s_k).
435 */
436 /* First term is q2^(k^2), but as said above we multiply by this
437 * only in the end */
438 for(int j = 0 ; j < 4 ; j++) czero(rh[j]);
439 cone(x);
440 update4(rh, 0, k, 1, x);
441 /* Set v1, vi, vim */
442 cset_ui(vim, 2);
443 cset(vi, sk);
444 cset(v1, sk);
445 for(int i = 1 ; i < R ; i++) { /* {{{ inner loop */
446 DPRINT(vi, "S_{%d*%d}", k, i);
447 cmul(x, q0[i], vi);
448 update4(rh, i, k, 1, x);
449 /* Update vi */
450 cmul(tmp, vi, v1);
451 csub(vim, tmp, vim);
452 cswap(vi, vim);
453 } /* }}} */
454 /* {{{ last term in column: (k,R) */
455 DPRINT(vi, "S_{%d*%d}", k, R);
456 cmul(x, q0[R], vi);
457 update4(rh, R, k, 1, x);
458 /* }}} */
459 /* {{{ Now multiply by common q2^(k^2), and accumulate */
460 DPRINT(t2, "q2^%d", k*k);
461 for(int j = 0 ; j < 4 ; j++) {
462 cmul(rh[j], rh[j], t2);
463 cadd(th[j], th[j], rh[j]);
464 } /* }}} */
465 /* {{{ Update t2 = q2^(k^2) */
466 cmul(t2, t2, r2);
467 cmul(r2, r2, qq2);
468 /* }}} */
469 /* {{{ Update sk */
470 cmul(tmp, sk, s1);
471 csub(skm, tmp, skm);
472 cswap(sk, skm);
473 /* }}} */
474 }
475 /* {{{ clean up the mess */
476 cclear(x);
477 for(int k = 0 ; k <= R ; k++) {
478 cclear(q0[k]);
479 }
480 free(q0);
481 for(int j = 0 ; j < 3 ; j++) {
482 cclear(q[j]);
483 }
484 for(int j = 0 ; j < 4 ; j++) {
485 cclear(rh[j]);
486 }
487 cclear(qq2);
488 cclear(r2);
489 cclear(t2);
490 cclear(s1); cclear(sk); cclear(skm);
491 cclear(v1); cclear(vi); cclear(vim);
492 cclear(tmp);
493 /* }}} */
494 }
495
496 void
eval_3theta2q_naive(mpc_t * b,mpc_t * tau)497 eval_3theta2q_naive (mpc_t *b, mpc_t *tau)
498 /* Input:
499 * tau represents the matrix [tau0, tau1; tau1, tau2] in the Siegel half space
500 * Output:
501 * b containing the squares of three fundamental theta quotients in tau
502 */
503 {
504 int prec;
505 mpc_t th[4];
506 int i;
507
508 prec = cprec(b[0])+SEC_MARGIN;
509 for (i=0; i<4; i++)
510 cinit(th[i], prec);
511 eval_4theta_naive(th, tau);
512 cinv(th[0], th[0]);
513 for (i=1; i<4; i++) {
514 cmul(th[i], th[i], th[0]);
515 csqr(b[i-1], th[i]);
516 }
517 for (i=0; i<4; i++)
518 cclear(th[i]);
519
520 return;
521 }
522
523 void
eval_10theta2_naive(mpc_t * th2,mpc_t * tau)524 eval_10theta2_naive (mpc_t *th2, mpc_t *tau)
525 /* Input:
526 tau represents the matrix [tau0, tau1; tau1, tau2] in the Siegel half space
527 Output:
528 th2 containing the squares of the ten even theta functions in tau
529 */
530 {
531 mpc_t th4[4];
532 mpc_t th8[4];
533 mpc_t aux;
534 int i;
535
536 mpfr_prec_t prec = cprec (th2 [0]);
537
538 for (i=0; i<4; i++) {
539 cinit(th4[i], prec);
540 cinit(th8[i], prec);
541 }
542 cinit(aux, prec);
543
544 /* evaluate 4 thetas in tau/2 */
545 for (i=0; i<3; i++)
546 cdiv_2ui(th8[i], tau[i], 1);
547 eval_4theta_naive(th4, th8);
548
549 get_10theta2_from_4thetatauhalf (th2, th4);
550
551 for (i=0; i<4; i++) {
552 cclear(th4[i]);
553 cclear(th8[i]);
554 }
555 cclear(aux);
556 }
557
558
559 /* This is a helper function. In order to obtain theta^2_{0,4,8,12}(tau),
560 * it suffices to know theta^2_{0,1,2,3}(tau/2). In applications, these
561 * might happen to be known from external sources. On the other hand,
562 * theta^2_{0,1,2,3}(tau/2) are not needed for the computation of the
563 * other even theta^2 at tau. So we split the computation in two.
564 *
565 * As per the even_thetas_ix array, this function will assign to elements
566 * numbered 0,4,6,8 in the destination array.
567 *
568 * This is the generic function. It's in both cases, whether we want the
569 * partial derivatives or not. dth may be passed as NULL to indicate that
570 * derivatives are not desired.
571 */
572 static void
get_4theta2_048c_from_4theta2tauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th8[4],mpc_t dth8[4][3])573 get_4theta2_048c_from_4theta2tauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th8[4], mpc_t dth8[4][3])
574 {
575 #define T(i) th2[even_thetas_ix[i]]
576 #define dT(i,j) dth2[even_thetas_ix[i]][j]
577 #define DO(idx, op1, op2, op3) do { \
578 op1(T(idx), th8[0], th8[1]); \
579 op2(T(idx), T(idx), th8[2]); \
580 op3(T(idx), T(idx), th8[3]); \
581 cdiv_2ui(T(idx), T(idx), 2); \
582 if (dth2) for(int j = 0 ; j < 3 ; j++) { \
583 op1(dT(idx,j), dth8[0][j], dth8[1][j]); \
584 op2(dT(idx,j), dT(idx,j), dth8[2][j]); \
585 op3(dT(idx,j), dT(idx,j), dth8[3][j]); \
586 cdiv_2ui(dT(idx,j), dT(idx,j), 2); \
587 } } while (0)
588 DO(0, cadd, cadd, cadd);
589 DO(4, csub, cadd, csub);
590 DO(8, cadd, csub, csub);
591 DO(12, csub, csub, cadd);
592 #undef DO
593 #undef T
594 #undef dT
595 }
596
597 /* Input: theta_{1,2,3}(Omega/2)^2/x (e.g. with x=theta_0(Omega/2)^2)
598 * Output: theta_{0,4,8,12}(Omega)^2/x and the derivatives
599 */
600 static void
get_4theta2x_048c_from_3theta2qtauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th8[3],mpc_t dth8[3][3])601 get_4theta2x_048c_from_3theta2qtauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th8[3], mpc_t dth8[3][3])
602 {
603 #define T(i) th2[even_thetas_ix[i]]
604 #define dT(i,j) dth2[even_thetas_ix[i]][j]
605 #define DO(idx, op1x, op2, op3) do { \
606 op1x(T(idx), th8[1-1]); \
607 cadd_ui(T(idx), T(idx), 1); \
608 op2(T(idx), T(idx), th8[2-1]); \
609 op3(T(idx), T(idx), th8[3-1]); \
610 cdiv_2ui(T(idx), T(idx), 2); \
611 if (dth2) for(int j = 0 ; j < 3 ; j++) { \
612 op1x(dT(idx,j), dth8[1-1][j]); \
613 op2(dT(idx,j), dT(idx,j), dth8[2-1][j]); \
614 op3(dT(idx,j), dT(idx,j), dth8[3-1][j]); \
615 cdiv_2ui(dT(idx,j), dT(idx,j), 2); \
616 } } while (0)
617 DO(0, cset, cadd, cadd);
618 DO(4, cneg, cadd, csub);
619 DO(8, cset, csub, csub);
620 DO(12, cneg, csub, cadd);
621 #undef DO
622 #undef T
623 #undef dT
624 }
625
626 #if 0
627 static void
628 get_4theta2_048c_from_4theta2tauhalf(mpc_t th2[10], mpc_t th8[4])
629 {
630 get_4theta2_048c_from_4theta2tauhalf_diff(th2, NULL, th8, NULL);
631 }
632 #endif
633
634 /* Given (theta_{1,2,3}^2(tau/2))/theta_0^2(tau/2), return
635 * (theta_{0,4,8,12}^2(tau))/theta_0^2(tau/2).
636 */
637 static void
get_4theta2x_048c_from_3theta2qtauhalf(mpc_t th2[10],mpc_t th8[3])638 get_4theta2x_048c_from_3theta2qtauhalf(mpc_t th2[10], mpc_t th8[3])
639 {
640 get_4theta2x_048c_from_3theta2qtauhalf_diff(th2, NULL, th8, NULL);
641 }
642
643 static void
get_6theta2_12369f_from_4thetatauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th[4],mpc_t dth[4][3])644 get_6theta2_12369f_from_4thetatauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th[4], mpc_t dth[4][3])
645 {
646 mpc_t aux;
647 mpfr_prec_t prec = cprec (th2 [0]);
648 cinit(aux, prec);
649
650 #define T(i) th2[even_thetas_ix[i]]
651 #define dT(i,j) dth2[even_thetas_ix[i]][j]
652
653 #define DO(x,y,u,v,w,z) \
654 cmul(T(x), th[u], th[v]); \
655 cmul(aux, th[w], th[z]); \
656 csub(T(y), T(x), aux); \
657 cadd(T(x), T(x), aux); \
658 cdiv_2ui(T(y), T(y), 1); \
659 cdiv_2ui(T(x), T(x), 1); \
660 if (dth2) for(int j=0; j<3; j++) { \
661 cmul(dT(x,j), th[u], dth[v][j]); \
662 cmul(aux, th[v], dth[u][j]); \
663 cadd(dT(x,j), dT(x,j), aux); \
664 cset(dT(y,j), dT(x,j)); \
665 cmul(aux, th[w], dth[z][j]); \
666 cadd(dT(x,j), dT(x,j), aux); \
667 csub(dT(y,j), dT(y,j), aux); \
668 cmul(aux, th[z], dth[w][j]); \
669 cadd(dT(x,j), dT(x,j), aux); \
670 csub(dT(y,j), dT(y,j), aux); \
671 cdiv_2ui(dT(x,j), dT(x,j), 1); \
672 cdiv_2ui(dT(y,j), dT(y,j), 1); \
673 }
674 DO(1,9,0,1,2,3);
675 DO(2,6,0,2,1,3);
676 DO(3,15,0,3,1,2);
677 #undef DO
678 #undef T
679 #undef dT
680 cclear(aux);
681 }
682
683 /* This is a hack, which saves some multiplications */
684 /* Given (theta_{1,2,3}(tau/2))/theta_0(tau/2), return
685 * (theta_{1,2,3,6,9,15}^2(tau))/theta_0^2(tau/2).
686 * and the derivatives
687 *
688 * FIXME: I don't thing we ever use the case where this functions needs
689 * input derivatives other than with respect to its own input variables,
690 * which means dth[i][j]=(i==j). We should use it to cut down on the
691 * number of operations.
692 */
693 static void
get_6theta2x_12369f_from_3thetaqtauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th[3],mpc_t dth[3][3])694 get_6theta2x_12369f_from_3thetaqtauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th[3], mpc_t dth[3][3])
695 {
696 mpc_t aux;
697 mpfr_prec_t prec = cprec (th2 [0]);
698 cinit(aux, prec);
699
700 #define T(i) th2[even_thetas_ix[i]]
701 #define dT(i,j) dth2[even_thetas_ix[i]][j]
702
703 #define DO(x,y,u,v,w,z) \
704 cset(T(x), th[v-1]); \
705 cmul(aux, th[w-1], th[z-1]); \
706 csub(T(y), T(x), aux); \
707 cadd(T(x), T(x), aux); \
708 cdiv_2ui(T(y), T(y), 1); \
709 cdiv_2ui(T(x), T(x), 1); \
710 if (dth2) for(int j=0; j<3; j++) { \
711 cset(dT(x,j), dth[v-1][j]); \
712 cset(dT(y,j), dth[v-1][j]); \
713 cmul(aux, th[w-1], dth[z-1][j]); \
714 cadd(dT(x,j), dT(x,j), aux); \
715 csub(dT(y,j), dT(y,j), aux); \
716 cmul(aux, th[z-1], dth[w-1][j]); \
717 cadd(dT(x,j), dT(x,j), aux); \
718 csub(dT(y,j), dT(y,j), aux); \
719 cdiv_2ui(dT(x,j), dT(x,j), 1); \
720 cdiv_2ui(dT(y,j), dT(y,j), 1); \
721 }
722 DO(1,9,0,1,2,3);
723 DO(2,6,0,2,1,3);
724 DO(3,15,0,3,1,2);
725 #undef DO
726 #undef T
727 #undef dT
728 cclear(aux);
729 }
730
731 #if 0
732 static void
733 get_6theta2_12369f_from_4thetatauhalf(mpc_t th2[10], mpc_t th[4])
734 {
735 get_6theta2_12369f_from_4thetatauhalf_diff(th2, NULL, th, NULL);
736 }
737 #endif
738
739 /* Given (theta_{1,2,3}(tau/2))/theta_0(tau/2), return
740 * (theta_{1,2,3,6,9,15}^2(tau))/theta_0(tau/2).
741 */
742 static void
get_6theta2x_12369f_from_3thetaqtauhalf(mpc_t th2[10],mpc_t th[3])743 get_6theta2x_12369f_from_3thetaqtauhalf(mpc_t th2[10], mpc_t th[3])
744 {
745 get_6theta2x_12369f_from_3thetaqtauhalf_diff(th2, NULL, th, NULL);
746 }
747
748 /* FIXME: I don't thing we ever use the case where this functions needs
749 * input derivatives other than with respect to its own input variables,
750 * which means dth[i][j]=(i==j). We should use it to cut down on the
751 * number of operations.
752 */
753 void
get_10theta2x_from_3thetaqtauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th[3],mpc_t dth[3][3])754 get_10theta2x_from_3thetaqtauhalf_diff(mpc_t th2[10], mpc_t dth2[10][3], mpc_t th[3], mpc_t dth[3][3])
755 {
756 mpc_t thq[3], dthq[3][3];
757 for(int i = 0 ; i < 3 ; i++) {
758 cinit(thq[i], cprec(th2[i]));
759 csqr(thq[i], th[i]);
760 if (dth2) {
761 for(int j = 0 ; j < 3 ; j++) {
762 cinit(dthq[i][j], cprec(th2[i]));
763 cmul(dthq[i][j], th[i], dth[i][j]);
764 cmul_2ui(dthq[i][j], dthq[i][j], 1);
765 }
766 }
767 }
768 get_4theta2x_048c_from_3theta2qtauhalf_diff(th2, dth2, thq, dthq);
769 for(int i = 0 ; i < 3 ; i++) {
770 cclear(thq[i]);
771 if (dth2) {
772 for(int j = 0 ; j < 3 ; j++) {
773 cclear(dthq[i][j]);
774 }
775 }
776 }
777 get_6theta2x_12369f_from_3thetaqtauhalf_diff(th2, dth2, th, dth);
778 }
779
780 void
get_10theta2x_from_3thetaqtauhalf(mpc_t th2[10],mpc_t th[3])781 get_10theta2x_from_3thetaqtauhalf(mpc_t th2[10], mpc_t th[3])
782 {
783 mpc_t thq[3];
784 for(int i = 0 ; i < 3 ; i++) {
785 cinit(thq[i], cprec(th2[i]));
786 csqr(thq[i], th[i]);
787 }
788 get_4theta2x_048c_from_3theta2qtauhalf(th2, thq);
789 get_6theta2x_12369f_from_3thetaqtauhalf(th2, th);
790 for(int i = 0 ; i < 3 ; i++) {
791 cclear(thq[i]);
792 }
793 }
794
795 void
get_10theta2_from_4thetatauhalf_diff(mpc_t th2[10],mpc_t dth2[10][3],mpc_t th[4],mpc_t dth[4][3])796 get_10theta2_from_4thetatauhalf_diff (mpc_t th2[10], mpc_t dth2[10][3], mpc_t th[4], mpc_t dth[4][3])
797 /* Input:
798 * theta_0, ..., theta_3 evaluated in some tau/2
799 * Their gradients with respect to three variables x1, x2, x3
800 * Output:
801 * theta_0^2, ..., theta_15^2 evaluated in tau
802 * Their gradients with respect to x1, x2, x3
803 *
804 * The even thetas are stored in th2[0..9] following the indexing table
805 * even_thetas_ix
806 *
807 * NOTE: To gain a few cycles, it may be advantageous to call the two
808 * back-end functions get_4theta2_048c_from_4theta2tauhalf_diff and
809 * get_6theta2_12369f_from_4thetatauhalf_diff separately.
810 */
811 {
812 /* th8: 4 fundamental theta-squares at tau/2 */
813 mpc_t th8[4];
814 mpc_t dth8[4][3];
815 mpfr_prec_t prec = cprec (th2 [0]);
816 for (int i=0; i<4; i++) {
817 cinit(th8[i], prec);
818 if (dth) for(int j = 0; j < 3 ; j++) {
819 cinit(dth8[i][j], prec);
820 }
821 }
822 for (int i=0; i<4; i++) {
823 csqr(th8[i], th[i]);
824 if (dth) for(int j = 0; j < 3 ; j++) {
825 cmul(dth8[i][j], th8[i], dth[i][j]);
826 cmul_2ui(dth8[i][j], dth8[i][j], 1);
827 }
828 }
829
830 get_4theta2_048c_from_4theta2tauhalf_diff(th2, dth2, th8, dth ? dth8 : NULL);
831 get_6theta2_12369f_from_4thetatauhalf_diff(th2, dth2, th, dth);
832
833 for (int i=0; i<4; i++)
834 cclear(th8[i]);
835 }
836
837 void
get_10theta2_from_4thetatauhalf(mpc_t * th2,mpc_t * th)838 get_10theta2_from_4thetatauhalf (mpc_t *th2, mpc_t *th)
839 /* Input:
840 * theta_0, ..., theta_3 evaluated in some tau/2
841 * Output:
842 * theta_0^2, ..., theta_15^2 evaluated in tau
843 *
844 * The even thetas are stored in th2[0..9] following the indexing table
845 * even_thetas_ix
846 *
847 * NOTE: To gain a few cycles, it may be advantageous to call the two
848 * back-end functions get_4theta2_048c_from_4theta2tauhalf and
849 * get_6theta2_12369f_from_4thetatauhalf separately.
850 */
851 {
852 get_10theta2_from_4thetatauhalf_diff(th2, NULL, th, NULL);
853 }
854
855