1 /* Auxiliary routines for polynomial selection
2 
3 Copyright 2008-2017 Emmanuel Thome, Paul Zimmermann
4 
5 This file is part of CADO-NFS.
6 
7 CADO-NFS is free software; you can redistribute it and/or modify it under the
8 terms of the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 2.1 of the License, or (at your option)
10 any later version.
11 
12 CADO-NFS is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
15 details.
16 
17 You should have received a copy of the GNU Lesser General Public License
18 along with CADO-NFS; see the file COPYING.  If not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
20 */
21 
22 #include "cado.h" // IWYU pragma: keep
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <float.h> // for DBL_MAX
26 #include <math.h>
27 #include <gmp.h>
28 #include "auxiliary.h"
29 #include "gmp_aux.h"    // ulong_isprime
30 #include "macros.h" /* for ASSERT_ALWAYS */
31 #include "murphyE.h"
32 #include "rootfinder.h" // mpz_poly_roots
33 #include "timing.h"             // for seconds
34 #include "usp.h"        // usp_root_data
35 #include "version_info.h"        // cado_revision_string
36 #include "double_poly.h"
37 
38 /* define OPTIMIZE_MP to perform computations in multiple-precision */
39 //#define OPTIMIZE_MP
40 
41 //#define DEBUG_OPTIMIZE_AUX
42 
43 /************************* norm and skewness *********************************/
44 
45 /* Same as L2_lognorm, but takes 'double' instead of 'mpz_t' as coefficients.
46    Returns 1/2*log(int(int(F(r*cos(t)*s,r*sin(t))^2*r/s^d, r=0..1), t=0..2*Pi))
47    (circular method). Cf Remark 3.2 in Kleinjung paper, Math. of Comp., 2006.
48 
49    Maple code for degree 2:
50    f:=x->a2*x^2+a1*x+a0: d:=degree(f(x),x):
51    int(int((f(s*cos(t)/sin(t))*(r*sin(t))^d)^2*r/s^d, r=0..1), t=0..2*Pi);
52  */
53 static double
L2_lognorm_d(double_poly_srcptr p,double s)54 L2_lognorm_d (double_poly_srcptr p, double s)
55 {
56   double n;
57   double *a = p->coeff;
58   unsigned int d = p->deg;
59 
60   ASSERT_ALWAYS(1 <= d && d <= 7);
61 
62   if (d == 1)
63   {
64     double a1, a0;
65     a1 = a[1] * s;
66     a0 = a[0];
67     /* use circular integral (Sage code):
68        var('a1,a0,x,y,r,s,t')
69        f = a1*x+a0
70        F = expand(f(x=x/y)*y)
71        F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
72        v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
73        (s*v).expand().collect(pi)
74     */
75     n = a0 * a0 + a1 * a1;
76     n = n * 0.785398163397448310; /* Pi/4 */
77     if (isnan (n) || isinf (n))
78       n = DBL_MAX;
79     return 0.5 * log (n / s);
80   }
81   else if (d == 2)
82   {
83     double a2, a1, a0;
84     a2 = a[2] * s;
85     a1 = a[1];
86     a0 = a[0] / s;
87     /* use circular integral (Sage code):
88        var('a2,a1,a0,x,y,r,s,t')
89        f = a2*x^2+a1*x+a0
90        F = expand(f(x=x/y)*y^2)
91        F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
92        v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
93        (s^2*v).expand().collect(pi)
94     */
95     n = 3.0 * (a2 * a2 + a0 * a0) + 2.0 * a0 * a2 + a1 * a1;
96     n = n * 0.130899693899574704; /* Pi/24 */
97     if (isnan (n) || isinf (n))
98       n = DBL_MAX;
99     return 0.5 * log(n);
100   }
101   else if (d == 3)
102     {
103       double a3, a2, a1, a0, invs = 1.0 / s;
104       a3 = a[3] * s * s;
105       a2 = a[2] * s;
106       a1 = a[1];
107       a0 = a[0] * invs;
108       /* use circular integral (Sage code):
109          var('a3,a2,a1,a0,x,y,r,s,t')
110          f = a3*x^3+a2*x^2+a1*x+a0
111          F = expand(f(x=x/y)*y^3)
112          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
113          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
114          (s^3*v).expand().collect(pi)
115       */
116       n = 5.0 * (a3 * a3 + a0 * a0) + 2.0 * (a3 * a1 + a0 * a2)
117         + a1 * a1 + a2 * a2;
118       n = n * 0.049087385212340519352; /* Pi/64 */
119       if (isnan (n) || isinf (n))
120         n = DBL_MAX;
121       return 0.5 * log(n * invs);
122     }
123   else if (d == 4)
124     {
125       double a4, a3, a2, a1, a0, invs = 1.0 / s;
126 
127       a4 = a[4] * s * s;
128       a3 = a[3] * s;
129       a2 = a[2];
130       a1 = a[1] * invs;
131       a0 = a[0] * invs * invs;
132       /* use circular integral (Sage code):
133          var('a4,a3,a2,a1,a0,x,r,s,t')
134          f = a4*x^4+a3*x^3+a2*x^2+a1*x+a0
135          F = expand(f(x=x/y)*y^4)
136          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
137          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
138          (s^4*v).expand().collect(pi)
139       */
140       n = 35.0 * (a4 * a4 + a0 * a0) + 10.0 * (a4 * a2 + a2 * a0)
141         + 5.0 * (a3 * a3 + a1 * a1) + 6.0 * (a4 * a0 + a3 * a1)
142         + 3.0 * a2 * a2;
143       n = n * 0.0049087385212340519352; /* Pi/640 */
144       if (isnan (n) || isinf (n))
145         n = DBL_MAX;
146       return 0.5 * log(n);
147     }
148   else if (d == 5)
149     {
150       double a5, a4, a3, a2, a1, a0, invs = 1.0 / s;
151 
152       /*
153         f := a5*x^5+a4*x^4+a3*x^3+a2*x^2+a1*x+a0:
154         F := expand(y^5*subs(x=x/y,f));
155         int(int(subs(x=x*s,F)^2/s^5, x=-1..1), y=-1..1);
156        */
157       a0 = a[0] * invs * invs;
158       a1 = a[1] * invs;;
159       a2 = a[2];
160       a3 = a[3] * s;
161       a4 = a[4] * s * s;
162       a5 = a[5] * s * s * s;
163       /* use circular integral (Sage code):
164          var('a5,a4,a3,a2,a1,a0,x,r,s,t')
165          f = a5*x^5+a4*x^4+a3*x^3+a2*x^2+a1*x+a0
166          F = expand(f(x=x/y)*y^5)
167          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
168          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
169          (s^5*v).expand().collect(pi)
170       */
171       n = 6.0 * (a3 * a1 + a1 * a5 + a4 * a2 + a0 * a4)
172         + 14.0 * (a0 * a2 + a3 * a5) + 63.0 * (a0 * a0 + a5 * a5)
173         + 7.0 * (a4 * a4 + a1 * a1) + 3.0 * (a3 * a3 + a2 * a2);
174       n = n * 0.0020453077171808549730; /* Pi/1536 */
175       if (isnan (n) || isinf (n))
176         n = DBL_MAX;
177       return 0.5 * log(n * invs);
178     }
179   else if (d == 6)
180     {
181       double a6, a5, a4, a3, a2, a1, a0, invs;
182 
183       /* use circular integral (Sage code):
184          R.<x> = PolynomialRing(ZZ)
185          S.<a> = InfinitePolynomialRing(R)
186          d=6; f = SR(sum(a[i]*x^i for i in range(d+1)))
187          F = expand(f(x=x/y)*y^d)
188          var('r,s,t')
189          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
190          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
191          (s^d*v).expand().collect(pi)
192       */
193       invs = 1.0 / s;
194       a6 = a[6] * s * s * s;
195       a5 = a[5] * s * s;
196       a4 = a[4] * s;
197       a3 = a[3];
198       a2 = a[2] * invs;
199       a1 = a[1] * invs * invs;
200       a0 = a[0] * invs * invs * invs;
201       n = 231.0 * (a6 * a6 + a0 * a0) + 42.0 * (a6 * a4 + a2 * a0)
202         + 21.0 * (a5 * a5 + a1 * a1) + 7.0 * (a4 * a4 + a2 * a2)
203         + 14.0 * (a6 * a2 + a5 * a3 + a4 * a0 + a3 * a1)
204         + 10.0 * (a6 * a0 + a5 * a1 + a4 * a2) + 5.0 * a3 * a3;
205       n = n * 0.00043828022511018320850; /* Pi/7168 */
206       if (isnan (n) || isinf (n))
207         n = DBL_MAX;
208       return 0.5 * log(n);
209     }
210   else /* d == 7 */
211     {
212       double a7, a6, a5, a4, a3, a2, a1, a0;
213       double invs = 1.0 / s;
214 
215       a7 = a[7] * s * s * s * s;
216       a6 = a[6] * s * s * s;
217       a5 = a[5] * s * s;
218       a4 = a[4] * s;
219       a3 = a[3];
220       a2 = a[2] * invs;
221       a1 = a[1] * invs * invs;
222       a0 = a[0] * invs * invs * invs;
223       /* use circular integral (Sage code):
224          var('r,s,t,y')
225          R.<x> = PolynomialRing(ZZ)
226          S.<a> = InfinitePolynomialRing(R)
227          d=7; f = SR(sum(a[i]*x^i for i in range(d+1)))
228          F = expand(f(x=x/y)*y^d)
229          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
230          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
231          (s^d*v).expand().collect(pi)
232       */
233       n = 429.0*(a0*a0+a7*a7) + 33.0*(a1*a1+a6*a6) + 66.0*(a0*a2+a5*a7)
234         + 9*(a2*a2+a5*a5) + 18*(a1*a3+a0*a4+a4*a6+a3*a7) + 5*(a3*a3+a4*a4)
235         + 10*(a2*a4+a1*a5+a3*a5+a0*a6+a2*a6+a1*a7);
236       n = n * 0.000191747598485705154; /* Pi/16384 */
237       if (isnan (n) || isinf (n))
238         n = DBL_MAX;
239       return 0.5 * log(n * invs);
240     }
241 }
242 
243 /* Returns the logarithm of the L2-norm as defined by Kleinjung, i.e.,
244    log(1/2 sqrt(int(int((F(sx,y)/s^(d/2))^2, x=-1..1), y=-1..1))).
245    Since we only want to compare norms, we don't consider the log(1/2) term,
246    and compute only 1/2 log(int(int(...))) [here the 1/2 factor is important,
247    since it is added to the alpha root property term].
248 
249    Circular method: integrate over the unit circle.
250 */
251 double
L2_lognorm(mpz_poly_srcptr f,double s)252 L2_lognorm (mpz_poly_srcptr f, double s)
253 {
254   double res;
255   double_poly a;
256   double_poly_init(a, f->deg);
257   double_poly_set_mpz_poly (a, f);
258 
259   res = L2_lognorm_d (a, s);
260 
261   double_poly_clear(a);
262   return res;
263 }
264 
265 #ifdef OPTIMIZE_MP
266 /* The name is misleading. It returns the before-log part in
267    1/2 log(int(int(...)))
268 */
269 void
L2_lognorm_mp(mpz_poly_ptr f,mpz_t s,mpz_t norm)270 L2_lognorm_mp (mpz_poly_ptr f, mpz_t s, mpz_t norm)
271 {
272   mpz_t n, tmp, tmp1, tmpsum;
273   unsigned long i;
274   unsigned int d = f->deg;
275 
276   mpz_init_set_ui (n, 1);
277   mpz_init_set_ui (tmp, 1);
278   mpz_init_set_ui (tmp1, 1);
279   mpz_init_set_ui (tmpsum, 1);
280 
281   if (d != 6)
282     {
283       fprintf (stderr, "not yet implemented for degree %u\n", d);
284       exit (1);
285     }
286   else {
287     mpz_t a[d+1];
288     mpz_init_set_ui (a[0], 1);
289     for (i=1; i<=d; i++) {
290       mpz_init (a[i]);
291       mpz_mul (a[i], a[i-1], s);
292     }
293     for (i=0; i<=d; i++)
294       mpz_mul (a[i], a[i], f->coeff[i]);
295 
296     // n = 231.0 * (a6 * a6 + a0 * a0)
297     mpz_mul (tmp, a[6], a[6]);
298     mpz_mul (tmp1, a[0], a[0]);
299     mpz_add (tmpsum, tmp, tmp1);
300     mpz_mul_ui(n, tmpsum, 231);
301     //   + 42.0 * (a6 * a4 + a2 * a0)
302     mpz_mul (tmp, a[6], a[4]);
303     mpz_mul (tmp1, a[2], a[0]);
304     mpz_add (tmpsum, tmp, tmp1);
305     mpz_addmul_ui (n, tmpsum, 42);
306     //   + 21.0 * (a5 * a5 + a1 * a1)
307     mpz_mul (tmp, a[5], a[5]);
308     mpz_mul (tmp1, a[1], a[1]);
309     mpz_add (tmpsum, tmp, tmp1);
310     mpz_addmul_ui (n, tmpsum, 21);
311     // + 7.0 * (a4 * a4 + a2 * a2)
312     mpz_mul (tmp, a[4], a[4]);
313     mpz_mul (tmp1, a[2], a[2]);
314     mpz_add (tmpsum, tmp, tmp1);
315     mpz_addmul_ui (n, tmpsum, 7);
316     // +  14.0 * (a6 * a2 + a5 * a3 + a4 * a0 + a3 * a1)
317     mpz_mul (tmp, a[6], a[2]);
318     mpz_mul (tmp1, a[5], a[3]);
319     mpz_add (tmpsum, tmp, tmp1);
320     mpz_mul (tmp, a[4], a[0]);
321     mpz_mul (tmp1, a[3], a[1]);
322     mpz_add (tmp, tmp, tmp1);
323     mpz_add (tmpsum, tmp, tmpsum);
324     mpz_addmul_ui (n, tmpsum, 14);
325     // + 10.0 * (a6 * a0 + a5 * a1 + a4 * a2)
326     mpz_mul (tmp, a[6], a[0]);
327     mpz_mul (tmp1, a[5], a[1]);
328     mpz_add (tmpsum, tmp, tmp1);
329     mpz_mul (tmp, a[4], a[2]);
330     mpz_add (tmpsum, tmpsum, tmp);
331     mpz_addmul_ui (n, tmpsum, 10);
332     //  + 5.0 * a3 * a3;
333     mpz_mul (tmp, a[3], a[3]);
334     mpz_addmul_ui (n, tmp, 5);
335 
336     for (i=0; i<=d; i++)
337       mpz_clear (a[i]);
338   }
339 
340   mpz_set (norm, n);
341   mpz_clear (tmp);
342   mpz_clear (tmp1);
343   mpz_clear (tmpsum);
344   mpz_clear (n);
345 }
346 #endif
347 
348 static double
L2_skewness_deg6_approx(mpz_poly_srcptr f MAYBE_UNUSED,double_poly_ptr ff,double_poly_ptr dff,int prec)349 L2_skewness_deg6_approx (mpz_poly_srcptr f MAYBE_UNUSED, double_poly_ptr ff,
350                          double_poly_ptr dff, int prec)
351 {
352   double *dfd = dff->coeff;
353   double *fd = ff->coeff;
354   double s, nc, a, b, c, smin, smax;
355   int sign_changes = 0;
356   double q[7], logmu, best_logmu = DBL_MAX, best_s = DBL_MAX;
357 
358   dfd[6] = 99.0 * fd[6] * fd[6];
359   dfd[5] = 6.0 * (2.0 * fd[4] * fd[6] + fd[5] * fd[5]);
360   dfd[4] = 2.0 * (fd[2] * fd[6] + fd[3] * fd[5]) + fd[4] * fd[4];
361   dfd[2] = -2.0 * (fd[0] * fd[4] + fd[1] * fd[3]) - fd[2] * fd[2];
362   dfd[1] = -6.0 * (2.0 * fd[0] * fd[2] + fd[1] * fd[1]);
363   dfd[0] = -99.0 * fd[0] * fd[0];
364   if (dfd[1] > 0)
365     sign_changes ++; /* since dfd[0] < 0 */
366   if (dfd[1] * dfd[2] < 0)
367     sign_changes ++;
368   if (dfd[2] * dfd[4] < 0)
369     sign_changes ++; /* since dfd[3] = 0 */
370   if (dfd[4] * dfd[5] < 0)
371     sign_changes ++;
372   if (dfd[5] < 0)
373     sign_changes ++; /* since dfd[6] > 0 */
374   /* since dfd[6] and dfd[0] have opposite signs, we have an odd number of
375      roots on [0,+inf[. Moreover since dfd[3]=0, we can't have 5 positive
376      roots, thus we have either 1 or 3. */
377 
378   q[6] = dfd[6];
379   q[5] = (dfd[5] < 0) ? dfd[5] : 0.0;
380   q[4] = (dfd[4] < 0) ? dfd[4] : 0.0;
381   q[2] = (dfd[2] < 0) ? dfd[2] : 0.0;
382   q[1] = (dfd[1] < 0) ? dfd[1] : 0.0;
383   q[0] = dfd[0]; /* always negative */
384   s = 1.0;
385   while ((((((q[6]*s)+q[5])*s+q[4])*s*s+q[2])*s+q[1])*s+q[0] < 0)
386     s = s + s;
387   if (s == 1.0)
388     {
389       while ((((((q[6]*s)+q[5])*s+q[4])*s*s+q[2])*s+q[1])*s+q[0] > 0)
390         s = s * 0.5;
391       s = s + s;
392     }
393   smax = s;
394 
395   q[6] = dfd[6]; /* always positive */
396   q[5] = (dfd[5] > 0) ? dfd[5] : 0.0;
397   q[4] = (dfd[4] > 0) ? dfd[4] : 0.0;
398   q[2] = (dfd[2] > 0) ? dfd[2] : 0.0;
399   q[1] = (dfd[1] > 0) ? dfd[1] : 0.0;
400   q[0] = dfd[0];
401   s = smax;
402   while ((((((q[6]*s)+q[5])*s+q[4])*s*s+q[2])*s+q[1])*s+q[0] > 0)
403     s = s * 0.5;
404   smin = s;
405 
406   /* positive roots are in [smin, smax] */
407 
408   double v = -1.0, oldv;
409   for (double t = 2.0 * smin; t <= smax; t = 2.0 * t)
410     {
411       /* invariant: q(smin) < 0 */
412       oldv = v;
413       v = (((((dfd[6]*t)+dfd[5])*t+dfd[4])*t*t+dfd[2])*t+dfd[1])*t+dfd[0];
414       if (v == 0.0)
415         {
416           best_s = t;
417           break;
418         }
419       if (oldv < 0 && v > 0)
420         {
421           /* the derivative has a root in [smin,b] and is increasing, thus
422              the norm has a minimum in [smin,b], we refine it by dichotomy */
423           a = smin;
424           b = t;
425           for (int i = 0; i < prec; i++)
426             {
427               c = (a + b) * 0.5;
428 
429               nc = ((((dfd[6] * c + dfd[5]) * c + dfd[4]) * c * c + dfd[2]) * c
430                     + dfd[1]) * c + dfd[0];
431               if (nc > 0)
432                 b = c;
433               else
434                 a = c;
435             }
436           s = sqrt ((a + b) * 0.5);
437           if (sign_changes == 1)
438             {
439               best_s = s;
440               break;
441             }
442           logmu = L2_lognorm_d (ff, s);
443           if (logmu < best_logmu)
444             {
445               best_logmu = logmu;
446               best_s = s;
447             }
448         }
449       smin = t; /* to avoid hitting twice the same root of the derivative */
450     }
451 
452   return best_s;
453 }
454 
455 double
L2_skewness_deg6(mpz_poly_ptr f MAYBE_UNUSED,double_poly_srcptr ff,double_poly_srcptr dff MAYBE_UNUSED,int prec MAYBE_UNUSED)456 L2_skewness_deg6 (mpz_poly_ptr f MAYBE_UNUSED, double_poly_srcptr ff,
457                   double_poly_srcptr dff MAYBE_UNUSED, int prec MAYBE_UNUSED)
458 {
459   double s, logmu, logmu_min = DBL_MAX, s_min = 1.0;
460   mpz_poly df;
461   usp_root_data Roots[6];
462   int i, k;
463 
464   mpz_poly_init (df, 6);
465   for (i = 0; i < 6; i++)
466     usp_root_data_init (Roots + i);
467 
468   /* Sage code:
469      var('r,s,t,y')
470      R.<x> = PolynomialRing(ZZ)
471      S.<a> = InfinitePolynomialRing(R)
472      d=6; f = SR(sum(a[i]*x^i for i in range(d+1)))
473      F = expand(f(x=x/y)*y^d)
474      F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
475      v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
476      v = (7168*v/pi).expand()
477      dv = v.diff(s)
478      dv = (dv*s^7/14).expand().collect(s)
479      99*a6^2 * s^12 +
480      6*(2*a4*a6 + a5^2) * s^10 +
481      (2*a2*a6 + 2*a3*a5 + a4^2) * s^8 -
482      (2*a0*a4 + 2*a1*a3 + a2^2) * s^4 -
483      6*(2*a0*a2 + a1^2) * s^2 -
484      99*a0^2
485   */
486 #if 1 /* using numberOfRealRoots */
487   mpz_mul (df->coeff[6], f->coeff[6], f->coeff[6]);
488   mpz_mul_ui (df->coeff[6], df->coeff[6], 99); /* 99*a6^2 */
489   mpz_mul (df->coeff[5], f->coeff[4], f->coeff[6]);
490   mpz_mul_2exp (df->coeff[5], df->coeff[5], 1);
491   mpz_addmul (df->coeff[5], f->coeff[5], f->coeff[5]);
492   mpz_mul_ui (df->coeff[5], df->coeff[5], 6); /* 6*(2*a4*a6 + a5^2) */
493   mpz_mul (df->coeff[4], f->coeff[2], f->coeff[6]);
494   mpz_addmul (df->coeff[4], f->coeff[3], f->coeff[5]);
495   mpz_mul_2exp (df->coeff[4], df->coeff[4], 1);
496   mpz_addmul (df->coeff[4], f->coeff[4], f->coeff[4]); /*2*a2*a6+2*a3*a5+a4^2*/
497   mpz_set_ui (df->coeff[3], 0);
498   mpz_mul (df->coeff[2], f->coeff[0], f->coeff[4]);
499   mpz_addmul (df->coeff[2], f->coeff[1], f->coeff[3]);
500   mpz_mul_2exp (df->coeff[2], df->coeff[2], 1);
501   mpz_addmul (df->coeff[2], f->coeff[2], f->coeff[2]);
502   mpz_neg (df->coeff[2], df->coeff[2]); /* -(2*a0*a4+2*a1*a3+a2^2) */
503   mpz_mul (df->coeff[1], f->coeff[0], f->coeff[2]);
504   mpz_mul_2exp (df->coeff[1], df->coeff[1], 1);
505   mpz_addmul (df->coeff[1], f->coeff[1], f->coeff[1]);
506   mpz_mul_si (df->coeff[1], df->coeff[1], -6); /* -6*(2*a0*a2 + a1^2) */
507   mpz_mul (df->coeff[0], f->coeff[0], f->coeff[0]);
508   mpz_mul_si (df->coeff[0], df->coeff[0], -99); /* -99*a0^2 */
509 
510   df->deg = 6;
511   k = numberOfRealRoots ((const mpz_t *) df->coeff, 6, 0.0, 0, Roots);
512   int kpos = 0;
513   for (i = 0; i < k; i++)
514     if (mpz_sgn (Roots[i].b) > 0)
515       {
516         kpos ++;
517         s = rootRefine (Roots + i, (const mpz_t *) df->coeff, 6, ldexp (1.0, -prec));
518         s = sqrt (s);
519         logmu = L2_lognorm_d (ff, s);
520         if (logmu < logmu_min)
521           {
522             logmu_min = logmu;
523             s_min = s;
524           }
525       }
526 #else /* using double_poly_compute_roots */
527   double *dfd = dff->coeff;
528   double *fd = ff->coeff;
529   double roots[6];
530 
531   dfd[6] = 99.0 * fd[6] * fd[6];
532   dfd[5] = 6.0 * ( 2.0 * fd[4] * fd[6] + fd[5] * fd[5] );
533   dfd[4] = 2.0 * ( fd[2] * fd[6] + fd[3] * fd[5] ) + fd[4] * fd[4];
534   dfd[3] = 0.0;
535   dfd[2] = -2.0 * ( fd[0] * fd[4] + fd[1] * fd[3] ) - fd[2] * fd[2];
536   dfd[1] = -6.0 * ( 2.0 * fd[0] * fd[2] + fd[1] * fd[1] );
537   dfd[0] = -99.0 * fd[0] * fd[0];
538   double B = double_poly_bound_roots (dff);
539   k = double_poly_compute_roots (roots, dff, B);
540   ASSERT_ALWAYS(k > 0);
541   for (i = 0; i < k; i++)
542     {
543       s = sqrt (roots[i]);
544       logmu = L2_lognorm_d (ff, s);
545       if (logmu < logmu_min)
546         {
547           logmu_min = logmu;
548           s_min = s;
549         }
550     }
551 #endif
552 
553   mpz_poly_clear (df);
554   for (i = 0; i < 6; i++)
555     usp_root_data_clear (Roots + i);
556 
557   return s_min;
558 }
559 
560 /* return the skewness giving the best lognorm sum for two polynomials,
561    by using trichotomy between the optimal skewness of both polynomials */
562 double
L2_combined_skewness2(mpz_poly_srcptr f,mpz_poly_srcptr g,int prec)563 L2_combined_skewness2 (mpz_poly_srcptr f, mpz_poly_srcptr g, int prec)
564 {
565   double a, b, c, d, va, vb, vc, vd;
566 
567   a = L2_skewness (f, prec);
568   b = L2_skewness (g, prec);
569 
570   if (b < a)
571     {
572       c = b;
573       b = a;
574       a = c;
575     }
576 
577   ASSERT(a <= b);
578 
579   va = L2_lognorm (f, a) + L2_lognorm (g, a);
580   vb = L2_lognorm (f, b) + L2_lognorm (g, b);
581 
582   while (b - a > ldexp (a, -prec))
583     {
584       c = (2.0 * a + b) / 3.0;
585       vc = L2_lognorm (f, c) + L2_lognorm (g, c);
586 
587       d = (a + 2.0 * b) / 3.0;
588       vd = L2_lognorm (f, d) + L2_lognorm (g, d);
589 
590       if (va < vd && va < vb && vc < vd && vc < vb) /* minimum is in a or c */
591         {
592           b = d;
593           vb = vd;
594         }
595       else /* the minimum is in d or b */
596         {
597           a = c;
598           va = vc;
599         }
600     }
601   return (a + b) * 0.5;
602 }
603 
604 /* Use derivative test, with ellipse regions */
605 double
L2_skewness(mpz_poly_srcptr f,int prec)606 L2_skewness (mpz_poly_srcptr f, int prec)
607 {
608   double_poly ff, df;
609   double s = 0.0, a = 0.0, b = 0.0, c, nc, *fd, *dfd,
610     s1, s2, s3, s4, s5, s6, s7;
611   unsigned int d = f->deg;
612 
613   ASSERT_ALWAYS(1 <= d && d <= 7);
614 
615   double_poly_init (ff, d);
616   double_poly_init (df, d);
617 
618   /* convert once for all to double's to avoid expensive mpz_get_d() */
619   double_poly_set_mpz_poly (ff, f);
620   fd = ff->coeff;
621   dfd = df->coeff;
622   if (d == 7)
623     {
624       /* Sage code:
625          var('r,s,t,y')
626          R.<x> = PolynomialRing(ZZ)
627          S.<a> = InfinitePolynomialRing(R)
628          d=7; f = SR(sum(a[i]*x^i for i in range(d+1)))
629          F = expand(f(x=x/y)*y^d)
630          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
631          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
632          v = (16384*v/pi).expand()
633          dv = v.diff(s)
634          dv = (dv*s^8).expand().collect(s)
635          3003*a7^2*s^14 + 165*a6^2*s^12 + 330*a5*a7*s^12 + 27*a5^2*s^10
636          + 54*a4*a6*s^10 + 54*a3*a7*s^10 + 5*a4^2*s^8 + 10*a3*a5*s^8
637          + 10*a2*a6*s^8 + 10*a1*a7*s^8 - 5*a3^2*s^6 - 10*a2*a4*s^6
638          - 10*a1*a5*s^6 - 10*a0*a6*s^6 - 27*a2^2*s^4 - 54*a1*a3*s^4
639          - 54*a0*a4*s^4 - 165*a1^2*s^2 - 330*a0*a2*s^2 - 3003*a0^2
640       */
641       dfd[7] = 3003.0 * fd[7] * fd[7];
642       dfd[6] = 165.0 * (fd[6] * fd[6] + 2.0 * fd[5] * fd[7]);
643       dfd[5] = 27.0 * (fd[5]*fd[5] + 2.0*fd[4]*fd[6] + 2.0*fd[3]*fd[7]);
644       dfd[4] = 5.0*(fd[4]*fd[4]+2.0*fd[3]*fd[5]+2.0*fd[2]*fd[6]+2.0*fd[1]*fd[7]);
645       dfd[3] = 5.0*(fd[3]*fd[3]+2.0*fd[2]*fd[4]+2.0*fd[1]*fd[5]+2.0*fd[0]*fd[6]);
646       dfd[2] = 27.0 * (fd[2]*fd[2] + 2.0*fd[1]*fd[3] + 2.0*fd[0]*fd[4]);
647       dfd[1] = 165.0 * (fd[1]*fd[1] + 2.0*fd[0]*fd[2]);
648       dfd[0] = 3003 * fd[0] * fd[0];
649       s = 1.0;
650       nc = dfd[7] + dfd[6] + dfd[5] + dfd[4] - dfd[3] - dfd[2] - dfd[1]
651         - dfd[0];
652       /* first isolate the minimum in an interval [s, 2s] by dichotomy */
653       while (nc > 0)
654         {
655           s = 0.5 * s;
656           s1 = s * s;   /* s^2 */
657           s2 = s1 * s1; /* s^4 */
658           s3 = s2 * s1; /* s^6 */
659           s4 = s2 * s2; /* s^8 */
660           s5 = s4 * s1; /* s^10 */
661           s6 = s3 * s3; /* s^12 */
662           s7 = s6 * s1; /* s^14 */
663           nc = dfd[7] * s7 + dfd[6] * s6 + dfd[5] * s5 + dfd[4] * s4
664             - dfd[3] * s3 - dfd[2] * s2 - dfd[1] * s1 - dfd[0];
665         }
666       do
667         {
668           s = 2.0 * s;
669           s1 = s * s;   /* s^2 */
670           s2 = s1 * s1; /* s^4 */
671           s3 = s2 * s1; /* s^6 */
672           s4 = s2 * s2; /* s^8 */
673           s5 = s4 * s1; /* s^10 */
674           s6 = s3 * s3; /* s^12 */
675           s7 = s6 * s1; /* s^14 */
676           nc = dfd[7] * s7 + dfd[6] * s6 + dfd[5] * s5 + dfd[4] * s4
677             - dfd[3] * s3 - dfd[2] * s2 - dfd[1] * s1 - dfd[0];
678         }
679       while (nc < 0);
680 
681       /* now dv(s/2) < 0 < dv(s) thus the minimum is in [s/2, s] */
682       a = (s == 2.0) ? 1.0 : 0.5 * s;
683       b = s;
684       /* use dichotomy to refine the root */
685       while (prec--)
686         {
687           c = (a + b) * 0.5;
688           s1 = c * c;
689           s2 = s1 * s1;
690           s3 = s2 * s1;
691           s4 = s2 * s2;
692           s5 = s4 * s1;
693           s6 = s3 * s3;
694           s7 = s6 * s1;
695 
696           nc = dfd[7] * s7 + dfd[6] * s6 + dfd[5] * s5 + dfd[4] * s4
697             - dfd[3] * s3 - dfd[2] * s2 - dfd[1] * s1 - dfd[0];
698           if (nc > 0)
699             b = c;
700           else
701             a = c;
702         }
703     }
704   else if (d == 6)
705     {
706       s = L2_skewness_deg6_approx (f, ff, df, prec);
707       goto end;
708     }
709   else if (d == 5)
710     {
711       /* Sage code:
712          R.<x> = PolynomialRing(ZZ)
713          S.<a> = InfinitePolynomialRing(R)
714          d=5; f = SR(sum(a[i]*x^i for i in range(d+1)))
715          F = expand(f(x=x/y)*y^d)
716          var('r,s,t')
717          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
718          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
719          v = (1536*v/pi).expand()
720          dv = v.diff(s)
721          dv = (dv*s^6/3).expand().collect(s)
722       */
723       dfd[5] = 105.0 * fd[5] * fd[5];
724       dfd[4] = 7.0 * (2.0 * fd[3] * fd[5] + fd[4] * fd[4]);
725       dfd[3] = 2.0 * (fd[1] * fd[5] + fd[2] * fd[4]) + fd[3] * fd[3];
726       dfd[2] = 2.0 * (fd[0] * fd[4] + fd[1] * fd[3]) + fd[2] * fd[2];
727       dfd[1] = 7.0 * (2.0 * fd[0] * fd[2] + fd[1] * fd[1]);
728       dfd[0] = 105.0 * fd[0] * fd[0];
729       s = 1.0;
730       nc = dfd[5] + dfd[4] + dfd[3] - dfd[2] - dfd[1] - dfd[0];
731       /* first isolate the minimum in an interval [s, 2s] by dichotomy */
732       while (nc > 0)
733         {
734           s = 0.5 * s;
735           s1 = s * s;   /* s^2 */
736           s2 = s1 * s1; /* s^4 */
737           s3 = s2 * s1; /* s^6 */
738           s4 = s2 * s2; /* s^8 */
739           s5 = s4 * s1; /* s^10 */
740           nc = dfd[5] * s5 + dfd[4] * s4 + dfd[3] * s3
741             - dfd[2] * s2 - dfd[1] * s1 - dfd[0];
742         }
743       do
744         {
745           s = 2.0 * s;
746           s1 = s * s;   /* s^2 */
747           s2 = s1 * s1; /* s^4 */
748           s3 = s2 * s1; /* s^6 */
749           s4 = s2 * s2; /* s^8 */
750           s5 = s4 * s1; /* s^10 */
751           nc = dfd[5] * s5 + dfd[4] * s4 + dfd[3] * s3
752             - dfd[2] * s2 - dfd[1] * s1 - dfd[0];
753         }
754       while (nc < 0);
755 
756       /* now dv(s/2) < 0 < dv(s) thus the minimum is in [s/2, s] */
757       a = (s == 2.0) ? 1.0 : 0.5 * s;
758       b = s;
759       /* use dichotomy to refine the root */
760       while (prec--)
761         {
762           c = (a + b) * 0.5;
763           s1 = c * c;   /* s^2 */
764           s2 = s1 * s1; /* s^4 */
765           s3 = s2 * s1; /* s^6 */
766           s4 = s2 * s2; /* s^8 */
767           s5 = s4 * s1; /* s^10 */
768           nc = dfd[5] * s5 + dfd[4] * s4 + dfd[3] * s3
769             - dfd[2] * s2 - dfd[1] * s1 - dfd[0];
770           if (nc > 0)
771             b = c;
772           else
773             a = c;
774         }
775     }
776   else if (d == 4)
777     {
778       /* Sage code:
779          R.<x> = PolynomialRing(ZZ)
780          S.<a> = InfinitePolynomialRing(R)
781          d=4; f = SR(sum(a[i]*x^i for i in range(d+1)))
782          var('r,s,t,y')
783          F = expand(f(x=x/y)*y^d)
784          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
785          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
786          v = (640*v/pi).expand()
787          dv = v.diff(s)
788          dv = (dv*s^5/10).expand().collect(s)
789       */
790       dfd[4] = 14.0 * fd[4] * fd[4];
791       dfd[3] = 2.0 * fd[2] * fd[4] + fd[3] * fd[3];
792       dfd[1] = 2.0 * fd[0] * fd[2] + fd[1] * fd[1];
793       dfd[0] = 14.0 * fd[0] * fd[0];
794       s = 1.0;
795       nc = dfd[4] + dfd[3] - dfd[1] - dfd[0];
796       /* first isolate the minimum in an interval [s, 2s] by dichotomy */
797       while (nc > 0)
798         {
799           s = 0.5 * s;
800           s1 = s * s;   /* s^2 */
801           s2 = s1 * s1; /* s^4 */
802           s3 = s2 * s1; /* s^6 */
803           s4 = s2 * s2; /* s^8 */
804           nc = dfd[4] * s4 + dfd[3] * s3 - dfd[1] * s1 - dfd[0];
805         }
806       do
807         {
808           s = 2.0 * s;
809           s1 = s * s;   /* s^2 */
810           s2 = s1 * s1; /* s^4 */
811           s3 = s2 * s1; /* s^6 */
812           s4 = s2 * s2; /* s^8 */
813           nc = dfd[4] * s4 + dfd[3] * s3 - dfd[1] * s1 - dfd[0];
814         }
815       while (nc < 0);
816 
817       /* now dv(s/2) < 0 < dv(s) thus the minimum is in [s/2, s] */
818       a = (s == 2.0) ? 1.0 : 0.5 * s;
819       b = s;
820       /* use dichotomy to refine the root */
821       while (prec--)
822         {
823           c = (a + b) * 0.5;
824           s1 = c * c;   /* s^2 */
825           s2 = s1 * s1; /* s^4 */
826           s3 = s2 * s1; /* s^6 */
827           s4 = s2 * s2; /* s^8 */
828           nc = dfd[4] * s4 + dfd[3] * s3 - dfd[1] * s1 - dfd[0];
829           if (nc > 0)
830             b = c;
831           else
832             a = c;
833         }
834     }
835   else if (d == 3)
836     {
837       /* Sage code:
838          R.<x> = PolynomialRing(ZZ)
839          S.<a> = InfinitePolynomialRing(R)
840          d=3; f = SR(sum(a[i]*x^i for i in range(d+1)))
841          var('r,s,t,y')
842          F = expand(f(x=x/y)*y^d)
843          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
844          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
845          v = (64*v/pi).expand()
846          dv = v.diff(s)
847          dv = (dv*s^4).expand().collect(s)
848       */
849       dfd[3] = 15.0 * fd[3] * fd[3];
850       dfd[2] = 2.0 * fd[1] * fd[3] + fd[2] * fd[2];
851       dfd[1] = 2.0 * fd[0] * fd[2] + fd[1] * fd[1];
852       dfd[0] = 15.0 * fd[0] * fd[0];
853       s = 1.0;
854       nc = dfd[3] + dfd[2] - dfd[1] - dfd[0];
855       /* first isolate the minimum in an interval [s, 2s] by dichotomy */
856       while (nc > 0)
857         {
858           s = 0.5 * s;
859           s1 = s * s;   /* s^2 */
860           s2 = s1 * s1; /* s^4 */
861           s3 = s2 * s1; /* s^6 */
862           nc = dfd[3] * s3 + dfd[2] * s2 - dfd[1] * s1 - dfd[0];
863         }
864       do
865         {
866           s = 2.0 * s;
867           s1 = s * s;   /* s^2 */
868           s2 = s1 * s1; /* s^4 */
869           s3 = s2 * s1; /* s^6 */
870           nc = dfd[3] * s3 + dfd[2] * s2 - dfd[1] * s1 - dfd[0];
871         }
872       while (nc < 0);
873 
874       /* now dv(s/2) < 0 < dv(s) thus the minimum is in [s/2, s] */
875       a = (s == 2.0) ? 1.0 : 0.5 * s;
876       b = s;
877       /* use dichotomy to refine the root */
878       while (prec--)
879         {
880           c = (a + b) * 0.5;
881           s1 = c * c;   /* s^2 */
882           s2 = s1 * s1; /* s^4 */
883           s3 = s2 * s1; /* s^6 */
884           nc = dfd[3] * s3 + dfd[2] * s2 - dfd[1] * s1 - dfd[0];
885           if (nc > 0)
886             b = c;
887           else
888             a = c;
889         }
890     }
891   else if (d == 2)
892     {
893       /* Sage code:
894          var('r,s,t,y')
895          R.<x> = PolynomialRing(ZZ)
896          S.<a> = InfinitePolynomialRing(R)
897          d=2; f = SR(sum(a[i]*x^i for i in range(d+1)))
898          F = expand(f(x=x/y)*y^d)
899          F = F.subs(x=s^(1/2)*r*cos(t),y=r/s^(1/2)*sin(t))
900          v = integrate(integrate(F^2*r,(r,0,1)),(t,0,2*pi))
901          v = (24*v/pi).expand()
902          dv = v.diff(s)
903          dv = (dv*s^3).expand().collect(s)
904          We get dv = 6*a_2^2*s^4 - 6*a_0^2
905          thus the optimal skewness is sqrt(|a0|/|a2|).
906       */
907       a = b = sqrt (fabs (fd[0] / fd[2]));
908     }
909   else /* d == 1 */
910     a = b = fabs (fd[0] / fd[1]);
911 
912   s = (a + b) * 0.5;
913 
914  end:
915   double_poly_clear (ff);
916   double_poly_clear (df);
917 
918   return s;
919 }
920 
L2_skew_lognorm(mpz_poly_srcptr f,int prec)921 double L2_skew_lognorm (mpz_poly_srcptr f, int prec)
922 {
923   return L2_lognorm (f, L2_skewness (f, prec));
924 }
925 
926 #ifdef OPTIMIZE_MP
927 
928 /* Use derivative test, with ellipse regions */
929 void
L2_skewness_derivative_mp(mpz_poly_ptr F,int prec,mpz_t skewness)930 L2_skewness_derivative_mp (mpz_poly_ptr F, int prec, mpz_t skewness)
931 {
932   mpz_t s, s1, s2, s3, s4, s5, s6, a, b, c, nc;
933   mpz_init (s);
934   mpz_init (s1);
935   mpz_init (s2);
936   mpz_init (s3);
937   mpz_init (s4);
938   mpz_init (s5);
939   mpz_init (s6);
940   mpz_init (a);
941   mpz_init (b);
942   mpz_init (c);
943   mpz_init (nc);
944 
945   int i, d = F->deg;
946   mpz_t *f = F->coeff;
947 
948   if (d == 6) {
949     mpz_t df[d+1];
950     mpz_t tmp;
951     mpz_init_set_ui (tmp, 1);
952     for (i=0; i<=d; i++)
953       mpz_init (df[i]);
954 
955     // dfd[6] = 99.0 * fd[6] * fd[6];
956     mpz_mul (df[6], f[6], f[6]);
957     mpz_mul_ui(df[6], df[6], 99);
958     // dfd[5] = 6.0 * ( 2.0 * fd[4] * fd[6] + fd[5] * fd[5] );
959     mpz_mul (df[5], f[5], f[5]);
960     mpz_mul (tmp, f[4], f[6]);
961     mpz_add (df[5], df[5], tmp);
962     mpz_add (df[5], df[5], tmp);
963     mpz_mul_ui (df[5], df[5], 6);
964     // dfd[4] = 2.0 * ( fd[2] * fd[6] + fd[3] * fd[5] ) + fd[4] * fd[4];
965     mpz_mul (df[4], f[4], f[4]);
966     mpz_mul (tmp, f[3], f[5]);
967     mpz_add (df[4], df[4], tmp);
968     mpz_add (df[4], df[4], tmp);
969     mpz_mul (tmp, f[2], f[6]);
970     mpz_add (df[4], df[4], tmp);
971     mpz_add (df[4], df[4], tmp);
972     //  dfd[2] = 2.0 * ( fd[0] * fd[4] + fd[1] * fd[3] ) + fd[2] * fd[2];
973     mpz_mul (df[2], f[2], f[2]);
974     mpz_mul (tmp, f[1], f[3]);
975     mpz_add (df[2], df[2], tmp);
976     mpz_add (df[2], df[2], tmp);
977     mpz_mul (tmp, f[0], f[4]);
978     mpz_add (df[2], df[2], tmp);
979     mpz_add (df[2], df[2], tmp);
980     // dfd[1] = 6.0 * ( 2.0 * fd[0] * fd[2] + fd[1] * fd[1] );
981     mpz_mul (df[1], f[1], f[1]);
982     mpz_mul (tmp, f[0], f[2]);
983     mpz_add (df[1], df[1], tmp);
984     mpz_add (df[1], df[1], tmp);
985     mpz_mul_ui (df[1], df[1], 6);
986     // dfd[0] = 99.0 * fd[0] * fd[0] ;
987     mpz_mul (df[0], f[0], f[0]);
988     mpz_mul_ui(df[0], df[0], 99);
989     /*
990     gmp_fprintf (stderr, "df[6]: %Zd\n", df[6]);
991     gmp_fprintf (stderr, "df[5]: %Zd\n", df[5]);
992     gmp_fprintf (stderr, "df[4]: %Zd\n", df[4]);
993     gmp_fprintf (stderr, "df[2]: %Zd\n", df[2]);
994     gmp_fprintf (stderr, "df[1]: %Zd\n", df[1]);
995     gmp_fprintf (stderr, "df[0]: %Zd\n", df[0]);
996     */
997 
998     mpz_set_si (nc, -1);
999     mpz_set_ui (s, 1);
1000 
1001     /* first isolate the minimum in an interval [s, 2s] by dichotomy */
1002     while ( mpz_cmp_ui(nc, 0) < 0 ) {
1003 
1004       mpz_add (s, s, s); /* s = 2.0 * s */
1005       mpz_mul (s1, s, s); /* s^2 */
1006       mpz_mul (s2, s1, s1); /* s^4 */
1007       mpz_mul (s4, s2, s2); /* s^8 */
1008       mpz_mul (s5, s4, s1); /* s^10 */
1009       mpz_mul (s6, s5, s1); /* s^12 */
1010 
1011       /* nc = dfd[6] * s6 + dfd[5] * s5 + dfd[4] * s4
1012          - dfd[2] * s2 - dfd[1] * s1 - dfd[0];         */
1013       mpz_mul (s6, s6, df[6]);
1014       mpz_mul (s5, s5, df[5]);
1015       mpz_mul (s4, s4, df[4]);
1016       mpz_mul (s2, s2, df[2]);
1017       mpz_mul (s1, s1, df[1]);
1018       mpz_add (nc, s6, s5);
1019       mpz_add (nc, nc, s4);
1020       mpz_sub (nc, nc, s2);
1021       mpz_sub (nc, nc, s1);
1022       mpz_sub (nc, nc, df[0]);
1023 
1024     }
1025 
1026     /* now dv(s/2) < 0 < dv(s) thus the minimum is in [s/2, s] */
1027     mpz_cdiv_q_2exp (a, s, 1);
1028     mpz_set (b, s);
1029     /* use dichotomy to refine the root */
1030     while (prec--)
1031     {
1032       mpz_add (tmp, a, b);
1033       mpz_cdiv_q_2exp (c, tmp, 1);
1034       mpz_mul (s1, c, c); //s1 = c * c;
1035       mpz_mul (s2, s1, s1); //s2 = s1 * s1;
1036       mpz_mul (s4, s2, s2); //s4 = s2 * s2;
1037       mpz_mul (s5, s4, s1); //s5 = s4 * s1;
1038       mpz_mul (s6, s5, s1); //s6 = s5 * s1;
1039 
1040       /* nc = dfd[6] * s6 + dfd[5] * s5 + dfd[4] * s4
1041          - dfd[2] * s2 - dfd[1] * s1 - dfd[0]; */
1042       mpz_mul (s6, s6, df[6]);
1043       mpz_mul (s5, s5, df[5]);
1044       mpz_mul (s4, s4, df[4]);
1045       mpz_mul (s2, s2, df[2]);
1046       mpz_mul (s1, s1, df[1]);
1047       mpz_add (nc, s6, s5);
1048       mpz_add (nc, nc, s4);
1049       mpz_sub (nc, nc, s2);
1050       mpz_sub (nc, nc, s1);
1051       mpz_sub (nc, nc, df[0]);
1052 
1053       if (mpz_cmp_ui (nc, 0) > 0)
1054         mpz_set (b, c);
1055       else
1056         mpz_set (a, c);
1057     }
1058 
1059     mpz_clear (tmp);
1060     for (i=0; i<=d; i++)
1061       mpz_clear (df[i]);
1062 
1063   } // end
1064   else  {
1065     fprintf (stderr, "L2_skewness_derivative_mp not yet implemented for degree %d\n", d);
1066     exit (1);
1067   }
1068 
1069   mpz_add (s, a, b);
1070   mpz_cdiv_q_2exp (skewness, s, 1);
1071 
1072   mpz_clear (s);
1073   mpz_clear (s1);
1074   mpz_clear (s2);
1075   mpz_clear (s3);
1076   mpz_clear (s4);
1077   mpz_clear (s5);
1078   mpz_clear (s6);
1079   mpz_clear (a);
1080   mpz_clear (b);
1081   mpz_clear (c);
1082   mpz_clear (nc);
1083 }
1084 #endif
1085 
1086 /************************** polynomial arithmetic ****************************/
1087 
1088 /* h(x) <- h(x + r/p), where the coefficients of h(x + r/p) are known to
1089    be integers */
1090 static void
poly_shift_divp(mpz_t * h,unsigned int d,unsigned long r,unsigned long p)1091 poly_shift_divp (mpz_t *h, unsigned int d, unsigned long r, unsigned long p)
1092 {
1093   unsigned int i, k;
1094   mpz_t t;
1095 
1096   mpz_init (t);
1097   for (i = 1; i <= d; i++)
1098     for (k = d - i; k < d; k++)
1099       { /* h[k] <- h[k] + r/p h[k+1] */
1100         ASSERT (mpz_divisible_ui_p (h[k+1], p) != 0);
1101         mpz_divexact_ui (t, h[k+1], p);
1102         mpz_addmul_ui (h[k], t, r);
1103       }
1104   mpz_clear (t);
1105 }
1106 
1107 /********************* computation of alpha **********************************/
1108 
1109 /* Auxiliary routine for special_valuation(), see below. It returns the
1110    average p-valuation of the polynomial f. Works recursively.
1111    Assumes f is square-free, otherwise it will loop. */
1112 static double
special_val0(mpz_poly_srcptr f,unsigned long p,gmp_randstate_ptr rstate)1113 special_val0 (mpz_poly_srcptr f, unsigned long p, gmp_randstate_ptr rstate)
1114 {
1115   double v;
1116   mpz_t c,  *h;
1117   unsigned long *roots, r, r0;
1118   int i, d = f->deg, nroots;
1119   mpz_poly g, H;
1120 
1121   mpz_init (c);
1122   mpz_poly_content (c, f);
1123   for (v = 0.0; mpz_divisible_ui_p (c, p); v++, mpz_divexact_ui (c, c, p));
1124 
1125   mpz_poly_init(g, d);
1126   g->deg = d;
1127 
1128   /* g <- f/p^v */
1129   if (v != 0.0)
1130     {
1131       mpz_ui_pow_ui (c, p, (unsigned long) v); /* p^v */
1132       for (i = 0; i <= d; i++)
1133         mpz_divexact (g->coeff[i], f->coeff[i], c);
1134     }
1135   else
1136     mpz_poly_set (g, f);
1137 
1138   mpz_poly_init (H, d);
1139   H->deg = d;
1140   h = H->coeff;
1141   /* first compute h(x) = g(px) */
1142   mpz_set_ui (c, 1);
1143   for (i = 0; i <= d; i++)
1144     {
1145       mpz_mul (h[i], g->coeff[i], c);
1146       mpz_mul_ui (c, c, p);
1147     }
1148   /* Search for roots of g mod p */
1149   ASSERT (d > 0);
1150   roots = (unsigned long*) malloc (d * sizeof (unsigned long));
1151   FATAL_ERROR_CHECK(roots == NULL, "not enough memory");
1152 
1153   nroots = mpz_poly_roots_ulong (roots, g, p, rstate);
1154   ASSERT (nroots <= d);
1155   for (r0 = 0, i = 0; i < nroots; i++)
1156     {
1157       r = roots[i];
1158       mpz_poly_eval_diff_ui (c, g, r);
1159       if (mpz_divisible_ui_p (c, p) == 0) /* g'(r) <> 0 mod p */
1160         v += 1.0 / (double) (p - 1);
1161       else /* hard case */
1162         {
1163           /* g(px+r) = h(x + r/p), thus we can go from h0(x)=g(px+r0)
1164              to h1(x)=g(px+r1) by computing h0(x + (r1-r0)/p).
1165              Warning: we can have h = f, and thus an infinite loop, when
1166              the p-valuation of f is d, and f has a single root r/(1-p) of
1167              multiplicity d.
1168              Moreover if f(x) = c*p^d*(x-r+b*p)^d, where c is coprime to p,
1169              then h(x) = f(p*x+r)/p^d = c*p^d*(x+b)^d, and most likely after
1170              at most p iterations we'll go back to f(x), thus we should avoid
1171              all cases where f(x) has a root of multiplicity d, but how to
1172              check that efficiently? And which value to return in such a case?
1173           */
1174           ASSERT_ALWAYS (r >= r0); /* the roots are sorted */
1175           poly_shift_divp (h, d, r - r0, p);
1176           r0 = r;
1177           v += special_val0 (H, p, rstate) / (double) p;
1178         }
1179     }
1180   free (roots);
1181   mpz_poly_clear (H);
1182   mpz_poly_clear (g);
1183   mpz_clear (c);
1184 
1185   return v;
1186 }
1187 
1188 /* Compute the average valuation of F(a,b) for gcd(a,b)=1, for a prime p
1189    dividing the discriminant of f, using the following algorithm from
1190    Guillaume Hanrot (which is some kind of p-adic variant of Uspensky's
1191    algorithm):
1192 
1193    val(f, p)
1194      return val0(f, p) * p / (p+1) + val0(f(1/(p*x))*(p*x)^d, p) * 1/(p+1)
1195 
1196    val0(f, p).
1197      v <- valuation (content(f), p);
1198      f <- f/p^v
1199 
1200      r <- roots mod p(f, p)
1201 
1202      for r_i in r do
1203          if f'(r_i) <> 0 mod p then v +=  1/(p-1).
1204          else
1205               f2 <- f(p*x + r_i)
1206               v += val0(f2, p) / p.
1207          endif
1208      endfor
1209      Return v.
1210 
1211 A special case when:
1212 (a) p^2 does not divide disc(f),
1213 (b) p does not divide lc(f),
1214 then the average valuation is (p q_p - 1)/(p^2 - 1), where q_p is the number
1215 of roots of f mod p. When q_p=1, we get 1/(p+1).
1216 
1217 Note: when p does not divide lc(f), the val0(f(1/(p*x))*(p*x)^d, p) call
1218 always returns 0 in val(f,p).
1219 
1220 Assumes p divides disc = disc(f), d is the degree of f.
1221 */
1222 double
special_valuation(mpz_poly_srcptr f,unsigned long p,mpz_srcptr disc,gmp_randstate_ptr rstate)1223 special_valuation (mpz_poly_srcptr f, unsigned long p, mpz_srcptr disc, gmp_randstate_ptr rstate)
1224 {
1225     double v;
1226     int p_divides_lc;
1227     int pvaluation_disc = 0;
1228     double pd = (double) p;
1229     int d = f->deg;
1230 
1231     if (mpz_divisible_ui_p(disc, p)) {
1232   mpz_t t;
1233   pvaluation_disc++;
1234   mpz_init(t);
1235   mpz_divexact_ui(t, disc, p);
1236   if (mpz_divisible_ui_p(t, p))
1237       pvaluation_disc++;
1238   mpz_clear(t);
1239     }
1240 
1241     p_divides_lc = mpz_divisible_ui_p(f->coeff[d], p);
1242 
1243     if (pvaluation_disc == 0) {
1244   /* easy ! */
1245   int e;
1246   e = mpz_poly_roots_ulong (NULL, f, p, rstate);
1247   if (p_divides_lc) {
1248       /* Or the discriminant would have valuation 1 at least */
1249       ASSERT(mpz_divisible_ui_p(f->coeff[d - 1], p) == 0);
1250       e++;
1251   }
1252   return (pd * e) / (pd * pd - 1);
1253     } else if (pvaluation_disc == 1) {
1254       /* special case where p^2 does not divide disc */
1255   int e;
1256   e = mpz_poly_roots_ulong (NULL, f, p, rstate);
1257         if (p_divides_lc)
1258           e ++;
1259   /* something special here. */
1260   return (pd * e - 1) / (pd * pd - 1);
1261     } else {
1262   v = special_val0(f, p, rstate) * pd;
1263   if (p_divides_lc) {
1264       /* compute g(x) = f(1/(px))*(px)^d, i.e., g[i] = f[d-i]*p^i */
1265       /* IOW, the reciprocal polynomial evaluated at px */
1266       mpz_poly G;
1267       mpz_t *g;
1268       mpz_t t;
1269       int i;
1270 
1271       mpz_poly_init (G, d);
1272       G->deg = d;
1273       g = G->coeff;
1274       mpz_init_set_ui(t, 1);  /* will contains p^i */
1275       for (i = 0; i <= d; i++) {
1276         mpz_mul(g[i], f->coeff[d - i], t);
1277         mpz_mul_ui(t, t, p);
1278       }
1279       v += special_val0(G, p, rstate);
1280       mpz_poly_clear (G);
1281       mpz_clear(t);
1282   }
1283   v /= pd + 1.0;
1284   return v;
1285     }
1286 }
1287 
1288 /* Compute the value alpha(F) from Murphy's thesis, page 49:
1289    alpha(F) = sum(prime p <= B, (1 - q_p*p/(p+1)) log(p)/(p-1))
1290    where q_p is the number of roots of F mod p, including the number of
1291    projective roots (i.e., the zeros of the reciprocal polynomial mod p).
1292 
1293    alpha(F) is an estimate of the average logarithm of the part removed
1294    from sieving, compared to a random integer.
1295 
1296    We want alpha as small as possible, i.e., alpha negative with a large
1297    absolute value. Typical good values are alpha=-4, -5, ...
1298 */
1299 double
get_alpha(mpz_poly_srcptr f,unsigned long B)1300 get_alpha (mpz_poly_srcptr f, unsigned long B)
1301 {
1302   double alpha, e;
1303   unsigned long p;
1304   mpz_t disc;
1305 
1306   /* for F linear, we have q_p = 1 for all p, thus
1307      alpha(F) = sum(prime p <= B, log(p)/(p^2-1)) ~ 0.569959993064325 */
1308   if (f->deg == 1)
1309     return 0.569959993064325;
1310 
1311   /* a gmp_randstate init/clear cycle is about 10 times the cost of a
1312    * one-word gmp random pick. Given that we assume that B is at least
1313    * in the hundreds, the random picks alone would be a sufficient
1314    * observation to conclude that it's relatively harmless to do a
1315    * random initialization here, and present get_alpha as as something
1316    * deterministic. (and of course, there's tons of arithmetic in there
1317    * too.
1318    */
1319 
1320   gmp_randstate_t rstate;
1321   gmp_randinit_default(rstate);
1322 
1323   mpz_init (disc);
1324   mpz_poly_discriminant (disc, f);
1325 
1326   /* special_valuation returns the expected average exponent of p in F(a,b)
1327      for coprime a, b, i.e., e = q_p*p/(p^2-1), thus the contribution for p
1328      is (1/(p-1) - e) * log(p) */
1329 
1330   /* prime p=2 */
1331   e = special_valuation (f, 2, disc, rstate);
1332   alpha = (1.0 - e) * log (2.0);
1333 
1334   /* FIXME: generate all primes up to B and pass them to get_alpha */
1335   for (p = 3; p <= B; p += 2)
1336     if (ulong_isprime (p))
1337       {
1338         e = special_valuation (f, p, disc, rstate);
1339         alpha += (1.0 / (double) (p - 1) - e) * log ((double) p);
1340       }
1341   gmp_randclear(rstate);
1342   mpz_clear (disc);
1343   return alpha;
1344 }
1345 
1346 /* affine part of the special valution for polynomial f over p. */
1347 double
special_valuation_affine(mpz_poly_srcptr f,unsigned long p,mpz_srcptr disc,gmp_randstate_ptr rstate)1348 special_valuation_affine (mpz_poly_srcptr f, unsigned long p, mpz_srcptr disc, gmp_randstate_ptr rstate)
1349 {
1350    double v;
1351    int pvaluation_disc = 0;
1352    double pd = (double) p;
1353 
1354    if (mpz_divisible_ui_p(disc, p)) {
1355       mpz_t t;
1356       pvaluation_disc++;
1357       mpz_init(t);
1358       mpz_divexact_ui(t, disc, p);
1359       if (mpz_divisible_ui_p(t, p))
1360          pvaluation_disc++;
1361       mpz_clear(t);
1362    }
1363 
1364    if (pvaluation_disc == 0) {
1365       /* case 1: root must be simple*/
1366       int e = 0;
1367       e = mpz_poly_roots_ulong (NULL, f, p, rstate);
1368 
1369       return (pd * e) / (pd * pd - 1);
1370    }
1371    /* else if (pvaluation_disc == 1) { */
1372    /*     /\* case 2: special case where p^2 does not divide disc *\/ */
1373    /*     int e = 0; */
1374    /*     e = mpz_poly_roots_ulong (NULL, f, p); */
1375 
1376    /*     /\* something special here. *\/ */
1377    /*     return (pd * e - 1) / (pd * pd - 1); */
1378 
1379    /* } */
1380    else {
1381       v = special_val0(f, p, rstate) * pd;
1382       v /= pd + 1.0;
1383       return v;
1384    }
1385 }
1386 
1387 
1388 /*
1389   Find alpha_projective for a poly f. It uses
1390   some hacks here which need to be changed in future.
1391   Until now, since this will only be done several
1392   times, hence the speed is not critical.
1393 
1394   Note that, the returned alpha is the -val * log(p)
1395   part in the alpha. Hence, we can just add
1396   this to our affine part.
1397 */
1398 double
get_alpha_projective(mpz_poly_srcptr f,unsigned long B)1399 get_alpha_projective (mpz_poly_srcptr f, unsigned long B)
1400 {
1401    double alpha, e;
1402    unsigned long p;
1403    mpz_t disc;
1404 
1405    /* about random state: see comment in get_alpha */
1406    gmp_randstate_t rstate;
1407    gmp_randinit_default(rstate);
1408 
1409    mpz_init (disc);
1410    mpz_poly_discriminant (disc, f);
1411 
1412    /* prime p=2 */
1413    e = special_valuation (f, 2, disc, rstate) - special_valuation_affine (f, 2, disc, rstate);
1414 
1415    /* 1/(p-1) is counted in the affine part */
1416    alpha =  (- e) * log (2.0);
1417 
1418    /* FIXME: generate all primes up to B and pass them to get_alpha */
1419    for (p = 3; p <= B; p += 2)
1420       if (ulong_isprime (p)) {
1421          e = special_valuation(f, p, disc, rstate) - special_valuation_affine (f, p, disc, rstate);
1422          alpha += (- e) * log ((double) p);
1423       }
1424 
1425    gmp_randclear(rstate);
1426    mpz_clear (disc);
1427 
1428    return alpha;
1429 }
1430 
1431 /*
1432   Similar to above, but for affine part.
1433 */
1434 double
get_alpha_affine(mpz_poly_srcptr f,unsigned long B)1435 get_alpha_affine (mpz_poly_srcptr f, unsigned long B)
1436 {
1437    double alpha, e;
1438    unsigned long p;
1439    mpz_t disc;
1440 
1441   /* about random state: see comment in get_alpha */
1442   gmp_randstate_t rstate;
1443   gmp_randinit_default(rstate);
1444 
1445    mpz_init (disc);
1446    mpz_poly_discriminant (disc, f);
1447 
1448    /* prime p=2 */
1449    e = special_valuation_affine (f, 2, disc, rstate);
1450    alpha =  (1.0 - e) * log (2.0);
1451 
1452    //printf ("\np: %u, val: %f, alpha: %f\n", 2, e, alpha);
1453 
1454    /* FIXME: generate all primes up to B and pass them to get_alpha */
1455    for (p = 3; p <= B; p += 2)
1456       if (ulong_isprime (p)) {
1457          e = special_valuation_affine (f, p, disc, rstate);
1458          alpha += (1.0 / (double) (p - 1) - e) * log ((double) p);
1459          //printf ("\np: %u, val: %f, alpha: %f\n", p, e, alpha);
1460 
1461       }
1462    mpz_clear (disc);
1463    gmp_randclear(rstate);
1464    return alpha;
1465 }
1466 
1467 /*
1468   Similar to above, but for a given prime p.
1469 */
1470 double
get_alpha_affine_p(mpz_poly_srcptr f,unsigned long p,gmp_randstate_ptr rstate)1471 get_alpha_affine_p (mpz_poly_srcptr f, unsigned long p, gmp_randstate_ptr rstate)
1472 {
1473    double alpha, e;
1474    mpz_t disc;
1475 
1476    mpz_init (disc);
1477    mpz_poly_discriminant (disc, f);
1478 
1479    if (p == 2)
1480      {
1481        e = special_valuation_affine (f, 2, disc, rstate);
1482        alpha =  (1.0 - e) * log (2.0);
1483      }
1484    else
1485      {
1486        ASSERT (ulong_isprime (p));
1487        e = special_valuation_affine (f, p, disc, rstate);
1488        alpha = (1.0 / (double) (p - 1) - e) * log ((double) p);
1489      }
1490    mpz_clear (disc);
1491    return alpha;
1492 }
1493 
1494 #if 0
1495 /*
1496   Contribution from a particular multiple root r of the polynomial f
1497   over p. Note, r must also be a double root of f mod p.
1498 */
1499 static double
1500 average_valuation_affine_root (mpz_poly_ptr f, unsigned long p, unsigned long r )
1501 {
1502    unsigned long v = 0UL;
1503    int i, j;
1504    mpz_t c, *fv;
1505    double val;
1506 
1507    mpz_init (c);
1508 
1509    /* init fv */
1510    fv = (mpz_t*) malloc ((d + 1) * sizeof (mpz_t));
1511    if (fv == NULL) {
1512       fprintf (stderr, "Error, cannot allocate memory in average_valuation_affine_root.\n");
1513       exit (1);
1514    }
1515 
1516    for (i = 0; i <= d; i++)
1517       mpz_init_set (fv[i], f[i]);
1518 
1519    /* remove the p-valuations from fv */
1520    mpz_poly_content (c, f);
1521    while (mpz_divisible_ui_p(c, p)) {
1522       v += 1;
1523       for (i = 0; i <= d; i ++) {
1524          mpz_fdiv_q_ui (fv[i], f[i], p);
1525       }
1526    }
1527 
1528    /* first translate, then scale */
1529    for (i = d - 1; i >= 0; i--)
1530       for (j = i; j < d; j++)
1531          mpz_addmul_ui (fv[j], fv[j+1], r);
1532    /* t is p^i */
1533    mpz_set_ui(c, 1);
1534    for (i = 0; i <= d; i++) {
1535       mpz_mul(fv[i], fv[i], c);
1536       mpz_mul_ui(c, c, p);
1537    }
1538 
1539    /* now c is disc. */
1540    discriminant (c, fv, d);
1541    val = special_valuation_affine (fv, d, p, c);
1542    val = val / (double) p;
1543 
1544    /* clear */
1545    for (i = 0; i <= d; i++) {
1546       mpz_clear (fv[i]);
1547    }
1548 
1549    /* !!! REMEMBER THIS !!! */
1550    free (fv);
1551    mpz_clear(c);
1552    return val;
1553 }
1554 #endif
1555 
1556 /**************************** rotation ***************************************/
1557 
1558 /* replace f + k0 * x^t * (b*x + m) by f + k * x^t * (b*x + m), and return k */
1559 long
rotate_aux(mpz_t * f,mpz_t b,mpz_t m,long k0,long k,unsigned int t)1560 rotate_aux (mpz_t *f, mpz_t b, mpz_t m, long k0, long k, unsigned int t)
1561 {
1562   /* Warning: k - k0 might not be representable in a long! */
1563   unsigned long diff;
1564   if (k >= k0)
1565     {
1566       diff = k - k0; /* k - k0 always fits in an unsigned long */
1567       mpz_addmul_ui (f[t + 1], b, diff);
1568       mpz_addmul_ui (f[t], m, diff);
1569     }
1570   else
1571     {
1572       diff = k0 - k;
1573       mpz_submul_ui (f[t + 1], b, diff);
1574       mpz_submul_ui (f[t], m, diff);
1575     }
1576   return k;
1577 }
1578 
1579 /* replace f by f + k * x^t * (b*x + g0) */
1580 void
rotate_auxg_z(mpz_t * f,const mpz_t b,const mpz_t g0,const mpz_t k,unsigned int t)1581 rotate_auxg_z (mpz_t *f, const mpz_t b, const mpz_t g0, const mpz_t k, unsigned int t)
1582 {
1583   mpz_addmul (f[t + 1], b, k);
1584   mpz_addmul (f[t], g0, k);
1585 }
1586 
1587 /* replace f by f - k * x^t * (b*x + g0) */
1588 void
derotate_auxg_z(mpz_t * f,const mpz_t b,const mpz_t g0,const mpz_t k,unsigned int t)1589 derotate_auxg_z (mpz_t *f, const mpz_t b, const mpz_t g0, const mpz_t k, unsigned int t)
1590 {
1591   mpz_submul (f[t + 1], b, k);
1592   mpz_submul (f[t], g0, k);
1593 }
1594 
1595 /*
1596    Print f, g only.
1597    Note: it's a backend for print_cadopoly().
1598 */
1599 void
print_cadopoly_fg(FILE * fp,mpz_t * f,int df,mpz_t * g,int dg,mpz_srcptr n)1600 print_cadopoly_fg (FILE *fp, mpz_t *f, int df, mpz_t *g, int dg, mpz_srcptr n)
1601 {
1602    int i;
1603 
1604    /* n */
1605    gmp_fprintf (fp, "\nn: %Zd\n", n);
1606 
1607    /* Y[i] */
1608    for (i = dg; i >= 0; i--)
1609      gmp_fprintf (fp, "Y%d: %Zd\n", i, g[i]);
1610 
1611    /* c[i] */
1612    for (i = df; i >= 0; i--)
1613      gmp_fprintf (fp, "c%d: %Zd\n", i, f[i]);
1614 }
1615 
1616 
1617 /*
1618    Print f, g only, lognorm, skew, alpha, MurphyE.
1619    Note:  it's a backend for print_cadopoly_extra().
1620 */
1621 double
print_cadopoly(FILE * fp,cado_poly_srcptr p)1622 print_cadopoly (FILE *fp, cado_poly_srcptr p)
1623 {
1624    unsigned int nroots = 0;
1625    double alpha, alpha_proj, logmu, e;
1626    mpz_poly F, G;
1627 
1628    F->coeff = p->pols[ALG_SIDE]->coeff;
1629    F->deg = p->pols[ALG_SIDE]->deg;
1630    G->coeff = p->pols[RAT_SIDE]->coeff;
1631    G->deg = p->pols[RAT_SIDE]->deg;
1632 
1633    /* print f, g only*/
1634    print_cadopoly_fg (fp, F->coeff, F->deg, G->coeff, G->deg, p->n);
1635 
1636 #ifdef DEBUG
1637    fprintf (fp, "# ");
1638    fprint_polynomial (fp, F->coeff, F->deg);
1639    fprintf (fp, "# ");
1640    fprint_polynomial (fp, G->coeff, G->deg);
1641 #endif
1642 
1643    fprintf (fp, "skew: %1.3f\n", p->skew);
1644 
1645    if (G->deg > 1)
1646    {
1647     logmu = L2_lognorm (G, p->skew);
1648     alpha = get_alpha (G, get_alpha_bound ());
1649     alpha_proj = get_alpha_projective (G, get_alpha_bound ());
1650     nroots = numberOfRealRoots ((const mpz_t *) G->coeff, G->deg, 0, 0, NULL);
1651     fprintf (fp, "# lognorm: %1.2f, alpha: %1.2f (proj: %1.2f), E: %1.2f, "
1652                  "nr: %u\n", logmu, alpha, alpha_proj, logmu + alpha, nroots);
1653    }
1654 
1655    logmu = L2_lognorm (F, p->skew);
1656    alpha = get_alpha (F, get_alpha_bound ());
1657    alpha_proj = get_alpha_projective (F, get_alpha_bound ());
1658    nroots = numberOfRealRoots ((const mpz_t *) F->coeff, F->deg, 0, 0, NULL);
1659    fprintf (fp, "# lognorm: %1.2f, alpha: %1.2f (proj: %1.2f), E: %1.2f, "
1660                 "nr: %u\n", logmu, alpha, alpha_proj, logmu + alpha, nroots);
1661 
1662    int alpha_bound = get_alpha_bound ();
1663    e = MurphyE (p, bound_f, bound_g, area, MURPHY_K, alpha_bound);
1664    cado_poly_fprintf_MurphyE (fp, e, bound_f, bound_g, area, "");
1665 
1666    return e;
1667 }
1668 
1669 
1670 /*
1671    Print f, g, lognorm, skew, alpha, MurphyE, REV, time duration.
1672 */
1673 void
print_cadopoly_extra(FILE * fp,cado_poly cpoly,int argc,char * argv[],double st)1674 print_cadopoly_extra (FILE *fp, cado_poly cpoly, int argc, char *argv[], double st)
1675 {
1676    int i;
1677 
1678    print_cadopoly (fp, cpoly);
1679    /* extra info */
1680    fprintf (fp, "# generated by %s: %s", cado_revision_string, argv[0]);
1681    for (i = 1; i < argc; i++)
1682       fprintf (fp, " %s", argv[i]);
1683    fprintf (fp, " in %.2fs\n", (seconds () - st));
1684 }
1685 
1686 
1687 /*
1688   Call print_cadopoly, given f, g and return MurphyE.
1689 */
1690 double
print_poly_fg(mpz_poly_srcptr f,mpz_t * g,mpz_t N,int mode)1691 print_poly_fg (mpz_poly_srcptr f, mpz_t *g, mpz_t N, int mode)
1692 {
1693    double e;
1694    int i;
1695    int d = f->deg;
1696 
1697    cado_poly cpoly;
1698    cado_poly_init(cpoly);
1699    for (i = 0; i < (d + 1); i++)
1700       mpz_set(cpoly->pols[ALG_SIDE]->coeff[i], f->coeff[i]);
1701    for (i = 0; i < 2; i++)
1702       mpz_set(cpoly->pols[RAT_SIDE]->coeff[i], g[i]);
1703    mpz_set(cpoly->n, N);
1704    cpoly->skew = L2_skewness (f, SKEWNESS_DEFAULT_PREC);
1705    cpoly->pols[ALG_SIDE]->deg = d;
1706    cpoly->pols[RAT_SIDE]->deg = 1;
1707 
1708    if (mode == 1)
1709      {
1710        e = print_cadopoly (stdout, cpoly);
1711        fflush(stdout);
1712      }
1713    else
1714      e = MurphyE (cpoly, bound_f, bound_g, area, MURPHY_K, get_alpha_bound ());
1715 
1716    cado_poly_clear (cpoly);
1717    return e;
1718 }
1719 
1720 /* f <- f(x+k), g <- g(x+k) */
1721 void
do_translate_z(mpz_poly_ptr f,mpz_t * g,const mpz_t k)1722 do_translate_z (mpz_poly_ptr f, mpz_t *g, const mpz_t k)
1723 {
1724   int i, j;
1725   int d = f->deg;
1726 
1727   for (i = d - 1; i >= 0; i--)
1728     for (j = i; j < d; j++)
1729       mpz_addmul (f->coeff[j], f->coeff[j+1], k);
1730   mpz_addmul (g[0], g[1], k);
1731 }
1732 
1733 /* f <- f(x-k), g <- g(x-k) */
1734 void
do_detranslate_z(mpz_poly_ptr f,mpz_t * g,const mpz_t k)1735 do_detranslate_z (mpz_poly_ptr f, mpz_t *g, const mpz_t k)
1736 {
1737   int i, j;
1738   int d = f->deg;
1739 
1740   for (i = d - 1; i >= 0; i--)
1741     for (j = i; j < d; j++)
1742       mpz_submul (f->coeff[j], f->coeff[j+1], k);
1743   mpz_submul (g[0], g[1], k);
1744 }
1745 
1746 
1747 /* If final <> 0, print the real value of E (root-optimized polynomial),
1748    otherwise print the expected value of E. Return E or exp_E accordingly.
1749    TODO: adapt for more than 2 polynomials and two algebraic polynomials */
1750 double
cado_poly_fprintf_with_info(FILE * fp,cado_poly_ptr poly,const char * prefix,int final)1751 cado_poly_fprintf_with_info (FILE *fp, cado_poly_ptr poly, const char *prefix,
1752                              int final)
1753 {
1754   unsigned int nrroots;
1755   double lognorm, alpha, alpha_proj, exp_E;
1756 
1757   nrroots = numberOfRealRoots ((const mpz_t *) poly->pols[ALG_SIDE]->coeff, poly->pols[ALG_SIDE]->deg, 0, 0, NULL);
1758   if (poly->skew <= 0.0) /* If skew is undefined, compute it. */
1759     poly->skew = L2_skewness (poly->pols[ALG_SIDE], SKEWNESS_DEFAULT_PREC);
1760   lognorm = L2_lognorm (poly->pols[ALG_SIDE], poly->skew);
1761   alpha = get_alpha (poly->pols[ALG_SIDE], get_alpha_bound ());
1762   alpha_proj = get_alpha_projective (poly->pols[ALG_SIDE], get_alpha_bound ());
1763   exp_E = (final) ? 0.0 : lognorm
1764     + expected_rotation_gain (poly->pols[ALG_SIDE], poly->pols[RAT_SIDE]);
1765 
1766   cado_poly_fprintf (stdout, poly, prefix);
1767   cado_poly_fprintf_info (fp, lognorm, exp_E, alpha, alpha_proj, nrroots,
1768                           prefix);
1769   return (final) ? lognorm + alpha : exp_E;
1770 }
1771 
1772 /* TODO: adapt for more than 2 polynomials and two algebraic polynomials */
1773 double
cado_poly_fprintf_with_info_and_MurphyE(FILE * fp,cado_poly_ptr poly,double MurphyE,double bound_f,double bound_g,double area,const char * prefix)1774 cado_poly_fprintf_with_info_and_MurphyE (FILE *fp, cado_poly_ptr poly,
1775                                          double MurphyE, double bound_f,
1776                                          double bound_g, double area,
1777                                          const char *prefix)
1778 {
1779   double exp_E;
1780   exp_E = cado_poly_fprintf_with_info (fp, poly, prefix, 1);
1781   cado_poly_fprintf_MurphyE (fp, MurphyE, bound_f, bound_g, area, prefix);
1782   return exp_E;
1783 }
1784 
1785 static double
expected_alpha(double S)1786 expected_alpha (double S)
1787 {
1788   double logS, t;
1789 
1790   if (S <= 1.0)
1791     return 0.0;
1792 
1793   logS = log (S);
1794   t = sqrt (2 * logS);
1795   return -0.824 * (t - (log (logS) + 1.3766) / (2 * t));
1796 }
1797 
1798 /* compute largest interval kmin <= k <= kmax such that when we add k*x^i*g(x)
1799    to f(x), the lognorm does not exceed maxlognorm (with skewness s) */
1800 void
expected_growth(rotation_space * r,mpz_poly_srcptr f,mpz_poly_srcptr g,int i,double maxlognorm,double s)1801 expected_growth (rotation_space *r, mpz_poly_srcptr f, mpz_poly_srcptr g, int i,
1802                  double maxlognorm, double s)
1803 {
1804   mpz_t fi, fip1, kmin, kmax, k;
1805   double n2;
1806 
1807   mpz_init_set (fi, f->coeff[i]);
1808   mpz_init_set (fip1, f->coeff[i+1]);
1809   mpz_init (kmin);
1810   mpz_init (kmax);
1811   mpz_init (k);
1812 
1813   /* negative side */
1814   mpz_set_si (kmin, -1);
1815   for (;;)
1816     {
1817       mpz_set (f->coeff[i], fi);
1818       mpz_set (f->coeff[i+1], fip1);
1819       rotate_auxg_z (f->coeff, g->coeff[1], g->coeff[0], kmin, i);
1820       n2 = L2_lognorm (f, s);
1821       if (n2 > maxlognorm)
1822         break;
1823       mpz_mul_2exp (kmin, kmin, 1);
1824     }
1825   /* now kmin < k < kmin/2 */
1826   mpz_tdiv_q_2exp (kmax, kmin, 1);
1827   while (1)
1828     {
1829       mpz_add (k, kmin, kmax);
1830       mpz_div_2exp (k, k, 1);
1831       if (mpz_cmp (k, kmin) == 0 || mpz_cmp (k, kmax) == 0)
1832         break;
1833       mpz_set (f->coeff[i], fi);
1834       mpz_set (f->coeff[i+1], fip1);
1835       rotate_auxg_z (f->coeff, g->coeff[1], g->coeff[0], k, i);
1836       n2 = L2_lognorm (f, s);
1837       if (n2 > maxlognorm)
1838         mpz_set (kmin, k);
1839       else
1840         mpz_set (kmax, k);
1841     }
1842   r->kmin = mpz_get_d (kmax);
1843 
1844   /* positive side */
1845   mpz_set_ui (kmax, 1);
1846   for (;;)
1847     {
1848       mpz_set (f->coeff[i], fi);
1849       mpz_set (f->coeff[i+1], fip1);
1850       rotate_auxg_z (f->coeff, g->coeff[1], g->coeff[0], kmax, i);
1851       n2 = L2_lognorm (f, s);
1852       if (n2 > maxlognorm)
1853         break;
1854       mpz_mul_2exp (kmax, kmax, 1);
1855     }
1856   /* now kmax < k < kmax/2 */
1857   mpz_tdiv_q_2exp (kmin, kmax, 1);
1858   while (1)
1859     {
1860       mpz_add (k, kmin, kmax);
1861       mpz_div_2exp (k, k, 1);
1862       if (mpz_cmp (k, kmin) == 0 || mpz_cmp (k, kmax) == 0)
1863         break;
1864       mpz_set (f->coeff[i], fi);
1865       mpz_set (f->coeff[i+1], fip1);
1866       rotate_auxg_z (f->coeff, g->coeff[1], g->coeff[0], k, i);
1867       n2 = L2_lognorm (f, s);
1868       if (n2 > maxlognorm)
1869         mpz_set (kmax, k);
1870       else
1871         mpz_set (kmin, k);
1872     }
1873   r->kmax = mpz_get_d (kmin);
1874 
1875   /* reset f[i] and f[i+1] */
1876   mpz_set (f->coeff[i], fi);
1877   mpz_set (f->coeff[i+1], fip1);
1878 
1879   mpz_clear (fi);
1880   mpz_clear (fip1);
1881   mpz_clear (kmin);
1882   mpz_clear (kmax);
1883   mpz_clear (k);
1884 }
1885 
1886 /* for a given pair (f,g), tries to estimate the value of alpha one might
1887    expect from rotation (including the projective alpha) */
1888 double
expected_rotation_gain(mpz_poly_srcptr f,mpz_poly_srcptr g)1889 expected_rotation_gain (mpz_poly_srcptr f, mpz_poly_srcptr g)
1890 {
1891   double S = 1.0, s, incr = 0.0;
1892   rotation_space r;
1893   double proj_alpha = get_alpha_projective (f, ALPHA_BOUND_SMALL);
1894   double skew = L2_skewness (f, SKEWNESS_DEFAULT_PREC);
1895   double n = L2_lognorm (f, skew);
1896 
1897   for (int i = 0; 2 * i < f->deg; i++)
1898     {
1899       expected_growth (&r, f, g, i, n + NORM_MARGIN, skew);
1900       s = r.kmax - r.kmin + 1.0;
1901       S *= s;
1902       /* assume each non-zero rotation increases on average by NORM_MARGIN/2 */
1903       if (s >= 2.0)
1904         incr += NORM_MARGIN / 2.0;
1905     }
1906   return proj_alpha + expected_alpha (S) + incr;
1907 }
1908 
1909 int alpha_bound = ALPHA_BOUND; /* default value */
1910 
1911 void
set_alpha_bound(unsigned long bound)1912 set_alpha_bound (unsigned long bound)
1913 {
1914   alpha_bound = bound;
1915 }
1916 
1917 unsigned long
get_alpha_bound(void)1918 get_alpha_bound (void)
1919 {
1920   return alpha_bound;
1921 }
1922