1 /****************************************************************************
2  *
3  * cpoly.C                                                       René Hartung
4  *                                                          Laurent Bartholdi
5  *
6  *   @(#)$id: fr_dll.c,v 1.18 2010/10/26 05:19:40 gap exp $
7  *
8  * Copyright (c) 2011, Laurent Bartholdi
9  * based on TOMS/417
10  *
11  ****************************************************************************
12  *
13  * template for root-finding of complex polynomial
14  *
15  ****************************************************************************/
16 
17 // CAUCHY COMPUTES A LOWER BOUND ON THE MODULI OF THE ZEROS OF A
18 // POLYNOMIAL - PT IS THE MODULUS OF THE COEFFICIENTS.
19 //
cauchy(const int deg,xcomplex * P)20 static xreal cauchy(const int deg, xcomplex *P)
21 {
22   xreal x, xm, f, dx, df, *tmp = new xreal[deg+1];
23 
24   for(int i = 0; i<=deg; i++){ tmp[i] = xabs(P[i]); };
25 
26   // Compute upper estimate bound
27   x = xroot(tmp[deg],deg) / xroot(tmp[0],deg);
28   if(tmp[deg - 1] != 0.0) {
29     // Newton step at the origin is better, use it
30     xm = tmp[deg] / tmp[deg-1];
31     if (xm < x) x = xm;
32   }
33 
34   tmp[deg] = -tmp[deg];
35 
36   // Chop the interval (0,x) until f < 0
37   while(1) {
38     xm = x * 0.1;
39     // Evaluate the polynomial <tmp> at <xm>
40     f = tmp[0];
41     for(int i = 1; i <= deg; i++)
42       f = f * xm + tmp[i];
43 
44     if(f <= 0.0) break;
45     x = xm;
46   }
47   dx = x;
48 
49    // Do Newton iteration until x converges to two decimal places
50   while(xabs(dx / x) > 0.005) {
51     f  = tmp[0];
52     df = 0.0;
53     for(int i = 1; i <= deg; i++){
54       df = df * x + f;
55       f  =  f * x + tmp[i];
56     }
57 
58     dx = f / df;
59     x -= dx;				// Newton step
60   }
61 
62   delete[] tmp;
63 
64   return x;
65 }
66 
67 // RETURNS A SCALE FACTOR TO MULTIPLY THE COEFFICIENTS OF THE POLYNOMIAL.
68 // THE SCALING IS DONE TO AVOID OVERFLOW AND TO AVOID UNDETECTED UNDERFLOW
69 // INTERFERING WITH THE CONVERGENCE CRITERION.  THE FACTOR IS A POWER OF THE
70 //int BASE.
71 // PT - MODULUS OF COEFFICIENTS OF P
72 // ETA, INFIN, SMALNO, BASE - CONSTANTS DESCRIBING THE FLOATING POINT ARITHMETIC.
73 //
scale(const int deg,xcomplex * P)74 static void scale(const int deg, xcomplex* P)
75 {
76    int hi, lo, max, min, x, sc;
77 
78    // Find largest and smallest moduli of coefficients
79    hi = (int)(xMAX_EXP / 2.0);
80    lo = (int)(xMIN_EXP - xbits(P[0]));
81    max = xlogb(P[0]); // leading coefficient does not vanish!
82    min = xlogb(P[0]);
83 
84    for(int i = 0; i <= deg; i++) {
85       if (P[i] != 0){
86         x = xlogb(P[i]);
87         if(x > max) max = x;
88         if(x < min) min = x;
89       }
90     }
91 
92    // Scale only if there are very large or very small components
93    if(min >= lo && max <= hi) return;
94 
95    x = lo - min;
96    if(x <= 0)
97       sc = -(max+min) / 2;
98    else {
99       sc = x;
100       if(xMAX_EXP - sc > max) sc = 0;
101       }
102 
103    // Scale the polynomial
104    for(int i = 0; i<= deg; i++){ xscalbln(&P[i],sc); }
105 }
106 
107 // COMPUTES  THE DERIVATIVE  POLYNOMIAL AS THE INITIAL H
108 // POLYNOMIAL AND COMPUTES L1 NO-SHIFT H POLYNOMIALS.
109 //
noshft(const int l1,int deg,xcomplex * P,xcomplex * H)110 static void noshft(const int l1, int deg, xcomplex *P, xcomplex *H)
111 {
112   int i, j, jj;
113   xcomplex t;
114 
115   // compute the first H-polynomial as the (normed) derivative of P
116   for(i = 0; i < deg; i++)
117     H[i] = (P[i] * (deg-i)) / deg;
118 
119   for(jj = 1; jj <= l1; jj++) {
120     if(xnorm(H[deg - 1]) > xeta(P[deg-1])*xeta(P[deg-1])* 10*10 * xnorm(P[deg - 1])) {
121       t = -P[deg] / H[deg-1];
122       for(i = 0; i < deg-1; i++){
123 	j = deg - i - 1;
124 	H[j] = t * H[j-1] + P[j];
125       }
126       H[0] = P[0];
127     } else {
128       // if the constant term is essentially zero, shift H coefficients
129       for(i = 0; i < deg-1; i++) {
130 	j = deg - i - 1;
131 	H[j] = H[j - 1];
132       }
133       H[0] = 0;
134     }
135   }
136 }
137 
138 // EVALUATES A POLYNOMIAL  P  AT  S  BY THE HORNER RECURRENCE
139 // PLACING THE PARTIAL SUMS IN Q AND THE COMPUTED VALUE IN PV.
140 //
polyev(const int deg,const xcomplex & s,const xcomplex * Q,xcomplex * q)141 static xcomplex polyev(const int deg, const xcomplex &s, const xcomplex *Q, xcomplex *q) {
142    q[0] = Q[0];
143    for(int i = 1; i <= deg; i++){ q[i] = q[i-1] * s + Q[i]; };
144    return q[deg];
145 }
146 
147 // COMPUTES  T = -P(S)/H(S).
148 // BOOL   - LOGICAL, SET TRUE IF H(S) IS ESSENTIALLY ZERO.
149 //
calct(bool * bol,int deg,xcomplex & Ps,xcomplex * H,xcomplex * h,xcomplex & s)150 static xcomplex calct(bool *bol, int deg, xcomplex &Ps, xcomplex *H, xcomplex *h, xcomplex &s){
151   xcomplex Hs = polyev(deg-1, s, H, h);
152   *bol = xnorm(Hs) <= xeta(H[deg-1])*xeta(H[deg-1]) * 10*10 * xnorm(H[deg-1]);
153   if(!*bol)
154     return -Ps / Hs;
155   else
156     return xcomplex(0);
157 }
158 
159 // CALCULATES THE NEXT SHIFTED H POLYNOMIAL.
160 // BOOL   -  LOGICAL, IF .TRUE. H(S) IS ESSENTIALLY ZERO
161 //
nexth(const bool bol,int deg,xcomplex & t,xcomplex * H,xcomplex * h,xcomplex * p)162 static void nexth(const bool bol, int deg, xcomplex &t, xcomplex *H, xcomplex *h, xcomplex *p){
163    if(!bol){
164       for(int j = 1; j < deg; j++)
165          H[j] = t * h[j-1] + p[j];
166       H[0] = p[0];
167       }
168    else {
169      // If h[s] is zero replace H with qh
170      for(int j = 1; j < deg; j++)
171         H[j] = h[j - 1];
172      h[0] = 0;
173    }
174 }
175 
176 // BOUNDS THE ERROR IN EVALUATING THE POLYNOMIAL BY THE HORNER RECURRENCE.
177 // QR,QI - THE PARTIAL SUMS
178 // MS    -MODULUS OF THE POINT
179 // MP    -MODULUS OF POLYNOMIAL VALUE
180 // ARE, MRE -ERROR BOUNDS ON COMPLEX ADDITION AND MULTIPLICATION
181 //
errev(const int deg,const xcomplex * p,const xreal ms,const xreal & mp)182 static xreal errev(const int deg, const xcomplex *p, const xreal ms, const xreal &mp){
183    xreal MRE = 2.0 * sqrt(2.0) * xeta(p[0]);
184    xreal e =  xabs(p[0]) * MRE / (xeta(p[0]) + MRE);
185 
186    for(int i = 0; i <= deg; i++)
187       e = e * ms + xabs(p[i]);
188 
189    return e * (xeta(p[0]) + MRE) - MRE * mp;
190 }
191 
192 // CARRIES OUT THE THIRD STAGE ITERATION.
193 // L3 - LIMIT OF STEPS IN STAGE 3.
194 // ZR,ZI   - ON ENTRY CONTAINS THE INITIAL ITERATE, IF THE
195 //           ITERATION CONVERGES IT CONTAINS THE FINAL ITERATE ON EXIT.
196 // CONV    -  .TRUE. IF ITERATION CONVERGES
197 //
vrshft(const int l3,int deg,xcomplex * P,xcomplex * p,xcomplex * H,xcomplex * h,xcomplex * zero,xcomplex * s)198 static bool vrshft(const int l3, int deg, xcomplex *P, xcomplex *p, xcomplex *H, xcomplex *h, xcomplex *zero, xcomplex *s){
199   bool bol, conv, b;
200   int i, j;
201   xcomplex Ps, t;
202   xreal mp, ms, omp = 0.0, relstp = 0.0, tp;
203 
204   conv = b = false;
205   *s = *zero;
206 
207   // Main loop for stage three
208   for(i = 1; i <= l3; i++) {
209     // Evaluate P at S and test for convergence
210     Ps = polyev(deg, *s, P, p);
211     mp = xabs(Ps);
212     ms = xabs(*s);
213 
214     xreal zz = 20.0 * errev(deg, p, ms, mp);
215 
216     if (mp <= zz) {
217       conv = true;
218     }
219 
220     if(mp <= 20.0 * errev(deg, p, ms, mp)) {
221       // Polynomial value is smaller in value than a bound on the error
222       // in evaluating P, terminate the iteration
223       conv = true;
224       *zero = *s;
225       return conv;
226     }
227 
228     if(i != 1) {
229       if(!(b || mp < omp || relstp >= 0.05)){
230 	//       if(!(b || xlogb(mp) < omp || real(relstp) >= 0.05)){
231 	// Iteration has stalled. Probably a cluster of zeros. Do 5 fixed
232 	// shift steps into the cluster to force one zero to dominate
233 	tp = relstp;
234 	b = true;
235 	if(relstp < xeta(P[0])) tp = xeta(P[0]);
236 
237 	tp = xsqrt(tp);
238 	*s *= xcomplex(1.0+tp,tp);
239 
240 	Ps = polyev(deg, *s, P, p);
241 	for(j = 1; j <= 5; j++){
242 	  t = calct(&bol, deg, Ps, H, h, *s);
243 	  nexth(bol, deg, t, H, h, p);
244 	}
245 	omp = xINFIN;
246 	goto _20;
247       }
248 
249       // Exit if polynomial value increase significantly
250       if(mp * 0.1 > omp) return conv;
251     }
252 
253     omp = mp;
254 
255     // Calculate next iterate
256   _20:  t = calct(&bol, deg, Ps, H, h, *s);
257     nexth(bol, deg, t, H, h, p);
258     t = calct(&bol, deg, Ps, H, h, *s);
259     if(!bol) {
260       relstp = xabs(t) / xabs(*s);
261       *s += t;
262     }
263   } // end for
264 
265   return conv;
266 }
267 
268 // COMPUTES L2 FIXED-SHIFT H POLYNOMIALS AND TESTS FOR CONVERGENCE.
269 // INITIATES A VARIABLE-SHIFT ITERATION AND RETURNS WITH THE
270 // APPROXIMATE ZERO IF SUCCESSFUL.
271 // L2 - LIMIT OF FIXED SHIFT STEPS
272 // ZR,ZI - APPROXIMATE ZERO IF CONV IS .TRUE.
273 // CONV  - LOGICAL INDICATING CONVERGENCE OF STAGE 3 ITERATION
274 //
fxshft(const int l2,int deg,xcomplex * P,xcomplex * p,xcomplex * H,xcomplex * h,xcomplex * zero,xcomplex * s)275 static bool fxshft(const int l2, int deg, xcomplex *P, xcomplex *p, xcomplex *H, xcomplex *h, xcomplex *zero, xcomplex *s){
276    bool bol, conv;	 	       // boolean for convergence of stage 2
277    bool test, pasd;
278    xcomplex old_T, old_S, Ps, t;
279    xcomplex *tmp = new xcomplex[deg+1];
280 
281    Ps = polyev(deg, *s, P, p);
282    test = true;
283    pasd = false;
284 
285    // Calculate first T = -P(S)/H(S)
286    t = calct(&bol, deg, Ps, H, h, *s);
287 
288    // Main loop for second stage
289    for(int j = 1; j <= l2; j++){
290       old_T = t;
291 
292       // Compute the next H Polynomial and new t
293       nexth(bol, deg, t, H, h, p);
294       t = calct(&bol, deg, Ps, H, h, *s);
295       *zero = *s + t;
296 
297       // Test for convergence unless stage 3 has failed once or this
298       // is the last H Polynomial
299       if(!(bol || !test || j == l2)){
300          if(xabs(t - old_T) < 0.5 * xabs(*zero)) {
301             if(pasd) {
302                // The weak convergence test has been passwed twice, start the third stage
303                // Iteration, after saving the current H polynomial and shift
304                for(int i = 0; i < deg; i++)
305                   tmp[i] = H[i];
306                old_S = *s;
307 
308                conv = vrshft(10, deg, P, p, H, h, zero, s);
309                if(conv) goto done;
310 
311                //The iteration failed to converge. Turn off testing and restore h,s,pv and T
312                test = false;
313                for(int i = 0; i < deg; i++)
314                   H[i] = tmp[i];
315                *s = old_S;
316 
317                Ps = polyev(deg, *s, P, p);
318                t = calct(&bol, deg, Ps, H, h, *s);
319                continue;
320                }
321             pasd = true;
322             }
323          else
324             pasd = false;
325       }
326    }
327 
328    // Attempt an iteration with final H polynomial from second stage
329    conv = vrshft(10, deg, P, p, H, h, zero, s);
330 
331  done:
332    delete[] tmp;
333 
334    return conv;
335 }
336 
337 // Main function
338 //
cpoly(int degree,const xcomplex * poly,xcomplex * Roots,int prec)339 extern "C" int cpoly(int degree, const xcomplex *poly, xcomplex *Roots, int prec)
340 {
341   default_prec = prec; // ugly!
342   // __asm__("int3");
343 
344   xcomplex PhiDiff(-0.069756473,0.99756405);
345   xcomplex PhiRand(1.0/sqrt(2.0),-1.0/sqrt(2.0));
346   unsigned int conv = 0;
347 
348   while(poly[0] == 0) {
349     poly++;
350     degree--;
351     if (degree < 0)
352       return -1;
353   };
354 
355   xcomplex *P = new xcomplex[degree+1], *p = new xcomplex[degree+1],
356     *H = new xcomplex[degree+1], *h = new xcomplex[degree+1],
357     zero, s, bnd;
358 
359   int deg = degree;
360 
361   // Remove the zeros at the origin if any
362   while(poly[deg] == 0){
363     Roots[degree - deg] = 0;
364     deg--;
365   }
366 
367   if (deg == 0) goto done;
368 
369   // Make a copy of the coefficients
370   for(int i = 0; i <= deg; i++) { P[i] = poly[i]; }
371 
372   scale(deg, P);
373 
374  search:
375 
376   if(deg <= 1){
377     Roots[degree-1] = - P[1] / P[0];
378     deg = 0;
379     goto done;
380   }
381 
382   // compute a bound of the moduli of the roots (Newton-Raphson)
383   bnd = cauchy(deg, P);
384 
385   // Outer loop to control 2 Major passes with different sequences of shifts
386   for(int cnt1 = 1; cnt1 <= 2; cnt1++) {
387     // First stage  calculation , no shift
388     noshft(5, deg, P, H);
389 
390     // Inner loop to select a shift
391     for(int cnt2 = 1; cnt2 <= 9; cnt2++) {
392       // Shift is chosen with modulus bnd and amplitude rotated by 94 degree from the previous shif
393       PhiRand = PhiDiff * PhiRand;
394       s = bnd * PhiRand;
395 
396       // Second stage calculation, fixed shift
397       conv = fxshft(10 * cnt2, deg, P, p, H, h, &zero, &s);
398       if(conv) {
399 	// The second stage jumps directly to the third stage iteration
400 	// If successful the zero is stored and the polynomial deflated
401 	Roots[degree - deg] = zero;
402 
403 	// continue with the remaining polynomial
404 	deg--;
405 	for(int i = 0; i <= deg; i++){ P[i] = p[i]; };
406 	goto search;
407       }
408       // if the iteration is unsuccessful another shift is chosen
409     }
410 
411     // if 9 shifts fail, the outer loop is repeated with another sequence of shifts
412   }
413 
414   // The zerofinder has failed on two major passes
415   // return empty handed with the number of roots found (less than the original degree)
416  done:
417   delete[] p;
418   delete[] P;
419   delete[] h;
420   delete[] H;
421   return degree - deg;
422 }
423