1 /****************************************************************************
2 *
3 * cpoly.C René Hartung
4 * Laurent Bartholdi
5 *
6 * @(#)$id: fr_dll.c,v 1.18 2010/10/26 05:19:40 gap exp $
7 *
8 * Copyright (c) 2011, Laurent Bartholdi
9 * based on TOMS/417
10 *
11 ****************************************************************************
12 *
13 * template for root-finding of complex polynomial
14 *
15 ****************************************************************************/
16
17 // CAUCHY COMPUTES A LOWER BOUND ON THE MODULI OF THE ZEROS OF A
18 // POLYNOMIAL - PT IS THE MODULUS OF THE COEFFICIENTS.
19 //
cauchy(const int deg,xcomplex * P)20 static xreal cauchy(const int deg, xcomplex *P)
21 {
22 xreal x, xm, f, dx, df, *tmp = new xreal[deg+1];
23
24 for(int i = 0; i<=deg; i++){ tmp[i] = xabs(P[i]); };
25
26 // Compute upper estimate bound
27 x = xroot(tmp[deg],deg) / xroot(tmp[0],deg);
28 if(tmp[deg - 1] != 0.0) {
29 // Newton step at the origin is better, use it
30 xm = tmp[deg] / tmp[deg-1];
31 if (xm < x) x = xm;
32 }
33
34 tmp[deg] = -tmp[deg];
35
36 // Chop the interval (0,x) until f < 0
37 while(1) {
38 xm = x * 0.1;
39 // Evaluate the polynomial <tmp> at <xm>
40 f = tmp[0];
41 for(int i = 1; i <= deg; i++)
42 f = f * xm + tmp[i];
43
44 if(f <= 0.0) break;
45 x = xm;
46 }
47 dx = x;
48
49 // Do Newton iteration until x converges to two decimal places
50 while(xabs(dx / x) > 0.005) {
51 f = tmp[0];
52 df = 0.0;
53 for(int i = 1; i <= deg; i++){
54 df = df * x + f;
55 f = f * x + tmp[i];
56 }
57
58 dx = f / df;
59 x -= dx; // Newton step
60 }
61
62 delete[] tmp;
63
64 return x;
65 }
66
67 // RETURNS A SCALE FACTOR TO MULTIPLY THE COEFFICIENTS OF THE POLYNOMIAL.
68 // THE SCALING IS DONE TO AVOID OVERFLOW AND TO AVOID UNDETECTED UNDERFLOW
69 // INTERFERING WITH THE CONVERGENCE CRITERION. THE FACTOR IS A POWER OF THE
70 //int BASE.
71 // PT - MODULUS OF COEFFICIENTS OF P
72 // ETA, INFIN, SMALNO, BASE - CONSTANTS DESCRIBING THE FLOATING POINT ARITHMETIC.
73 //
scale(const int deg,xcomplex * P)74 static void scale(const int deg, xcomplex* P)
75 {
76 int hi, lo, max, min, x, sc;
77
78 // Find largest and smallest moduli of coefficients
79 hi = (int)(xMAX_EXP / 2.0);
80 lo = (int)(xMIN_EXP - xbits(P[0]));
81 max = xlogb(P[0]); // leading coefficient does not vanish!
82 min = xlogb(P[0]);
83
84 for(int i = 0; i <= deg; i++) {
85 if (P[i] != 0){
86 x = xlogb(P[i]);
87 if(x > max) max = x;
88 if(x < min) min = x;
89 }
90 }
91
92 // Scale only if there are very large or very small components
93 if(min >= lo && max <= hi) return;
94
95 x = lo - min;
96 if(x <= 0)
97 sc = -(max+min) / 2;
98 else {
99 sc = x;
100 if(xMAX_EXP - sc > max) sc = 0;
101 }
102
103 // Scale the polynomial
104 for(int i = 0; i<= deg; i++){ xscalbln(&P[i],sc); }
105 }
106
107 // COMPUTES THE DERIVATIVE POLYNOMIAL AS THE INITIAL H
108 // POLYNOMIAL AND COMPUTES L1 NO-SHIFT H POLYNOMIALS.
109 //
noshft(const int l1,int deg,xcomplex * P,xcomplex * H)110 static void noshft(const int l1, int deg, xcomplex *P, xcomplex *H)
111 {
112 int i, j, jj;
113 xcomplex t;
114
115 // compute the first H-polynomial as the (normed) derivative of P
116 for(i = 0; i < deg; i++)
117 H[i] = (P[i] * (deg-i)) / deg;
118
119 for(jj = 1; jj <= l1; jj++) {
120 if(xnorm(H[deg - 1]) > xeta(P[deg-1])*xeta(P[deg-1])* 10*10 * xnorm(P[deg - 1])) {
121 t = -P[deg] / H[deg-1];
122 for(i = 0; i < deg-1; i++){
123 j = deg - i - 1;
124 H[j] = t * H[j-1] + P[j];
125 }
126 H[0] = P[0];
127 } else {
128 // if the constant term is essentially zero, shift H coefficients
129 for(i = 0; i < deg-1; i++) {
130 j = deg - i - 1;
131 H[j] = H[j - 1];
132 }
133 H[0] = 0;
134 }
135 }
136 }
137
138 // EVALUATES A POLYNOMIAL P AT S BY THE HORNER RECURRENCE
139 // PLACING THE PARTIAL SUMS IN Q AND THE COMPUTED VALUE IN PV.
140 //
polyev(const int deg,const xcomplex & s,const xcomplex * Q,xcomplex * q)141 static xcomplex polyev(const int deg, const xcomplex &s, const xcomplex *Q, xcomplex *q) {
142 q[0] = Q[0];
143 for(int i = 1; i <= deg; i++){ q[i] = q[i-1] * s + Q[i]; };
144 return q[deg];
145 }
146
147 // COMPUTES T = -P(S)/H(S).
148 // BOOL - LOGICAL, SET TRUE IF H(S) IS ESSENTIALLY ZERO.
149 //
calct(bool * bol,int deg,xcomplex & Ps,xcomplex * H,xcomplex * h,xcomplex & s)150 static xcomplex calct(bool *bol, int deg, xcomplex &Ps, xcomplex *H, xcomplex *h, xcomplex &s){
151 xcomplex Hs = polyev(deg-1, s, H, h);
152 *bol = xnorm(Hs) <= xeta(H[deg-1])*xeta(H[deg-1]) * 10*10 * xnorm(H[deg-1]);
153 if(!*bol)
154 return -Ps / Hs;
155 else
156 return xcomplex(0);
157 }
158
159 // CALCULATES THE NEXT SHIFTED H POLYNOMIAL.
160 // BOOL - LOGICAL, IF .TRUE. H(S) IS ESSENTIALLY ZERO
161 //
nexth(const bool bol,int deg,xcomplex & t,xcomplex * H,xcomplex * h,xcomplex * p)162 static void nexth(const bool bol, int deg, xcomplex &t, xcomplex *H, xcomplex *h, xcomplex *p){
163 if(!bol){
164 for(int j = 1; j < deg; j++)
165 H[j] = t * h[j-1] + p[j];
166 H[0] = p[0];
167 }
168 else {
169 // If h[s] is zero replace H with qh
170 for(int j = 1; j < deg; j++)
171 H[j] = h[j - 1];
172 h[0] = 0;
173 }
174 }
175
176 // BOUNDS THE ERROR IN EVALUATING THE POLYNOMIAL BY THE HORNER RECURRENCE.
177 // QR,QI - THE PARTIAL SUMS
178 // MS -MODULUS OF THE POINT
179 // MP -MODULUS OF POLYNOMIAL VALUE
180 // ARE, MRE -ERROR BOUNDS ON COMPLEX ADDITION AND MULTIPLICATION
181 //
errev(const int deg,const xcomplex * p,const xreal ms,const xreal & mp)182 static xreal errev(const int deg, const xcomplex *p, const xreal ms, const xreal &mp){
183 xreal MRE = 2.0 * sqrt(2.0) * xeta(p[0]);
184 xreal e = xabs(p[0]) * MRE / (xeta(p[0]) + MRE);
185
186 for(int i = 0; i <= deg; i++)
187 e = e * ms + xabs(p[i]);
188
189 return e * (xeta(p[0]) + MRE) - MRE * mp;
190 }
191
192 // CARRIES OUT THE THIRD STAGE ITERATION.
193 // L3 - LIMIT OF STEPS IN STAGE 3.
194 // ZR,ZI - ON ENTRY CONTAINS THE INITIAL ITERATE, IF THE
195 // ITERATION CONVERGES IT CONTAINS THE FINAL ITERATE ON EXIT.
196 // CONV - .TRUE. IF ITERATION CONVERGES
197 //
vrshft(const int l3,int deg,xcomplex * P,xcomplex * p,xcomplex * H,xcomplex * h,xcomplex * zero,xcomplex * s)198 static bool vrshft(const int l3, int deg, xcomplex *P, xcomplex *p, xcomplex *H, xcomplex *h, xcomplex *zero, xcomplex *s){
199 bool bol, conv, b;
200 int i, j;
201 xcomplex Ps, t;
202 xreal mp, ms, omp = 0.0, relstp = 0.0, tp;
203
204 conv = b = false;
205 *s = *zero;
206
207 // Main loop for stage three
208 for(i = 1; i <= l3; i++) {
209 // Evaluate P at S and test for convergence
210 Ps = polyev(deg, *s, P, p);
211 mp = xabs(Ps);
212 ms = xabs(*s);
213
214 xreal zz = 20.0 * errev(deg, p, ms, mp);
215
216 if (mp <= zz) {
217 conv = true;
218 }
219
220 if(mp <= 20.0 * errev(deg, p, ms, mp)) {
221 // Polynomial value is smaller in value than a bound on the error
222 // in evaluating P, terminate the iteration
223 conv = true;
224 *zero = *s;
225 return conv;
226 }
227
228 if(i != 1) {
229 if(!(b || mp < omp || relstp >= 0.05)){
230 // if(!(b || xlogb(mp) < omp || real(relstp) >= 0.05)){
231 // Iteration has stalled. Probably a cluster of zeros. Do 5 fixed
232 // shift steps into the cluster to force one zero to dominate
233 tp = relstp;
234 b = true;
235 if(relstp < xeta(P[0])) tp = xeta(P[0]);
236
237 tp = xsqrt(tp);
238 *s *= xcomplex(1.0+tp,tp);
239
240 Ps = polyev(deg, *s, P, p);
241 for(j = 1; j <= 5; j++){
242 t = calct(&bol, deg, Ps, H, h, *s);
243 nexth(bol, deg, t, H, h, p);
244 }
245 omp = xINFIN;
246 goto _20;
247 }
248
249 // Exit if polynomial value increase significantly
250 if(mp * 0.1 > omp) return conv;
251 }
252
253 omp = mp;
254
255 // Calculate next iterate
256 _20: t = calct(&bol, deg, Ps, H, h, *s);
257 nexth(bol, deg, t, H, h, p);
258 t = calct(&bol, deg, Ps, H, h, *s);
259 if(!bol) {
260 relstp = xabs(t) / xabs(*s);
261 *s += t;
262 }
263 } // end for
264
265 return conv;
266 }
267
268 // COMPUTES L2 FIXED-SHIFT H POLYNOMIALS AND TESTS FOR CONVERGENCE.
269 // INITIATES A VARIABLE-SHIFT ITERATION AND RETURNS WITH THE
270 // APPROXIMATE ZERO IF SUCCESSFUL.
271 // L2 - LIMIT OF FIXED SHIFT STEPS
272 // ZR,ZI - APPROXIMATE ZERO IF CONV IS .TRUE.
273 // CONV - LOGICAL INDICATING CONVERGENCE OF STAGE 3 ITERATION
274 //
fxshft(const int l2,int deg,xcomplex * P,xcomplex * p,xcomplex * H,xcomplex * h,xcomplex * zero,xcomplex * s)275 static bool fxshft(const int l2, int deg, xcomplex *P, xcomplex *p, xcomplex *H, xcomplex *h, xcomplex *zero, xcomplex *s){
276 bool bol, conv; // boolean for convergence of stage 2
277 bool test, pasd;
278 xcomplex old_T, old_S, Ps, t;
279 xcomplex *tmp = new xcomplex[deg+1];
280
281 Ps = polyev(deg, *s, P, p);
282 test = true;
283 pasd = false;
284
285 // Calculate first T = -P(S)/H(S)
286 t = calct(&bol, deg, Ps, H, h, *s);
287
288 // Main loop for second stage
289 for(int j = 1; j <= l2; j++){
290 old_T = t;
291
292 // Compute the next H Polynomial and new t
293 nexth(bol, deg, t, H, h, p);
294 t = calct(&bol, deg, Ps, H, h, *s);
295 *zero = *s + t;
296
297 // Test for convergence unless stage 3 has failed once or this
298 // is the last H Polynomial
299 if(!(bol || !test || j == l2)){
300 if(xabs(t - old_T) < 0.5 * xabs(*zero)) {
301 if(pasd) {
302 // The weak convergence test has been passwed twice, start the third stage
303 // Iteration, after saving the current H polynomial and shift
304 for(int i = 0; i < deg; i++)
305 tmp[i] = H[i];
306 old_S = *s;
307
308 conv = vrshft(10, deg, P, p, H, h, zero, s);
309 if(conv) goto done;
310
311 //The iteration failed to converge. Turn off testing and restore h,s,pv and T
312 test = false;
313 for(int i = 0; i < deg; i++)
314 H[i] = tmp[i];
315 *s = old_S;
316
317 Ps = polyev(deg, *s, P, p);
318 t = calct(&bol, deg, Ps, H, h, *s);
319 continue;
320 }
321 pasd = true;
322 }
323 else
324 pasd = false;
325 }
326 }
327
328 // Attempt an iteration with final H polynomial from second stage
329 conv = vrshft(10, deg, P, p, H, h, zero, s);
330
331 done:
332 delete[] tmp;
333
334 return conv;
335 }
336
337 // Main function
338 //
cpoly(int degree,const xcomplex * poly,xcomplex * Roots,int prec)339 extern "C" int cpoly(int degree, const xcomplex *poly, xcomplex *Roots, int prec)
340 {
341 default_prec = prec; // ugly!
342 // __asm__("int3");
343
344 xcomplex PhiDiff(-0.069756473,0.99756405);
345 xcomplex PhiRand(1.0/sqrt(2.0),-1.0/sqrt(2.0));
346 unsigned int conv = 0;
347
348 while(poly[0] == 0) {
349 poly++;
350 degree--;
351 if (degree < 0)
352 return -1;
353 };
354
355 xcomplex *P = new xcomplex[degree+1], *p = new xcomplex[degree+1],
356 *H = new xcomplex[degree+1], *h = new xcomplex[degree+1],
357 zero, s, bnd;
358
359 int deg = degree;
360
361 // Remove the zeros at the origin if any
362 while(poly[deg] == 0){
363 Roots[degree - deg] = 0;
364 deg--;
365 }
366
367 if (deg == 0) goto done;
368
369 // Make a copy of the coefficients
370 for(int i = 0; i <= deg; i++) { P[i] = poly[i]; }
371
372 scale(deg, P);
373
374 search:
375
376 if(deg <= 1){
377 Roots[degree-1] = - P[1] / P[0];
378 deg = 0;
379 goto done;
380 }
381
382 // compute a bound of the moduli of the roots (Newton-Raphson)
383 bnd = cauchy(deg, P);
384
385 // Outer loop to control 2 Major passes with different sequences of shifts
386 for(int cnt1 = 1; cnt1 <= 2; cnt1++) {
387 // First stage calculation , no shift
388 noshft(5, deg, P, H);
389
390 // Inner loop to select a shift
391 for(int cnt2 = 1; cnt2 <= 9; cnt2++) {
392 // Shift is chosen with modulus bnd and amplitude rotated by 94 degree from the previous shif
393 PhiRand = PhiDiff * PhiRand;
394 s = bnd * PhiRand;
395
396 // Second stage calculation, fixed shift
397 conv = fxshft(10 * cnt2, deg, P, p, H, h, &zero, &s);
398 if(conv) {
399 // The second stage jumps directly to the third stage iteration
400 // If successful the zero is stored and the polynomial deflated
401 Roots[degree - deg] = zero;
402
403 // continue with the remaining polynomial
404 deg--;
405 for(int i = 0; i <= deg; i++){ P[i] = p[i]; };
406 goto search;
407 }
408 // if the iteration is unsuccessful another shift is chosen
409 }
410
411 // if 9 shifts fail, the outer loop is repeated with another sequence of shifts
412 }
413
414 // The zerofinder has failed on two major passes
415 // return empty handed with the number of roots found (less than the original degree)
416 done:
417 delete[] p;
418 delete[] P;
419 delete[] h;
420 delete[] H;
421 return degree - deg;
422 }
423