float-0.9.1/src/cpoly.C

/****************************************************************************
 *
 * cpoly.C                                                       René Hartung
 *                                                          Laurent Bartholdi
 *
 *   @(#)$id: fr_dll.c,v 1.18 2010/10/26 05:19:40 gap exp $
 *
 * Copyright (c) 2011, Laurent Bartholdi
 * based on TOMS/417
 *
 ****************************************************************************
 *
 * template for root-finding of complex polynomial
 *
 ****************************************************************************/

// CAUCHY COMPUTES A LOWER BOUND ON THE MODULI OF THE ZEROS OF A
// POLYNOMIAL - PT IS THE MODULUS OF THE COEFFICIENTS.
//
static xreal cauchy(const int deg, xcomplex *P)
{
  xreal x, xm, f, dx, df, *tmp = new xreal[deg+1];

  for(int i = 0; i<=deg; i++){ tmp[i] = xabs(P[i]); };

  // Compute upper estimate bound
  x = xroot(tmp[deg],deg) / xroot(tmp[0],deg);
  if(tmp[deg - 1] != 0.0) {
    // Newton step at the origin is better, use it
    xm = tmp[deg] / tmp[deg-1];
    if (xm < x) x = xm;
  }

  tmp[deg] = -tmp[deg];

  // Chop the interval (0,x) until f < 0
  while(1) {
    xm = x * 0.1;
    // Evaluate the polynomial <tmp> at <xm>
    f = tmp[0];
    for(int i = 1; i <= deg; i++)
      f = f * xm + tmp[i];

    if(f <= 0.0) break;
    x = xm;
  }
  dx = x;

   // Do Newton iteration until x converges to two decimal places
  while(xabs(dx / x) > 0.005) {
    f  = tmp[0];
    df = 0.0;
    for(int i = 1; i <= deg; i++){
      df = df * x + f;
      f  =  f * x + tmp[i];
    }

    dx = f / df;
    x -= dx;				// Newton step
  }

  delete[] tmp;

  return x;
}

// RETURNS A SCALE FACTOR TO MULTIPLY THE COEFFICIENTS OF THE POLYNOMIAL.
// THE SCALING IS DONE TO AVOID OVERFLOW AND TO AVOID UNDETECTED UNDERFLOW
// INTERFERING WITH THE CONVERGENCE CRITERION.  THE FACTOR IS A POWER OF THE
//int BASE.
// PT - MODULUS OF COEFFICIENTS OF P
// ETA, INFIN, SMALNO, BASE - CONSTANTS DESCRIBING THE FLOATING POINT ARITHMETIC.
//
static void scale(const int deg, xcomplex* P)
{
   int hi, lo, max, min, x, sc;

   // Find largest and smallest moduli of coefficients
   hi = (int)(xMAX_EXP / 2.0);
   lo = (int)(xMIN_EXP - xbits(P[0]));
   max = xlogb(P[0]); // leading coefficient does not vanish!
   min = xlogb(P[0]);

   for(int i = 0; i <= deg; i++) {
      if (P[i] != 0){
        x = xlogb(P[i]);
        if(x > max) max = x;
        if(x < min) min = x;
      }
    }

   // Scale only if there are very large or very small components
   if(min >= lo && max <= hi) return;

   x = lo - min;
   if(x <= 0)
      sc = -(max+min) / 2;
   else {
      sc = x;
      if(xMAX_EXP - sc > max) sc = 0;
      }

   // Scale the polynomial
   for(int i = 0; i<= deg; i++){ xscalbln(&P[i],sc); }
}

// COMPUTES  THE DERIVATIVE  POLYNOMIAL AS THE INITIAL H
// POLYNOMIAL AND COMPUTES L1 NO-SHIFT H POLYNOMIALS.
//
static void noshft(const int l1, int deg, xcomplex *P, xcomplex *H)
{
  int i, j, jj;
  xcomplex t;

  // compute the first H-polynomial as the (normed) derivative of P
  for(i = 0; i < deg; i++)
    H[i] = (P[i] * (deg-i)) / deg;

  for(jj = 1; jj <= l1; jj++) {
    if(xnorm(H[deg - 1]) > xeta(P[deg-1])*xeta(P[deg-1])* 10*10 * xnorm(P[deg - 1])) {
      t = -P[deg] / H[deg-1];
      for(i = 0; i < deg-1; i++){
	j = deg - i - 1;
	H[j] = t * H[j-1] + P[j];
      }
      H[0] = P[0];
    } else {
      // if the constant term is essentially zero, shift H coefficients
      for(i = 0; i < deg-1; i++) {
	j = deg - i - 1;
	H[j] = H[j - 1];
      }
      H[0] = 0;
    }
  }
}

// EVALUATES A POLYNOMIAL  P  AT  S  BY THE HORNER RECURRENCE
// PLACING THE PARTIAL SUMS IN Q AND THE COMPUTED VALUE IN PV.
//
static xcomplex polyev(const int deg, const xcomplex &s, const xcomplex *Q, xcomplex *q) {
   q[0] = Q[0];
   for(int i = 1; i <= deg; i++){ q[i] = q[i-1] * s + Q[i]; };
   return q[deg];
}

// COMPUTES  T = -P(S)/H(S).
// BOOL   - LOGICAL, SET TRUE IF H(S) IS ESSENTIALLY ZERO.
//
static xcomplex calct(bool *bol, int deg, xcomplex &Ps, xcomplex *H, xcomplex *h, xcomplex &s){
  xcomplex Hs = polyev(deg-1, s, H, h);
  *bol = xnorm(Hs) <= xeta(H[deg-1])*xeta(H[deg-1]) * 10*10 * xnorm(H[deg-1]);
  if(!*bol)
    return -Ps / Hs;
  else
    return xcomplex(0);
}

// CALCULATES THE NEXT SHIFTED H POLYNOMIAL.
// BOOL   -  LOGICAL, IF .TRUE. H(S) IS ESSENTIALLY ZERO
//
static void nexth(const bool bol, int deg, xcomplex &t, xcomplex *H, xcomplex *h, xcomplex *p){
   if(!bol){
      for(int j = 1; j < deg; j++)
         H[j] = t * h[j-1] + p[j];
      H[0] = p[0];
      }
   else {
     // If h[s] is zero replace H with qh
     for(int j = 1; j < deg; j++)
        H[j] = h[j - 1];
     h[0] = 0;
   }
}

// BOUNDS THE ERROR IN EVALUATING THE POLYNOMIAL BY THE HORNER RECURRENCE.
// QR,QI - THE PARTIAL SUMS
// MS    -MODULUS OF THE POINT
// MP    -MODULUS OF POLYNOMIAL VALUE
// ARE, MRE -ERROR BOUNDS ON COMPLEX ADDITION AND MULTIPLICATION
//
static xreal errev(const int deg, const xcomplex *p, const xreal ms, const xreal &mp){
   xreal MRE = 2.0 * sqrt(2.0) * xeta(p[0]);
   xreal e =  xabs(p[0]) * MRE / (xeta(p[0]) + MRE);

   for(int i = 0; i <= deg; i++)
      e = e * ms + xabs(p[i]);

   return e * (xeta(p[0]) + MRE) - MRE * mp;
}

// CARRIES OUT THE THIRD STAGE ITERATION.
// L3 - LIMIT OF STEPS IN STAGE 3.
// ZR,ZI   - ON ENTRY CONTAINS THE INITIAL ITERATE, IF THE
//           ITERATION CONVERGES IT CONTAINS THE FINAL ITERATE ON EXIT.
// CONV    -  .TRUE. IF ITERATION CONVERGES
//
static bool vrshft(const int l3, int deg, xcomplex *P, xcomplex *p, xcomplex *H, xcomplex *h, xcomplex *zero, xcomplex *s){
  bool bol, conv, b;
  int i, j;
  xcomplex Ps, t;
  xreal mp, ms, omp = 0.0, relstp = 0.0, tp;

  conv = b = false;
  *s = *zero;

  // Main loop for stage three
  for(i = 1; i <= l3; i++) {
    // Evaluate P at S and test for convergence
    Ps = polyev(deg, *s, P, p);
    mp = xabs(Ps);
    ms = xabs(*s);

    xreal zz = 20.0 * errev(deg, p, ms, mp);

    if (mp <= zz) {
      conv = true;
    }

    if(mp <= 20.0 * errev(deg, p, ms, mp)) {
      // Polynomial value is smaller in value than a bound on the error
      // in evaluating P, terminate the iteration
      conv = true;
      *zero = *s;
      return conv;
    }

    if(i != 1) {
      if(!(b || mp < omp || relstp >= 0.05)){
	//       if(!(b || xlogb(mp) < omp || real(relstp) >= 0.05)){
	// Iteration has stalled. Probably a cluster of zeros. Do 5 fixed
	// shift steps into the cluster to force one zero to dominate
	tp = relstp;
	b = true;
	if(relstp < xeta(P[0])) tp = xeta(P[0]);

	tp = xsqrt(tp);
	*s *= xcomplex(1.0+tp,tp);

	Ps = polyev(deg, *s, P, p);
	for(j = 1; j <= 5; j++){
	  t = calct(&bol, deg, Ps, H, h, *s);
	  nexth(bol, deg, t, H, h, p);
	}
	omp = xINFIN;
	goto _20;
      }

      // Exit if polynomial value increase significantly
      if(mp * 0.1 > omp) return conv;
    }

    omp = mp;

    // Calculate next iterate
  _20:  t = calct(&bol, deg, Ps, H, h, *s);
    nexth(bol, deg, t, H, h, p);
    t = calct(&bol, deg, Ps, H, h, *s);
    if(!bol) {
      relstp = xabs(t) / xabs(*s);
      *s += t;
    }
  } // end for

  return conv;
}

// COMPUTES L2 FIXED-SHIFT H POLYNOMIALS AND TESTS FOR CONVERGENCE.
// INITIATES A VARIABLE-SHIFT ITERATION AND RETURNS WITH THE
// APPROXIMATE ZERO IF SUCCESSFUL.
// L2 - LIMIT OF FIXED SHIFT STEPS
// ZR,ZI - APPROXIMATE ZERO IF CONV IS .TRUE.
// CONV  - LOGICAL INDICATING CONVERGENCE OF STAGE 3 ITERATION
//
static bool fxshft(const int l2, int deg, xcomplex *P, xcomplex *p, xcomplex *H, xcomplex *h, xcomplex *zero, xcomplex *s){
   bool bol, conv;	 	       // boolean for convergence of stage 2
   bool test, pasd;
   xcomplex old_T, old_S, Ps, t;
   xcomplex *tmp = new xcomplex[deg+1];

   Ps = polyev(deg, *s, P, p);
   test = true;
   pasd = false;

   // Calculate first T = -P(S)/H(S)
   t = calct(&bol, deg, Ps, H, h, *s);

   // Main loop for second stage
   for(int j = 1; j <= l2; j++){
      old_T = t;

      // Compute the next H Polynomial and new t
      nexth(bol, deg, t, H, h, p);
      t = calct(&bol, deg, Ps, H, h, *s);
      *zero = *s + t;

      // Test for convergence unless stage 3 has failed once or this
      // is the last H Polynomial
      if(!(bol || !test || j == l2)){
         if(xabs(t - old_T) < 0.5 * xabs(*zero)) {
            if(pasd) {
               // The weak convergence test has been passwed twice, start the third stage
               // Iteration, after saving the current H polynomial and shift
               for(int i = 0; i < deg; i++)
                  tmp[i] = H[i];
               old_S = *s;

               conv = vrshft(10, deg, P, p, H, h, zero, s);
               if(conv) goto done;

               //The iteration failed to converge. Turn off testing and restore h,s,pv and T
               test = false;
               for(int i = 0; i < deg; i++)
                  H[i] = tmp[i];
               *s = old_S;

               Ps = polyev(deg, *s, P, p);
               t = calct(&bol, deg, Ps, H, h, *s);
               continue;
               }
            pasd = true;
            }
         else
            pasd = false;
      }
   }

   // Attempt an iteration with final H polynomial from second stage
   conv = vrshft(10, deg, P, p, H, h, zero, s);

 done:
   delete[] tmp;

   return conv;
}

// Main function
//
extern "C" int cpoly(int degree, const xcomplex *poly, xcomplex *Roots, int prec)
{
  default_prec = prec; // ugly!
  // __asm__("int3");

  xcomplex PhiDiff(-0.069756473,0.99756405);
  xcomplex PhiRand(1.0/sqrt(2.0),-1.0/sqrt(2.0));
  unsigned int conv = 0;

  while(poly[0] == 0) {
    poly++;
    degree--;
    if (degree < 0)
      return -1;
  };

  xcomplex *P = new xcomplex[degree+1], *p = new xcomplex[degree+1],
    *H = new xcomplex[degree+1], *h = new xcomplex[degree+1],
    zero, s, bnd;

  int deg = degree;

  // Remove the zeros at the origin if any
  while(poly[deg] == 0){
    Roots[degree - deg] = 0;
    deg--;
  }

  if (deg == 0) goto done;

  // Make a copy of the coefficients
  for(int i = 0; i <= deg; i++) { P[i] = poly[i]; }

  scale(deg, P);

 search:

  if(deg <= 1){
    Roots[degree-1] = - P[1] / P[0];
    deg = 0;
    goto done;
  }

  // compute a bound of the moduli of the roots (Newton-Raphson)
  bnd = cauchy(deg, P);

  // Outer loop to control 2 Major passes with different sequences of shifts
  for(int cnt1 = 1; cnt1 <= 2; cnt1++) {
    // First stage  calculation , no shift
    noshft(5, deg, P, H);

    // Inner loop to select a shift
    for(int cnt2 = 1; cnt2 <= 9; cnt2++) {
      // Shift is chosen with modulus bnd and amplitude rotated by 94 degree from the previous shif
      PhiRand = PhiDiff * PhiRand;
      s = bnd * PhiRand;

      // Second stage calculation, fixed shift
      conv = fxshft(10 * cnt2, deg, P, p, H, h, &zero, &s);
      if(conv) {
	// The second stage jumps directly to the third stage iteration
	// If successful the zero is stored and the polynomial deflated
	Roots[degree - deg] = zero;

	// continue with the remaining polynomial
	deg--;
	for(int i = 0; i <= deg; i++){ P[i] = p[i]; };
	goto search;
      }
      // if the iteration is unsuccessful another shift is chosen
    }

    // if 9 shifts fail, the outer loop is repeated with another sequence of shifts
  }

  // The zerofinder has failed on two major passes
  // return empty handed with the number of roots found (less than the original degree)
 done:
  delete[] p;
  delete[] P;
  delete[] h;
  delete[] H;
  return degree - deg;
}