1 // arith07.cpp Copyright (C) 1990-2021 Codemist
2
3 //
4 // Arithmetic functions. negation plus a load of Common Lisp things
5 // for support of complex numbers.
6 //
7 //
8
9 /**************************************************************************
10 * Copyright (C) 2021, Codemist. A C Norman *
11 * *
12 * Redistribution and use in source and binary forms, with or without *
13 * modification, are permitted provided that the following conditions are *
14 * met: *
15 * *
16 * * Redistributions of source code must retain the relevant *
17 * copyright notice, this list of conditions and the following *
18 * disclaimer. *
19 * * Redistributions in binary form must reproduce the above *
20 * copyright notice, this list of conditions and the following *
21 * disclaimer in the documentation and/or other materials provided *
22 * with the distribution. *
23 * *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS *
27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE *
28 * COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, *
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, *
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
31 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
32 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR *
33 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF *
34 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH *
35 * DAMAGE. *
36 *************************************************************************/
37
38
39 // $Id: arith07.cpp 5604 2021-01-20 22:22:10Z arthurcnorman $
40
41 #include "headers.h"
42
43
copyb(LispObject a)44 LispObject copyb(LispObject a)
45 //
46 // copy a bignum.
47 //
48 { LispObject b;
49 size_t len = bignum_length(a), i;
50 Save save(a);
51 b = get_basic_vector(TAG_NUMBERS, TYPE_BIGNUM, len);
52 save.restore(a);
53 len = (len-CELL)/4;
54 for (i=0; i<len; i++)
55 bignum_digits(b)[i] = vbignum_digits(a)[i];
56 return b;
57 }
58
LispObject negateb(LispObject a)
//
// Negate a bignum.  Note that negating the 1-word bignum
// value of 0x08000000 will produce a fixnum as a result (for 32 bits),
// which might confuse the caller... in a similar way negating
// the value -0x40000000 will need to promote from a one-word
// bignum to a 2-word bignum. How messy just for negation!
// And on 64 bit systems the same effect applies but with larger values!
// In an analogous manner negating the positive number of the style
// 0x40000000 can lead to a negative result that uses one less digit.
// Well on a 64-bit machine it is a 2-word bignum that can end up
// negated to get a fixnum result.
//
{ LispObject b;
    size_t len = bignum_length(a), i;
    int32_t carry;
// There are two messy special cases here. The first is that there is a
// positive value (2^27 or 2^59) which has to be represented as a bignum,
// but when you negate it you get a fixnum.
// Then there will be negative values (the smallest being -2^31 or -2^62)
// that fit in a certain number of words of bignum, but their absolute
// value needs one more word...
// Note that on a 64-bit machine there ought never to be any one-word
// bignums because all the values representable with just one 31-bit digit
// can be handled as fixnums instead.
    if (SIXTY_FOUR_BIT &&
        len == CELL+8)   // two-word bignum - do specially
    {   if (bignum_digits(a)[0] == 0 &&
            bignum_digits(a)[1] == (int32_t)0x10000000)
            return MOST_NEGATIVE_FIXNUM;
        else if (bignum_digits(a)[0] == 0 &&
                 (int32_t)bignum_digits(a)[1] == -(int32_t)(1<<30))
            return make_three_word_bignum(0, 1<<30, 0);
        uint32_t d0 = bignum_digits(a)[0];
        int32_t d1 = (int32_t)~bignum_digits(a)[1];
// NOTE(review): when d0 == 0 control falls through to the general path
// below (which handles that case correctly), so this increment of d1 is
// a dead store - confirm before tidying it away.
        if (d0 == 0) d1++;
        else return make_two_word_bignum(d1, (-d0) & 0x7fffffff);
    }
    if (!SIXTY_FOUR_BIT &&
        len == CELL+4)   // one-word bignum - do specially
    {   int32_t d0 = -(int32_t)bignum_digits(a)[0];
        if (d0 == MOST_NEGATIVE_FIXVAL) return MOST_NEGATIVE_FIXNUM;
        else if (d0 == 0x40000000) return make_two_word_bignum(0, d0);
        else return make_one_word_bignum(d0);
    }
// General case: allocate a result bignum of the same length, protecting
// a across the (possibly GC-triggering) allocation.
    {   Save save(a);
        b = get_basic_vector(TAG_NUMBERS, TYPE_BIGNUM, len);
        errexit();
        save.restore(a);
    }
    len = (len-CELL-4)/4;   // number of low (unsigned, 31-bit) digits
    carry = -1;
    for (i=0; i<len; i++)
    {
// The next couple of lines really caught me out wrt compiler optimisation
// before I put in all the casts. I used to have what was in effect
//    carry = (signed_x ^ 0x7fffffff) + (int32_t)((uint32_t)carry>>31);
//           ... ((uint32_t)carry >> 31);
// and a compiler seems to have observed that the masking leaves the left
// operand of the addition positive, and that the unsigned shift right
// leaves the right operand positive too. So based on an assumption that
// signed integer overflow will not happen it deduces that the sum will also
// be positive, and hence that on the next line (carry>>31) will be zero.
// For the assumption to fail there will have had to be integer overflow, and
// the C/C++ standards say that the consequence of that are undefined - a term
// that can include behaviour as per the optimised code here.
//
// To avoid that I am working on the basis that casts between int32_t and
// uint32_t will leave bit representations unchanged and that arithmetic uses
// twos complement for signed values. Then by casting to unsigned at times
// I can allow a carry to propagate into the top bit of a word without that
// counting as an overflow, and that should force the compiler to do the
// arithmetic in full.
//
// Having spotted this particular case I now worry about how many related
// ones there may be hiding in the code!
//
        carry = ADD32(clear_top_bit(~bignum_digits(a)[i]),
                      top_bit(carry));
        bignum_digits(b)[i] = clear_top_bit(carry);
    }
// Handle the top digit separately since it is signed.
    carry = ADD32(~bignum_digits(a)[i], top_bit(carry));
    if (!signed_overflow(carry))
    {
// If the most significant word ends up as -1 then I just might
// have 0x40000000 in the next word down and so I may need to shrink
// the number. Since I handled 1-word bignums specially I have at
// least two words to deal with here.
        if (carry == -1 && (bignum_digits(b)[i-1] & 0x40000000) != 0)
        {   bignum_digits(b)[i-1] |= ~0x7fffffff;   // sign-extend new top digit
            setnumhdr(b, numhdr(b) - pack_hdrlength(1));
// The word vacated by shrinking must be left tidy: either zero padding
// or a filler header, depending on alignment (which differs between
// 32- and 64-bit builds).
            if (SIXTY_FOUR_BIT)
            {   if ((i & 1) != 0) bignum_digits(b)[i] = 0;
                else *reinterpret_cast<Header *>(&bignum_digits(b)[i]) = make_bighdr(
                            2);
            }
            else
            {   if ((i & 1) == 0) bignum_digits(b)[i] = 0;
                else *reinterpret_cast<Header *>(&bignum_digits(b)[i]) = make_bighdr(
                            2);
            }
        }
        else bignum_digits(b)[i] = carry; // no shrinking needed
        return b;
    }
// Here I have overflow: this can only happen when I negate a number
// that started off with 0xc0000000 in the most significant digit,
// and I have to pad a zero word onto the front.
    bignum_digits(b)[i] = clear_top_bit(carry);
    return lengthen_by_one_bit(b, carry);
}
171
172 //
173 // generic negation
174 //
175
// Generic negation: dispatch on the representation of a Lisp number
// (immediate fixnum/short-float, boxed bignum/ratio/complex, boxed float)
// and return -a.  Non-numbers raise an error via aerror1.
LispObject negate(LispObject a)
{ switch (static_cast<int>(a) & TAG_BITS)
    {   case TAG_FIXNUM:
// Immediate data: a short float is negated by flipping the sign bit of
// its packed representation; otherwise this is a fixnum.
            if (!SIXTY_FOUR_BIT && is_sfloat(a))
                return a ^ 0x80000000U;
            if (SIXTY_FOUR_BIT && is_sfloat(a))
                return a ^ UINT64_C(0x8000000000000000);
// make_lisp_integer64 copes with the case where negating the most
// negative fixnum overflows into bignum territory.
            else return make_lisp_integer64(-int_of_fixnum(a));
        case TAG_NUMBERS:
        {   int32_t ha = type_of_header(numhdr(a));
            switch (ha)
            {   case TYPE_BIGNUM:
                    return negateb(a);
                case TYPE_RATNUM:
// -(n/d) = (-n)/d.  The recursive negate may allocate (and hence
// trigger GC), so d is protected across it.
                {   LispObject n = numerator(a),
                               d = denominator(a);
                    Save save(d);
                    n = negate(n);
                    save.restore(d);
                    return make_ratio(n, d);
                }
                case TYPE_COMPLEX_NUM:
// -(r + i*j) = (-r) + (-i)*j, again protecting the live half across
// each potentially-allocating recursive call.
                {   LispObject r = real_part(a),
                               i = imag_part(a);
                    Save save(i);
                    r = negate(r);
                    save.restore(i);
                    Save save1(r);
                    i = negate(i);
                    save1.restore(r);
                    return make_complex(r, i);
                }
                default:
                    return aerror1("bad arg for minus", a);
            }
        }
        case TAG_BOXFLOAT:
            switch (type_of_header(flthdr(a)))
            {   case TYPE_SINGLE_FLOAT:
                    return make_boxfloat(-single_float_val(a),
                                         TYPE_SINGLE_FLOAT);
                case TYPE_DOUBLE_FLOAT:
                    return make_boxfloat(-double_float_val(a),
                                         TYPE_DOUBLE_FLOAT);
#ifdef HAVE_SOFTFLOAT
                case TYPE_LONG_FLOAT:
                {   float128_t aa = long_float_val(a);
                    f128M_negate(&aa);
                    return make_boxfloat128(aa);
                }
#endif // HAVE_SOFTFLOAT
            }
// Deliberate fallthrough: if the inner switch matched nothing it falls
// into the default label below and reports an error.
        default:
            return aerror1("bad arg for minus", a);
    }
}
232
233
234 /*****************************************************************************/
235 //** Transcendental functions etcetera. **
236 /*****************************************************************************/
237
238
239 //
240 // Much of the code here is extracted from the portable Fortran library
241 // used by Codemist with its Fortran compiler.
242 //
243
244 //
245 // The object of the following macro is to adjust the floating point
246 // variables concerned so that the more significant one can be squared
247 // with NO LOSS OF PRECISION. It is only used when there is no danger
248 // of over- or under-flow.
249 //
250 // This code is NOT PORTABLE but can be modified for use elsewhere
251 // It should, however, serve for IEEE and IBM FP formats.
252 //
253
// Union allowing the byte-level representation of a double to be read
// and modified (used by the _fp_normalize macro below to zero the low
// half of a mantissa).
typedef union _char_double
{ double d;
    char c[8];
} char_double_union;

// Byte offset of the low-order 32 bits of the mantissa within the
// 8-byte representation of a double, which depends on endianness.
#ifdef LITTLEENDIAN
#define LOW_BITS_OFFSET 0
#else
#define LOW_BITS_OFFSET 4
#endif
264
// The code here explicitly puns between a double and a row of char values
// so that it can force the bottom 32 bits of the representation of the
// double to be zero. The use of the char type here and then memset to clear
// it is intended to keep me safe from strict-aliasing concerns, and modern
// C compilers are liable to map the use of memset onto a simple store
// instruction.
271
// Split `high` so that the low 32 bits of its mantissa become zero,
// adding the discarded part into `low`.  Afterwards high*high can be
// formed with no rounding error (the point of the exercise).
#define _fp_normalize(high, low)                                          \
    {   char_double_union temp;  /* access to representation */           \
        temp.d = high;           /* take original number */               \
        std::memset(&temp.c[LOW_BITS_OFFSET], 0, 4);                      \
        /* make low part of mantissa 0 */                                 \
        low += (high - temp.d);  /* add into low-order result */          \
        high = temp.d;                                                    \
    }
280
281 //
282 // A modern C system will provide a datatype "complex double" which
283 // will (I hope) provide direct implementations of some things I need
284 // here. However in the end I may prefer not to use it because for
285 // real floating point I am using crlibm that implements correctly
286 // rounded and hence consistent across all platforms values. If I use
287 // that as a basis for my complex code I will at least get bit-for-bit
288 // identical results everywhere even if I do not manage to achieve
289 // correctly rounded last-bit performance in all cases.
290 // log, sqrt and all the inverse trig functions here need careful review
291 // as to their treatment of -0.0 on branch-cuts!
292 //
293
Cabs(Complex z)294 double Cabs(Complex z)
295 {
296 //
297 // Obtain the absolute value of a complex number - note that the main
298 // agony here is in ensuring that neither overflow nor underflow can
299 // wreck the calculation. Given ideal arithmetic the sum could be carried
300 // through as just sqrt(x^2 + y^2).
301 //
302 double x = z.real, y = z.imag;
303 double scale;
304 int n1, n2;
305 if (x==0.0) return std::fabs(y);
306 else if (y==0.0) return std::fabs(x);
307 static_cast<void>(std::frexp(x, &n1));
308 static_cast<void>(std::frexp(y, &n2));
309 // The exact range of values returned by frexp does not matter here
310 if (n2>n1) n1 = n2;
311 // n1 is now the exponent of the larger (in absolute value) of x, y
312 scale = std::ldexp(1.0, n1); // can not be 0.0
313 x /= scale;
314 y /= scale;
315 // The above scaling operation introduces no rounding error (since the
316 // scale factor is exactly a power of 2). It reduces the larger of x, y
317 // to be somewhere near 1.0 so overflow in x*x+y*y is impossible. It is
318 // still possible that one of x*x and y*y will underflow (but not both)
319 // but this is harmless.
320 return scale * std::sqrt(x*x + y*y);
321 }
322
Ccos(Complex z)323 Complex Ccos(Complex z)
324 { double x = z.real, y = z.imag;
325 //
326 // cos(x + iy) = cos(x)*cosh(y) - i sin(x)*sinh(y)
327 // For smallish y this can be used directly. For |y| > 50 I will
328 // compute sinh and cosh as just +/- exp(|y|)/2
329 //
330 double s = std::sin(x), c = std::cos(x);
331 double absy = std::fabs(y);
332 if (absy <= 50.0)
333 { double sh = std::sinh(y), ch = std::cosh(y);
334 z.real = c*ch;
335 z.imag = - s*sh;
336 return z;
337 }
338 else
339 { double w;
340 int n = _reduced_exp(absy, &w) - 1;
341 z.real = std::ldexp(c*w, n);
342 if (y < 0.0) z.imag = std::ldexp(s*w, n);
343 else z.imag = std::ldexp(-s*w, n);
344 return z;
345 }
346 }
347
static double reduced_power(double a, int n)
//
// Compute (1 + a)^n - 1 without the roundoff that forming (1+a)^n
// directly would suffer when a is small.  Requires n >= 1 on entry.
// Works by binary recursion: from d = (1+a)^(n/2) - 1 we get
// (1+a)^(2*(n/2)) - 1 = (2+d)*d, with one extra factor folded in when
// n is odd.
//
{   if (n == 1) return a;
    double half = reduced_power(a, n/2);
    double result = (2.0 + half)*half;            // squares the partial power
    if ((n & 1) != 0) result += (1.0 + result)*a; // odd n: one more (1+a)
    return result;
}
361
362 //
363 // The following value is included for documentation purposes - it
364 // give the largest args that can be given to exp() without leading to
365 // overflow on IEEE-arithmetic machines.
366 // #define _exp_arg_limit 709.78271289338397
367 // Note that in any case exp(50.0) will not overflow (it is only 5.2e21),
368 // so it can be evaluated the simple direct way.
369 //
370
_reduced_exp(double x,double * r)371 int _reduced_exp(double x, double *r)
372 {
373 //
374 // (*r) = exp(x)/2^n; return n;
375 // where n will be selected so that *r gets set to a fairly small value
376 // (precise range of r unimportant provided it will be WELL away from
377 // chances of overflow, even when exp(x) would actually overflow). This
378 // function may only be called with argument x that is positive and
379 // large enough that n will end up satisfying n>=1. The coding here
380 // will ensure that if x>4.0, and in general the use of this function
381 // will only be for x > 50.
382 // For IBM hardware it would be good to be able to control the value
383 // of n mod 4, maybe, to help counter wobbling precision. This is not
384 // done here.
385 //
386 int n;
387 double f;
388 n = static_cast<int>(x / 7.625 + 0.5);
389 //
390 // 7.625 = 61/8 and is expected to have an exact floating point
391 // representation here, so f is computed without any rounding error.
392 // (do I need something like the (x - 0.5) - 0.5 trick here?)
393 //
394 f = std::exp(x - 7.625*static_cast<double>(n));
395 //
396 // the magic constant is ((exp(61/8) / 2048) - 1) and it arises because
397 // 61/88 is a decent rational approximation to log(2), hence exp(61/8)
398 // is almost 2^11. Thus I compute exp(x) as
399 // 2^(11*n) * (exp(61/8)/2^11)^n * exp(f)
400 // The first factor is exact, the second is (1+e)^n where e is small and
401 // n is an integer, so can be computer accurately, and the residue f at the
402 // end is small enough not to give over-bad trouble.
403 // The numeric constant given here was calculated with the REDUCE 3.3
404 // bigfloat package.
405 //
406 #define _e61q8 3.81086435594567676751e-4
407 *r = reduced_power(_e61q8, n)*f + f;
408 #undef _e61q8
409 return 11*n;
410 }
411
Cexp(Complex z)412 Complex Cexp(Complex z)
413 { double x = z.real, y = z.imag;
414 //
415 // value is exp(x)*(cos(y) + i sin(y)) but have care with overflow
416 // Here (and throughout the complex library) there is an opportunity
417 // to save time by computing sin(y) and cos(y) together. Since this
418 // code is (to begin with) to sit on top of an arbitrary C library,
419 // perhaps with hardware support for the calculation of real-valued
420 // trig functions I am not going to try to realise this saving.
421 //
422 double s = std::sin(y), c = std::cos(y);
423 //
424 // if x > 50 I will use a cautious sceme which computes exp(x) with
425 // its (binary) exponent separated. Note that 50.0 is chosen as a
426 // number noticably smaller than _exp_arg_limit (exp(50) = 5.18e21),
427 // but is not a critical very special number.
428 //
429 if (x <= 50.0) // includes x < 0.0, of course
430 { double w = std::exp(x);
431 z.real = w*c;
432 z.imag = w*s;
433 return z;
434 }
435 else
436 { double w;
437 int n = _reduced_exp(x, &w);
438 z.real = std::ldexp(w*c, n);
439 z.imag = std::ldexp(w*s, n);
440 return z;
441 }
442 }
443
Cln(Complex z)444 Complex Cln(Complex z)
445 { double x = z.real, y = z.imag;
446 //
447 // if x and y are both very large then cabs(z) may be out of range
448 // even though log or if is OK. Thus it is necessary to perform an
449 // elaborate scaled calculation here, and not just
450 // z.real = log(cabs(z));
451 //
452 double scale, r;
453 int n1, n2;
454 if (x==0.0) r = std::log(std::fabs(y));
455 else if (y==0.0) r = std::log(std::fabs(x));
456 else
457 { static_cast<void>(std::frexp(x, &n1));
458 static_cast<void>(std::frexp(y, &n2));
459 // The exact range of values returned by frexp does not matter here
460 if (n2>n1) n1 = n2;
461 scale = std::ldexp(1.0, n1);
462 x /= scale;
463 y /= scale;
464 r = std::log(scale) + 0.5*std::log(x*x + y*y);
465 }
466 z.real = r;
467 //
468 // The C standard is not very explicit about the behaviour of atan2(0.0, -n)
469 // while for Fortran it is necessary that this returns +pi not -pi. Hence
470 // with extreme caution I put a special test here.
471 //
472 if (y == 0.0)
473 if (x < 0.0) z.imag = _pi;
474 else z.imag = 0.0;
475 else z.imag = std::atan2(y, x);
476 return z;
477 }
478
479
480 //
481 // Complex raising to a power. This seems to be pretty nasty
482 // to get right, and the code includes extra high precision variants
483 // on atan() and log(). Further refinements wrt efficiency may be
484 // possible later on.
485 // This code has been partially tested, and seems to be uniformly
486 // better than using just a**b = exp(b*log(a)), but much more careful
487 // study is needed before it can possibly be claimed that it is
488 // right in the sense of not throwing away accuracy when it does not
489 // have to. I also need to make careful checks to verify that the
490 // correct (principal) value is computed.
491 //
492
493 //
494 // The next function is used after arithmetic has been done on extra-
495 // precision numbers so that the relationship between high and low parts
496 // is no longer known. Re-instate it.
497 //
498
499 #define _two_minus_25 2.98023223876953125e-8 // 2^(-25)
500
// Extended-precision addition: return the high part of a+b and store the
// low-order residue in *lowres, with the high part "normalised" (low 32
// mantissa bits zero) so it can later be multiplied without rounding.
static double fp_add(double a, double b, double *lowres)
{
// Result is the high part of a+b, with the low part assigned to *lowres
    double absa, absb;
    if (a >= 0.0) absa = a;
    else absa = -a;
    if (b >= 0.0) absb = b;
    else absb = -b;
    if (absa < absb)
    {   double t = a; a = b; b = t;
    }
// Now a is the larger (in absolute value) of the two numbers
    if (absb > absa * _two_minus_25)
    {   double al = 0.0, bl = 0.0;
//
// If the exponent difference between a and b is no more than 25 then
// I can add the top part (20 or 24 bits) of a to the top part of b
// without going beyond the 52 or 56 bits that a full mantissa can hold.
//
        _fp_normalize(a, al);
        _fp_normalize(b, bl);
        a = a + b;   // No rounding needed here
        b = al + bl;
        if (a == 0.0)   // total cancellation of the high parts
        {   a = b;
            b = 0.0;
        }
    }
//
// The above step leaves b small wrt the value in a (unless a+b led
// to substantial cancellation of leading digits), but leaves the high
// part a with bits everywhere. Force low part of a to zero.
//
    {   double al = 0.0;
        _fp_normalize(a, al);
        b = b + al;
    }
    if (a >= 0.0) absa = a;
    else absa = -a;
    if (b >= 0.0) absb = b;
    else absb = -b;
    if (absb > absa * _two_minus_25)
//
// If on input a is close to -b, then a+b is close to zero. In this
// case the exponents of a and b matched, and so earlier calculations
// have all been done exactly. Go around again to split residue into
// high and low parts.
//
    {   double al = 0.0, bl = 0.0;
        _fp_normalize(b, bl);
        a = a + b;
        _fp_normalize(a, al);
        b = bl + al;
    }
    *lowres = b;
    return a;
}
558
559 #undef _two_minus_25
560
// Compute atan2(b, a) in one-and-a-half precision: the result is
// delivered as a high part *thetah plus a low-order correction *thetal.
// Works by octant reduction, then a table of atan(k/16) values, then a
// short Taylor series on a residual quotient smaller than 1/32.
static void extended_atan2(double b, double a, double *thetah,
                           double *thetal)
{ int octant;
    double rh, rl, thh, thl;
//
// First reduce the argument to the first octant (i.e. a, b both +ve,
// and b <= a).  The octant number encodes which symmetry was applied
// so the final answer can be reconstructed in the switch at the end.
//
    if (b < 0.0)
    {   octant = 4;
        a = -a;
        b = -b;
    }
    else octant = 0;
    if (a < 0.0)
    {   double t = a;
        octant += 2;
        a = b;
        b = -t;
    }
    if (b > a)
    {   double t = a;
        octant += 1;
        a = b;
        b = t;
    }

    {   static struct
        {   double h;
            double l;
        } _atan[] =
        {
//
// The table here gives atan(n/16) in 1.5-precision for n=0..16
// Note that all the magic numbers used in this file were calculated
// using the REDUCE bigfloat package, and all the 'exact' parts have
// a denominator of at worst 2^26 and so are expected to have lots
// of trailing zero bits in their floating point representation.
//
            { 0.0,                          0.0 },
            { 0.06241881847381591796875,   -0.84778585694947708870e-8 },
            { 0.124355018138885498046875,  -2.35921240630155201508e-8 },
            { 0.185347974300384521484375,  -2.43046897565983490387e-8 },
            { 0.2449786663055419921875,    -0.31786778380154175187e-8 },
            { 0.302884876728057861328125,  -0.83530864557675689054e-8 },
            { 0.358770668506622314453125,   0.17639499059427950639e-8 },
            { 0.412410438060760498046875,   0.35366268088529162896e-8 },
            { 0.4636476039886474609375,     0.50121586552767562314e-8 },
            { 0.512389481067657470703125,  -2.07569197640365239794e-8 },
            { 0.558599293231964111328125,   2.21115983246433832164e-8 },
            { 0.602287352085113525390625,  -0.59501493437085023057e-8 },
            { 0.643501102924346923828125,   0.58689374629746842287e-8 },
            { 0.6823165416717529296875,     1.32029951485689299817e-8 },
            { 0.71882998943328857421875,    1.01883359311982641515e-8 },
            { 0.75315129756927490234375,   -1.66070805128190106297e-8 },
            { 0.785398185253143310546875,  -2.18556950009312141541e-8 }
        };
// Pick the nearest table entry atan(k/16) to atan(b/a).
        int k = static_cast<int>(16.0*(b/a + 0.03125)); // 0 to 16
        double kd = static_cast<double>(k)/16.0;
        double ah = a, al = 0.0,
               bh = b, bl = 0.0,
               ch, cl, q, q2;
        _fp_normalize(ah, al);
        _fp_normalize(bh, bl);
// Rotate the vector (a, b) by -atan(kd) using exact (normalised)
// multiplications; kd has few mantissa bits so these products are exact.
        ch = bh - ah*kd;  cl = bl - al*kd;
        ah = ah + bh*kd;  al = al + bl*kd;
        bh = ch;  bl = cl;
// Now |(a/b)| <= 1/32
        ah = fp_add(ah, al, &al);   // Re-normalise
        bh = fp_add(bh, bl, &bl);
// Compute approximation to b/a
        rh = (bh + bl)/(ah + al);  rl = 0.0;
        _fp_normalize(rh, rl);
        bh -= ah*rh;  bl -= al*rh;
        rl = (bh + bl)/(ah + al);   // Quotient now formed
//
// Now it is necessary to compute atan(q) to one-and-a-half precision.
// Since |q| < 1/32 I will leave just q as the high order word of
// the result and compute atan(q)-q as a single precision value. This
// gives about 16 bits accuracy beyond regular single precision work.
//
        q = rh + rl;
        q2 = q*q;
// The expansion that follows could be done better using a minimax poly
        rl -= q*q2*(0.33333333333333333333 -
                    q2*(0.20000000000000000000 -
                        q2*(0.14285714285714285714 -
                            q2*(0.11111111111111111111 -
                                q2* 0.09090909090909090909))));
// OK - now (rh, rl) is atan(reduced b/a). Need to add on atan(kd)
        rh += _atan[k].h;
        rl += _atan[k].l;
    }
//
// The following constants give high precision versions of pi and pi/2,
// and the high partwords (p2h and pih) have lots of low order zero bits
// in their binary representation. Expect (=require) that the arithmetic
// that computes thh is done without introduced rounding error.
//
#define _p2h 1.57079632580280303955078125
#define _p2l 9.92093579680540441639751e-10

#define _pih 3.14159265160560607910156250
#define _pil 1.984187159361080883279502e-9

// Undo the octant reduction applied at the top.
    switch (octant)
    {   default:
        case 0: thh = rh;         thl = rl;         break;
        case 1: thh = _p2h - rh;  thl = _p2l - rl;  break;
        case 2: thh = _p2h + rh;  thl = _p2l + rl;  break;
        case 3: thh = _pih - rh;  thl = _pil - rl;  break;
        case 4: thh = -_pih + rh; thl = -_pil + rl; break;
        case 5: thh = -_p2h - rh; thl = -_p2l - rl; break;
        case 6: thh = -_p2h + rh; thl = -_p2l + rl; break;
        case 7: thh = -rh;        thl = -rl;        break;
    }

#undef _p2h
#undef _p2l
#undef _pih
#undef _pil

    *thetah = fp_add(thh, thl, thetal);
}
685
// Compute k*log(2) + 0.5*log(a^2 + b^2) in one-and-a-half precision,
// returning a high part *logrh and a low-order correction *logrl.
// The caller (Cpow) has already scaled (a, b) by 2^k so that a^2 + b^2
// lies in roughly [0.25, 2.0].
static void extended_log(int k, double a, double b,
                         double *logrh, double *logrl)
{
//
// If we had exact arithmetic this procedure could be:
//       k*log(2) + 0.5*log(a^2 + b^2)
//
    double al = 0.0, bl = 0.0, all = 0.0, bll = 0.0, c, ch, cl, cll;
    double w, q, qh, ql, rh, rl;
    int n;
//
// First (a^2 + b^2) is calculated, using extra precision.
// Because any rounding at this stage can lead to bad errors in
// the power that I eventually want to compute, I use 3-word
// arithmetic here, and with the version of _fp_normalize given
// above and IEEE or IBM370 arithmetic this part of the
// computation is exact.
//
    _fp_normalize(a, al);  _fp_normalize(al, all);
    _fp_normalize(b, bl);  _fp_normalize(bl, bll);
    ch = a*a + b*b;
    cl = 2.0*(a*al + b*bl);
    cll = (al*al + bl*bl) +
          all*(2.0*(a + al) + all) +
          bll*(2.0*(b + bl) + bll);
    _fp_normalize(ch, cl);
    _fp_normalize(cl, cll);
    c = ch + (cl + cll);   // single precision approximation
//
// At this stage the scaling of the input value will mean that we
// have 0.25 <= c <= 2.0
//
// Now rewrite things as
//      (2*k + n)*log(s) + 0.5*log((a^2 + b^2)/2^n))
// where s = sqrt(2)
// and where the arg to the log is in sqrt(0.5), sqrt(2)
//
#define _sqrt_half 0.70710678118654752440
#define _sqrt_two  1.41421356237309504880
    k = 2*k;
    while (c < _sqrt_half)
    {   k -= 1;
        ch *= 2.0;   // doubling/halving is exact, so all three words scale
        cl *= 2.0;
        cll *= 2.0;
        c *= 2.0;
    }
    while (c > _sqrt_two)
    {   k += 1;
        ch *= 0.5;
        cl *= 0.5;
        cll *= 0.5;
        c *= 0.5;
    }
#undef _sqrt_half
#undef _sqrt_two
// Multiply by the nearest n/16 to 1/c so that c is pulled close to 1;
// n ends up in 11..23 and indexes _log_table below.
    n = static_cast<int>(16.0/c + 0.5);
    w = static_cast<double>(n) / 16.0;
    ch *= w;
    cl *= w;
    cll *= w;   // Now |c-1| < 0.04317
    ch = (ch - 0.5) - 0.5;   // form c-1 without rounding
    ch = fp_add(ch, cl, &cl);
    cl = cl + cll;
//
// (ch, cl) is now the reduced argument ready for calculating log(1+c),
// and now that the reduction is over I can drop back to the use of just
// two doubleprecision values to represent c.
//
    c = ch + cl;
    qh = c / (2.0 + c);
    ql = 0.0;
    _fp_normalize(qh, ql);
    ql = ((ch - qh*(2.0 + ch)) + cl - qh*cl) / (2.0 + c);
// (qh, ql) is now c/(2.0 + c)
    rh = qh;   // 18 bits bigger than low part will end up
    q = qh + ql;
    w = q*q;
    rl = ql + q*w*(0.33333333333333333333 +
                   w*(0.20000000000000000000 +
                      w*(0.14285714285714285714 +
                         w*(0.11111111111111111111 +
                            w*(0.09090909090909090909)))));
//
// (rh, rl) is now 0.5*log(1+c) correct to double precision plus about
// 18 bits: the series above is atanh(q) with q = c/(2+c), and
// log(1+c) = 2*atanh(c/(2+c)).  (An earlier comment here said atan(c),
// which was a slip - the signs in the series are all positive.)
//
    {   double temp;
        static struct
        {   double h;
            double l;
        } _log_table[] =
        {
//
// The following values are (in extra precision) -log(n/16)/2 for n
// in the range 11 to 23 (i.e. roughly over sqrt(2)/2 to sqrt(2))
//
            {  0.1873466968536376953125,      2.786706765149099245e-8 },
            {  0.1438410282135009765625,      0.801238948715710950e-8 },
            {  0.103819668292999267578125,    1.409612298322959552e-8 },
            {  0.066765725612640380859375,   -2.930037906928620319e-8 },
            {  0.0322692394256591796875,      2.114312640614896196e-8 },
            {  0.0,                           0.0 },
            { -0.0303122997283935546875,     -1.117982386660280307e-8 },
            { -0.0588915348052978515625,      1.697710612429310295e-8 },
            { -0.08592510223388671875,       -2.622944289242004947e-8 },
            { -0.111571788787841796875,       1.313073691899185245e-8 },
            { -0.135966837406158447265625,   -2.033566243215020975e-8 },
            { -0.159226894378662109375,       2.881939480146987639e-8 },
            { -0.18145275115966796875,        0.431498374218108783e-8 }
        };

        rh = fp_add(rh, _log_table[n-11].h, &temp);
        rl = temp + _log_table[n-11].l + rl;
    }

// Finally add in k*log(sqrt(2)), split into an exact part (many trailing
// zero bits, so k*exact is rounding-free) plus a tiny correction.
#define _exact_part_logroot2   0.3465735912322998046875
#define _approx_part_logroot2  (-9.5232714997888393927e-10)
// Multiply this by k and add it in
    {   double temp, kd = static_cast<double>(k);
        rh = fp_add(rh, kd*_exact_part_logroot2, &temp);
        rl = rl + temp + kd*_approx_part_logroot2;
    }
#undef _exact_part_logroot2
#undef _approx_part_logroot2

    *logrh = rh;
    *logrl = rl;
    return;
}
817
818
// Complex z1 ** z2, computed as exp(z2 * log(z1)) but with the argument
// to exp carried in one-and-a-half precision (via extended_atan2 and
// extended_log) so that exp does not magnify rounding errors when the
// real part is large, and so that range reduction of the imaginary part
// stays accurate.
Complex Cpow(Complex z1, Complex z2)
{ double a = z1.real, b = z1.imag,
         c = z2.real, d = z2.imag;
    int k, m, n;
    double scale;
    double logrh, logrl, thetah, thetal;
    double r, i, rh, rl, ih, il, clow, dlow, q;
    double cw, sw, cost, sint;

    if (b == 0.0 && d == 0.0 &&
        a >= 0.0)   // Simple case if both args are real
    {   z1.real = std::pow(a, c);
        z1.imag = 0.0;
        return z1;
    }
//
// Start by scaling z1 by dividing out a power of 2 so that |z1| is
// fairly close to 1
//
    if (a == 0.0)
    {   if (b == 0.0) return z1;   // 0.0**anything is really an error
// The exact values returned by frexp do not matter here
        static_cast<void>(std::frexp(b, &k));
    }
    else
    {   static_cast<void>(std::frexp(a, &k));
        if (b != 0.0)
        {   int n;
            static_cast<void>(std::frexp(b, &n));
            if (n > k) k = n;
        }
    }
    scale = std::ldexp(1.0, k);   // exact power of 2: divisions are exact
    a /= scale;
    b /= scale;
//
// The idea next is to express z1 as r*exp(i theta), then z1**z2
// is exp(z2*log(z1)) and the exponent simplifies to
//      (c*log r - d*theta) + i(c theta + d log r).
// Note r = scale*sqrt(a*a + b*b) now.
// The argument for exp() must be computed to noticeably more than
// regular precision, since otherwise exp() greatly magnifies the
// error in the real part (if it is large and positive) and range
// reduction in the imaginary part can equally lead to accuracy
// problems. Neither c nor d can usefully be anywhere near the
// extreme range of floating point values, so I will not even try
// to scale them, thus I will end up happy(ish) if I can compute
// atan2(b, a) and log(|z1|) in one-and-a-half precision form.
// It would be best to compute theta in units of pi/4 rather than in
// radians, since then the case of z^n (integer n) with arg(z) an
// exact multiple of pi/4 would work out better. However at present I
// just hope that the 1.5-precision working is good enough.
//
    extended_atan2(b, a, &thetah, &thetal);
    extended_log(k, a, b, &logrh, &logrl);

// Normalise all numbers
    clow = 0.0;
    dlow = 0.0;
    _fp_normalize(c, clow);
    _fp_normalize(d, dlow);

// (rh, rl) = c*logr - d*theta;
    rh = c*logrh - d*thetah;   // No rounding in this computation
    rl = c*logrl + clow*(logrh + logrl) - d*thetal - dlow*
         (thetah + thetal);

// (ih, il) = c*theta + d*logr;
    ih = c*thetah + d*logrh;   // No rounding in this computation
    il = c*thetal + clow*(thetah + thetal) + d*logrl + dlow*
         (logrh + logrl);

//
// Now it remains to take the exponential of the extended precision
// value in ((rh, rl), (ih, il)).
// Range reduce the real part by multiples of log(2), and the imaginary
// part by multiples of pi/2.
//

    rh = fp_add(rh, rl, &rl);   // renormalise
    r = rh + rl;   // Approximate value

#define _recip_log_2 1.4426950408889634074

    q = r * _recip_log_2;
// Round to the nearest integer multiple of log 2.
    m = (q < 0.0) ? static_cast<int>(q - 0.5) : static_cast<int>(q + 0.5);
    q = static_cast<double>(m);
#undef _recip_log_2
//
// log 2 = 11629080/2^24 - 0.000 00000 19046 54299 95776 78785
// to reasonable accuracy. It is vital that the exact part be
// read in as a number with lots of low zero bits, so when it gets
// multiplied by the integer q there is NO rounding needed.
//

#define _exact_part_log2  0.693147182464599609375
#define _approx_part_log2 (-1.9046542999577678785418e-9)
    rh = rh - q*_exact_part_log2;
    rl = rl - q*_approx_part_log2;
#undef _exact_part_log2
#undef _approx_part_log2
    r = std::exp(rh + rl);   // This should now be accurate enough

    ih = fp_add(ih, il, &il);
    i = ih + il;   // Approximate value

#define _recip_pi_by_2 0.6366197723675813431

    q = i * _recip_pi_by_2;
// Round to the nearest integer multiple of pi/2; n mod 4 selects the
// quadrant correction applied after sin/cos below.
    n = (q < 0.0) ? static_cast<int>(q - 0.5) : static_cast<int>(q + 0.5);
    q = static_cast<double>(n);
//
// pi/2 = 105414357/2^26 + 0.000 00000 09920 93579 68054 04416 39751
// to reasonable accuracy. It is vital that the exact part be
// read in as a number with lots of low zero bits, so when it gets
// multiplied by the integer q there is NO rounding needed.
//

#define _exact_part_pi2  1.57079632580280303955078125
#define _approx_part_pi2 9.92093579680540441639751e-10
    ih = ih - q*_exact_part_pi2;
    il = il - q*_approx_part_pi2;
#undef _recip_pi_by_2
#undef _exact_part_pi2
#undef _approx_part_pi2
    i = ih + il;   // Now accurate enough
//
// Having done super-careful range reduction I can call the regular
// sin/cos routines here. If sin/cos could both be computed together
// that could speed things up a little bit, but by this stage they have
// not much in common.
//
    cw = std::cos(i);
    sw = std::sin(i);
    switch (n & 3)   // quadrant control
    {   default:
        case 0: cost = cw;  sint = sw;  break;
        case 1: cost = -sw; sint = cw;  break;
        case 2: cost = -cw; sint = -sw; break;
        case 3: cost = sw;  sint = -cw; break;
    }

//
// Now, at long last, I can assemble the results and return.
//

    z1.real = std::ldexp(r*cost, m);
    z1.imag = std::ldexp(r*sint, m);
    return z1;
}
969
970 //
971 // End of complex-to-complex-power code.
972 //
973
974 // end of arith07.cpp
975