1*e4b17023SJohn Marino /* Software floating-point emulation. 2*e4b17023SJohn Marino Basic one-word fraction declaration and manipulation. 3*e4b17023SJohn Marino Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc. 4*e4b17023SJohn Marino This file is part of the GNU C Library. 5*e4b17023SJohn Marino Contributed by Richard Henderson (rth@cygnus.com), 6*e4b17023SJohn Marino Jakub Jelinek (jj@ultra.linux.cz), 7*e4b17023SJohn Marino David S. Miller (davem@redhat.com) and 8*e4b17023SJohn Marino Peter Maydell (pmaydell@chiark.greenend.org.uk). 9*e4b17023SJohn Marino 10*e4b17023SJohn Marino The GNU C Library is free software; you can redistribute it and/or 11*e4b17023SJohn Marino modify it under the terms of the GNU Lesser General Public 12*e4b17023SJohn Marino License as published by the Free Software Foundation; either 13*e4b17023SJohn Marino version 2.1 of the License, or (at your option) any later version. 14*e4b17023SJohn Marino 15*e4b17023SJohn Marino In addition to the permissions in the GNU Lesser General Public 16*e4b17023SJohn Marino License, the Free Software Foundation gives you unlimited 17*e4b17023SJohn Marino permission to link the compiled version of this file into 18*e4b17023SJohn Marino combinations with other programs, and to distribute those 19*e4b17023SJohn Marino combinations without any restriction coming from the use of this 20*e4b17023SJohn Marino file. (The Lesser General Public License restrictions do apply in 21*e4b17023SJohn Marino other respects; for example, they cover modification of the file, 22*e4b17023SJohn Marino and distribution when not linked into a combine executable.) 23*e4b17023SJohn Marino 24*e4b17023SJohn Marino The GNU C Library is distributed in the hope that it will be useful, 25*e4b17023SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of 26*e4b17023SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 27*e4b17023SJohn Marino Lesser General Public License for more details. 28*e4b17023SJohn Marino 29*e4b17023SJohn Marino You should have received a copy of the GNU Lesser General Public 30*e4b17023SJohn Marino License along with the GNU C Library; if not, see 31*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */ 32*e4b17023SJohn Marino 33*e4b17023SJohn Marino #define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f 34*e4b17023SJohn Marino #define _FP_FRAC_COPY_1(D,S) (D##_f = S##_f) 35*e4b17023SJohn Marino #define _FP_FRAC_SET_1(X,I) (X##_f = I) 36*e4b17023SJohn Marino #define _FP_FRAC_HIGH_1(X) (X##_f) 37*e4b17023SJohn Marino #define _FP_FRAC_LOW_1(X) (X##_f) 38*e4b17023SJohn Marino #define _FP_FRAC_WORD_1(X,w) (X##_f) 39*e4b17023SJohn Marino 40*e4b17023SJohn Marino #define _FP_FRAC_ADDI_1(X,I) (X##_f += I) 41*e4b17023SJohn Marino #define _FP_FRAC_SLL_1(X,N) \ 42*e4b17023SJohn Marino do { \ 43*e4b17023SJohn Marino if (__builtin_constant_p(N) && (N) == 1) \ 44*e4b17023SJohn Marino X##_f += X##_f; \ 45*e4b17023SJohn Marino else \ 46*e4b17023SJohn Marino X##_f <<= (N); \ 47*e4b17023SJohn Marino } while (0) 48*e4b17023SJohn Marino #define _FP_FRAC_SRL_1(X,N) (X##_f >>= N) 49*e4b17023SJohn Marino 50*e4b17023SJohn Marino /* Right shift with sticky-lsb. */ 51*e4b17023SJohn Marino #define _FP_FRAC_SRST_1(X,S,N,sz) __FP_FRAC_SRST_1(X##_f, S, N, sz) 52*e4b17023SJohn Marino #define _FP_FRAC_SRS_1(X,N,sz) __FP_FRAC_SRS_1(X##_f, N, sz) 53*e4b17023SJohn Marino 54*e4b17023SJohn Marino #define __FP_FRAC_SRST_1(X,S,N,sz) \ 55*e4b17023SJohn Marino do { \ 56*e4b17023SJohn Marino S = (__builtin_constant_p(N) && (N) == 1 \ 57*e4b17023SJohn Marino ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0); \ 58*e4b17023SJohn Marino X = X >> (N); \ 59*e4b17023SJohn Marino } while (0) 60*e4b17023SJohn Marino 61*e4b17023SJohn Marino #define __FP_FRAC_SRS_1(X,N,sz) \ 62*e4b17023SJohn Marino (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1 \ 63*e4b17023SJohn Marino ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0))) 64*e4b17023SJohn Marino 65*e4b17023SJohn Marino #define _FP_FRAC_ADD_1(R,X,Y) (R##_f = X##_f + Y##_f) 66*e4b17023SJohn Marino #define _FP_FRAC_SUB_1(R,X,Y) (R##_f = X##_f - Y##_f) 67*e4b17023SJohn Marino #define _FP_FRAC_DEC_1(X,Y) (X##_f -= Y##_f) 68*e4b17023SJohn Marino #define _FP_FRAC_CLZ_1(z, X) __FP_CLZ(z, X##_f) 69*e4b17023SJohn Marino 70*e4b17023SJohn Marino /* Predicates */ 71*e4b17023SJohn Marino #define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE)X##_f < 0) 72*e4b17023SJohn Marino #define _FP_FRAC_ZEROP_1(X) (X##_f == 0) 73*e4b17023SJohn Marino #define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs) 74*e4b17023SJohn Marino #define _FP_FRAC_CLEAR_OVERP_1(fs,X) (X##_f &= ~_FP_OVERFLOW_##fs) 75*e4b17023SJohn Marino #define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f) 76*e4b17023SJohn Marino #define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f) 77*e4b17023SJohn Marino #define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f) 78*e4b17023SJohn Marino 79*e4b17023SJohn Marino #define _FP_ZEROFRAC_1 0 80*e4b17023SJohn Marino #define _FP_MINFRAC_1 1 81*e4b17023SJohn Marino #define _FP_MAXFRAC_1 (~(_FP_WS_TYPE)0) 82*e4b17023SJohn Marino 83*e4b17023SJohn Marino /* 84*e4b17023SJohn Marino * Unpack the raw bits of a native fp value. Do not classify or 85*e4b17023SJohn Marino * normalize the data. 86*e4b17023SJohn Marino */ 87*e4b17023SJohn Marino 88*e4b17023SJohn Marino #define _FP_UNPACK_RAW_1(fs, X, val) \ 89*e4b17023SJohn Marino do { \ 90*e4b17023SJohn Marino union _FP_UNION_##fs _flo; _flo.flt = (val); \ 91*e4b17023SJohn Marino \ 92*e4b17023SJohn Marino X##_f = _flo.bits.frac; \ 93*e4b17023SJohn Marino X##_e = _flo.bits.exp; \ 94*e4b17023SJohn Marino X##_s = _flo.bits.sign; \ 95*e4b17023SJohn Marino } while (0) 96*e4b17023SJohn Marino 97*e4b17023SJohn Marino #define _FP_UNPACK_RAW_1_P(fs, X, val) \ 98*e4b17023SJohn Marino do { \ 99*e4b17023SJohn Marino union _FP_UNION_##fs *_flo = \ 100*e4b17023SJohn Marino (union _FP_UNION_##fs *)(val); \ 101*e4b17023SJohn Marino \ 102*e4b17023SJohn Marino X##_f = _flo->bits.frac; \ 103*e4b17023SJohn Marino X##_e = _flo->bits.exp; \ 104*e4b17023SJohn Marino X##_s = _flo->bits.sign; \ 105*e4b17023SJohn Marino } while (0) 106*e4b17023SJohn Marino 107*e4b17023SJohn Marino /* 108*e4b17023SJohn Marino * Repack the raw bits of a native fp value. 109*e4b17023SJohn Marino */ 110*e4b17023SJohn Marino 111*e4b17023SJohn Marino #define _FP_PACK_RAW_1(fs, val, X) \ 112*e4b17023SJohn Marino do { \ 113*e4b17023SJohn Marino union _FP_UNION_##fs _flo; \ 114*e4b17023SJohn Marino \ 115*e4b17023SJohn Marino _flo.bits.frac = X##_f; \ 116*e4b17023SJohn Marino _flo.bits.exp = X##_e; \ 117*e4b17023SJohn Marino _flo.bits.sign = X##_s; \ 118*e4b17023SJohn Marino \ 119*e4b17023SJohn Marino (val) = _flo.flt; \ 120*e4b17023SJohn Marino } while (0) 121*e4b17023SJohn Marino 122*e4b17023SJohn Marino #define _FP_PACK_RAW_1_P(fs, val, X) \ 123*e4b17023SJohn Marino do { \ 124*e4b17023SJohn Marino union _FP_UNION_##fs *_flo = \ 125*e4b17023SJohn Marino (union _FP_UNION_##fs *)(val); \ 126*e4b17023SJohn Marino \ 127*e4b17023SJohn Marino _flo->bits.frac = X##_f; \ 128*e4b17023SJohn Marino _flo->bits.exp = X##_e; \ 129*e4b17023SJohn Marino _flo->bits.sign = X##_s; \ 130*e4b17023SJohn Marino } while (0) 131*e4b17023SJohn Marino 132*e4b17023SJohn Marino 133*e4b17023SJohn Marino /* 134*e4b17023SJohn Marino * Multiplication algorithms: 135*e4b17023SJohn Marino */ 136*e4b17023SJohn Marino 137*e4b17023SJohn Marino /* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the 138*e4b17023SJohn Marino multiplication immediately. */ 139*e4b17023SJohn Marino 140*e4b17023SJohn Marino #define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y) \ 141*e4b17023SJohn Marino do { \ 142*e4b17023SJohn Marino R##_f = X##_f * Y##_f; \ 143*e4b17023SJohn Marino /* Normalize since we know where the msb of the multiplicands \ 144*e4b17023SJohn Marino were (bit B), we know that the msb of the of the product is \ 145*e4b17023SJohn Marino at either 2B or 2B-1. */ \ 146*e4b17023SJohn Marino _FP_FRAC_SRS_1(R, wfracbits-1, 2*wfracbits); \ 147*e4b17023SJohn Marino } while (0) 148*e4b17023SJohn Marino 149*e4b17023SJohn Marino /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 150*e4b17023SJohn Marino 151*e4b17023SJohn Marino #define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit) \ 152*e4b17023SJohn Marino do { \ 153*e4b17023SJohn Marino _FP_W_TYPE _Z_f0, _Z_f1; \ 154*e4b17023SJohn Marino doit(_Z_f1, _Z_f0, X##_f, Y##_f); \ 155*e4b17023SJohn Marino /* Normalize since we know where the msb of the multiplicands \ 156*e4b17023SJohn Marino were (bit B), we know that the msb of the of the product is \ 157*e4b17023SJohn Marino at either 2B or 2B-1. */ \ 158*e4b17023SJohn Marino _FP_FRAC_SRS_2(_Z, wfracbits-1, 2*wfracbits); \ 159*e4b17023SJohn Marino R##_f = _Z_f0; \ 160*e4b17023SJohn Marino } while (0) 161*e4b17023SJohn Marino 162*e4b17023SJohn Marino /* Finally, a simple widening multiply algorithm. What fun! */ 163*e4b17023SJohn Marino 164*e4b17023SJohn Marino #define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \ 165*e4b17023SJohn Marino do { \ 166*e4b17023SJohn Marino _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1; \ 167*e4b17023SJohn Marino \ 168*e4b17023SJohn Marino /* split the words in half */ \ 169*e4b17023SJohn Marino _xh = X##_f >> (_FP_W_TYPE_SIZE/2); \ 170*e4b17023SJohn Marino _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \ 171*e4b17023SJohn Marino _yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \ 172*e4b17023SJohn Marino _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \ 173*e4b17023SJohn Marino \ 174*e4b17023SJohn Marino /* multiply the pieces */ \ 175*e4b17023SJohn Marino _z_f0 = _xl * _yl; \ 176*e4b17023SJohn Marino _a_f0 = _xh * _yl; \ 177*e4b17023SJohn Marino _a_f1 = _xl * _yh; \ 178*e4b17023SJohn Marino _z_f1 = _xh * _yh; \ 179*e4b17023SJohn Marino \ 180*e4b17023SJohn Marino /* reassemble into two full words */ \ 181*e4b17023SJohn Marino if ((_a_f0 += _a_f1) < _a_f1) \ 182*e4b17023SJohn Marino _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2); \ 183*e4b17023SJohn Marino _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \ 184*e4b17023SJohn Marino _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \ 185*e4b17023SJohn Marino _FP_FRAC_ADD_2(_z, _z, _a); \ 186*e4b17023SJohn Marino \ 187*e4b17023SJohn Marino /* normalize */ \ 188*e4b17023SJohn Marino _FP_FRAC_SRS_2(_z, wfracbits - 1, 2*wfracbits); \ 189*e4b17023SJohn Marino R##_f = _z_f0; \ 190*e4b17023SJohn Marino } while (0) 191*e4b17023SJohn Marino 192*e4b17023SJohn Marino 193*e4b17023SJohn Marino /* 194*e4b17023SJohn Marino * Division algorithms: 195*e4b17023SJohn Marino */ 196*e4b17023SJohn Marino 197*e4b17023SJohn Marino /* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the 198*e4b17023SJohn Marino division immediately. Give this macro either _FP_DIV_HELP_imm for 199*e4b17023SJohn Marino C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you 200*e4b17023SJohn Marino choose will depend on what the compiler does with divrem4. */ 201*e4b17023SJohn Marino 202*e4b17023SJohn Marino #define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \ 203*e4b17023SJohn Marino do { \ 204*e4b17023SJohn Marino _FP_W_TYPE _q, _r; \ 205*e4b17023SJohn Marino X##_f <<= (X##_f < Y##_f \ 206*e4b17023SJohn Marino ? R##_e--, _FP_WFRACBITS_##fs \ 207*e4b17023SJohn Marino : _FP_WFRACBITS_##fs - 1); \ 208*e4b17023SJohn Marino doit(_q, _r, X##_f, Y##_f); \ 209*e4b17023SJohn Marino R##_f = _q | (_r != 0); \ 210*e4b17023SJohn Marino } while (0) 211*e4b17023SJohn Marino 212*e4b17023SJohn Marino /* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd 213*e4b17023SJohn Marino that may be useful in this situation. This first is for a primitive 214*e4b17023SJohn Marino that requires normalization, the second for one that does not. Look 215*e4b17023SJohn Marino for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */ 216*e4b17023SJohn Marino 217*e4b17023SJohn Marino #define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \ 218*e4b17023SJohn Marino do { \ 219*e4b17023SJohn Marino _FP_W_TYPE _nh, _nl, _q, _r, _y; \ 220*e4b17023SJohn Marino \ 221*e4b17023SJohn Marino /* Normalize Y -- i.e. make the most significant bit set. */ \ 222*e4b17023SJohn Marino _y = Y##_f << _FP_WFRACXBITS_##fs; \ 223*e4b17023SJohn Marino \ 224*e4b17023SJohn Marino /* Shift X op correspondingly high, that is, up one full word. */ \ 225*e4b17023SJohn Marino if (X##_f < Y##_f) \ 226*e4b17023SJohn Marino { \ 227*e4b17023SJohn Marino R##_e--; \ 228*e4b17023SJohn Marino _nl = 0; \ 229*e4b17023SJohn Marino _nh = X##_f; \ 230*e4b17023SJohn Marino } \ 231*e4b17023SJohn Marino else \ 232*e4b17023SJohn Marino { \ 233*e4b17023SJohn Marino _nl = X##_f << (_FP_W_TYPE_SIZE - 1); \ 234*e4b17023SJohn Marino _nh = X##_f >> 1; \ 235*e4b17023SJohn Marino } \ 236*e4b17023SJohn Marino \ 237*e4b17023SJohn Marino udiv_qrnnd(_q, _r, _nh, _nl, _y); \ 238*e4b17023SJohn Marino R##_f = _q | (_r != 0); \ 239*e4b17023SJohn Marino } while (0) 240*e4b17023SJohn Marino 241*e4b17023SJohn Marino #define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \ 242*e4b17023SJohn Marino do { \ 243*e4b17023SJohn Marino _FP_W_TYPE _nh, _nl, _q, _r; \ 244*e4b17023SJohn Marino if (X##_f < Y##_f) \ 245*e4b17023SJohn Marino { \ 246*e4b17023SJohn Marino R##_e--; \ 247*e4b17023SJohn Marino _nl = X##_f << _FP_WFRACBITS_##fs; \ 248*e4b17023SJohn Marino _nh = X##_f >> _FP_WFRACXBITS_##fs; \ 249*e4b17023SJohn Marino } \ 250*e4b17023SJohn Marino else \ 251*e4b17023SJohn Marino { \ 252*e4b17023SJohn Marino _nl = X##_f << (_FP_WFRACBITS_##fs - 1); \ 253*e4b17023SJohn Marino _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \ 254*e4b17023SJohn Marino } \ 255*e4b17023SJohn Marino udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \ 256*e4b17023SJohn Marino R##_f = _q | (_r != 0); \ 257*e4b17023SJohn Marino } while (0) 258*e4b17023SJohn Marino 259*e4b17023SJohn Marino 260*e4b17023SJohn Marino /* 261*e4b17023SJohn Marino * Square root algorithms: 262*e4b17023SJohn Marino * We have just one right now, maybe Newton approximation 263*e4b17023SJohn Marino * should be added for those machines where division is fast. 264*e4b17023SJohn Marino */ 265*e4b17023SJohn Marino 266*e4b17023SJohn Marino #define _FP_SQRT_MEAT_1(R, S, T, X, q) \ 267*e4b17023SJohn Marino do { \ 268*e4b17023SJohn Marino while (q != _FP_WORK_ROUND) \ 269*e4b17023SJohn Marino { \ 270*e4b17023SJohn Marino T##_f = S##_f + q; \ 271*e4b17023SJohn Marino if (T##_f <= X##_f) \ 272*e4b17023SJohn Marino { \ 273*e4b17023SJohn Marino S##_f = T##_f + q; \ 274*e4b17023SJohn Marino X##_f -= T##_f; \ 275*e4b17023SJohn Marino R##_f += q; \ 276*e4b17023SJohn Marino } \ 277*e4b17023SJohn Marino _FP_FRAC_SLL_1(X, 1); \ 278*e4b17023SJohn Marino q >>= 1; \ 279*e4b17023SJohn Marino } \ 280*e4b17023SJohn Marino if (X##_f) \ 281*e4b17023SJohn Marino { \ 282*e4b17023SJohn Marino if (S##_f < X##_f) \ 283*e4b17023SJohn Marino R##_f |= _FP_WORK_ROUND; \ 284*e4b17023SJohn Marino R##_f |= _FP_WORK_STICKY; \ 285*e4b17023SJohn Marino } \ 286*e4b17023SJohn Marino } while (0) 287*e4b17023SJohn Marino 288*e4b17023SJohn Marino /* 289*e4b17023SJohn Marino * Assembly/disassembly for converting to/from integral types. 290*e4b17023SJohn Marino * No shifting or overflow handled here. 291*e4b17023SJohn Marino */ 292*e4b17023SJohn Marino 293*e4b17023SJohn Marino #define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f) 294*e4b17023SJohn Marino #define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r) 295*e4b17023SJohn Marino 296*e4b17023SJohn Marino 297*e4b17023SJohn Marino /* 298*e4b17023SJohn Marino * Convert FP values between word sizes 299*e4b17023SJohn Marino */ 300*e4b17023SJohn Marino 301*e4b17023SJohn Marino #define _FP_FRAC_COPY_1_1(D, S) (D##_f = S##_f) 302