xref: /dragonfly/contrib/mpfr/src/sqrt.c (revision ab6d115f)
/* mpfr_sqrt -- square root of a floating-point number

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino 
234a238c70SJohn Marino #include "mpfr-impl.h"
244a238c70SJohn Marino 
254a238c70SJohn Marino int
mpfr_sqrt(mpfr_ptr r,mpfr_srcptr u,mpfr_rnd_t rnd_mode)264a238c70SJohn Marino mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
274a238c70SJohn Marino {
284a238c70SJohn Marino   mp_size_t rsize; /* number of limbs of r (plus 1 if exact limb multiple) */
294a238c70SJohn Marino   mp_size_t rrsize;
304a238c70SJohn Marino   mp_size_t usize; /* number of limbs of u */
314a238c70SJohn Marino   mp_size_t tsize; /* number of limbs of the sqrtrem remainder */
324a238c70SJohn Marino   mp_size_t k;
334a238c70SJohn Marino   mp_size_t l;
344a238c70SJohn Marino   mpfr_limb_ptr rp, rp0;
354a238c70SJohn Marino   mpfr_limb_ptr up;
364a238c70SJohn Marino   mpfr_limb_ptr sp;
374a238c70SJohn Marino   mp_limb_t sticky0; /* truncated part of input */
384a238c70SJohn Marino   mp_limb_t sticky1; /* truncated part of rp[0] */
394a238c70SJohn Marino   mp_limb_t sticky;
404a238c70SJohn Marino   int odd_exp;
414a238c70SJohn Marino   int sh; /* number of extra bits in rp[0] */
424a238c70SJohn Marino   int inexact; /* return ternary flag */
434a238c70SJohn Marino   mpfr_exp_t expr;
444a238c70SJohn Marino   MPFR_TMP_DECL(marker);
454a238c70SJohn Marino 
464a238c70SJohn Marino   MPFR_LOG_FUNC
474a238c70SJohn Marino     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (u), mpfr_log_prec, u, rnd_mode),
484a238c70SJohn Marino      ("y[%Pu]=%.*Rg inexact=%d",
494a238c70SJohn Marino       mpfr_get_prec (r), mpfr_log_prec, r, inexact));
504a238c70SJohn Marino 
514a238c70SJohn Marino   if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(u)))
524a238c70SJohn Marino     {
534a238c70SJohn Marino       if (MPFR_IS_NAN(u))
544a238c70SJohn Marino         {
554a238c70SJohn Marino           MPFR_SET_NAN(r);
564a238c70SJohn Marino           MPFR_RET_NAN;
574a238c70SJohn Marino         }
584a238c70SJohn Marino       else if (MPFR_IS_ZERO(u))
594a238c70SJohn Marino         {
604a238c70SJohn Marino           /* 0+ or 0- */
614a238c70SJohn Marino           MPFR_SET_SAME_SIGN(r, u);
624a238c70SJohn Marino           MPFR_SET_ZERO(r);
634a238c70SJohn Marino           MPFR_RET(0); /* zero is exact */
644a238c70SJohn Marino         }
654a238c70SJohn Marino       else
664a238c70SJohn Marino         {
674a238c70SJohn Marino           MPFR_ASSERTD(MPFR_IS_INF(u));
684a238c70SJohn Marino           /* sqrt(-Inf) = NAN */
694a238c70SJohn Marino           if (MPFR_IS_NEG(u))
704a238c70SJohn Marino             {
714a238c70SJohn Marino               MPFR_SET_NAN(r);
724a238c70SJohn Marino               MPFR_RET_NAN;
734a238c70SJohn Marino             }
744a238c70SJohn Marino           MPFR_SET_POS(r);
754a238c70SJohn Marino           MPFR_SET_INF(r);
764a238c70SJohn Marino           MPFR_RET(0);
774a238c70SJohn Marino         }
784a238c70SJohn Marino     }
794a238c70SJohn Marino   if (MPFR_UNLIKELY(MPFR_IS_NEG(u)))
804a238c70SJohn Marino     {
814a238c70SJohn Marino       MPFR_SET_NAN(r);
824a238c70SJohn Marino       MPFR_RET_NAN;
834a238c70SJohn Marino     }
844a238c70SJohn Marino   MPFR_SET_POS(r);
854a238c70SJohn Marino 
864a238c70SJohn Marino   MPFR_TMP_MARK (marker);
874a238c70SJohn Marino   MPFR_UNSIGNED_MINUS_MODULO(sh,MPFR_PREC(r));
884a238c70SJohn Marino   if (sh == 0 && rnd_mode == MPFR_RNDN)
894a238c70SJohn Marino     sh = GMP_NUMB_BITS; /* ugly case */
904a238c70SJohn Marino   rsize = MPFR_LIMB_SIZE(r) + (sh == GMP_NUMB_BITS);
914a238c70SJohn Marino   /* rsize is the number of limbs of r + 1 if exact limb multiple and rounding
924a238c70SJohn Marino      to nearest, this is the number of wanted limbs for the square root */
934a238c70SJohn Marino   rrsize = rsize + rsize;
944a238c70SJohn Marino   usize = MPFR_LIMB_SIZE(u); /* number of limbs of u */
954a238c70SJohn Marino   rp0 = MPFR_MANT(r);
964a238c70SJohn Marino   rp = (sh < GMP_NUMB_BITS) ? rp0 : MPFR_TMP_LIMBS_ALLOC (rsize);
974a238c70SJohn Marino   up = MPFR_MANT(u);
984a238c70SJohn Marino   sticky0 = MPFR_LIMB_ZERO; /* truncated part of input */
994a238c70SJohn Marino   sticky1 = MPFR_LIMB_ZERO; /* truncated part of rp[0] */
1004a238c70SJohn Marino   odd_exp = (unsigned int) MPFR_GET_EXP (u) & 1;
1014a238c70SJohn Marino   inexact = -1; /* return ternary flag */
1024a238c70SJohn Marino 
1034a238c70SJohn Marino   sp = MPFR_TMP_LIMBS_ALLOC (rrsize);
1044a238c70SJohn Marino 
1054a238c70SJohn Marino   /* copy the most significant limbs of u to {sp, rrsize} */
1064a238c70SJohn Marino   if (MPFR_LIKELY(usize <= rrsize)) /* in case r and u have the same precision,
1074a238c70SJohn Marino                                        we have indeed rrsize = 2 * usize */
1084a238c70SJohn Marino     {
1094a238c70SJohn Marino       k = rrsize - usize;
1104a238c70SJohn Marino       if (MPFR_LIKELY(k))
1114a238c70SJohn Marino         MPN_ZERO (sp, k);
1124a238c70SJohn Marino       if (odd_exp)
1134a238c70SJohn Marino         {
1144a238c70SJohn Marino           if (MPFR_LIKELY(k))
1154a238c70SJohn Marino             sp[k - 1] = mpn_rshift (sp + k, up, usize, 1);
1164a238c70SJohn Marino           else
1174a238c70SJohn Marino             sticky0 = mpn_rshift (sp, up, usize, 1);
1184a238c70SJohn Marino         }
1194a238c70SJohn Marino       else
1204a238c70SJohn Marino         MPN_COPY (sp + rrsize - usize, up, usize);
1214a238c70SJohn Marino     }
1224a238c70SJohn Marino   else /* usize > rrsize: truncate the input */
1234a238c70SJohn Marino     {
1244a238c70SJohn Marino       k = usize - rrsize;
1254a238c70SJohn Marino       if (odd_exp)
1264a238c70SJohn Marino         sticky0 = mpn_rshift (sp, up + k, rrsize, 1);
1274a238c70SJohn Marino       else
1284a238c70SJohn Marino         MPN_COPY (sp, up + k, rrsize);
1294a238c70SJohn Marino       l = k;
1304a238c70SJohn Marino       while (sticky0 == MPFR_LIMB_ZERO && l != 0)
1314a238c70SJohn Marino         sticky0 = up[--l];
1324a238c70SJohn Marino     }
1334a238c70SJohn Marino 
1344a238c70SJohn Marino   /* sticky0 is non-zero iff the truncated part of the input is non-zero */
1354a238c70SJohn Marino 
1364a238c70SJohn Marino   /* mpn_rootrem with NULL 2nd argument is faster than mpn_sqrtrem, thus use
1374a238c70SJohn Marino      it if available and if the user asked to use GMP internal functions */
1384a238c70SJohn Marino #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_ROOTREM)
1394a238c70SJohn Marino   tsize = __gmpn_rootrem (rp, NULL, sp, rrsize, 2);
1404a238c70SJohn Marino #else
1414a238c70SJohn Marino   tsize = mpn_sqrtrem (rp, NULL, sp, rrsize);
1424a238c70SJohn Marino #endif
1434a238c70SJohn Marino 
1444a238c70SJohn Marino   /* a return value of zero in mpn_sqrtrem indicates a perfect square */
1454a238c70SJohn Marino   sticky = sticky0 || tsize != 0;
1464a238c70SJohn Marino 
1474a238c70SJohn Marino   /* truncate low bits of rp[0] */
1484a238c70SJohn Marino   sticky1 = rp[0] & ((sh < GMP_NUMB_BITS) ? MPFR_LIMB_MASK(sh)
1494a238c70SJohn Marino                      : ~MPFR_LIMB_ZERO);
1504a238c70SJohn Marino   rp[0] -= sticky1;
1514a238c70SJohn Marino 
1524a238c70SJohn Marino   sticky = sticky || sticky1;
1534a238c70SJohn Marino 
1544a238c70SJohn Marino   expr = (MPFR_GET_EXP(u) + odd_exp) / 2;  /* exact */
1554a238c70SJohn Marino 
1564a238c70SJohn Marino   if (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDD || sticky == MPFR_LIMB_ZERO)
1574a238c70SJohn Marino     {
1584a238c70SJohn Marino       inexact = (sticky == MPFR_LIMB_ZERO) ? 0 : -1;
1594a238c70SJohn Marino       goto truncate;
1604a238c70SJohn Marino     }
1614a238c70SJohn Marino   else if (rnd_mode == MPFR_RNDN)
1624a238c70SJohn Marino     {
1634a238c70SJohn Marino       /* if sh < GMP_NUMB_BITS, the round bit is bit (sh-1) of sticky1
1644a238c70SJohn Marino                   and the sticky bit is formed by the low sh-1 bits from
1654a238c70SJohn Marino                   sticky1, together with the sqrtrem remainder and sticky0. */
1664a238c70SJohn Marino       if (sh < GMP_NUMB_BITS)
1674a238c70SJohn Marino         {
1684a238c70SJohn Marino           if (sticky1 & (MPFR_LIMB_ONE << (sh - 1)))
1694a238c70SJohn Marino             { /* round bit is set */
1704a238c70SJohn Marino               if (sticky1 == (MPFR_LIMB_ONE << (sh - 1)) && tsize == 0
1714a238c70SJohn Marino                   && sticky0 == 0)
1724a238c70SJohn Marino                 goto even_rule;
1734a238c70SJohn Marino               else
1744a238c70SJohn Marino                 goto add_one_ulp;
1754a238c70SJohn Marino             }
1764a238c70SJohn Marino           else /* round bit is zero */
1774a238c70SJohn Marino             goto truncate; /* with the default inexact=-1 */
1784a238c70SJohn Marino         }
1794a238c70SJohn Marino       else /* sh = GMP_NUMB_BITS: the round bit is the most significant bit
1804a238c70SJohn Marino               of rp[0], and the remaining GMP_NUMB_BITS-1 bits contribute to
1814a238c70SJohn Marino               the sticky bit */
1824a238c70SJohn Marino         {
1834a238c70SJohn Marino           if (sticky1 & MPFR_LIMB_HIGHBIT)
1844a238c70SJohn Marino             { /* round bit is set */
1854a238c70SJohn Marino               if (sticky1 == MPFR_LIMB_HIGHBIT && tsize == 0 && sticky0 == 0)
1864a238c70SJohn Marino                 goto even_rule;
1874a238c70SJohn Marino               else
1884a238c70SJohn Marino                 goto add_one_ulp;
1894a238c70SJohn Marino             }
1904a238c70SJohn Marino           else /* round bit is zero */
1914a238c70SJohn Marino             goto truncate; /* with the default inexact=-1 */
1924a238c70SJohn Marino         }
1934a238c70SJohn Marino     }
1944a238c70SJohn Marino   else /* rnd_mode=GMP_RDNU, necessarily sticky <> 0, thus add 1 ulp */
1954a238c70SJohn Marino     goto add_one_ulp;
1964a238c70SJohn Marino 
1974a238c70SJohn Marino  even_rule: /* has to set inexact */
1984a238c70SJohn Marino   if (sh < GMP_NUMB_BITS)
1994a238c70SJohn Marino     inexact = (rp[0] & (MPFR_LIMB_ONE << sh)) ? 1 : -1;
2004a238c70SJohn Marino   else
2014a238c70SJohn Marino     inexact = (rp[1] & MPFR_LIMB_ONE) ? 1 : -1;
2024a238c70SJohn Marino   if (inexact == -1)
2034a238c70SJohn Marino     goto truncate;
2044a238c70SJohn Marino   /* else go through add_one_ulp */
2054a238c70SJohn Marino 
2064a238c70SJohn Marino  add_one_ulp:
2074a238c70SJohn Marino   inexact = 1; /* always here */
2084a238c70SJohn Marino   if (sh == GMP_NUMB_BITS)
2094a238c70SJohn Marino     {
2104a238c70SJohn Marino       rp ++;
2114a238c70SJohn Marino       rsize --;
2124a238c70SJohn Marino       sh = 0;
2134a238c70SJohn Marino     }
2144a238c70SJohn Marino   if (mpn_add_1 (rp0, rp, rsize, MPFR_LIMB_ONE << sh))
2154a238c70SJohn Marino     {
2164a238c70SJohn Marino       expr ++;
2174a238c70SJohn Marino       rp[rsize - 1] = MPFR_LIMB_HIGHBIT;
2184a238c70SJohn Marino     }
2194a238c70SJohn Marino   goto end;
2204a238c70SJohn Marino 
2214a238c70SJohn Marino  truncate: /* inexact = 0 or -1 */
2224a238c70SJohn Marino   if (sh == GMP_NUMB_BITS)
2234a238c70SJohn Marino     MPN_COPY (rp0, rp + 1, rsize - 1);
2244a238c70SJohn Marino 
2254a238c70SJohn Marino  end:
2264a238c70SJohn Marino   MPFR_ASSERTN (expr >= MPFR_EMIN_MIN && expr <= MPFR_EMAX_MAX);
2274a238c70SJohn Marino   MPFR_EXP (r) = expr;
2284a238c70SJohn Marino   MPFR_TMP_FREE(marker);
2294a238c70SJohn Marino 
2304a238c70SJohn Marino   return mpfr_check_range (r, inexact, rnd_mode);
2314a238c70SJohn Marino }
232