/* mpfr_sqrt -- square root of a floating-point number

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino
234a238c70SJohn Marino #include "mpfr-impl.h"
244a238c70SJohn Marino
254a238c70SJohn Marino int
mpfr_sqrt(mpfr_ptr r,mpfr_srcptr u,mpfr_rnd_t rnd_mode)264a238c70SJohn Marino mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
274a238c70SJohn Marino {
284a238c70SJohn Marino mp_size_t rsize; /* number of limbs of r (plus 1 if exact limb multiple) */
294a238c70SJohn Marino mp_size_t rrsize;
304a238c70SJohn Marino mp_size_t usize; /* number of limbs of u */
314a238c70SJohn Marino mp_size_t tsize; /* number of limbs of the sqrtrem remainder */
324a238c70SJohn Marino mp_size_t k;
334a238c70SJohn Marino mp_size_t l;
344a238c70SJohn Marino mpfr_limb_ptr rp, rp0;
354a238c70SJohn Marino mpfr_limb_ptr up;
364a238c70SJohn Marino mpfr_limb_ptr sp;
374a238c70SJohn Marino mp_limb_t sticky0; /* truncated part of input */
384a238c70SJohn Marino mp_limb_t sticky1; /* truncated part of rp[0] */
394a238c70SJohn Marino mp_limb_t sticky;
404a238c70SJohn Marino int odd_exp;
414a238c70SJohn Marino int sh; /* number of extra bits in rp[0] */
424a238c70SJohn Marino int inexact; /* return ternary flag */
434a238c70SJohn Marino mpfr_exp_t expr;
444a238c70SJohn Marino MPFR_TMP_DECL(marker);
454a238c70SJohn Marino
464a238c70SJohn Marino MPFR_LOG_FUNC
474a238c70SJohn Marino (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (u), mpfr_log_prec, u, rnd_mode),
484a238c70SJohn Marino ("y[%Pu]=%.*Rg inexact=%d",
494a238c70SJohn Marino mpfr_get_prec (r), mpfr_log_prec, r, inexact));
504a238c70SJohn Marino
514a238c70SJohn Marino if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(u)))
524a238c70SJohn Marino {
534a238c70SJohn Marino if (MPFR_IS_NAN(u))
544a238c70SJohn Marino {
554a238c70SJohn Marino MPFR_SET_NAN(r);
564a238c70SJohn Marino MPFR_RET_NAN;
574a238c70SJohn Marino }
584a238c70SJohn Marino else if (MPFR_IS_ZERO(u))
594a238c70SJohn Marino {
604a238c70SJohn Marino /* 0+ or 0- */
614a238c70SJohn Marino MPFR_SET_SAME_SIGN(r, u);
624a238c70SJohn Marino MPFR_SET_ZERO(r);
634a238c70SJohn Marino MPFR_RET(0); /* zero is exact */
644a238c70SJohn Marino }
654a238c70SJohn Marino else
664a238c70SJohn Marino {
674a238c70SJohn Marino MPFR_ASSERTD(MPFR_IS_INF(u));
684a238c70SJohn Marino /* sqrt(-Inf) = NAN */
694a238c70SJohn Marino if (MPFR_IS_NEG(u))
704a238c70SJohn Marino {
714a238c70SJohn Marino MPFR_SET_NAN(r);
724a238c70SJohn Marino MPFR_RET_NAN;
734a238c70SJohn Marino }
744a238c70SJohn Marino MPFR_SET_POS(r);
754a238c70SJohn Marino MPFR_SET_INF(r);
764a238c70SJohn Marino MPFR_RET(0);
774a238c70SJohn Marino }
784a238c70SJohn Marino }
794a238c70SJohn Marino if (MPFR_UNLIKELY(MPFR_IS_NEG(u)))
804a238c70SJohn Marino {
814a238c70SJohn Marino MPFR_SET_NAN(r);
824a238c70SJohn Marino MPFR_RET_NAN;
834a238c70SJohn Marino }
844a238c70SJohn Marino MPFR_SET_POS(r);
854a238c70SJohn Marino
864a238c70SJohn Marino MPFR_TMP_MARK (marker);
874a238c70SJohn Marino MPFR_UNSIGNED_MINUS_MODULO(sh,MPFR_PREC(r));
884a238c70SJohn Marino if (sh == 0 && rnd_mode == MPFR_RNDN)
894a238c70SJohn Marino sh = GMP_NUMB_BITS; /* ugly case */
904a238c70SJohn Marino rsize = MPFR_LIMB_SIZE(r) + (sh == GMP_NUMB_BITS);
914a238c70SJohn Marino /* rsize is the number of limbs of r + 1 if exact limb multiple and rounding
924a238c70SJohn Marino to nearest, this is the number of wanted limbs for the square root */
934a238c70SJohn Marino rrsize = rsize + rsize;
944a238c70SJohn Marino usize = MPFR_LIMB_SIZE(u); /* number of limbs of u */
954a238c70SJohn Marino rp0 = MPFR_MANT(r);
964a238c70SJohn Marino rp = (sh < GMP_NUMB_BITS) ? rp0 : MPFR_TMP_LIMBS_ALLOC (rsize);
974a238c70SJohn Marino up = MPFR_MANT(u);
984a238c70SJohn Marino sticky0 = MPFR_LIMB_ZERO; /* truncated part of input */
994a238c70SJohn Marino sticky1 = MPFR_LIMB_ZERO; /* truncated part of rp[0] */
1004a238c70SJohn Marino odd_exp = (unsigned int) MPFR_GET_EXP (u) & 1;
1014a238c70SJohn Marino inexact = -1; /* return ternary flag */
1024a238c70SJohn Marino
1034a238c70SJohn Marino sp = MPFR_TMP_LIMBS_ALLOC (rrsize);
1044a238c70SJohn Marino
1054a238c70SJohn Marino /* copy the most significant limbs of u to {sp, rrsize} */
1064a238c70SJohn Marino if (MPFR_LIKELY(usize <= rrsize)) /* in case r and u have the same precision,
1074a238c70SJohn Marino we have indeed rrsize = 2 * usize */
1084a238c70SJohn Marino {
1094a238c70SJohn Marino k = rrsize - usize;
1104a238c70SJohn Marino if (MPFR_LIKELY(k))
1114a238c70SJohn Marino MPN_ZERO (sp, k);
1124a238c70SJohn Marino if (odd_exp)
1134a238c70SJohn Marino {
1144a238c70SJohn Marino if (MPFR_LIKELY(k))
1154a238c70SJohn Marino sp[k - 1] = mpn_rshift (sp + k, up, usize, 1);
1164a238c70SJohn Marino else
1174a238c70SJohn Marino sticky0 = mpn_rshift (sp, up, usize, 1);
1184a238c70SJohn Marino }
1194a238c70SJohn Marino else
1204a238c70SJohn Marino MPN_COPY (sp + rrsize - usize, up, usize);
1214a238c70SJohn Marino }
1224a238c70SJohn Marino else /* usize > rrsize: truncate the input */
1234a238c70SJohn Marino {
1244a238c70SJohn Marino k = usize - rrsize;
1254a238c70SJohn Marino if (odd_exp)
1264a238c70SJohn Marino sticky0 = mpn_rshift (sp, up + k, rrsize, 1);
1274a238c70SJohn Marino else
1284a238c70SJohn Marino MPN_COPY (sp, up + k, rrsize);
1294a238c70SJohn Marino l = k;
1304a238c70SJohn Marino while (sticky0 == MPFR_LIMB_ZERO && l != 0)
1314a238c70SJohn Marino sticky0 = up[--l];
1324a238c70SJohn Marino }
1334a238c70SJohn Marino
1344a238c70SJohn Marino /* sticky0 is non-zero iff the truncated part of the input is non-zero */
1354a238c70SJohn Marino
1364a238c70SJohn Marino /* mpn_rootrem with NULL 2nd argument is faster than mpn_sqrtrem, thus use
1374a238c70SJohn Marino it if available and if the user asked to use GMP internal functions */
1384a238c70SJohn Marino #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_ROOTREM)
1394a238c70SJohn Marino tsize = __gmpn_rootrem (rp, NULL, sp, rrsize, 2);
1404a238c70SJohn Marino #else
1414a238c70SJohn Marino tsize = mpn_sqrtrem (rp, NULL, sp, rrsize);
1424a238c70SJohn Marino #endif
1434a238c70SJohn Marino
1444a238c70SJohn Marino /* a return value of zero in mpn_sqrtrem indicates a perfect square */
1454a238c70SJohn Marino sticky = sticky0 || tsize != 0;
1464a238c70SJohn Marino
1474a238c70SJohn Marino /* truncate low bits of rp[0] */
1484a238c70SJohn Marino sticky1 = rp[0] & ((sh < GMP_NUMB_BITS) ? MPFR_LIMB_MASK(sh)
1494a238c70SJohn Marino : ~MPFR_LIMB_ZERO);
1504a238c70SJohn Marino rp[0] -= sticky1;
1514a238c70SJohn Marino
1524a238c70SJohn Marino sticky = sticky || sticky1;
1534a238c70SJohn Marino
1544a238c70SJohn Marino expr = (MPFR_GET_EXP(u) + odd_exp) / 2; /* exact */
1554a238c70SJohn Marino
1564a238c70SJohn Marino if (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDD || sticky == MPFR_LIMB_ZERO)
1574a238c70SJohn Marino {
1584a238c70SJohn Marino inexact = (sticky == MPFR_LIMB_ZERO) ? 0 : -1;
1594a238c70SJohn Marino goto truncate;
1604a238c70SJohn Marino }
1614a238c70SJohn Marino else if (rnd_mode == MPFR_RNDN)
1624a238c70SJohn Marino {
1634a238c70SJohn Marino /* if sh < GMP_NUMB_BITS, the round bit is bit (sh-1) of sticky1
1644a238c70SJohn Marino and the sticky bit is formed by the low sh-1 bits from
1654a238c70SJohn Marino sticky1, together with the sqrtrem remainder and sticky0. */
1664a238c70SJohn Marino if (sh < GMP_NUMB_BITS)
1674a238c70SJohn Marino {
1684a238c70SJohn Marino if (sticky1 & (MPFR_LIMB_ONE << (sh - 1)))
1694a238c70SJohn Marino { /* round bit is set */
1704a238c70SJohn Marino if (sticky1 == (MPFR_LIMB_ONE << (sh - 1)) && tsize == 0
1714a238c70SJohn Marino && sticky0 == 0)
1724a238c70SJohn Marino goto even_rule;
1734a238c70SJohn Marino else
1744a238c70SJohn Marino goto add_one_ulp;
1754a238c70SJohn Marino }
1764a238c70SJohn Marino else /* round bit is zero */
1774a238c70SJohn Marino goto truncate; /* with the default inexact=-1 */
1784a238c70SJohn Marino }
1794a238c70SJohn Marino else /* sh = GMP_NUMB_BITS: the round bit is the most significant bit
1804a238c70SJohn Marino of rp[0], and the remaining GMP_NUMB_BITS-1 bits contribute to
1814a238c70SJohn Marino the sticky bit */
1824a238c70SJohn Marino {
1834a238c70SJohn Marino if (sticky1 & MPFR_LIMB_HIGHBIT)
1844a238c70SJohn Marino { /* round bit is set */
1854a238c70SJohn Marino if (sticky1 == MPFR_LIMB_HIGHBIT && tsize == 0 && sticky0 == 0)
1864a238c70SJohn Marino goto even_rule;
1874a238c70SJohn Marino else
1884a238c70SJohn Marino goto add_one_ulp;
1894a238c70SJohn Marino }
1904a238c70SJohn Marino else /* round bit is zero */
1914a238c70SJohn Marino goto truncate; /* with the default inexact=-1 */
1924a238c70SJohn Marino }
1934a238c70SJohn Marino }
1944a238c70SJohn Marino else /* rnd_mode=GMP_RDNU, necessarily sticky <> 0, thus add 1 ulp */
1954a238c70SJohn Marino goto add_one_ulp;
1964a238c70SJohn Marino
1974a238c70SJohn Marino even_rule: /* has to set inexact */
1984a238c70SJohn Marino if (sh < GMP_NUMB_BITS)
1994a238c70SJohn Marino inexact = (rp[0] & (MPFR_LIMB_ONE << sh)) ? 1 : -1;
2004a238c70SJohn Marino else
2014a238c70SJohn Marino inexact = (rp[1] & MPFR_LIMB_ONE) ? 1 : -1;
2024a238c70SJohn Marino if (inexact == -1)
2034a238c70SJohn Marino goto truncate;
2044a238c70SJohn Marino /* else go through add_one_ulp */
2054a238c70SJohn Marino
2064a238c70SJohn Marino add_one_ulp:
2074a238c70SJohn Marino inexact = 1; /* always here */
2084a238c70SJohn Marino if (sh == GMP_NUMB_BITS)
2094a238c70SJohn Marino {
2104a238c70SJohn Marino rp ++;
2114a238c70SJohn Marino rsize --;
2124a238c70SJohn Marino sh = 0;
2134a238c70SJohn Marino }
2144a238c70SJohn Marino if (mpn_add_1 (rp0, rp, rsize, MPFR_LIMB_ONE << sh))
2154a238c70SJohn Marino {
2164a238c70SJohn Marino expr ++;
2174a238c70SJohn Marino rp[rsize - 1] = MPFR_LIMB_HIGHBIT;
2184a238c70SJohn Marino }
2194a238c70SJohn Marino goto end;
2204a238c70SJohn Marino
2214a238c70SJohn Marino truncate: /* inexact = 0 or -1 */
2224a238c70SJohn Marino if (sh == GMP_NUMB_BITS)
2234a238c70SJohn Marino MPN_COPY (rp0, rp + 1, rsize - 1);
2244a238c70SJohn Marino
2254a238c70SJohn Marino end:
2264a238c70SJohn Marino MPFR_ASSERTN (expr >= MPFR_EMIN_MIN && expr <= MPFR_EMAX_MAX);
2274a238c70SJohn Marino MPFR_EXP (r) = expr;
2284a238c70SJohn Marino MPFR_TMP_FREE(marker);
2294a238c70SJohn Marino
2304a238c70SJohn Marino return mpfr_check_range (r, inexact, rnd_mode);
2314a238c70SJohn Marino }
232