math/libm_sse2/remainderf.c

4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer/*******************************************************************************
4afb647cSTimo KreuzerMIT License
4afb647cSTimo Kreuzer-----------
4afb647cSTimo Kreuzer
4afb647cSTimo KreuzerCopyright (c) 2002-2019 Advanced Micro Devices, Inc.
4afb647cSTimo Kreuzer
4afb647cSTimo KreuzerPermission is hereby granted, free of charge, to any person obtaining a copy
4afb647cSTimo Kreuzerof this Software and associated documentation files (the "Software"), to deal
4afb647cSTimo Kreuzerin the Software without restriction, including without limitation the rights
4afb647cSTimo Kreuzerto use, copy, modify, merge, publish, distribute, sublicense, and/or sell
4afb647cSTimo Kreuzercopies of the Software, and to permit persons to whom the Software is
4afb647cSTimo Kreuzerfurnished to do so, subject to the following conditions:
4afb647cSTimo Kreuzer
4afb647cSTimo KreuzerThe above copyright notice and this permission notice shall be included in
4afb647cSTimo Kreuzerall copies or substantial portions of the Software.
4afb647cSTimo Kreuzer
4afb647cSTimo KreuzerTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4afb647cSTimo KreuzerIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4afb647cSTimo KreuzerFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4afb647cSTimo KreuzerAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
4afb647cSTimo KreuzerLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
4afb647cSTimo KreuzerOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
4afb647cSTimo KreuzerTHE SOFTWARE.
4afb647cSTimo Kreuzer*******************************************************************************/
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer#include "libm.h"
4afb647cSTimo Kreuzer#include "libm_util.h"
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer#define USE_NANF_WITH_FLAGS
4afb647cSTimo Kreuzer#define USE_SCALEDOUBLE_1
4afb647cSTimo Kreuzer#define USE_GET_FPSW_INLINE
4afb647cSTimo Kreuzer#define USE_SET_FPSW_INLINE
4afb647cSTimo Kreuzer#define USE_HANDLE_ERRORF
4afb647cSTimo Kreuzer#include "libm_inlines.h"
4afb647cSTimo Kreuzer#undef USE_NANF_WITH_FLAGS
4afb647cSTimo Kreuzer#undef USE_SCALEDOUBLE_1
4afb647cSTimo Kreuzer#undef USE_GET_FPSW_INLINE
4afb647cSTimo Kreuzer#undef USE_SET_FPSW_INLINE
4afb647cSTimo Kreuzer#undef USE_HANDLE_ERRORF
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer#if !defined(_CRTBLD_C9X)
4afb647cSTimo Kreuzer#define _CRTBLD_C9X
4afb647cSTimo Kreuzer#endif
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer#include "libm_errno.h"
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer// Disable "C4163: not available as intrinsic function" warning that older
4afb647cSTimo Kreuzer// compilers may issue here.
4afb647cSTimo Kreuzer#pragma warning(disable:4163)
4afb647cSTimo Kreuzer#pragma function(remainderf,fmodf)
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer
/*
 * fmodf / remainderf -- single-precision floating-point remainder of x / y.
 *
 * This one body is compiled as fmodf() when COMPILING_FMOD is defined and
 * as remainderf() otherwise. _FUNCNAME and _OPERATION carry the matching
 * name string and operation code into the _handle_errorf() calls.
 *
 * Parameters:
 *   x - dividend
 *   y - divisor
 *
 * Returns (cases are tested in the order listed):
 *   - x is NaN: result of _handle_errorf() given the bits of x with
 *     0x00400000 OR-ed in (bit 22, the top mantissa bit of a 32-bit float).
 *   - x is infinite: result of _handle_errorf() given INDEFBITPATT_SP32,
 *     AMD_F_INVALID and EDOM.
 *   - y is NaN: as for x NaN, using the bits of y.
 *   - y is infinite: x when _CRTBLD_C9X is defined, otherwise the same
 *     _handle_errorf() call as for infinite x.
 *   - x is zero and y is nonzero: x, sign preserved.
 *   - y is zero: same _handle_errorf() call as for infinite x.
 *   - abs(x) == abs(y): zero carrying the sign of x.
 *   - otherwise: the reduced value of abs(x) with respect to abs(y),
 *     negated when x < 0. For fmodf the reduced value is in [0, abs(y));
 *     for remainderf it is in [-abs(y)/2, abs(y)/2], an exact half-way
 *     case being resolved so that the implied quotient is even.
 *
 * All arithmetic is carried out on double-precision copies of x and y.
 *
 * NOTE(review): the value returned by _handle_errorf() and its side effects
 * (errno, exception flags) are defined outside this file -- confirm against
 * its definition before relying on them.
 */
4afb647cSTimo Kreuzer#undef _FUNCNAME
4afb647cSTimo Kreuzer#if defined(COMPILING_FMOD)
4afb647cSTimo Kreuzerfloat fmodf(float x, float y)
4afb647cSTimo Kreuzer#define _FUNCNAME "fmodf"
4afb647cSTimo Kreuzer#define _OPERATION OP_FMOD
4afb647cSTimo Kreuzer#else
4afb647cSTimo Kreuzerfloat remainderf(float x, float y)
4afb647cSTimo Kreuzer#define _FUNCNAME "remainderf"
4afb647cSTimo Kreuzer#define _OPERATION OP_REM
4afb647cSTimo Kreuzer#endif
4afb647cSTimo Kreuzer{
4afb647cSTimo Kreuzer  double dx, dy, scale, w, t;
4afb647cSTimo Kreuzer  int i, ntimes, xexp, yexp;
*9e8ed3f8STimo Kreuzer  unsigned long long ux, uy, ax, ay;
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  unsigned int sw;
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  dx = x;  /* work on double-precision copies of the arguments */
4afb647cSTimo Kreuzer  dy = y;
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  GET_BITS_DP64(dx, ux);
4afb647cSTimo Kreuzer  GET_BITS_DP64(dy, uy);
4afb647cSTimo Kreuzer  ax = ux & ~SIGNBIT_DP64;  /* bit pattern of abs(x) */
4afb647cSTimo Kreuzer  ay = uy & ~SIGNBIT_DP64;  /* bit pattern of abs(y) */
4afb647cSTimo Kreuzer  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);  /* biased exponent field of x */
4afb647cSTimo Kreuzer  yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);  /* biased exponent field of y */
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  if (xexp < 1 || xexp > BIASEDEMAX_DP64 ||
4afb647cSTimo Kreuzer      yexp < 1 || yexp > BIASEDEMAX_DP64)
4afb647cSTimo Kreuzer    {
4afb647cSTimo Kreuzer      /* x or y is zero, NaN or infinity (neither x nor y can be
4afb647cSTimo Kreuzer         denormalized because we promoted from float to double).
4afb647cSTimo Kreuzer         The tests run in priority order: x NaN/infinity first,
4afb647cSTimo Kreuzer         then y NaN/infinity, then the zero cases. */
4afb647cSTimo Kreuzer      if (xexp > BIASEDEMAX_DP64)
4afb647cSTimo Kreuzer        {
4afb647cSTimo Kreuzer          /* x is NaN or infinity */
4afb647cSTimo Kreuzer          if (ux & MANTBITS_DP64)
4afb647cSTimo Kreuzer            {
4afb647cSTimo Kreuzer              /* x is NaN; hand back its single-precision bit pattern
4afb647cSTimo Kreuzer                 with bit 22 (0x00400000) forced on */
4afb647cSTimo Kreuzer              unsigned int ufx;
4afb647cSTimo Kreuzer              GET_BITS_SP32(x, ufx);
4afb647cSTimo Kreuzer              return _handle_errorf(_FUNCNAME, _OPERATION, ufx|0x00400000, _DOMAIN, 0,
4afb647cSTimo Kreuzer                                   EDOM, x, y, 2);
4afb647cSTimo Kreuzer            }
4afb647cSTimo Kreuzer          else
4afb647cSTimo Kreuzer            {
4afb647cSTimo Kreuzer              /* x is infinity; result is NaN */
4afb647cSTimo Kreuzer              return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN,
4afb647cSTimo Kreuzer                                   AMD_F_INVALID, EDOM, x, y, 2);
4afb647cSTimo Kreuzer            }
4afb647cSTimo Kreuzer        }
4afb647cSTimo Kreuzer      else if (yexp > BIASEDEMAX_DP64)
4afb647cSTimo Kreuzer        {
4afb647cSTimo Kreuzer          /* y is NaN or infinity */
4afb647cSTimo Kreuzer          if (uy & MANTBITS_DP64)
4afb647cSTimo Kreuzer            {
4afb647cSTimo Kreuzer              /* y is NaN; same treatment as a NaN x, using y's bits */
4afb647cSTimo Kreuzer              unsigned int ufy;
4afb647cSTimo Kreuzer              GET_BITS_SP32(y, ufy);
4afb647cSTimo Kreuzer              return _handle_errorf(_FUNCNAME, _OPERATION, ufy|0x00400000, _DOMAIN, 0,
4afb647cSTimo Kreuzer                                   EDOM, x, y, 2);
4afb647cSTimo Kreuzer            }
4afb647cSTimo Kreuzer          else
4afb647cSTimo Kreuzer            {
4afb647cSTimo Kreuzer#ifdef _CRTBLD_C9X
4afb647cSTimo Kreuzer              /* C99 return for y = +-inf is x */
4afb647cSTimo Kreuzer              return x;
4afb647cSTimo Kreuzer#else
4afb647cSTimo Kreuzer              /* y is infinity; result is indefinite */
4afb647cSTimo Kreuzer              return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN,
4afb647cSTimo Kreuzer                                   AMD_F_INVALID, EDOM, x, y, 2);
4afb647cSTimo Kreuzer#endif
4afb647cSTimo Kreuzer            }
4afb647cSTimo Kreuzer        }
4afb647cSTimo Kreuzer      else if (xexp < 1)
4afb647cSTimo Kreuzer        {
4afb647cSTimo Kreuzer          /* x must be zero (cannot be denormalized) */
4afb647cSTimo Kreuzer          if (yexp < 1)
4afb647cSTimo Kreuzer            {
4afb647cSTimo Kreuzer              /* y must be zero (cannot be denormalized); 0 / 0 is
4afb647cSTimo Kreuzer                 reported as an invalid operation */
4afb647cSTimo Kreuzer              return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN,
4afb647cSTimo Kreuzer                                   AMD_F_INVALID, EDOM, x, y, 2);
4afb647cSTimo Kreuzer            }
4afb647cSTimo Kreuzer          else
4afb647cSTimo Kreuzer              /* C99 return for x = 0 must preserve sign */
4afb647cSTimo Kreuzer              return x;
4afb647cSTimo Kreuzer        }
4afb647cSTimo Kreuzer      else
4afb647cSTimo Kreuzer        {
4afb647cSTimo Kreuzer          /* y must be zero (x is finite and nonzero here) */
4afb647cSTimo Kreuzer          return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN,
4afb647cSTimo Kreuzer                               AMD_F_INVALID, EDOM, x, y, 2);
4afb647cSTimo Kreuzer        }
4afb647cSTimo Kreuzer    }
4afb647cSTimo Kreuzer  else if (ax == ay)
4afb647cSTimo Kreuzer    {
4afb647cSTimo Kreuzer      /* abs(x) == abs(y); return zero with the sign of x
4afb647cSTimo Kreuzer         (only the sign bit of ux is kept) */
4afb647cSTimo Kreuzer      PUT_BITS_DP64(ux & SIGNBIT_DP64, dx);
4afb647cSTimo Kreuzer      return (float)dx;
4afb647cSTimo Kreuzer    }
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  /* Set dx = abs(x), dy = abs(y) */
4afb647cSTimo Kreuzer  PUT_BITS_DP64(ax, dx);
4afb647cSTimo Kreuzer  PUT_BITS_DP64(ay, dy);
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  if (ax < ay)
4afb647cSTimo Kreuzer    {
4afb647cSTimo Kreuzer      /* abs(x) < abs(y): fmodf returns x unchanged. remainderf
4afb647cSTimo Kreuzer         subtracts abs(y) once when abs(x) is strictly more than
4afb647cSTimo Kreuzer         half of abs(y); an exact half is left alone. */
4afb647cSTimo Kreuzer#if !defined(COMPILING_FMOD)
4afb647cSTimo Kreuzer      if (dx > 0.5*dy)
4afb647cSTimo Kreuzer        dx -= dy;
4afb647cSTimo Kreuzer#endif
4afb647cSTimo Kreuzer      return (float)(x < 0.0? -dx : dx);
4afb647cSTimo Kreuzer    }
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  /* Save the current floating-point status word. We need
4afb647cSTimo Kreuzer     to do this because the remainder function is always
4afb647cSTimo Kreuzer     exact for finite arguments, but our algorithm causes
4afb647cSTimo Kreuzer     the inexact flag to be raised. We therefore need to
4afb647cSTimo Kreuzer     restore the entry status before exiting. */
4afb647cSTimo Kreuzer  sw = get_fpsw_inline();
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  /* Set ntimes to the number of times we need to do a
4afb647cSTimo Kreuzer     partial remainder. If the exponent of x is an exact multiple
4afb647cSTimo Kreuzer     of 24 larger than the exponent of y, and the mantissa of x is
4afb647cSTimo Kreuzer     less than the mantissa of y, ntimes will be one too large
4afb647cSTimo Kreuzer     but it doesn't matter - it just means that we'll go round
4afb647cSTimo Kreuzer     the loop below one extra time. */
4afb647cSTimo Kreuzer  if (xexp <= yexp)
4afb647cSTimo Kreuzer    {
4afb647cSTimo Kreuzer      ntimes = 0;
4afb647cSTimo Kreuzer      w = dy;
4afb647cSTimo Kreuzer      scale = 1.0;
4afb647cSTimo Kreuzer    }
4afb647cSTimo Kreuzer  else
4afb647cSTimo Kreuzer    {
4afb647cSTimo Kreuzer      ntimes = (xexp - yexp) / 24;
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer      /* Set w = y * 2^(24*ntimes); the power of two is built
4afb647cSTimo Kreuzer         directly from its biased exponent field */
*9e8ed3f8STimo Kreuzer      PUT_BITS_DP64((unsigned long long)(ntimes * 24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
4afb647cSTimo Kreuzer                    scale);
4afb647cSTimo Kreuzer      w = scale * dy;
4afb647cSTimo Kreuzer      /* Set scale = 2^(-24) */
*9e8ed3f8STimo Kreuzer      PUT_BITS_DP64((unsigned long long)(-24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
4afb647cSTimo Kreuzer                    scale);
4afb647cSTimo Kreuzer    }
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  /* Each time round the loop we compute a partial remainder.
4afb647cSTimo Kreuzer     This is done by subtracting a large multiple of w
4afb647cSTimo Kreuzer     from x each time, where w is a scaled up version of y.
4afb647cSTimo Kreuzer     The subtraction can be performed exactly when performed
4afb647cSTimo Kreuzer     in double precision, and the result at each stage can
4afb647cSTimo Kreuzer     fit exactly in a single precision number. */
4afb647cSTimo Kreuzer  for (i = 0; i < ntimes; i++)
4afb647cSTimo Kreuzer    {
4afb647cSTimo Kreuzer      /* t is the integer multiple of w that we will subtract.
4afb647cSTimo Kreuzer         We use a truncated value for t. */
4afb647cSTimo Kreuzer      t = (double)((int)(dx / w));
4afb647cSTimo Kreuzer      dx -= w * t;
4afb647cSTimo Kreuzer      /* Scale w down by 2^(-24) for the next iteration */
4afb647cSTimo Kreuzer      w *= scale;
4afb647cSTimo Kreuzer    }
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  /* One more time, now with w back at abs(y) */
4afb647cSTimo Kreuzer#if defined(COMPILING_FMOD)
4afb647cSTimo Kreuzer  t = (double)((int)(dx / w));
4afb647cSTimo Kreuzer  dx -= w * t;
4afb647cSTimo Kreuzer#else
4afb647cSTimo Kreuzer {
4afb647cSTimo Kreuzer  unsigned int todd;
4afb647cSTimo Kreuzer  /* Variable todd says whether the integer t is odd or not */
4afb647cSTimo Kreuzer  t = (double)((int)(dx / w));
4afb647cSTimo Kreuzer  todd = ((int)(dx / w)) & 1;
4afb647cSTimo Kreuzer  dx -= w * t;
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  /* At this point, dx lies in the range [0,dy) */
4afb647cSTimo Kreuzer  /* For the remainder function, we need to adjust dx
4afb647cSTimo Kreuzer     so that it lies in the range [-dy/2, dy/2] by
4afb647cSTimo Kreuzer     subtracting w (== dy == abs(y)) if necessary. An exact
4afb647cSTimo Kreuzer     half-way value is reduced only when t is odd, which
4afb647cSTimo Kreuzer     leaves the implied integer quotient even. */
4afb647cSTimo Kreuzer  if (dx > 0.5 * w || ((dx == 0.5 * w) && todd))
4afb647cSTimo Kreuzer    dx -= w;
4afb647cSTimo Kreuzer }
4afb647cSTimo Kreuzer#endif
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  /* **** N.B. (original author's note) for some reason this
4afb647cSTimo Kreuzer     breaks the 32 bit version of remainder when compiling
4afb647cSTimo Kreuzer     with optimization. */
4afb647cSTimo Kreuzer  /* Restore the entry status flags */
4afb647cSTimo Kreuzer  set_fpsw_inline(sw);
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer  /* Set the result sign according to input argument x */
4afb647cSTimo Kreuzer  return (float)(x < 0.0? -dx : dx);
4afb647cSTimo Kreuzer
4afb647cSTimo Kreuzer}