xref: /reactos/sdk/lib/crt/math/libm_sse2/remainder.c (revision 1734f297)
1 
2 /*******************************************************************************
3 MIT License
4 -----------
5 
6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this Software and associated documentaon files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 THE SOFTWARE.
25 *******************************************************************************/
26 
27 #include "libm.h"
28 #include "libm_util.h"
29 
30 #define USE_NAN_WITH_FLAGS
31 #define USE_SCALEDOUBLE_3
32 #define USE_GET_FPSW_INLINE
33 #define USE_SET_FPSW_INLINE
34 #define USE_HANDLE_ERROR
35 #include "libm_inlines.h"
36 #undef USE_NAN_WITH_FLAGS
37 #undef USE_SCALEDOUBLE_3
38 #undef USE_GET_FPSW_INLINE
39 #undef USE_SET_FPSW_INLINE
40 #undef USE_HANDLE_ERROR
41 
42 #if !defined(_CRTBLD_C9X)
43 #define _CRTBLD_C9X
44 #endif
45 
46 #include "libm_errno.h"
47 
48 /* Computes the exact product of x and y, the result being the
49    nearly doublelength number (z,zz) */
50 static inline void dekker_mul12(double x, double y,
51                 double *z, double *zz)
52 {
53   double hx, tx, hy, ty;
54   /* Split x into hx (head) and tx (tail). Do the same for y. */
55   unsigned long long u;
56   GET_BITS_DP64(x, u);
57   u &= 0xfffffffff8000000;
58   PUT_BITS_DP64(u, hx);
59   tx = x - hx;
60   GET_BITS_DP64(y, u);
61   u &= 0xfffffffff8000000;
62   PUT_BITS_DP64(u, hy);
63   ty = y - hy;
64   *z = x * y;
65   *zz = (((hx * hy - *z) + hx * ty) + tx * hy) + tx * ty;
66 }
67 
68 #pragma function(fmod)
69 #undef _FUNCNAME
70 #if defined(COMPILING_FMOD)
71 double fmod(double x, double y)
72 #define _FUNCNAME "fmod"
73 #define _OPERATION OP_FMOD
74 #else
75 double remainder(double x, double y)
76 #define _FUNCNAME "remainder"
77 #define _OPERATION OP_REM
78 #endif
79 {
80   double dx, dy, scale, w, t, v, c, cc;
81   int i, ntimes, xexp, yexp;
82   unsigned long long u, ux, uy, ax, ay, todd;
83   unsigned int sw;
84 
85   dx = x;
86   dy = y;
87 
88 
89   GET_BITS_DP64(dx, ux);
90   GET_BITS_DP64(dy, uy);
91   ax = ux & ~SIGNBIT_DP64;
92   ay = uy & ~SIGNBIT_DP64;
93   xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
94   yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
95 
96   if (xexp < 1 || xexp > BIASEDEMAX_DP64 ||
97       yexp < 1 || yexp > BIASEDEMAX_DP64)
98     {
99       /* x or y is zero, denormalized, NaN or infinity */
100       if (xexp > BIASEDEMAX_DP64)
101         {
102           /* x is NaN or infinity */
103           if (ux & MANTBITS_DP64)
104             {
105               /* x is NaN */
106               return _handle_error(_FUNCNAME, _OPERATION, ux|0x0008000000000000, _DOMAIN, 0,
107                                   EDOM, x, y, 2);
108             }
109           else
110             {
111               /* x is infinity; result is NaN */
112               return _handle_error(_FUNCNAME, _OPERATION, INDEFBITPATT_DP64, _DOMAIN,
113                                   AMD_F_INVALID, EDOM, x, y, 2);
114             }
115         }
116       else if (yexp > BIASEDEMAX_DP64)
117         {
118           /* y is NaN or infinity */
119           if (uy & MANTBITS_DP64)
120             {
121               /* y is NaN */
122               return _handle_error(_FUNCNAME, _OPERATION, uy|0x0008000000000000, _DOMAIN, 0,
123                                   EDOM, x, y, 2);
124             }
125           else
126             {
127 #ifdef _CRTBLD_C9X
128               /* C99 return for y = +-inf is x */
129               return x;
130 #else
131               /* y is infinity; result is indefinite */
132               return _handle_error(_FUNCNAME, _OPERATION, INDEFBITPATT_DP64, _DOMAIN,
133                                   AMD_F_INVALID, EDOM, x, y, 2);
134 #endif
135             }
136         }
137       else if (ax == 0x0000000000000000)
138         {
139           /* x is zero */
140           if (ay == 0x0000000000000000)
141             {
142               /* y is zero */
143               return _handle_error(_FUNCNAME, _OPERATION, INDEFBITPATT_DP64, _DOMAIN,
144                                   AMD_F_INVALID, EDOM, x, y, 2);
145             }
146           else
147             /* C99 return for x = 0 must preserve sign */
148             return x;
149         }
150       else if (ay == 0x0000000000000000)
151         {
152           /* y is zero */
153           return _handle_error(_FUNCNAME, _OPERATION, INDEFBITPATT_DP64, _DOMAIN,
154                               AMD_F_INVALID, EDOM, x, y, 2);
155         }
156 
157       /* We've exhausted all other possibilities. One or both of x and
158          y must be denormalized */
159       if (xexp < 1)
160         {
161           /* x is denormalized. Figure out its exponent. */
162           u = ax;
163           while (u < IMPBIT_DP64)
164             {
165               xexp--;
166               u <<= 1;
167             }
168         }
169       if (yexp < 1)
170         {
171           /* y is denormalized. Figure out its exponent. */
172           u = ay;
173           while (u < IMPBIT_DP64)
174             {
175               yexp--;
176               u <<= 1;
177             }
178         }
179     }
180   else if (ax == ay)
181     {
182       /* abs(x) == abs(y); return zero with the sign of x */
183       PUT_BITS_DP64(ux & SIGNBIT_DP64, dx);
184       return dx;
185     }
186 
187   /* Set x = abs(x), y = abs(y) */
188   PUT_BITS_DP64(ax, dx);
189   PUT_BITS_DP64(ay, dy);
190 
191   if (ax < ay)
192     {
193       /* abs(x) < abs(y) */
194 #if !defined(COMPILING_FMOD)
195       if (dx > 0.5*dy)
196         dx -= dy;
197 #endif
198       return x < 0.0? -dx : dx;
199     }
200 
201   /* Save the current floating-point status word. We need
202      to do this because the remainder function is always
203      exact for finite arguments, but our algorithm causes
204      the inexact flag to be raised. We therefore need to
205      restore the entry status before exiting. */
206   sw = get_fpsw_inline();
207 
208   /* Set ntimes to the number of times we need to do a
209      partial remainder. If the exponent of x is an exact multiple
210      of 52 larger than the exponent of y, and the mantissa of x is
211      less than the mantissa of y, ntimes will be one too large
212      but it doesn't matter - it just means that we'll go round
213      the loop below one extra time. */
214   if (xexp <= yexp)
215     ntimes = 0;
216   else
217     ntimes = (xexp - yexp) / 52;
218 
219   if (ntimes == 0)
220     {
221       w = dy;
222       scale = 1.0;
223     }
224   else
225     {
226       /* Set w = y * 2^(52*ntimes) */
227       w = scaleDouble_3(dy, ntimes * 52);
228 
229       /* Set scale = 2^(-52) */
230       PUT_BITS_DP64((unsigned long long)(-52 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
231                     scale);
232     }
233 
234 
235   /* Each time round the loop we compute a partial remainder.
236      This is done by subtracting a large multiple of w
237      from x each time, where w is a scaled up version of y.
238      The subtraction must be performed exactly in quad
239      precision, though the result at each stage can
240      fit exactly in a double precision number. */
241   for (i = 0; i < ntimes; i++)
242     {
243       /* t is the integer multiple of w that we will subtract.
244          We use a truncated value for t.
245 
246          N.B. w has been chosen so that the integer t will have
247          at most 52 significant bits. This is the amount by
248          which the exponent of the partial remainder dx gets reduced
249          every time around the loop. In theory we could use
250          53 bits in t, but the quad precision multiplication
251          routine dekker_mul12 does not allow us to do that because
252          it loses the last (106th) bit of its quad precision result. */
253 
254       /* Set dx = dx - w * t, where t is equal to trunc(dx/w). */
255       t = (double)(long long)(dx / w);
256       /* At this point, t may be one too large due to
257          rounding of dx/w */
258 
259       /* Compute w * t in quad precision */
260       dekker_mul12(w, t, &c, &cc);
261 
262       /* Subtract w * t from dx */
263       v = dx - c;
264       dx = v + (((dx - v) - c) - cc);
265 
266       /* If t was one too large, dx will be negative. Add back
267          one w */
268       /* It might be possible to speed up this loop by finding
269          a way to compute correctly truncated t directly from dx and w.
270          We would then avoid the need for this check on negative dx. */
271       if (dx < 0.0)
272         dx += w;
273 
274       /* Scale w down by 2^(-52) for the next iteration */
275       w *= scale;
276     }
277 
278   /* One more time */
279   /* Variable todd says whether the integer t is odd or not */
280   t = (double)(long long)(dx / w);
281   todd = ((long long)(dx / w)) & 1;
282   dekker_mul12(w, t, &c, &cc);
283   v = dx - c;
284   dx = v + (((dx - v) - c) - cc);
285   if (dx < 0.0)
286     {
287       todd = !todd;
288       dx += w;
289     }
290 
291   /* At this point, dx lies in the range [0,dy) */
292 #if !defined(COMPILING_FMOD)
293   /* For the fmod function, we're done apart from setting
294      the correct sign. */
295   /* For the remainder function, we need to adjust dx
296      so that it lies in the range (-y/2, y/2] by carefully
297      subtracting w (== dy == y) if necessary. The rigmarole
298      with todd is to get the correct sign of the result
299      when x/y lies exactly half way between two integers,
300      when we need to choose the even integer. */
301   if (ay < 0x7fd0000000000000)
302     {
303       if (dx + dx > w || (todd && (dx + dx == w)))
304         dx -= w;
305     }
306   else if (dx > 0.5 * w || (todd && (dx == 0.5 * w)))
307     dx -= w;
308 
309 #endif
310 
311   /* **** N.B. for some reason this breaks the 32 bit version
312      of remainder when compiling with optimization. */
313   /* Restore the entry status flags */
314   set_fpsw_inline(sw);
315 
316   /* Set the result sign according to input argument x */
317   return x < 0.0? -dx : dx;
318 
319 }
320