1 
2 /*******************************************************************************
3 MIT License
4 -----------
5 
6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this Software and associated documentaon files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 THE SOFTWARE.
25 *******************************************************************************/
26 
27 #include "libm.h"
28 #include "libm_util.h"
29 
30 
31 /* Given positive argument x, reduce it to the range [-pi/4,pi/4] using
32    extra precision, and return the result in r, rr.
33    Return value "region" tells how many lots of pi/2 were subtracted
34    from x to put it in the range [-pi/4,pi/4], mod 4. */
35 void __remainder_piby2(double x, double *r, double *rr, int *region)
36 {
37       /* This method simulates multi-precision floating-point
38          arithmetic and is accurate for all 1 <= x < infinity */
39       static const double
40         piby2_lead = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */
41         piby2_part1 = 1.57079631090164184570e+00, /* 0x3ff921fb50000000 */
42         piby2_part2 = 1.58932547122958567343e-08, /* 0x3e5110b460000000 */
43         piby2_part3 = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */
44       const int bitsper = 10;
45       unsigned long long res[500];
46       unsigned long long ux, u, carry, mask, mant, highbitsrr;
47       int first, last, i, rexp, xexp, resexp, ltb, determ;
48       double xx, t;
49       static unsigned long long pibits[] =
50       {
51         0,    0,    0,    0,    0,    0,
52         162,  998,   54,  915,  580,   84,  671,  777,  855,  839,
53         851,  311,  448,  877,  553,  358,  316,  270,  260,  127,
54         593,  398,  701,  942,  965,  390,  882,  283,  570,  265,
55         221,  184,    6,  292,  750,  642,  465,  584,  463,  903,
56         491,  114,  786,  617,  830,  930,   35,  381,  302,  749,
57         72,  314,  412,  448,  619,  279,  894,  260,  921,  117,
58         569,  525,  307,  637,  156,  529,  504,  751,  505,  160,
59         945, 1022,  151, 1023,  480,  358,   15,  956,  753,   98,
60         858,   41,  721,  987,  310,  507,  242,  498,  777,  733,
61         244,  399,  870,  633,  510,  651,  373,  158,  940,  506,
62         997,  965,  947,  833,  825,  990,  165,  164,  746,  431,
63         949, 1004,  287,  565,  464,  533,  515,  193,  111,  798
64       };
65 
66       GET_BITS_DP64(x, ux);
67 
68 
69       xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
70       ux = (ux & MANTBITS_DP64) | IMPBIT_DP64;
71 
72       /* Now ux is the mantissa bit pattern of x as a long integer */
73       carry = 0;
74       mask = 1;
75       mask = (mask << bitsper) - 1;
76 
77       /* Set first and last to the positions of the first
78          and last chunks of 2/pi that we need */
79       first = xexp / bitsper;
80       resexp = xexp - first * bitsper;
81       /* 180 is the theoretical maximum number of bits (actually
82          175 for IEEE double precision) that we need to extract
83          from the middle of 2/pi to compute the reduced argument
84          accurately enough for our purposes */
85       last = first + 180 / bitsper;
86 
87       /* Do a long multiplication of the bits of 2/pi by the
88          integer mantissa */
89 #if 0
90       for (i = last; i >= first; i--)
91         {
92           u = pibits[i] * ux + carry;
93           res[i - first] = u & mask;
94           carry = u >> bitsper;
95         }
96       res[last - first + 1] = 0;
97 #else
98       /* Unroll the loop. This is only correct because we know
99          that bitsper is fixed as 10. */
100       res[19] = 0;
101       u = pibits[last] * ux;
102       res[18] = u & mask;
103       carry = u >> bitsper;
104       u = pibits[last-1] * ux + carry;
105       res[17] = u & mask;
106       carry = u >> bitsper;
107       u = pibits[last-2] * ux + carry;
108       res[16] = u & mask;
109       carry = u >> bitsper;
110       u = pibits[last-3] * ux + carry;
111       res[15] = u & mask;
112       carry = u >> bitsper;
113       u = pibits[last-4] * ux + carry;
114       res[14] = u & mask;
115       carry = u >> bitsper;
116       u = pibits[last-5] * ux + carry;
117       res[13] = u & mask;
118       carry = u >> bitsper;
119       u = pibits[last-6] * ux + carry;
120       res[12] = u & mask;
121       carry = u >> bitsper;
122       u = pibits[last-7] * ux + carry;
123       res[11] = u & mask;
124       carry = u >> bitsper;
125       u = pibits[last-8] * ux + carry;
126       res[10] = u & mask;
127       carry = u >> bitsper;
128       u = pibits[last-9] * ux + carry;
129       res[9] = u & mask;
130       carry = u >> bitsper;
131       u = pibits[last-10] * ux + carry;
132       res[8] = u & mask;
133       carry = u >> bitsper;
134       u = pibits[last-11] * ux + carry;
135       res[7] = u & mask;
136       carry = u >> bitsper;
137       u = pibits[last-12] * ux + carry;
138       res[6] = u & mask;
139       carry = u >> bitsper;
140       u = pibits[last-13] * ux + carry;
141       res[5] = u & mask;
142       carry = u >> bitsper;
143       u = pibits[last-14] * ux + carry;
144       res[4] = u & mask;
145       carry = u >> bitsper;
146       u = pibits[last-15] * ux + carry;
147       res[3] = u & mask;
148       carry = u >> bitsper;
149       u = pibits[last-16] * ux + carry;
150       res[2] = u & mask;
151       carry = u >> bitsper;
152       u = pibits[last-17] * ux + carry;
153       res[1] = u & mask;
154       carry = u >> bitsper;
155       u = pibits[last-18] * ux + carry;
156       res[0] = u & mask;
157 #endif
158 
159 
160       /* Reconstruct the result */
161       ltb = (int)((((res[0] << bitsper) | res[1])
162                    >> (bitsper - 1 - resexp)) & 7);
163 
164       /* determ says whether the fractional part is >= 0.5 */
165       determ = ltb & 1;
166 
167 
168       i = 1;
169       if (determ)
170         {
171           /* The mantissa is >= 0.5. We want to subtract it
172              from 1.0 by negating all the bits */
173           *region = ((ltb >> 1) + 1) & 3;
174           mant = 1;
175           mant = ~(res[1]) & ((mant << (bitsper - resexp)) - 1);
176           while (mant < 0x0020000000000000)
177             {
178               i++;
179               mant = (mant << bitsper) | (~(res[i]) & mask);
180             }
181           highbitsrr = ~(res[i + 1]) << (64 - bitsper);
182         }
183       else
184         {
185           *region = (ltb >> 1);
186           mant = 1;
187           mant = res[1] & ((mant << (bitsper - resexp)) - 1);
188           while (mant < 0x0020000000000000)
189             {
190               i++;
191               mant = (mant << bitsper) | res[i];
192             }
193           highbitsrr = res[i + 1] << (64 - bitsper);
194         }
195 
196       rexp = 52 + resexp - i * bitsper;
197 
198       while (mant >= 0x0020000000000000)
199         {
200           rexp++;
201           highbitsrr = (highbitsrr >> 1) | ((mant & 1) << 63);
202           mant >>= 1;
203         }
204 
205 
206       /* Put the result exponent rexp onto the mantissa pattern */
207       u = ((unsigned long long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
208       ux = (mant & MANTBITS_DP64) | u;
209       if (determ)
210         /* If we negated the mantissa we negate x too */
211         ux |= SIGNBIT_DP64;
212       PUT_BITS_DP64(ux, x);
213 
214       /* Create the bit pattern for rr */
215       highbitsrr >>= 12; /* Note this is shifted one place too far */
216       u = ((unsigned long long)rexp + EXPBIAS_DP64 - 53) << EXPSHIFTBITS_DP64;
217       PUT_BITS_DP64(u, t);
218       u |= highbitsrr;
219       PUT_BITS_DP64(u, xx);
220 
221       /* Subtract the implicit bit we accidentally added */
222       xx -= t;
223       /* Set the correct sign, and double to account for the
224          "one place too far" shift */
225       if (determ)
226         xx *= -2.0;
227       else
228         xx *= 2.0;
229 
230 
231       /* (x,xx) is an extra-precise version of the fractional part of
232          x * 2 / pi. Multiply (x,xx) by pi/2 in extra precision
233          to get the reduced argument (r,rr). */
234       {
235         double hx, tx, c, cc;
236         /* Split x into hx (head) and tx (tail) */
237         GET_BITS_DP64(x, ux);
238         ux &= 0xfffffffff8000000;
239         PUT_BITS_DP64(ux, hx);
240         tx = x - hx;
241 
242         c = piby2_lead * x;
243         cc = ((((piby2_part1 * hx - c) + piby2_part1 * tx) +
244                piby2_part2 * hx) + piby2_part2 * tx) +
245           (piby2_lead * xx + piby2_part3 * x);
246         *r = c + cc;
247         *rr = (c - *r) + cc;
248       }
249 
250   return;
251 }
252