1 /******************************************************************************
2   Copyright (c) 2007-2011, Intel Corp.
3   All rights reserved.
4 
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions are met:
7 
8     * Redistributions of source code must retain the above copyright notice,
9       this list of conditions and the following disclaimer.
10     * Redistributions in binary form must reproduce the above copyright
11       notice, this list of conditions and the following disclaimer in the
12       documentation and/or other materials provided with the distribution.
13     * Neither the name of Intel Corporation nor the names of its contributors
14       may be used to endorse or promote products derived from this software
15       without specific prior written permission.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27   THE POSSIBILITY OF SUCH DAMAGE.
28 ******************************************************************************/
29 
30 #include "bid_internal.h"
31 
32 /*****************************************************************************
33  *  BID64_nearbyintd
34  ****************************************************************************/
35 
36 BID_TYPE0_FUNCTION_ARGTYPE1(BID_UINT64, bid64_nearbyint, BID_UINT64, x)
37 
38   BID_UINT64 res = 0xbaddbaddbaddbaddull;
39   BID_UINT64 x_sign;
40   int exp;			// unbiased exponent
41   // Note: C1 represents the significand (BID_UINT64)
42   BID_UI64DOUBLE tmp1;
43   int x_nr_bits;
44   int q, ind, shift;
45   BID_UINT64 C1;
46   // BID_UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits
47   BID_UINT128 fstar = { {0x0ull, 0x0ull} };
48   BID_UINT128 P128;
49 
50   x_sign = x & MASK_SIGN;	// 0 for positive, MASK_SIGN for negative
51 
52   // check for NaNs and infinities
53   if ((x & MASK_NAN) == MASK_NAN) {	// check for NaN
54     if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
55       x = x & 0xfe00000000000000ull;	// clear G6-G12 and the payload bits
56     else
57       x = x & 0xfe03ffffffffffffull;	// clear G6-G12
58     if ((x & MASK_SNAN) == MASK_SNAN) {	// SNaN
59       // set invalid flag
60       *pfpsf |= BID_INVALID_EXCEPTION;
61       // return quiet (SNaN)
62       res = x & 0xfdffffffffffffffull;
63     } else {	// QNaN
64       res = x;
65     }
66     BID_RETURN (res);
67   } else if ((x & MASK_INF) == MASK_INF) {	// check for Infinity
68     res = x_sign | 0x7800000000000000ull;
69     BID_RETURN (res);
70   }
71   // unpack x
72   if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
73     // if the steering bits are 11 (condition will be 0), then
74     // the exponent is G[0:w+1]
75     exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
76     C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
77     if (C1 > 9999999999999999ull) {	// non-canonical
78       C1 = 0;
79     }
80   } else {	// if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
81     exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
82     C1 = (x & MASK_BINARY_SIG1);
83   }
84 
85   // if x is 0 or non-canonical return 0 preserving the sign bit and
86   // the preferred exponent of MAX(Q(x), 0)
87   if (C1 == 0) {
88     if (exp < 0)
89       exp = 0;
90     res = x_sign | (((BID_UINT64) exp + 398) << 53);
91     BID_RETURN (res);
92   }
93   // x is a finite non-zero number (not 0, non-canonical, or special)
94 
95   switch (rnd_mode) {
96   case BID_ROUNDING_TO_NEAREST:
97   case BID_ROUNDING_TIES_AWAY:
98     // return 0 if (exp <= -(p+1))
99     if (exp <= -17) {
100       res = x_sign | 0x31c0000000000000ull;
101       BID_RETURN (res);
102     }
103     break;
104   case BID_ROUNDING_DOWN:
105     // return 0 if (exp <= -p)
106     if (exp <= -16) {
107       if (x_sign) {
108 	res = 0xb1c0000000000001ull;
109       } else {
110 	res = 0x31c0000000000000ull;
111       }
112       BID_RETURN (res);
113     }
114     break;
115   case BID_ROUNDING_UP:
116     // return 0 if (exp <= -p)
117     if (exp <= -16) {
118       if (x_sign) {
119 	res = 0xb1c0000000000000ull;
120       } else {
121 	res = 0x31c0000000000001ull;
122       }
123       BID_RETURN (res);
124     }
125     break;
126   case BID_ROUNDING_TO_ZERO:
127     // return 0 if (exp <= -p)
128     if (exp <= -16) {
129       res = x_sign | 0x31c0000000000000ull;
130       BID_RETURN (res);
131     }
132     break;
133   }	// end switch ()
134 
135   // q = nr. of decimal digits in x (1 <= q <= 54)
136   //  determine first the nr. of bits in x
137   if (C1 >= 0x0020000000000000ull) {	// x >= 2^53
138     q = 16;
139   } else {	// if x < 2^53
140     tmp1.d = (double) C1;	// exact conversion
141     x_nr_bits =
142       1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
143     q = bid_nr_digits[x_nr_bits - 1].digits;
144     if (q == 0) {
145       q = bid_nr_digits[x_nr_bits - 1].digits1;
146       if (C1 >= bid_nr_digits[x_nr_bits - 1].threshold_lo)
147 	q++;
148     }
149   }
150 
151   if (exp >= 0) {	// -exp <= 0
152     // the argument is an integer already
153     res = x;
154     BID_RETURN (res);
155   }
156 
157   switch (rnd_mode) {
158   case BID_ROUNDING_TO_NEAREST:
159     if ((q + exp) >= 0) {	// exp < 0 and 1 <= -exp <= q
160       // need to shift right -exp digits from the coefficient; exp will be 0
161       ind = -exp;	// 1 <= ind <= 16; ind is a synonym for 'x'
162       // chop off ind digits from the lower part of C1
163       // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
164       // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
165       C1 = C1 + bid_midpoint64[ind - 1];
166       // calculate C* and f*
167       // C* is actually floor(C*) in this case
168       // C* and f* need shifting and masking, as shown by
169       // bid_shiftright128[] and bid_maskhigh128[]
170       // 1 <= x <= 16
171       // kx = 10^(-x) = bid_ten2mk64[ind - 1]
172       // C* = (C1 + 1/2 * 10^x) * 10^(-x)
173       // the approximation of 10^(-x) was rounded up to 64 bits
174       __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]);
175 
176       // if (0 < f* < 10^(-x)) then the result is a midpoint
177       //   if floor(C*) is even then C* = floor(C*) - logical right
178       //       shift; C* has p decimal digits, correct by Prop. 1)
179       //   else if floor(C*) is odd C* = floor(C*)-1 (logical right
180       //       shift; C* has p decimal digits, correct by Pr. 1)
181       // else
182       //   C* = floor(C*) (logical right shift; C has p decimal digits,
183       //       correct by Property 1)
184       // n = C* * 10^(e+x)
185 
186       if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
187 	res = P128.w[1];
188 	fstar.w[1] = 0;
189 	fstar.w[0] = P128.w[0];
190       } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
191 	shift = bid_shiftright128[ind - 1];	// 3 <= shift <= 63
192 	res = (P128.w[1] >> shift);
193 	fstar.w[1] = P128.w[1] & bid_maskhigh128[ind - 1];
194 	fstar.w[0] = P128.w[0];
195       }
196       // if (0 < f* < 10^(-x)) then the result is a midpoint
197       // since round_to_even, subtract 1 if current result is odd
198       if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0)
199 	  && (fstar.w[0] < bid_ten2mk64[ind - 1])) {
200 	res--;
201       }
202       // set exponent to zero as it was negative before.
203       res = x_sign | 0x31c0000000000000ull | res;
204       BID_RETURN (res);
205     } else {	// if exp < 0 and q + exp < 0
206       // the result is +0 or -0
207       res = x_sign | 0x31c0000000000000ull;
208       BID_RETURN (res);
209     }
210     break;
211   case BID_ROUNDING_TIES_AWAY:
212     if ((q + exp) >= 0) {	// exp < 0 and 1 <= -exp <= q
213       // need to shift right -exp digits from the coefficient; exp will be 0
214       ind = -exp;	// 1 <= ind <= 16; ind is a synonym for 'x'
215       // chop off ind digits from the lower part of C1
216       // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
217       // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
218       C1 = C1 + bid_midpoint64[ind - 1];
219       // calculate C* and f*
220       // C* is actually floor(C*) in this case
221       // C* and f* need shifting and masking, as shown by
222       // bid_shiftright128[] and bid_maskhigh128[]
223       // 1 <= x <= 16
224       // kx = 10^(-x) = bid_ten2mk64[ind - 1]
225       // C* = (C1 + 1/2 * 10^x) * 10^(-x)
226       // the approximation of 10^(-x) was rounded up to 64 bits
227       __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]);
228 
229       // if (0 < f* < 10^(-x)) then the result is a midpoint
230       //   C* = floor(C*) - logical right shift; C* has p decimal digits,
231       //       correct by Prop. 1)
232       // else
233       //   C* = floor(C*) (logical right shift; C has p decimal digits,
234       //       correct by Property 1)
235       // n = C* * 10^(e+x)
236 
237       if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
238 	res = P128.w[1];
239       } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
240 	shift = bid_shiftright128[ind - 1];	// 3 <= shift <= 63
241 	res = (P128.w[1] >> shift);
242       }
243       res = x_sign | 0x31c0000000000000ull | res;
244       BID_RETURN (res);
245     } else {	// if exp < 0 and q + exp < 0
246       // the result is +0 or -0
247       res = x_sign | 0x31c0000000000000ull;
248       BID_RETURN (res);
249     }
250     break;
251   case BID_ROUNDING_DOWN:
252     if ((q + exp) > 0) {	// exp < 0 and 1 <= -exp < q
253       // need to shift right -exp digits from the coefficient; exp will be 0
254       ind = -exp;	// 1 <= ind <= 16; ind is a synonym for 'x'
255       // chop off ind digits from the lower part of C1
256       // C1 fits in 64 bits
257       // calculate C* and f*
258       // C* is actually floor(C*) in this case
259       // C* and f* need shifting and masking, as shown by
260       // bid_shiftright128[] and bid_maskhigh128[]
261       // 1 <= x <= 16
262       // kx = 10^(-x) = bid_ten2mk64[ind - 1]
263       // C* = C1 * 10^(-x)
264       // the approximation of 10^(-x) was rounded up to 64 bits
265       __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]);
266 
267       // C* = floor(C*) (logical right shift; C has p decimal digits,
268       //       correct by Property 1)
269       // if (0 < f* < 10^(-x)) then the result is exact
270       // n = C* * 10^(e+x)
271 
272       if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
273 	res = P128.w[1];
274 	fstar.w[1] = 0;
275 	fstar.w[0] = P128.w[0];
276       } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
277 	shift = bid_shiftright128[ind - 1];	// 3 <= shift <= 63
278 	res = (P128.w[1] >> shift);
279 	fstar.w[1] = P128.w[1] & bid_maskhigh128[ind - 1];
280 	fstar.w[0] = P128.w[0];
281       }
282       // if (f* > 10^(-x)) then the result is inexact
283       if ((fstar.w[1] != 0) || (fstar.w[0] >= bid_ten2mk64[ind - 1])) {
284 	if (x_sign) {
285 	  // if negative and not exact, increment magnitude
286 	  res++;
287 	}
288       }
289       // set exponent to zero as it was negative before.
290       res = x_sign | 0x31c0000000000000ull | res;
291       BID_RETURN (res);
292     } else {	// if exp < 0 and q + exp <= 0
293       // the result is +0 or -1
294       if (x_sign) {
295 	res = 0xb1c0000000000001ull;
296       } else {
297 	res = 0x31c0000000000000ull;
298       }
299       BID_RETURN (res);
300     }
301     break;
302   case BID_ROUNDING_UP:
303     if ((q + exp) > 0) {	// exp < 0 and 1 <= -exp < q
304       // need to shift right -exp digits from the coefficient; exp will be 0
305       ind = -exp;	// 1 <= ind <= 16; ind is a synonym for 'x'
306       // chop off ind digits from the lower part of C1
307       // C1 fits in 64 bits
308       // calculate C* and f*
309       // C* is actually floor(C*) in this case
310       // C* and f* need shifting and masking, as shown by
311       // bid_shiftright128[] and bid_maskhigh128[]
312       // 1 <= x <= 16
313       // kx = 10^(-x) = bid_ten2mk64[ind - 1]
314       // C* = C1 * 10^(-x)
315       // the approximation of 10^(-x) was rounded up to 64 bits
316       __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]);
317 
318       // C* = floor(C*) (logical right shift; C has p decimal digits,
319       //       correct by Property 1)
320       // if (0 < f* < 10^(-x)) then the result is exact
321       // n = C* * 10^(e+x)
322 
323       if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
324 	res = P128.w[1];
325 	fstar.w[1] = 0;
326 	fstar.w[0] = P128.w[0];
327       } else if (ind - 1 <= 21) {	// 3 <= ind - 1 <= 21 => 3 <= shift <= 63
328 	shift = bid_shiftright128[ind - 1];	// 3 <= shift <= 63
329 	res = (P128.w[1] >> shift);
330 	fstar.w[1] = P128.w[1] & bid_maskhigh128[ind - 1];
331 	fstar.w[0] = P128.w[0];
332       }
333       // if (f* > 10^(-x)) then the result is inexact
334       if ((fstar.w[1] != 0) || (fstar.w[0] >= bid_ten2mk64[ind - 1])) {
335 	if (!x_sign) {
336 	  // if positive and not exact, increment magnitude
337 	  res++;
338 	}
339       }
340       // set exponent to zero as it was negative before.
341       res = x_sign | 0x31c0000000000000ull | res;
342       BID_RETURN (res);
343     } else {	// if exp < 0 and q + exp <= 0
344       // the result is -0 or +1
345       if (x_sign) {
346 	res = 0xb1c0000000000000ull;
347       } else {
348 	res = 0x31c0000000000001ull;
349       }
350       BID_RETURN (res);
351     }
352     break;
353   case BID_ROUNDING_TO_ZERO:
354     if ((q + exp) >= 0) {	// exp < 0 and 1 <= -exp <= q
355       // need to shift right -exp digits from the coefficient; exp will be 0
356       ind = -exp;	// 1 <= ind <= 16; ind is a synonym for 'x'
357       // chop off ind digits from the lower part of C1
358       // C1 fits in 127 bits
359       // calculate C* and f*
360       // C* is actually floor(C*) in this case
361       // C* and f* need shifting and masking, as shown by
362       // bid_shiftright128[] and bid_maskhigh128[]
363       // 1 <= x <= 16
364       // kx = 10^(-x) = bid_ten2mk64[ind - 1]
365       // C* = C1 * 10^(-x)
366       // the approximation of 10^(-x) was rounded up to 64 bits
367       __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]);
368 
369       // C* = floor(C*) (logical right shift; C has p decimal digits,
370       //       correct by Property 1)
371       // if (0 < f* < 10^(-x)) then the result is exact
372       // n = C* * 10^(e+x)
373 
374       if (ind - 1 <= 2) {	// 0 <= ind - 1 <= 2 => shift = 0
375 	res = P128.w[1];
376       } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
377 	shift = bid_shiftright128[ind - 1];	// 3 <= shift <= 63
378 	res = (P128.w[1] >> shift);
379       }
380       // set exponent to zero as it was negative before.
381       res = x_sign | 0x31c0000000000000ull | res;
382       BID_RETURN (res);
383     } else {	// if exp < 0 and q + exp < 0
384       // the result is +0 or -0
385       res = x_sign | 0x31c0000000000000ull;
386       BID_RETURN (res);
387     }
388     break;
389   }	// end switch ()
390   BID_RETURN (res);
391 }
392 
393