1 /****************************************************************************** 2 Copyright (c) 2007-2011, Intel Corp. 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the distribution. 13 * Neither the name of Intel Corporation nor the names of its contributors 14 may be used to endorse or promote products derived from this software 15 without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 THE POSSIBILITY OF SUCH DAMAGE. 28 ******************************************************************************/ 29 30 #include "bid_internal.h" 31 32 /***************************************************************************** 33 * BID64_nearbyintd 34 ****************************************************************************/ 35 36 BID_TYPE0_FUNCTION_ARGTYPE1(BID_UINT64, bid64_nearbyint, BID_UINT64, x) 37 38 BID_UINT64 res = 0xbaddbaddbaddbaddull; 39 BID_UINT64 x_sign; 40 int exp; // unbiased exponent 41 // Note: C1 represents the significand (BID_UINT64) 42 BID_UI64DOUBLE tmp1; 43 int x_nr_bits; 44 int q, ind, shift; 45 BID_UINT64 C1; 46 // BID_UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits 47 BID_UINT128 fstar = { {0x0ull, 0x0ull} }; 48 BID_UINT128 P128; 49 50 x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 51 52 // check for NaNs and infinities 53 if ((x & MASK_NAN) == MASK_NAN) { // check for NaN 54 if ((x & 0x0003ffffffffffffull) > 999999999999999ull) 55 x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits 56 else 57 x = x & 0xfe03ffffffffffffull; // clear G6-G12 58 if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN 59 // set invalid flag 60 *pfpsf |= BID_INVALID_EXCEPTION; 61 // return quiet (SNaN) 62 res = x & 0xfdffffffffffffffull; 63 } else { // QNaN 64 res = x; 65 } 66 BID_RETURN (res); 67 } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity 68 res = x_sign | 0x7800000000000000ull; 69 BID_RETURN (res); 70 } 71 // unpack x 72 if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { 73 // if the steering bits are 11 (condition will be 0), then 74 // the exponent is G[0:w+1] 75 exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; 76 C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; 77 if (C1 > 9999999999999999ull) { // non-canonical 78 C1 = 0; 79 } 80 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) 81 exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; 82 C1 = (x & MASK_BINARY_SIG1); 83 } 84 85 // if x is 0 or non-canonical return 0 preserving the sign bit and 86 // the preferred exponent of MAX(Q(x), 0) 87 if (C1 == 0) { 88 if (exp < 0) 89 exp = 0; 90 res = x_sign | (((BID_UINT64) exp + 398) << 53); 91 BID_RETURN (res); 92 } 93 // x is a finite non-zero number (not 0, non-canonical, or special) 94 95 switch (rnd_mode) { 96 case BID_ROUNDING_TO_NEAREST: 97 case BID_ROUNDING_TIES_AWAY: 98 // return 0 if (exp <= -(p+1)) 99 if (exp <= -17) { 100 res = x_sign | 0x31c0000000000000ull; 101 BID_RETURN (res); 102 } 103 break; 104 case BID_ROUNDING_DOWN: 105 // return 0 if (exp <= -p) 106 if (exp <= -16) { 107 if (x_sign) { 108 res = 0xb1c0000000000001ull; 109 } else { 110 res = 0x31c0000000000000ull; 111 } 112 BID_RETURN (res); 113 } 114 break; 115 case BID_ROUNDING_UP: 116 // return 0 if (exp <= -p) 117 if (exp <= -16) { 118 if (x_sign) { 119 res = 0xb1c0000000000000ull; 120 } else { 121 res = 0x31c0000000000001ull; 122 } 123 BID_RETURN (res); 124 } 125 break; 126 case BID_ROUNDING_TO_ZERO: 127 // return 0 if (exp <= -p) 128 if (exp <= -16) { 129 res = x_sign | 0x31c0000000000000ull; 130 BID_RETURN (res); 131 } 132 break; 133 } // end switch () 134 135 // q = nr. of decimal digits in x (1 <= q <= 54) 136 // determine first the nr. of bits in x 137 if (C1 >= 0x0020000000000000ull) { // x >= 2^53 138 q = 16; 139 } else { // if x < 2^53 140 tmp1.d = (double) C1; // exact conversion 141 x_nr_bits = 142 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); 143 q = bid_nr_digits[x_nr_bits - 1].digits; 144 if (q == 0) { 145 q = bid_nr_digits[x_nr_bits - 1].digits1; 146 if (C1 >= bid_nr_digits[x_nr_bits - 1].threshold_lo) 147 q++; 148 } 149 } 150 151 if (exp >= 0) { // -exp <= 0 152 // the argument is an integer already 153 res = x; 154 BID_RETURN (res); 155 } 156 157 switch (rnd_mode) { 158 case BID_ROUNDING_TO_NEAREST: 159 if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q 160 // need to shift right -exp digits from the coefficient; exp will be 0 161 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' 162 // chop off ind digits from the lower part of C1 163 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits 164 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate 165 C1 = C1 + bid_midpoint64[ind - 1]; 166 // calculate C* and f* 167 // C* is actually floor(C*) in this case 168 // C* and f* need shifting and masking, as shown by 169 // bid_shiftright128[] and bid_maskhigh128[] 170 // 1 <= x <= 16 171 // kx = 10^(-x) = bid_ten2mk64[ind - 1] 172 // C* = (C1 + 1/2 * 10^x) * 10^(-x) 173 // the approximation of 10^(-x) was rounded up to 64 bits 174 __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]); 175 176 // if (0 < f* < 10^(-x)) then the result is a midpoint 177 // if floor(C*) is even then C* = floor(C*) - logical right 178 // shift; C* has p decimal digits, correct by Prop. 1) 179 // else if floor(C*) is odd C* = floor(C*)-1 (logical right 180 // shift; C* has p decimal digits, correct by Pr. 1) 181 // else 182 // C* = floor(C*) (logical right shift; C has p decimal digits, 183 // correct by Property 1) 184 // n = C* * 10^(e+x) 185 186 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 187 res = P128.w[1]; 188 fstar.w[1] = 0; 189 fstar.w[0] = P128.w[0]; 190 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 191 shift = bid_shiftright128[ind - 1]; // 3 <= shift <= 63 192 res = (P128.w[1] >> shift); 193 fstar.w[1] = P128.w[1] & bid_maskhigh128[ind - 1]; 194 fstar.w[0] = P128.w[0]; 195 } 196 // if (0 < f* < 10^(-x)) then the result is a midpoint 197 // since round_to_even, subtract 1 if current result is odd 198 if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0) 199 && (fstar.w[0] < bid_ten2mk64[ind - 1])) { 200 res--; 201 } 202 // set exponent to zero as it was negative before. 203 res = x_sign | 0x31c0000000000000ull | res; 204 BID_RETURN (res); 205 } else { // if exp < 0 and q + exp < 0 206 // the result is +0 or -0 207 res = x_sign | 0x31c0000000000000ull; 208 BID_RETURN (res); 209 } 210 break; 211 case BID_ROUNDING_TIES_AWAY: 212 if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q 213 // need to shift right -exp digits from the coefficient; exp will be 0 214 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' 215 // chop off ind digits from the lower part of C1 216 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits 217 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate 218 C1 = C1 + bid_midpoint64[ind - 1]; 219 // calculate C* and f* 220 // C* is actually floor(C*) in this case 221 // C* and f* need shifting and masking, as shown by 222 // bid_shiftright128[] and bid_maskhigh128[] 223 // 1 <= x <= 16 224 // kx = 10^(-x) = bid_ten2mk64[ind - 1] 225 // C* = (C1 + 1/2 * 10^x) * 10^(-x) 226 // the approximation of 10^(-x) was rounded up to 64 bits 227 __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]); 228 229 // if (0 < f* < 10^(-x)) then the result is a midpoint 230 // C* = floor(C*) - logical right shift; C* has p decimal digits, 231 // correct by Prop. 1) 232 // else 233 // C* = floor(C*) (logical right shift; C has p decimal digits, 234 // correct by Property 1) 235 // n = C* * 10^(e+x) 236 237 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 238 res = P128.w[1]; 239 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 240 shift = bid_shiftright128[ind - 1]; // 3 <= shift <= 63 241 res = (P128.w[1] >> shift); 242 } 243 res = x_sign | 0x31c0000000000000ull | res; 244 BID_RETURN (res); 245 } else { // if exp < 0 and q + exp < 0 246 // the result is +0 or -0 247 res = x_sign | 0x31c0000000000000ull; 248 BID_RETURN (res); 249 } 250 break; 251 case BID_ROUNDING_DOWN: 252 if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q 253 // need to shift right -exp digits from the coefficient; exp will be 0 254 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' 255 // chop off ind digits from the lower part of C1 256 // C1 fits in 64 bits 257 // calculate C* and f* 258 // C* is actually floor(C*) in this case 259 // C* and f* need shifting and masking, as shown by 260 // bid_shiftright128[] and bid_maskhigh128[] 261 // 1 <= x <= 16 262 // kx = 10^(-x) = bid_ten2mk64[ind - 1] 263 // C* = C1 * 10^(-x) 264 // the approximation of 10^(-x) was rounded up to 64 bits 265 __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]); 266 267 // C* = floor(C*) (logical right shift; C has p decimal digits, 268 // correct by Property 1) 269 // if (0 < f* < 10^(-x)) then the result is exact 270 // n = C* * 10^(e+x) 271 272 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 273 res = P128.w[1]; 274 fstar.w[1] = 0; 275 fstar.w[0] = P128.w[0]; 276 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 277 shift = bid_shiftright128[ind - 1]; // 3 <= shift <= 63 278 res = (P128.w[1] >> shift); 279 fstar.w[1] = P128.w[1] & bid_maskhigh128[ind - 1]; 280 fstar.w[0] = P128.w[0]; 281 } 282 // if (f* > 10^(-x)) then the result is inexact 283 if ((fstar.w[1] != 0) || (fstar.w[0] >= bid_ten2mk64[ind - 1])) { 284 if (x_sign) { 285 // if negative and not exact, increment magnitude 286 res++; 287 } 288 } 289 // set exponent to zero as it was negative before. 290 res = x_sign | 0x31c0000000000000ull | res; 291 BID_RETURN (res); 292 } else { // if exp < 0 and q + exp <= 0 293 // the result is +0 or -1 294 if (x_sign) { 295 res = 0xb1c0000000000001ull; 296 } else { 297 res = 0x31c0000000000000ull; 298 } 299 BID_RETURN (res); 300 } 301 break; 302 case BID_ROUNDING_UP: 303 if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q 304 // need to shift right -exp digits from the coefficient; exp will be 0 305 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' 306 // chop off ind digits from the lower part of C1 307 // C1 fits in 64 bits 308 // calculate C* and f* 309 // C* is actually floor(C*) in this case 310 // C* and f* need shifting and masking, as shown by 311 // bid_shiftright128[] and bid_maskhigh128[] 312 // 1 <= x <= 16 313 // kx = 10^(-x) = bid_ten2mk64[ind - 1] 314 // C* = C1 * 10^(-x) 315 // the approximation of 10^(-x) was rounded up to 64 bits 316 __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]); 317 318 // C* = floor(C*) (logical right shift; C has p decimal digits, 319 // correct by Property 1) 320 // if (0 < f* < 10^(-x)) then the result is exact 321 // n = C* * 10^(e+x) 322 323 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 324 res = P128.w[1]; 325 fstar.w[1] = 0; 326 fstar.w[0] = P128.w[0]; 327 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 328 shift = bid_shiftright128[ind - 1]; // 3 <= shift <= 63 329 res = (P128.w[1] >> shift); 330 fstar.w[1] = P128.w[1] & bid_maskhigh128[ind - 1]; 331 fstar.w[0] = P128.w[0]; 332 } 333 // if (f* > 10^(-x)) then the result is inexact 334 if ((fstar.w[1] != 0) || (fstar.w[0] >= bid_ten2mk64[ind - 1])) { 335 if (!x_sign) { 336 // if positive and not exact, increment magnitude 337 res++; 338 } 339 } 340 // set exponent to zero as it was negative before. 341 res = x_sign | 0x31c0000000000000ull | res; 342 BID_RETURN (res); 343 } else { // if exp < 0 and q + exp <= 0 344 // the result is -0 or +1 345 if (x_sign) { 346 res = 0xb1c0000000000000ull; 347 } else { 348 res = 0x31c0000000000001ull; 349 } 350 BID_RETURN (res); 351 } 352 break; 353 case BID_ROUNDING_TO_ZERO: 354 if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q 355 // need to shift right -exp digits from the coefficient; exp will be 0 356 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' 357 // chop off ind digits from the lower part of C1 358 // C1 fits in 127 bits 359 // calculate C* and f* 360 // C* is actually floor(C*) in this case 361 // C* and f* need shifting and masking, as shown by 362 // bid_shiftright128[] and bid_maskhigh128[] 363 // 1 <= x <= 16 364 // kx = 10^(-x) = bid_ten2mk64[ind - 1] 365 // C* = C1 * 10^(-x) 366 // the approximation of 10^(-x) was rounded up to 64 bits 367 __mul_64x64_to_128 (P128, C1, bid_ten2mk64[ind - 1]); 368 369 // C* = floor(C*) (logical right shift; C has p decimal digits, 370 // correct by Property 1) 371 // if (0 < f* < 10^(-x)) then the result is exact 372 // n = C* * 10^(e+x) 373 374 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 375 res = P128.w[1]; 376 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 377 shift = bid_shiftright128[ind - 1]; // 3 <= shift <= 63 378 res = (P128.w[1] >> shift); 379 } 380 // set exponent to zero as it was negative before. 381 res = x_sign | 0x31c0000000000000ull | res; 382 BID_RETURN (res); 383 } else { // if exp < 0 and q + exp < 0 384 // the result is +0 or -0 385 res = x_sign | 0x31c0000000000000ull; 386 BID_RETURN (res); 387 } 388 break; 389 } // end switch () 390 BID_RETURN (res); 391 } 392 393