1 /******************************************************************************
2 Copyright (c) 2007-2011, Intel Corp.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its contributors
14 may be used to endorse or promote products derived from this software
15 without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 THE POSSIBILITY OF SUCH DAMAGE.
28 ******************************************************************************/
29
#ifndef __BIDECIMAL_H
#define __BIDECIMAL_H

// Silence MSVC CRT deprecation warnings (C4996) for the classic C
// string/stdio functions used throughout the library.
#define _CRT_SECURE_NO_DEPRECATE
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#   pragma warning( disable: 4996 )
#endif

#include "bid_conf.h"
#include "bid_functions.h"

// All helpers in this header have internal linkage and are inlined.
#define __BID_INLINE__ static __inline

// FENCE expands to the Intel-compiler __fence intrinsic; it is a no-op
// for every other compiler.
#if defined (__INTEL_COMPILER)
#define FENCE __fence
#else
#define FENCE
#endif
48
49 /*********************************************************************
50 *
51 * Logical Shift Macros
52 *
53 *********************************************************************/
54
// Logical right shift of the 128-bit value A by k bits, result in Q.
// Requires 0 < k < 64 (k == 0 or k >= 64 would shift by the full word
// width, which is undefined behavior).  k is now parenthesized so that
// an expression argument (e.g. a-b) expands correctly inside (64-(k)).
#define __shr_128(Q, A, k)                  \
{                                           \
  (Q).w[0] = (A).w[0] >> (k);               \
  (Q).w[0] |= (A).w[1] << (64-(k));         \
  (Q).w[1] = (A).w[1] >> (k);               \
}
61
// Logical right shift of the 128-bit value A by k bits, result in Q,
// for any 0 < k < 128 (handles shifts of 64 bits or more explicitly).
// k is now fully parenthesized so expression arguments expand correctly.
#define __shr_128_long(Q, A, k)             \
{                                           \
  if((k)<64) {                              \
    (Q).w[0] = (A).w[0] >> (k);             \
    (Q).w[0] |= (A).w[1] << (64-(k));       \
    (Q).w[1] = (A).w[1] >> (k);             \
  }                                         \
  else {                                    \
    (Q).w[0] = (A).w[1]>>((k)-64);          \
    (Q).w[1] = 0;                           \
  }                                         \
}
74
// Logical left shift of the 128-bit value A by k bits, result in Q,
// for any 0 < k < 128 (handles shifts of 64 bits or more explicitly).
// k is now fully parenthesized so expression arguments expand correctly.
#define __shl_128_long(Q, A, k)             \
{                                           \
  if((k)<64) {                              \
    (Q).w[1] = (A).w[1] << (k);             \
    (Q).w[1] |= (A).w[0] >> (64-(k));       \
    (Q).w[0] = (A).w[0] << (k);             \
  }                                         \
  else {                                    \
    (Q).w[1] = (A).w[0]<<((k)-64);          \
    (Q).w[0] = 0;                           \
  }                                         \
}
87
// low 64 bits of a 128-bit value
#define __low_64(Q)  (Q).w[0]
/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
// ASCII-only tolower: maps 'A'..'Z' to 'a'..'z' and leaves every
// other value unchanged.  x is evaluated more than once, so pass a
// side-effect-free expression.
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
95 /*********************************************************************
96 *
97 * Compare Macros
98 *
99 *********************************************************************/
// greater than
//  return 0 if A<=B
//  non-zero if A>B
// Arguments are now fully parenthesized (matching __test_equal_128
// below) so that expression arguments expand safely.
#define __unsigned_compare_gt_128(A, B)  \
    (((A).w[1]>(B).w[1]) || (((A).w[1]==(B).w[1]) && ((A).w[0]>(B).w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B)  \
    (((A).w[1]>(B).w[1]) || (((A).w[1]==(B).w[1]) && ((A).w[0]>=(B).w[0])))
// equality of two 128-bit values
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range
// Refine the binary-exponent estimate bp (initialized from bin_expon)
// for the 128-bit significand P: bumps bp by one when P reaches the
// next binary power.  NOTE(review): the >=/> boundary handling differs
// slightly between the three branches — verify against callers before
// relying on exact behavior at P == 2^(bp+1).
#define __tight_bin_range_128(bp, P, bin_expon)  \
{                                                \
  BID_UINT64 M;                                  \
  M = 1;                                         \
  (bp) = (bin_expon);                            \
  if((bp)<63) {                                  \
    M <<= ((bp)+1);                              \
    if((P).w[0] >= M) (bp)++; }                  \
  else if((bp)>64) {                             \
    M <<= ((bp)+1-64);                           \
    if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0])) \
      (bp)++; }                                  \
  else if((P).w[1]) (bp)++;                      \
}
124 /*********************************************************************
125 *
126 * Add/Subtract Macros
127 *
128 *********************************************************************/
129 // add 64-bit value to 128-bit
130 #define __add_128_64(R128, A128, B64) \
131 { \
132 BID_UINT64 R64H; \
133 R64H = (A128).w[1]; \
134 (R128).w[0] = (B64) + (A128).w[0]; \
135 if((R128).w[0] < (B64)) \
136 R64H ++; \
137 (R128).w[1] = R64H; \
138 }
139 // subtract 64-bit value from 128-bit
140 #define __sub_128_64(R128, A128, B64) \
141 { \
142 BID_UINT64 R64H; \
143 R64H = (A128).w[1]; \
144 if((A128).w[0] < (B64)) \
145 R64H --; \
146 (R128).w[1] = R64H; \
147 (R128).w[0] = (A128).w[0] - (B64); \
148 }
149 // add 128-bit value to 128-bit
150 // assume no carry-out
151 #define __add_128_128(R128, A128, B128) \
152 { \
153 BID_UINT128 Q128; \
154 Q128.w[1] = (A128).w[1]+(B128).w[1]; \
155 Q128.w[0] = (B128).w[0] + (A128).w[0]; \
156 if(Q128.w[0] < (B128).w[0]) \
157 Q128.w[1] ++; \
158 (R128).w[1] = Q128.w[1]; \
159 (R128).w[0] = Q128.w[0]; \
160 }
161 #define __sub_128_128(R128, A128, B128) \
162 { \
163 BID_UINT128 Q128; \
164 Q128.w[1] = (A128).w[1]-(B128).w[1]; \
165 Q128.w[0] = (A128).w[0] - (B128).w[0]; \
166 if((A128).w[0] < (B128).w[0]) \
167 Q128.w[1] --; \
168 (R128).w[1] = Q128.w[1]; \
169 (R128).w[0] = Q128.w[0]; \
170 }
171 #define __add_carry_out(S, CY, X, Y) \
172 { \
173 BID_UINT64 X1=X; \
174 S = X + Y; \
175 CY = (S<X1) ? 1 : 0; \
176 }
177 #define __add_carry_in_out(S, CY, X, Y, CI) \
178 { \
179 BID_UINT64 X1; \
180 X1 = X + CI; \
181 S = X1 + Y; \
182 CY = ((S<X1) || (X1<CI)) ? 1 : 0; \
183 }
184 #define __sub_borrow_out(S, CY, X, Y) \
185 { \
186 BID_UINT64 X1=X; \
187 S = X - Y; \
188 CY = (S>X1) ? 1 : 0; \
189 }
190 #define __sub_borrow_in_out(S, CY, X, Y, CI) \
191 { \
192 BID_UINT64 X1, X0=X; \
193 X1 = X - CI; \
194 S = X1 - Y; \
195 CY = ((S>X1) || (X1>X0)) ? 1 : 0; \
196 }
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
// (0x0001ed09bead87c0_378d8e6400000000 == 10^34,
//  0x0000314dc6448d93_38c15b0a00000000 == 10^33)
#define INCREMENT(C_128, exp)                    \
{                                                \
  C_128.w[0]++;                                  \
  if (C_128.w[0] == 0) C_128.w[1]++;             \
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&     \
      C_128.w[0] == 0x378d8e6400000000ull) {     \
    exp++;                                       \
    C_128.w[1] = 0x0000314dc6448d93ull;          \
    C_128.w[0] = 0x38c15b0a00000000ull;          \
  }                                              \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp)                            \
{                                                        \
  C_128.w[0]--;                                          \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
  if (C_128.w[1] == 0x0000314dc6448d93ull &&             \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {  \
    exp--;                                               \
    C_128.w[1] = 0x0001ed09bead87c0ull;                  \
    C_128.w[0] = 0x378d8e63ffffffffull;                  \
  }                                                      \
}
224
225 /*********************************************************************
226 *
227 * Multiply Macros
228 *
229 *********************************************************************/
230 #define __mul_64x64_to_64(P64, CX, CY) (P64) = (CX) * (CY)
231 /***************************************
232 * Signed, Full 64x64-bit Multiply
233 ***************************************/
234 #define __imul_64x64_to_128(P, CX, CY) \
235 { \
236 BID_UINT64 SX, SY; \
237 __mul_64x64_to_128(P, CX, CY); \
238 \
239 SX = ((BID_SINT64)(CX))>>63; \
240 SY = ((BID_SINT64)(CY))>>63; \
241 SX &= CY; SY &= CX; \
242 \
243 (P).w[1] = (P).w[1] - SX - SY; \
244 }
245 /***************************************
246 * Signed, Full 64x128-bit Multiply
247 ***************************************/
248 #define __imul_64x128_full(Ph, Ql, A, B) \
249 { \
250 BID_UINT128 ALBL, ALBH, QM2, QM; \
251 \
252 __imul_64x64_to_128(ALBH, (A), (B).w[1]); \
253 __imul_64x64_to_128(ALBL, (A), (B).w[0]); \
254 \
255 (Ql).w[0] = ALBL.w[0]; \
256 QM.w[0] = ALBL.w[1]; \
257 QM.w[1] = ((BID_SINT64)ALBL.w[1])>>63; \
258 __add_128_128(QM2, ALBH, QM); \
259 (Ql).w[1] = QM2.w[0]; \
260 Ph = QM2.w[1]; \
261 }
262 /*****************************************************
263 * Unsigned Multiply Macros
264 *****************************************************/
265 // get full 64x64bit product
266 //
267 #define __mul_64x64_to_128(P, CX, CY) \
268 { \
269 BID_UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
270 CXH = (CX) >> 32; \
271 CXL = (BID_UINT32)(CX); \
272 CYH = (CY) >> 32; \
273 CYL = (BID_UINT32)(CY); \
274 \
275 PM = CXH*CYL; \
276 PH = CXH*CYH; \
277 PL = CXL*CYL; \
278 PM2 = CXL*CYH; \
279 PH += (PM>>32); \
280 PM = (BID_UINT64)((BID_UINT32)PM)+PM2+(PL>>32); \
281 \
282 (P).w[1] = PH + (PM>>32); \
283 (P).w[0] = (PM<<32)+(BID_UINT32)PL; \
284 }
285 // get full 64x64bit product
286 // Note:
287 // This macro is used for CX < 2^61, CY < 2^61
288 //
289 #define __mul_64x64_to_128_fast(P, CX, CY) \
290 { \
291 BID_UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
292 CXH = (CX) >> 32; \
293 CXL = (BID_UINT32)(CX); \
294 CYH = (CY) >> 32; \
295 CYL = (BID_UINT32)(CY); \
296 \
297 PM = CXH*CYL; \
298 PL = CXL*CYL; \
299 PH = CXH*CYH; \
300 PM += CXL*CYH; \
301 PM += (PL>>32); \
302 \
303 (P).w[1] = PH + (PM>>32); \
304 (P).w[0] = (PM<<32)+(BID_UINT32)PL; \
305 }
306 // used for CX< 2^60
307 #define __sqr64_fast(P, CX) \
308 { \
309 BID_UINT64 CXH, CXL, PL, PH, PM; \
310 CXH = (CX) >> 32; \
311 CXL = (BID_UINT32)(CX); \
312 \
313 PM = CXH*CXL; \
314 PL = CXL*CXL; \
315 PH = CXH*CXH; \
316 PM += PM; \
317 PM += (PL>>32); \
318 \
319 (P).w[1] = PH + (PM>>32); \
320 (P).w[0] = (PM<<32)+(BID_UINT32)PL; \
321 }
322 // get full 64x64bit product
323 // Note:
324 // This implementation is used for CX < 2^61, CY < 2^61
325 //
326 #define __mul_64x64_to_64_high_fast(P, CX, CY) \
327 { \
328 BID_UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
329 CXH = (CX) >> 32; \
330 CXL = (BID_UINT32)(CX); \
331 CYH = (CY) >> 32; \
332 CYL = (BID_UINT32)(CY); \
333 \
334 PM = CXH*CYL; \
335 PL = CXL*CYL; \
336 PH = CXH*CYH; \
337 PM += CXL*CYH; \
338 PM += (PL>>32); \
339 \
340 (P) = PH + (PM>>32); \
341 }
// get full 64x64bit product
// (same schoolbook algorithm as __mul_64x64_to_128 above)
#define __mul_64x64_to_128_full(P, CX, CY)   \
{                                            \
  BID_UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2; \
  CXH = (CX) >> 32;                          \
  CXL = (BID_UINT32)(CX);                    \
  CYH = (CY) >> 32;                          \
  CYL = (BID_UINT32)(CY);                    \
                                             \
  PM = CXH*CYL;                              \
  PH = CXH*CYH;                              \
  PL = CXL*CYL;                              \
  PM2 = CXL*CYH;                             \
  PH += (PM>>32);                            \
  PM = (BID_UINT64)((BID_UINT32)PM)+PM2+(PL>>32); \
                                             \
  (P).w[1] = PH + (PM>>32);                  \
  (P).w[0] = (PM<<32)+(BID_UINT32)PL;        \
}
// high 128 bits of a 128x128-bit product; relies on the middle sum
// ALBH+AHBL not carrying out of 128 bits (see "assume no carry-out"
// on __add_128_128)
#define __mul_128x128_high(Q, A, B)             \
{                                               \
  BID_UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;  \
                                                \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
                                                \
  __add_128_128(QM, ALBH, AHBL);                \
  __add_128_64(QM2, QM, ALBL.w[1]);             \
  __add_128_64((Q), AHBH, QM2.w[1]);            \
}
// full 256-bit product of two 128-bit values, returned as Qh:Ql
#define __mul_128x128_full(Qh, Ql, A, B)        \
{                                               \
  BID_UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;  \
                                                \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
                                                \
  __add_128_128(QM, ALBH, AHBL);                \
  (Ql).w[0] = ALBL.w[0];                        \
  __add_128_64(QM2, QM, ALBL.w[1]);             \
  __add_128_64((Qh), AHBH, QM2.w[1]);           \
  (Ql).w[1] = QM2.w[0];                         \
}
// low 128 bits of a 128x128-bit product (mod 2^128)
#define __mul_128x128_low(Ql, A, B)             \
{                                               \
  BID_UINT128 ALBL;                             \
  BID_UINT64 QM64;                              \
                                                \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1]; \
                                                \
  (Ql).w[0] = ALBL.w[0];                        \
  (Ql).w[1] = QM64 + ALBL.w[1];                 \
}
// low 128 bits of a 64x128-bit product (mod 2^128)
#define __mul_64x128_low(Ql, A, B)        \
{                                         \
  BID_UINT128 ALBL, ALBH, QM2;            \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);\
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);\
  (Ql).w[0] = ALBL.w[0];                  \
  __add_128_64(QM2, ALBH, ALBL.w[1]);     \
  (Ql).w[1] = QM2.w[0];                   \
}
// full 192-bit 64x128 product: Ph = top 64 bits, Ql = low 128 bits
#define __mul_64x128_full(Ph, Ql, A, B)   \
{                                         \
  BID_UINT128 ALBL, ALBH, QM2;            \
                                          \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);\
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);\
                                          \
  (Ql).w[0] = ALBL.w[0];                  \
  __add_128_64(QM2, ALBH, ALBL.w[1]);     \
  (Ql).w[1] = QM2.w[0];                   \
  Ph = QM2.w[1];                          \
}
// 64x128 -> 192-bit product stored in one 3-word result
#define __mul_64x128_to_192(Q, A, B)      \
{                                         \
  BID_UINT128 ALBL, ALBH, QM2;            \
                                          \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);\
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);\
                                          \
  (Q).w[0] = ALBL.w[0];                   \
  __add_128_64(QM2, ALBH, ALBL.w[1]);     \
  (Q).w[1] = QM2.w[0];                    \
  (Q).w[2] = QM2.w[1];                    \
}
// NOTE(review): byte-for-byte identical to __mul_64x128_to_192 above;
// apparently both spellings are kept because existing callers use both.
#define __mul_64x128_to192(Q, A, B)       \
{                                         \
  BID_UINT128 ALBL, ALBH, QM2;            \
                                          \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);\
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);\
                                          \
  (Q).w[0] = ALBL.w[0];                   \
  __add_128_64(QM2, ALBH, ALBL.w[1]);     \
  (Q).w[1] = QM2.w[0];                    \
  (Q).w[2] = QM2.w[1];                    \
}
// full 256-bit product of two 128-bit values
#define __mul_128x128_to_256(P256, A, B)                   \
{                                                          \
  BID_UINT128 Qll, Qlh;                                    \
  BID_UINT64 Phl, Phh, CY1, CY2;                           \
                                                           \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                  \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);                  \
  (P256).w[0] = Qll.w[0];                                  \
  __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);    \
  __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
  (P256).w[3] = Phh + CY2;                                 \
}
//
// For better performance, will check A.w[1] against 0,
// but not B.w[1]
// Use this macro accordingly
#define __mul_128x128_to_256_check_A(P256, A, B)             \
{                                                            \
  BID_UINT128 Qll, Qlh;                                      \
  BID_UINT64 Phl, Phh, CY1, CY2;                             \
                                                             \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                    \
  (P256).w[0] = Qll.w[0];                                    \
  if(A.w[1]) {                                               \
    __mul_64x128_full(Phh, Qlh, A.w[1], B);                  \
    __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);    \
    __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
    (P256).w[3] = Phh + CY2; }                               \
  else {                                                     \
    (P256).w[1] = Qll.w[1];                                  \
    (P256).w[2] = Phl;                                       \
    (P256).w[3] = 0; }                                       \
}
// 64x192 -> 256-bit product accumulated with carry chains
#define __mul_64x192_to_256(lP, lA, lB)                  \
{                                                        \
  BID_UINT128 lP0,lP1,lP2;                               \
  BID_UINT64 lC;                                         \
  __mul_64x64_to_128(lP0, lA, (lB).w[0]);                \
  __mul_64x64_to_128(lP1, lA, (lB).w[1]);                \
  __mul_64x64_to_128(lP2, lA, (lB).w[2]);                \
  (lP).w[0] = lP0.w[0];                                  \
  __add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);       \
  __add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
  (lP).w[3] = lP2.w[1] + lC;                             \
}
// 64x256 -> 320-bit product
#define __mul_64x256_to_320(P, A, B)                   \
{                                                      \
  BID_UINT128 lP0,lP1,lP2,lP3;                         \
  BID_UINT64 lC;                                       \
  __mul_64x64_to_128(lP0, A, (B).w[0]);                \
  __mul_64x64_to_128(lP1, A, (B).w[1]);                \
  __mul_64x64_to_128(lP2, A, (B).w[2]);                \
  __mul_64x64_to_128(lP3, A, (B).w[3]);                \
  (P).w[0] = lP0.w[0];                                 \
  __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
  __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC);\
  __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC);\
  (P).w[4] = lP3.w[1] + lC;                            \
}
// 192x192 -> 384-bit product, accumulated from 64x192 partials
#define __mul_192x192_to_384(P, A, B)                 \
{                                                     \
  BID_UINT256 P0,P1,P2;                               \
  BID_UINT64 CY;                                      \
  __mul_64x192_to_256(P0, (A).w[0], B);               \
  __mul_64x192_to_256(P1, (A).w[1], B);               \
  __mul_64x192_to_256(P2, (A).w[2], B);               \
  (P).w[0] = P0.w[0];                                 \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);       \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
  (P).w[4] = P1.w[3] + CY;                            \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);      \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);\
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);\
  (P).w[5] = P2.w[3] + CY;                            \
}
// 64x320 -> 384-bit product
#define __mul_64x320_to_384(P, A, B)                   \
{                                                      \
  BID_UINT128 lP0,lP1,lP2,lP3,lP4;                     \
  BID_UINT64 lC;                                       \
  __mul_64x64_to_128(lP0, A, (B).w[0]);                \
  __mul_64x64_to_128(lP1, A, (B).w[1]);                \
  __mul_64x64_to_128(lP2, A, (B).w[2]);                \
  __mul_64x64_to_128(lP3, A, (B).w[3]);                \
  __mul_64x64_to_128(lP4, A, (B).w[4]);                \
  (P).w[0] = lP0.w[0];                                 \
  __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
  __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC);\
  __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC);\
  __add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC);\
  (P).w[5] = lP4.w[1] + lC;                            \
}
// A*A
// Full 128x128-bit product
// (saves one 64x64 multiply vs __mul_128x128_to_256 by doubling the
// single cross product A.w[0]*A.w[1]; the carry from doubling goes
// into Qhh)
#define __sqr128_to_256(P256, A)                          \
{                                                         \
  BID_UINT128 Qll, Qlh, Qhh;                              \
  BID_UINT64 TMP_C1, TMP_C2;                              \
                                                          \
  __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                \
  __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                \
  Qhh.w[1] += (Qlh.w[1]>>63);                             \
  Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);          \
  Qlh.w[0] += Qlh.w[0];                                   \
  __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                \
                                                          \
  __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);\
  (P256).w[0] = Qll.w[0];                                 \
  __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1);\
  (P256).w[3] = Qhh.w[1]+TMP_C2;                          \
}
// 128x128 -> 256-bit product returned as two 128-bit halves (PQh:PQl)
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)    \
{                                                        \
  BID_UINT128 Qll, Qlh;                                  \
  BID_UINT64 Phl, Phh, C1, C2;                           \
                                                         \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);                \
  (PQl).w[0] = Qll.w[0];                                 \
  __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);    \
  __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);  \
  (PQh).w[1] = Phh + C2;                                 \
}
// 256x256 -> 512-bit product, accumulated from four 64x256 partials
#define __mul_256x256_to_512(P, A, B)                 \
{                                                     \
  BID_UINT512 P0,P1,P2,P3;                            \
  BID_UINT64 CY;                                      \
  __mul_64x256_to_320(P0, (A).w[0], B);               \
  __mul_64x256_to_320(P1, (A).w[1], B);               \
  __mul_64x256_to_320(P2, (A).w[2], B);               \
  __mul_64x256_to_320(P3, (A).w[3], B);               \
  (P).w[0] = P0.w[0];                                 \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);       \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
  __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
  (P).w[5] = P1.w[4] + CY;                            \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);      \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);\
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);\
  __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);\
  (P).w[6] = P2.w[4] + CY;                            \
  __add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);      \
  __add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY);\
  __add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY);\
  __add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY);\
  (P).w[7] = P3.w[4] + CY;                            \
}
// 192x256 -> 448-bit product, accumulated from three 64x256 partials
#define __mul_192x256_to_448(P, A, B)                 \
{                                                     \
  BID_UINT512 P0,P1,P2;                               \
  BID_UINT64 CY;                                      \
  __mul_64x256_to_320(P0, (A).w[0], B);               \
  __mul_64x256_to_320(P1, (A).w[1], B);               \
  __mul_64x256_to_320(P2, (A).w[2], B);               \
  (P).w[0] = P0.w[0];                                 \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);       \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
  __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
  (P).w[5] = P1.w[4] + CY;                            \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);      \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);\
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);\
  __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);\
  (P).w[6] = P2.w[4] + CY;                            \
}
// 320x320 -> 640-bit product: 256x256 core product plus the two
// top-word cross terms (P1, P2) and the top-word square term (P3)
#define __mul_320x320_to_640(P, A, B)                 \
{                                                     \
  BID_UINT512 P0,P1,P2,P3;                            \
  BID_UINT64 CY;                                      \
  __mul_256x256_to_512((P), (A), B);                  \
  __mul_64x256_to_320(P1, (A).w[4], B);               \
  __mul_64x256_to_320(P2, (B).w[4], A);               \
  __mul_64x64_to_128(P3, (A).w[4], (B).w[4]);         \
  __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
  __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY);\
  __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY);\
  __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY);\
  __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY);\
  P3.w[1] += CY;                                      \
  __add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);      \
  __add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY);\
  __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY);\
  __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY);\
  __add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY); \
  (P).w[9] = P3.w[1] + CY;                            \
}
// 384x384 -> 768-bit product: 320x320 core plus top-word cross terms
#define __mul_384x384_to_768(P, A, B)                 \
{                                                     \
  BID_UINT512 P0,P1,P2,P3;                            \
  BID_UINT64 CY;                                      \
  __mul_320x320_to_640((P), (A), B);                  \
  __mul_64x320_to_384(P1, (A).w[5], B);               \
  __mul_64x320_to_384(P2, (B).w[5], A);               \
  __mul_64x64_to_128(P3, (A).w[5], (B).w[5]);         \
  __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
  __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY);\
  __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY);\
  __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY);\
  __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY);\
  __add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY);\
  P3.w[1] += CY;                                      \
  __add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);      \
  __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY);\
  __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY);\
  __add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY);\
  __add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY);\
  __add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY);\
  (P).w[11] = P3.w[1] + CY;                           \
}
// low 128 bits of A(64) x B(128): the high cross term's low word is
// simply added into Ql.w[1] (mod 2^64)
#define __mul_64x128_short(Ql, A, B)      \
{                                         \
  BID_UINT64 ALBH_L;                      \
                                          \
  __mul_64x64_to_64(ALBH_L, (A),(B).w[1]);\
  __mul_64x64_to_128((Ql), (A), (B).w[0]);\
                                          \
  (Ql).w[1] += ALBH_L;                    \
}
// D = _TMP * 10, computed as 2*_TMP + 8*_TMP.
// _TMP is expanded several times — pass a side-effect-free lvalue.
#define __scale128_10(D,_TMP)                        \
{                                                    \
  BID_UINT128 _TMP2,_TMP8;                           \
  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);       \
  _TMP2.w[0] = _TMP.w[0]<<1;                         \
  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);       \
  _TMP8.w[0] = _TMP.w[0]<<3;                         \
  __add_128_128(D, _TMP2, _TMP8);                    \
}
// 64x64-bit product
// NOTE(review): same schoolbook algorithm as __mul_64x64_to_128;
// apparently kept as a separate variant for the 128x64 helper below.
#define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
{                                                 \
  BID_UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;        \
  CXH = (CX64) >> 32;                             \
  CXL = (BID_UINT32)(CX64);                       \
  CYH = (CY64) >> 32;                             \
  CYL = (BID_UINT32)(CY64);                       \
  PM = CXH*CYL;                                   \
  PH = CXH*CYH;                                   \
  PL = CXL*CYL;                                   \
  PM2 = CXL*CYH;                                  \
  PH += (PM>>32);                                 \
  PM = (BID_UINT64)((BID_UINT32)PM)+PM2+(PL>>32); \
  (P128).w[1] = PH + (PM>>32);                    \
  (P128).w[0] = (PM<<32)+(BID_UINT32)PL;          \
}
// 64x64-bit product
// (stores only the high 64 bits of the full product into P64)
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)   \
{                                                 \
  BID_UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;        \
  CXH = (CX64) >> 32;                             \
  CXL = (BID_UINT32)(CX64);                       \
  CYH = (CY64) >> 32;                             \
  CYL = (BID_UINT32)(CY64);                       \
  PM = CXH*CYL;                                   \
  PH = CXH*CYH;                                   \
  PL = CXL*CYL;                                   \
  PM2 = CXL*CYH;                                  \
  PH += (PM>>32);                                 \
  PM = (BID_UINT64)((BID_UINT32)PM)+PM2+(PL>>32); \
  P64 = PH + (PM>>32);                            \
}
// low 128 bits of B128 x A64 (mod 2^128)
#define __mul_128x64_to_128(Q128, A64, B128)          \
{                                                     \
  BID_UINT64 ALBH_L;                                  \
  ALBH_L = (A64) * (B128).w[1];                       \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
  (Q128).w[1] += ALBH_L;                              \
}
// might simplify by calculating just QM2.w[0]
// low 128 bits of A(64) x B(128)
#define __mul_64x128_to_128(Ql, A, B)     \
{                                         \
  BID_UINT128 ALBL, ALBH, QM2;            \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);\
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);\
  (Ql).w[0] = ALBL.w[0];                  \
  __add_128_64(QM2, ALBH, ALBL.w[1]);     \
  (Ql).w[1] = QM2.w[0];                   \
}
725 /*********************************************************************
726 *
727 * BID Pack/Unpack Macros
728 *
729 *********************************************************************/
730 /////////////////////////////////////////
731 // BID64 definitions
732 ////////////////////////////////////////
733 #define DECIMAL_MAX_EXPON_64 767
734 #define DECIMAL_EXPONENT_BIAS 398
735 #define MAX_FORMAT_DIGITS 16
736 /////////////////////////////////////////
737 // BID128 definitions
738 ////////////////////////////////////////
739 #define DECIMAL_MAX_EXPON_128 12287
740 #define DECIMAL_EXPONENT_BIAS_128 6176
741 #define MAX_FORMAT_DIGITS_128 34
742 /////////////////////////////////////////
743 // BID32 definitions
744 ////////////////////////////////////////
745 #define DECIMAL_MAX_EXPON_32 191
746 #define DECIMAL_EXPONENT_BIAS_32 101
747 #define MAX_FORMAT_DIGITS_32 7
748 ////////////////////////////////////////
749 // Constant Definitions
750 ///////////////////////////////////////
751 #define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
752 #define INFINITY_MASK64 0x7800000000000000ull
753 #define SINFINITY_MASK64 0xf800000000000000ull
754 #define SSNAN_MASK64 0xfc00000000000000ull
755 #define NAN_MASK64 0x7c00000000000000ull
756 #define SNAN_MASK64 0x7e00000000000000ull
757 #define QUIET_MASK64 0xfdffffffffffffffull
758 #define LARGE_COEFF_MASK64 0x0007ffffffffffffull
759 #define LARGE_COEFF_HIGH_BIT64 0x0020000000000000ull
760 #define SMALL_COEFF_MASK64 0x001fffffffffffffull
761 #define EXPONENT_MASK64 0x3ff
762 #define EXPONENT_SHIFT_LARGE64 51
763 #define EXPONENT_SHIFT_SMALL64 53
764 #define LARGEST_BID64 0x77fb86f26fc0ffffull
765 #define SMALLEST_BID64 0xf7fb86f26fc0ffffull
766 #define SMALL_COEFF_MASK128 0x0001ffffffffffffull
767 #define LARGE_COEFF_MASK128 0x00007fffffffffffull
768 #define EXPONENT_MASK128 0x3fff
769 #define LARGEST_BID128_HIGH 0x5fffed09bead87c0ull
770 #define LARGEST_BID128_LOW 0x378d8e63ffffffffull
771 #define SPECIAL_ENCODING_MASK32 0x60000000ul
772 #define SINFINITY_MASK32 0xf8000000ul
773 #define INFINITY_MASK32 0x78000000ul
774 #define LARGE_COEFF_MASK32 0x007ffffful
775 #define LARGE_COEFF_HIGH_BIT32 0x00800000ul
776 #define SMALL_COEFF_MASK32 0x001ffffful
777 #define EXPONENT_MASK32 0xff
778 #define LARGEST_BID32 0x77f8967f
779 #define NAN_MASK32 0x7c000000
780 #define SNAN_MASK32 0x7e000000
781 #define SSNAN_MASK32 0xfc000000
782 #define QUIET_MASK32 0xfdffffff
783 #define MASK_BINARY_EXPONENT 0x7ff0000000000000ull
784 #define BINARY_EXPONENT_BIAS 0x3ff
785 #define UPPER_EXPON_LIMIT 51
786 // data needed for BID pack/unpack macros
787 BID_EXTERN_C BID_UINT64 bid_round_const_table[][19];
788 BID_EXTERN_C BID_UINT128 bid_reciprocals10_128[];
789 BID_EXTERN_C int bid_recip_scale[];
790 BID_EXTERN_C BID_UINT128 bid_power10_table_128[];
791 BID_EXTERN_C int bid_estimate_decimal_digits[];
792 BID_EXTERN_C int bid_estimate_bin_expon[];
793 BID_EXTERN_C BID_UINT64 bid_power10_index_binexp[];
794 BID_EXTERN_C int bid_short_recip_scale[];
795 BID_EXTERN_C int bid_bid_bid_recip_scale32[];
796 BID_EXTERN_C BID_UINT64 bid_reciprocals10_64[];
797 BID_EXTERN_C BID_UINT64 bid_bid_reciprocals10_32[];
798 BID_EXTERN_C BID_UINT128 bid_power10_index_binexp_128[];
799 BID_EXTERN_C BID_UINT128 bid_round_const_table_128[][36];
800
801
802 //////////////////////////////////////////////
803 // Status Flag Handling
804 /////////////////////////////////////////////
805 #define __set_status_flags(fpsc, status) *(fpsc) |= status
806 #define is_inexact(fpsc) ((*(fpsc))&BID_INEXACT_EXCEPTION)
807
// Unpack a BID64 encoding x into its sign (*psign_x, bit 63 only),
// biased exponent (*pexponent_x) and coefficient (*pcoefficient_x).
// Returns the coefficient, which is 0 for NaN/Infinity, for a
// non-canonical coefficient, and for a true zero.
__BID_INLINE__ BID_UINT64
unpack_BID64 (BID_UINT64 * psign_x, int *pexponent_x,
	      BID_UINT64 * pcoefficient_x, BID_UINT64 x) {
  BID_UINT64 tmp, coeff;

  // sign is the top bit of the encoding
  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pexponent_x = 0;
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
      // a NaN payload of 10^15 or more is non-canonical: clear it
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
	*pcoefficient_x = x & 0xfe00000000000000ull;
      // infinity (not NaN): keep only sign + infinity bits
      if ((x & NAN_MASK64) == INFINITY_MASK64)
	*pcoefficient_x = x & SINFINITY_MASK64;
      return 0;	// NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return coeff;
  }
  // exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
846
847 //
848 // BID64 pack macro (general form)
849 //
850 __BID_INLINE__ BID_UINT64
get_BID64(BID_UINT64 sgn,int expon,BID_UINT64 coeff,int rmode,unsigned * fpsc)851 get_BID64 (BID_UINT64 sgn, int expon, BID_UINT64 coeff, int rmode,
852 unsigned *fpsc) {
853 BID_UINT128 Stemp, Q_low;
854 BID_UINT64 QH, r, mask, _C64, remainder_h, CY, carry;
855 int extra_digits, amount, amount2;
856 unsigned status;
857
858 if (coeff > 9999999999999999ull) {
859 expon++;
860 coeff = 1000000000000000ull;
861 }
862 // check for possible underflow/overflow
863 if (((unsigned) expon) >= 3 * 256) {
864 if (expon < 0) {
865 // underflow
866 if (expon + MAX_FORMAT_DIGITS < 0) {
867 #ifdef BID_SET_STATUS_FLAGS
868 __set_status_flags (fpsc,
869 BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
870 #endif
871 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
872 #ifndef IEEE_ROUND_NEAREST
873 if (rmode == BID_ROUNDING_DOWN && sgn)
874 return 0x8000000000000001ull;
875 if (rmode == BID_ROUNDING_UP && !sgn)
876 return 1ull;
877 #endif
878 #endif
879 // result is 0
880 return sgn;
881 }
882 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
883 #ifndef IEEE_ROUND_NEAREST
884 if (sgn && (unsigned) (rmode - 1) < 2)
885 rmode = 3 - rmode;
886 #endif
887 #endif
888 // get digits to be shifted out
889 extra_digits = -expon;
890 coeff += bid_round_const_table[rmode][extra_digits];
891
892 // get coeff*(2^M[extra_digits])/10^extra_digits
893 __mul_64x128_full (QH, Q_low, coeff,
894 bid_reciprocals10_128[extra_digits]);
895
896 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
897 amount = bid_recip_scale[extra_digits];
898
899 _C64 = QH >> amount;
900
901 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
902 #ifndef IEEE_ROUND_NEAREST
903 if (rmode == 0) //BID_ROUNDING_TO_NEAREST
904 #endif
905 if (_C64 & 1) {
906 // check whether fractional part of initial_P/10^extra_digits is exactly .5
907
908 // get remainder
909 amount2 = 64 - amount;
910 remainder_h = 0;
911 remainder_h--;
912 remainder_h >>= amount2;
913 remainder_h = remainder_h & QH;
914
915 if (!remainder_h
916 && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
917 || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
918 && Q_low.w[0] <
919 bid_reciprocals10_128[extra_digits].w[0]))) {
920 _C64--;
921 }
922 }
923 #endif
924
925 #ifdef BID_SET_STATUS_FLAGS
926
927 if (is_inexact (fpsc))
928 __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
929 else {
930 status = BID_INEXACT_EXCEPTION;
931 // get remainder
932 remainder_h = QH << (64 - amount);
933
934 switch (rmode) {
935 case BID_ROUNDING_TO_NEAREST:
936 case BID_ROUNDING_TIES_AWAY:
937 // test whether fractional part is 0
938 if (remainder_h == 0x8000000000000000ull
939 && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
940 || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
941 && Q_low.w[0] <
942 bid_reciprocals10_128[extra_digits].w[0])))
943 status = BID_EXACT_STATUS;
944 break;
945 case BID_ROUNDING_DOWN:
946 case BID_ROUNDING_TO_ZERO:
947 if (!remainder_h
948 && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
949 || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
950 && Q_low.w[0] <
951 bid_reciprocals10_128[extra_digits].w[0])))
952 status = BID_EXACT_STATUS;
953 break;
954 default:
955 // round up
956 __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
957 bid_reciprocals10_128[extra_digits].w[0]);
958 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
959 bid_reciprocals10_128[extra_digits].w[1], CY);
960 if ((remainder_h >> (64 - amount)) + carry >=
961 (((BID_UINT64) 1) << amount))
962 status = BID_EXACT_STATUS;
963 }
964
965 if (status != BID_EXACT_STATUS)
966 __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
967 }
968
969 #endif
970
971 return sgn | _C64;
972 }
973 if(!coeff) { if(expon > DECIMAL_MAX_EXPON_64) expon = DECIMAL_MAX_EXPON_64; }
974 while (coeff < 1000000000000000ull && expon >= 3 * 256) {
975 expon--;
976 coeff = (coeff << 3) + (coeff << 1);
977 }
978 if (expon > DECIMAL_MAX_EXPON_64) {
979 #ifdef BID_SET_STATUS_FLAGS
980 __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
981 #endif
982 // overflow
983 r = sgn | INFINITY_MASK64;
984 switch (rmode) {
985 case BID_ROUNDING_DOWN:
986 if (!sgn)
987 r = LARGEST_BID64;
988 break;
989 case BID_ROUNDING_TO_ZERO:
990 r = sgn | LARGEST_BID64;
991 break;
992 case BID_ROUNDING_UP:
993 // round up
994 if (sgn)
995 r = SMALLEST_BID64;
996 }
997 return r;
998 }
999 }
1000
1001 mask = 1;
1002 mask <<= EXPONENT_SHIFT_SMALL64;
1003
1004 // check whether coefficient fits in 10*5+3 bits
1005 if (coeff < mask) {
1006 r = expon;
1007 r <<= EXPONENT_SHIFT_SMALL64;
1008 r |= (coeff | sgn);
1009 return r;
1010 }
1011 // special format
1012
1013 // eliminate the case coeff==10^16 after rounding
1014 if (coeff == 10000000000000000ull) {
1015 r = expon + 1;
1016 r <<= EXPONENT_SHIFT_SMALL64;
1017 r |= (1000000000000000ull | sgn);
1018 return r;
1019 }
1020
1021 r = expon;
1022 r <<= EXPONENT_SHIFT_LARGE64;
1023 r |= (sgn | SPECIAL_ENCODING_MASK64);
1024 // add coeff, without leading bits
1025 mask = (mask >> 2) - 1;
1026 coeff &= mask;
1027 r |= coeff;
1028
1029 return r;
1030 }
1031
1032
1033
1034
1035 //
1036 // No overflow/underflow checking
1037 //
1038 __BID_INLINE__ BID_UINT64
fast_get_BID64(BID_UINT64 sgn,int expon,BID_UINT64 coeff)1039 fast_get_BID64 (BID_UINT64 sgn, int expon, BID_UINT64 coeff) {
1040 BID_UINT64 r, mask;
1041
1042 mask = 1;
1043 mask <<= EXPONENT_SHIFT_SMALL64;
1044
1045 // check whether coefficient fits in 10*5+3 bits
1046 if (coeff < mask) {
1047 r = expon;
1048 r <<= EXPONENT_SHIFT_SMALL64;
1049 r |= (coeff | sgn);
1050 return r;
1051 }
1052 // special format
1053
1054 // eliminate the case coeff==10^16 after rounding
1055 if (coeff == 10000000000000000ull) {
1056 r = expon + 1;
1057 r <<= EXPONENT_SHIFT_SMALL64;
1058 r |= (1000000000000000ull | sgn);
1059 return r;
1060 }
1061
1062 r = expon;
1063 r <<= EXPONENT_SHIFT_LARGE64;
1064 r |= (sgn | SPECIAL_ENCODING_MASK64);
1065 // add coeff, without leading bits
1066 mask = (mask >> 2) - 1;
1067 coeff &= mask;
1068 r |= coeff;
1069
1070 return r;
1071 }
1072
1073
1074 //
1075 // no underflow checking
1076 //
// Pack (sgn, expon, coeff) into a BID64, checking for overflow but not
// underflow.  On overflow the result is +/-Inf or the largest-magnitude
// finite value, selected by rmode, and OF+inexact are flagged in *fpsc.
__BID_INLINE__ BID_UINT64
fast_get_BID64_check_OF (BID_UINT64 sgn, int expon, BID_UINT64 coeff, int rmode,
			 unsigned *fpsc) {
  BID_UINT64 r, mask;

  // 3*256 == DECIMAL_MAX_EXPON_64 + 1 (consistent with the comparison
  // against DECIMAL_MAX_EXPON_64 below)
  if (((unsigned) expon) >= 3 * 256 - 1) {
    // coeff == 10^16 is a rounding artifact: renormalize to 10^15
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      // try to bring the exponent back in range by multiplying the
      // coefficient by 10 while it still has fewer than 16 digits
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
	expon--;
	coeff = (coeff << 3) + (coeff << 1);	// coeff *= 10
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef BID_SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
	// overflow: default result is +/-Inf, adjusted per rounding mode
	r = sgn | INFINITY_MASK64;
	switch (rmode) {
	case BID_ROUNDING_DOWN:
	  // toward -Inf: positive overflow gives the largest finite value
	  if (!sgn)
	    r = LARGEST_BID64;
	  break;
	case BID_ROUNDING_TO_ZERO:
	  r = sgn | LARGEST_BID64;
	  break;
	case BID_ROUNDING_UP:
	  // round up (toward +Inf): negative overflow gives the
	  // largest-magnitude negative finite value
	  if (sgn)
	    r = SMALLEST_BID64;
	}
	return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
1148
1149
1150 //
1151 // No overflow/underflow checking
1152 // or checking for coefficients equal to 10^16 (after rounding)
1153 //
1154 __BID_INLINE__ BID_UINT64
very_fast_get_BID64(BID_UINT64 sgn,int expon,BID_UINT64 coeff)1155 very_fast_get_BID64 (BID_UINT64 sgn, int expon, BID_UINT64 coeff) {
1156 BID_UINT64 r, mask;
1157
1158 mask = 1;
1159 mask <<= EXPONENT_SHIFT_SMALL64;
1160
1161 // check whether coefficient fits in 10*5+3 bits
1162 if (coeff < mask) {
1163 r = expon;
1164 r <<= EXPONENT_SHIFT_SMALL64;
1165 r |= (coeff | sgn);
1166 return r;
1167 }
1168 // special format
1169 r = expon;
1170 r <<= EXPONENT_SHIFT_LARGE64;
1171 r |= (sgn | SPECIAL_ENCODING_MASK64);
1172 // add coeff, without leading bits
1173 mask = (mask >> 2) - 1;
1174 coeff &= mask;
1175 r |= coeff;
1176
1177 return r;
1178 }
1179
1180 //
1181 // No overflow/underflow checking or checking for coefficients above 2^53
1182 //
1183 __BID_INLINE__ BID_UINT64
very_fast_get_BID64_small_mantissa(BID_UINT64 sgn,int expon,BID_UINT64 coeff)1184 very_fast_get_BID64_small_mantissa (BID_UINT64 sgn, int expon, BID_UINT64 coeff) {
1185 // no UF/OF
1186 BID_UINT64 r;
1187
1188 r = expon;
1189 r <<= EXPONENT_SHIFT_SMALL64;
1190 r |= (coeff | sgn);
1191 return r;
1192 }
1193
1194
1195 //
1196 // This pack macro is used when underflow is known to occur
1197 //
// Pack (sgn, expon, coeff) into a BID64 when underflow is known to occur:
// the coefficient is rounded into the subnormal range.  R is a sticky
// indicator (nonzero if lower-order digits were already discarded); it is
// folded into the low bit of 10*coeff so it participates in rounding.
__BID_INLINE__ BID_UINT64
get_BID64_UF (BID_UINT64 sgn, int expon, BID_UINT64 coeff, BID_UINT64 R, int rmode,
	      unsigned *fpsc) {
  BID_UINT128 C128, Q_low, Stemp;
  BID_UINT64 _C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
    // even the most significant digit underflows: result is zero,
    // or the smallest subnormal for the directed roundings
#ifdef BID_SET_STATUS_FLAGS
    __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if (rmode == BID_ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == BID_ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  // for negative operands swap the directed roundings (DOWN <-> UP)
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  if (R)
    coeff |= 1;
  // get digits to be shifted out (one extra because of the *10 above)
  extra_digits = 1 - expon;
  C128.w[0] = coeff + bid_round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
		     bid_reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = bid_recip_scale[extra_digits];

  _C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
#endif
    if (_C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5;
      // if so, undo the round-up so the result is even (ties-to-even)

      // get remainder: the low 'amount' bits of QH
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;		// all-ones
      remainder_h >>= amount2;	// mask of the low 'amount' bits
      remainder_h = remainder_h & QH;

      if (!remainder_h
	  && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  bid_reciprocals10_128[extra_digits].w[0]))) {
	_C64--;
      }
    }
#endif

#ifdef BID_SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
  else {
    // decide whether the discarded part was exactly zero;
    // underflow is only signaled together with inexact
    status = BID_INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case BID_ROUNDING_TO_NEAREST:
    case BID_ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      // (exactly .5 before the rounding constant was added)
      if (remainder_h == 0x8000000000000000ull
	  && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  bid_reciprocals10_128[extra_digits].w[0])))
	status = BID_EXACT_STATUS;
      break;
    case BID_ROUNDING_DOWN:
    case BID_ROUNDING_TO_ZERO:
      if (!remainder_h
	  && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  bid_reciprocals10_128[extra_digits].w[0])))
	status = BID_EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
		       bid_reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			  bid_reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
	  (((BID_UINT64) 1) << amount))
	status = BID_EXACT_STATUS;
    }

    if (status != BID_EXACT_STATUS)
      __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | _C64;

}
1319
1320
1321
1322 //
// This pack macro does not check for coefficients above 2^53
1324 //
// General BID64 pack macro for coefficients already known to fit in
// 53 bits ("small mantissa"); handles both underflow and overflow.
__BID_INLINE__ BID_UINT64
get_BID64_small_mantissa (BID_UINT64 sgn, int expon, BID_UINT64 coeff,
			  int rmode, unsigned *fpsc) {
  BID_UINT128 C128, Q_low, Stemp;
  BID_UINT64 r, mask, _C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow
  // (the unsigned cast catches both expon < 0 and expon >= 3*256)
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
	// total underflow: result is zero, or the smallest subnormal
	// for the directed roundings
#ifdef BID_SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == BID_ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == BID_ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // for negative operands swap the directed roundings (DOWN <-> UP)
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      C128.w[0] = coeff + bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
			 bid_reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = bid_recip_scale[extra_digits];

      _C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
#endif
	if (_C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5;
	  // if so, undo the round-up so the result is even (ties-to-even)

	  // get remainder: the low 'amount' bits of QH
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;		// all-ones
	  remainder_h >>= amount2;	// mask of the low 'amount' bits
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      bid_reciprocals10_128[extra_digits].w[0]))) {
	    _C64--;
	  }
	}
#endif

#ifdef BID_SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
      else {
	// decide whether the discarded part was exactly zero;
	// underflow is only signaled together with inexact
	status = BID_INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	switch (rmode) {
	case BID_ROUNDING_TO_NEAREST:
	case BID_ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      bid_reciprocals10_128[extra_digits].w[0])))
	    status = BID_EXACT_STATUS;
	  break;
	case BID_ROUNDING_DOWN:
	case BID_ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      bid_reciprocals10_128[extra_digits].w[0])))
	    status = BID_EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   bid_reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      bid_reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((BID_UINT64) 1) << amount))
	    status = BID_EXACT_STATUS;
	}

	if (status != BID_EXACT_STATUS)
	  __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | _C64;
    }

    // exponent too large: try to lower it by scaling the coefficient up
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);	// coeff *= 10
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef BID_SET_STATUS_FLAGS
      __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
      // overflow: default result is +/-Inf, adjusted per rounding mode
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case BID_ROUNDING_DOWN:
	// toward -Inf: positive overflow gives the largest finite value
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case BID_ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case BID_ROUNDING_UP:
	// round up (toward +Inf): negative overflow gives the
	// largest-magnitude negative finite value
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    } else {
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      if (coeff >= mask) {
	// scaling pushed the coefficient above the small-format limit:
	// use the large-coefficient (special) encoding
	r = expon;
	r <<= EXPONENT_SHIFT_LARGE64;
	r |= (sgn | SPECIAL_ENCODING_MASK64);
	// add coeff, without leading bits
	mask = (mask >> 2) - 1;
	coeff &= mask;
	r |= coeff;
	return r;
      }
    }
  }

  // standard (small-coefficient) format
  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}
1491
1492
1493 /*****************************************************************************
1494 *
1495 * BID128 pack/unpack macros
1496 *
1497 *****************************************************************************/
1498
1499 //
1500 // Macro for handling BID128 underflow
1501 // sticky bit given as additional argument
1502 //
// BID128 underflow handler with an explicit sticky remainder: rounds
// CQ*10^expon (expon < 0) into the subnormal range.  R is nonzero when
// lower-order digits were already discarded; it is folded into the low
// bit of 10*CQ so it participates in the rounding decision.
// The result is written to *pres, which is also returned.
__BID_INLINE__ BID_UINT128 *
bid_handle_UF_128_rem (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 CQ,
		       BID_UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  BID_UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  BID_UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
    // total underflow: result is zero, or the smallest subnormal
    // for the directed roundings
#ifdef BID_SET_STATUS_FLAGS
    __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == BID_ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == BID_ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10, computed as CQ*2 + CQ*8 with cross-word carries
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder (sticky bit)
  if (R)
    CQ.w[0] |= 1;

  // number of decimal digits to remove (one extra because of the *10)
  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // for negative operands swap the directed roundings (DOWN <-> UP)
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = bid_round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // divide by 10^ed2: multiply by the scaled reciprocal, then shift
  TP128 = bid_reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = bid_recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed2 is exactly .5;
      // if so, undo the round-up so the result is even (ties-to-even)

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef BID_SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
  else {
    // decide whether the discarded part was exactly zero;
    // underflow is only signaled together with inexact
    status = BID_INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case BID_ROUNDING_TO_NEAREST:
    case BID_ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0])))
	status = BID_EXACT_STATUS;
      break;
    case BID_ROUNDING_DOWN:
    case BID_ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0])))
	status = BID_EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       bid_reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  bid_reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)	// carry out of the low word
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = BID_EXACT_STATUS;
    }

    if (status != BID_EXACT_STATUS)
      __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
1643
1644
1645 //
1646 // Macro for handling BID128 underflow
1647 //
// BID128 underflow handler: rounds CQ*10^expon (expon < 0) into the
// subnormal range by removing -expon decimal digits from CQ.
// The result is written to *pres, which is also returned.
__BID_INLINE__ BID_UINT128 *
handle_UF_128 (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 CQ,
	       unsigned *prounding_mode, unsigned *fpsc) {
  BID_UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  BID_UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
    // total underflow: result is zero, or the smallest subnormal
    // for the directed roundings
#ifdef BID_SET_STATUS_FLAGS
    __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == BID_ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == BID_ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  // number of decimal digits to remove
  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // for negative operands swap the directed roundings (DOWN <-> UP)
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = bid_round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // divide by 10^ed2: multiply by the scaled reciprocal, then shift
  TP128 = bid_reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = bid_recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed2 is exactly .5;
      // if so, undo the round-up so the result is even (ties-to-even)

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef BID_SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
  else {
    // decide whether the discarded part was exactly zero;
    // underflow is only signaled together with inexact
    status = BID_INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case BID_ROUNDING_TO_NEAREST:
    case BID_ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0])))
	status = BID_EXACT_STATUS;
      break;
    case BID_ROUNDING_DOWN:
    case BID_ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0])))
	status = BID_EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       bid_reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  bid_reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)	// carry out of the low word
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = BID_EXACT_STATUS;
    }

    if (status != BID_EXACT_STATUS)
      __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
1779
1780
1781
1782 //
1783 // BID128 unpack, input passed by value (used in transcendental functions only)
1784 //
// Unpack a BID128 operand passed by value; endian-aware variant.
// Words are addressed via BID_LOW_128W/BID_HIGH_128W so the same code
// works with either word layout.
// Outputs: *psign_x - sign bit (in high-word position), *pexponent_x -
// biased exponent (0 for NaN/Inf), *pcoefficient_x - canonical
// coefficient (zero when non-canonical) or the NaN/Inf pattern.
// Returns nonzero iff the operand is finite with a nonzero coefficient.
__BID_INLINE__ BID_UINT64
unpack_BID128_value_BLE (BID_UINT64 * psign_x, int *pexponent_x,
			 BID_UINT128 * pcoefficient_x, BID_UINT128 x) {
  BID_UINT128 coeff, T33, T34;
  BID_UINT64 ex;

  *psign_x = (x.w[BID_HIGH_128W]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[BID_HIGH_128W] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[BID_HIGH_128W] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input: coefficient is treated as zero
      pcoefficient_x->w[BID_LOW_128W] = 0;
      pcoefficient_x->w[BID_HIGH_128W] = 0;
      ex = (x.w[BID_HIGH_128W]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // NaN or Infinity; a NaN payload >= 10^33 is non-canonical
    // 10^33
    T33 = bid_power10_table_128[33];

    pcoefficient_x->w[BID_LOW_128W] = x.w[BID_LOW_128W];
    pcoefficient_x->w[BID_HIGH_128W] = (x.w[BID_HIGH_128W]) & 0x00003fffffffffffull;
    if ((pcoefficient_x->w[BID_HIGH_128W]>T33.w[1]) || ((pcoefficient_x->w[BID_HIGH_128W]==T33.w[1]) && (pcoefficient_x->w[BID_LOW_128W]>=T33.w[0]))) // non-canonical
    {
      // non-canonical payload: keep only the sign/special bits
      pcoefficient_x->w[BID_HIGH_128W] = (x.w[BID_HIGH_128W]) & 0xfe00000000000000ull;
      pcoefficient_x->w[BID_LOW_128W] = 0;
    } else
      pcoefficient_x->w[BID_HIGH_128W] = (x.w[BID_HIGH_128W]) & 0xfe003fffffffffffull;
    if ((x.w[BID_HIGH_128W] & NAN_MASK64) == INFINITY_MASK64) {
      // Infinity: clear everything except the sign and infinity bits
      pcoefficient_x->w[BID_LOW_128W] = 0;
      pcoefficient_x->w[BID_HIGH_128W] = x.w[BID_HIGH_128W] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[BID_LOW_128W] = x.w[BID_LOW_128W];
  coeff.w[BID_HIGH_128W] = (x.w[BID_HIGH_128W]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = bid_power10_table_128[34];
  // check for non-canonical values: coefficient >= 10^34 reads as zero
  if ((coeff.w[BID_HIGH_128W]>T34.w[1]) || ((coeff.w[BID_HIGH_128W]==T34.w[1]) && (coeff.w[BID_LOW_128W]>=T34.w[0]))) {
    coeff.w[BID_LOW_128W] = coeff.w[BID_HIGH_128W] = 0;}

  pcoefficient_x->w[BID_LOW_128W] = coeff.w[BID_LOW_128W];
  pcoefficient_x->w[BID_HIGH_128W] = coeff.w[BID_HIGH_128W];

  // biased exponent starts at bit 49 of the high word
  ex = (x.w[BID_HIGH_128W]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[BID_LOW_128W] | coeff.w[BID_HIGH_128W];
}
1839
1840
1841
1842 //
1843 // BID128 unpack, input passed by value
1844 //
// Unpack a BID128 operand passed by value (native word order).
// Outputs: *psign_x - sign bit (in high-word position), *pexponent_x -
// biased exponent (0 for NaN/Inf), *pcoefficient_x - canonical
// coefficient (zero when non-canonical) or the NaN/Inf pattern.
// Returns nonzero iff the operand is finite with a nonzero coefficient.
__BID_INLINE__ BID_UINT64
unpack_BID128_value (BID_UINT64 * psign_x, int *pexponent_x,
		     BID_UINT128 * pcoefficient_x, BID_UINT128 x) {
  BID_UINT128 coeff, T33, T34;
  BID_UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input: coefficient is treated as zero
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // NaN or Infinity; a NaN payload >= 10^33 is non-canonical
    // 10^33
    T33 = bid_power10_table_128[33];

    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
    {
      // non-canonical payload: keep only the sign/special bits
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
      pcoefficient_x->w[0] = 0;
    } else
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
      // Infinity: clear everything except the sign and infinity bits
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = bid_power10_table_128[34];
  // check for non-canonical values: coefficient >= 10^34 reads as zero
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  // biased exponent starts at bit 49 of the high word
  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1905
1906 //
1907 // BID128 unpack, input passed by value
1908 //
1909 __BID_INLINE__ void
quick_unpack_BID128_em(int * pexponent_x,BID_UINT128 * pcoefficient_x,BID_UINT128 x)1910 quick_unpack_BID128_em (int *pexponent_x,
1911 BID_UINT128 * pcoefficient_x, BID_UINT128 x) {
1912 BID_UINT64 ex;
1913
1914 pcoefficient_x->w[0] = x.w[0];
1915 pcoefficient_x->w[1] = (x.w[1]) & SMALL_COEFF_MASK128;
1916
1917 ex = (x.w[1]) >> 49;
1918 *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1919
1920 }
1921
1922
1923 //
// BID128 unpack, input passed by reference
1925 //
// Unpack a BID128 operand passed by reference.
// Same contract as unpack_BID128_value: fills sign, biased exponent and
// canonical coefficient, and returns nonzero iff the operand is finite
// with a nonzero coefficient (0 for zeros, non-canonical, NaN, Inf).
__BID_INLINE__ BID_UINT64
unpack_BID128 (BID_UINT64 * psign_x, int *pexponent_x,
	       BID_UINT128 * pcoefficient_x, BID_UINT128 * px) {
  BID_UINT128 coeff, T33, T34;
  BID_UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input: coefficient is treated as zero
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (px->w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // NaN or Infinity; a NaN payload >= 10^33 is non-canonical
    // 10^33
    T33 = bid_power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) {	// non-canonical
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
      pcoefficient_x->w[0] = 0;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = bid_power10_table_128[34];
  // check for non-canonical values: coefficient >= 10^34 reads as zero
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  // biased exponent starts at bit 49 of the high word
  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1975
1976 //
1977 // Pack macro checks for overflow, but not underflow
1978 //
// Pack a BID128, checking for overflow only (no underflow or coefficient
// canonicity checks).  If the exponent exceeds the maximum, first try to
// compensate by scaling the coefficient up by powers of 10; on true
// overflow return +/-Inf or the largest finite value per rounding mode.
__BID_INLINE__ BID_UINT128 *
bid_get_BID128_very_fast_OF (BID_UINT128 * pres, BID_UINT64 sgn, int expon,
			     BID_UINT128 coeff, unsigned *prounding_mode,
			     unsigned *fpsc) {
  BID_UINT128 T;
  BID_UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      // while coeff < 10^(MAX_FORMAT_DIGITS_128-1), multiply it by 10
      // and decrement the exponent
      T = bid_power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	// coeff = coeff*8 + coeff*2, with cross-word carries
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)	// carry out of the low word
	  coeff.w[1]++;

	expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef BID_SET_STATUS_FLAGS
      __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // rounding away from the overflow direction yields the
      // largest-magnitude finite value instead of infinity
      if (*prounding_mode == BID_ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == BID_ROUNDING_UP) || (!sgn
							     &&
							     *prounding_mode
							     ==
							     BID_ROUNDING_DOWN))
      {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  // pack: sign | exponent<<49 | coefficient
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2037
2038
2039 //
2040 // No overflow/underflow checks
2041 // No checking for coefficient == 10^34 (rounding artifact)
2042 //
2043 __BID_INLINE__ BID_UINT128 *
bid_get_BID128_very_fast(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff)2044 bid_get_BID128_very_fast (BID_UINT128 * pres, BID_UINT64 sgn, int expon,
2045 BID_UINT128 coeff) {
2046 BID_UINT64 tmp;
2047
2048 pres->w[0] = coeff.w[0];
2049 tmp = expon;
2050 tmp <<= 49;
2051 pres->w[1] = sgn | tmp | coeff.w[1];
2052
2053 return pres;
2054 }
2055
2056 // same as above, but for either endian mode
2057 __BID_INLINE__ BID_UINT128 *
bid_get_BID128_very_fast_BLE(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff)2058 bid_get_BID128_very_fast_BLE (BID_UINT128 * pres, BID_UINT64 sgn, int expon,
2059 BID_UINT128 coeff) {
2060 BID_UINT64 tmp;
2061
2062 pres->w[BID_LOW_128W] = coeff.w[BID_LOW_128W];
2063 tmp = expon;
2064 tmp <<= 49;
2065 pres->w[BID_HIGH_128W] = sgn | tmp | coeff.w[BID_HIGH_128W];
2066
2067 return pres;
2068 }
2069
2070 //
2071 // No overflow/underflow checks
2072 //
2073 __BID_INLINE__ BID_UINT128 *
bid_get_BID128_fast(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff)2074 bid_get_BID128_fast (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 coeff) {
2075 BID_UINT64 tmp;
2076
2077 // coeff==10^34?
2078 if (coeff.w[1] == 0x0001ed09bead87c0ull
2079 && coeff.w[0] == 0x378d8e6400000000ull) {
2080 expon++;
2081 // set coefficient to 10^33
2082 coeff.w[1] = 0x0000314dc6448d93ull;
2083 coeff.w[0] = 0x38c15b0a00000000ull;
2084 }
2085
2086 pres->w[0] = coeff.w[0];
2087 tmp = expon;
2088 tmp <<= 49;
2089 pres->w[1] = sgn | tmp | coeff.w[1];
2090
2091 return pres;
2092 }
2093
2094 //
2095 // General BID128 pack macro
2096 //
// General BID128 pack routine: renormalizes a coefficient of 10^34,
// handles exponent underflow (delegated to handle_UF_128) and overflow
// (rounding-mode-dependent result, status flags set), then packs
// sign | (biased exponent << 49) | coefficient into *pres.
__BID_INLINE__ BID_UINT128 *
bid_get_BID128 (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 coeff,
		unsigned *prounding_mode, unsigned *fpsc) {
  BID_UINT128 T;
  BID_UINT64 tmp, tmp2;

  // coeff==10^34? (rounding artifact: renormalize to 10^33, expon+1)
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0) {
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
			    fpsc);
    }

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      // Try to bring the exponent back into range by multiplying the
      // coefficient by 10 while it stays below 10^(MAX_FORMAT_DIGITS_128-1).
      T = bid_power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	// coeff *= 10, computed as coeff*8 + coeff*2 with manual carries
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)	// carry out of the low 64-bit word
	  coeff.w[1]++;

	expon--;
      }
    }
    if (expon > DECIMAL_MAX_EXPON_128) {
      // A zero coefficient cannot overflow: clamp to the max exponent.
      if (!(coeff.w[1] | coeff.w[0])) {
	pres->w[1] = sgn | (((BID_UINT64) DECIMAL_MAX_EXPON_128) << 49);
	pres->w[0] = 0;
	return pres;
      }
      // OF
#ifdef BID_SET_STATUS_FLAGS
      __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // Rounding toward zero, or directed rounding away from the overflow
      // direction, yields the largest finite value instead of infinity.
      if (*prounding_mode == BID_ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == BID_ROUNDING_UP) || (!sgn
							     &&
							     *prounding_mode
							     ==
							     BID_ROUNDING_DOWN))
	{
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	} else
#endif
#endif
	{
	  pres->w[1] = sgn | INFINITY_MASK64;
	  pres->w[0] = 0;
	}
      return pres;
    }
  }

  // Normal path: pack sign, biased exponent and coefficient.
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2173
2174
2175 //
2176 // Macro used for conversions from string
2177 // (no additional arguments given for rounding mode, status flags)
2178 //
// Pack routine used by conversions from string: like bid_get_BID128 but
// with the rounding mode fixed to 0 (nearest) and no caller-visible
// status flags.
// NOTE(review): `status` is passed to handle_UF_128 uninitialized and is
// never read here -- presumably handle_UF_128 only writes it; confirm.
__BID_INLINE__ BID_UINT128 *
bid_get_BID128_string (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 coeff) {
  BID_UINT128 D2, D8;
  BID_UINT64 tmp;
  unsigned rmode = 0, status;

  // coeff==10^34? (rounding artifact: renormalize to 10^33, expon+1)
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      // Multiply the coefficient by 10 (while it stays below 10^33) to
      // pull the exponent back into range.
      while (expon > DECIMAL_MAX_EXPON_128 &&
	     (coeff.w[1] < bid_power10_table_128[33].w[1] ||
	      (coeff.w[1] == bid_power10_table_128[33].w[1]
	       && coeff.w[0] < bid_power10_table_128[33].w[0]))) {
	// coeff = 2*coeff + 8*coeff  (== 10*coeff)
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
	D2.w[0] = coeff.w[0] << 1;
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
	D8.w[0] = coeff.w[0] << 3;

	__add_128_128 (coeff, D2, D8);
	expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      expon = DECIMAL_MAX_EXPON_128;	// zero never overflows: clamp

    if (expon > DECIMAL_MAX_EXPON_128) {
      // Overflow: default to +/-infinity; directed rounding modes give
      // the largest finite value instead.  (rmode is always 0 here, so
      // the switch currently keeps the infinity default.)
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case BID_ROUNDING_DOWN:
	if (!sgn) {
	  pres->w[1] = LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      case BID_ROUNDING_TO_ZERO:
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
	break;
      case BID_ROUNDING_UP:
	// round up
	if (sgn) {
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      }

      return pres;
    }
  }

  // Normal path: pack sign | (biased exponent << 49) | coefficient.
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2251
2252
2253
2254 /*****************************************************************************
2255 *
2256 * BID32 pack/unpack macros
2257 *
2258 *****************************************************************************/
2259
2260
// Unpack a BID32 value: extract the sign (as mask 0x80000000), the biased
// exponent, and the coefficient.  Returns the coefficient, which is 0 for
// NaN/Infinity and for a non-canonical (>= 10^7) coefficient.
__BID_INLINE__ BID_UINT32
unpack_BID32 (BID_UINT32 * psign_x, int *pexponent_x,
	      BID_UINT32 * pcoefficient_x, BID_UINT32 x) {
  BID_UINT32 tmp;

  *psign_x = x & 0x80000000;

  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
    // special encodings
    if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
      // NaN: keep the payload, clearing it if non-canonical (>= 10^6)
      *pcoefficient_x = x & 0xfe0fffff;
      if ((x & 0x000fffff) >= 1000000)
	*pcoefficient_x = x & 0xfe000000;
      // infinity (not NaN): keep only sign and infinity bits
      if ((x & NAN_MASK32) == INFINITY_MASK32)
	*pcoefficient_x = x & 0xf8000000;
      *pexponent_x = 0;
      return 0;	// NaN or Infinity
    }
    // coefficient: low stored bits with the implicit high bit restored
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
    // check for non-canonical value
    if (*pcoefficient_x >= 10000000)
      *pcoefficient_x = 0;
    // get exponent (stored at bit 21 in this format)
    tmp = x >> 21;
    *pexponent_x = tmp & EXPONENT_MASK32;
    return *pcoefficient_x;
  }
  // plain format: exponent at bit 23
  tmp = x >> 23;
  *pexponent_x = tmp & EXPONENT_MASK32;
  // coefficient
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);

  return *pcoefficient_x;
}
2297
2298 //
2299 // General pack macro for BID32
2300 //
// General BID32 pack routine: renormalizes coeff==10^7 to 10^6 (expon+1);
// on underflow rounds the coefficient by 10^extra_digits via the
// reciprocal-multiply tables, setting underflow/inexact flags; on overflow
// returns infinity or the largest finite value per the rounding mode; then
// packs sign/exponent/coefficient in plain or steered format.
__BID_INLINE__ BID_UINT32
get_BID32 (BID_UINT32 sgn, int expon, BID_UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  BID_UINT128 Q;
  BID_UINT64 _C64, remainder_h, carry, Stemp;
  BID_UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  // coefficient of 10^7 is a rounding artifact: renormalize to 10^6
  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
	// Too small for any digit to survive rounding.
#ifdef BID_SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	// Directed rounding away from zero returns the minimum denormal.
	if (rmode == BID_ROUNDING_DOWN && sgn)
	  return 0x80000001;
	if (rmode == BID_ROUNDING_UP && !sgn)
	  return 1;
#endif
#endif
	// result is 0
	return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // for negative values swap ROUNDING_DOWN <-> ROUNDING_UP so the
      // rounding below can ignore the sign
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif

      extra_digits = -expon;
      coeff += bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, bid_reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = bid_short_recip_scale[extra_digits];

      _C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
#endif
	if (_C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
	  // (ties-to-even: round back down on an exact tie)

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & Q.w[1];

	  if (!remainder_h && (Q.w[0] < bid_reciprocals10_64[extra_digits])) {
	    _C64--;
	  }
	}
#endif

#ifdef BID_SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
      else {
	status = BID_INEXACT_EXCEPTION;
	// get remainder
	remainder_h = Q.w[1] << (64 - amount);

	switch (rmode) {
	case BID_ROUNDING_TO_NEAREST:
	case BID_ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q.w[0] < bid_reciprocals10_64[extra_digits]))
	    status = BID_EXACT_STATUS;
	  break;
	case BID_ROUNDING_DOWN:
	case BID_ROUNDING_TO_ZERO:
	  if (!remainder_h && (Q.w[0] < bid_reciprocals10_64[extra_digits]))
	    status = BID_EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp, carry, Q.w[0],
			   bid_reciprocals10_64[extra_digits]);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((BID_UINT64) 1) << amount))
	    status = BID_EXACT_STATUS;
	}

	if (status != BID_EXACT_STATUS)
	  __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (BID_UINT32) _C64;
    }

    // exponent too large; a zero coefficient just takes the max exponent
    if(!coeff) { if(expon > DECIMAL_MAX_EXPON_32) expon = DECIMAL_MAX_EXPON_32; }
    // otherwise scale the coefficient up by 10 while it fits, to pull
    // the exponent back into range
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case BID_ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID32;
	break;
      case BID_ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID32;
	break;
      case BID_ROUNDING_UP:
	// round up
	if (sgn)
	  r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((BID_UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((BID_UINT32) coeff) & mask);

  return r;
}
2465
2466
2467
2468
2469
2470 //
2471 // General pack macro for BID32
2472 //
// BID32 pack routine with an extra sticky/rounding input R: like get_BID32,
// but on underflow it first computes 10*coeff and ORs R into the lowest bit
// (so discarded lower digits still influence the rounding decision), then
// rounds away extra_digits = 1 - expon digits.
__BID_INLINE__ BID_UINT32
get_BID32_UF (BID_UINT32 sgn, int expon, BID_UINT64 coeff, BID_UINT32 R, int rmode,
	      unsigned *fpsc) {
  BID_UINT128 Q;
  BID_UINT64 _C64, remainder_h, carry, Stemp;
  BID_UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  // coefficient of 10^7 is a rounding artifact: renormalize to 10^6
  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
	// Too small for any digit to survive rounding.
#ifdef BID_SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	// Directed rounding away from zero returns the minimum denormal.
	if (rmode == BID_ROUNDING_DOWN && sgn)
	  return 0x80000001;
	if (rmode == BID_ROUNDING_UP && !sgn)
	  return 1;
#endif
#endif
	// result is 0
	return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // for negative values swap ROUNDING_DOWN <-> ROUNDING_UP so the
      // rounding below can ignore the sign
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif

      // 10*coeff
      coeff = (coeff << 3) + (coeff << 1);
      if (R)	// fold the sticky bit into the lowest position
	coeff |= 1;

      extra_digits = 1-expon;
      coeff += bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, bid_reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = bid_short_recip_scale[extra_digits];

      _C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
#endif
	if (_C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
	  // (ties-to-even: round back down on an exact tie)

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & Q.w[1];

	  if (!remainder_h && (Q.w[0] < bid_reciprocals10_64[extra_digits])) {
	    _C64--;
	  }
	}
#endif

#ifdef BID_SET_STATUS_FLAGS
      if (is_inexact (fpsc)){
	__set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);}
      else {
	status = BID_INEXACT_EXCEPTION;
	// get remainder
	remainder_h = Q.w[1] << (64 - amount);

	switch (rmode) {
	case BID_ROUNDING_TO_NEAREST:
	case BID_ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q.w[0] < bid_reciprocals10_64[extra_digits]))
	    status = BID_EXACT_STATUS;
	  break;
	case BID_ROUNDING_DOWN:
	case BID_ROUNDING_TO_ZERO:
	  if (!remainder_h && (Q.w[0] < bid_reciprocals10_64[extra_digits]))
	    status = BID_EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp, carry, Q.w[0],
			   bid_reciprocals10_64[extra_digits]);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((BID_UINT64) 1) << amount))
	    status = BID_EXACT_STATUS;
	}

	if (status != BID_EXACT_STATUS) {
	  __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION|status);
	}
      }
#endif

      return sgn | (BID_UINT32) _C64;
    }

    // exponent too large: scale the coefficient up by 10 while it fits
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case BID_ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID32;
	break;
      case BID_ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID32;
	break;
      case BID_ROUNDING_UP:
	// round up
	if (sgn)
	  r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((BID_UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((BID_UINT32) coeff) & mask);

  return r;
}
2640
2641
2642
2643
2644
2645
2646 //
2647 // no overflow/underflow checks
2648 //
2649 __BID_INLINE__ BID_UINT32
very_fast_get_BID32(BID_UINT32 sgn,int expon,BID_UINT32 coeff)2650 very_fast_get_BID32 (BID_UINT32 sgn, int expon, BID_UINT32 coeff) {
2651 BID_UINT32 r, mask;
2652
2653 mask = 1 << 23;
2654
2655 // check whether coefficient fits in 10*2+3 bits
2656 if (coeff < mask) {
2657 r = expon;
2658 r <<= 23;
2659 r |= (coeff | sgn);
2660 return r;
2661 }
2662 // special format
2663 r = expon;
2664 r <<= 21;
2665 r |= (sgn | SPECIAL_ENCODING_MASK32);
2666 // add coeff, without leading bits
2667 mask = (1 << 21) - 1;
2668 coeff &= mask;
2669 r |= coeff;
2670
2671 return r;
2672 }
2673
2674 __BID_INLINE__ BID_UINT32
fast_get_BID32(BID_UINT32 sgn,int expon,BID_UINT32 coeff)2675 fast_get_BID32 (BID_UINT32 sgn, int expon, BID_UINT32 coeff) {
2676 BID_UINT32 r, mask;
2677
2678 mask = 1 << 23;
2679
2680 if (coeff > 9999999ul) {
2681 expon++;
2682 coeff = 1000000ul;
2683 }
2684 // check whether coefficient fits in 10*2+3 bits
2685 if (coeff < mask) {
2686 r = expon;
2687 r <<= 23;
2688 r |= (coeff | sgn);
2689 return r;
2690 }
2691 // special format
2692 r = expon;
2693 r <<= 21;
2694 r |= (sgn | SPECIAL_ENCODING_MASK32);
2695 // add coeff, without leading bits
2696 mask = (1 << 21) - 1;
2697 coeff &= mask;
2698 r |= coeff;
2699
2700 return r;
2701 }
2702
2703
2704
2705 /*************************************************************
2706 *
2707 *************************************************************/
// 384-bit unsigned integer: six 64-bit words, w[0] lowest.
typedef struct BID_ALIGN (16)
{
  BID_UINT64 w[6];
} BID_UINT384;
// 512-bit unsigned integer: eight 64-bit words, w[0] lowest.
typedef struct BID_ALIGN (16)
{
  BID_UINT64 w[8];
} BID_UINT512;
2716
// #define P 34

// --- Field masks for the 64-bit (high-word) BID encodings ---
#define MASK_STEERING_BITS 0x6000000000000000ull
#define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
#define MASK_BINARY_SIG1 0x001fffffffffffffull
#define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
//used to take G[2:w+3] (sec 3.3)
#define MASK_BINARY_SIG2 0x0007ffffffffffffull
//used to mask out G4:T0 (sec 3.3)
#define MASK_BINARY_OR2 0x0020000000000000ull
//used to prefix 8+G4 to T (sec 3.3)
#define UPPER_EXPON_LIMIT 51
#define MASK_EXP 0x7ffe000000000000ull
#define MASK_EXP2 0x1fff800000000000ull
#define MASK_SPECIAL 0x7800000000000000ull
#define MASK_NAN 0x7c00000000000000ull
#define MASK_SNAN 0x7e00000000000000ull
#define MASK_ANY_INF 0x7c00000000000000ull
#define MASK_INF 0x7800000000000000ull
#define MASK_SIGN 0x8000000000000000ull
#define MASK_COEFF 0x0001ffffffffffffull
#define BIN_EXP_BIAS (0x1820ull << 49)

// --- Biased exponent limits (shifted into field position) ---
#define EXP_MIN32 0x00000000
#define EXP_MAX32 0x5f800000
#define EXP_MIN 0x0000000000000000ull
// EXP_MIN = (-6176 + 6176) << 49
#define EXP_MAX 0x5ffe000000000000ull
// EXP_MAX = (6111 + 6176) << 49
#define EXP_MAX_P1 0x6000000000000000ull
// EXP_MAX + 1 = (6111 + 6176 + 1) << 49
#define EXP_P1 0x0002000000000000ull
// EXP_P1 = 1 << 49
#define expmin -6176
// min unbiased exponent
#define expmax 6111
// max unbiased exponent
#define expmin16 -398
// min unbiased exponent
#define expmax16 369
// max unbiased exponent
#define expmin7 -101
// min unbiased exponent
#define expmax7 90
// max unbiased exponent

// --- 32-bit format masks and limits ---
#define MASK_INF32 0x78000000
#define MASK_ANY_INF32 0x7c000000
#define MASK_SIGN32 0x80000000
#define MASK_NAN32 0x7c000000
#define MASK_SNAN32 0x7e000000
#define SIGNMASK32 0x80000000
#define BID32_SIG_MAX 0x0098967f
#define BID64_SIG_MAX 0x002386F26FC0ffffull
#define SIGNMASK64 0x8000000000000000ull
#define MASK_STEERING_BITS32 0x60000000
#define MASK_BINARY_EXPONENT1_32 0x7f800000
#define MASK_BINARY_SIG1_32 0x007fffff
#define MASK_BINARY_EXPONENT2_32 0x1fe00000
//used to take G[2:w+3] (sec 3.3)
#define MASK_BINARY_SIG2_32 0x001fffff
//used to mask out G4:T0 (sec 3.3)
#define MASK_BINARY_OR2_32 0x00800000
#define MASK_SPECIAL32 0x78000000
2781 // typedef unsigned int BID_FPSC; // floating-point status and control
2782 // bit31:
2783 // bit30:
2784 // bit29:
2785 // bit28:
2786 // bit27:
2787 // bit26:
2788 // bit25:
2789 // bit24:
2790 // bit23:
2791 // bit22:
2792 // bit21:
2793 // bit20:
2794 // bit19:
2795 // bit18:
2796 // bit17:
2797 // bit16:
2798 // bit15:
2799 // bit14: RC:2
2800 // bit13: RC:1
2801 // bit12: RC:0
2802 // bit11: PM
2803 // bit10: UM
2804 // bit9: OM
2805 // bit8: ZM
2806 // bit7: DM
2807 // bit6: IM
2808 // bit5: PE
2809 // bit4: UE
2810 // bit3: OE
2811 // bit2: ZE
2812 // bit1: DE
2813 // bit0: IE
2814
// Rounding-control field (RC, bits 12..14) within the FPSC layout above.
#define ROUNDING_BID_MODE_MASK	0x00007000
2816
// Entry of the bid_nr_digits[] lookup table used to count decimal digits.
typedef struct _DEC_DIGITS {
  unsigned int digits;		// digit count
  BID_UINT64 threshold_hi;	// comparison threshold, high 64 bits
  BID_UINT64 threshold_lo;	// comparison threshold, low 64 bits
  unsigned int digits1;		// alternate digit count (below threshold)
} DEC_DIGITS;
2823
// Constant tables defined in the library's data translation units:
// digit counts, rounding midpoints, powers of ten (10^k), and scaled
// reciprocals of powers of ten (10^-k) with their shift counts.
BID_EXTERN_C DEC_DIGITS bid_nr_digits[];
BID_EXTERN_C BID_UINT64 bid_midpoint64[];
BID_EXTERN_C BID_UINT128 bid_midpoint128[];
BID_EXTERN_C BID_UINT192 bid_midpoint192[];
BID_EXTERN_C BID_UINT256 bid_midpoint256[];
BID_EXTERN_C BID_UINT64 bid_ten2k64[];
BID_EXTERN_C BID_UINT128 bid_ten2k128[];
BID_EXTERN_C BID_UINT256 bid_ten2k256[];
BID_EXTERN_C BID_UINT128 bid_ten2mk128[];
BID_EXTERN_C BID_UINT64 bid_ten2mk64[];
BID_EXTERN_C BID_UINT128 bid_ten2mk128trunc[];
BID_EXTERN_C int bid_shiftright128[];
BID_EXTERN_C BID_UINT64 bid_maskhigh128[];
BID_EXTERN_C BID_UINT64 bid_maskhigh128M[];
BID_EXTERN_C BID_UINT64 bid_maskhigh192M[];
BID_EXTERN_C BID_UINT64 bid_maskhigh256M[];
BID_EXTERN_C BID_UINT64 bid_onehalf128[];
BID_EXTERN_C BID_UINT64 bid_onehalf128M[];
BID_EXTERN_C BID_UINT64 bid_onehalf192M[];
BID_EXTERN_C BID_UINT64 bid_onehalf256M[];
BID_EXTERN_C BID_UINT128 bid_ten2mk128M[];
BID_EXTERN_C BID_UINT128 bid_ten2mk128truncM[];
BID_EXTERN_C BID_UINT192 bid_ten2mk192truncM[];
BID_EXTERN_C BID_UINT256 bid_ten2mk256truncM[];
BID_EXTERN_C int bid_shiftright128M[];
BID_EXTERN_C int bid_shiftright192M[];
BID_EXTERN_C int bid_shiftright256M[];
BID_EXTERN_C BID_UINT192 bid_ten2mk192M[];
BID_EXTERN_C BID_UINT256 bid_ten2mk256M[];
BID_EXTERN_C unsigned char bid_char_table2[];
BID_EXTERN_C unsigned char bid_char_table3[];

// 10^(-3k) tables with their shift counts.
BID_EXTERN_C BID_UINT64 bid_ten2m3k64[];
BID_EXTERN_C unsigned int bid_shift_ten2m3k64[];
BID_EXTERN_C BID_UINT128 bid_ten2m3k128[];
BID_EXTERN_C unsigned int bid_shift_ten2m3k128[];
2860
2861
2862
2863 /***************************************************************************
2864 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2865 ***************************************************************************/
2866
// Per-width table families used by the general rounding helpers:
// Kx* (rounding constants), Ex*m* (shift amounts), half*, mask*,
// and truncated 10^-x reciprocals.
BID_EXTERN_C BID_UINT64 bid_Kx64[];
BID_EXTERN_C unsigned int bid_Ex64m64[];
BID_EXTERN_C BID_UINT64 bid_half64[];
BID_EXTERN_C BID_UINT64 bid_mask64[];
BID_EXTERN_C BID_UINT64 bid_ten2mxtrunc64[];

BID_EXTERN_C BID_UINT128 bid_Kx128[];
BID_EXTERN_C unsigned int bid_Ex128m128[];
BID_EXTERN_C BID_UINT64 bid_half128[];
BID_EXTERN_C BID_UINT64 bid_mask128[];
BID_EXTERN_C BID_UINT128 bid_ten2mxtrunc128[];

BID_EXTERN_C BID_UINT192 bid_Kx192[];
BID_EXTERN_C unsigned int bid_Ex192m192[];
BID_EXTERN_C BID_UINT64 bid_half192[];
BID_EXTERN_C BID_UINT64 bid_mask192[];
BID_EXTERN_C BID_UINT192 bid_ten2mxtrunc192[];

BID_EXTERN_C BID_UINT256 bid_Kx256[];
BID_EXTERN_C unsigned int bid_Ex256m256[];
BID_EXTERN_C BID_UINT64 bid_half256[];
BID_EXTERN_C BID_UINT64 bid_mask256[];
BID_EXTERN_C BID_UINT256 bid_ten2mxtrunc256[];
2890
// Union holding either a BID64 or a BID128 value.
typedef union BID_ALIGN (16) __bid64_128 {
  BID_UINT64 b64;
  BID_UINT128 b128;
} BID64_128;

// Mixed-format fused multiply-add; P0..P3 select operand/result handling.
// NOTE(review): the exact semantics of P0..P3 are not visible in this
// header -- confirm against the bid_fma definition.
BID64_128 bid_fma (unsigned int P0,
		   BID64_128 x1, unsigned int P1,
		   BID64_128 y1, unsigned int P2,
		   BID64_128 z1, unsigned int P3,
		   unsigned int rnd_mode, BID_FPSC * fpsc);
2901
// Coefficient digit counts of the three BID formats.
#define P7 7
#define P16 16
#define P34 34

// Bit-level view of a binary double.
union __int_double {
  BID_UINT64 i;
  double d;
};
typedef union __int_double int_double;


// Bit-level view of a binary float.
union __int_float {
  BID_UINT32 i;
  float d;
};
typedef union __int_float int_float;

// Swap A and B using temporary T.
#define SWAP(A,B,T) {\
  T = A; \
  A = B; \
  B = T; \
}
2924
// This macro finds A such that coefficient_x is in [2^A, 2^(A+1)),
// i.e. it yields the bit length (minus one) of coefficient_x, by reading
// the biased binary exponent of its float conversion through tempx.
// NOTE(review): the original body wrote `temp_x.d` (a hard-coded name that
// had to exist at the call site) while reading `tempx.i`, so it only
// behaved as intended when the caller passed a variable literally named
// temp_x.  Both accesses now consistently use the macro parameter.
#define NUMBITS(A, coefficient_x, tempx){\
    tempx.d=(float)coefficient_x;\
    A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
}
2931
// Classification values, ordered from signaling NaN through +infinity.
enum class_types {
  signalingNaN,
  quietNaN,
  negativeInfinity,
  negativeNormal,
  negativeSubnormal,
  negativeZero,
  positiveZero,
  positiveSubnormal,
  positiveNormal,
  positiveInfinity
};

// Bit-level view of a binary float.
typedef union {
  BID_UINT32 ui32;
  float f;
} BID_UI32FLOAT;

// Bit-level view of a binary double.
typedef union {
  BID_UINT64 ui64;
  double d;
} BID_UI64DOUBLE;
2954
2955
2956 //////////////////////////////////////////////////
2957 // Macros for raising exceptions / setting flags
2958 /////////////////////////////////////////////////
2959
// Convenience wrappers over the status word reachable through the
// (caller-scope) pointer `pfpsf`.
#define bid_raise_except(x)  __set_status_flags (pfpsf, (x))
#define get_bid_sw()  (*pfpsf)
#define set_bid_sw(x)  (*pfpsf) = (x)
2963
2964 #endif
2965