1 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef __BIDECIMAL_H
25 #define __BIDECIMAL_H
26
27 #include "bid_conf.h"
28 #include "bid_functions.h"
29
30 #define __BID_INLINE__ static __inline
31
32 /*********************************************************************
33 *
34 * Logical Shift Macros
35 *
36 *********************************************************************/
37
/* Logical right shift of 128-bit (A) by k bits into (Q).
   Requires 0 < k < 64 (k == 0 or k >= 64 would be an undefined shift).
   Safe when Q and A name the same object: the high word is read before
   it is overwritten.
   Fix: parenthesize the macro parameter k — the previous `64-k` expanded
   incorrectly for compound arguments such as `a+b`.  */
#define __shr_128(Q, A, k)                  \
{                                           \
     (Q).w[0] = (A).w[0] >> (k);            \
     (Q).w[0] |= (A).w[1] << (64-(k));      \
     (Q).w[1] = (A).w[1] >> (k);            \
}
44
/* Logical right shift of 128-bit (A) by k bits into (Q), for 0 < k < 128
   (k == 0 would produce an undefined 64-bit shift in the first branch).
   Safe when Q and A name the same object.
   Fix: parenthesize k consistently — the condition already used (k) but
   the shift expressions did not, breaking compound arguments.  */
#define __shr_128_long(Q, A, k)             \
{                                           \
    if((k)<64) {                            \
      (Q).w[0] = (A).w[0] >> (k);           \
      (Q).w[0] |= (A).w[1] << (64-(k));     \
      (Q).w[1] = (A).w[1] >> (k);           \
    }                                       \
    else {                                  \
      (Q).w[0] = (A).w[1]>>((k)-64);        \
      (Q).w[1] = 0;                         \
    }                                       \
}
57
/* Logical left shift of 128-bit (A) by k bits into (Q), for 0 < k < 128
   (k == 0 would produce an undefined 64-bit shift in the first branch).
   Safe when Q and A name the same object: the low word is read before
   it is overwritten.
   Fix: parenthesize k in the shift expressions (compound-argument safety,
   matching the existing (k) uses).  */
#define __shl_128_long(Q, A, k)             \
{                                           \
    if((k)<64) {                            \
      (Q).w[1] = (A).w[1] << (k);           \
      (Q).w[1] |= (A).w[0] >> (64-(k));     \
      (Q).w[0] = (A).w[0] << (k);           \
    }                                       \
    else {                                  \
      (Q).w[1] = (A).w[0]<<((k)-64);        \
      (Q).w[0] = 0;                         \
    }                                       \
}
70
71 #define __low_64(Q) (Q).w[0]
72 /*********************************************************************
73 *
74 * String Macros
75 *
76 *********************************************************************/
77 #define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
78 /*********************************************************************
79 *
80 * Compare Macros
81 *
82 *********************************************************************/
83 // greater than
84 // return 0 if A<=B
85 // non-zero if A>B
/* Unsigned 128-bit compare: nonzero iff A > B, 0 iff A <= B.
   A and B may be evaluated more than once.
   Fix: parenthesize A and B, consistent with __test_equal_128, so
   compound arguments expand safely.  */
#define __unsigned_compare_gt_128(A, B)  \
    (((A).w[1]>(B).w[1]) || (((A).w[1]==(B).w[1]) && ((A).w[0]>(B).w[0])))
88 // greater-or-equal
/* Unsigned 128-bit compare: nonzero iff A >= B.
   A and B may be evaluated more than once.
   Fix: parenthesize A and B, consistent with __test_equal_128.  */
#define __unsigned_compare_ge_128(A, B)  \
    (((A).w[1]>(B).w[1]) || (((A).w[1]==(B).w[1]) && ((A).w[0]>=(B).w[0])))
91 #define __test_equal_128(A, B) (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
92 // tighten exponent range
93 #define __tight_bin_range_128(bp, P, bin_expon) \
94 { \
95 UINT64 M; \
96 M = 1; \
97 (bp) = (bin_expon); \
98 if((bp)<63) { \
99 M <<= ((bp)+1); \
100 if((P).w[0] >= M) (bp)++; } \
101 else if((bp)>64) { \
102 M <<= ((bp)+1-64); \
103 if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
104 (bp)++; } \
105 else if((P).w[1]) (bp)++; \
106 }
107 /*********************************************************************
108 *
109 * Add/Subtract Macros
110 *
111 *********************************************************************/
// add 64-bit value to 128-bit
/* (R128) = (A128) + (B64); a carry out of the low word ((R128).w[0] wraps
   below (B64)) increments the high word.  Statement order makes this safe
   when R128 and A128 name the same object.  Carry out of bit 127 is lost.  */
#define __add_128_64(R128, A128, B64) \
{ \
UINT64 R64H; \
R64H = (A128).w[1]; \
(R128).w[0] = (B64) + (A128).w[0]; \
if((R128).w[0] < (B64)) \
R64H ++; \
(R128).w[1] = R64H; \
}
// subtract 64-bit value from 128-bit
/* (R128) = (A128) - (B64); a borrow from the low word decrements the high
   word.  The borrow test reads (A128).w[0] before it may be overwritten,
   so R128 may alias A128.  */
#define __sub_128_64(R128, A128, B64) \
{ \
UINT64 R64H; \
R64H = (A128).w[1]; \
if((A128).w[0] < (B64)) \
R64H --; \
(R128).w[1] = R64H; \
(R128).w[0] = (A128).w[0] - (B64); \
}
// add 128-bit value to 128-bit
// assume no carry-out
/* (R128) = (A128) + (B128) modulo 2^128; any carry out of bit 127 is
   dropped (callers guarantee it does not occur).  The sum is built in a
   local temporary, so R128 may alias either operand.  */
#define __add_128_128(R128, A128, B128) \
{ \
UINT128 Q128; \
Q128.w[1] = (A128).w[1]+(B128).w[1]; \
Q128.w[0] = (B128).w[0] + (A128).w[0]; \
if(Q128.w[0] < (B128).w[0]) \
Q128.w[1] ++; \
(R128).w[1] = Q128.w[1]; \
(R128).w[0] = Q128.w[0]; \
}
/* (R128) = (A128) - (B128) modulo 2^128; a borrow from the low word
   decrements the high word.  Built in a local temporary, so R128 may
   alias either operand.  */
#define __sub_128_128(R128, A128, B128) \
{ \
UINT128 Q128; \
Q128.w[1] = (A128).w[1]-(B128).w[1]; \
Q128.w[0] = (A128).w[0] - (B128).w[0]; \
if((A128).w[0] < (B128).w[0]) \
Q128.w[1] --; \
(R128).w[1] = Q128.w[1]; \
(R128).w[0] = Q128.w[0]; \
}
/* S = X + Y; CY = 1 on unsigned carry out, else 0.  X is saved in a local
   first, so S may alias X or Y.  */
#define __add_carry_out(S, CY, X, Y) \
{ \
UINT64 X1=X; \
S = X + Y; \
CY = (S<X1) ? 1 : 0; \
}
/* S = X + Y + CI; CY = 1 on carry out, else 0.  CI must be 0 or 1
   (a carry produced by __add_carry_out / a previous __add_carry_in_out).  */
#define __add_carry_in_out(S, CY, X, Y, CI) \
{ \
UINT64 X1; \
X1 = X + CI; \
S = X1 + Y; \
CY = ((S<X1) || (X1<CI)) ? 1 : 0; \
}
/* S = X - Y; CY = 1 on unsigned borrow (X < Y), else 0.  */
#define __sub_borrow_out(S, CY, X, Y) \
{ \
UINT64 X1=X; \
S = X - Y; \
CY = (S>X1) ? 1 : 0; \
}
/* S = X - Y - CI; CY = 1 on borrow out, else 0.  CI must be 0 or 1.  */
#define __sub_borrow_in_out(S, CY, X, Y, CI) \
{ \
UINT64 X1, X0=X; \
X1 = X - CI; \
S = X1 - Y; \
CY = ((S>X1) || (X1>X0)) ? 1 : 0; \
}
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
/* The magic constants are the 128-bit representations of
   10^34 = 0x0001ed09bead87c0_378d8e6400000000 and
   10^33 = 0x0000314dc6448d93_38c15b0a00000000.  */
#define INCREMENT(C_128, exp) \
{ \
C_128.w[0]++; \
if (C_128.w[0] == 0) C_128.w[1]++; \
if (C_128.w[1] == 0x0001ed09bead87c0ull && \
C_128.w[0] == 0x378d8e6400000000ull) { \
exp++; \
C_128.w[1] = 0x0000314dc6448d93ull; \
C_128.w[0] = 0x38c15b0a00000000ull; \
} \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
/* Constants are 10^33 - 1 and 10^34 - 1 in 128-bit form.  */
#define DECREMENT(C_128, exp) \
{ \
C_128.w[0]--; \
if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
if (C_128.w[1] == 0x0000314dc6448d93ull && \
C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) { \
exp--; \
C_128.w[1] = 0x0001ed09bead87c0ull; \
C_128.w[0] = 0x378d8e63ffffffffull; \
} \
}
207
208 /*********************************************************************
209 *
210 * Multiply Macros
211 *
212 *********************************************************************/
213 #define __mul_64x64_to_64(P64, CX, CY) (P64) = (CX) * (CY)
214 /***************************************
215 * Signed, Full 64x64-bit Multiply
216 ***************************************/
/* Full signed 64x64 -> 128-bit product: compute the unsigned product,
   then correct the high word by subtracting (sign(CX) & CY) and
   (sign(CY) & CX) -- the standard unsigned-to-signed adjustment.
   SX/SY are all-ones when the corresponding operand is negative.  */
#define __imul_64x64_to_128(P, CX, CY) \
{ \
UINT64 SX, SY; \
__mul_64x64_to_128(P, CX, CY); \
\
SX = ((SINT64)(CX))>>63; \
SY = ((SINT64)(CY))>>63; \
SX &= CY; SY &= CX; \
\
(P).w[1] = (P).w[1] - SX - SY; \
}
228 /***************************************
229 * Signed, Full 64x128-bit Multiply
230 ***************************************/
/* Signed 64 x 128 -> 192-bit product: Ph holds the top 64 bits, Ql the
   low 128.  Partial products are signed 64x64 multiplies; QM.w[1] is the
   sign extension of the middle partial product's high word so the
   cross-term addition carries correctly.  */
#define __imul_64x128_full(Ph, Ql, A, B) \
{ \
UINT128 ALBL, ALBH, QM2, QM; \
\
__imul_64x64_to_128(ALBH, (A), (B).w[1]); \
__imul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
(Ql).w[0] = ALBL.w[0]; \
QM.w[0] = ALBL.w[1]; \
QM.w[1] = ((SINT64)ALBL.w[1])>>63; \
__add_128_128(QM2, ALBH, QM); \
(Ql).w[1] = QM2.w[0]; \
Ph = QM2.w[1]; \
}
245 /*****************************************************
246 * Unsigned Multiply Macros
247 *****************************************************/
248 // get full 64x64bit product
249 //
/* Full unsigned 64x64 -> 128-bit product via four 32x32 partial
   products.  The two cross terms plus the carry out of the low partial
   product are accumulated into a 64-bit middle word, whose halves are
   folded into (P).w[1] and (P).w[0].  */
#define __mul_64x64_to_128(P, CX, CY)             \
{                                                 \
  UINT64 xhi, xlo, yhi, ylo, lo, hi, mid, cross;  \
  xhi = (CX) >> 32;                               \
  xlo = (UINT32)(CX);                             \
  yhi = (CY) >> 32;                               \
  ylo = (UINT32)(CY);                             \
                                                  \
  hi = xhi*yhi;                                   \
  lo = xlo*ylo;                                   \
  mid = xhi*ylo;                                  \
  cross = xlo*yhi;                                \
  hi += (mid>>32);                                \
  mid = (UINT64)((UINT32)mid) + cross + (lo>>32); \
                                                  \
  (P).w[1] = hi + (mid>>32);                      \
  (P).w[0] = (mid<<32) + (UINT32)lo;              \
}
268 // get full 64x64bit product
269 // Note:
270 // This macro is used for CX < 2^61, CY < 2^61
271 //
/* Full unsigned 64x64 -> 128-bit product, shortcut version: the two
   cross terms are summed directly into PM, which cannot overflow only
   because of the stated precondition CX < 2^61, CY < 2^61.  */
#define __mul_64x64_to_128_fast(P, CX, CY) \
{ \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
CXH = (CX) >> 32; \
CXL = (UINT32)(CX); \
CYH = (CY) >> 32; \
CYL = (UINT32)(CY); \
\
PM = CXH*CYL; \
PL = CXL*CYL; \
PH = CXH*CYH; \
PM += CXL*CYH; \
PM += (PL>>32); \
\
(P).w[1] = PH + (PM>>32); \
(P).w[0] = (PM<<32)+(UINT32)PL; \
}
289 // used for CX< 2^60
// used for CX< 2^60
/* (P) = (CX)^2, full 128-bit square.  The single cross term is doubled
   (PM += PM); the CX < 2^60 precondition keeps that doubling from
   overflowing the 64-bit middle accumulator.  */
#define __sqr64_fast(P, CX) \
{ \
UINT64 CXH, CXL, PL, PH, PM; \
CXH = (CX) >> 32; \
CXL = (UINT32)(CX); \
\
PM = CXH*CXL; \
PL = CXL*CXL; \
PH = CXH*CXH; \
PM += PM; \
PM += (PL>>32); \
\
(P).w[1] = PH + (PM>>32); \
(P).w[0] = (PM<<32)+(UINT32)PL; \
}
305 // get full 64x64bit product
306 // Note:
307 // This implementation is used for CX < 2^61, CY < 2^61
308 //
/* High 64 bits only of the 64x64 product; same shortcut accumulation as
   __mul_64x64_to_128_fast, so the CX < 2^61, CY < 2^61 precondition
   applies here too.  */
#define __mul_64x64_to_64_high_fast(P, CX, CY) \
{ \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
CXH = (CX) >> 32; \
CXL = (UINT32)(CX); \
CYH = (CY) >> 32; \
CYL = (UINT32)(CY); \
\
PM = CXH*CYL; \
PL = CXL*CYL; \
PH = CXH*CYH; \
PM += CXL*CYH; \
PM += (PL>>32); \
\
(P) = PH + (PM>>32); \
}
325 // get full 64x64bit product
326 //
/* Full unsigned 64x64 -> 128-bit product with no range preconditions:
   the cross terms are combined through a widened accumulator so no
   intermediate overflow is possible.  Textually identical algorithm to
   __mul_64x64_to_128.  */
#define __mul_64x64_to_128_full(P, CX, CY) \
{ \
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
CXH = (CX) >> 32; \
CXL = (UINT32)(CX); \
CYH = (CY) >> 32; \
CYL = (UINT32)(CY); \
\
PM = CXH*CYL; \
PH = CXH*CYH; \
PL = CXL*CYL; \
PM2 = CXL*CYH; \
PH += (PM>>32); \
PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
\
(P).w[1] = PH + (PM>>32); \
(P).w[0] = (PM<<32)+(UINT32)PL; \
}
/* (Q) = high 128 bits of the full 256-bit product (A)*(B).  The low
   partial product contributes only its high word (ALBL.w[1]) as a carry
   into the cross terms; the discarded low 64 bits never affect Q.  */
#define __mul_128x128_high(Q, A, B) \
{ \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
\
__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]); \
\
__add_128_128(QM, ALBH, AHBL); \
__add_128_64(QM2, QM, ALBL.w[1]); \
__add_128_64((Q), AHBH, QM2.w[1]); \
}
/* Full 128x128 -> 256-bit product: (Qh):(Ql) = (A)*(B).  Four 64x64
   partial products; the two cross terms are summed into QM and merged
   with the low product's high word before splitting into Qh/Ql.
   NOTE(review): a carry out of __add_128_128(QM,...) would be lost --
   presumably callers' operand ranges preclude it; confirm.  */
#define __mul_128x128_full(Qh, Ql, A, B) \
{ \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
\
__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]); \
\
__add_128_128(QM, ALBH, AHBL); \
(Ql).w[0] = ALBL.w[0]; \
__add_128_64(QM2, QM, ALBL.w[1]); \
__add_128_64((Qh), AHBH, QM2.w[1]); \
(Ql).w[1] = QM2.w[0]; \
}
/* (Ql) = low 128 bits of (A)*(B) (product modulo 2^128): only the low
   partial product and the low halves of the cross terms are needed.  */
#define __mul_128x128_low(Ql, A, B) \
{ \
UINT128 ALBL; \
UINT64 QM64; \
\
__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1]; \
\
(Ql).w[0] = ALBL.w[0]; \
(Ql).w[1] = QM64 + ALBL.w[1]; \
}
/* (Ql) = low 128 bits of the 64x128 product (A)*(B); the top 64 bits
   (QM2.w[1]) are computed but discarded.  */
#define __mul_64x128_low(Ql, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
__mul_64x64_to_128(ALBH, (A), (B).w[1]); \
__mul_64x64_to_128(ALBL, (A), (B).w[0]); \
(Ql).w[0] = ALBL.w[0]; \
__add_128_64(QM2, ALBH, ALBL.w[1]); \
(Ql).w[1] = QM2.w[0]; \
}
/* Full 64x128 -> 192-bit product: Ph = top 64 bits, (Ql) = low 128.  */
#define __mul_64x128_full(Ph, Ql, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
\
__mul_64x64_to_128(ALBH, (A), (B).w[1]); \
__mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
(Ql).w[0] = ALBL.w[0]; \
__add_128_64(QM2, ALBH, ALBL.w[1]); \
(Ql).w[1] = QM2.w[0]; \
Ph = QM2.w[1]; \
}
/* Full 64x128 -> 192-bit product stored in (Q).w[0..2], least
   significant word first.  */
#define __mul_64x128_to_192(Q, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
\
__mul_64x64_to_128(ALBH, (A), (B).w[1]); \
__mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
(Q).w[0] = ALBL.w[0]; \
__add_128_64(QM2, ALBH, ALBL.w[1]); \
(Q).w[1] = QM2.w[0]; \
(Q).w[2] = QM2.w[1]; \
}
/* Identical duplicate of __mul_64x128_to_192 under a second spelling;
   kept because both names are presumably referenced elsewhere in the
   library -- verify before removing either.  */
#define __mul_64x128_to192(Q, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
\
__mul_64x64_to_128(ALBH, (A), (B).w[1]); \
__mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
(Q).w[0] = ALBL.w[0]; \
__add_128_64(QM2, ALBH, ALBL.w[1]); \
(Q).w[1] = QM2.w[0]; \
(Q).w[2] = QM2.w[1]; \
}
/* Full 128x128 -> 256-bit product in (P256).w[0..3], built from two
   64x128 partial products offset by one word and combined with an
   explicit carry chain.  */
#define __mul_128x128_to_256(P256, A, B) \
{ \
UINT128 Qll, Qlh; \
UINT64 Phl, Phh, CY1, CY2; \
\
__mul_64x128_full(Phl, Qll, A.w[0], B); \
__mul_64x128_full(Phh, Qlh, A.w[1], B); \
(P256).w[0] = Qll.w[0]; \
__add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]); \
__add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
(P256).w[3] = Phh + CY2; \
}
441 //
442 // For better performance, will check A.w[1] against 0,
443 // but not B.w[1]
444 // Use this macro accordingly
/* Same result as __mul_128x128_to_256, but skips the second 64x128
   partial product and the carry chain when A.w[1] == 0 (B.w[1] is
   deliberately not checked -- see comment above).  */
#define __mul_128x128_to_256_check_A(P256, A, B) \
{ \
UINT128 Qll, Qlh; \
UINT64 Phl, Phh, CY1, CY2; \
\
__mul_64x128_full(Phl, Qll, A.w[0], B); \
(P256).w[0] = Qll.w[0]; \
if(A.w[1]) { \
__mul_64x128_full(Phh, Qlh, A.w[1], B); \
__add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]); \
__add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
(P256).w[3] = Phh + CY2; } \
else { \
(P256).w[1] = Qll.w[1]; \
(P256).w[2] = Phl; \
(P256).w[3] = 0; } \
}
/* Full 64x192 -> 256-bit product in (lP).w[0..3]: three 64x64 partial
   products, each shifted one word left of the previous, combined with a
   rippling carry lC.  */
#define __mul_64x192_to_256(lP, lA, lB) \
{ \
UINT128 lP0,lP1,lP2; \
UINT64 lC; \
__mul_64x64_to_128(lP0, lA, (lB).w[0]); \
__mul_64x64_to_128(lP1, lA, (lB).w[1]); \
__mul_64x64_to_128(lP2, lA, (lB).w[2]); \
(lP).w[0] = lP0.w[0]; \
__add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]); \
__add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
(lP).w[3] = lP2.w[1] + lC; \
}
/* Full 64x256 -> 320-bit product in (P).w[0..4]; same staggered
   partial-product-and-carry scheme as __mul_64x192_to_256, one word
   wider.  */
#define __mul_64x256_to_320(P, A, B) \
{ \
UINT128 lP0,lP1,lP2,lP3; \
UINT64 lC; \
__mul_64x64_to_128(lP0, A, (B).w[0]); \
__mul_64x64_to_128(lP1, A, (B).w[1]); \
__mul_64x64_to_128(lP2, A, (B).w[2]); \
__mul_64x64_to_128(lP3, A, (B).w[3]); \
(P).w[0] = lP0.w[0]; \
__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]); \
__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
(P).w[4] = lP3.w[1] + lC; \
}
/* Full 192x192 -> 384-bit product in (P).w[0..5]: three 64x192 rows
   (one per word of A), each accumulated into P at the appropriate word
   offset with its own carry chain.  */
#define __mul_192x192_to_384(P, A, B) \
{ \
UINT256 P0,P1,P2; \
UINT64 CY; \
__mul_64x192_to_256(P0, (A).w[0], B); \
__mul_64x192_to_256(P1, (A).w[1], B); \
__mul_64x192_to_256(P2, (A).w[2], B); \
(P).w[0] = P0.w[0]; \
__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
(P).w[4] = P1.w[3] + CY; \
__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
(P).w[5] = P2.w[3] + CY; \
}
/* Full 64x320 -> 384-bit product in (P).w[0..5]; staggered 64x64
   partial products with a rippling carry.  */
#define __mul_64x320_to_384(P, A, B) \
{ \
UINT128 lP0,lP1,lP2,lP3,lP4; \
UINT64 lC; \
__mul_64x64_to_128(lP0, A, (B).w[0]); \
__mul_64x64_to_128(lP1, A, (B).w[1]); \
__mul_64x64_to_128(lP2, A, (B).w[2]); \
__mul_64x64_to_128(lP3, A, (B).w[3]); \
__mul_64x64_to_128(lP4, A, (B).w[4]); \
(P).w[0] = lP0.w[0]; \
__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]); \
__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
__add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC); \
(P).w[5] = lP4.w[1] + lC; \
}
521 // A*A
522 // Full 128x128-bit product
/* (P256) = (A)^2, full 256-bit square.  The single cross product Qlh is
   doubled in place: its carry out of bit 127 is added into Qhh.w[1]
   first (Qlh.w[1]>>63), then the 128-bit value is shifted left one bit.  */
#define __sqr128_to_256(P256, A) \
{ \
UINT128 Qll, Qlh, Qhh; \
UINT64 TMP_C1, TMP_C2; \
\
__mul_64x64_to_128(Qhh, A.w[1], A.w[1]); \
__mul_64x64_to_128(Qlh, A.w[0], A.w[1]); \
Qhh.w[1] += (Qlh.w[1]>>63); \
Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63); \
Qlh.w[0] += Qlh.w[0]; \
__mul_64x64_to_128(Qll, A.w[0], A.w[0]); \
\
__add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]); \
(P256).w[0] = Qll.w[0]; \
__add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
(P256).w[3] = Qhh.w[1]+TMP_C2; \
}
/* Same computation as __mul_128x128_to_256, but the 256-bit result is
   delivered as two 128-bit halves: (PQh) = high, (PQl) = low.  */
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B) \
{ \
UINT128 Qll, Qlh; \
UINT64 Phl, Phh, C1, C2; \
\
__mul_64x128_full(Phl, Qll, A.w[0], B); \
__mul_64x128_full(Phh, Qlh, A.w[1], B); \
(PQl).w[0] = Qll.w[0]; \
__add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]); \
__add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1); \
(PQh).w[1] = Phh + C2; \
}
/* Full 256x256 -> 512-bit product in (P).w[0..7]: four 64x256 rows
   (one per word of A), each added into P at an increasing word offset
   with its own carry chain.  */
#define __mul_256x256_to_512(P, A, B) \
{ \
UINT512 P0,P1,P2,P3; \
UINT64 CY; \
__mul_64x256_to_320(P0, (A).w[0], B); \
__mul_64x256_to_320(P1, (A).w[1], B); \
__mul_64x256_to_320(P2, (A).w[2], B); \
__mul_64x256_to_320(P3, (A).w[3], B); \
(P).w[0] = P0.w[0]; \
__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
(P).w[5] = P1.w[4] + CY; \
__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY); \
(P).w[6] = P2.w[4] + CY; \
__add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]); \
__add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY); \
__add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY); \
__add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY); \
(P).w[7] = P3.w[4] + CY; \
}
/* Full 192x256 -> 448-bit product in (P).w[0..6]: three 64x256 rows,
   accumulated like __mul_256x256_to_512 but one row shorter.  */
#define __mul_192x256_to_448(P, A, B) \
{ \
UINT512 P0,P1,P2; \
UINT64 CY; \
__mul_64x256_to_320(P0, (A).w[0], B); \
__mul_64x256_to_320(P1, (A).w[1], B); \
__mul_64x256_to_320(P2, (A).w[2], B); \
(P).w[0] = P0.w[0]; \
__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
(P).w[5] = P1.w[4] + CY; \
__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY); \
(P).w[6] = P2.w[4] + CY; \
}
/* Full 320x320 -> 640-bit product in (P).w[0..9].  Splits each operand
   as (high word)*2^256 + (low 256 bits): the 256x256 low product goes
   straight into P, the two 64x256 cross products are summed into P0 and
   added at word offset 4, and the 64x64 high product P3 lands at words
   8-9.  (P0/P1/P2 are declared UINT512 but only their low words are
   used; P3 uses only w[0..1].)  */
#define __mul_320x320_to_640(P, A, B) \
{ \
UINT512 P0,P1,P2,P3; \
UINT64 CY; \
__mul_256x256_to_512((P), (A), B); \
__mul_64x256_to_320(P1, (A).w[4], B); \
__mul_64x256_to_320(P2, (B).w[4], A); \
__mul_64x64_to_128(P3, (A).w[4], (B).w[4]); \
__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]); \
__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
P3.w[1] += CY; \
__add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]); \
__add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY); \
__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY); \
__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY); \
__add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY); \
(P).w[9] = P3.w[1] + CY; \
}
/* Full 384x384 -> 768-bit product in (P).w[0..11]; same split-and-
   accumulate scheme as __mul_320x320_to_640, one word wider: 320x320
   low product plus two 64x320 cross products plus a 64x64 high
   product.  */
#define __mul_384x384_to_768(P, A, B) \
{ \
UINT512 P0,P1,P2,P3; \
UINT64 CY; \
__mul_320x320_to_640((P), (A), B); \
__mul_64x320_to_384(P1, (A).w[5], B); \
__mul_64x320_to_384(P2, (B).w[5], A); \
__mul_64x64_to_128(P3, (A).w[5], (B).w[5]); \
__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]); \
__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
__add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY); \
P3.w[1] += CY; \
__add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]); \
__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY); \
__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY); \
__add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY); \
__add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY); \
__add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY); \
(P).w[11] = P3.w[1] + CY; \
}
/* (Ql) = low 128 bits of (A)*(B): full 64x64 product of the low words
   plus only the low 64 bits of A*(B).w[1] folded into the high word.  */
#define __mul_64x128_short(Ql, A, B) \
{ \
UINT64 ALBH_L; \
\
__mul_64x64_to_64(ALBH_L, (A),(B).w[1]); \
__mul_64x64_to_128((Ql), (A), (B).w[0]); \
\
(Ql).w[1] += ALBH_L; \
}
/* (D) = (_TMP) * 10, computed as (_TMP << 1) + (_TMP << 3); any
   overflow past bit 127 is lost.  */
#define __scale128_10(D,_TMP) \
{ \
UINT128 _TMP2,_TMP8; \
_TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63); \
_TMP2.w[0] = _TMP.w[0]<<1; \
_TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61); \
_TMP8.w[0] = _TMP.w[0]<<3; \
__add_128_128(D, _TMP2, _TMP8); \
}
// 64x64-bit product
/* Full unsigned 64x64 -> 128-bit product; same algorithm as
   __mul_64x64_to_128, duplicated under a machine-independent name so it
   can be used independently of any target-specific override.  */
#define __mul_64x64_to_128MACH(P128, CX64, CY64) \
{ \
UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2; \
CXH = (CX64) >> 32; \
CXL = (UINT32)(CX64); \
CYH = (CY64) >> 32; \
CYL = (UINT32)(CY64); \
PM = CXH*CYL; \
PH = CXH*CYH; \
PL = CXL*CYL; \
PM2 = CXL*CYH; \
PH += (PM>>32); \
PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
(P128).w[1] = PH + (PM>>32); \
(P128).w[0] = (PM<<32)+(UINT32)PL; \
}
// 64x64-bit product
/* (P64) = high 64 bits only of the full unsigned 64x64 product; low
   half is computed for the carry but discarded.  */
#define __mul_64x64_to_128HIGH(P64, CX64, CY64) \
{ \
UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2; \
CXH = (CX64) >> 32; \
CXL = (UINT32)(CX64); \
CYH = (CY64) >> 32; \
CYL = (UINT32)(CY64); \
PM = CXH*CYL; \
PH = CXH*CYH; \
PL = CXL*CYL; \
PM2 = CXL*CYH; \
PH += (PM>>32); \
PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
P64 = PH + (PM>>32); \
}
/* (Q128) = low 128 bits of (A64)*(B128): full product of A64 with the
   low word, plus the low 64 bits of A64*(B128).w[1].  */
#define __mul_128x64_to_128(Q128, A64, B128) \
{ \
UINT64 ALBH_L; \
ALBH_L = (A64) * (B128).w[1]; \
__mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
(Q128).w[1] += ALBH_L; \
}
// might simplify by calculating just QM2.w[0]
/* (Ql) = low 128 bits of (A)*(B); computes the full middle sum QM2 even
   though only its low word is used (see note above).  */
#define __mul_64x128_to_128(Ql, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
__mul_64x64_to_128(ALBH, (A), (B).w[1]); \
__mul_64x64_to_128(ALBL, (A), (B).w[0]); \
(Ql).w[0] = ALBL.w[0]; \
__add_128_64(QM2, ALBH, ALBL.w[1]); \
(Ql).w[1] = QM2.w[0]; \
}
708 /*********************************************************************
709 *
710 * BID Pack/Unpack Macros
711 *
712 *********************************************************************/
713 /////////////////////////////////////////
714 // BID64 definitions
715 ////////////////////////////////////////
/* Format parameters for the three BID widths: largest biased exponent,
   exponent bias, and maximum number of decimal coefficient digits.  */
#define DECIMAL_MAX_EXPON_64 767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS 16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128 12287
#define DECIMAL_EXPONENT_BIAS_128 6176
#define MAX_FORMAT_DIGITS_128 34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32 191
#define DECIMAL_EXPONENT_BIAS_32 101
#define MAX_FORMAT_DIGITS_32 7
731 ////////////////////////////////////////
732 // Constant Definitions
733 ///////////////////////////////////////
/* Bit masks for picking apart BID-encoded operands.  When the two bits
   of SPECIAL_ENCODING_MASK64 are both set, the coefficient uses the
   "large" encoding (see unpack_BID64, which then ORs in
   LARGE_COEFF_HIGH_BIT64 as the implicit leading bit).  */
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64 0x7800000000000000ull
#define SINFINITY_MASK64 0xf800000000000000ull
#define SSNAN_MASK64 0xfc00000000000000ull
#define NAN_MASK64 0x7c00000000000000ull
#define SNAN_MASK64 0x7e00000000000000ull
#define QUIET_MASK64 0xfdffffffffffffffull
#define LARGE_COEFF_MASK64 0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64 0x0020000000000000ull
#define SMALL_COEFF_MASK64 0x001fffffffffffffull
#define EXPONENT_MASK64 0x3ff
#define EXPONENT_SHIFT_LARGE64 51
#define EXPONENT_SHIFT_SMALL64 53
/* Encodings of +/- 9999999999999999 * 10^369 (extreme finite BID64).  */
#define LARGEST_BID64 0x77fb86f26fc0ffffull
#define SMALLEST_BID64 0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128 0x0001ffffffffffffull
#define LARGE_COEFF_MASK128 0x00007fffffffffffull
#define EXPONENT_MASK128 0x3fff
/* High/low words of the largest finite BID128; the low word equals the
   low 64 bits of 10^34 - 1 (0x378d8e63ffffffff).  */
#define LARGEST_BID128_HIGH 0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW 0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32 0x78000000ul
#define LARGE_COEFF_MASK32 0x007ffffful
#define LARGE_COEFF_HIGH_BIT32 0x00800000ul
#define SMALL_COEFF_MASK32 0x001ffffful
#define EXPONENT_MASK32 0xff
#define LARGEST_BID32 0x77f8967f
#define NAN_MASK32 0x7c000000
#define SNAN_MASK32 0x7e000000
/* Masks for the binary-double exponent field (used when converting
   between binary and decimal formats).  */
#define MASK_BINARY_EXPONENT 0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS 0x3ff
#define UPPER_EXPON_LIMIT 51
// data needed for BID pack/unpack macros
/* Lookup tables defined elsewhere in the library (bid_decimal_data et
   al.): rounding constants per rounding mode and digit count,
   reciprocal-of-power-of-ten approximations with their shift amounts,
   powers of ten, and binary/decimal exponent estimation tables.  */
extern UINT64 round_const_table[][19];
extern UINT128 reciprocals10_128[];
extern int recip_scale[];
extern UINT128 power10_table_128[];
extern int estimate_decimal_digits[];
extern int estimate_bin_expon[];
extern UINT64 power10_index_binexp[];
extern int short_recip_scale[];
extern UINT64 reciprocals10_64[];
extern UINT128 power10_index_binexp_128[];
extern UINT128 round_const_table_128[][36];
778
779
780 //////////////////////////////////////////////
781 // Status Flag Handling
782 /////////////////////////////////////////////
/* OR the given exception bits into the status word pointed to by fpsc.
   Fix: parenthesize the status parameter so compound arguments expand
   safely.  */
#define __set_status_flags(fpsc, status) *(fpsc) |= (status)
/* Nonzero iff the inexact flag is already set in the status word.  */
#define is_inexact(fpsc) ((*(fpsc))&INEXACT_EXCEPTION)
785
786 __BID_INLINE__ UINT64
unpack_BID64(UINT64 * psign_x,int * pexponent_x,UINT64 * pcoefficient_x,UINT64 x)787 unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
788 UINT64 * pcoefficient_x, UINT64 x) {
789 UINT64 tmp, coeff;
790
791 *psign_x = x & 0x8000000000000000ull;
792
793 if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
794 // special encodings
795 // coefficient
796 coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;
797
798 if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
799 *pexponent_x = 0;
800 *pcoefficient_x = x & 0xfe03ffffffffffffull;
801 if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
802 *pcoefficient_x = x & 0xfe00000000000000ull;
803 if ((x & NAN_MASK64) == INFINITY_MASK64)
804 *pcoefficient_x = x & SINFINITY_MASK64;
805 return 0; // NaN or Infinity
806 }
807 // check for non-canonical values
808 if (coeff >= 10000000000000000ull)
809 coeff = 0;
810 *pcoefficient_x = coeff;
811 // get exponent
812 tmp = x >> EXPONENT_SHIFT_LARGE64;
813 *pexponent_x = (int) (tmp & EXPONENT_MASK64);
814 return coeff;
815 }
816 // exponent
817 tmp = x >> EXPONENT_SHIFT_SMALL64;
818 *pexponent_x = (int) (tmp & EXPONENT_MASK64);
819 // coefficient
820 *pcoefficient_x = (x & SMALL_COEFF_MASK64);
821
822 return *pcoefficient_x;
823 }
824
825 //
826 // BID64 pack macro (general form)
827 //
828 __BID_INLINE__ UINT64
get_BID64(UINT64 sgn,int expon,UINT64 coeff,int rmode,unsigned * fpsc)829 get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
830 unsigned *fpsc) {
831 UINT128 Stemp, Q_low;
832 UINT64 QH, r, mask, C64, remainder_h, CY, carry;
833 int extra_digits, amount, amount2;
834 unsigned status;
835
836 if (coeff > 9999999999999999ull) {
837 expon++;
838 coeff = 1000000000000000ull;
839 }
840 // check for possible underflow/overflow
841 if (((unsigned) expon) >= 3 * 256) {
842 if (expon < 0) {
843 // underflow
844 if (expon + MAX_FORMAT_DIGITS < 0) {
845 #ifdef SET_STATUS_FLAGS
846 __set_status_flags (fpsc,
847 UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
848 #endif
849 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
850 #ifndef IEEE_ROUND_NEAREST
851 if (rmode == ROUNDING_DOWN && sgn)
852 return 0x8000000000000001ull;
853 if (rmode == ROUNDING_UP && !sgn)
854 return 1ull;
855 #endif
856 #endif
857 // result is 0
858 return sgn;
859 }
860 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
861 #ifndef IEEE_ROUND_NEAREST
862 if (sgn && (unsigned) (rmode - 1) < 2)
863 rmode = 3 - rmode;
864 #endif
865 #endif
866 // get digits to be shifted out
867 extra_digits = -expon;
868 coeff += round_const_table[rmode][extra_digits];
869
870 // get coeff*(2^M[extra_digits])/10^extra_digits
871 __mul_64x128_full (QH, Q_low, coeff,
872 reciprocals10_128[extra_digits]);
873
874 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
875 amount = recip_scale[extra_digits];
876
877 C64 = QH >> amount;
878
879 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
880 #ifndef IEEE_ROUND_NEAREST
881 if (rmode == 0) //ROUNDING_TO_NEAREST
882 #endif
883 if (C64 & 1) {
884 // check whether fractional part of initial_P/10^extra_digits is exactly .5
885
886 // get remainder
887 amount2 = 64 - amount;
888 remainder_h = 0;
889 remainder_h--;
890 remainder_h >>= amount2;
891 remainder_h = remainder_h & QH;
892
893 if (!remainder_h
894 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
895 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
896 && Q_low.w[0] <
897 reciprocals10_128[extra_digits].w[0]))) {
898 C64--;
899 }
900 }
901 #endif
902
903 #ifdef SET_STATUS_FLAGS
904
905 if (is_inexact (fpsc))
906 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
907 else {
908 status = INEXACT_EXCEPTION;
909 // get remainder
910 remainder_h = QH << (64 - amount);
911
912 switch (rmode) {
913 case ROUNDING_TO_NEAREST:
914 case ROUNDING_TIES_AWAY:
915 // test whether fractional part is 0
916 if (remainder_h == 0x8000000000000000ull
917 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
918 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
919 && Q_low.w[0] <
920 reciprocals10_128[extra_digits].w[0])))
921 status = EXACT_STATUS;
922 break;
923 case ROUNDING_DOWN:
924 case ROUNDING_TO_ZERO:
925 if (!remainder_h
926 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
927 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
928 && Q_low.w[0] <
929 reciprocals10_128[extra_digits].w[0])))
930 status = EXACT_STATUS;
931 break;
932 default:
933 // round up
934 __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
935 reciprocals10_128[extra_digits].w[0]);
936 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
937 reciprocals10_128[extra_digits].w[1], CY);
938 if ((remainder_h >> (64 - amount)) + carry >=
939 (((UINT64) 1) << amount))
940 status = EXACT_STATUS;
941 }
942
943 if (status != EXACT_STATUS)
944 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
945 }
946
947 #endif
948
949 return sgn | C64;
950 }
951 while (coeff < 1000000000000000ull && expon >= 3 * 256) {
952 expon--;
953 coeff = (coeff << 3) + (coeff << 1);
954 }
955 if (expon > DECIMAL_MAX_EXPON_64) {
956 #ifdef SET_STATUS_FLAGS
957 __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
958 #endif
959 // overflow
960 r = sgn | INFINITY_MASK64;
961 switch (rmode) {
962 case ROUNDING_DOWN:
963 if (!sgn)
964 r = LARGEST_BID64;
965 break;
966 case ROUNDING_TO_ZERO:
967 r = sgn | LARGEST_BID64;
968 break;
969 case ROUNDING_UP:
970 // round up
971 if (sgn)
972 r = SMALLEST_BID64;
973 }
974 return r;
975 }
976 }
977
978 mask = 1;
979 mask <<= EXPONENT_SHIFT_SMALL64;
980
981 // check whether coefficient fits in 10*5+3 bits
982 if (coeff < mask) {
983 r = expon;
984 r <<= EXPONENT_SHIFT_SMALL64;
985 r |= (coeff | sgn);
986 return r;
987 }
988 // special format
989
990 // eliminate the case coeff==10^16 after rounding
991 if (coeff == 10000000000000000ull) {
992 r = expon + 1;
993 r <<= EXPONENT_SHIFT_SMALL64;
994 r |= (1000000000000000ull | sgn);
995 return r;
996 }
997
998 r = expon;
999 r <<= EXPONENT_SHIFT_LARGE64;
1000 r |= (sgn | SPECIAL_ENCODING_MASK64);
1001 // add coeff, without leading bits
1002 mask = (mask >> 2) - 1;
1003 coeff &= mask;
1004 r |= coeff;
1005
1006 return r;
1007 }
1008
1009
1010
1011
1012 //
1013 // No overflow/underflow checking
1014 //
1015 __BID_INLINE__ UINT64
fast_get_BID64(UINT64 sgn,int expon,UINT64 coeff)1016 fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1017 UINT64 r, mask;
1018
1019 mask = 1;
1020 mask <<= EXPONENT_SHIFT_SMALL64;
1021
1022 // check whether coefficient fits in 10*5+3 bits
1023 if (coeff < mask) {
1024 r = expon;
1025 r <<= EXPONENT_SHIFT_SMALL64;
1026 r |= (coeff | sgn);
1027 return r;
1028 }
1029 // special format
1030
1031 // eliminate the case coeff==10^16 after rounding
1032 if (coeff == 10000000000000000ull) {
1033 r = expon + 1;
1034 r <<= EXPONENT_SHIFT_SMALL64;
1035 r |= (1000000000000000ull | sgn);
1036 return r;
1037 }
1038
1039 r = expon;
1040 r <<= EXPONENT_SHIFT_LARGE64;
1041 r |= (sgn | SPECIAL_ENCODING_MASK64);
1042 // add coeff, without leading bits
1043 mask = (mask >> 2) - 1;
1044 coeff &= mask;
1045 r |= coeff;
1046
1047 return r;
1048 }
1049
1050
//
//   no underflow checking
//
// Pack sign/exponent/coefficient into BID64, detecting overflow only.
// An exponent at or above the top of the range (biased 3*256 = 768) is
// first reduced by scaling the coefficient up by powers of 10; if the
// exponent still exceeds DECIMAL_MAX_EXPON_64 the overflow result for
// the rounding mode is returned.
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
			 unsigned *fpsc) {
  UINT64 r, mask;

  if (((unsigned) expon) >= 3 * 256 - 1) {
    // coefficient 10^16 (rounding artifact): renormalize to 10^15,
    // pushing the exponent into the range handled below
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      // multiply coefficient by 10 (8*c + 2*c) while the exponent is
      // too large and the coefficient still has room to grow
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
	expon--;
	coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
	// overflow: result depends on rounding direction and sign
	r = sgn | INFINITY_MASK64;
	switch (rmode) {
	case ROUNDING_DOWN:
	  if (!sgn)
	    r = LARGEST_BID64;
	  break;
	case ROUNDING_TO_ZERO:
	  r = sgn | LARGEST_BID64;
	  break;
	case ROUNDING_UP:
	  // round up
	  if (sgn)
	    r = SMALLEST_BID64;
	}
	return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  // large-coefficient encoding: exponent field shifted higher,
  // leading coefficient bits implicit
  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
1125
1126
1127 //
1128 // No overflow/underflow checking
1129 // or checking for coefficients equal to 10^16 (after rounding)
1130 //
1131 __BID_INLINE__ UINT64
very_fast_get_BID64(UINT64 sgn,int expon,UINT64 coeff)1132 very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1133 UINT64 r, mask;
1134
1135 mask = 1;
1136 mask <<= EXPONENT_SHIFT_SMALL64;
1137
1138 // check whether coefficient fits in 10*5+3 bits
1139 if (coeff < mask) {
1140 r = expon;
1141 r <<= EXPONENT_SHIFT_SMALL64;
1142 r |= (coeff | sgn);
1143 return r;
1144 }
1145 // special format
1146 r = expon;
1147 r <<= EXPONENT_SHIFT_LARGE64;
1148 r |= (sgn | SPECIAL_ENCODING_MASK64);
1149 // add coeff, without leading bits
1150 mask = (mask >> 2) - 1;
1151 coeff &= mask;
1152 r |= coeff;
1153
1154 return r;
1155 }
1156
1157 //
1158 // No overflow/underflow checking or checking for coefficients above 2^53
1159 //
1160 __BID_INLINE__ UINT64
very_fast_get_BID64_small_mantissa(UINT64 sgn,int expon,UINT64 coeff)1161 very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
1162 // no UF/OF
1163 UINT64 r;
1164
1165 r = expon;
1166 r <<= EXPONENT_SHIFT_SMALL64;
1167 r |= (coeff | sgn);
1168 return r;
1169 }
1170
1171
//
//   This pack macro is used when underflow is known to occur
//
// sgn   - sign bit already positioned at bit 63
// expon - (small/negative) decimal exponent
// coeff - coefficient
// R     - sticky bit: nonzero if lower-order digits were already discarded
// Rounds coeff*10^expon into the subnormal range by multiplying with a
// scaled reciprocal of 10^extra_digits (tables round_const_table /
// reciprocals10_128 / recip_scale) instead of dividing, then raises
// underflow/inexact in *fpsc as appropriate.
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
	      unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
    // value too small for even one significant digit
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    // directed rounding away from zero yields the smallest denormal
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  // for negative values swap ROUNDING_DOWN <-> ROUNDING_UP so the rest
  // of the code can treat the value as positive
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  // fold the sticky bit into the scaled coefficient
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
		     reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5
      // (round-half-even correction)

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;		// all-ones mask
      remainder_h >>= amount2;	// mask for the low 'amount' bits
      remainder_h = remainder_h & QH;

      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0]))) {
	C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    // determine whether the discarded digits were exactly zero, in
    // which case the result is exact and only underflow is suppressed
    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (remainder_h == 0x8000000000000000ull
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
		       reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			  reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
	  (((UINT64) 1) << amount))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}
1296
1297
1298
//
//   This pack macro doesnot check for coefficients above 2^53
//
// General BID64 pack for coefficients known to fit in 53 bits: handles
// underflow (via the reciprocal-multiply rounding below) and overflow
// (by scaling the coefficient, then saturating per rounding mode).
__BID_INLINE__ UINT64
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
			  int rmode, unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
	// too small for even one significant digit
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	// directed rounding away from zero gives the smallest denormal
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // swap directed modes for negative values so the code below can
      // treat the value as positive
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      C128.w[0] = coeff + round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
			 reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
	  // (round-half-even correction)

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;		// all-ones mask
	  remainder_h >>= amount2;	// keep the low 'amount' bits
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	// decide whether the discarded digits were exactly zero
	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }

    // exponent too large: multiply coefficient by 10 while possible
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow: result depends on rounding direction and sign
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    } else {
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      // scaling may have grown the coefficient past the standard field:
      // use the large-coefficient ("special") encoding
      if (coeff >= mask) {
	r = expon;
	r <<= EXPONENT_SHIFT_LARGE64;
	r |= (sgn | SPECIAL_ENCODING_MASK64);
	// add coeff, without leading bits
	mask = (mask >> 2) - 1;
	coeff &= mask;
	r |= coeff;
	return r;
      }
    }
  }

  // standard encoding
  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}
1468
1469
1470 /*****************************************************************************
1471 *
1472 * BID128 pack/unpack macros
1473 *
1474 *****************************************************************************/
1475
//
//   Macro for handling BID128 underflow
//   sticky bit given as additional argument
//
// Scales CQ by 10 (to absorb the sticky bit R into the lowest digit),
// then rounds CQ*10^expon into the subnormal range with a 128x128
// reciprocal multiplication (round_const_table_128 / reciprocals10_128
// / recip_scale) and stores the packed result through pres.
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
    // too small for even one significant digit: result is +/-0, except
    // directed rounding away from zero which gives the least denormal
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10   (computed as CQ*2 + CQ*8)
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder (sticky bit) into the lowest digit
  if (R)
    CQ.w[0] |= 1;

  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // swap directed modes for negative values
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // CQ / 10^ed2 via multiplication by the scaled reciprocal
  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5
      // (round-half-even correction)

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    // decide whether the discarded digits were exactly zero
    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
1618
1619
//
//   Macro for handling BID128 underflow
//
// Same as handle_UF_128_rem but without the 10x pre-scaling / sticky
// bit: rounds CQ*10^expon into the subnormal range via a 128x128
// reciprocal multiplication and stores the packed result through pres.
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
	       unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
    // too small for even one significant digit: result is +/-0, except
    // directed rounding away from zero which gives the least denormal
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // swap directed modes for negative values
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // CQ / 10^ed2 via multiplication by the scaled reciprocal
  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5
      // (round-half-even correction)

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    // decide whether the discarded digits were exactly zero
    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
1752
1753
1754
//
//   BID128 unpack, input passed by value
//
// Splits a BID128 value into sign, biased exponent and coefficient.
// Returns 0 for NaN/Infinity/non-canonical-special inputs, otherwise
// a nonzero value iff the coefficient is nonzero.
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
		     UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      // NOTE: exponent extracted from bit 47 here (vs 49 below) because
      // the special encoding shifts the exponent field down by two bits
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];

    // NaN: keep the payload unless it is non-canonical (>= 10^33),
    // in which case the payload is cleared
    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
      {
	pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
	pcoefficient_x->w[0] = 0;
      } else
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
      // infinity: coefficient is sign+infinity bits only
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  // exponent field occupies bits 49..62 of the high word
  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1818
1819
//
//   BID128 unpack, input pased by reference
//
// Same contract as unpack_BID128_value, but reads the operand through
// px.  Returns 0 for NaN/Infinity/non-canonical-special inputs,
// otherwise nonzero iff the coefficient is nonzero.
__BID_INLINE__ UINT64
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
	       UINT128 * pcoefficient_x, UINT128 * px) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      // special encoding: exponent field sits two bits lower (bit 47)
      ex = (px->w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) { // non-canonical
      // payload out of range: clear the coefficient bits
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
      pcoefficient_x->w[0] = 0;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  // exponent field occupies bits 49..62 of the high word
  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1872
//
//   Pack macro checks for overflow, but not underflow
//
// Packs sign/exponent/coefficient into BID128.  If the exponent is too
// large, the coefficient is scaled up by powers of 10 to bring the
// exponent into range; on true overflow the rounding mode selects
// either the largest finite value or infinity.
__BID_INLINE__ UINT128 *
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
			 UINT128 coeff, unsigned *prounding_mode,
			 unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      // scale coefficient by 10 (2*c + 8*c, 128-bit) while it stays
      // below 10^(MAX_FORMAT_DIGITS_128-1) and the exponent is too big
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  coeff.w[1]++;		// carry from the low word

	expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // modes rounding toward the value: saturate to largest finite
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
							 &&
							 *prounding_mode
							 ==
							 ROUNDING_DOWN))
	{
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	} else
#endif
#endif
	{
	  pres->w[1] = sgn | INFINITY_MASK64;
	  pres->w[0] = 0;
	}
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  // exponent field occupies bits 49..62 of the high word
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
1934
1935
1936 //
1937 // No overflow/underflow checks
1938 // No checking for coefficient == 10^34 (rounding artifact)
1939 //
1940 __BID_INLINE__ UINT128 *
get_BID128_very_fast(UINT128 * pres,UINT64 sgn,int expon,UINT128 coeff)1941 get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
1942 UINT128 coeff) {
1943 UINT64 tmp;
1944
1945 pres->w[0] = coeff.w[0];
1946 tmp = expon;
1947 tmp <<= 49;
1948 pres->w[1] = sgn | tmp | coeff.w[1];
1949
1950 return pres;
1951 }
1952
1953 //
1954 // No overflow/underflow checks
1955 //
1956 __BID_INLINE__ UINT128 *
get_BID128_fast(UINT128 * pres,UINT64 sgn,int expon,UINT128 coeff)1957 get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
1958 UINT64 tmp;
1959
1960 // coeff==10^34?
1961 if (coeff.w[1] == 0x0001ed09bead87c0ull
1962 && coeff.w[0] == 0x378d8e6400000000ull) {
1963 expon++;
1964 // set coefficient to 10^33
1965 coeff.w[1] = 0x0000314dc6448d93ull;
1966 coeff.w[0] = 0x38c15b0a00000000ull;
1967 }
1968
1969 pres->w[0] = coeff.w[0];
1970 tmp = expon;
1971 tmp <<= 49;
1972 pres->w[1] = sgn | tmp | coeff.w[1];
1973
1974 return pres;
1975 }
1976
//
//   General BID128 pack macro
//
// Handles the 10^34 rounding artifact, underflow (delegated to
// handle_UF_128) and overflow (coefficient scaling, then saturation
// per rounding mode).  Stores the packed value through pres.
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
	    unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0) {
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
			    fpsc);
    }

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      // scale coefficient by 10 (2*c + 8*c, 128-bit) while it stays
      // small enough and the exponent is too big
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  coeff.w[1]++;		// carry from the low word

	expon--;
      }
    }
    if (expon > DECIMAL_MAX_EXPON_128) {
      // zero coefficient: clamp the exponent instead of overflowing
      if (!(coeff.w[1] | coeff.w[0])) {
	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
	pres->w[0] = 0;
	return pres;
      }
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // modes rounding toward the value: saturate to largest finite
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
							 &&
							 *prounding_mode
							 ==
							 ROUNDING_DOWN))
	{
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	} else
#endif
#endif
	{
	  pres->w[1] = sgn | INFINITY_MASK64;
	  pres->w[0] = 0;
	}
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  // exponent field occupies bits 49..62 of the high word
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2056
2057
//
//   Macro used for conversions from string
//   (no additional arguments given for rounding mode, status flags)
//
// Packs sign/exponent/coefficient into BID128 using round-to-nearest
// (rmode = 0) and a local, discarded status word.
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  // NOTE(review): 'status' is passed uninitialized to handle_UF_128 and
  // then discarded — flags are deliberately not reported here, but the
  // initial read inside __set_status_flags is of an indeterminate value;
  // verify this is intended.
  unsigned rmode = 0, status;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      // scale coefficient by 10 (2*c + 8*c) while it stays below 10^33
      // and the exponent remains too large
      while (expon > DECIMAL_MAX_EXPON_128 &&
	     (coeff.w[1] < power10_table_128[33].w[1] ||
	      (coeff.w[1] == power10_table_128[33].w[1]
	       && coeff.w[0] < power10_table_128[33].w[0]))) {
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
	D2.w[0] = coeff.w[0] << 1;
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
	D8.w[0] = coeff.w[0] << 3;

	__add_128_128 (coeff, D2, D8);
	expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      // zero coefficient: clamp the exponent
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      // overflow: rmode is always 0 here, so the switch below keeps the
      // default infinity result
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn) {
	  pres->w[1] = LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      case ROUNDING_TO_ZERO:
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn) {
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      }

      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  // exponent field occupies bits 49..62 of the high word
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2134
2135
2136
2137 /*****************************************************************************
2138 *
2139 * BID32 pack/unpack macros
2140 *
2141 *****************************************************************************/
2142
2143
2144 __BID_INLINE__ UINT32
unpack_BID32(UINT32 * psign_x,int * pexponent_x,UINT32 * pcoefficient_x,UINT32 x)2145 unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
2146 UINT32 * pcoefficient_x, UINT32 x) {
2147 UINT32 tmp;
2148
2149 *psign_x = x & 0x80000000;
2150
2151 if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
2152 // special encodings
2153 if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
2154 *pcoefficient_x = x & 0xfe0fffff;
2155 if ((x & 0x000fffff) >= 1000000)
2156 *pcoefficient_x = x & 0xfe000000;
2157 if ((x & NAN_MASK32) == INFINITY_MASK32)
2158 *pcoefficient_x = x & 0xf8000000;
2159 *pexponent_x = 0;
2160 return 0; // NaN or Infinity
2161 }
2162 // coefficient
2163 *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
2164 // check for non-canonical value
2165 if (*pcoefficient_x >= 10000000)
2166 *pcoefficient_x = 0;
2167 // get exponent
2168 tmp = x >> 21;
2169 *pexponent_x = tmp & EXPONENT_MASK32;
2170 return 1;
2171 }
2172 // exponent
2173 tmp = x >> 23;
2174 *pexponent_x = tmp & EXPONENT_MASK32;
2175 // coefficient
2176 *pcoefficient_x = (x & LARGE_COEFF_MASK32);
2177
2178 return *pcoefficient_x;
2179 }
2180
2181 //
2182 // General pack macro for BID32
2183 //
__BID_INLINE__ UINT32
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Q;
  UINT64 C64, remainder_h, carry, Stemp;
  UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  // coefficient exceeded 7 digits (e.g. rounding up produced 10^7):
  // fold one power of ten into the exponent
  // NOTE(review): assumes coeff is at most 10^7 here -- confirm callers
  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
	// even the most significant digit would be shifted out
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	// directed rounding away from zero: return the smallest
	// nonzero magnitude with the proper sign instead of zero
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x80000001;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1;
#endif
#endif
	// result is 0
	return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // for negative operands swap rounding modes 1 and 2
      // (down <-> up, presumably) so rounding on the magnitude
      // below goes in the right direction
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif

      extra_digits = -expon;
      // add the per-mode rounding constant before dividing by
      // 10^extra_digits
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = short_recip_scale[extra_digits];

      C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // candidate is odd: check whether the fractional part of
	  // initial_P/10^extra_digits is exactly .5 -- if so, round
	  // the tie to even by decrementing C64

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  // NOTE(review): shift by amount2 is UB if amount == 0;
	  // presumably short_recip_scale[] is always >= 1 -- confirm
	  remainder_h >>= amount2;	// mask of the low 'amount' bits
	  remainder_h = remainder_h & Q.w[1];

	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = Q.w[1] << (64 - amount);

	// decide whether the discarded digits were exactly zero /
	// exactly a half-ulp for the chosen rounding direction
	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp, carry, Q.w[0],
			   reciprocals10_64[extra_digits]);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (UINT32) C64;
    }

    // exponent too large: try to absorb it by scaling the coefficient
    // up by 10 (== (x<<3)+(x<<1)) while it still fits in 7 digits
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      // NOTE(review): unlike the underflow path, this call is not
      // guarded by SET_STATUS_FLAGS -- confirm this is intentional
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case ROUNDING_DOWN:
	// toward -inf: positive overflow stays at the largest finite
	if (!sgn)
	  r = LARGEST_BID32;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID32;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((UINT32) coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((UINT32) coeff) & mask);

  return r;
}
2347
2348
2349
2350 //
2351 // no overflow/underflow checks
2352 //
2353 __BID_INLINE__ UINT32
very_fast_get_BID32(UINT32 sgn,int expon,UINT32 coeff)2354 very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
2355 UINT32 r, mask;
2356
2357 mask = 1 << 23;
2358
2359 // check whether coefficient fits in 10*2+3 bits
2360 if (coeff < mask) {
2361 r = expon;
2362 r <<= 23;
2363 r |= (coeff | sgn);
2364 return r;
2365 }
2366 // special format
2367 r = expon;
2368 r <<= 21;
2369 r |= (sgn | SPECIAL_ENCODING_MASK32);
2370 // add coeff, without leading bits
2371 mask = (1 << 21) - 1;
2372 coeff &= mask;
2373 r |= coeff;
2374
2375 return r;
2376 }
2377
2378
2379
2380 /*************************************************************
2381 *
2382 *************************************************************/
// 384-bit unsigned integer, 16-byte aligned; w[0] holds the least
// significant 64 bits (same limb order as the 128-bit shift macros).
typedef
ALIGN (16)
struct {
  UINT64 w[6];
} UINT384;
// 512-bit unsigned integer, 16-byte aligned; same limb order.
typedef ALIGN (16)
struct {
  UINT64 w[8];
} UINT512;
2392
2393 // #define P 34
2394 #define MASK_STEERING_BITS 0x6000000000000000ull
2395 #define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
2396 #define MASK_BINARY_SIG1 0x001fffffffffffffull
2397 #define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
2398 //used to take G[2:w+3] (sec 3.3)
2399 #define MASK_BINARY_SIG2 0x0007ffffffffffffull
2400 //used to mask out G4:T0 (sec 3.3)
2401 #define MASK_BINARY_OR2 0x0020000000000000ull
2402 //used to prefix 8+G4 to T (sec 3.3)
2403 #define UPPER_EXPON_LIMIT 51
2404 #define MASK_EXP 0x7ffe000000000000ull
2405 #define MASK_SPECIAL 0x7800000000000000ull
2406 #define MASK_NAN 0x7c00000000000000ull
2407 #define MASK_SNAN 0x7e00000000000000ull
2408 #define MASK_ANY_INF 0x7c00000000000000ull
2409 #define MASK_INF 0x7800000000000000ull
2410 #define MASK_SIGN 0x8000000000000000ull
2411 #define MASK_COEFF 0x0001ffffffffffffull
2412 #define BIN_EXP_BIAS (0x1820ull << 49)
2413
2414 #define EXP_MIN 0x0000000000000000ull
2415 // EXP_MIN = (-6176 + 6176) << 49
2416 #define EXP_MAX 0x5ffe000000000000ull
2417 // EXP_MAX = (6111 + 6176) << 49
2418 #define EXP_MAX_P1 0x6000000000000000ull
2419 // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2420 #define EXP_P1 0x0002000000000000ull
// EXP_P1 = 1 << 49
2422 #define expmin -6176
2423 // min unbiased exponent
2424 #define expmax 6111
2425 // max unbiased exponent
2426 #define expmin16 -398
// min unbiased exponent for the 16-digit (64-bit) format
2428 #define expmax16 369
// max unbiased exponent for the 16-digit (64-bit) format
2430
2431 #define SIGNMASK32 0x80000000
2432 #define BID64_SIG_MAX 0x002386F26FC0ffffull
2433 #define SIGNMASK64 0x8000000000000000ull
2434
2435 // typedef unsigned int FPSC; // floating-point status and control
2436 // bit31:
2437 // bit30:
2438 // bit29:
2439 // bit28:
2440 // bit27:
2441 // bit26:
2442 // bit25:
2443 // bit24:
2444 // bit23:
2445 // bit22:
2446 // bit21:
2447 // bit20:
2448 // bit19:
2449 // bit18:
2450 // bit17:
2451 // bit16:
2452 // bit15:
2453 // bit14: RC:2
2454 // bit13: RC:1
2455 // bit12: RC:0
2456 // bit11: PM
2457 // bit10: UM
2458 // bit9: OM
2459 // bit8: ZM
2460 // bit7: DM
2461 // bit6: IM
2462 // bit5: PE
2463 // bit4: UE
2464 // bit3: OE
2465 // bit2: ZE
2466 // bit1: DE
2467 // bit0: IE
2468
2469 #define ROUNDING_MODE_MASK 0x00007000
2470
// Entry of the nr_digits[] lookup table (declared below).
// NOTE(review): threshold_hi/threshold_lo appear to form a 128-bit
// decimal threshold used to choose between the two digit counts --
// confirm against the table initializer.
typedef struct _DEC_DIGITS {
  unsigned int digits;		// digit count (at/above the threshold?)
  UINT64 threshold_hi;		// high 64 bits of the 128-bit threshold
  UINT64 threshold_lo;		// low 64 bits of the 128-bit threshold
  unsigned int digits1;		// alternate digit count (below the threshold?)
} DEC_DIGITS;
2477
2478 extern DEC_DIGITS nr_digits[];
2479 extern UINT64 midpoint64[];
2480 extern UINT128 midpoint128[];
2481 extern UINT192 midpoint192[];
2482 extern UINT256 midpoint256[];
2483 extern UINT64 ten2k64[];
2484 extern UINT128 ten2k128[];
2485 extern UINT256 ten2k256[];
2486 extern UINT128 ten2mk128[];
2487 extern UINT64 ten2mk64[];
2488 extern UINT128 ten2mk128trunc[];
2489 extern int shiftright128[];
2490 extern UINT64 maskhigh128[];
2491 extern UINT64 maskhigh128M[];
2492 extern UINT64 maskhigh192M[];
2493 extern UINT64 maskhigh256M[];
2494 extern UINT64 onehalf128[];
2495 extern UINT64 onehalf128M[];
2496 extern UINT64 onehalf192M[];
2497 extern UINT64 onehalf256M[];
2498 extern UINT128 ten2mk128M[];
2499 extern UINT128 ten2mk128truncM[];
2500 extern UINT192 ten2mk192truncM[];
2501 extern UINT256 ten2mk256truncM[];
2502 extern int shiftright128M[];
2503 extern int shiftright192M[];
2504 extern int shiftright256M[];
2505 extern UINT192 ten2mk192M[];
2506 extern UINT256 ten2mk256M[];
2507 extern unsigned char char_table2[];
2508 extern unsigned char char_table3[];
2509
2510 extern UINT64 ten2m3k64[];
2511 extern unsigned int shift_ten2m3k64[];
2512 extern UINT128 ten2m3k128[];
2513 extern unsigned int shift_ten2m3k128[];
2514
2515
2516
2517 /***************************************************************************
2518 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2519 ***************************************************************************/
2520
2521 extern UINT64 Kx64[];
2522 extern unsigned int Ex64m64[];
2523 extern UINT64 half64[];
2524 extern UINT64 mask64[];
2525 extern UINT64 ten2mxtrunc64[];
2526
2527 extern UINT128 Kx128[];
2528 extern unsigned int Ex128m128[];
2529 extern UINT64 half128[];
2530 extern UINT64 mask128[];
2531 extern UINT128 ten2mxtrunc128[];
2532
2533 extern UINT192 Kx192[];
2534 extern unsigned int Ex192m192[];
2535 extern UINT64 half192[];
2536 extern UINT64 mask192[];
2537 extern UINT192 ten2mxtrunc192[];
2538
2539 extern UINT256 Kx256[];
2540 extern unsigned int Ex256m256[];
2541 extern UINT64 half256[];
2542 extern UINT64 mask256[];
2543 extern UINT256 ten2mxtrunc256[];
2544
// A decimal value that may be either 64-bit or 128-bit wide; used by
// bid_fma (below) to pass mixed-precision operands.
typedef union __bid64_128 {
  UINT64 b64;	// 64-bit encoding
  UINT128 b128;	// 128-bit encoding
} BID64_128;
2549
2550 BID64_128 bid_fma (unsigned int P0,
2551 BID64_128 x1, unsigned int P1,
2552 BID64_128 y1, unsigned int P2,
2553 BID64_128 z1, unsigned int P3,
2554 unsigned int rnd_mode, FPSC * fpsc);
2555
2556 #define P16 16
2557 #define P34 34
2558
// Union for moving between the bit pattern of a double and a 64-bit
// integer (union-based type punning; well-defined in C).
union __int_double {
  UINT64 i;	// raw bit pattern
  double d;	// binary64 value
};
typedef union __int_double int_double;
2564
2565
// Union for moving between the bit pattern of a float and a 32-bit
// integer (union-based type punning; well-defined in C).
union __int_float {
  UINT32 i;	// raw bit pattern
  float d;	// binary32 value (member named `d' for symmetry with int_double)
};
typedef union __int_float int_float;
2571
// Swap the values of A and B, using T as scratch storage.
// Wrapped in do { ... } while (0) so the macro acts as a single
// statement and composes safely with if/else (the previous bare-brace
// form broke `if (c) SWAP(a,b,t); else ...'); arguments are
// parenthesized to preserve evaluation as whole expressions.
#define SWAP(A,B,T) do {  \
  (T) = (A);              \
  (A) = (B);              \
  (B) = (T);              \
} while (0)
2577
// this macro will find coefficient_x to be in [2^A, 2^(A+1) )
// ie it knows that it is A bits long
// (extracts the biased binary32 exponent of (float)coefficient_x and
// subtracts the bias 0x7f)
// Fix: the original body assigned through `temp_x', which is not a
// macro parameter -- it only compiled when the caller's union variable
// happened to be literally named `temp_x'.  The `tempx' parameter is
// now used consistently, and arguments are parenthesized.
// NOTE(review): for values >= 2^24 the float conversion can round up
// to the next power of two, giving A one too large -- confirm callers
// only pass smaller coefficients.
#define NUMBITS(A, coefficient_x, tempx){\
      (tempx).d=(float)(coefficient_x);\
      (A)=(((tempx).i >>23) & EXPONENT_MASK32) - 0x7f;\
    }
2584
// Value classes, ordered from signaling NaN through positive infinity
// (cf. the IEEE 754-2008 "class" operation).
// NOTE(review): presumably returned by the bid*_class functions --
// numeric order may matter to consumers, so do not reorder.
enum class_types {
  signalingNaN,
  quietNaN,
  negativeInfinity,
  negativeNormal,
  negativeSubnormal,
  negativeZero,
  positiveZero,
  positiveSubnormal,
  positiveNormal,
  positiveInfinity
};
2597
// Union for reinterpreting a double as its raw 64-bit pattern (and
// vice versa) via type punning.
typedef union {
  UINT64 ui64;	// raw bit pattern
  double d;	// binary64 value
} BID_UI64DOUBLE;
2602
2603 #endif
2604