/* Copyright (C) 2007-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __BIDECIMAL_H
#define __BIDECIMAL_H

#include "bid_conf.h"
#include "bid_functions.h"

#define __BID_INLINE__ static __inline

/*********************************************************************
 *
 *      Logical Shift Macros
 *
 *********************************************************************/

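// logical shift right of a 128-bit value by k bits; valid only for
// 0 < k < 64 (use __shr_128_long when k may reach or exceed 64)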
#define __shr_128(Q, A, k)                  \
{                                           \
  (Q).w[0] = (A).w[0] >> (k);               \
  (Q).w[0] |= (A).w[1] << (64-(k));         \
  (Q).w[1] = (A).w[1] >> (k);               \
}

#define __shr_128_long(Q, A, k)             \
{                                           \
  if((k)<64) {                              \
    (Q).w[0] = (A).w[0] >> (k);             \
    (Q).w[0] |= (A).w[1] << (64-(k));       \
    (Q).w[1] = (A).w[1] >> (k);             \
  }                                         \
  else {                                    \
    (Q).w[0] = (A).w[1]>>((k)-64);          \
    (Q).w[1] = 0;                           \
  }                                         \
}

#define __shl_128_long(Q, A, k)             \
{                                           \
  if((k)<64) {                              \
    (Q).w[1] = (A).w[1] << (k);             \
    (Q).w[1] |= (A).w[0] >> (64-(k));       \
    (Q).w[0] = (A).w[0] << (k);             \
  }                                         \
  else {                                    \
    (Q).w[1] = (A).w[0]<<((k)-64);          \
    (Q).w[0] = 0;                           \
  }                                         \
}
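/* Illustrative sketch (not part of the library): assuming UINT128 is the
   two-word little-endian struct from bid_functions.h, a shift by an amount
   that may reach 64 or more must use the _long variant:

     UINT128 a, q;
     a.w[1] = 0x1ull;                 // a = 2^64
     a.w[0] = 0x0ull;
     __shr_128_long (q, a, 4);        // q = 2^60: q.w[1]==0, q.w[0]==1ull<<60
     __shr_128_long (q, a, 64);       // q = 1:    q.w[1]==0, q.w[0]==1
*/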

#define __low_64(Q)  (Q).w[0]
/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
/*********************************************************************
 *
 *      Compare Macros
 *
 *********************************************************************/
// greater than
//   return 0 if A<=B
//   non-zero if A>B
#define __unsigned_compare_gt_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
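/* Illustrative sketch (not part of the library): the comparisons treat
   w[1] as the high word, so the ordering follows the full 128-bit value:

     UINT128 a = { .w = { 0ull, 1ull } };    // a = 2^64
     UINT128 b = { .w = { ~0ull, 0ull } };   // b = 2^64 - 1
     int gt = __unsigned_compare_gt_128 (a, b);   // 1: a > b
     int eq = __test_equal_128 (a, b);            // 0
*/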
// tighten exponent range
#define __tight_bin_range_128(bp, P, bin_expon)      \
{                                                    \
  UINT64 M;                                          \
  M = 1;                                             \
  (bp) = (bin_expon);                                \
  if((bp)<63) {                                      \
    M <<= ((bp)+1);                                  \
    if((P).w[0] >= M) (bp)++; }                      \
  else if((bp)>64) {                                 \
    M <<= ((bp)+1-64);                               \
    if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))     \
      (bp)++; }                                      \
  else if((P).w[1]) (bp)++;                          \
}
/*********************************************************************
 *
 *      Add/Subtract Macros
 *
 *********************************************************************/
// add 64-bit value to 128-bit
#define __add_128_64(R128, A128, B64)    \
{                                        \
  UINT64 R64H;                           \
  R64H = (A128).w[1];                    \
  (R128).w[0] = (B64) + (A128).w[0];     \
  if((R128).w[0] < (B64))                \
    R64H ++;                             \
  (R128).w[1] = R64H;                    \
}
// subtract 64-bit value from 128-bit
#define __sub_128_64(R128, A128, B64)    \
{                                        \
  UINT64 R64H;                           \
  R64H = (A128).w[1];                    \
  if((A128).w[0] < (B64))                \
    R64H --;                             \
  (R128).w[1] = R64H;                    \
  (R128).w[0] = (A128).w[0] - (B64);     \
}
// add 128-bit value to 128-bit
// assume no carry-out
#define __add_128_128(R128, A128, B128)  \
{                                        \
  UINT128 Q128;                          \
  Q128.w[1] = (A128).w[1]+(B128).w[1];   \
  Q128.w[0] = (B128).w[0] + (A128).w[0]; \
  if(Q128.w[0] < (B128).w[0])            \
    Q128.w[1] ++;                        \
  (R128).w[1] = Q128.w[1];               \
  (R128).w[0] = Q128.w[0];               \
}
#define __sub_128_128(R128, A128, B128)  \
{                                        \
  UINT128 Q128;                          \
  Q128.w[1] = (A128).w[1]-(B128).w[1];   \
  Q128.w[0] = (A128).w[0] - (B128).w[0]; \
  if((A128).w[0] < (B128).w[0])          \
    Q128.w[1] --;                        \
  (R128).w[1] = Q128.w[1];               \
  (R128).w[0] = Q128.w[0];               \
}
#define __add_carry_out(S, CY, X, Y)     \
{                                        \
  UINT64 X1=X;                           \
  S = X + Y;                             \
  CY = (S<X1) ? 1 : 0;                   \
}
#define __add_carry_in_out(S, CY, X, Y, CI) \
{                                        \
  UINT64 X1;                             \
  X1 = X + CI;                           \
  S = X1 + Y;                            \
  CY = ((S<X1) || (X1<CI)) ? 1 : 0;      \
}
#define __sub_borrow_out(S, CY, X, Y)    \
{                                        \
  UINT64 X1=X;                           \
  S = X - Y;                             \
  CY = (S>X1) ? 1 : 0;                   \
}
#define __sub_borrow_in_out(S, CY, X, Y, CI) \
{                                        \
  UINT64 X1, X0=X;                       \
  X1 = X - CI;                           \
  S = X1 - Y;                            \
  CY = ((S>X1) || (X1>X0)) ? 1 : 0;      \
}
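/* Illustrative sketch (not part of the library): chaining the carry macros
   gives multi-word addition; here a 192-bit add of three-word operands a, b
   into r (w[0] being the least significant word), in the same style the
   wide-multiply macros below use:

     UINT64 a[3], b[3], r[3], cy;
     __add_carry_out    (r[0], cy, a[0], b[0]);
     __add_carry_in_out (r[1], cy, a[1], b[1], cy);
     r[2] = a[2] + b[2] + cy;
*/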
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
#define INCREMENT(C_128, exp)                            \
{                                                        \
  C_128.w[0]++;                                          \
  if (C_128.w[0] == 0) C_128.w[1]++;                     \
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&             \
      C_128.w[0] == 0x378d8e6400000000ull) {             \
    exp++;                                               \
    C_128.w[1] = 0x0000314dc6448d93ull;                  \
    C_128.w[0] = 0x38c15b0a00000000ull;                  \
  }                                                      \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp)                            \
{                                                        \
  C_128.w[0]--;                                          \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
  if (C_128.w[1] == 0x0000314dc6448d93ull &&             \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {  \
    exp--;                                               \
    C_128.w[1] = 0x0001ed09bead87c0ull;                  \
    C_128.w[0] = 0x378d8e63ffffffffull;                  \
  }                                                      \
}
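/* The magic constants above are just 10^34 and 10^33 split into two 64-bit
   words: 10^34 = 0x0001ed09bead87c0_378d8e6400000000 and
   10^33 = 0x0000314dc6448d93_38c15b0a00000000.  Illustrative cross-check
   (not part of the library), assuming GCC's unsigned __int128:

     unsigned __int128 p33 = 1;
     int i;
     for (i = 0; i < 33; i++) p33 *= 10;
     // (UINT64)(p33 >> 64) == 0x0000314dc6448d93ull
     // (UINT64) p33        == 0x38c15b0a00000000ull
*/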

/*********************************************************************
 *
 *      Multiply Macros
 *
 *********************************************************************/
#define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
/***************************************
 *  Signed, Full 64x64-bit Multiply
 ***************************************/
#define __imul_64x64_to_128(P, CX, CY)   \
{                                        \
  UINT64 SX, SY;                         \
  __mul_64x64_to_128(P, CX, CY);         \
                                         \
  SX = ((SINT64)(CX))>>63;               \
  SY = ((SINT64)(CY))>>63;               \
  SX &= CY;  SY &= CX;                   \
                                         \
  (P).w[1] = (P).w[1] - SX - SY;         \
}
/***************************************
 *  Signed, Full 64x128-bit Multiply
 ***************************************/
#define __imul_64x128_full(Ph, Ql, A, B)        \
{                                               \
  UINT128 ALBL, ALBH, QM2, QM;                  \
                                                \
  __imul_64x64_to_128(ALBH, (A), (B).w[1]);     \
  __imul_64x64_to_128(ALBL, (A), (B).w[0]);     \
                                                \
  (Ql).w[0] = ALBL.w[0];                        \
  QM.w[0] = ALBL.w[1];                          \
  QM.w[1] = ((SINT64)ALBL.w[1])>>63;            \
  __add_128_128(QM2, ALBH, QM);                 \
  (Ql).w[1] = QM2.w[0];                         \
  Ph = QM2.w[1];                                \
}
/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64bit product
//
#define __mul_64x64_to_128(P, CX, CY)           \
{                                               \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2;   \
  CXH = (CX) >> 32;                             \
  CXL = (UINT32)(CX);                           \
  CYH = (CY) >> 32;                             \
  CYL = (UINT32)(CY);                           \
                                                \
  PM = CXH*CYL;                                 \
  PH = CXH*CYH;                                 \
  PL = CXL*CYL;                                 \
  PM2 = CXL*CYH;                                \
  PH += (PM>>32);                               \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);       \
                                                \
  (P).w[1] = PH + (PM>>32);                     \
  (P).w[0] = (PM<<32)+(UINT32)PL;               \
}
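/* Illustrative cross-check (not part of the library): the 32-bit schoolbook
   splitting above matches a native 128-bit multiply where one is available,
   e.g. with GCC's unsigned __int128:

     UINT128 p;
     UINT64 x = 0xdeadbeefcafebabeull, y = 0x0123456789abcdefull;
     __mul_64x64_to_128 (p, x, y);
     unsigned __int128 ref = (unsigned __int128) x * y;
     // p.w[1] == (UINT64)(ref >> 64) and p.w[0] == (UINT64) ref
*/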
// get full 64x64bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_128_fast(P, CX, CY)      \
{                                               \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;        \
  CXH = (CX) >> 32;                             \
  CXL = (UINT32)(CX);                           \
  CYH = (CY) >> 32;                             \
  CYL = (UINT32)(CY);                           \
                                                \
  PM = CXH*CYL;                                 \
  PL = CXL*CYL;                                 \
  PH = CXH*CYH;                                 \
  PM += CXL*CYH;                                \
  PM += (PL>>32);                               \
                                                \
  (P).w[1] = PH + (PM>>32);                     \
  (P).w[0] = (PM<<32)+(UINT32)PL;               \
}
// used for CX < 2^60
#define __sqr64_fast(P, CX)                     \
{                                               \
  UINT64 CXH, CXL, PL, PH, PM;                  \
  CXH = (CX) >> 32;                             \
  CXL = (UINT32)(CX);                           \
                                                \
  PM = CXH*CXL;                                 \
  PL = CXL*CXL;                                 \
  PH = CXH*CXH;                                 \
  PM += PM;                                     \
  PM += (PL>>32);                               \
                                                \
  (P).w[1] = PH + (PM>>32);                     \
  (P).w[0] = (PM<<32)+(UINT32)PL;               \
}
// get the high 64 bits of the full 64x64-bit product
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_64_high_fast(P, CX, CY)  \
{                                               \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;        \
  CXH = (CX) >> 32;                             \
  CXL = (UINT32)(CX);                           \
  CYH = (CY) >> 32;                             \
  CYL = (UINT32)(CY);                           \
                                                \
  PM = CXH*CYL;                                 \
  PL = CXL*CYL;                                 \
  PH = CXH*CYH;                                 \
  PM += CXL*CYH;                                \
  PM += (PL>>32);                               \
                                                \
  (P) = PH + (PM>>32);                          \
}
// get full 64x64bit product
//
#define __mul_64x64_to_128_full(P, CX, CY)      \
{                                               \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2;   \
  CXH = (CX) >> 32;                             \
  CXL = (UINT32)(CX);                           \
  CYH = (CY) >> 32;                             \
  CYL = (UINT32)(CY);                           \
                                                \
  PM = CXH*CYL;                                 \
  PH = CXH*CYH;                                 \
  PL = CXL*CYL;                                 \
  PM2 = CXL*CYH;                                \
  PH += (PM>>32);                               \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);       \
                                                \
  (P).w[1] = PH + (PM>>32);                     \
  (P).w[0] = (PM<<32)+(UINT32)PL;               \
}
#define __mul_128x128_high(Q, A, B)             \
{                                               \
  UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;      \
                                                \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]); \
                                                \
  __add_128_128(QM, ALBH, AHBL);                \
  __add_128_64(QM2, QM, ALBL.w[1]);             \
  __add_128_64((Q), AHBH, QM2.w[1]);            \
}
#define __mul_128x128_full(Qh, Ql, A, B)        \
{                                               \
  UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;      \
                                                \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]); \
                                                \
  __add_128_128(QM, ALBH, AHBL);                \
  (Ql).w[0] = ALBL.w[0];                        \
  __add_128_64(QM2, QM, ALBL.w[1]);             \
  __add_128_64((Qh), AHBH, QM2.w[1]);           \
  (Ql).w[1] = QM2.w[0];                         \
}
#define __mul_128x128_low(Ql, A, B)             \
{                                               \
  UINT128 ALBL;                                 \
  UINT64 QM64;                                  \
                                                \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1]; \
                                                \
  (Ql).w[0] = ALBL.w[0];                        \
  (Ql).w[1] = QM64 + ALBL.w[1];                 \
}
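/* Illustrative sketch (not part of the library): for UINT128 operands a and
   b, __mul_128x128_low computes the product modulo 2^128, i.e. only the low
   two words; when the full 256-bit result matters, use __mul_128x128_full:

     UINT128 a, b, lo, hi;
     __mul_128x128_full (hi, lo, a, b);   // (hi:lo) = a * b, all 256 bits
     __mul_128x128_low  (lo, a, b);       // lo = (a * b) mod 2^128
*/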
#define __mul_64x128_low(Ql, A, B)              \
{                                               \
  UINT128 ALBL, ALBH, QM2;                      \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
  (Ql).w[0] = ALBL.w[0];                        \
  __add_128_64(QM2, ALBH, ALBL.w[1]);           \
  (Ql).w[1] = QM2.w[0];                         \
}
#define __mul_64x128_full(Ph, Ql, A, B)         \
{                                               \
  UINT128 ALBL, ALBH, QM2;                      \
                                                \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
                                                \
  (Ql).w[0] = ALBL.w[0];                        \
  __add_128_64(QM2, ALBH, ALBL.w[1]);           \
  (Ql).w[1] = QM2.w[0];                         \
  Ph = QM2.w[1];                                \
}
#define __mul_64x128_to_192(Q, A, B)            \
{                                               \
  UINT128 ALBL, ALBH, QM2;                      \
                                                \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
                                                \
  (Q).w[0] = ALBL.w[0];                         \
  __add_128_64(QM2, ALBH, ALBL.w[1]);           \
  (Q).w[1] = QM2.w[0];                          \
  (Q).w[2] = QM2.w[1];                          \
}
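// (__mul_64x128_to192 below is a verbatim duplicate of __mul_64x128_to_192,
// apparently kept under both spellings for existing callers)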
#define __mul_64x128_to192(Q, A, B)             \
{                                               \
  UINT128 ALBL, ALBH, QM2;                      \
                                                \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
                                                \
  (Q).w[0] = ALBL.w[0];                         \
  __add_128_64(QM2, ALBH, ALBL.w[1]);           \
  (Q).w[1] = QM2.w[0];                          \
  (Q).w[2] = QM2.w[1];                          \
}
#define __mul_128x128_to_256(P256, A, B)        \
{                                               \
  UINT128 Qll, Qlh;                             \
  UINT64 Phl, Phh, CY1, CY2;                    \
                                                \
  __mul_64x128_full(Phl, Qll, A.w[0], B);       \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);       \
  (P256).w[0] = Qll.w[0];                       \
  __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]);    \
  __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1); \
  (P256).w[3] = Phh + CY2;                      \
}
//
// For better performance, this macro checks A.w[1] against 0
// but not B.w[1]; use it accordingly (i.e. when A is the operand
// likely to fit in 64 bits)
#define __mul_128x128_to_256_check_A(P256, A, B)  \
{                                                 \
  UINT128 Qll, Qlh;                               \
  UINT64 Phl, Phh, CY1, CY2;                      \
                                                  \
  __mul_64x128_full(Phl, Qll, A.w[0], B);         \
  (P256).w[0] = Qll.w[0];                         \
  if(A.w[1]) {                                    \
    __mul_64x128_full(Phh, Qlh, A.w[1], B);       \
    __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]);    \
    __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1); \
    (P256).w[3] = Phh + CY2; }                    \
  else {                                          \
    (P256).w[1] = Qll.w[1];                       \
    (P256).w[2] = Phl;                            \
    (P256).w[3] = 0; }                            \
}
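/* Illustrative sketch (not part of the library): the _check_A variant skips
   the second 64x128 multiply and the carry chain whenever the high word of
   A is zero, so it pays off when A is frequently a small (64-bit) value:

     UINT256 p;
     UINT128 a = { .w = { 12345ull, 0ull } };     // high word 0: fast path
     UINT128 b = { .w = { 67890ull, 42ull } };
     __mul_128x128_to_256_check_A (p, a, b);      // p = a * b, 256 bits
*/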
#define __mul_64x192_to_256(lP, lA, lB)                     \
{                                                           \
  UINT128 lP0, lP1, lP2;                                    \
  UINT64 lC;                                                \
  __mul_64x64_to_128(lP0, lA, (lB).w[0]);                   \
  __mul_64x64_to_128(lP1, lA, (lB).w[1]);                   \
  __mul_64x64_to_128(lP2, lA, (lB).w[2]);                   \
  (lP).w[0] = lP0.w[0];                                     \
  __add_carry_out((lP).w[1], lC, lP1.w[0], lP0.w[1]);       \
  __add_carry_in_out((lP).w[2], lC, lP2.w[0], lP1.w[1], lC);\
  (lP).w[3] = lP2.w[1] + lC;                                \
}
#define __mul_64x256_to_320(P, A, B)                        \
{                                                           \
  UINT128 lP0, lP1, lP2, lP3;                               \
  UINT64 lC;                                                \
  __mul_64x64_to_128(lP0, A, (B).w[0]);                     \
  __mul_64x64_to_128(lP1, A, (B).w[1]);                     \
  __mul_64x64_to_128(lP2, A, (B).w[2]);                     \
  __mul_64x64_to_128(lP3, A, (B).w[3]);                     \
  (P).w[0] = lP0.w[0];                                      \
  __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]);        \
  __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC); \
  (P).w[4] = lP3.w[1] + lC;                                 \
}
#define __mul_192x192_to_384(P, A, B)                       \
{                                                           \
  UINT256 P0, P1, P2;                                       \
  UINT64 CY;                                                \
  __mul_64x192_to_256(P0, (A).w[0], B);                     \
  __mul_64x192_to_256(P1, (A).w[1], B);                     \
  __mul_64x192_to_256(P2, (A).w[2], B);                     \
  (P).w[0] = P0.w[0];                                       \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);          \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);   \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);   \
  (P).w[4] = P1.w[3] + CY;                                  \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);         \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);  \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);  \
  (P).w[5] = P2.w[3] + CY;                                  \
}
#define __mul_64x320_to_384(P, A, B)                        \
{                                                           \
  UINT128 lP0, lP1, lP2, lP3, lP4;                          \
  UINT64 lC;                                                \
  __mul_64x64_to_128(lP0, A, (B).w[0]);                     \
  __mul_64x64_to_128(lP1, A, (B).w[1]);                     \
  __mul_64x64_to_128(lP2, A, (B).w[2]);                     \
  __mul_64x64_to_128(lP3, A, (B).w[3]);                     \
  __mul_64x64_to_128(lP4, A, (B).w[4]);                     \
  (P).w[0] = lP0.w[0];                                      \
  __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]);        \
  __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC); \
  __add_carry_in_out((P).w[4], lC, lP4.w[0], lP3.w[1], lC); \
  (P).w[5] = lP4.w[1] + lC;                                 \
}
// A*A
// Full 128x128-bit product
#define __sqr128_to_256(P256, A)                            \
{                                                           \
  UINT128 Qll, Qlh, Qhh;                                    \
  UINT64 TMP_C1, TMP_C2;                                    \
                                                            \
  __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                  \
  __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                  \
  Qhh.w[1] += (Qlh.w[1]>>63);                               \
  Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);            \
  Qlh.w[0] += Qlh.w[0];                                     \
  __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                  \
                                                            \
  __add_carry_out((P256).w[1], TMP_C1, Qlh.w[0], Qll.w[1]); \
  (P256).w[0] = Qll.w[0];                                   \
  __add_carry_in_out((P256).w[2], TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
  (P256).w[3] = Qhh.w[1] + TMP_C2;                          \
}
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)       \
{                                                           \
  UINT128 Qll, Qlh;                                         \
  UINT64 Phl, Phh, C1, C2;                                  \
                                                            \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                   \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);                   \
  (PQl).w[0] = Qll.w[0];                                    \
  __add_carry_out((PQl).w[1], C1, Qlh.w[0], Qll.w[1]);      \
  __add_carry_in_out((PQh).w[0], C2, Qlh.w[1], Phl, C1);    \
  (PQh).w[1] = Phh + C2;                                    \
}
#define __mul_256x256_to_512(P, A, B)                       \
{                                                           \
  UINT512 P0, P1, P2, P3;                                   \
  UINT64 CY;                                                \
  __mul_64x256_to_320(P0, (A).w[0], B);                     \
  __mul_64x256_to_320(P1, (A).w[1], B);                     \
  __mul_64x256_to_320(P2, (A).w[2], B);                     \
  __mul_64x256_to_320(P3, (A).w[3], B);                     \
  (P).w[0] = P0.w[0];                                       \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);          \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);   \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);   \
  __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY);   \
  (P).w[5] = P1.w[4] + CY;                                  \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);         \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);  \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);  \
  __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY);  \
  (P).w[6] = P2.w[4] + CY;                                  \
  __add_carry_out((P).w[3], CY, P3.w[0], (P).w[3]);         \
  __add_carry_in_out((P).w[4], CY, P3.w[1], (P).w[4], CY);  \
  __add_carry_in_out((P).w[5], CY, P3.w[2], (P).w[5], CY);  \
  __add_carry_in_out((P).w[6], CY, P3.w[3], (P).w[6], CY);  \
  (P).w[7] = P3.w[4] + CY;                                  \
}
#define __mul_192x256_to_448(P, A, B)                       \
{                                                           \
  UINT512 P0, P1, P2;                                       \
  UINT64 CY;                                                \
  __mul_64x256_to_320(P0, (A).w[0], B);                     \
  __mul_64x256_to_320(P1, (A).w[1], B);                     \
  __mul_64x256_to_320(P2, (A).w[2], B);                     \
  (P).w[0] = P0.w[0];                                       \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);          \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);   \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);   \
  __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY);   \
  (P).w[5] = P1.w[4] + CY;                                  \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);         \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);  \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);  \
  __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY);  \
  (P).w[6] = P2.w[4] + CY;                                  \
}
#define __mul_320x320_to_640(P, A, B)                       \
{                                                           \
  UINT512 P0, P1, P2, P3;                                   \
  UINT64 CY;                                                \
  __mul_256x256_to_512((P), (A), B);                        \
  __mul_64x256_to_320(P1, (A).w[4], B);                     \
  __mul_64x256_to_320(P2, (B).w[4], A);                     \
  __mul_64x64_to_128(P3, (A).w[4], (B).w[4]);               \
  __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]);         \
  __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY);  \
  __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY);  \
  __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY);  \
  __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY);  \
  P3.w[1] += CY;                                            \
  __add_carry_out((P).w[4], CY, (P).w[4], P0.w[0]);         \
  __add_carry_in_out((P).w[5], CY, (P).w[5], P0.w[1], CY);  \
  __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[2], CY);  \
  __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[3], CY);  \
  __add_carry_in_out((P).w[8], CY, P3.w[0], P0.w[4], CY);   \
  (P).w[9] = P3.w[1] + CY;                                  \
}
#define __mul_384x384_to_768(P, A, B)                       \
{                                                           \
  UINT512 P0, P1, P2, P3;                                   \
  UINT64 CY;                                                \
  __mul_320x320_to_640((P), (A), B);                        \
  __mul_64x320_to_384(P1, (A).w[5], B);                     \
  __mul_64x320_to_384(P2, (B).w[5], A);                     \
  __mul_64x64_to_128(P3, (A).w[5], (B).w[5]);               \
  __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]);         \
  __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY);  \
  __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY);  \
  __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY);  \
  __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY);  \
  __add_carry_in_out((P0).w[5], CY, P1.w[5], P2.w[5], CY);  \
  P3.w[1] += CY;                                            \
  __add_carry_out((P).w[5], CY, (P).w[5], P0.w[0]);         \
  __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[1], CY);  \
  __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[2], CY);  \
  __add_carry_in_out((P).w[8], CY, (P).w[8], P0.w[3], CY);  \
  __add_carry_in_out((P).w[9], CY, (P).w[9], P0.w[4], CY);  \
  __add_carry_in_out((P).w[10], CY, P3.w[0], P0.w[5], CY);  \
  (P).w[11] = P3.w[1] + CY;                                 \
}
#define __mul_64x128_short(Ql, A, B)                        \
{                                                           \
  UINT64 ALBH_L;                                            \
                                                            \
  __mul_64x64_to_64(ALBH_L, (A), (B).w[1]);                 \
  __mul_64x64_to_128((Ql), (A), (B).w[0]);                  \
                                                            \
  (Ql).w[1] += ALBH_L;                                      \
}
#define __scale128_10(D, _TMP)                              \
{                                                           \
  UINT128 _TMP2, _TMP8;                                     \
  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);              \
  _TMP2.w[0] = _TMP.w[0]<<1;                                \
  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);              \
  _TMP8.w[0] = _TMP.w[0]<<3;                                \
  __add_128_128(D, _TMP2, _TMP8);                           \
}
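/* __scale128_10 computes D = 10 * _TMP as (_TMP << 1) + (_TMP << 3),
   avoiding a multiply.  Illustrative sketch (not part of the library):

     UINT128 d, t = { .w = { 123ull, 0ull } };
     __scale128_10 (d, t);     // d.w[0] == 1230, d.w[1] == 0
*/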
// 64x64-bit product
#define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
{                                                 \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2;     \
  CXH = (CX64) >> 32;                             \
  CXL = (UINT32)(CX64);                           \
  CYH = (CY64) >> 32;                             \
  CYL = (UINT32)(CY64);                           \
  PM = CXH*CYL;                                   \
  PH = CXH*CYH;                                   \
  PL = CXL*CYL;                                   \
  PM2 = CXL*CYH;                                  \
  PH += (PM>>32);                                 \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);         \
  (P128).w[1] = PH + (PM>>32);                    \
  (P128).w[0] = (PM<<32)+(UINT32)PL;              \
}
// 64x64-bit product, high 64 bits only
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)   \
{                                                 \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2;     \
  CXH = (CX64) >> 32;                             \
  CXL = (UINT32)(CX64);                           \
  CYH = (CY64) >> 32;                             \
  CYL = (UINT32)(CY64);                           \
  PM = CXH*CYL;                                   \
  PH = CXH*CYH;                                   \
  PL = CXL*CYL;                                   \
  PM2 = CXL*CYH;                                  \
  PH += (PM>>32);                                 \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);         \
  P64 = PH + (PM>>32);                            \
}
#define __mul_128x64_to_128(Q128, A64, B128)      \
{                                                 \
  UINT64 ALBH_L;                                  \
  ALBH_L = (A64) * (B128).w[1];                   \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]);  \
  (Q128).w[1] += ALBH_L;                          \
}
// could be simplified to compute just QM2.w[0]
#define __mul_64x128_to_128(Ql, A, B)             \
{                                                 \
  UINT128 ALBL, ALBH, QM2;                        \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128      12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128      34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32      191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32      7
////////////////////////////////////////
//  Constant Definitions
///////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define SINFINITY_MASK64        0xf800000000000000ull
#define SSNAN_MASK64            0xfc00000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define NAN_MASK32              0x7c000000
#define SNAN_MASK32             0x7e000000
#define MASK_BINARY_EXPONENT    0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS    0x3ff
#define UPPER_EXPON_LIMIT       51
// data needed for BID pack/unpack macros
extern UINT64 round_const_table[][19];
extern UINT128 reciprocals10_128[];
extern int recip_scale[];
extern UINT128 power10_table_128[];
extern int estimate_decimal_digits[];
extern int estimate_bin_expon[];
extern UINT64 power10_index_binexp[];
extern int short_recip_scale[];
extern UINT64 reciprocals10_64[];
extern UINT128 power10_index_binexp_128[];
extern UINT128 round_const_table_128[][36];
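/* The pack routines below divide by 10^k without a hardware divide: they
   add round_const_table[rmode][k] (e.g. 5*10^(k-1) for rounding to
   nearest), multiply by reciprocals10_128[k] -- a scaled binary
   approximation of 1/10^k -- and shift right by recip_scale[k].  Minimal
   sketch of the same idea for one decimal digit of a 32-bit value
   (illustrative only, not part of the library):

     UINT32 v = 12345ul;
     UINT64 q = ((UINT64) v * 0xcccccccdull) >> 35;   // q == v/10 == 1234
*/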


//////////////////////////////////////////////
//  Status Flag Handling
/////////////////////////////////////////////
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
#define is_inexact(fpsc)  ((*(fpsc)) & INEXACT_EXCEPTION)

__BID_INLINE__ UINT64
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
              UINT64 * pcoefficient_x, UINT64 x) {
  UINT64 tmp, coeff;

  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pexponent_x = 0;
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
        *pcoefficient_x = x & 0xfe00000000000000ull;
      if ((x & NAN_MASK64) == INFINITY_MASK64)
        *pcoefficient_x = x & SINFINITY_MASK64;
      return 0;   // NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return coeff;
  }
  // exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
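/* Illustrative sketch (not part of the library): unpacking the BID64
   encoding of 1.5, i.e. coefficient 15 with biased exponent 397
   (397 - DECIMAL_EXPONENT_BIAS = -1):

     UINT64 sign, coeff;
     int expon;
     UINT64 x = 0x31a000000000000full;      // (397 << 53) | 15
     unpack_BID64 (&sign, &expon, &coeff, x);
     // sign == 0, expon == 397, coeff == 15; returns the (non-zero) coeff
*/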

//
//   BID64 pack macro (general form)
//
__BID_INLINE__ UINT64
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
           unsigned *fpsc) {
  UINT128 Stemp, Q_low;
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
  int extra_digits, amount, amount2;
  unsigned status;

  if (coeff > 9999999999999999ull) {
    expon++;
    coeff = 1000000000000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x8000000000000001ull;
        if (rmode == ROUNDING_UP && !sgn)
          return 1ull;
#endif
#endif
        // result is 0
        return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
        rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, coeff,
                         reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)   //ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & QH;

          if (!remainder_h
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0]))) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = QH << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                           reciprocals10_128[extra_digits].w[0]);
          __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                              reciprocals10_128[extra_digits].w[1], CY);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID64;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID64;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = SMALLEST_BID64;
      }
      return r;
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
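/* Illustrative sketch (not part of the library): packing -2.50, i.e.
   coefficient 250 with biased exponent 396 (DECIMAL_EXPONENT_BIAS - 2):

     unsigned fpsc = 0;
     UINT64 r = get_BID64 (0x8000000000000000ull, 396, 250,
                           ROUNDING_TO_NEAREST, &fpsc);
     // r == 0xb1800000000000faull: sign | (396 << 53) | 250
*/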




//
//   No overflow/underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   no underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
                         unsigned *fpsc) {
  UINT64 r, mask;

  if (((unsigned) expon) >= 3 * 256 - 1) {
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
        expon--;
        coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
        // overflow
        r = sgn | INFINITY_MASK64;
        switch (rmode) {
        case ROUNDING_DOWN:
          if (!sgn)
            r = LARGEST_BID64;
          break;
        case ROUNDING_TO_ZERO:
          r = sgn | LARGEST_BID64;
          break;
        case ROUNDING_UP:
          // round up
          if (sgn)
            r = SMALLEST_BID64;
        }
        return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   No overflow/underflow checking
//   or checking for coefficients equal to 10^16 (after rounding)
//
__BID_INLINE__ UINT64
very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}

//
//   No overflow/underflow checking or checking for coefficients above 2^53
//
__BID_INLINE__ UINT64
very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
  // no UF/OF
  UINT64 r;

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);
  return r;
}


//
// This pack macro is used when underflow is known to occur
//
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
              unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
                     reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)   //ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;
      remainder_h >>= amount2;
      remainder_h = remainder_h & QH;

      if (!remainder_h
          && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  reciprocals10_128[extra_digits].w[0]))) {
        C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (remainder_h == 0x8000000000000000ull
          && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  reciprocals10_128[extra_digits].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
          && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  reciprocals10_128[extra_digits].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                       reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                          reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
          (((UINT64) 1) << amount))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}



//
// This pack macro does not check for coefficients above 2^53
//
__BID_INLINE__ UINT64
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
                          int rmode, unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x8000000000000001ull;
        if (rmode == ROUNDING_UP && !sgn)
          return 1ull;
#endif
#endif
        // result is 0
        return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
        rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      C128.w[0] = coeff + round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
                         reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)   //ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & QH;

          if (!remainder_h
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0]))) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = QH << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                           reciprocals10_128[extra_digits].w[0]);
          __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                              reciprocals10_128[extra_digits].w[1], CY);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }

    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID64;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID64;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = SMALLEST_BID64;
      }
      return r;
    } else {
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      if (coeff >= mask) {
        r = expon;
        r <<= EXPONENT_SHIFT_LARGE64;
        r |= (sgn | SPECIAL_ENCODING_MASK64);
        // add coeff, without leading bits
        mask = (mask >> 2) - 1;
        coeff &= mask;
        r |= coeff;
        return r;
      }
    }
  }

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}


/*****************************************************************************
 *
 *    BID128 pack/unpack macros
 *
 *****************************************************************************/

//
//   Macro for handling BID128 underflow
//   sticky bit given as additional argument
//
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
                   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
        || (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder
  if (R)
    CQ.w[0] |= 1;

  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed2 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
        CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
                       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
                          reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
        Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}

//
//   Macro for handling BID128 underflow
//
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
               unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
        || (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed2 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
        CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
                       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
                          reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
        Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}



//
//   BID128 unpack, input passed by value
//
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
                     UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];
    /*coeff.w[0] = x.w[0];
       coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
       pcoefficient_x->w[0] = x.w[0];
       pcoefficient_x->w[1] = x.w[1];
       if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
       pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */

    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))   // non-canonical
    {
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
      pcoefficient_x->w[0] = 0;
    } else
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;   // NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
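/* Illustrative sketch (not part of the library): unpacking the BID128
   encoding of 1, i.e. coefficient 1 with biased exponent 6176
   (DECIMAL_EXPONENT_BIAS_128):

     UINT64 sign;
     int expon;
     UINT128 coeff, x;
     x.w[1] = 0x3040000000000000ull;   // 6176 << 49
     x.w[0] = 1ull;
     unpack_BID128_value (&sign, &expon, &coeff, x);
     // sign == 0, expon == 6176, coeff.w[1] == 0, coeff.w[0] == 1
*/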


//
//   BID128 unpack, input passed by reference
1826 //
1827 __BID_INLINE__ UINT64
unpack_BID128(UINT64 * psign_x,int * pexponent_x,UINT128 * pcoefficient_x,UINT128 * px)1828 unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
1829 UINT128 * pcoefficient_x, UINT128 * px) {
1830 UINT128 coeff, T33, T34;
1831 UINT64 ex;
1832
1833 *psign_x = (px->w[1]) & 0x8000000000000000ull;
1834
1835 // special encodings
1836 if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1837 if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1838 // non-canonical input
1839 pcoefficient_x->w[0] = 0;
1840 pcoefficient_x->w[1] = 0;
1841 ex = (px->w[1]) >> 47;
1842 *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1843 return 0;
1844 }
1845 // 10^33
1846 T33 = power10_table_128[33];
1847 coeff.w[0] = px->w[0];
1848 coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
1849 pcoefficient_x->w[0] = px->w[0];
1850 pcoefficient_x->w[1] = px->w[1];
1851 if (__unsigned_compare_ge_128 (coeff, T33)) { // non-canonical
1852 pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
1853 pcoefficient_x->w[0] = 0;
1854 }
1855 *pexponent_x = 0;
1856 return 0; // NaN or Infinity
1857 }
1858
1859 coeff.w[0] = px->w[0];
1860 coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;
1861
1862 // 10^34
1863 T34 = power10_table_128[34];
1864 // check for non-canonical values
1865 if (__unsigned_compare_ge_128 (coeff, T34))
1866 coeff.w[0] = coeff.w[1] = 0;
1867
1868 pcoefficient_x->w[0] = coeff.w[0];
1869 pcoefficient_x->w[1] = coeff.w[1];
1870
1871 ex = (px->w[1]) >> 49;
1872 *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1873
1874 return coeff.w[0] | coeff.w[1];
1875 }
1876
1877 //
1878 // Pack macro checks for overflow, but not underflow
1879 //
1880 __BID_INLINE__ UINT128 *
get_BID128_very_fast_OF(UINT128 * pres,UINT64 sgn,int expon,UINT128 coeff,unsigned * prounding_mode,unsigned * fpsc)1881 get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
1882 UINT128 coeff, unsigned *prounding_mode,
1883 unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      // scale the coefficient up by 10 (computed as 8*coeff + 2*coeff)
      // while it stays below 10^33, decrementing the exponent each time
      while (__unsigned_compare_gt_128 (T, coeff)
             && expon > DECIMAL_MAX_EXPON_128) {
        coeff.w[1] =
          (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
          (coeff.w[0] >> 63);
        tmp2 = coeff.w[0] << 3;
        coeff.w[0] = (coeff.w[0] << 1) + tmp2;
        if (coeff.w[0] < tmp2)
          coeff.w[1]++;

        expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // these rounding modes return the largest finite value, not infinity
      if (*prounding_mode == ROUNDING_TO_ZERO
          || (sgn && *prounding_mode == ROUNDING_UP)
          || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
        pres->w[1] = sgn | LARGEST_BID128_HIGH;
        pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
        pres->w[1] = sgn | INFINITY_MASK64;
        pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}


//
//   No overflow/underflow checks
//   No checking for coefficient == 10^34 (rounding artifact)
//
__BID_INLINE__ UINT128 *
get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
                      UINT128 coeff) {
  UINT64 tmp;

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}

//
//   No overflow/underflow checks
//
__BID_INLINE__ UINT128 *
get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT64 tmp;

  // coeff == 10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}

//
//   General BID128 pack macro
//
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
            unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff == 10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0) {
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
                            fpsc);
    }

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      // scale the coefficient up by 10 (computed as 8*coeff + 2*coeff)
      // while it stays below 10^33, decrementing the exponent each time
      while (__unsigned_compare_gt_128 (T, coeff)
             && expon > DECIMAL_MAX_EXPON_128) {
        coeff.w[1] =
          (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
          (coeff.w[0] >> 63);
        tmp2 = coeff.w[0] << 3;
        coeff.w[0] = (coeff.w[0] << 1) + tmp2;
        if (coeff.w[0] < tmp2)
          coeff.w[1]++;

        expon--;
      }
    }
    if (expon > DECIMAL_MAX_EXPON_128) {
      if (!(coeff.w[1] | coeff.w[0])) {
        // a zero coefficient can absorb any exponent: clamp to the maximum
        pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
        pres->w[0] = 0;
        return pres;
      }
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // these rounding modes return the largest finite value, not infinity
      if (*prounding_mode == ROUNDING_TO_ZERO
          || (sgn && *prounding_mode == ROUNDING_UP)
          || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
        pres->w[1] = sgn | LARGEST_BID128_HIGH;
        pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
        pres->w[1] = sgn | INFINITY_MASK64;
        pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
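
// Round-trip sketch (illustrative, not part of the library; all names
// introduced here are hypothetical): unpack a BID128 operand and, when it
// is finite and nonzero, repack it through the general pack routine,
// which re-applies the canonicity and overflow/underflow checks.
__BID_INLINE__ UINT128 *
example_repack_BID128 (UINT128 * pres, UINT128 * px,
                       unsigned *prounding_mode, unsigned *fpsc) {
  UINT64 sign_x;
  int exponent_x;
  UINT128 coefficient_x;

  if (!unpack_BID128 (&sign_x, &exponent_x, &coefficient_x, px)) {
    // NaN, Infinity, or a zero coefficient: pass the input through
    pres->w[1] = px->w[1];
    pres->w[0] = px->w[0];
    return pres;
  }
  return get_BID128 (pres, sign_x, exponent_x, coefficient_x,
                     prounding_mode, fpsc);
}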


//
//   Macro used for conversions from string
//   (no additional arguments given for rounding mode, status flags)
//
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  unsigned rmode = 0, status;

  // coeff == 10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      // multiply the coefficient by 10 (computed as 2*coeff + 8*coeff)
      // while it stays below 10^33, decrementing the exponent each time
      while (expon > DECIMAL_MAX_EXPON_128 &&
             (coeff.w[1] < power10_table_128[33].w[1] ||
              (coeff.w[1] == power10_table_128[33].w[1]
               && coeff.w[0] < power10_table_128[33].w[0]))) {
        D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
        D2.w[0] = coeff.w[0] << 1;
        D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
        D8.w[0] = coeff.w[0] << 3;

        __add_128_128 (coeff, D2, D8);
        expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn) {
          pres->w[1] = LARGEST_BID128_HIGH;
          pres->w[0] = LARGEST_BID128_LOW;
        }
        break;
      case ROUNDING_TO_ZERO:
        pres->w[1] = sgn | LARGEST_BID128_HIGH;
        pres->w[0] = LARGEST_BID128_LOW;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn) {
          pres->w[1] = sgn | LARGEST_BID128_HIGH;
          pres->w[0] = LARGEST_BID128_LOW;
        }
        break;
      }

      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}



/*****************************************************************************
 *
 *    BID32 pack/unpack macros
 *
 *****************************************************************************/


__BID_INLINE__ UINT32
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
              UINT32 * pcoefficient_x, UINT32 x) {
  UINT32 tmp;

  *psign_x = x & 0x80000000;

  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
    // special encodings
    if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
      *pcoefficient_x = x & 0xfe0fffff;
      // a non-canonical NaN payload (>= 10^6) is cleared to zero
      if ((x & 0x000fffff) >= 1000000)
        *pcoefficient_x = x & 0xfe000000;
      if ((x & NAN_MASK32) == INFINITY_MASK32)
        *pcoefficient_x = x & 0xf8000000;
      *pexponent_x = 0;
      return 0;	// NaN or Infinity
    }
    // coefficient
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
    // check for non-canonical value
    if (*pcoefficient_x >= 10000000)
      *pcoefficient_x = 0;
    // get exponent
    tmp = x >> 21;
    *pexponent_x = tmp & EXPONENT_MASK32;
    return 1;
  }
  // exponent
  tmp = x >> 23;
  *pexponent_x = tmp & EXPONENT_MASK32;
  // coefficient
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);

  return *pcoefficient_x;
}
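
// Decoding sketch (illustrative, not part of the library; the helper name
// is hypothetical): the canonical BID32 encoding of 1 is 0x32800001, with
// biased exponent 101 = 0x65 in bits 30:23 and coefficient 1.
__BID_INLINE__ UINT32
example_unpack_BID32_one (void) {
  UINT32 sign_x, coefficient_x;
  int exponent_x;

  // stores sign_x = 0, exponent_x = 101, coefficient_x = 1 and returns
  // the nonzero coefficient
  return unpack_BID32 (&sign_x, &exponent_x, &coefficient_x, 0x32800001);
}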

//
//   General pack macro for BID32
//
__BID_INLINE__ UINT32
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
           unsigned *fpsc) {
  UINT128 Q;
  UINT64 C64, remainder_h, carry, Stemp;
  UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  if (coeff > 9999999ull) {
    // coefficient 10^7 can appear as a rounding artifact:
    // replace it by 10^6 and bump the exponent
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x80000001;
        if (rmode == ROUNDING_UP && !sgn)
          return 1;
#endif
#endif
        // result is 0
        return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // for negative operands, swap the directed rounding modes
      if (sgn && (unsigned) (rmode - 1) < 2)
        rmode = 3 - rmode;
#endif
#endif

      extra_digits = -expon;
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = short_recip_scale[extra_digits];

      C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	// ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits
          // is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & Q.w[1];

          if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = Q.w[1] << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q.w[0] < reciprocals10_64[extra_digits]))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp, carry, Q.w[0],
                           reciprocals10_64[extra_digits]);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (UINT32) C64;
    }

    // scale the coefficient up by 10 while it stays below 10^6,
    // decrementing the exponent each time
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID32;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID32;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((UINT32) coeff) & mask);

  return r;
}
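
// Round-trip sketch (illustrative, not part of the library; all names
// introduced here are hypothetical): unpack a BID32 value and repack it
// with the same fields. For canonical finite inputs the result is
// bit-identical, since both the exponent and the canonized coefficient
// are already in range for get_BID32.
__BID_INLINE__ UINT32
example_repack_BID32 (UINT32 x, int rmode, unsigned *fpsc) {
  UINT32 sign_x, coefficient_x;
  int exponent_x;

  if (!unpack_BID32 (&sign_x, &exponent_x, &coefficient_x, x))
    return x;	// NaN, Infinity, or zero: pass through unchanged
  return get_BID32 (sign_x, exponent_x, (UINT64) coefficient_x,
                    rmode, fpsc);
}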



//
//   no overflow/underflow checks
//
__BID_INLINE__ UINT32
very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
  UINT32 r, mask;

  mask = 1 << 23;

  // check whether coefficient fits in 23 (= 10*2+3) bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}



/*************************************************************
 *
 *************************************************************/
typedef ALIGN (16)
struct {
  UINT64 w[6];
} UINT384;
typedef ALIGN (16)
struct {
  UINT64 w[8];
} UINT512;

// #define P 34
#define MASK_STEERING_BITS 0x6000000000000000ull
#define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
#define MASK_BINARY_SIG1 0x001fffffffffffffull
#define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
// used to take G[2:w+3] (sec 3.3)
#define MASK_BINARY_SIG2 0x0007ffffffffffffull
// used to mask out G4:T0 (sec 3.3)
#define MASK_BINARY_OR2 0x0020000000000000ull
// used to prefix 8+G4 to T (sec 3.3)
#define UPPER_EXPON_LIMIT 51
#define MASK_EXP 0x7ffe000000000000ull
#define MASK_SPECIAL 0x7800000000000000ull
#define MASK_NAN 0x7c00000000000000ull
#define MASK_SNAN 0x7e00000000000000ull
#define MASK_ANY_INF 0x7c00000000000000ull
#define MASK_INF 0x7800000000000000ull
#define MASK_SIGN 0x8000000000000000ull
#define MASK_COEFF 0x0001ffffffffffffull
#define BIN_EXP_BIAS (0x1820ull << 49)

#define EXP_MIN 0x0000000000000000ull
// EXP_MIN = (-6176 + 6176) << 49
#define EXP_MAX 0x5ffe000000000000ull
// EXP_MAX = (6111 + 6176) << 49
#define EXP_MAX_P1 0x6000000000000000ull
// EXP_MAX + 1 = (6111 + 6176 + 1) << 49
#define EXP_P1 0x0002000000000000ull
// EXP_P1 = 1 << 49
#define expmin (-6176)
// min unbiased exponent (34-digit BID128 format)
#define expmax 6111
// max unbiased exponent (34-digit BID128 format)
#define expmin16 (-398)
// min unbiased exponent (16-digit BID64 format)
#define expmax16 369
// max unbiased exponent (16-digit BID64 format)
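
// Worked example (illustrative): with exponent bias 6176, the EXP_*
// constants above are biased-exponent fields placed in bits 62:49, e.g.
//   EXP_MAX = (expmax + 6176) << 49 = 12287 << 49 = 0x5ffe000000000000
//   EXP_MIN = (expmin + 6176) << 49 =     0 << 49 = 0x0000000000000000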

#define SIGNMASK32 0x80000000
#define BID64_SIG_MAX 0x002386F26FC0ffffull
#define SIGNMASK64 0x8000000000000000ull

// typedef unsigned int FPSC; // floating-point status and control
// bits 15-31: (unused)
// bit14: RC:2
// bit13: RC:1
// bit12: RC:0
// bit11: PM
// bit10: UM
// bit9:  OM
// bit8:  ZM
// bit7:  DM
// bit6:  IM
// bit5:  PE
// bit4:  UE
// bit3:  OE
// bit2:  ZE
// bit1:  DE
// bit0:  IE

#define ROUNDING_MODE_MASK 0x00007000

typedef struct _DEC_DIGITS {
  unsigned int digits;
  UINT64 threshold_hi;
  UINT64 threshold_lo;
  unsigned int digits1;
} DEC_DIGITS;

extern DEC_DIGITS nr_digits[];
extern UINT64 midpoint64[];
extern UINT128 midpoint128[];
extern UINT192 midpoint192[];
extern UINT256 midpoint256[];
extern UINT64 ten2k64[];
extern UINT128 ten2k128[];
extern UINT256 ten2k256[];
extern UINT128 ten2mk128[];
extern UINT64 ten2mk64[];
extern UINT128 ten2mk128trunc[];
extern int shiftright128[];
extern UINT64 maskhigh128[];
extern UINT64 maskhigh128M[];
extern UINT64 maskhigh192M[];
extern UINT64 maskhigh256M[];
extern UINT64 onehalf128[];
extern UINT64 onehalf128M[];
extern UINT64 onehalf192M[];
extern UINT64 onehalf256M[];
extern UINT128 ten2mk128M[];
extern UINT128 ten2mk128truncM[];
extern UINT192 ten2mk192truncM[];
extern UINT256 ten2mk256truncM[];
extern int shiftright128M[];
extern int shiftright192M[];
extern int shiftright256M[];
extern UINT192 ten2mk192M[];
extern UINT256 ten2mk256M[];
extern unsigned char char_table2[];
extern unsigned char char_table3[];

extern UINT64 ten2m3k64[];
extern unsigned int shift_ten2m3k64[];
extern UINT128 ten2m3k128[];
extern unsigned int shift_ten2m3k128[];



/***************************************************************************
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
 ***************************************************************************/

extern UINT64 Kx64[];
extern unsigned int Ex64m64[];
extern UINT64 half64[];
extern UINT64 mask64[];
extern UINT64 ten2mxtrunc64[];

extern UINT128 Kx128[];
extern unsigned int Ex128m128[];
extern UINT64 half128[];
extern UINT64 mask128[];
extern UINT128 ten2mxtrunc128[];

extern UINT192 Kx192[];
extern unsigned int Ex192m192[];
extern UINT64 half192[];
extern UINT64 mask192[];
extern UINT192 ten2mxtrunc192[];

extern UINT256 Kx256[];
extern unsigned int Ex256m256[];
extern UINT64 half256[];
extern UINT64 mask256[];
extern UINT256 ten2mxtrunc256[];

typedef union __bid64_128 {
  UINT64 b64;
  UINT128 b128;
} BID64_128;

BID64_128 bid_fma (unsigned int P0,
                   BID64_128 x1, unsigned int P1,
                   BID64_128 y1, unsigned int P2,
                   BID64_128 z1, unsigned int P3,
                   unsigned int rnd_mode, FPSC * fpsc);

#define P16 16
#define P34 34

union __int_double {
  UINT64 i;
  double d;
};
typedef union __int_double int_double;


union __int_float {
  UINT32 i;
  float d;
};
typedef union __int_float int_float;

#define SWAP(A,B,T) {\
        T = A; \
        A = B; \
        B = T; \
}

// this macro finds A such that coefficient_x is in [2^A, 2^(A+1)),
// i.e. A = floor(log2(coefficient_x)), by reading the exponent field of
// the value converted to float
#define NUMBITS(A, coefficient_x, tempx){\
        tempx.d = (float)coefficient_x;\
        A = ((tempx.i >> 23) & EXPONENT_MASK32) - 0x7f;\
}
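
// Usage sketch (illustrative; the helper name is hypothetical): NUMBITS
// applied to the coefficient 2^23 = 0x800000 yields A = 23, because the
// float exponent field holds 127 + 23 = 150 and the bias 0x7f is removed.
__BID_INLINE__ int
example_numbits (UINT32 coefficient) {
  int_float tempx;
  int nbits;

  NUMBITS (nbits, coefficient, tempx);
  return nbits;
}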

enum class_types {
  signalingNaN,
  quietNaN,
  negativeInfinity,
  negativeNormal,
  negativeSubnormal,
  negativeZero,
  positiveZero,
  positiveSubnormal,
  positiveNormal,
  positiveInfinity
};

typedef union {
  UINT64 ui64;
  double d;
} BID_UI64DOUBLE;
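
// Usage sketch (illustrative; the helper name is hypothetical): the union
// above reinterprets a double's bits, e.g. 1.0 maps to
// 0x3ff0000000000000ull.
__BID_INLINE__ UINT64
example_double_bits (double d) {
  BID_UI64DOUBLE u;

  u.d = d;
  return u.ui64;
}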

#endif