/* Copyright (C) 2007-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __BIDECIMAL_H
#define __BIDECIMAL_H

#include "bid_conf.h"
#include "bid_functions.h"

#define __BID_INLINE__ static __inline

/*********************************************************************
 *
 *      Logical Shift Macros
 *
 *********************************************************************/

#define __shr_128(Q, A, k)                        \
{                                                 \
    (Q).w[0] = (A).w[0] >> k;                     \
    (Q).w[0] |= (A).w[1] << (64-k);               \
    (Q).w[1] = (A).w[1] >> k;                     \
}

#define __shr_128_long(Q, A, k)                   \
{                                                 \
    if((k)<64) {                                  \
      (Q).w[0] = (A).w[0] >> k;                   \
      (Q).w[0] |= (A).w[1] << (64-k);             \
      (Q).w[1] = (A).w[1] >> k;                   \
    }                                             \
    else {                                        \
      (Q).w[0] = (A).w[1]>>((k)-64);              \
      (Q).w[1] = 0;                               \
    }                                             \
}

#define __shl_128_long(Q, A, k)                   \
{                                                 \
    if((k)<64) {                                  \
      (Q).w[1] = (A).w[1] << k;                   \
      (Q).w[1] |= (A).w[0] >> (64-k);             \
      (Q).w[0] = (A).w[0] << k;                   \
    }                                             \
    else {                                        \
      (Q).w[1] = (A).w[0]<<((k)-64);              \
      (Q).w[0] = 0;                               \
    }                                             \
}
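
/* Usage sketch (illustrative only, not part of the library): assemble a
   128-bit value and shift it.  Note that __shr_128 assumes 0 < k < 64;
   the _long variants also accept shift counts of 64 or more.  */
#if 0
static void
shift_example (void) {
  UINT128 a, q;
  a.w[1] = 0x1ull;              // a = 2^64 + 2  (w[0] is the low word)
  a.w[0] = 0x2ull;
  __shr_128_long (q, a, 1);     // q = a >> 1 = 2^63 + 1
  // q.w[1] == 0, q.w[0] == 0x8000000000000001ull
  __shl_128_long (q, a, 65);    // q = (a << 65) mod 2^128 = 2^66
  // q.w[1] == 0x4ull, q.w[0] == 0
}
#endif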
70 
71 #define __low_64(Q)  (Q).w[0]
72 /*********************************************************************
73  *
74  *      String Macros
75  *
76  *********************************************************************/
77 #define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
78 /*********************************************************************
79  *
80  *      Compare Macros
81  *
82  *********************************************************************/
83 // greater than
84 //  return 0 if A<=B
85 //  non-zero if A>B
86 #define __unsigned_compare_gt_128(A, B)  \
87     ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
88 // greater-or-equal
89 #define __unsigned_compare_ge_128(A, B)  \
90     ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
91 #define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
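/* Usage sketch (illustrative only): the comparisons treat w[1] as the
   high half, ordering values lexicographically by (w[1], w[0]).  */
#if 0
static void
compare_example (void) {
  UINT128 x, y;
  x.w[1] = 1ull;  x.w[0] = 0ull;                // x = 2^64
  y.w[1] = 0ull;  y.w[0] = ~0ull;               // y = 2^64 - 1
  int gt = __unsigned_compare_gt_128 (x, y);    // 1: high words differ
  int eq = __test_equal_128 (x, y);             // 0
  (void) gt; (void) eq;
}
#endif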
// tighten exponent range
#define __tight_bin_range_128(bp, P, bin_expon)  \
{                                                \
UINT64 M;                                        \
    M = 1;                                       \
    (bp) = (bin_expon);                          \
    if((bp)<63) {                                \
      M <<= ((bp)+1);                            \
      if((P).w[0] >= M) (bp)++; }                \
    else if((bp)>64) {                           \
      M <<= ((bp)+1-64);                         \
      if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
          (bp)++; }                              \
    else if((P).w[1]) (bp)++;                    \
}
/*********************************************************************
 *
 *      Add/Subtract Macros
 *
 *********************************************************************/
// add 64-bit value to 128-bit
#define __add_128_64(R128, A128, B64)    \
{                                        \
UINT64 R64H;                             \
    R64H = (A128).w[1];                  \
    (R128).w[0] = (B64) + (A128).w[0];   \
    if((R128).w[0] < (B64))              \
      R64H ++;                           \
    (R128).w[1] = R64H;                  \
}
// subtract 64-bit value from 128-bit
#define __sub_128_64(R128, A128, B64)    \
{                                        \
UINT64 R64H;                             \
    R64H = (A128).w[1];                  \
    if((A128).w[0] < (B64))              \
      R64H --;                           \
    (R128).w[1] = R64H;                  \
    (R128).w[0] = (A128).w[0] - (B64);   \
}
// add 128-bit value to 128-bit
// assume no carry-out
#define __add_128_128(R128, A128, B128)  \
{                                        \
UINT128 Q128;                            \
    Q128.w[1] = (A128).w[1]+(B128).w[1]; \
    Q128.w[0] = (B128).w[0] + (A128).w[0]; \
    if(Q128.w[0] < (B128).w[0])          \
      Q128.w[1] ++;                      \
    (R128).w[1] = Q128.w[1];             \
    (R128).w[0] = Q128.w[0];             \
}
#define __sub_128_128(R128, A128, B128)  \
{                                        \
UINT128 Q128;                            \
    Q128.w[1] = (A128).w[1]-(B128).w[1]; \
    Q128.w[0] = (A128).w[0] - (B128).w[0]; \
    if((A128).w[0] < (B128).w[0])        \
      Q128.w[1] --;                      \
    (R128).w[1] = Q128.w[1];             \
    (R128).w[0] = Q128.w[0];             \
}
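/* Usage sketch (illustrative only): the carry test relies on unsigned
   wraparound -- after R = A + B, R < B holds iff the low-word addition
   overflowed.  */
#if 0
static void
add_sub_example (void) {
  UINT128 a, r;
  a.w[1] = 0ull;  a.w[0] = ~0ull;       // a = 2^64 - 1
  __add_128_64 (r, a, 1ull);            // r = 2^64: w[0]==0, w[1]==1
  __sub_128_64 (r, r, 1ull);            // back to 2^64 - 1
  // r.w[0] == ~0ull, r.w[1] == 0
}
#endif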
#define __add_carry_out(S, CY, X, Y)    \
{                                       \
UINT64 X1=X;                            \
    S = X + Y;                          \
    CY = (S<X1) ? 1 : 0;                \
}
#define __add_carry_in_out(S, CY, X, Y, CI)    \
{                                              \
UINT64 X1;                                     \
    X1 = X + CI;                               \
    S = X1 + Y;                                \
    CY = ((S<X1) || (X1<CI)) ? 1 : 0;          \
}
#define __sub_borrow_out(S, CY, X, Y)    \
{                                        \
UINT64 X1=X;                             \
    S = X - Y;                           \
    CY = (S>X1) ? 1 : 0;                 \
}
#define __sub_borrow_in_out(S, CY, X, Y, CI)    \
{                                               \
UINT64 X1, X0=X;                                \
    X1 = X - CI;                                \
    S = X1 - Y;                                 \
    CY = ((S>X1) || (X1>X0)) ? 1 : 0;           \
}
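/* Usage sketch (illustrative only, assuming the UINT192 type from
   bid_functions.h): chaining the carry macros adds multi-word integers
   one 64-bit limb at a time, like a hardware add-with-carry sequence.
   Passing the same variable as CY and CI is safe: the macros read the
   carry-in before writing the carry-out.  */
#if 0
static void
carry_chain_example (UINT192 *r, UINT192 a, UINT192 b) {
  UINT64 cy;
  __add_carry_out (r->w[0], cy, a.w[0], b.w[0]);
  __add_carry_in_out (r->w[1], cy, a.w[1], b.w[1], cy);
  r->w[2] = a.w[2] + b.w[2] + cy;       // top limb; final carry-out dropped
}
#endif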
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
#define INCREMENT(C_128, exp)                                           \
{                                                                       \
  C_128.w[0]++;                                                         \
  if (C_128.w[0] == 0) C_128.w[1]++;                                    \
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&                            \
      C_128.w[0] == 0x378d8e6400000000ull) {                            \
    exp++;                                                              \
    C_128.w[1] = 0x0000314dc6448d93ull;                                 \
    C_128.w[0] = 0x38c15b0a00000000ull;                                 \
  }                                                                     \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp)                                           \
{                                                                       \
  C_128.w[0]--;                                                         \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--;                \
  if (C_128.w[1] == 0x0000314dc6448d93ull &&                            \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {                 \
    exp--;                                                              \
    C_128.w[1] = 0x0001ed09bead87c0ull;                                 \
    C_128.w[0] = 0x378d8e63ffffffffull;                                 \
  }                                                                     \
}
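/* For reference: the magic constants above are the high and low 64-bit
   words of the coefficient bounds,
     10^33 = 0x0000314dc6448d93 : 0x38c15b0a00000000
     10^34 = 0x0001ed09bead87c0 : 0x378d8e6400000000
   so INCREMENT wraps a coefficient that reaches 10^34 back to 10^33
   while bumping the exponent, and DECREMENT reverses that step at the
   10^33 boundary.  */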

/*********************************************************************
 *
 *      Multiply Macros
 *
 *********************************************************************/
#define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
/***************************************
 *  Signed, Full 64x64-bit Multiply
 ***************************************/
#define __imul_64x64_to_128(P, CX, CY)  \
{                                       \
UINT64 SX, SY;                          \
   __mul_64x64_to_128(P, CX, CY);       \
                                        \
   SX = ((SINT64)(CX))>>63;             \
   SY = ((SINT64)(CY))>>63;             \
   SX &= CY;   SY &= CX;                \
                                        \
   (P).w[1] = (P).w[1] - SX - SY;       \
}
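/* Why the correction works: interpreting a 64-bit pattern x as signed
   gives x_s = x_u - 2^64 when the sign bit is set.  Hence
     x_s * y_s = x_u * y_u - 2^64 * (sgn(x) ? y_u : 0)
                           - 2^64 * (sgn(y) ? x_u : 0)   (mod 2^128),
   so the signed high word is the unsigned high word minus (CY if CX is
   negative) minus (CX if CY is negative) -- exactly the SX/SY masks
   above.  */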
/***************************************
 *  Signed, Full 64x128-bit Multiply
 ***************************************/
#define __imul_64x128_full(Ph, Ql, A, B)          \
{                                                 \
UINT128 ALBL, ALBH, QM2, QM;                      \
                                                  \
    __imul_64x64_to_128(ALBH, (A), (B).w[1]);     \
    __imul_64x64_to_128(ALBL, (A), (B).w[0]);     \
                                                  \
    (Ql).w[0] = ALBL.w[0];                        \
    QM.w[0] = ALBL.w[1];                          \
    QM.w[1] = ((SINT64)ALBL.w[1])>>63;            \
    __add_128_128(QM2, ALBH, QM);                 \
    (Ql).w[1] = QM2.w[0];                         \
    Ph = QM2.w[1];                                \
}
/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64-bit product
//
#define __mul_64x64_to_128(P, CX, CY)   \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
    CXH = (CX) >> 32;                   \
    CXL = (UINT32)(CX);                 \
    CYH = (CY) >> 32;                   \
    CYL = (UINT32)(CY);                 \
                                        \
    PM = CXH*CYL;                       \
    PH = CXH*CYH;                       \
    PL = CXL*CYL;                       \
    PM2 = CXL*CYH;                      \
    PH += (PM>>32);                     \
    PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
                                        \
    (P).w[1] = PH + (PM>>32);           \
    (P).w[0] = (PM<<32)+(UINT32)PL;     \
}
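/* How the decomposition works: with CX = CXH*2^32 + CXL and
   CY = CYH*2^32 + CYL,
     CX*CY = CXH*CYH*2^64 + (CXH*CYL + CXL*CYH)*2^32 + CXL*CYL,
   and each 32x32 partial product fits in 64 bits.  Tiny sanity check
   (illustrative only):  */
#if 0
static void
mul_example (void) {
  UINT128 p;
  __mul_64x64_to_128 (p, 0xffffffffffffffffull, 0xffffffffffffffffull);
  // (2^64-1)^2 = 2^128 - 2^65 + 1:
  // p.w[1] == 0xfffffffffffffffeull, p.w[0] == 1ull
}
#endif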
// get full 64x64-bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_128_fast(P, CX, CY)   \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
    CXH = (CX) >> 32;                   \
    CXL = (UINT32)(CX);                 \
    CYH = (CY) >> 32;                   \
    CYL = (UINT32)(CY);                 \
                                        \
    PM = CXH*CYL;                       \
    PL = CXL*CYL;                       \
    PH = CXH*CYH;                       \
    PM += CXL*CYH;                      \
    PM += (PL>>32);                     \
                                        \
    (P).w[1] = PH + (PM>>32);           \
    (P).w[0] = (PM<<32)+(UINT32)PL;     \
}
// used for CX < 2^60
#define __sqr64_fast(P, CX)   \
{                                       \
UINT64 CXH, CXL, PL, PH, PM;            \
    CXH = (CX) >> 32;                   \
    CXL = (UINT32)(CX);                 \
                                        \
    PM = CXH*CXL;                       \
    PL = CXL*CXL;                       \
    PH = CXH*CXH;                       \
    PM += PM;                           \
    PM += (PL>>32);                     \
                                        \
    (P).w[1] = PH + (PM>>32);           \
    (P).w[0] = (PM<<32)+(UINT32)PL;     \
}
// get the high 64 bits of the 64x64-bit product
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_64_high_fast(P, CX, CY)   \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
    CXH = (CX) >> 32;                   \
    CXL = (UINT32)(CX);                 \
    CYH = (CY) >> 32;                   \
    CYL = (UINT32)(CY);                 \
                                        \
    PM = CXH*CYL;                       \
    PL = CXL*CYL;                       \
    PH = CXH*CYH;                       \
    PM += CXL*CYH;                      \
    PM += (PL>>32);                     \
                                        \
    (P) = PH + (PM>>32);                \
}
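/* Why the _fast variants need CX, CY < 2^61: the middle term
   PM = CXH*CYL + CXL*CYH + (PL>>32) is accumulated in one 64-bit word.
   With both operands below 2^61 the high halves CXH, CYH are below
   2^29, so each cross product is below 2^61 and the sum stays below
   2^63 -- no carry out of PM is lost.  The general __mul_64x64_to_128
   instead folds the high part of the first cross product into PH
   before combining, which is safe for arbitrary operands.  */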
// get full 64x64-bit product
//
#define __mul_64x64_to_128_full(P, CX, CY)     \
{                                         \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
    CXH = (CX) >> 32;                     \
    CXL = (UINT32)(CX);                   \
    CYH = (CY) >> 32;                     \
    CYL = (UINT32)(CY);                   \
                                          \
    PM = CXH*CYL;                         \
    PH = CXH*CYH;                         \
    PL = CXL*CYL;                         \
    PM2 = CXL*CYH;                        \
    PH += (PM>>32);                       \
    PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
                                          \
    (P).w[1] = PH + (PM>>32);             \
    (P).w[0] = (PM<<32)+(UINT32)PL;       \
}
#define __mul_128x128_high(Q, A, B)               \
{                                                 \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
                                                  \
    __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
    __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
    __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
    __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]); \
                                                  \
    __add_128_128(QM, ALBH, AHBL);                \
    __add_128_64(QM2, QM, ALBL.w[1]);             \
    __add_128_64((Q), AHBH, QM2.w[1]);            \
}
#define __mul_128x128_full(Qh, Ql, A, B)          \
{                                                 \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
                                                  \
    __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
    __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
    __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
    __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]); \
                                                  \
    __add_128_128(QM, ALBH, AHBL);                \
    (Ql).w[0] = ALBL.w[0];                        \
    __add_128_64(QM2, QM, ALBL.w[1]);             \
    __add_128_64((Qh), AHBH, QM2.w[1]);           \
    (Ql).w[1] = QM2.w[0];                         \
}
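/* Usage sketch (illustrative only): __mul_128x128_full returns the
   256-bit product as two UINT128 halves, Qh:Ql; __mul_128x128_high
   computes just the high half when the low 128 bits are not needed.  */
#if 0
static void
mul_128_example (UINT128 a, UINT128 b) {
  UINT128 hi, lo;
  __mul_128x128_full (hi, lo, a, b);    // hi:lo = a * b (256 bits)
}
#endif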
#define __mul_128x128_low(Ql, A, B)               \
{                                                 \
UINT128 ALBL;                                     \
UINT64 QM64;                                      \
                                                  \
    __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
    QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1]; \
                                                  \
    (Ql).w[0] = ALBL.w[0];                        \
    (Ql).w[1] = QM64 + ALBL.w[1];                 \
}
#define __mul_64x128_low(Ql, A, B)                \
{                                                 \
  UINT128 ALBL, ALBH, QM2;                        \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
#define __mul_64x128_full(Ph, Ql, A, B)           \
{                                                 \
UINT128 ALBL, ALBH, QM2;                          \
                                                  \
    __mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
    __mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
                                                  \
    (Ql).w[0] = ALBL.w[0];                        \
    __add_128_64(QM2, ALBH, ALBL.w[1]);           \
    (Ql).w[1] = QM2.w[0];                         \
    Ph = QM2.w[1];                                \
}
#define __mul_64x128_to_192(Q, A, B)              \
{                                                 \
UINT128 ALBL, ALBH, QM2;                          \
                                                  \
    __mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
    __mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
                                                  \
    (Q).w[0] = ALBL.w[0];                         \
    __add_128_64(QM2, ALBH, ALBL.w[1]);           \
    (Q).w[1] = QM2.w[0];                          \
    (Q).w[2] = QM2.w[1];                          \
}
#define __mul_64x128_to192(Q, A, B)           \
{                                             \
UINT128 ALBL, ALBH, QM2;                      \
                                              \
    __mul_64x64_to_128(ALBH, (A), (B).w[1]);  \
    __mul_64x64_to_128(ALBL, (A), (B).w[0]);  \
                                              \
    (Q).w[0] = ALBL.w[0];                     \
    __add_128_64(QM2, ALBH, ALBL.w[1]);       \
    (Q).w[1] = QM2.w[0];                      \
    (Q).w[2] = QM2.w[1];                      \
}
#define __mul_128x128_to_256(P256, A, B)                         \
{                                                                \
UINT128 Qll, Qlh;                                                \
UINT64 Phl, Phh, CY1, CY2;                                       \
                                                                 \
   __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
   __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
   (P256).w[0] = Qll.w[0];                                       \
   __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]);        \
   __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1);     \
   (P256).w[3] = Phh + CY2;                                      \
}
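/* Usage sketch (illustrative only): the 256-bit product lands in a
   UINT256, w[0] least significant.  */
#if 0
static void
mul_256_example (UINT128 a, UINT128 b) {
  UINT256 p;
  __mul_128x128_to_256 (p, a, b);
  // p.w[3]:p.w[2]:p.w[1]:p.w[0] = a * b
}
#endif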
//
// For better performance, check A.w[1] against 0 (but not B.w[1]);
// use this macro accordingly
#define __mul_128x128_to_256_check_A(P256, A, B)                   \
{                                                                  \
UINT128 Qll, Qlh;                                                  \
UINT64 Phl, Phh, CY1, CY2;                                         \
                                                                   \
   __mul_64x128_full(Phl, Qll, A.w[0], B);                         \
   (P256).w[0] = Qll.w[0];                                         \
   if(A.w[1])  {                                                   \
       __mul_64x128_full(Phh, Qlh, A.w[1], B);                     \
       __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]);      \
       __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1);   \
       (P256).w[3] = Phh + CY2;   }                                \
   else  {                                                         \
       (P256).w[1] = Qll.w[1];                                     \
       (P256).w[2] = Phl;                                          \
       (P256).w[3] = 0;  }                                         \
}
#define __mul_64x192_to_256(lP, lA, lB)                        \
{                                                              \
UINT128 lP0, lP1, lP2;                                         \
UINT64 lC;                                                     \
    __mul_64x64_to_128(lP0, lA, (lB).w[0]);                    \
    __mul_64x64_to_128(lP1, lA, (lB).w[1]);                    \
    __mul_64x64_to_128(lP2, lA, (lB).w[2]);                    \
    (lP).w[0] = lP0.w[0];                                      \
    __add_carry_out((lP).w[1], lC, lP1.w[0], lP0.w[1]);        \
    __add_carry_in_out((lP).w[2], lC, lP2.w[0], lP1.w[1], lC); \
    (lP).w[3] = lP2.w[1] + lC;                                 \
}
#define __mul_64x256_to_320(P, A, B)                           \
{                                                              \
UINT128 lP0, lP1, lP2, lP3;                                    \
UINT64 lC;                                                     \
    __mul_64x64_to_128(lP0, A, (B).w[0]);                      \
    __mul_64x64_to_128(lP1, A, (B).w[1]);                      \
    __mul_64x64_to_128(lP2, A, (B).w[2]);                      \
    __mul_64x64_to_128(lP3, A, (B).w[3]);                      \
    (P).w[0] = lP0.w[0];                                       \
    __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]);         \
    __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC);  \
    __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC);  \
    (P).w[4] = lP3.w[1] + lC;                                  \
}
#define __mul_192x192_to_384(P, A, B)                          \
{                                                              \
UINT256 P0, P1, P2;                                            \
UINT64 CY;                                                     \
    __mul_64x192_to_256(P0, (A).w[0], B);                      \
    __mul_64x192_to_256(P1, (A).w[1], B);                      \
    __mul_64x192_to_256(P2, (A).w[2], B);                      \
    (P).w[0] = P0.w[0];                                        \
    __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);           \
    __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);    \
    __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);    \
    (P).w[4] = P1.w[3] + CY;                                   \
    __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);          \
    __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);   \
    __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);   \
    (P).w[5] = P2.w[3] + CY;                                   \
}
#define __mul_64x320_to_384(P, A, B)                           \
{                                                              \
UINT128 lP0, lP1, lP2, lP3, lP4;                               \
UINT64 lC;                                                     \
    __mul_64x64_to_128(lP0, A, (B).w[0]);                      \
    __mul_64x64_to_128(lP1, A, (B).w[1]);                      \
    __mul_64x64_to_128(lP2, A, (B).w[2]);                      \
    __mul_64x64_to_128(lP3, A, (B).w[3]);                      \
    __mul_64x64_to_128(lP4, A, (B).w[4]);                      \
    (P).w[0] = lP0.w[0];                                       \
    __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]);         \
    __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC);  \
    __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC);  \
    __add_carry_in_out((P).w[4], lC, lP4.w[0], lP3.w[1], lC);  \
    (P).w[5] = lP4.w[1] + lC;                                  \
}
// A*A
// Full 128x128-bit product
#define __sqr128_to_256(P256, A)                                 \
{                                                                \
UINT128 Qll, Qlh, Qhh;                                           \
UINT64 TMP_C1, TMP_C2;                                           \
                                                                 \
   __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                      \
   __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                      \
   Qhh.w[1] += (Qlh.w[1]>>63);                                   \
   Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                \
   Qlh.w[0] += Qlh.w[0];                                         \
   __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                      \
                                                                 \
   __add_carry_out((P256).w[1], TMP_C1, Qlh.w[0], Qll.w[1]);     \
   (P256).w[0] = Qll.w[0];                                       \
   __add_carry_in_out((P256).w[2], TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
   (P256).w[3] = Qhh.w[1] + TMP_C2;                              \
}
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)            \
{                                                                \
UINT128 Qll, Qlh;                                                \
UINT64 Phl, Phh, C1, C2;                                         \
                                                                 \
   __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
   __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
   (PQl).w[0] = Qll.w[0];                                        \
   __add_carry_out((PQl).w[1], C1, Qlh.w[0], Qll.w[1]);          \
   __add_carry_in_out((PQh).w[0], C2, Qlh.w[1], Phl, C1);        \
   (PQh).w[1] = Phh + C2;                                        \
}
#define __mul_256x256_to_512(P, A, B)                          \
{                                                              \
UINT512 P0, P1, P2, P3;                                        \
UINT64 CY;                                                     \
    __mul_64x256_to_320(P0, (A).w[0], B);                      \
    __mul_64x256_to_320(P1, (A).w[1], B);                      \
    __mul_64x256_to_320(P2, (A).w[2], B);                      \
    __mul_64x256_to_320(P3, (A).w[3], B);                      \
    (P).w[0] = P0.w[0];                                        \
    __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);           \
    __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);    \
    __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);    \
    __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY);    \
    (P).w[5] = P1.w[4] + CY;                                   \
    __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);          \
    __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);   \
    __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);   \
    __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY);   \
    (P).w[6] = P2.w[4] + CY;                                   \
    __add_carry_out((P).w[3], CY, P3.w[0], (P).w[3]);          \
    __add_carry_in_out((P).w[4], CY, P3.w[1], (P).w[4], CY);   \
    __add_carry_in_out((P).w[5], CY, P3.w[2], (P).w[5], CY);   \
    __add_carry_in_out((P).w[6], CY, P3.w[3], (P).w[6], CY);   \
    (P).w[7] = P3.w[4] + CY;                                   \
}
#define __mul_192x256_to_448(P, A, B)                          \
{                                                              \
UINT512 P0, P1, P2;                                            \
UINT64 CY;                                                     \
    __mul_64x256_to_320(P0, (A).w[0], B);                      \
    __mul_64x256_to_320(P1, (A).w[1], B);                      \
    __mul_64x256_to_320(P2, (A).w[2], B);                      \
    (P).w[0] = P0.w[0];                                        \
    __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);           \
    __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);    \
    __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);    \
    __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY);    \
    (P).w[5] = P1.w[4] + CY;                                   \
    __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);          \
    __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);   \
    __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);   \
    __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY);   \
    (P).w[6] = P2.w[4] + CY;                                   \
}
#define __mul_320x320_to_640(P, A, B)                          \
{                                                              \
UINT512 P0, P1, P2, P3;                                        \
UINT64 CY;                                                     \
    __mul_256x256_to_512((P), (A), B);                         \
    __mul_64x256_to_320(P1, (A).w[4], B);                      \
    __mul_64x256_to_320(P2, (B).w[4], A);                      \
    __mul_64x64_to_128(P3, (A).w[4], (B).w[4]);                \
    __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]);          \
    __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY);   \
    __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY);   \
    __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY);   \
    __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY);   \
    P3.w[1] += CY;                                             \
    __add_carry_out((P).w[4], CY, (P).w[4], P0.w[0]);          \
    __add_carry_in_out((P).w[5], CY, (P).w[5], P0.w[1], CY);   \
    __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[2], CY);   \
    __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[3], CY);   \
    __add_carry_in_out((P).w[8], CY, P3.w[0], P0.w[4], CY);    \
    (P).w[9] = P3.w[1] + CY;                                   \
}
#define __mul_384x384_to_768(P, A, B)                          \
{                                                              \
UINT512 P0, P1, P2, P3;                                        \
UINT64 CY;                                                     \
    __mul_320x320_to_640((P), (A), B);                         \
    __mul_64x320_to_384(P1, (A).w[5], B);                      \
    __mul_64x320_to_384(P2, (B).w[5], A);                      \
    __mul_64x64_to_128(P3, (A).w[5], (B).w[5]);                \
    __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]);          \
    __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY);   \
    __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY);   \
    __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY);   \
    __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY);   \
    __add_carry_in_out((P0).w[5], CY, P1.w[5], P2.w[5], CY);   \
    P3.w[1] += CY;                                             \
    __add_carry_out((P).w[5], CY, (P).w[5], P0.w[0]);          \
    __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[1], CY);   \
    __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[2], CY);   \
    __add_carry_in_out((P).w[8], CY, (P).w[8], P0.w[3], CY);   \
    __add_carry_in_out((P).w[9], CY, (P).w[9], P0.w[4], CY);   \
    __add_carry_in_out((P).w[10], CY, P3.w[0], P0.w[5], CY);   \
    (P).w[11] = P3.w[1] + CY;                                  \
}
#define __mul_64x128_short(Ql, A, B)              \
{                                                 \
UINT64 ALBH_L;                                    \
                                                  \
    __mul_64x64_to_64(ALBH_L, (A), (B).w[1]);     \
    __mul_64x64_to_128((Ql), (A), (B).w[0]);      \
                                                  \
    (Ql).w[1] += ALBH_L;                          \
}
#define __scale128_10(D, _TMP)                           \
{                                                        \
UINT128 _TMP2, _TMP8;                                    \
    _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);         \
    _TMP2.w[0] = _TMP.w[0]<<1;                           \
    _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);         \
    _TMP8.w[0] = _TMP.w[0]<<3;                           \
    __add_128_128(D, _TMP2, _TMP8);                      \
}
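/* __scale128_10 computes D = 10 * _TMP using only shifts and one add:
   10*x = 2*x + 8*x = (x << 1) + (x << 3), with the bits shifted out of
   the low word carried into the high word by hand.  */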
// full 64x64-bit product
#define __mul_64x64_to_128MACH(P128, CX64, CY64)   \
{                                                  \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2;      \
  CXH = (CX64) >> 32;                              \
  CXL = (UINT32)(CX64);                            \
  CYH = (CY64) >> 32;                              \
  CYL = (UINT32)(CY64);                            \
  PM = CXH*CYL;                                    \
  PH = CXH*CYH;                                    \
  PL = CXL*CYL;                                    \
  PM2 = CXL*CYH;                                   \
  PH += (PM>>32);                                  \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
  (P128).w[1] = PH + (PM>>32);                     \
  (P128).w[0] = (PM<<32)+(UINT32)PL;               \
}
// high 64 bits of the 64x64-bit product
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)    \
{                                                  \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2;      \
  CXH = (CX64) >> 32;                              \
  CXL = (UINT32)(CX64);                            \
  CYH = (CY64) >> 32;                              \
  CYL = (UINT32)(CY64);                            \
  PM = CXH*CYL;                                    \
  PH = CXH*CYH;                                    \
  PL = CXL*CYL;                                    \
  PM2 = CXL*CYH;                                   \
  PH += (PM>>32);                                  \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
  P64 = PH + (PM>>32);                             \
}
#define __mul_128x64_to_128(Q128, A64, B128)       \
{                                                  \
  UINT64 ALBH_L;                                   \
  ALBH_L = (A64) * (B128).w[1];                    \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
  (Q128).w[1] += ALBH_L;                           \
}
// might simplify by calculating just QM2.w[0]
#define __mul_64x128_to_128(Ql, A, B)             \
{                                                 \
  UINT128 ALBL, ALBH, QM2;                        \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128  12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128      34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32  191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32      7
////////////////////////////////////////
// Constant Definitions
///////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define SINFINITY_MASK64        0xf800000000000000ull
#define SSNAN_MASK64            0xfc00000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define NAN_MASK32              0x7c000000
#define SNAN_MASK32             0x7e000000
#define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS  0x3ff
#define UPPER_EXPON_LIMIT     51
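/* BID64 layout recap (see unpack_BID64/get_BID64 below): when the two
   bits under SPECIAL_ENCODING_MASK64 are not both set, the biased
   exponent sits at bit 53 and the coefficient occupies the low 53 bits
   (SMALL_COEFF_MASK64); otherwise the exponent sits at bit 51 and the
   coefficient is the low 51 bits (LARGE_COEFF_MASK64) with the
   implicit high bit LARGE_COEFF_HIGH_BIT64 prepended.  */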
// data needed for BID pack/unpack macros
extern UINT64 round_const_table[][19];
extern UINT128 reciprocals10_128[];
extern int recip_scale[];
extern UINT128 power10_table_128[];
extern int estimate_decimal_digits[];
extern int estimate_bin_expon[];
extern UINT64 power10_index_binexp[];
extern int short_recip_scale[];
extern UINT64 reciprocals10_64[];
extern UINT128 power10_index_binexp_128[];
extern UINT128 round_const_table_128[][36];


//////////////////////////////////////////////
//  Status Flag Handling
/////////////////////////////////////////////
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)

__BID_INLINE__ UINT64
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
	      UINT64 * pcoefficient_x, UINT64 x) {
  UINT64 tmp, coeff;

  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pexponent_x = 0;
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
	*pcoefficient_x = x & 0xfe00000000000000ull;
      if ((x & NAN_MASK64) == INFINITY_MASK64)
	*pcoefficient_x = x & SINFINITY_MASK64;
      return 0;	// NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return coeff;
  }
  // exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
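
/* Usage sketch (illustrative only): decode the BID64 bit pattern for
   1.5 = 15 * 10^-1, i.e. biased exponent 397.  unpack_BID64 returns 0
   for NaN/Infinity and the coefficient otherwise.  */
#if 0
static void
unpack_example (void) {
  UINT64 sign, coeff;
  int expon;
  UINT64 x = ((UINT64) 397 << EXPONENT_SHIFT_SMALL64) | 15ull;
  UINT64 ok = unpack_BID64 (&sign, &expon, &coeff, x);
  // ok != 0 (finite, nonzero); sign == 0, expon == 397, coeff == 15
}
#endif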

//
//   BID64 pack macro (general form)
//
__BID_INLINE__ UINT64
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Stemp, Q_low;
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
  int extra_digits, amount, amount2;
  unsigned status;

  if (coeff > 9999999999999999ull) {
    expon++;
    coeff = 1000000000000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, coeff,
			 reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
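
/* Usage sketch (illustrative only): pack 15 * 10^-1 (= 1.5) with
   round-to-nearest and a local status word.  */
#if 0
static void
pack_example (void) {
  unsigned fpsc = 0;
  UINT64 x = get_BID64 (0ull, -1 + DECIMAL_EXPONENT_BIAS, 15ull,
			ROUNDING_TO_NEAREST, &fpsc);
  // x == ((UINT64) 397 << EXPONENT_SHIFT_SMALL64) | 15ull
}
#endif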


//
//   No overflow/underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   no underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
			 unsigned *fpsc) {
  UINT64 r, mask;

  if (((unsigned) expon) >= 3 * 256 - 1) {
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
	expon--;
	coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
	// overflow
	r = sgn | INFINITY_MASK64;
	switch (rmode) {
	case ROUNDING_DOWN:
	  if (!sgn)
	    r = LARGEST_BID64;
	  break;
	case ROUNDING_TO_ZERO:
	  r = sgn | LARGEST_BID64;
	  break;
	case ROUNDING_UP:
	  // round up
	  if (sgn)
	    r = SMALLEST_BID64;
	}
	return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   No overflow/underflow checking
//   or checking for coefficients equal to 10^16 (after rounding)
//
__BID_INLINE__ UINT64
very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}

//
//   No overflow/underflow checking or checking for coefficients above 2^53
//
__BID_INLINE__ UINT64
very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
  // no UF/OF
  UINT64 r;

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);
  return r;
}


//
// This pack macro is used when underflow is known to occur
//
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
	      unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
		     reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;
      remainder_h >>= amount2;
      remainder_h = remainder_h & QH;

      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0]))) {
	C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (remainder_h == 0x8000000000000000ull
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
		       reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			  reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
	  (((UINT64) 1) << amount))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}
1296 
1297 
1298 
1299 //
1300 //   This pack macro doesnot check for coefficients above 2^53
1301 //
1302 __BID_INLINE__ UINT64
get_BID64_small_mantissa(UINT64 sgn,int expon,UINT64 coeff,int rmode,unsigned * fpsc)1303 get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
1304 			  int rmode, unsigned *fpsc) {
1305   UINT128 C128, Q_low, Stemp;
1306   UINT64 r, mask, C64, remainder_h, QH, carry, CY;
1307   int extra_digits, amount, amount2;
1308   unsigned status;
1309 
1310   // check for possible underflow/overflow
1311   if (((unsigned) expon) >= 3 * 256) {
1312     if (expon < 0) {
1313       // underflow
1314       if (expon + MAX_FORMAT_DIGITS < 0) {
1315 #ifdef SET_STATUS_FLAGS
1316 	__set_status_flags (fpsc,
1317 			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1318 #endif
1319 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1320 #ifndef IEEE_ROUND_NEAREST
1321 	if (rmode == ROUNDING_DOWN && sgn)
1322 	  return 0x8000000000000001ull;
1323 	if (rmode == ROUNDING_UP && !sgn)
1324 	  return 1ull;
1325 #endif
1326 #endif
1327 	// result is 0
1328 	return sgn;
1329       }
1330 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1331 #ifndef IEEE_ROUND_NEAREST
1332       if (sgn && (unsigned) (rmode - 1) < 2)
1333 	rmode = 3 - rmode;
1334 #endif
1335 #endif
1336       // get digits to be shifted out
1337       extra_digits = -expon;
1338       C128.w[0] = coeff + round_const_table[rmode][extra_digits];
1339 
1340       // get coeff*(2^M[extra_digits])/10^extra_digits
1341       __mul_64x128_full (QH, Q_low, C128.w[0],
1342 			 reciprocals10_128[extra_digits]);
1343 
1344       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1345       amount = recip_scale[extra_digits];
1346 
1347       C64 = QH >> amount;
1348 
1349 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1350 #ifndef IEEE_ROUND_NEAREST
1351       if (rmode == 0)	//ROUNDING_TO_NEAREST
1352 #endif
1353 	if (C64 & 1) {
1354 	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
1355 
1356 	  // get remainder
1357 	  amount2 = 64 - amount;
1358 	  remainder_h = 0;
1359 	  remainder_h--;
1360 	  remainder_h >>= amount2;
1361 	  remainder_h = remainder_h & QH;
1362 
1363 	  if (!remainder_h
1364 	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1365 		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1366 		      && Q_low.w[0] <
1367 		      reciprocals10_128[extra_digits].w[0]))) {
1368 	    C64--;
1369 	  }
1370 	}
1371 #endif
1372 
1373 #ifdef SET_STATUS_FLAGS
1374 
1375       if (is_inexact (fpsc))
1376 	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1377       else {
1378 	status = INEXACT_EXCEPTION;
1379 	// get remainder
1380 	remainder_h = QH << (64 - amount);
1381 
1382 	switch (rmode) {
1383 	case ROUNDING_TO_NEAREST:
1384 	case ROUNDING_TIES_AWAY:
1385 	  // test whether fractional part is 0
1386 	  if (remainder_h == 0x8000000000000000ull
1387 	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1388 		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1389 		      && Q_low.w[0] <
1390 		      reciprocals10_128[extra_digits].w[0])))
1391 	    status = EXACT_STATUS;
1392 	  break;
1393 	case ROUNDING_DOWN:
1394 	case ROUNDING_TO_ZERO:
1395 	  if (!remainder_h
1396 	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1397 		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1398 		      && Q_low.w[0] <
1399 		      reciprocals10_128[extra_digits].w[0])))
1400 	    status = EXACT_STATUS;
1401 	  break;
1402 	default:
1403 	  // round up
1404 	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1405 			   reciprocals10_128[extra_digits].w[0]);
1406 	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1407 			      reciprocals10_128[extra_digits].w[1], CY);
1408 	  if ((remainder_h >> (64 - amount)) + carry >=
1409 	      (((UINT64) 1) << amount))
1410 	    status = EXACT_STATUS;
1411 	}
1412 
1413 	if (status != EXACT_STATUS)
1414 	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1415       }
1416 
1417 #endif
1418 
1419       return sgn | C64;
1420     }
1421 
1422     while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1423       expon--;
1424       coeff = (coeff << 3) + (coeff << 1);
1425     }
1426     if (expon > DECIMAL_MAX_EXPON_64) {
1427 #ifdef SET_STATUS_FLAGS
1428       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1429 #endif
1430       // overflow
1431       r = sgn | INFINITY_MASK64;
1432       switch (rmode) {
1433       case ROUNDING_DOWN:
1434 	if (!sgn)
1435 	  r = LARGEST_BID64;
1436 	break;
1437       case ROUNDING_TO_ZERO:
1438 	r = sgn | LARGEST_BID64;
1439 	break;
1440       case ROUNDING_UP:
1441 	// round up
1442 	if (sgn)
1443 	  r = SMALLEST_BID64;
1444       }
1445       return r;
1446     } else {
1447       mask = 1;
1448       mask <<= EXPONENT_SHIFT_SMALL64;
1449       if (coeff >= mask) {
1450 	r = expon;
1451 	r <<= EXPONENT_SHIFT_LARGE64;
1452 	r |= (sgn | SPECIAL_ENCODING_MASK64);
1453 	// add coeff, without leading bits
1454 	mask = (mask >> 2) - 1;
1455 	coeff &= mask;
1456 	r |= coeff;
1457 	return r;
1458       }
1459     }
1460   }
1461 
1462   r = expon;
1463   r <<= EXPONENT_SHIFT_SMALL64;
1464   r |= (coeff | sgn);
1465 
1466   return r;
1467 }
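//
//   Illustrative example (not from the library): for coefficients below 2^53
//   the encoding produced above is simply
//
//       sign (1 bit) | biased exponent (10 bits) | coefficient (53 bits)
//
//   With the decimal64 bias of 398, packing +1230 * 10^(-2) comes out as
//
//       r = (((UINT64) (-2 + 398)) << EXPONENT_SHIFT_SMALL64) | 1230ull;
//
//   with EXPONENT_SHIFT_SMALL64 == 53, as in the fast path above.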
1468 
1469 
1470 /*****************************************************************************
1471 *
1472 *    BID128 pack/unpack macros
1473 *
1474 *****************************************************************************/
1475 
1476 //
1477 //   Macro for handling BID128 underflow
1478 //         sticky bit given as additional argument
1479 //
1480 __BID_INLINE__ UINT128 *
1481 handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
1482 		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
1483   UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
1484   UINT64 carry, CY;
1485   int ed2, amount;
1486   unsigned rmode, status;
1487 
1488   // UF occurs
1489   if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1490 #ifdef SET_STATUS_FLAGS
1491     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1492 #endif
1493     pres->w[1] = sgn;
1494     pres->w[0] = 0;
1495 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1496 #ifndef IEEE_ROUND_NEAREST
1497     if ((sgn && *prounding_mode == ROUNDING_DOWN)
1498 	|| (!sgn && *prounding_mode == ROUNDING_UP))
1499       pres->w[0] = 1ull;
1500 #endif
1501 #endif
1502     return pres;
1503   }
1504   // CQ *= 10
1505   CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
1506   CQ2.w[0] = CQ.w[0] << 1;
1507   CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
1508   CQ8.w[0] = CQ.w[0] << 3;
1509   __add_128_128 (CQ, CQ2, CQ8);
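  // (10 * CQ evaluated as 2 * CQ + 8 * CQ, using only 128-bit shifts and
  // one 128-bit add)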
1510 
1511   // add remainder
1512   if (R)
1513     CQ.w[0] |= 1;
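  // (sticky bit: any nonzero remainder is collapsed into the lowest bit of
  // CQ, which is enough for the exactness and tie tests below)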
1514 
1515   ed2 = 1 - expon;
1516   // add rounding constant to CQ
1517 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1518 #ifndef IEEE_ROUND_NEAREST
1519   rmode = *prounding_mode;
1520   if (sgn && (unsigned) (rmode - 1) < 2)
1521     rmode = 3 - rmode;
1522 #else
1523   rmode = 0;
1524 #endif
1525 #else
1526   rmode = 0;
1527 #endif
1528   T128 = round_const_table_128[rmode][ed2];
1529   __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1530   CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1531 
1532   TP128 = reciprocals10_128[ed2];
1533   __mul_128x128_full (Qh, Ql, CQ, TP128);
1534   amount = recip_scale[ed2];
1535 
1536   if (amount >= 64) {
1537     CQ.w[0] = Qh.w[1] >> (amount - 64);
1538     CQ.w[1] = 0;
1539   } else {
1540     __shr_128 (CQ, Qh, amount);
1541   }
1542 
1543   expon = 0;
1544 
1545 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1546 #ifndef IEEE_ROUND_NEAREST
1547   if (!(*prounding_mode))
1548 #endif
1549     if (CQ.w[0] & 1) {
1550       // check whether fractional part of initial_P/10^ed1 is exactly .5
1551 
1552       // get remainder
1553       __shl_128_long (Qh1, Qh, (128 - amount));
1554 
1555       if (!Qh1.w[1] && !Qh1.w[0]
1556 	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1557 	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1558 		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
1559 	CQ.w[0]--;
1560       }
1561     }
1562 #endif
1563 
1564 #ifdef SET_STATUS_FLAGS
1565 
1566   if (is_inexact (fpsc))
1567     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1568   else {
1569     status = INEXACT_EXCEPTION;
1570     // get remainder
1571     __shl_128_long (Qh1, Qh, (128 - amount));
1572 
1573     switch (rmode) {
1574     case ROUNDING_TO_NEAREST:
1575     case ROUNDING_TIES_AWAY:
1576       // test whether fractional part is 0
1577       if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1578 	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1579 	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1580 		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1581 	status = EXACT_STATUS;
1582       break;
1583     case ROUNDING_DOWN:
1584     case ROUNDING_TO_ZERO:
1585       if ((!Qh1.w[1]) && (!Qh1.w[0])
1586 	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1587 	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1588 		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1589 	status = EXACT_STATUS;
1590       break;
1591     default:
1592       // round up
1593       __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1594 		       reciprocals10_128[ed2].w[0]);
1595       __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1596 			  reciprocals10_128[ed2].w[1], CY);
1597       __shr_128_long (Qh, Qh1, (128 - amount));
1598       Tmp.w[0] = 1;
1599       Tmp.w[1] = 0;
1600       __shl_128_long (Tmp1, Tmp, amount);
1601       Qh.w[0] += carry;
1602       if (Qh.w[0] < carry)
1603 	Qh.w[1]++;
1604       if (__unsigned_compare_ge_128 (Qh, Tmp1))
1605 	status = EXACT_STATUS;
1606     }
1607 
1608     if (status != EXACT_STATUS)
1609       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1610   }
1611 
1612 #endif
1613 
1614   pres->w[1] = sgn | CQ.w[1];
1615   pres->w[0] = CQ.w[0];
1616 
1617   return pres;
1618 
1619 }
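//
//   Worked example of the round-constant technique used above (illustrative
//   only): to round an integer quotient to nearest, add half the divisor
//   before truncating:
//
//       (123456 + 500) / 1000 == 123   // 123.456 rounds to 123
//       (123567 + 500) / 1000 == 124   // 123.567 rounds to 124
//
//   round_const_table_128[rmode][ed2] supplies the analogous constant for
//   each rounding mode (e.g. 0 for the truncating modes).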
1620 
1621 
1622 //
1623 //   Macro for handling BID128 underflow
1624 //
1625 __BID_INLINE__ UINT128 *
1626 handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
1627 	       unsigned *prounding_mode, unsigned *fpsc) {
1628   UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
1629   UINT64 carry, CY;
1630   int ed2, amount;
1631   unsigned rmode, status;
1632 
1633   // UF occurs
1634   if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1635 #ifdef SET_STATUS_FLAGS
1636     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1637 #endif
1638     pres->w[1] = sgn;
1639     pres->w[0] = 0;
1640 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1641 #ifndef IEEE_ROUND_NEAREST
1642     if ((sgn && *prounding_mode == ROUNDING_DOWN)
1643 	|| (!sgn && *prounding_mode == ROUNDING_UP))
1644       pres->w[0] = 1ull;
1645 #endif
1646 #endif
1647     return pres;
1648   }
1649 
1650   ed2 = 0 - expon;
1651   // add rounding constant to CQ
1652 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1653 #ifndef IEEE_ROUND_NEAREST
1654   rmode = *prounding_mode;
1655   if (sgn && (unsigned) (rmode - 1) < 2)
1656     rmode = 3 - rmode;
1657 #else
1658   rmode = 0;
1659 #endif
1660 #else
1661   rmode = 0;
1662 #endif
1663 
1664   T128 = round_const_table_128[rmode][ed2];
1665   __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1666   CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1667 
1668   TP128 = reciprocals10_128[ed2];
1669   __mul_128x128_full (Qh, Ql, CQ, TP128);
1670   amount = recip_scale[ed2];
1671 
1672   if (amount >= 64) {
1673     CQ.w[0] = Qh.w[1] >> (amount - 64);
1674     CQ.w[1] = 0;
1675   } else {
1676     __shr_128 (CQ, Qh, amount);
1677   }
1678 
1679   expon = 0;
1680 
1681 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1682 #ifndef IEEE_ROUND_NEAREST
1683   if (!(*prounding_mode))
1684 #endif
1685     if (CQ.w[0] & 1) {
1686       // check whether fractional part of initial_P/10^ed1 is exactly .5
1687 
1688       // get remainder
1689       __shl_128_long (Qh1, Qh, (128 - amount));
1690 
1691       if (!Qh1.w[1] && !Qh1.w[0]
1692 	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1693 	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1694 		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
1695 	CQ.w[0]--;
1696       }
1697     }
1698 #endif
1699 
1700 #ifdef SET_STATUS_FLAGS
1701 
1702   if (is_inexact (fpsc))
1703     __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1704   else {
1705     status = INEXACT_EXCEPTION;
1706     // get remainder
1707     __shl_128_long (Qh1, Qh, (128 - amount));
1708 
1709     switch (rmode) {
1710     case ROUNDING_TO_NEAREST:
1711     case ROUNDING_TIES_AWAY:
1712       // test whether fractional part is 0
1713       if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1714 	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1715 	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1716 		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1717 	status = EXACT_STATUS;
1718       break;
1719     case ROUNDING_DOWN:
1720     case ROUNDING_TO_ZERO:
1721       if ((!Qh1.w[1]) && (!Qh1.w[0])
1722 	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1723 	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1724 		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1725 	status = EXACT_STATUS;
1726       break;
1727     default:
1728       // round up
1729       __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1730 		       reciprocals10_128[ed2].w[0]);
1731       __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1732 			  reciprocals10_128[ed2].w[1], CY);
1733       __shr_128_long (Qh, Qh1, (128 - amount));
1734       Tmp.w[0] = 1;
1735       Tmp.w[1] = 0;
1736       __shl_128_long (Tmp1, Tmp, amount);
1737       Qh.w[0] += carry;
1738       if (Qh.w[0] < carry)
1739 	Qh.w[1]++;
1740       if (__unsigned_compare_ge_128 (Qh, Tmp1))
1741 	status = EXACT_STATUS;
1742     }
1743 
1744     if (status != EXACT_STATUS)
1745       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1746   }
1747 
1748 #endif
1749 
1750   pres->w[1] = sgn | CQ.w[1];
1751   pres->w[0] = CQ.w[0];
1752 
1753   return pres;
1754 
1755 }
1756 
1757 
1758 
1759 //
1760 //  BID128 unpack, input passed by value
1761 //
1762 __BID_INLINE__ UINT64
1763 unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
1764 		     UINT128 * pcoefficient_x, UINT128 x) {
1765   UINT128 coeff, T33, T34;
1766   UINT64 ex;
1767 
1768   *psign_x = (x.w[1]) & 0x8000000000000000ull;
1769 
1770   // special encodings
1771   if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1772     if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1773       // non-canonical input
1774       pcoefficient_x->w[0] = 0;
1775       pcoefficient_x->w[1] = 0;
1776       ex = (x.w[1]) >> 47;
1777       *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1778       return 0;
1779     }
1780     // 10^33
1781     T33 = power10_table_128[33];
1782     /*coeff.w[0] = x.w[0];
1783        coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
1784        pcoefficient_x->w[0] = x.w[0];
1785        pcoefficient_x->w[1] = x.w[1];
1786        if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
1787        pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */
1788 
1789     pcoefficient_x->w[0] = x.w[0];
1790     pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
1791     if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
1792     {
1793       pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
1794       pcoefficient_x->w[0] = 0;
1795     } else
1796       pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
1797     if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
1798       pcoefficient_x->w[0] = 0;
1799       pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
1800     }
1801     *pexponent_x = 0;
1802     return 0;	// NaN or Infinity
1803   }
1804 
1805   coeff.w[0] = x.w[0];
1806   coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;
1807 
1808   // 10^34
1809   T34 = power10_table_128[34];
1810   // check for non-canonical values
1811   if (__unsigned_compare_ge_128 (coeff, T34))
1812     coeff.w[0] = coeff.w[1] = 0;
1813 
1814   pcoefficient_x->w[0] = coeff.w[0];
1815   pcoefficient_x->w[1] = coeff.w[1];
1816 
1817   ex = (x.w[1]) >> 49;
1818   *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1819 
1820   return coeff.w[0] | coeff.w[1];
1821 }
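//
//   Field layout recovered above (normal, non-special BID128 values):
//
//       w[1]:  sign (bit 63) | biased exponent (bits 62:49) | coeff<112:64>
//       w[0]:  coeff<63:0>
//
//   hence the '>> 49' for the exponent and SMALL_COEFF_MASK128 for the top
//   coefficient bits; coefficients >= 10^34 are non-canonical and read as 0.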
1822 
1823 
1824 //
1825 //  BID128 unpack, input passed by reference
1826 //
1827 __BID_INLINE__ UINT64
1828 unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
1829 	       UINT128 * pcoefficient_x, UINT128 * px) {
1830   UINT128 coeff, T33, T34;
1831   UINT64 ex;
1832 
1833   *psign_x = (px->w[1]) & 0x8000000000000000ull;
1834 
1835   // special encodings
1836   if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1837     if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1838       // non-canonical input
1839       pcoefficient_x->w[0] = 0;
1840       pcoefficient_x->w[1] = 0;
1841       ex = (px->w[1]) >> 47;
1842       *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1843       return 0;
1844     }
1845     // 10^33
1846     T33 = power10_table_128[33];
1847     coeff.w[0] = px->w[0];
1848     coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
1849     pcoefficient_x->w[0] = px->w[0];
1850     pcoefficient_x->w[1] = px->w[1];
1851     if (__unsigned_compare_ge_128 (coeff, T33)) {	// non-canonical
1852       pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
1853       pcoefficient_x->w[0] = 0;
1854     }
1855     *pexponent_x = 0;
1856     return 0;	// NaN or Infinity
1857   }
1858 
1859   coeff.w[0] = px->w[0];
1860   coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;
1861 
1862   // 10^34
1863   T34 = power10_table_128[34];
1864   // check for non-canonical values
1865   if (__unsigned_compare_ge_128 (coeff, T34))
1866     coeff.w[0] = coeff.w[1] = 0;
1867 
1868   pcoefficient_x->w[0] = coeff.w[0];
1869   pcoefficient_x->w[1] = coeff.w[1];
1870 
1871   ex = (px->w[1]) >> 49;
1872   *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1873 
1874   return coeff.w[0] | coeff.w[1];
1875 }
1876 
1877 //
1878 //   Pack macro checks for overflow, but not underflow
1879 //
1880 __BID_INLINE__ UINT128 *
1881 get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
1882 			 UINT128 coeff, unsigned *prounding_mode,
1883 			 unsigned *fpsc) {
1884   UINT128 T;
1885   UINT64 tmp, tmp2;
1886 
1887   if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1888 
1889     if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
1890       T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
1891       while (__unsigned_compare_gt_128 (T, coeff)
1892 	     && expon > DECIMAL_MAX_EXPON_128) {
1893 	coeff.w[1] =
1894 	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
1895 	  (coeff.w[0] >> 63);
1896 	tmp2 = coeff.w[0] << 3;
1897 	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
1898 	if (coeff.w[0] < tmp2)
1899 	  coeff.w[1]++;
1900 
1901 	expon--;
1902       }
1903     }
1904     if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1905       // OF
1906 #ifdef SET_STATUS_FLAGS
1907       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1908 #endif
1909 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1910 #ifndef IEEE_ROUND_NEAREST
1911       if (*prounding_mode == ROUNDING_TO_ZERO
1912 	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
1913 							 &&
1914 							 *prounding_mode
1915 							 ==
1916 							 ROUNDING_DOWN))
1917       {
1918 	pres->w[1] = sgn | LARGEST_BID128_HIGH;
1919 	pres->w[0] = LARGEST_BID128_LOW;
1920       } else
1921 #endif
1922 #endif
1923       {
1924 	pres->w[1] = sgn | INFINITY_MASK64;
1925 	pres->w[0] = 0;
1926       }
1927       return pres;
1928     }
1929   }
1930 
1931   pres->w[0] = coeff.w[0];
1932   tmp = expon;
1933   tmp <<= 49;
1934   pres->w[1] = sgn | tmp | coeff.w[1];
1935 
1936   return pres;
1937 }
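//
//   The overflow loop above multiplies the 128-bit coefficient by 10 as
//   10*x = 8*x + 2*x.  A standalone sketch of that step (illustrative only):
//
//     static void
//     mul10_128_sketch (UINT128 * px) {
//       UINT64 lo2 = px->w[0] << 1, lo8 = px->w[0] << 3;
//       UINT64 hi = (px->w[1] << 3) + (px->w[1] << 1)
//                   + (px->w[0] >> 61) + (px->w[0] >> 63);
//       px->w[0] = lo2 + lo8;
//       if (px->w[0] < lo8)   // carry out of the low word
//         hi++;
//       px->w[1] = hi;
//     }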
1938 
1939 
1940 //
1941 //   No overflow/underflow checks
1942 //   No checking for coefficient == 10^34 (rounding artifact)
1943 //
1944 __BID_INLINE__ UINT128 *
1945 get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
1946 		      UINT128 coeff) {
1947   UINT64 tmp;
1948 
1949   pres->w[0] = coeff.w[0];
1950   tmp = expon;
1951   tmp <<= 49;
1952   pres->w[1] = sgn | tmp | coeff.w[1];
1953 
1954   return pres;
1955 }
1956 
1957 //
1958 //   No overflow/underflow checks
1959 //
1960 __BID_INLINE__ UINT128 *
1961 get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
1962   UINT64 tmp;
1963 
1964   // coeff==10^34?
1965   if (coeff.w[1] == 0x0001ed09bead87c0ull
1966       && coeff.w[0] == 0x378d8e6400000000ull) {
1967     expon++;
1968     // set coefficient to 10^33
1969     coeff.w[1] = 0x0000314dc6448d93ull;
1970     coeff.w[0] = 0x38c15b0a00000000ull;
1971   }
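  // (10^34 itself is one decimal digit too wide for the coefficient field,
  // but it can be produced by rounding up; 10^34 * 10^expon equals
  // 10^33 * 10^(expon+1), so the substitution above is exact)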
1972 
1973   pres->w[0] = coeff.w[0];
1974   tmp = expon;
1975   tmp <<= 49;
1976   pres->w[1] = sgn | tmp | coeff.w[1];
1977 
1978   return pres;
1979 }
1980 
1981 //
1982 //   General BID128 pack macro
1983 //
1984 __BID_INLINE__ UINT128 *
1985 get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
1986 	    unsigned *prounding_mode, unsigned *fpsc) {
1987   UINT128 T;
1988   UINT64 tmp, tmp2;
1989 
1990   // coeff==10^34?
1991   if (coeff.w[1] == 0x0001ed09bead87c0ull
1992       && coeff.w[0] == 0x378d8e6400000000ull) {
1993     expon++;
1994     // set coefficient to 10^33
1995     coeff.w[1] = 0x0000314dc6448d93ull;
1996     coeff.w[0] = 0x38c15b0a00000000ull;
1997   }
1998   // check OF, UF
1999   if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
2000     // check UF
2001     if (expon < 0) {
2002       return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
2003 			    fpsc);
2004     }
2005 
2006     if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
2007       T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
2008       while (__unsigned_compare_gt_128 (T, coeff)
2009 	     && expon > DECIMAL_MAX_EXPON_128) {
2010 	coeff.w[1] =
2011 	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
2012 	  (coeff.w[0] >> 63);
2013 	tmp2 = coeff.w[0] << 3;
2014 	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
2015 	if (coeff.w[0] < tmp2)
2016 	  coeff.w[1]++;
2017 
2018 	expon--;
2019       }
2020     }
2021     if (expon > DECIMAL_MAX_EXPON_128) {
2022       if (!(coeff.w[1] | coeff.w[0])) {
2023 	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
2024 	pres->w[0] = 0;
2025 	return pres;
2026       }
2027       // OF
2028 #ifdef SET_STATUS_FLAGS
2029       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2030 #endif
2031 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2032 #ifndef IEEE_ROUND_NEAREST
2033       if (*prounding_mode == ROUNDING_TO_ZERO
2034 	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
2035 							 &&
2036 							 *prounding_mode
2037 							 ==
2038 							 ROUNDING_DOWN))
2039       {
2040 	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2041 	pres->w[0] = LARGEST_BID128_LOW;
2042       } else
2043 #endif
2044 #endif
2045       {
2046 	pres->w[1] = sgn | INFINITY_MASK64;
2047 	pres->w[0] = 0;
2048       }
2049       return pres;
2050     }
2051   }
2052 
2053   pres->w[0] = coeff.w[0];
2054   tmp = expon;
2055   tmp <<= 49;
2056   pres->w[1] = sgn | tmp | coeff.w[1];
2057 
2058   return pres;
2059 }
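//
//   Illustrative use of the general pack macro above (not from the library);
//   the exponent argument is already biased (decimal128 bias == 6176):
//
//     UINT128 res, c;
//     unsigned rm = ROUNDING_TO_NEAREST, flags = 0;
//     c.w[1] = 0;
//     c.w[0] = 7;                                 // coefficient 7
//     get_BID128 (&res, 0x8000000000000000ull,    // sign bit set: negative
//                 6176 - 3, c, &rm, &flags);      // value -7 * 10^(-3)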
2060 
2061 
2062 //
2063 //  Macro used for conversions from string
2064 //        (no additional arguments given for rounding mode, status flags)
2065 //
2066 __BID_INLINE__ UINT128 *
2067 get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
2068   UINT128 D2, D8;
2069   UINT64 tmp;
2070   unsigned rmode = 0, status;
2071 
2072   // coeff==10^34?
2073   if (coeff.w[1] == 0x0001ed09bead87c0ull
2074       && coeff.w[0] == 0x378d8e6400000000ull) {
2075     expon++;
2076     // set coefficient to 10^33
2077     coeff.w[1] = 0x0000314dc6448d93ull;
2078     coeff.w[0] = 0x38c15b0a00000000ull;
2079   }
2080   // check OF, UF
2081   if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
2082     // check UF
2083     if (expon < 0)
2084       return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);
2085 
2086     // OF
2087 
2088     if (expon < DECIMAL_MAX_EXPON_128 + 34) {
2089       while (expon > DECIMAL_MAX_EXPON_128 &&
2090 	     (coeff.w[1] < power10_table_128[33].w[1] ||
2091 	      (coeff.w[1] == power10_table_128[33].w[1]
2092 	       && coeff.w[0] < power10_table_128[33].w[0]))) {
2093 	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
2094 	D2.w[0] = coeff.w[0] << 1;
2095 	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
2096 	D8.w[0] = coeff.w[0] << 3;
2097 
2098 	__add_128_128 (coeff, D2, D8);
2099 	expon--;
2100       }
2101     } else if (!(coeff.w[0] | coeff.w[1]))
2102       expon = DECIMAL_MAX_EXPON_128;
2103 
2104     if (expon > DECIMAL_MAX_EXPON_128) {
2105       pres->w[1] = sgn | INFINITY_MASK64;
2106       pres->w[0] = 0;
2107       switch (rmode) {
2108       case ROUNDING_DOWN:
2109 	if (!sgn) {
2110 	  pres->w[1] = LARGEST_BID128_HIGH;
2111 	  pres->w[0] = LARGEST_BID128_LOW;
2112 	}
2113 	break;
2114       case ROUNDING_TO_ZERO:
2115 	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2116 	pres->w[0] = LARGEST_BID128_LOW;
2117 	break;
2118       case ROUNDING_UP:
2119 	// round up
2120 	if (sgn) {
2121 	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
2122 	  pres->w[0] = LARGEST_BID128_LOW;
2123 	}
2124 	break;
2125       }
2126 
2127       return pres;
2128     }
2129   }
2130 
2131   pres->w[0] = coeff.w[0];
2132   tmp = expon;
2133   tmp <<= 49;
2134   pres->w[1] = sgn | tmp | coeff.w[1];
2135 
2136   return pres;
2137 }
2138 
2139 
2140 
2141 /*****************************************************************************
2142 *
2143 *    BID32 pack/unpack macros
2144 *
2145 *****************************************************************************/
2146 
2147 
2148 __BID_INLINE__ UINT32
2149 unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
2150 	      UINT32 * pcoefficient_x, UINT32 x) {
2151   UINT32 tmp;
2152 
2153   *psign_x = x & 0x80000000;
2154 
2155   if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
2156     // special encodings
2157     if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
2158       *pcoefficient_x = x & 0xfe0fffff;
2159       if ((x & 0x000fffff) >= 1000000)
2160 	*pcoefficient_x = x & 0xfe000000;
2161       if ((x & NAN_MASK32) == INFINITY_MASK32)
2162 	*pcoefficient_x = x & 0xf8000000;
2163       *pexponent_x = 0;
2164       return 0;	// NaN or Infinity
2165     }
2166     // coefficient
2167     *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
2168     // check for non-canonical value
2169     if (*pcoefficient_x >= 10000000)
2170       *pcoefficient_x = 0;
2171     // get exponent
2172     tmp = x >> 21;
2173     *pexponent_x = tmp & EXPONENT_MASK32;
2174     return 1;
2175   }
2176   // exponent
2177   tmp = x >> 23;
2178   *pexponent_x = tmp & EXPONENT_MASK32;
2179   // coefficient
2180   *pcoefficient_x = (x & LARGE_COEFF_MASK32);
2181 
2182   return *pcoefficient_x;
2183 }
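//
//   Field layout recovered above (BID32):
//
//     normal:   sign (1) | biased exponent (8, bits 30:23) | coefficient (23)
//     special:  sign (1) | steering bits '11' | exponent (8, bits 28:21)
//               | coefficient (21, with implied high bits '100')
//
//   hence the exponent is taken at '>> 23' or '>> 21', and the large-form
//   coefficient is rebuilt by ORing in LARGE_COEFF_HIGH_BIT32.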
2184 
2185 //
2186 //   General pack macro for BID32
2187 //
2188 __BID_INLINE__ UINT32
2189 get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
2190 	   unsigned *fpsc) {
2191   UINT128 Q;
2192   UINT64 C64, remainder_h, carry, Stemp;
2193   UINT32 r, mask;
2194   int extra_digits, amount, amount2;
2195   unsigned status;
2196 
2197   if (coeff > 9999999ull) {
2198     expon++;
2199     coeff = 1000000ull;
2200   }
2201   // check for possible underflow/overflow
2202   if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2203     if (expon < 0) {
2204       // underflow
2205       if (expon + MAX_FORMAT_DIGITS_32 < 0) {
2206 #ifdef SET_STATUS_FLAGS
2207 	__set_status_flags (fpsc,
2208 			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2209 #endif
2210 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2211 #ifndef IEEE_ROUND_NEAREST
2212 	if (rmode == ROUNDING_DOWN && sgn)
2213 	  return 0x80000001;
2214 	if (rmode == ROUNDING_UP && !sgn)
2215 	  return 1;
2216 #endif
2217 #endif
2218 	// result is 0
2219 	return sgn;
2220       }
2221       // get digits to be shifted out
2222 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2223       rmode = 0;
2224 #endif
2225 #ifdef IEEE_ROUND_NEAREST
2226       rmode = 0;
2227 #endif
2228 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2229 #ifndef IEEE_ROUND_NEAREST
2230       if (sgn && (unsigned) (rmode - 1) < 2)
2231 	rmode = 3 - rmode;
2232 #endif
2233 #endif
2234 
2235       extra_digits = -expon;
2236       coeff += round_const_table[rmode][extra_digits];
2237 
2238       // get coeff*(2^M[extra_digits])/10^extra_digits
2239       __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);
2240 
2241       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
2242       amount = short_recip_scale[extra_digits];
2243 
2244       C64 = Q.w[1] >> amount;
2245 
2246 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2247 #ifndef IEEE_ROUND_NEAREST
2248       if (rmode == 0)	//ROUNDING_TO_NEAREST
2249 #endif
2250 	if (C64 & 1) {
2251 	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
2252 
2253 	  // get remainder
2254 	  amount2 = 64 - amount;
2255 	  remainder_h = 0;
2256 	  remainder_h--;
2257 	  remainder_h >>= amount2;
2258 	  remainder_h = remainder_h & Q.w[1];
2259 
2260 	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
2261 	    C64--;
2262 	  }
2263 	}
2264 #endif
2265 
2266 #ifdef SET_STATUS_FLAGS
2267 
2268       if (is_inexact (fpsc))
2269 	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
2270       else {
2271 	status = INEXACT_EXCEPTION;
2272 	// get remainder
2273 	remainder_h = Q.w[1] << (64 - amount);
2274 
2275 	switch (rmode) {
2276 	case ROUNDING_TO_NEAREST:
2277 	case ROUNDING_TIES_AWAY:
2278 	  // test whether fractional part is 0
2279 	  if (remainder_h == 0x8000000000000000ull
2280 	      && (Q.w[0] < reciprocals10_64[extra_digits]))
2281 	    status = EXACT_STATUS;
2282 	  break;
2283 	case ROUNDING_DOWN:
2284 	case ROUNDING_TO_ZERO:
2285 	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
2286 	    status = EXACT_STATUS;
2287 	  break;
2288 	default:
2289 	  // round up
2290 	  __add_carry_out (Stemp, carry, Q.w[0],
2291 			   reciprocals10_64[extra_digits]);
2292 	  if ((remainder_h >> (64 - amount)) + carry >=
2293 	      (((UINT64) 1) << amount))
2294 	    status = EXACT_STATUS;
2295 	}
2296 
2297 	if (status != EXACT_STATUS)
2298 	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
2299       }
2300 
2301 #endif
2302 
2303       return sgn | (UINT32) C64;
2304     }
2305 
2306     while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
2307       coeff = (coeff << 3) + (coeff << 1);
2308       expon--;
2309     }
2310     if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2311       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2312       // overflow
2313       r = sgn | INFINITY_MASK32;
2314       switch (rmode) {
2315       case ROUNDING_DOWN:
2316 	if (!sgn)
2317 	  r = LARGEST_BID32;
2318 	break;
2319       case ROUNDING_TO_ZERO:
2320 	r = sgn | LARGEST_BID32;
2321 	break;
2322       case ROUNDING_UP:
2323 	// round up
2324 	if (sgn)
2325 	  r = sgn | LARGEST_BID32;
2326       }
2327       return r;
2328     }
2329   }
2330 
2331   mask = 1 << 23;
2332 
2333   // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2334   if (coeff < mask) {
2335     r = expon;
2336     r <<= 23;
2337     r |= ((UINT32) coeff | sgn);
2338     return r;
2339   }
2340   // special format
2341 
2342   r = expon;
2343   r <<= 21;
2344   r |= (sgn | SPECIAL_ENCODING_MASK32);
2345   // add coeff, without leading bits
2346   mask = (1 << 21) - 1;
2347   r |= (((UINT32) coeff) & mask);
2348 
2349   return r;
2350 }
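//
//   Illustrative use (not from the library); the exponent argument is
//   biased (decimal32 bias == 101):
//
//     unsigned flags = 0;
//     UINT32 r = get_BID32 (0, 101 - 2, 1230ull,    // +1230 * 10^(-2)
//                           ROUNDING_TO_NEAREST, &flags);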
2351 
2352 
2353 
2354 //
2355 //   no overflow/underflow checks
2356 //
2357 __BID_INLINE__ UINT32
2358 very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
2359   UINT32 r, mask;
2360 
2361   mask = 1 << 23;
2362 
2363   // check whether coefficient fits in 10*2+3 = 23 bits
2364   if (coeff < mask) {
2365     r = expon;
2366     r <<= 23;
2367     r |= (coeff | sgn);
2368     return r;
2369   }
2370   // special format
2371   r = expon;
2372   r <<= 21;
2373   r |= (sgn | SPECIAL_ENCODING_MASK32);
2374   // add coeff, without leading bits
2375   mask = (1 << 21) - 1;
2376   coeff &= mask;
2377   r |= coeff;
2378 
2379   return r;
2380 }
2381 
2382 
2383 
2384 /*************************************************************
2385  *
2386  *************************************************************/
2387 typedef
2388 ALIGN (16)
2389      struct {
2390        UINT64 w[6];
2391      } UINT384;
2392      typedef ALIGN (16)
2393      struct {
2394        UINT64 w[8];
2395      } UINT512;
2396 
2397 // #define P                               34
2398 #define MASK_STEERING_BITS              0x6000000000000000ull
2399 #define MASK_BINARY_EXPONENT1           0x7fe0000000000000ull
2400 #define MASK_BINARY_SIG1                0x001fffffffffffffull
2401 #define MASK_BINARY_EXPONENT2           0x1ff8000000000000ull
2402     //used to take G[2:w+3] (sec 3.3)
2403 #define MASK_BINARY_SIG2                0x0007ffffffffffffull
2404     //used to mask out G4:T0 (sec 3.3)
2405 #define MASK_BINARY_OR2                 0x0020000000000000ull
2406     //used to prefix 8+G4 to T (sec 3.3)
2407 #define UPPER_EXPON_LIMIT               51
2408 #define MASK_EXP                        0x7ffe000000000000ull
2409 #define MASK_SPECIAL                    0x7800000000000000ull
2410 #define MASK_NAN                        0x7c00000000000000ull
2411 #define MASK_SNAN                       0x7e00000000000000ull
2412 #define MASK_ANY_INF                    0x7c00000000000000ull
2413 #define MASK_INF                        0x7800000000000000ull
2414 #define MASK_SIGN                       0x8000000000000000ull
2415 #define MASK_COEFF                      0x0001ffffffffffffull
2416 #define BIN_EXP_BIAS                    (0x1820ull << 49)
2417 
2418 #define EXP_MIN                         0x0000000000000000ull
2419    // EXP_MIN = (-6176 + 6176) << 49
2420 #define EXP_MAX                         0x5ffe000000000000ull
2421   // EXP_MAX = (6111 + 6176) << 49
2422 #define EXP_MAX_P1                      0x6000000000000000ull
2423   // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2424 #define EXP_P1                          0x0002000000000000ull
2425   // EXP_P1 = 1 << 49
2426 #define expmin                            -6176
2427   // min unbiased exponent (decimal128)
2428 #define expmax                            6111
2429   // max unbiased exponent (decimal128)
2430 #define expmin16                          -398
2431   // min unbiased exponent (decimal64)
2432 #define expmax16                          369
2433   // max unbiased exponent (decimal64)
2434 
2435 #define SIGNMASK32 0x80000000
2436 #define BID64_SIG_MAX 0x002386F26FC0ffffull
2437 #define SIGNMASK64    0x8000000000000000ull
2438 
2439 // typedef unsigned int FPSC; // floating-point status and control
2440 	// bit31:
2441 	// bit30:
2442 	// bit29:
2443 	// bit28:
2444 	// bit27:
2445 	// bit26:
2446 	// bit25:
2447 	// bit24:
2448 	// bit23:
2449 	// bit22:
2450 	// bit21:
2451 	// bit20:
2452 	// bit19:
2453 	// bit18:
2454 	// bit17:
2455 	// bit16:
2456 	// bit15:
2457 	// bit14: RC:2
2458 	// bit13: RC:1
2459 	// bit12: RC:0
2460 	// bit11: PM
2461 	// bit10: UM
2462 	// bit9:  OM
2463 	// bit8:  ZM
2464 	// bit7:  DM
2465 	// bit6:  IM
2466 	// bit5:  PE
2467 	// bit4:  UE
2468 	// bit3:  OE
2469 	// bit2:  ZE
2470 	// bit1:  DE
2471 	// bit0:  IE
2472 
2473 #define ROUNDING_MODE_MASK	0x00007000
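// Example (illustrative): per the bit map above, the rounding-control field
// occupies bits 14:12, so it can be read as
//
//     rmode = (fpsc & ROUNDING_MODE_MASK) >> 12;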
2474 
2475      typedef struct _DEC_DIGITS {
2476        unsigned int digits;
2477        UINT64 threshold_hi;
2478        UINT64 threshold_lo;
2479        unsigned int digits1;
2480      } DEC_DIGITS;
2481 
2482      extern DEC_DIGITS nr_digits[];
2483      extern UINT64 midpoint64[];
2484      extern UINT128 midpoint128[];
2485      extern UINT192 midpoint192[];
2486      extern UINT256 midpoint256[];
2487      extern UINT64 ten2k64[];
2488      extern UINT128 ten2k128[];
2489      extern UINT256 ten2k256[];
2490      extern UINT128 ten2mk128[];
2491      extern UINT64 ten2mk64[];
2492      extern UINT128 ten2mk128trunc[];
2493      extern int shiftright128[];
2494      extern UINT64 maskhigh128[];
2495      extern UINT64 maskhigh128M[];
2496      extern UINT64 maskhigh192M[];
2497      extern UINT64 maskhigh256M[];
2498      extern UINT64 onehalf128[];
2499      extern UINT64 onehalf128M[];
2500      extern UINT64 onehalf192M[];
2501      extern UINT64 onehalf256M[];
2502      extern UINT128 ten2mk128M[];
2503      extern UINT128 ten2mk128truncM[];
2504      extern UINT192 ten2mk192truncM[];
2505      extern UINT256 ten2mk256truncM[];
2506      extern int shiftright128M[];
2507      extern int shiftright192M[];
2508      extern int shiftright256M[];
2509      extern UINT192 ten2mk192M[];
2510      extern UINT256 ten2mk256M[];
2511      extern unsigned char char_table2[];
2512      extern unsigned char char_table3[];
2513 
2514      extern UINT64 ten2m3k64[];
2515      extern unsigned int shift_ten2m3k64[];
2516      extern UINT128 ten2m3k128[];
2517      extern unsigned int shift_ten2m3k128[];
2518 
2519 
2520 
2521 /***************************************************************************
2522  *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2523  ***************************************************************************/
2524 
2525      extern UINT64 Kx64[];
2526      extern unsigned int Ex64m64[];
2527      extern UINT64 half64[];
2528      extern UINT64 mask64[];
2529      extern UINT64 ten2mxtrunc64[];
2530 
2531      extern UINT128 Kx128[];
2532      extern unsigned int Ex128m128[];
2533      extern UINT64 half128[];
2534      extern UINT64 mask128[];
2535      extern UINT128 ten2mxtrunc128[];
2536 
2537      extern UINT192 Kx192[];
2538      extern unsigned int Ex192m192[];
2539      extern UINT64 half192[];
2540      extern UINT64 mask192[];
2541      extern UINT192 ten2mxtrunc192[];
2542 
2543      extern UINT256 Kx256[];
2544      extern unsigned int Ex256m256[];
2545      extern UINT64 half256[];
2546      extern UINT64 mask256[];
2547      extern UINT256 ten2mxtrunc256[];
2548 
2549      typedef union __bid64_128 {
2550        UINT64 b64;
2551        UINT128 b128;
2552      } BID64_128;
2553 
2554      BID64_128 bid_fma (unsigned int P0,
2555 			BID64_128 x1, unsigned int P1,
2556 			BID64_128 y1, unsigned int P2,
2557 			BID64_128 z1, unsigned int P3,
2558 			unsigned int rnd_mode, FPSC * fpsc);
2559 
2560 #define         P16     16
2561 #define         P34     34
2562 
2563      union __int_double {
2564        UINT64 i;
2565        double d;
2566      };
2567      typedef union __int_double int_double;
2568 
2569 
2570      union __int_float {
2571        UINT32 i;
2572        float d;
2573      };
2574      typedef union __int_float int_float;
2575 
2576 #define SWAP(A,B,T) {\
2577         T = A; \
2578         A = B; \
2579         B = T; \
2580 }
2581 
2582 // this macro will find coefficient_x to be in [2^A, 2^(A+1) )
2583 // i.e. it knows that it is A bits long
2584 #define NUMBITS(A, coefficient_x, tempx){\
2585       tempx.d=(float)coefficient_x;\
2586       A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
2587 }
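// Example (illustrative): for coefficient_x == 5, (float) 5.0 has biased
// exponent 0x81, so A == 2 and indeed 5 lies in [2^2, 2^3).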
2588 
2589      enum class_types {
2590        signalingNaN,
2591        quietNaN,
2592        negativeInfinity,
2593        negativeNormal,
2594        negativeSubnormal,
2595        negativeZero,
2596        positiveZero,
2597        positiveSubnormal,
2598        positiveNormal,
2599        positiveInfinity
2600      };
2601 
2602      typedef union {
2603        UINT64 ui64;
2604        double d;
2605      } BID_UI64DOUBLE;
2606 
2607 #endif
2608