/* Copyright (C) 2007-2021 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __BIDECIMAL_H
#define __BIDECIMAL_H

#include "bid_conf.h"
#include "bid_functions.h"

#define __BID_INLINE__ static __inline

/*********************************************************************
 *
 *      Logical Shift Macros
 *
 *********************************************************************/

#define __shr_128(Q, A, k)                        \
{                                                 \
  (Q).w[0] = (A).w[0] >> k;                       \
  (Q).w[0] |= (A).w[1] << (64-k);                 \
  (Q).w[1] = (A).w[1] >> k;                       \
}
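/* Illustrative example: __shr_128 is only valid for 0 < k < 64 (k == 0
   or k >= 64 would shift by an out-of-range amount).  Shifting
   A = 2^64 + 2 right by one bit (A and Q are hypothetical locals):

     UINT128 A, Q;
     A.w[1] = 1; A.w[0] = 2;
     __shr_128 (Q, A, 1);   // Q.w[1] == 0, Q.w[0] == 0x8000000000000001
*/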

#define __shr_128_long(Q, A, k)                   \
{                                                 \
  if((k)<64) {                                    \
    (Q).w[0] = (A).w[0] >> k;                     \
    (Q).w[0] |= (A).w[1] << (64-k);               \
    (Q).w[1] = (A).w[1] >> k;                     \
  }                                               \
  else {                                          \
    (Q).w[0] = (A).w[1]>>((k)-64);                \
    (Q).w[1] = 0;                                 \
  }                                               \
}

#define __shl_128_long(Q, A, k)                   \
{                                                 \
  if((k)<64) {                                    \
    (Q).w[1] = (A).w[1] << k;                     \
    (Q).w[1] |= (A).w[0] >> (64-k);               \
    (Q).w[0] = (A).w[0] << k;                     \
  }                                               \
  else {                                          \
    (Q).w[1] = (A).w[0]<<((k)-64);                \
    (Q).w[0] = 0;                                 \
  }                                               \
}
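/* Unlike __shr_128, the *_long variants also accept shift counts of
   64 or more (though still not 0).  For example, shifting the low
   word past the 64-bit boundary:

     A.w[1] = 0; A.w[0] = 1;
     __shl_128_long (Q, A, 65);   // Q.w[1] == 2, Q.w[0] == 0
*/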

#define __low_64(Q)  (Q).w[0]
/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
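// The unsigned subtraction lets a single comparison cover both range
// ends: tolower_macro ('Q') yields 'q', while a character outside
// 'A'..'Z' (e.g. '3', whose (x)-'A' wraps to a large unsigned value)
// is returned unchanged.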
/*********************************************************************
 *
 *      Compare Macros
 *
 *********************************************************************/
// greater than
//  return 0 if A<=B
//  non-zero if A>B
#define __unsigned_compare_gt_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range
#define __tight_bin_range_128(bp, P, bin_expon)  \
{                                                \
UINT64 M;                                        \
  M = 1;                                         \
  (bp) = (bin_expon);                            \
  if((bp)<63) {                                  \
    M <<= ((bp)+1);                              \
    if((P).w[0] >= M) (bp)++; }                  \
  else if((bp)>64) {                             \
    M <<= ((bp)+1-64);                           \
    if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0])) \
      (bp)++; }                                  \
  else if((P).w[1]) (bp)++;                      \
}
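/* __tight_bin_range_128 corrects a binary-exponent estimate that may
   be one too small, so that afterwards 2^bp <= P < 2^(bp+1) (assuming
   the input estimate was off by at most one).  For example, with
   P = 9 and bin_expon = 2: M = 2^3 = 8, P.w[0] >= 8, so bp becomes 3,
   which is indeed floor(log2(9)).  */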
/*********************************************************************
 *
 *      Add/Subtract Macros
 *
 *********************************************************************/
// add 64-bit value to 128-bit
#define __add_128_64(R128, A128, B64)    \
{                                        \
UINT64 R64H;                             \
  R64H = (A128).w[1];                    \
  (R128).w[0] = (B64) + (A128).w[0];     \
  if((R128).w[0] < (B64))                \
    R64H ++;                             \
  (R128).w[1] = R64H;                    \
}
// subtract 64-bit value from 128-bit
#define __sub_128_64(R128, A128, B64)    \
{                                        \
UINT64 R64H;                             \
  R64H = (A128).w[1];                    \
  if((A128).w[0] < (B64))                \
    R64H --;                             \
  (R128).w[1] = R64H;                    \
  (R128).w[0] = (A128).w[0] - (B64);     \
}
// add 128-bit value to 128-bit
// assume no carry-out
#define __add_128_128(R128, A128, B128)  \
{                                        \
UINT128 Q128;                            \
  Q128.w[1] = (A128).w[1]+(B128).w[1];   \
  Q128.w[0] = (B128).w[0] + (A128).w[0]; \
  if(Q128.w[0] < (B128).w[0])            \
    Q128.w[1] ++;                        \
  (R128).w[1] = Q128.w[1];               \
  (R128).w[0] = Q128.w[0];               \
}
#define __sub_128_128(R128, A128, B128)  \
{                                        \
UINT128 Q128;                            \
  Q128.w[1] = (A128).w[1]-(B128).w[1];   \
  Q128.w[0] = (A128).w[0] - (B128).w[0]; \
  if((A128).w[0] < (B128).w[0])          \
    Q128.w[1] --;                        \
  (R128).w[1] = Q128.w[1];               \
  (R128).w[0] = Q128.w[0];               \
}
#define __add_carry_out(S, CY, X, Y)    \
{                                       \
UINT64 X1=X;                            \
  S = X + Y;                            \
  CY = (S<X1) ? 1 : 0;                  \
}
#define __add_carry_in_out(S, CY, X, Y, CI)    \
{                                              \
UINT64 X1;                                     \
  X1 = X + CI;                                 \
  S = X1 + Y;                                  \
  CY = ((S<X1) || (X1<CI)) ? 1 : 0;            \
}
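/* Illustrative sketch: the carry macros chain into wider additions.
   Adding two 192-bit integers held as three 64-bit words each
   (A0..A2, B0..B2 and S0..S2 are hypothetical locals; avoid a caller
   variable named X1, which the macros declare internally):

     UINT64 S0, S1, S2, CY;
     __add_carry_out    (S0, CY, A0, B0);
     __add_carry_in_out (S1, CY, A1, B1, CY);
     __add_carry_in_out (S2, CY, A2, B2, CY);
     // CY now holds the carry out of bit 191

   This is exactly the pattern the multiply macros below use to sum
   partial products.  */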
#define __sub_borrow_out(S, CY, X, Y)    \
{                                        \
UINT64 X1=X;                             \
  S = X - Y;                             \
  CY = (S>X1) ? 1 : 0;                   \
}
#define __sub_borrow_in_out(S, CY, X, Y, CI)    \
{                                               \
UINT64 X1, X0=X;                                \
  X1 = X - CI;                                  \
  S = X1 - Y;                                   \
  CY = ((S>X1) || (X1>X0)) ? 1 : 0;             \
}
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
#define INCREMENT(C_128, exp)                                           \
{                                                                       \
  C_128.w[0]++;                                                         \
  if (C_128.w[0] == 0) C_128.w[1]++;                                    \
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&                            \
      C_128.w[0] == 0x378d8e6400000000ull) {                            \
    exp++;                                                              \
    C_128.w[1] = 0x0000314dc6448d93ull;                                 \
    C_128.w[0] = 0x38c15b0a00000000ull;                                 \
  }                                                                     \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp)                                           \
{                                                                       \
  C_128.w[0]--;                                                         \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--;                \
  if (C_128.w[1] == 0x0000314dc6448d93ull &&                            \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {                 \
    exp--;                                                              \
    C_128.w[1] = 0x0001ed09bead87c0ull;                                 \
    C_128.w[0] = 0x378d8e63ffffffffull;                                 \
  }                                                                     \
}
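/* The magic constants are 10^34 = 0x0001ed09bead87c0_378d8e6400000000
   and 10^33 = 0x0000314dc6448d93_38c15b0a00000000.  Worked example:
   if C_128 == 10^34 - 1 on entry, INCREMENT leaves C_128 == 10^33
   with exp incremented; the represented value is preserved because
   10^34 * 10^exp == 10^33 * 10^(exp+1).  DECREMENT handles the
   opposite boundary: stepping below the smallest 34-digit coefficient
   10^33 renormalizes to 10^34 - 1 with the exponent reduced.  */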

/*********************************************************************
 *
 *      Multiply Macros
 *
 *********************************************************************/
#define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
/***************************************
 *  Signed, Full 64x64-bit Multiply
 ***************************************/
#define __imul_64x64_to_128(P, CX, CY)  \
{                                       \
UINT64 SX, SY;                          \
   __mul_64x64_to_128(P, CX, CY);       \
                                        \
   SX = ((SINT64)(CX))>>63;             \
   SY = ((SINT64)(CY))>>63;             \
   SX &= CY;   SY &= CX;                \
                                        \
   (P).w[1] = (P).w[1] - SX - SY;       \
}
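/* Why the correction works: a two's complement value x equals its
   unsigned bit pattern xu minus 2^64 when negative, so
   x*y == xu*yu - 2^64*(sx*yu + sy*xu) mod 2^128, where sx/sy are the
   sign bits.  SX and SY are exactly those conditional terms, and only
   the high word needs adjusting.  Example: CX = -1, CY = 2 gives the
   unsigned product 2^65 - 2 (high word 1); subtracting SX = 2 yields
   high word 0xffffffffffffffff, i.e. the 128-bit value -2.  */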
/***************************************
 *  Signed, Full 64x128-bit Multiply
 ***************************************/
#define __imul_64x128_full(Ph, Ql, A, B)          \
{                                                 \
UINT128 ALBL, ALBH, QM2, QM;                      \
                                                  \
  __imul_64x64_to_128(ALBH, (A), (B).w[1]);       \
  __imul_64x64_to_128(ALBL, (A), (B).w[0]);       \
                                                  \
  (Ql).w[0] = ALBL.w[0];                          \
  QM.w[0] = ALBL.w[1];                            \
  QM.w[1] = ((SINT64)ALBL.w[1])>>63;              \
  __add_128_128(QM2, ALBH, QM);                   \
  (Ql).w[1] = QM2.w[0];                           \
  Ph = QM2.w[1];                                  \
}
/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64bit product
//
#define __mul_64x64_to_128(P, CX, CY)   \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
  CYH = (CY) >> 32;                     \
  CYL = (UINT32)(CY);                   \
                                        \
  PM = CXH*CYL;                         \
  PH = CXH*CYH;                         \
  PL = CXL*CYL;                         \
  PM2 = CXL*CYH;                        \
  PH += (PM>>32);                       \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
                                        \
  (P).w[1] = PH + (PM>>32);             \
  (P).w[0] = (PM<<32)+(UINT32)PL;       \
}
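/* The middle-word accumulation cannot overflow: (UINT32)PM <= 2^32-1,
   PM2 <= (2^32-1)^2 and PL>>32 <= 2^32-1, and their sum is at most
   (2^32-1)^2 + 2*(2^32-1) = 2^64 - 1, so PM is computed exactly in
   64 bits.  That is why this general form splits PM and folds its
   high half into PH first.  */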
// get full 64x64bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_128_fast(P, CX, CY)   \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
  CYH = (CY) >> 32;                     \
  CYL = (UINT32)(CY);                   \
                                        \
  PM = CXH*CYL;                         \
  PL = CXL*CYL;                         \
  PH = CXH*CYH;                         \
  PM += CXL*CYH;                        \
  PM += (PL>>32);                       \
                                        \
  (P).w[1] = PH + (PM>>32);             \
  (P).w[0] = (PM<<32)+(UINT32)PL;       \
}
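/* The precondition makes the shortcut safe: with CX, CY < 2^61 the
   high halves satisfy CXH, CYH < 2^29, so each cross product is below
   2^61 and PM stays below 2^62 + 2^32 -- no carry out of PM to
   propagate into PH, unlike the general macro above.  */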
// used for CX< 2^60
#define __sqr64_fast(P, CX)   \
{                                       \
UINT64 CXH, CXL, PL, PH, PM;            \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
                                        \
  PM = CXH*CXL;                         \
  PL = CXL*CXL;                         \
  PH = CXH*CXH;                         \
  PM += PM;                             \
  PM += (PL>>32);                       \
                                        \
  (P).w[1] = PH + (PM>>32);             \
  (P).w[0] = (PM<<32)+(UINT32)PL;       \
}
// get high 64 bits of the 64x64-bit product
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_64_high_fast(P, CX, CY)   \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
  CYH = (CY) >> 32;                     \
  CYL = (UINT32)(CY);                   \
                                        \
  PM = CXH*CYL;                         \
  PL = CXL*CYL;                         \
  PH = CXH*CYH;                         \
  PM += CXL*CYH;                        \
  PM += (PL>>32);                       \
                                        \
  (P) = PH + (PM>>32);                  \
}
// get full 64x64bit product
//
#define __mul_64x64_to_128_full(P, CX, CY)     \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
  CYH = (CY) >> 32;                     \
  CYL = (UINT32)(CY);                   \
                                        \
  PM = CXH*CYL;                         \
  PH = CXH*CYH;                         \
  PL = CXL*CYL;                         \
  PM2 = CXL*CYH;                        \
  PH += (PM>>32);                       \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
                                        \
  (P).w[1] = PH + (PM>>32);             \
  (P).w[0] = (PM<<32)+(UINT32)PL;       \
}
#define __mul_128x128_high(Q, A, B)               \
{                                                 \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
                                                  \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);   \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);   \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]);   \
                                                  \
  __add_128_128(QM, ALBH, AHBL);                  \
  __add_128_64(QM2, QM, ALBL.w[1]);               \
  __add_128_64((Q), AHBH, QM2.w[1]);              \
}
#define __mul_128x128_full(Qh, Ql, A, B)          \
{                                                 \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
                                                  \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);   \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);   \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]);   \
                                                  \
  __add_128_128(QM, ALBH, AHBL);                  \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, QM, ALBL.w[1]);               \
  __add_128_64((Qh), AHBH, QM2.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
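/* Partial-product layout shared by the 128x128 multiplies above
   (AH/AL and BH/BL are the high/low 64-bit words of A and B):

     bits:  255..192  191..128  127..64   63..0
                                [    AL*BL     ]
                      [    AL*BH     ]
                      [    AH*BL     ]
            [    AH*BH     ]

   __mul_128x128_high keeps only bits 255..128 of the sum, while
   __mul_128x128_full also assembles the low 128 bits in Ql.  */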
#define __mul_128x128_low(Ql, A, B)               \
{                                                 \
UINT128 ALBL;                                     \
UINT64 QM64;                                      \
                                                  \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
  QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1];   \
                                                  \
  (Ql).w[0] = ALBL.w[0];                          \
  (Ql).w[1] = QM64 + ALBL.w[1];                   \
}
#define __mul_64x128_low(Ql, A, B)                \
{                                                 \
  UINT128 ALBL, ALBH, QM2;                        \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
#define __mul_64x128_full(Ph, Ql, A, B)           \
{                                                 \
UINT128 ALBL, ALBH, QM2;                          \
                                                  \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                  \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
  Ph = QM2.w[1];                                  \
}
#define __mul_64x128_to_192(Q, A, B)              \
{                                                 \
UINT128 ALBL, ALBH, QM2;                          \
                                                  \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                  \
  (Q).w[0] = ALBL.w[0];                           \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Q).w[1] = QM2.w[0];                            \
  (Q).w[2] = QM2.w[1];                            \
}
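// identical to __mul_64x128_to_192 above; kept under a second
// spelling (no underscore before "192") for existing call sites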
#define __mul_64x128_to192(Q, A, B)          \
{                                            \
UINT128 ALBL, ALBH, QM2;                     \
                                             \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);   \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);   \
                                             \
  (Q).w[0] = ALBL.w[0];                      \
  __add_128_64(QM2, ALBH, ALBL.w[1]);        \
  (Q).w[1] = QM2.w[0];                       \
  (Q).w[2] = QM2.w[1];                       \
}
#define __mul_128x128_to_256(P256, A, B)                         \
{                                                                \
UINT128 Qll, Qlh;                                                \
UINT64 Phl, Phh, CY1, CY2;                                       \
                                                                 \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                        \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);                        \
  (P256).w[0] = Qll.w[0];                                        \
  __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);          \
  __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);       \
  (P256).w[3] = Phh + CY2;                                       \
}
//
// For better performance, will check A.w[1] against 0,
//                         but not B.w[1]
// Use this macro accordingly
#define __mul_128x128_to_256_check_A(P256, A, B)                   \
{                                                                  \
UINT128 Qll, Qlh;                                                  \
UINT64 Phl, Phh, CY1, CY2;                                         \
                                                                   \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                          \
  (P256).w[0] = Qll.w[0];                                          \
  if(A.w[1])  {                                                    \
    __mul_64x128_full(Phh, Qlh, A.w[1], B);                        \
    __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);          \
    __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);       \
    (P256).w[3] = Phh + CY2;   }                                   \
  else  {                                                          \
    (P256).w[1] = Qll.w[1];                                        \
    (P256).w[2] = Phl;                                             \
    (P256).w[3] = 0;  }                                            \
}
#define __mul_64x192_to_256(lP, lA, lB)                   \
{                                                         \
UINT128 lP0,lP1,lP2;                                      \
UINT64 lC;                                                \
  __mul_64x64_to_128(lP0, lA, (lB).w[0]);                 \
  __mul_64x64_to_128(lP1, lA, (lB).w[1]);                 \
  __mul_64x64_to_128(lP2, lA, (lB).w[2]);                 \
  (lP).w[0] = lP0.w[0];                                   \
  __add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);        \
  __add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC);  \
  (lP).w[3] = lP2.w[1] + lC;                              \
}
#define __mul_64x256_to_320(P, A, B)                     \
{                                                        \
UINT128 lP0,lP1,lP2,lP3;                                 \
UINT64 lC;                                               \
  __mul_64x64_to_128(lP0, A, (B).w[0]);                  \
  __mul_64x64_to_128(lP1, A, (B).w[1]);                  \
  __mul_64x64_to_128(lP2, A, (B).w[2]);                  \
  __mul_64x64_to_128(lP3, A, (B).w[3]);                  \
  (P).w[0] = lP0.w[0];                                   \
  __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);        \
  __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC);  \
  __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC);  \
  (P).w[4] = lP3.w[1] + lC;                              \
}
#define __mul_192x192_to_384(P, A, B)                    \
{                                                        \
UINT256 P0,P1,P2;                                        \
UINT64 CY;                                               \
  __mul_64x192_to_256(P0, (A).w[0], B);                  \
  __mul_64x192_to_256(P1, (A).w[1], B);                  \
  __mul_64x192_to_256(P2, (A).w[2], B);                  \
  (P).w[0] = P0.w[0];                                    \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);          \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);    \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);    \
  (P).w[4] = P1.w[3] + CY;                               \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);         \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
  (P).w[5] = P2.w[3] + CY;                               \
}
#define __mul_64x320_to_384(P, A, B)                     \
{                                                        \
UINT128 lP0,lP1,lP2,lP3,lP4;                             \
UINT64 lC;                                               \
  __mul_64x64_to_128(lP0, A, (B).w[0]);                  \
  __mul_64x64_to_128(lP1, A, (B).w[1]);                  \
  __mul_64x64_to_128(lP2, A, (B).w[2]);                  \
  __mul_64x64_to_128(lP3, A, (B).w[3]);                  \
  __mul_64x64_to_128(lP4, A, (B).w[4]);                  \
  (P).w[0] = lP0.w[0];                                   \
  __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);        \
  __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC);  \
  __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC);  \
  __add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC);  \
  (P).w[5] = lP4.w[1] + lC;                              \
}
// A*A
// Full 128x128-bit product
#define __sqr128_to_256(P256, A)                                 \
{                                                                \
UINT128 Qll, Qlh, Qhh;                                           \
UINT64 TMP_C1, TMP_C2;                                           \
                                                                 \
  __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                       \
  __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                       \
  Qhh.w[1] += (Qlh.w[1]>>63);                                    \
  Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                 \
  Qlh.w[0] += Qlh.w[0];                                          \
  __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                       \
                                                                 \
  __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);       \
  (P256).w[0] = Qll.w[0];                                        \
  __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1);    \
  (P256).w[3] = Qhh.w[1]+TMP_C2;                                 \
}
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)            \
{                                                                \
UINT128 Qll, Qlh;                                                \
UINT64 Phl, Phh, C1, C2;                                         \
                                                                 \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                        \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);                        \
  (PQl).w[0] = Qll.w[0];                                         \
  __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);            \
  __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);          \
  (PQh).w[1] = Phh + C2;                                         \
}
#define __mul_256x256_to_512(P, A, B)                    \
{                                                        \
UINT512 P0,P1,P2,P3;                                     \
UINT64 CY;                                               \
  __mul_64x256_to_320(P0, (A).w[0], B);                  \
  __mul_64x256_to_320(P1, (A).w[1], B);                  \
  __mul_64x256_to_320(P2, (A).w[2], B);                  \
  __mul_64x256_to_320(P3, (A).w[3], B);                  \
  (P).w[0] = P0.w[0];                                    \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);          \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);    \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);    \
  __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY);    \
  (P).w[5] = P1.w[4] + CY;                               \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);         \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
  __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
  (P).w[6] = P2.w[4] + CY;                               \
  __add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);         \
  __add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY);   \
  __add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY);   \
  __add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY);   \
  (P).w[7] = P3.w[4] + CY;                               \
}
#define __mul_192x256_to_448(P, A, B)                    \
{                                                        \
UINT512 P0,P1,P2;                                        \
UINT64 CY;                                               \
  __mul_64x256_to_320(P0, (A).w[0], B);                  \
  __mul_64x256_to_320(P1, (A).w[1], B);                  \
  __mul_64x256_to_320(P2, (A).w[2], B);                  \
  (P).w[0] = P0.w[0];                                    \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);          \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);    \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);    \
  __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY);    \
  (P).w[5] = P1.w[4] + CY;                               \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);         \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
  __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
  (P).w[6] = P2.w[4] + CY;                               \
}
#define __mul_320x320_to_640(P, A, B)                    \
{                                                        \
UINT512 P0,P1,P2,P3;                                     \
UINT64 CY;                                               \
  __mul_256x256_to_512((P), (A), B);                     \
  __mul_64x256_to_320(P1, (A).w[4], B);                  \
  __mul_64x256_to_320(P2, (B).w[4], A);                  \
  __mul_64x64_to_128(P3, (A).w[4], (B).w[4]);            \
  __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);         \
  __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY);   \
  __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY);   \
  __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY);   \
  __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY);   \
  P3.w[1] += CY;                                         \
  __add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);         \
  __add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY);   \
  __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY);   \
  __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY);   \
  __add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY);    \
  (P).w[9] = P3.w[1] + CY;                               \
}
#define __mul_384x384_to_768(P, A, B)                    \
{                                                        \
UINT512 P0,P1,P2,P3;                                     \
UINT64 CY;                                               \
  __mul_320x320_to_640((P), (A), B);                     \
  __mul_64x320_to_384(P1, (A).w[5], B);                  \
  __mul_64x320_to_384(P2, (B).w[5], A);                  \
  __mul_64x64_to_128(P3, (A).w[5], (B).w[5]);            \
  __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);         \
  __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY);   \
  __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY);   \
  __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY);   \
  __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY);   \
  __add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY);   \
  P3.w[1] += CY;                                         \
  __add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);         \
  __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY);   \
  __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY);   \
  __add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY);   \
  __add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY);   \
  __add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY);   \
  (P).w[11] = P3.w[1] + CY;                              \
}
#define __mul_64x128_short(Ql, A, B)              \
{                                                 \
UINT64 ALBH_L;                                    \
                                                  \
  __mul_64x64_to_64(ALBH_L, (A),(B).w[1]);        \
  __mul_64x64_to_128((Ql), (A), (B).w[0]);        \
                                                  \
  (Ql).w[1] += ALBH_L;                            \
}
#define __scale128_10(D,_TMP)                            \
{                                                        \
UINT128 _TMP2,_TMP8;                                     \
  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);           \
  _TMP2.w[0] = _TMP.w[0]<<1;                             \
  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);           \
  _TMP8.w[0] = _TMP.w[0]<<3;                             \
  __add_128_128(D, _TMP2, _TMP8);                        \
}
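/* __scale128_10 computes D = 10 * _TMP as 2*_TMP + 8*_TMP using only
   shifts and one 128-bit add; e.g. _TMP = 7 yields _TMP2 = 14,
   _TMP8 = 56, D = 70.  Correct as long as the scaled value still
   fits in 128 bits.  */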
// 64x64-bit product
#define __mul_64x64_to_128MACH(P128, CX64, CY64)   \
{                                                  \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;             \
  CXH = (CX64) >> 32;                              \
  CXL = (UINT32)(CX64);                            \
  CYH = (CY64) >> 32;                              \
  CYL = (UINT32)(CY64);                            \
  PM = CXH*CYL;                                    \
  PH = CXH*CYH;                                    \
  PL = CXL*CYL;                                    \
  PM2 = CXL*CYH;                                   \
  PH += (PM>>32);                                  \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
  (P128).w[1] = PH + (PM>>32);                     \
  (P128).w[0] = (PM<<32)+(UINT32)PL;               \
}
// high 64 bits of the 64x64-bit product
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)    \
{                                                  \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;             \
  CXH = (CX64) >> 32;                              \
  CXL = (UINT32)(CX64);                            \
  CYH = (CY64) >> 32;                              \
  CYL = (UINT32)(CY64);                            \
  PM = CXH*CYL;                                    \
  PH = CXH*CYH;                                    \
  PL = CXL*CYL;                                    \
  PM2 = CXL*CYH;                                   \
  PH += (PM>>32);                                  \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
  P64 = PH + (PM>>32);                             \
}
#define __mul_128x64_to_128(Q128, A64, B128)       \
{                                                  \
  UINT64 ALBH_L;                                   \
  ALBH_L = (A64) * (B128).w[1];                    \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]);   \
  (Q128).w[1] += ALBH_L;                           \
}
// might simplify by calculating just QM2.w[0]
#define __mul_64x128_to_128(Ql, A, B)             \
{                                                 \
  UINT128 ALBL, ALBH, QM2;                        \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128  12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128      34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32  191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32      7
////////////////////////////////////////
// Constant Definitions
///////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define SINFINITY_MASK64        0xf800000000000000ull
#define SSNAN_MASK64            0xfc00000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define NAN_MASK32              0x7c000000
#define SNAN_MASK32             0x7e000000
#define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS  0x3ff
#define UPPER_EXPON_LIMIT     51
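// BID64 field layout implied by the constants above: a "small
// coefficient" encoding is [sign:1][biased exponent:10][coeff:53]
// (shift 53), and a "large coefficient" encoding -- marked by the two
// bits below the sign being 11 (SPECIAL_ENCODING_MASK64) -- is
// [sign:1][11][biased exponent:10][coeff:51] (shift 51), with the
// implicit leading bits 100 restored via LARGE_COEFF_HIGH_BIT64.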
// data needed for BID pack/unpack macros
extern UINT64 round_const_table[][19];
extern UINT128 reciprocals10_128[];
extern int recip_scale[];
extern UINT128 power10_table_128[];
extern int estimate_decimal_digits[];
extern int estimate_bin_expon[];
extern UINT64 power10_index_binexp[];
extern int short_recip_scale[];
extern UINT64 reciprocals10_64[];
extern UINT128 power10_index_binexp_128[];
extern UINT128 round_const_table_128[][36];


//////////////////////////////////////////////
//  Status Flag Handling
/////////////////////////////////////////////
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)

__BID_INLINE__ UINT64
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
	      UINT64 * pcoefficient_x, UINT64 x) {
  UINT64 tmp, coeff;

  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pexponent_x = 0;
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
	*pcoefficient_x = x & 0xfe00000000000000ull;
      if ((x & NAN_MASK64) == INFINITY_MASK64)
	*pcoefficient_x = x & SINFINITY_MASK64;
      return 0;	// NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return coeff;
  }
  // exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
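/* Illustrative use: the BID64 encoding 0x31c0000000000001 is
   +1 * 10^(398-398), i.e. the value 1:

     UINT64 sign, coeff;
     int expon;
     unpack_BID64 (&sign, &expon, &coeff, 0x31c0000000000001ull);
     // sign == 0, expon == 398 (biased), coeff == 1

   The return value is the coefficient (0 for NaN, infinity, or a
   non-canonical coefficient), so callers can test it directly.  */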

//
//   BID64 pack macro (general form)
//
__BID_INLINE__ UINT64
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Stemp, Q_low;
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
  int extra_digits, amount, amount2;
  unsigned status;

  if (coeff > 9999999999999999ull) {
    expon++;
    coeff = 1000000000000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, coeff,
			 reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;
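      /* C64 now holds coeff / 10^extra_digits, rounded per rmode:
         reciprocals10_128[k] is a scaled reciprocal of 10^k
         (approximately 2^(128+recip_scale[k]) / 10^k), so the 64x128
         multiply followed by the shift performs the division, and the
         round_const_table addend folded into coeff above selects the
         rounding direction.  */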

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
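/* Illustrative round-trip: packing coefficient 15 with biased decimal
   exponent 397 (i.e. 10^-1) gives 1.5:

     unsigned fpsc = 0;
     UINT64 r = get_BID64 (0, DECIMAL_EXPONENT_BIAS - 1, 15ull,
                           ROUNDING_TO_NEAREST, &fpsc);
     // r == 0x31a000000000000full, no status flags set

   Note that the exponent argument is already biased; callers work
   with the biased exponent throughout this file.  */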



//
//   No overflow/underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   no underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
			 unsigned *fpsc) {
  UINT64 r, mask;

  if (((unsigned) expon) >= 3 * 256 - 1) {
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
	expon--;
	coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
	// overflow
	r = sgn | INFINITY_MASK64;
	switch (rmode) {
	case ROUNDING_DOWN:
	  if (!sgn)
	    r = LARGEST_BID64;
	  break;
	case ROUNDING_TO_ZERO:
	  r = sgn | LARGEST_BID64;
	  break;
	case ROUNDING_UP:
	  // round up
	  if (sgn)
	    r = SMALLEST_BID64;
	}
	return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   No overflow/underflow checking
//   or checking for coefficients equal to 10^16 (after rounding)
//
__BID_INLINE__ UINT64
very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}

//
//   No overflow/underflow checking or checking for coefficients above 2^53
//
__BID_INLINE__ UINT64
very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
  // no UF/OF
  UINT64 r;

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);
  return r;
}


//
// This pack macro is used when underflow is known to occur
//
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
	      unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
		     reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;
      remainder_h >>= amount2;
      remainder_h = remainder_h & QH;

      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0]))) {
	C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (remainder_h == 0x8000000000000000ull
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
		       reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			  reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
	  (((UINT64) 1) << amount))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}


//
//   This pack macro does not check for coefficients above 2^53
//
1302 __BID_INLINE__ UINT64
get_BID64_small_mantissa(UINT64 sgn,int expon,UINT64 coeff,int rmode,unsigned * fpsc)1303 get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
1304 			  int rmode, unsigned *fpsc) {
1305   UINT128 C128, Q_low, Stemp;
1306   UINT64 r, mask, C64, remainder_h, QH, carry, CY;
1307   int extra_digits, amount, amount2;
1308   unsigned status;
1309 
1310   // check for possible underflow/overflow
1311   if (((unsigned) expon) >= 3 * 256) {
1312     if (expon < 0) {
1313       // underflow
1314       if (expon + MAX_FORMAT_DIGITS < 0) {
1315 #ifdef SET_STATUS_FLAGS
1316 	__set_status_flags (fpsc,
1317 			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1318 #endif
1319 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1320 #ifndef IEEE_ROUND_NEAREST
1321 	if (rmode == ROUNDING_DOWN && sgn)
1322 	  return 0x8000000000000001ull;
1323 	if (rmode == ROUNDING_UP && !sgn)
1324 	  return 1ull;
1325 #endif
1326 #endif
1327 	// result is 0
1328 	return sgn;
1329       }
1330 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1331 #ifndef IEEE_ROUND_NEAREST
1332       if (sgn && (unsigned) (rmode - 1) < 2)
1333 	rmode = 3 - rmode;
1334 #endif
1335 #endif
1336       // get digits to be shifted out
1337       extra_digits = -expon;
1338       C128.w[0] = coeff + round_const_table[rmode][extra_digits];
1339 
1340       // get coeff*(2^M[extra_digits])/10^extra_digits
1341       __mul_64x128_full (QH, Q_low, C128.w[0],
1342 			 reciprocals10_128[extra_digits]);
1343 
1344       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1345       amount = recip_scale[extra_digits];
1346 
1347       C64 = QH >> amount;
1348 
1349 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1350 #ifndef IEEE_ROUND_NEAREST
1351       if (rmode == 0)	//ROUNDING_TO_NEAREST
1352 #endif
1353 	if (C64 & 1) {
1354 	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
1355 
1356 	  // get remainder
1357 	  amount2 = 64 - amount;
1358 	  remainder_h = 0;
1359 	  remainder_h--;
1360 	  remainder_h >>= amount2;
1361 	  remainder_h = remainder_h & QH;
1362 
1363 	  if (!remainder_h
1364 	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1365 		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1366 		      && Q_low.w[0] <
1367 		      reciprocals10_128[extra_digits].w[0]))) {
1368 	    C64--;
1369 	  }
1370 	}
1371 #endif
1372 
1373 #ifdef SET_STATUS_FLAGS
1374 
1375       if (is_inexact (fpsc))
1376 	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1377       else {
1378 	status = INEXACT_EXCEPTION;
1379 	// get remainder
1380 	remainder_h = QH << (64 - amount);
1381 
1382 	switch (rmode) {
1383 	case ROUNDING_TO_NEAREST:
1384 	case ROUNDING_TIES_AWAY:
1385 	  // test whether fractional part is 0
1386 	  if (remainder_h == 0x8000000000000000ull
1387 	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1388 		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1389 		      && Q_low.w[0] <
1390 		      reciprocals10_128[extra_digits].w[0])))
1391 	    status = EXACT_STATUS;
1392 	  break;
1393 	case ROUNDING_DOWN:
1394 	case ROUNDING_TO_ZERO:
1395 	  if (!remainder_h
1396 	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1397 		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1398 		      && Q_low.w[0] <
1399 		      reciprocals10_128[extra_digits].w[0])))
1400 	    status = EXACT_STATUS;
1401 	  break;
1402 	default:
1403 	  // round up
1404 	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1405 			   reciprocals10_128[extra_digits].w[0]);
1406 	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1407 			      reciprocals10_128[extra_digits].w[1], CY);
1408 	  if ((remainder_h >> (64 - amount)) + carry >=
1409 	      (((UINT64) 1) << amount))
1410 	    status = EXACT_STATUS;
1411 	}
1412 
1413 	if (status != EXACT_STATUS)
1414 	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1415       }
1416 
1417 #endif
1418 
1419       return sgn | C64;
1420     }
1421 
1422     while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1423       expon--;
1424       coeff = (coeff << 3) + (coeff << 1);
1425     }
1426     if (expon > DECIMAL_MAX_EXPON_64) {
1427 #ifdef SET_STATUS_FLAGS
1428       __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1429 #endif
1430       // overflow
1431       r = sgn | INFINITY_MASK64;
1432       switch (rmode) {
1433       case ROUNDING_DOWN:
1434 	if (!sgn)
1435 	  r = LARGEST_BID64;
1436 	break;
1437       case ROUNDING_TO_ZERO:
1438 	r = sgn | LARGEST_BID64;
1439 	break;
1440       case ROUNDING_UP:
1441 	// round up
1442 	if (sgn)
1443 	  r = SMALLEST_BID64;
1444       }
1445       return r;
1446     } else {
1447       mask = 1;
1448       mask <<= EXPONENT_SHIFT_SMALL64;
1449       if (coeff >= mask) {
1450 	r = expon;
1451 	r <<= EXPONENT_SHIFT_LARGE64;
1452 	r |= (sgn | SPECIAL_ENCODING_MASK64);
1453 	// add coeff, without leading bits
1454 	mask = (mask >> 2) - 1;
1455 	coeff &= mask;
1456 	r |= coeff;
1457 	return r;
1458       }
1459     }
1460   }
1461 
1462   r = expon;
1463   r <<= EXPONENT_SHIFT_SMALL64;
1464   r |= (coeff | sgn);
1465 
1466   return r;
1467 }
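
//
//   Usage sketch (illustrative only): packing +1234567 * 10^0 with the
//   routine above, assuming the BID64 exponent bias is 398 and
//   EXPONENT_SHIFT_SMALL64 == 53, so unbiased exponent 0 is passed as 398:
//
//     unsigned flags = 0;
//     UINT64 bid = get_BID64_small_mantissa (0ull, 398, 1234567ull,
//                                            ROUNDING_TO_NEAREST, &flags);
//     // coeff < 2^53 and the exponent is in range, so the final branch
//     // packs:  bid == (398ull << 53) | 1234567
//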


/*****************************************************************************
*
*    BID128 pack/unpack macros
*
*****************************************************************************/

//
//   Macro for handling BID128 underflow
//         sticky bit given as additional argument
//
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder
  if (R)
    CQ.w[0] |= 1;

  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed2 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
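
//
//   Note on the sticky bit R: CQ is first scaled by 10 (CQ*2 + CQ*8) and a
//   nonzero remainder is OR-ed into the low bit, so digits discarded before
//   the call still influence rounding.  A small made-up illustration:
//
//     // CQ = 25, R == 0:  scaled CQ = 250; 250/100 = 2.5 is an exact tie
//     //                   and rounds to 2 under round-to-nearest-even.
//     // CQ = 25, R != 0:  scaled CQ = 251; 2.51 is above the midpoint and
//     //                   correctly rounds to 3.
//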


//
//   Macro for handling BID128 underflow
//
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
	       unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed2 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
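
//
//   Usage sketch (illustrative): a coefficient of 1234 at biased exponent
//   -2 (two decimal positions below the minimum) is rounded up to the
//   minimum exponent.  With w[0] the low word, as in the code above, and
//   assuming round-to-nearest:
//
//     UINT128 res, cq;
//     unsigned rnd = ROUNDING_TO_NEAREST, flags = 0;
//     cq.w[0] = 1234ull;  cq.w[1] = 0ull;
//     handle_UF_128 (&res, 0ull, -2, cq, &rnd, &flags);
//     // ed2 == 2, so CQ is divided by 10^2: 12.34 rounds to 12, giving
//     // coefficient 12 at biased exponent 0 (underflow/inexact flags are
//     // set when SET_STATUS_FLAGS is defined)
//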



//
//  BID128 unpack, input passed by value
//
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
		     UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];
    /*coeff.w[0] = x.w[0];
       coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
       pcoefficient_x->w[0] = x.w[0];
       pcoefficient_x->w[1] = x.w[1];
       if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
       pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */

    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
    {
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
      pcoefficient_x->w[0] = 0;
    } else
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
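
//
//   Decoding sketch (illustrative): the canonical BID128 encoding of +1 is
//   w[1] = 0x3040000000000000, w[0] = 1 -- biased exponent 6176 (unbiased
//   0) and coefficient 1.  With w[0] the low word, as in the code above:
//
//     UINT64 sign;  int e;  UINT128 c, x;
//     x.w[0] = 1ull;  x.w[1] = 0x3040000000000000ull;
//     unpack_BID128_value (&sign, &e, &c, x);
//     // sign == 0, e == 6176, c.w[1] == 0, c.w[0] == 1; the nonzero
//     // return value indicates a nonzero, canonical coefficient
//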


//
//  BID128 unpack, input passed by reference
//
__BID_INLINE__ UINT64
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
	       UINT128 * pcoefficient_x, UINT128 * px) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (px->w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) {	// non-canonical
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
      pcoefficient_x->w[0] = 0;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}

//
//   Pack macro checks for overflow, but not underflow
//
__BID_INLINE__ UINT128 *
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
			 UINT128 coeff, unsigned *prounding_mode,
			 unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  coeff.w[1]++;

	expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP)
	  || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}


//
//   No overflow/underflow checks
//   No checking for coefficient == 10^34 (rounding artifact)
//
__BID_INLINE__ UINT128 *
get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
		      UINT128 coeff) {
  UINT64 tmp;

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}

//
//   No overflow/underflow checks
//
__BID_INLINE__ UINT128 *
get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT64 tmp;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
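
//
//   Why the 10^34 test above: a coefficient that rounded up to exactly
//   10^34 = 0x0001ed09bead87c0378d8e6400000000 no longer fits in 34
//   decimal digits, but since 10^34 == 10 * 10^33 the same value can be
//   re-expressed exactly as coefficient 10^33 with the exponent
//   incremented by one, which is canonical again:
//
//     // (10^34) * 10^e  ==  (10^33) * 10^(e+1)
//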

//
//   General BID128 pack macro
//
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
	    unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0) {
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
			    fpsc);
    }

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  coeff.w[1]++;

	expon--;
      }
    }
    if (expon > DECIMAL_MAX_EXPON_128) {
      if (!(coeff.w[1] | coeff.w[0])) {
	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
	pres->w[0] = 0;
	return pres;
      }
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP)
	  || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
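
//
//   Usage sketch (illustrative): packing +5 at biased exponent 6176
//   (unbiased 0), with w[0] the low word as in the code above:
//
//     UINT128 res, coeff;
//     unsigned rnd = ROUNDING_TO_NEAREST, flags = 0;
//     coeff.w[0] = 5ull;  coeff.w[1] = 0ull;
//     get_BID128 (&res, 0ull, 6176, coeff, &rnd, &flags);
//     // no overflow/underflow, so the fast path packs:
//     // res.w[1] == (6176ull << 49), res.w[0] == 5
//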


//
//  Macro used for conversions from string
//        (no additional arguments given for rounding mode, status flags)
//
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  unsigned rmode = 0, status;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      while (expon > DECIMAL_MAX_EXPON_128 &&
	     (coeff.w[1] < power10_table_128[33].w[1] ||
	      (coeff.w[1] == power10_table_128[33].w[1]
	       && coeff.w[0] < power10_table_128[33].w[0]))) {
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
	D2.w[0] = coeff.w[0] << 1;
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
	D8.w[0] = coeff.w[0] << 3;

	__add_128_128 (coeff, D2, D8);
	expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn) {
	  pres->w[1] = LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      case ROUNDING_TO_ZERO:
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn) {
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      }

      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}



/*****************************************************************************
*
*    BID32 pack/unpack macros
*
*****************************************************************************/


__BID_INLINE__ UINT32
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
	      UINT32 * pcoefficient_x, UINT32 x) {
  UINT32 tmp;

  *psign_x = x & 0x80000000;

  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
    // special encodings
    if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
      *pcoefficient_x = x & 0xfe0fffff;
      if ((x & 0x000fffff) >= 1000000)
	*pcoefficient_x = x & 0xfe000000;
      if ((x & NAN_MASK32) == INFINITY_MASK32)
	*pcoefficient_x = x & 0xf8000000;
      *pexponent_x = 0;
      return 0;	// NaN or Infinity
    }
    // coefficient
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
    // check for non-canonical value
    if (*pcoefficient_x >= 10000000)
      *pcoefficient_x = 0;
    // get exponent
    tmp = x >> 21;
    *pexponent_x = tmp & EXPONENT_MASK32;
    return 1;
  }
  // exponent
  tmp = x >> 23;
  *pexponent_x = tmp & EXPONENT_MASK32;
  // coefficient
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);

  return *pcoefficient_x;
}
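
//
//   Decoding sketch (illustrative): 0x32800001 is the canonical BID32
//   encoding of +1 (biased exponent 101, coefficient 1):
//
//     UINT32 sign, c;  int e;
//     unpack_BID32 (&sign, &e, &c, 0x32800001);
//     // sign == 0, e == 101, c == 1; for this (non-special) encoding
//     // the return value is the coefficient itself
//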

//
//   General pack macro for BID32
//
__BID_INLINE__ UINT32
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Q;
  UINT64 C64, remainder_h, carry, Stemp;
  UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x80000001;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1;
#endif
#endif
	// result is 0
	return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif

      extra_digits = -expon;
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = short_recip_scale[extra_digits];

      C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	// ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & Q.w[1];

	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = Q.w[1] << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp, carry, Q.w[0],
			   reciprocals10_64[extra_digits]);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (UINT32) C64;
    }

    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID32;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID32;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((UINT32) coeff) & mask);

  return r;
}
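
//
//   Packing sketch (illustrative): the inverse of the decoding example
//   above.  Sign 0, biased exponent 101, coefficient 1:
//
//     unsigned flags = 0;
//     UINT32 r = get_BID32 (0, 101, 1ull, ROUNDING_TO_NEAREST, &flags);
//     // coeff fits in 23 bits, so r == (101u << 23) | 1 == 0x32800001
//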


//
//   no overflow/underflow checks
//
__BID_INLINE__ UINT32
very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
  UINT32 r, mask;

  mask = 1 << 23;

  // check whether coefficient fits in 23 bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


/*************************************************************
 *
 *************************************************************/
typedef ALIGN (16)
     struct {
       UINT64 w[6];
     } UINT384;
typedef ALIGN (16)
     struct {
       UINT64 w[8];
     } UINT512;

// #define P                               34
#define MASK_STEERING_BITS              0x6000000000000000ull
#define MASK_BINARY_EXPONENT1           0x7fe0000000000000ull
#define MASK_BINARY_SIG1                0x001fffffffffffffull
#define MASK_BINARY_EXPONENT2           0x1ff8000000000000ull
    // used to take G[2:w+3] (sec 3.3)
#define MASK_BINARY_SIG2                0x0007ffffffffffffull
    // used to mask out G4:T0 (sec 3.3)
#define MASK_BINARY_OR2                 0x0020000000000000ull
    // used to prefix 8+G4 to T (sec 3.3)
#define UPPER_EXPON_LIMIT               51
#define MASK_EXP                        0x7ffe000000000000ull
#define MASK_SPECIAL                    0x7800000000000000ull
#define MASK_NAN                        0x7c00000000000000ull
#define MASK_SNAN                       0x7e00000000000000ull
#define MASK_ANY_INF                    0x7c00000000000000ull
#define MASK_INF                        0x7800000000000000ull
#define MASK_SIGN                       0x8000000000000000ull
#define MASK_COEFF                      0x0001ffffffffffffull
#define BIN_EXP_BIAS                    (0x1820ull << 49)

#define EXP_MIN                         0x0000000000000000ull
  // EXP_MIN = (-6176 + 6176) << 49
#define EXP_MAX                         0x5ffe000000000000ull
  // EXP_MAX = (6111 + 6176) << 49
#define EXP_MAX_P1                      0x6000000000000000ull
  // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
#define EXP_P1                          0x0002000000000000ull
  // EXP_P1 = 1 << 49
#define expmin                            -6176
  // min unbiased exponent (34-digit format)
#define expmax                            6111
  // max unbiased exponent (34-digit format)
#define expmin16                          -398
  // min unbiased exponent (16-digit format)
#define expmax16                          369
  // max unbiased exponent (16-digit format)

#define SIGNMASK32 0x80000000
#define BID64_SIG_MAX 0x002386F26FC0ffffull
#define SIGNMASK64    0x8000000000000000ull

// typedef unsigned int FPSC; // floating-point status and control
	// bit31:
	// bit30:
	// bit29:
	// bit28:
	// bit27:
	// bit26:
	// bit25:
	// bit24:
	// bit23:
	// bit22:
	// bit21:
	// bit20:
	// bit19:
	// bit18:
	// bit17:
	// bit16:
	// bit15:
	// bit14: RC:2
	// bit13: RC:1
	// bit12: RC:0
	// bit11: PM
	// bit10: UM
	// bit9:  OM
	// bit8:  ZM
	// bit7:  DM
	// bit6:  IM
	// bit5:  PE
	// bit4:  UE
	// bit3:  OE
	// bit2:  ZE
	// bit1:  DE
	// bit0:  IE

#define ROUNDING_MODE_MASK	0x00007000

     typedef struct _DEC_DIGITS {
       unsigned int digits;
       UINT64 threshold_hi;
       UINT64 threshold_lo;
       unsigned int digits1;
     } DEC_DIGITS;

     extern DEC_DIGITS nr_digits[];
     extern UINT64 midpoint64[];
     extern UINT128 midpoint128[];
     extern UINT192 midpoint192[];
     extern UINT256 midpoint256[];
     extern UINT64 ten2k64[];
     extern UINT128 ten2k128[];
     extern UINT256 ten2k256[];
     extern UINT128 ten2mk128[];
     extern UINT64 ten2mk64[];
     extern UINT128 ten2mk128trunc[];
     extern int shiftright128[];
     extern UINT64 maskhigh128[];
     extern UINT64 maskhigh128M[];
     extern UINT64 maskhigh192M[];
     extern UINT64 maskhigh256M[];
     extern UINT64 onehalf128[];
     extern UINT64 onehalf128M[];
     extern UINT64 onehalf192M[];
     extern UINT64 onehalf256M[];
     extern UINT128 ten2mk128M[];
     extern UINT128 ten2mk128truncM[];
     extern UINT192 ten2mk192truncM[];
     extern UINT256 ten2mk256truncM[];
     extern int shiftright128M[];
     extern int shiftright192M[];
     extern int shiftright256M[];
     extern UINT192 ten2mk192M[];
     extern UINT256 ten2mk256M[];
     extern unsigned char char_table2[];
     extern unsigned char char_table3[];

     extern UINT64 ten2m3k64[];
     extern unsigned int shift_ten2m3k64[];
     extern UINT128 ten2m3k128[];
     extern unsigned int shift_ten2m3k128[];



/***************************************************************************
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
 ***************************************************************************/

     extern UINT64 Kx64[];
     extern unsigned int Ex64m64[];
     extern UINT64 half64[];
     extern UINT64 mask64[];
     extern UINT64 ten2mxtrunc64[];

     extern UINT128 Kx128[];
     extern unsigned int Ex128m128[];
     extern UINT64 half128[];
     extern UINT64 mask128[];
     extern UINT128 ten2mxtrunc128[];

     extern UINT192 Kx192[];
     extern unsigned int Ex192m192[];
     extern UINT64 half192[];
     extern UINT64 mask192[];
     extern UINT192 ten2mxtrunc192[];

     extern UINT256 Kx256[];
     extern unsigned int Ex256m256[];
     extern UINT64 half256[];
     extern UINT64 mask256[];
     extern UINT256 ten2mxtrunc256[];

     typedef union __bid64_128 {
       UINT64 b64;
       UINT128 b128;
     } BID64_128;

     BID64_128 bid_fma (unsigned int P0,
			BID64_128 x1, unsigned int P1,
			BID64_128 y1, unsigned int P2,
			BID64_128 z1, unsigned int P3,
			unsigned int rnd_mode, FPSC * fpsc);

#define         P16     16
#define         P34     34

     union __int_double {
       UINT64 i;
       double d;
     };
     typedef union __int_double int_double;


     union __int_float {
       UINT32 i;
       float d;
     };
     typedef union __int_float int_float;

#define SWAP(A,B,T) {\
        T = A; \
        A = B; \
        B = T; \
}

// this macro will find coefficient_x to be in [2^A, 2^(A+1) )
// i.e. it knows that it is A bits long
#define NUMBITS(A, coefficient_x, tempx){\
      tempx.d=(float)coefficient_x;\
      A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
}
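
//
//   Usage sketch (illustrative), using the int_float union declared above:
//
//     int_float tmp;
//     int nbits;
//     NUMBITS (nbits, 1000000u, tmp);
//     // (float) 1000000 has biased binary exponent 0x92, so
//     // nbits == 0x92 - 0x7f == 19, and indeed 2^19 <= 1000000 < 2^20
//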

     enum class_types {
       signalingNaN,
       quietNaN,
       negativeInfinity,
       negativeNormal,
       negativeSubnormal,
       negativeZero,
       positiveZero,
       positiveSubnormal,
       positiveNormal,
       positiveInfinity
     };

     typedef union {
       UINT64 ui64;
       double d;
     } BID_UI64DOUBLE;

#endif