1 /******************************************************************************
2   Copyright (c) 2007-2011, Intel Corp.
3   All rights reserved.
4 
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions are met:
7 
8     * Redistributions of source code must retain the above copyright notice,
9       this list of conditions and the following disclaimer.
10     * Redistributions in binary form must reproduce the above copyright
11       notice, this list of conditions and the following disclaimer in the
12       documentation and/or other materials provided with the distribution.
13     * Neither the name of Intel Corporation nor the names of its contributors
14       may be used to endorse or promote products derived from this software
15       without specific prior written permission.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27   THE POSSIBILITY OF SUCH DAMAGE.
28 ******************************************************************************/
29 
30 #ifndef __BIDECIMAL_H
31 #define __BIDECIMAL_H
32 
33 #define _CRT_SECURE_NO_DEPRECATE
34 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
35 #   pragma warning( disable: 4996 )
36 #endif
37 
38 #include "bid_conf.h"
39 #include "bid_functions.h"
40 
41 #define __BID_INLINE__ static __inline
42 
43 #if defined (__INTEL_COMPILER)
44 #define FENCE __fence
45 #else
46 #define FENCE
47 #endif
48 
49 /*********************************************************************
50  *
51  *      Logical Shift Macros
52  *
53  *********************************************************************/
54 
55 #define __shr_128(Q, A, k)                        \
56 {                                                 \
57      (Q).w[0] = (A).w[0] >> k;                      \
58 	 (Q).w[0] |= (A).w[1] << (64-k);               \
59 	 (Q).w[1] = (A).w[1] >> k;                    \
60 }
61 
62 #define __shr_128_long(Q, A, k)                   \
63 {                                                 \
64 	if((k)<64) {                                  \
65      (Q).w[0] = (A).w[0] >> k;                    \
66 	 (Q).w[0] |= (A).w[1] << (64-k);              \
67 	 (Q).w[1] = (A).w[1] >> k;                    \
68 	}                                             \
69 	else {                                        \
70 	 (Q).w[0] = (A).w[1]>>((k)-64);               \
71 	 (Q).w[1] = 0;                                \
72 	}                                             \
73 }
74 
75 #define __shl_128_long(Q, A, k)                   \
76 {                                                 \
77 	if((k)<64) {                                  \
78      (Q).w[1] = (A).w[1] << k;                    \
79 	 (Q).w[1] |= (A).w[0] >> (64-k);              \
80 	 (Q).w[0] = (A).w[0] << k;                    \
81 	}                                             \
82 	else {                                        \
83 	 (Q).w[1] = (A).w[0]<<((k)-64);               \
84 	 (Q).w[0] = 0;                                \
85 	}                                             \
86 }
87 
88 #define __low_64(Q)  (Q).w[0]
89 /*********************************************************************
90  *
91  *      String Macros
92  *
93  *********************************************************************/
94 #define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
95 /*********************************************************************
96  *
97  *      Compare Macros
98  *
99  *********************************************************************/
100 // greater than
101 //  return 0 if A<=B
102 //  non-zero if A>B
103 #define __unsigned_compare_gt_128(A, B)  \
104     ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
105 // greater-or-equal
106 #define __unsigned_compare_ge_128(A, B)  \
107     ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
108 #define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
109 // tighten exponent range
110 #define __tight_bin_range_128(bp, P, bin_expon)  \
111 {                                                \
112 BID_UINT64 M;                                        \
113 	M = 1;                                       \
114 	(bp) = (bin_expon);                          \
115 	if((bp)<63) {                                \
116 	  M <<= ((bp)+1);                            \
117 	  if((P).w[0] >= M) (bp)++; }                 \
118 	else if((bp)>64) {                           \
119 	  M <<= ((bp)+1-64);                         \
120 	  if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
121 	      (bp)++; }                              \
122 	else if((P).w[1]) (bp)++;                    \
123 }
124 /*********************************************************************
125  *
126  *      Add/Subtract Macros
127  *
128  *********************************************************************/
129 // add 64-bit value to 128-bit
130 #define __add_128_64(R128, A128, B64)    \
131 {                                        \
132 BID_UINT64 R64H;                             \
133 	R64H = (A128).w[1];                 \
134 	(R128).w[0] = (B64) + (A128).w[0];     \
135 	if((R128).w[0] < (B64))               \
136 	  R64H ++;                           \
137     (R128).w[1] = R64H;                  \
138 }
139 // subtract 64-bit value from 128-bit
140 #define __sub_128_64(R128, A128, B64)    \
141 {                                        \
142 BID_UINT64 R64H;                             \
143 	R64H = (A128).w[1];                  \
144 	if((A128).w[0] < (B64))               \
145 	  R64H --;                           \
146     (R128).w[1] = R64H;                  \
147 	(R128).w[0] = (A128).w[0] - (B64);     \
148 }
149 // add 128-bit value to 128-bit
150 // assume no carry-out
151 #define __add_128_128(R128, A128, B128)  \
152 {                                        \
153 BID_UINT128 Q128;                            \
154 	Q128.w[1] = (A128).w[1]+(B128).w[1]; \
155 	Q128.w[0] = (B128).w[0] + (A128).w[0];  \
156 	if(Q128.w[0] < (B128).w[0])            \
157 	  Q128.w[1] ++;                      \
158     (R128).w[1] = Q128.w[1];             \
159     (R128).w[0] = Q128.w[0];               \
160 }
161 #define __sub_128_128(R128, A128, B128)  \
162 {                                        \
163 BID_UINT128 Q128;                            \
164 	Q128.w[1] = (A128).w[1]-(B128).w[1]; \
165 	Q128.w[0] = (A128).w[0] - (B128).w[0];  \
166 	if((A128).w[0] < (B128).w[0])          \
167 	  Q128.w[1] --;                      \
168     (R128).w[1] = Q128.w[1];             \
169     (R128).w[0] = Q128.w[0];               \
170 }
171 #define __add_carry_out(S, CY, X, Y)    \
172 {                                      \
173 BID_UINT64 X1=X;                           \
174 	S = X + Y;                         \
175 	CY = (S<X1) ? 1 : 0;                \
176 }
177 #define __add_carry_in_out(S, CY, X, Y, CI)    \
178 {                                             \
179 BID_UINT64 X1;                                    \
180 	X1 = X + CI;                              \
181 	S = X1 + Y;                               \
182 	CY = ((S<X1) || (X1<CI)) ? 1 : 0;          \
183 }
184 #define __sub_borrow_out(S, CY, X, Y)    \
185 {                                      \
186 BID_UINT64 X1=X;                           \
187 	S = X - Y;                         \
188 	CY = (S>X1) ? 1 : 0;                \
189 }
190 #define __sub_borrow_in_out(S, CY, X, Y, CI)    \
191 {                                             \
192 BID_UINT64 X1, X0=X;                              \
193 	X1 = X - CI;                              \
194 	S = X1 - Y;                               \
195 	CY = ((S>X1) || (X1>X0)) ? 1 : 0;          \
196 }
197 // increment C128 and check for rounding overflow:
198 // if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
199 #define INCREMENT(C_128, exp)                                           \
200 {                                                                       \
201   C_128.w[0]++;                                                         \
202   if (C_128.w[0] == 0) C_128.w[1]++;                                    \
203   if (C_128.w[1] == 0x0001ed09bead87c0ull &&                            \
204       C_128.w[0] == 0x378d8e6400000000ull) {                            \
205     exp++;                                                              \
206     C_128.w[1] = 0x0000314dc6448d93ull;                                 \
207     C_128.w[0] = 0x38c15b0a00000000ull;                                 \
208   }                                                                     \
209 }
210 // decrement C128 and check for rounding underflow, but only at the
211 // boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
212 // and decrement the exponent
213 #define DECREMENT(C_128, exp)                                           \
214 {                                                                       \
215   C_128.w[0]--;                                                         \
216   if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--;                \
217   if (C_128.w[1] == 0x0000314dc6448d93ull &&                            \
218       C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {                 \
219     exp--;                                                              \
220     C_128.w[1] = 0x0001ed09bead87c0ull;                                 \
221     C_128.w[0] = 0x378d8e63ffffffffull;                                 \
222   }                                                                     \
223 }
224 
225  /*********************************************************************
226  *
227  *      Multiply Macros
228  *
229  *********************************************************************/
230 #define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
231 /***************************************
232  *  Signed, Full 64x64-bit Multiply
233  ***************************************/
234 #define __imul_64x64_to_128(P, CX, CY)  \
235 {                                       \
236 BID_UINT64 SX, SY;                          \
237    __mul_64x64_to_128(P, CX, CY);       \
238                                         \
239    SX = ((BID_SINT64)(CX))>>63;             \
240    SY = ((BID_SINT64)(CY))>>63;             \
241    SX &= CY;   SY &= CX;                \
242                                         \
243    (P).w[1] = (P).w[1] - SX - SY;       \
244 }
245 /***************************************
246  *  Signed, Full 64x128-bit Multiply
247  ***************************************/
248 #define __imul_64x128_full(Ph, Ql, A, B)          \
249 {                                                 \
250 BID_UINT128 ALBL, ALBH, QM2, QM;                      \
251                                                   \
252 	__imul_64x64_to_128(ALBH, (A), (B).w[1]);     \
253 	__imul_64x64_to_128(ALBL, (A), (B).w[0]);      \
254                                                   \
255 	(Ql).w[0] = ALBL.w[0];                          \
256 	QM.w[0] = ALBL.w[1];                           \
257 	QM.w[1] = ((BID_SINT64)ALBL.w[1])>>63;            \
258     __add_128_128(QM2, ALBH, QM);                 \
259 	(Ql).w[1] = QM2.w[0];                          \
260     Ph = QM2.w[1];                                \
261 }
262 /*****************************************************
263  *      Unsigned Multiply Macros
264  *****************************************************/
265 // get full 64x64bit product
266 //
267 #define __mul_64x64_to_128(P, CX, CY)   \
268 {                                       \
269 BID_UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
270 	CXH = (CX) >> 32;                     \
271 	CXL = (BID_UINT32)(CX);                   \
272 	CYH = (CY) >> 32;                     \
273 	CYL = (BID_UINT32)(CY);                   \
274 	                                      \
275     PM = CXH*CYL;                         \
276 	PH = CXH*CYH;                         \
277 	PL = CXL*CYL;                         \
278 	PM2 = CXL*CYH;                        \
279 	PH += (PM>>32);                       \
280 	PM = (BID_UINT64)((BID_UINT32)PM)+PM2+(PL>>32); \
281                                           \
282 	(P).w[1] = PH + (PM>>32);             \
283 	(P).w[0] = (PM<<32)+(BID_UINT32)PL;       \
284 }
285 // get full 64x64bit product
286 // Note:
287 // This macro is used for CX < 2^61, CY < 2^61
288 //
289 #define __mul_64x64_to_128_fast(P, CX, CY)   \
290 {                                       \
291 BID_UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
292 	CXH = (CX) >> 32;                   \
293 	CXL = (BID_UINT32)(CX);                 \
294 	CYH = (CY) >> 32;                   \
295 	CYL = (BID_UINT32)(CY);                 \
296 	                                    \
297     PM = CXH*CYL;                       \
298 	PL = CXL*CYL;                       \
299 	PH = CXH*CYH;                       \
300 	PM += CXL*CYH;                      \
301 	PM += (PL>>32);                     \
302                                         \
303 	(P).w[1] = PH + (PM>>32);           \
304 	(P).w[0] = (PM<<32)+(BID_UINT32)PL;      \
305 }
306 // used for CX< 2^60
307 #define __sqr64_fast(P, CX)   \
308 {                                       \
309 BID_UINT64 CXH, CXL, PL, PH, PM;            \
310 	CXH = (CX) >> 32;                   \
311 	CXL = (BID_UINT32)(CX);                 \
312 	                                    \
313     PM = CXH*CXL;                       \
314 	PL = CXL*CXL;                       \
315 	PH = CXH*CXH;                       \
316 	PM += PM;                           \
317 	PM += (PL>>32);                     \
318                                         \
319 	(P).w[1] = PH + (PM>>32);           \
320 	(P).w[0] = (PM<<32)+(BID_UINT32)PL;     \
321 }
322 // get full 64x64bit product
323 // Note:
324 // This implementation is used for CX < 2^61, CY < 2^61
325 //
326 #define __mul_64x64_to_64_high_fast(P, CX, CY)   \
327 {                                       \
328 BID_UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
329 	CXH = (CX) >> 32;                   \
330 	CXL = (BID_UINT32)(CX);                 \
331 	CYH = (CY) >> 32;                   \
332 	CYL = (BID_UINT32)(CY);                 \
333 	                                    \
334     PM = CXH*CYL;                       \
335 	PL = CXL*CYL;                       \
336 	PH = CXH*CYH;                       \
337 	PM += CXL*CYH;                      \
338 	PM += (PL>>32);                     \
339                                         \
340 	(P) = PH + (PM>>32);                \
341 }
342 // get full 64x64bit product
343 //
344 #define __mul_64x64_to_128_full(P, CX, CY)     \
345 {                                         \
346 BID_UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
347 	CXH = (CX) >> 32;                     \
348 	CXL = (BID_UINT32)(CX);                   \
349 	CYH = (CY) >> 32;                     \
350 	CYL = (BID_UINT32)(CY);                   \
351 	                                      \
352     PM = CXH*CYL;                         \
353 	PH = CXH*CYH;                         \
354 	PL = CXL*CYL;                         \
355 	PM2 = CXL*CYH;                        \
356 	PH += (PM>>32);                       \
357 	PM = (BID_UINT64)((BID_UINT32)PM)+PM2+(PL>>32); \
358                                           \
359 	(P).w[1] = PH + (PM>>32);             \
360 	(P).w[0] = (PM<<32)+(BID_UINT32)PL;        \
361 }
362 #define __mul_128x128_high(Q, A, B)               \
363 {                                                 \
364 BID_UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
365                                                   \
366 	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
367 	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
368 	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
369 	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
370                                                   \
371     __add_128_128(QM, ALBH, AHBL);                \
372     __add_128_64(QM2, QM, ALBL.w[1]);             \
373     __add_128_64((Q), AHBH, QM2.w[1]);            \
374 }
375 #define __mul_128x128_full(Qh, Ql, A, B)          \
376 {                                                 \
377 BID_UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
378                                                   \
379 	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
380 	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
381 	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
382 	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
383                                                   \
384     __add_128_128(QM, ALBH, AHBL);                \
385 	(Ql).w[0] = ALBL.w[0];                          \
386     __add_128_64(QM2, QM, ALBL.w[1]);             \
387     __add_128_64((Qh), AHBH, QM2.w[1]);           \
388 	(Ql).w[1] = QM2.w[0];                          \
389 }
390 #define __mul_128x128_low(Ql, A, B)               \
391 {                                                 \
392 BID_UINT128 ALBL;                                     \
393 BID_UINT64 QM64;                                      \
394                                                   \
395 	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
396 	QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1];   \
397                                                   \
398 	(Ql).w[0] = ALBL.w[0];                          \
399 	(Ql).w[1] = QM64 + ALBL.w[1];                 \
400 }
401 #define __mul_64x128_low(Ql, A, B)                \
402 {                                                 \
403   BID_UINT128 ALBL, ALBH, QM2;                        \
404   __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
405   __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
406   (Ql).w[0] = ALBL.w[0];                          \
407   __add_128_64(QM2, ALBH, ALBL.w[1]);             \
408   (Ql).w[1] = QM2.w[0];                           \
409 }
410 #define __mul_64x128_full(Ph, Ql, A, B)           \
411 {                                                 \
412 BID_UINT128 ALBL, ALBH, QM2;                          \
413                                                   \
414 	__mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
415 	__mul_64x64_to_128(ALBL, (A), (B).w[0]);       \
416                                                   \
417 	(Ql).w[0] = ALBL.w[0];                          \
418     __add_128_64(QM2, ALBH, ALBL.w[1]);           \
419 	(Ql).w[1] = QM2.w[0];                          \
420     Ph = QM2.w[1];                                \
421 }
422 #define __mul_64x128_to_192(Q, A, B)              \
423 {                                                 \
424 BID_UINT128 ALBL, ALBH, QM2;                          \
425                                                   \
426 	__mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
427 	__mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
428                                                   \
429 	(Q).w[0] = ALBL.w[0];                         \
430     __add_128_64(QM2, ALBH, ALBL.w[1]);           \
431 	(Q).w[1] = QM2.w[0];                          \
432     (Q).w[2] = QM2.w[1];                          \
433 }
434 #define __mul_64x128_to192(Q, A, B)          \
435 {                                             \
436 BID_UINT128 ALBL, ALBH, QM2;                      \
437                                               \
438     __mul_64x64_to_128(ALBH, (A), (B).w[1]);  \
439     __mul_64x64_to_128(ALBL, (A), (B).w[0]);  \
440                                               \
441     (Q).w[0] = ALBL.w[0];                    \
442     __add_128_64(QM2, ALBH, ALBL.w[1]);       \
443     (Q).w[1] = QM2.w[0];                     \
444     (Q).w[2] = QM2.w[1];                     \
445 }
446 #define __mul_128x128_to_256(P256, A, B)                         \
447 {                                                                \
448 BID_UINT128 Qll, Qlh;                                                \
449 BID_UINT64 Phl, Phh, CY1, CY2;                                         \
450                                                                  \
451    __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
452    __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
453   (P256).w[0] = Qll.w[0];                                        \
454 	   __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
455 	   __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);    \
456 	   (P256).w[3] = Phh + CY2;                                   \
457 }
458 //
459 // For better performance, will check A.w[1] against 0,
460 //                         but not B.w[1]
461 // Use this macro accordingly
462 #define __mul_128x128_to_256_check_A(P256, A, B)                   \
463 {                                                                  \
464 BID_UINT128 Qll, Qlh;                                                  \
465 BID_UINT64 Phl, Phh, CY1, CY2;                                           \
466                                                                    \
467    __mul_64x128_full(Phl, Qll, A.w[0], B);                          \
468   (P256).w[0] = Qll.w[0];                                        \
469    if(A.w[1])  {                                                   \
470 	   __mul_64x128_full(Phh, Qlh, A.w[1], B);                     \
471 	   __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
472 	   __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);   \
473 	   (P256).w[3] = Phh + CY2;   }                              \
474    else  {                                                         \
475 	   (P256).w[1] = Qll.w[1];                                  \
476 	   (P256).w[2] = Phl;                                       \
477 	   (P256).w[3] = 0;  }                                      \
478 }
479 #define __mul_64x192_to_256(lP, lA, lB)                      \
480 {                                                         \
481 BID_UINT128 lP0,lP1,lP2;                                      \
482 BID_UINT64 lC;                                                 \
483 	__mul_64x64_to_128(lP0, lA, (lB).w[0]);              \
484 	__mul_64x64_to_128(lP1, lA, (lB).w[1]);              \
485 	__mul_64x64_to_128(lP2, lA, (lB).w[2]);              \
486 	(lP).w[0] = lP0.w[0];                                \
487 	__add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);      \
488 	__add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
489 	(lP).w[3] = lP2.w[1] + lC;                           \
490 }
491 #define __mul_64x256_to_320(P, A, B)                    \
492 {                                                       \
493 BID_UINT128 lP0,lP1,lP2,lP3;                                \
494 BID_UINT64 lC;                                               \
495 	__mul_64x64_to_128(lP0, A, (B).w[0]);             \
496 	__mul_64x64_to_128(lP1, A, (B).w[1]);             \
497 	__mul_64x64_to_128(lP2, A, (B).w[2]);             \
498 	__mul_64x64_to_128(lP3, A, (B).w[3]);             \
499 	(P).w[0] = lP0.w[0];                               \
500 	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
501 	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
502 	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
503 	(P).w[4] = lP3.w[1] + lC;                          \
504 }
505 #define __mul_192x192_to_384(P, A, B)                          \
506 {                                                              \
507 BID_UINT256 P0,P1,P2;                                              \
508 BID_UINT64 CY;                                                      \
509 	__mul_64x192_to_256(P0, (A).w[0], B);                   \
510 	__mul_64x192_to_256(P1, (A).w[1], B);                   \
511 	__mul_64x192_to_256(P2, (A).w[2], B);                   \
512 	(P).w[0] = P0.w[0];                                  \
513 	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
514 	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
515 	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
516 	(P).w[4] = P1.w[3] + CY;                              \
517 	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
518 	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
519 	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
520 	(P).w[5] = P2.w[3] + CY;                              \
521 }
522 #define __mul_64x320_to_384(P, A, B)                    \
523 {                                                       \
524 BID_UINT128 lP0,lP1,lP2,lP3,lP4;                            \
525 BID_UINT64 lC;                                               \
526 	__mul_64x64_to_128(lP0, A, (B).w[0]);             \
527 	__mul_64x64_to_128(lP1, A, (B).w[1]);             \
528 	__mul_64x64_to_128(lP2, A, (B).w[2]);             \
529 	__mul_64x64_to_128(lP3, A, (B).w[3]);             \
530 	__mul_64x64_to_128(lP4, A, (B).w[4]);             \
531 	(P).w[0] = lP0.w[0];                               \
532 	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
533 	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
534 	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
535 	__add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC); \
536 	(P).w[5] = lP4.w[1] + lC;                          \
537 }
538 // A*A
539 // Full 128x128-bit product
540 #define __sqr128_to_256(P256, A)                                 \
541 {                                                                \
542 BID_UINT128 Qll, Qlh, Qhh;                                           \
543 BID_UINT64 TMP_C1, TMP_C2;                                 \
544                                                                  \
545    __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                      \
546    __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                      \
547    Qhh.w[1] += (Qlh.w[1]>>63);                                   \
548    Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                \
549    Qlh.w[0] += Qlh.w[0];                                         \
550    __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                      \
551                                                                  \
552    __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);      \
553    (P256).w[0] = Qll.w[0];                                       \
554    __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1);    \
555    (P256).w[3] = Qhh.w[1]+TMP_C2;                                         \
556 }
557 #define __mul_128x128_to_256_low_high(PQh, PQl, A, B)            \
558 {                                                                \
559 BID_UINT128 Qll, Qlh;                                                \
560 BID_UINT64 Phl, Phh, C1, C2;                                         \
561                                                                  \
562    __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
563    __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
564   (PQl).w[0] = Qll.w[0];                                        \
565 	   __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);      \
566 	   __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);    \
567 	   (PQh).w[1] = Phh + C2;                                   \
568 }
569 #define __mul_256x256_to_512(P, A, B)                          \
570 {                                                              \
571 BID_UINT512 P0,P1,P2,P3;                                           \
572 BID_UINT64 CY;                                                      \
573 	__mul_64x256_to_320(P0, (A).w[0], B);                   \
574 	__mul_64x256_to_320(P1, (A).w[1], B);                   \
575 	__mul_64x256_to_320(P2, (A).w[2], B);                   \
576 	__mul_64x256_to_320(P3, (A).w[3], B);                   \
577 	(P).w[0] = P0.w[0];                                  \
578 	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
579 	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
580 	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
581 	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
582 	(P).w[5] = P1.w[4] + CY;                              \
583 	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
584 	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
585 	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
586 	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
587 	(P).w[6] = P2.w[4] + CY;                              \
588 	__add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);     \
589 	__add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY);   \
590 	__add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY);   \
591 	__add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY);   \
592 	(P).w[7] = P3.w[4] + CY;                              \
593 }
594 #define __mul_192x256_to_448(P, A, B)                          \
595 {                                                              \
596 BID_UINT512 P0,P1,P2;                                           \
597 BID_UINT64 CY;                                                      \
598 	__mul_64x256_to_320(P0, (A).w[0], B);                   \
599 	__mul_64x256_to_320(P1, (A).w[1], B);                   \
600 	__mul_64x256_to_320(P2, (A).w[2], B);                   \
601 	(P).w[0] = P0.w[0];                                  \
602 	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
603 	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
604 	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
605 	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
606 	(P).w[5] = P1.w[4] + CY;                              \
607 	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
608 	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
609 	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
610 	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
611 	(P).w[6] = P2.w[4] + CY;                              \
612 }
613 #define __mul_320x320_to_640(P, A, B)                          \
614 {                                                              \
615 BID_UINT512 P0,P1,P2,P3;                                           \
616 BID_UINT64 CY;                                                     \
617 	__mul_256x256_to_512((P), (A), B);                   \
618 	__mul_64x256_to_320(P1, (A).w[4], B);                   \
619 	__mul_64x256_to_320(P2, (B).w[4], A);                   \
620 	__mul_64x64_to_128(P3, (A).w[4], (B).w[4]);               \
621 	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
622 	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
623 	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
624 	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
625 	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
626 	P3.w[1] += CY;                                       \
627 	__add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);      \
628 	__add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY); \
629 	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY); \
630 	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY); \
631 	__add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY); \
632 	(P).w[9] = P3.w[1] + CY;                             \
633 }
634 #define __mul_384x384_to_768(P, A, B)                          \
635 {                                                              \
636 BID_UINT512 P0,P1,P2,P3;                                           \
637 BID_UINT64 CY;                                                     \
638 	__mul_320x320_to_640((P), (A), B);                         \
639 	__mul_64x320_to_384(P1, (A).w[5], B);                   \
640 	__mul_64x320_to_384(P2, (B).w[5], A);                   \
641 	__mul_64x64_to_128(P3, (A).w[5], (B).w[5]);               \
642 	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
643 	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
644 	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
645 	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
646 	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
647 	__add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY); \
648 	P3.w[1] += CY;                                       \
649 	__add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);      \
650 	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY); \
651 	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY); \
652 	__add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY); \
653 	__add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY); \
654 	__add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY); \
655 	(P).w[11] = P3.w[1] + CY;                             \
656 }
657 #define __mul_64x128_short(Ql, A, B)              \
658 {                                                 \
659 BID_UINT64 ALBH_L;                                    \
660                                                   \
661 	__mul_64x64_to_64(ALBH_L, (A),(B).w[1]);      \
662 	__mul_64x64_to_128((Ql), (A), (B).w[0]);       \
663                                                   \
664 	(Ql).w[1] += ALBH_L;                          \
665 }
666 #define __scale128_10(D,_TMP)                            \
667 {                                                        \
668 BID_UINT128 _TMP2,_TMP8;                                     \
669 	  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);       \
670 	  _TMP2.w[0] = _TMP.w[0]<<1;                         \
671 	  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);       \
672 	  _TMP8.w[0] = _TMP.w[0]<<3;                         \
673 	  __add_128_128(D, _TMP2, _TMP8);                    \
674 }
675 // 64x64-bit product
676 #define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
677 {                                                  \
678   BID_UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;     \
679   CXH = (CX64) >> 32;                              \
680   CXL = (BID_UINT32)(CX64);                            \
681   CYH = (CY64) >> 32;                              \
682   CYL = (BID_UINT32)(CY64);                            \
683   PM = CXH*CYL;                                    \
684   PH = CXH*CYH;                                    \
685   PL = CXL*CYL;                                    \
686   PM2 = CXL*CYH;                                   \
687   PH += (PM>>32);                                  \
688   PM = (BID_UINT64)((BID_UINT32)PM)+PM2+(PL>>32);          \
689   (P128).w[1] = PH + (PM>>32);                     \
690   (P128).w[0] = (PM<<32)+(BID_UINT32)PL;                \
691 }
692 // 64x64-bit product
693 #define __mul_64x64_to_128HIGH(P64, CX64, CY64)  \
694 {                                                  \
695   BID_UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;     \
696   CXH = (CX64) >> 32;                              \
697   CXL = (BID_UINT32)(CX64);                            \
698   CYH = (CY64) >> 32;                              \
699   CYL = (BID_UINT32)(CY64);                            \
700   PM = CXH*CYL;                                    \
701   PH = CXH*CYH;                                    \
702   PL = CXL*CYL;                                    \
703   PM2 = CXL*CYH;                                   \
704   PH += (PM>>32);                                  \
705   PM = (BID_UINT64)((BID_UINT32)PM)+PM2+(PL>>32);          \
706   P64 = PH + (PM>>32);                     \
707 }
708 #define __mul_128x64_to_128(Q128, A64, B128)        \
709 {                                                  \
710   BID_UINT64 ALBH_L;                                   \
711   ALBH_L = (A64) * (B128).w[1];                    \
712   __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]);   \
713   (Q128).w[1] += ALBH_L;                           \
714 }
715 // might simplify by calculating just QM2.w[0]
716 #define __mul_64x128_to_128(Ql, A, B)           \
717 {                                                 \
718   BID_UINT128 ALBL, ALBH, QM2;                        \
719   __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
720   __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
721   (Ql).w[0] = ALBL.w[0];                          \
722   __add_128_64(QM2, ALBH, ALBL.w[1]);             \
723   (Ql).w[1] = QM2.w[0];                           \
724 }
725 /*********************************************************************
726  *
727  *      BID Pack/Unpack Macros
728  *
729  *********************************************************************/
730 /////////////////////////////////////////
731 // BID64 definitions
732 ////////////////////////////////////////
733 #define DECIMAL_MAX_EXPON_64  767
734 #define DECIMAL_EXPONENT_BIAS 398
735 #define MAX_FORMAT_DIGITS     16
736 /////////////////////////////////////////
737 // BID128 definitions
738 ////////////////////////////////////////
739 #define DECIMAL_MAX_EXPON_128  12287
740 #define DECIMAL_EXPONENT_BIAS_128  6176
741 #define MAX_FORMAT_DIGITS_128      34
742 /////////////////////////////////////////
743 // BID32 definitions
744 ////////////////////////////////////////
745 #define DECIMAL_MAX_EXPON_32  191
746 #define DECIMAL_EXPONENT_BIAS_32  101
747 #define MAX_FORMAT_DIGITS_32      7
748 ////////////////////////////////////////
749 // Constant Definitions
750 ///////////////////////////////////////
751 #define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
752 #define INFINITY_MASK64         0x7800000000000000ull
753 #define SINFINITY_MASK64        0xf800000000000000ull
754 #define SSNAN_MASK64            0xfc00000000000000ull
755 #define NAN_MASK64              0x7c00000000000000ull
756 #define SNAN_MASK64             0x7e00000000000000ull
757 #define QUIET_MASK64            0xfdffffffffffffffull
758 #define LARGE_COEFF_MASK64      0x0007ffffffffffffull
759 #define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
760 #define SMALL_COEFF_MASK64      0x001fffffffffffffull
761 #define EXPONENT_MASK64         0x3ff
762 #define EXPONENT_SHIFT_LARGE64  51
763 #define EXPONENT_SHIFT_SMALL64  53
764 #define LARGEST_BID64           0x77fb86f26fc0ffffull
765 #define SMALLEST_BID64          0xf7fb86f26fc0ffffull
766 #define SMALL_COEFF_MASK128     0x0001ffffffffffffull
767 #define LARGE_COEFF_MASK128     0x00007fffffffffffull
768 #define EXPONENT_MASK128        0x3fff
769 #define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
770 #define LARGEST_BID128_LOW      0x378d8e63ffffffffull
771 #define SPECIAL_ENCODING_MASK32 0x60000000ul
772 #define SINFINITY_MASK32        0xf8000000ul
773 #define INFINITY_MASK32         0x78000000ul
774 #define LARGE_COEFF_MASK32      0x007ffffful
775 #define LARGE_COEFF_HIGH_BIT32  0x00800000ul
776 #define SMALL_COEFF_MASK32      0x001ffffful
777 #define EXPONENT_MASK32         0xff
778 #define LARGEST_BID32           0x77f8967f
779 #define NAN_MASK32              0x7c000000
780 #define SNAN_MASK32             0x7e000000
781 #define SSNAN_MASK32            0xfc000000
782 #define QUIET_MASK32            0xfdffffff
783 #define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
784 #define BINARY_EXPONENT_BIAS  0x3ff
785 #define UPPER_EXPON_LIMIT     51
786 // data needed for BID pack/unpack macros
787 BID_EXTERN_C BID_UINT64 bid_round_const_table[][19];
788 BID_EXTERN_C BID_UINT128 bid_reciprocals10_128[];
789 BID_EXTERN_C int bid_recip_scale[];
790 BID_EXTERN_C BID_UINT128 bid_power10_table_128[];
791 BID_EXTERN_C int bid_estimate_decimal_digits[];
792 BID_EXTERN_C int bid_estimate_bin_expon[];
793 BID_EXTERN_C BID_UINT64 bid_power10_index_binexp[];
794 BID_EXTERN_C int bid_short_recip_scale[];
795 BID_EXTERN_C int bid_bid_bid_recip_scale32[];
796 BID_EXTERN_C BID_UINT64 bid_reciprocals10_64[];
797 BID_EXTERN_C BID_UINT64 bid_bid_reciprocals10_32[];
798 BID_EXTERN_C BID_UINT128 bid_power10_index_binexp_128[];
799 BID_EXTERN_C BID_UINT128 bid_round_const_table_128[][36];
800 
801 
802 //////////////////////////////////////////////
803 //  Status Flag Handling
804 /////////////////////////////////////////////
805 #define __set_status_flags(fpsc, status)  *(fpsc) |= status
806 #define is_inexact(fpsc)  ((*(fpsc))&BID_INEXACT_EXCEPTION)
807 
808 __BID_INLINE__ BID_UINT64
unpack_BID64(BID_UINT64 * psign_x,int * pexponent_x,BID_UINT64 * pcoefficient_x,BID_UINT64 x)809 unpack_BID64 (BID_UINT64 * psign_x, int *pexponent_x,
810 	      BID_UINT64 * pcoefficient_x, BID_UINT64 x) {
811   BID_UINT64 tmp, coeff;
812 
813   *psign_x = x & 0x8000000000000000ull;
814 
815   if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
816     // special encodings
817     // coefficient
818     coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;
819 
820     if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
821       *pexponent_x = 0;
822       *pcoefficient_x = x & 0xfe03ffffffffffffull;
823       if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
824 	*pcoefficient_x = x & 0xfe00000000000000ull;
825       if ((x & NAN_MASK64) == INFINITY_MASK64)
826 	*pcoefficient_x = x & SINFINITY_MASK64;
827       return 0;	// NaN or Infinity
828     }
829     // check for non-canonical values
830     if (coeff >= 10000000000000000ull)
831       coeff = 0;
832     *pcoefficient_x = coeff;
833     // get exponent
834     tmp = x >> EXPONENT_SHIFT_LARGE64;
835     *pexponent_x = (int) (tmp & EXPONENT_MASK64);
836     return coeff;
837   }
838   // exponent
839   tmp = x >> EXPONENT_SHIFT_SMALL64;
840   *pexponent_x = (int) (tmp & EXPONENT_MASK64);
841   // coefficient
842   *pcoefficient_x = (x & SMALL_COEFF_MASK64);
843 
844   return *pcoefficient_x;
845 }
846 
847 //
848 //   BID64 pack macro (general form)
849 //
850 __BID_INLINE__ BID_UINT64
get_BID64(BID_UINT64 sgn,int expon,BID_UINT64 coeff,int rmode,unsigned * fpsc)851 get_BID64 (BID_UINT64 sgn, int expon, BID_UINT64 coeff, int rmode,
852 	   unsigned *fpsc) {
853   BID_UINT128 Stemp, Q_low;
854   BID_UINT64 QH, r, mask, _C64, remainder_h, CY, carry;
855   int extra_digits, amount, amount2;
856   unsigned status;
857 
858   if (coeff > 9999999999999999ull) {
859     expon++;
860     coeff = 1000000000000000ull;
861   }
862   // check for possible underflow/overflow
863   if (((unsigned) expon) >= 3 * 256) {
864     if (expon < 0) {
865       // underflow
866       if (expon + MAX_FORMAT_DIGITS < 0) {
867 #ifdef BID_SET_STATUS_FLAGS
868 	__set_status_flags (fpsc,
869 			    BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
870 #endif
871 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
872 #ifndef IEEE_ROUND_NEAREST
873 	if (rmode == BID_ROUNDING_DOWN && sgn)
874 	  return 0x8000000000000001ull;
875 	if (rmode == BID_ROUNDING_UP && !sgn)
876 	  return 1ull;
877 #endif
878 #endif
879 	// result is 0
880 	return sgn;
881       }
882 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
883 #ifndef IEEE_ROUND_NEAREST
884       if (sgn && (unsigned) (rmode - 1) < 2)
885 	rmode = 3 - rmode;
886 #endif
887 #endif
888       // get digits to be shifted out
889       extra_digits = -expon;
890       coeff += bid_round_const_table[rmode][extra_digits];
891 
892       // get coeff*(2^M[extra_digits])/10^extra_digits
893       __mul_64x128_full (QH, Q_low, coeff,
894 			 bid_reciprocals10_128[extra_digits]);
895 
896       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
897       amount = bid_recip_scale[extra_digits];
898 
899       _C64 = QH >> amount;
900 
901 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
902 #ifndef IEEE_ROUND_NEAREST
903       if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
904 #endif
905 	if (_C64 & 1) {
906 	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
907 
908 	  // get remainder
909 	  amount2 = 64 - amount;
910 	  remainder_h = 0;
911 	  remainder_h--;
912 	  remainder_h >>= amount2;
913 	  remainder_h = remainder_h & QH;
914 
915 	  if (!remainder_h
916 	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
917 		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
918 		      && Q_low.w[0] <
919 		      bid_reciprocals10_128[extra_digits].w[0]))) {
920 	    _C64--;
921 	  }
922 	}
923 #endif
924 
925 #ifdef BID_SET_STATUS_FLAGS
926 
927       if (is_inexact (fpsc))
928 	__set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
929       else {
930 	status = BID_INEXACT_EXCEPTION;
931 	// get remainder
932 	remainder_h = QH << (64 - amount);
933 
934 	switch (rmode) {
935 	case BID_ROUNDING_TO_NEAREST:
936 	case BID_ROUNDING_TIES_AWAY:
937 	  // test whether fractional part is 0
938 	  if (remainder_h == 0x8000000000000000ull
939 	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
940 		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
941 		      && Q_low.w[0] <
942 		      bid_reciprocals10_128[extra_digits].w[0])))
943 	    status = BID_EXACT_STATUS;
944 	  break;
945 	case BID_ROUNDING_DOWN:
946 	case BID_ROUNDING_TO_ZERO:
947 	  if (!remainder_h
948 	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
949 		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
950 		      && Q_low.w[0] <
951 		      bid_reciprocals10_128[extra_digits].w[0])))
952 	    status = BID_EXACT_STATUS;
953 	  break;
954 	default:
955 	  // round up
956 	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
957 			   bid_reciprocals10_128[extra_digits].w[0]);
958 	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
959 			      bid_reciprocals10_128[extra_digits].w[1], CY);
960 	  if ((remainder_h >> (64 - amount)) + carry >=
961 	      (((BID_UINT64) 1) << amount))
962 	    status = BID_EXACT_STATUS;
963 	}
964 
965 	if (status != BID_EXACT_STATUS)
966 	  __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
967       }
968 
969 #endif
970 
971       return sgn | _C64;
972     }
973     if(!coeff) { if(expon > DECIMAL_MAX_EXPON_64) expon = DECIMAL_MAX_EXPON_64; }
974     while (coeff < 1000000000000000ull && expon >= 3 * 256) {
975       expon--;
976       coeff = (coeff << 3) + (coeff << 1);
977     }
978     if (expon > DECIMAL_MAX_EXPON_64) {
979 #ifdef BID_SET_STATUS_FLAGS
980       __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
981 #endif
982       // overflow
983       r = sgn | INFINITY_MASK64;
984       switch (rmode) {
985       case BID_ROUNDING_DOWN:
986 	if (!sgn)
987 	  r = LARGEST_BID64;
988 	break;
989       case BID_ROUNDING_TO_ZERO:
990 	r = sgn | LARGEST_BID64;
991 	break;
992       case BID_ROUNDING_UP:
993 	// round up
994 	if (sgn)
995 	  r = SMALLEST_BID64;
996       }
997       return r;
998     }
999   }
1000 
1001   mask = 1;
1002   mask <<= EXPONENT_SHIFT_SMALL64;
1003 
1004   // check whether coefficient fits in 10*5+3 bits
1005   if (coeff < mask) {
1006     r = expon;
1007     r <<= EXPONENT_SHIFT_SMALL64;
1008     r |= (coeff | sgn);
1009     return r;
1010   }
1011   // special format
1012 
1013   // eliminate the case coeff==10^16 after rounding
1014   if (coeff == 10000000000000000ull) {
1015     r = expon + 1;
1016     r <<= EXPONENT_SHIFT_SMALL64;
1017     r |= (1000000000000000ull | sgn);
1018     return r;
1019   }
1020 
1021   r = expon;
1022   r <<= EXPONENT_SHIFT_LARGE64;
1023   r |= (sgn | SPECIAL_ENCODING_MASK64);
1024   // add coeff, without leading bits
1025   mask = (mask >> 2) - 1;
1026   coeff &= mask;
1027   r |= coeff;
1028 
1029   return r;
1030 }
1031 
1032 
1033 
1034 
1035 //
1036 //   No overflow/underflow checking
1037 //
1038 __BID_INLINE__ BID_UINT64
fast_get_BID64(BID_UINT64 sgn,int expon,BID_UINT64 coeff)1039 fast_get_BID64 (BID_UINT64 sgn, int expon, BID_UINT64 coeff) {
1040   BID_UINT64 r, mask;
1041 
1042   mask = 1;
1043   mask <<= EXPONENT_SHIFT_SMALL64;
1044 
1045   // check whether coefficient fits in 10*5+3 bits
1046   if (coeff < mask) {
1047     r = expon;
1048     r <<= EXPONENT_SHIFT_SMALL64;
1049     r |= (coeff | sgn);
1050     return r;
1051   }
1052   // special format
1053 
1054   // eliminate the case coeff==10^16 after rounding
1055   if (coeff == 10000000000000000ull) {
1056     r = expon + 1;
1057     r <<= EXPONENT_SHIFT_SMALL64;
1058     r |= (1000000000000000ull | sgn);
1059     return r;
1060   }
1061 
1062   r = expon;
1063   r <<= EXPONENT_SHIFT_LARGE64;
1064   r |= (sgn | SPECIAL_ENCODING_MASK64);
1065   // add coeff, without leading bits
1066   mask = (mask >> 2) - 1;
1067   coeff &= mask;
1068   r |= coeff;
1069 
1070   return r;
1071 }
1072 
1073 
1074 //
1075 //   no underflow checking
1076 //
1077 __BID_INLINE__ BID_UINT64
fast_get_BID64_check_OF(BID_UINT64 sgn,int expon,BID_UINT64 coeff,int rmode,unsigned * fpsc)1078 fast_get_BID64_check_OF (BID_UINT64 sgn, int expon, BID_UINT64 coeff, int rmode,
1079 			 unsigned *fpsc) {
1080   BID_UINT64 r, mask;
1081 
1082   if (((unsigned) expon) >= 3 * 256 - 1) {
1083     if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
1084       expon = 3 * 256;
1085       coeff = 1000000000000000ull;
1086     }
1087 
1088     if (((unsigned) expon) >= 3 * 256) {
1089       while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1090 	expon--;
1091 	coeff = (coeff << 3) + (coeff << 1);
1092       }
1093       if (expon > DECIMAL_MAX_EXPON_64) {
1094 #ifdef BID_SET_STATUS_FLAGS
1095 	__set_status_flags (fpsc,
1096 			    BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
1097 #endif
1098 	// overflow
1099 	r = sgn | INFINITY_MASK64;
1100 	switch (rmode) {
1101 	case BID_ROUNDING_DOWN:
1102 	  if (!sgn)
1103 	    r = LARGEST_BID64;
1104 	  break;
1105 	case BID_ROUNDING_TO_ZERO:
1106 	  r = sgn | LARGEST_BID64;
1107 	  break;
1108 	case BID_ROUNDING_UP:
1109 	  // round up
1110 	  if (sgn)
1111 	    r = SMALLEST_BID64;
1112 	}
1113 	return r;
1114       }
1115     }
1116   }
1117 
1118   mask = 1;
1119   mask <<= EXPONENT_SHIFT_SMALL64;
1120 
1121   // check whether coefficient fits in 10*5+3 bits
1122   if (coeff < mask) {
1123     r = expon;
1124     r <<= EXPONENT_SHIFT_SMALL64;
1125     r |= (coeff | sgn);
1126     return r;
1127   }
1128   // special format
1129 
1130   // eliminate the case coeff==10^16 after rounding
1131   if (coeff == 10000000000000000ull) {
1132     r = expon + 1;
1133     r <<= EXPONENT_SHIFT_SMALL64;
1134     r |= (1000000000000000ull | sgn);
1135     return r;
1136   }
1137 
1138   r = expon;
1139   r <<= EXPONENT_SHIFT_LARGE64;
1140   r |= (sgn | SPECIAL_ENCODING_MASK64);
1141   // add coeff, without leading bits
1142   mask = (mask >> 2) - 1;
1143   coeff &= mask;
1144   r |= coeff;
1145 
1146   return r;
1147 }
1148 
1149 
1150 //
1151 //   No overflow/underflow checking
1152 //   or checking for coefficients equal to 10^16 (after rounding)
1153 //
1154 __BID_INLINE__ BID_UINT64
very_fast_get_BID64(BID_UINT64 sgn,int expon,BID_UINT64 coeff)1155 very_fast_get_BID64 (BID_UINT64 sgn, int expon, BID_UINT64 coeff) {
1156   BID_UINT64 r, mask;
1157 
1158   mask = 1;
1159   mask <<= EXPONENT_SHIFT_SMALL64;
1160 
1161   // check whether coefficient fits in 10*5+3 bits
1162   if (coeff < mask) {
1163     r = expon;
1164     r <<= EXPONENT_SHIFT_SMALL64;
1165     r |= (coeff | sgn);
1166     return r;
1167   }
1168   // special format
1169   r = expon;
1170   r <<= EXPONENT_SHIFT_LARGE64;
1171   r |= (sgn | SPECIAL_ENCODING_MASK64);
1172   // add coeff, without leading bits
1173   mask = (mask >> 2) - 1;
1174   coeff &= mask;
1175   r |= coeff;
1176 
1177   return r;
1178 }
1179 
1180 //
1181 //   No overflow/underflow checking or checking for coefficients above 2^53
1182 //
1183 __BID_INLINE__ BID_UINT64
very_fast_get_BID64_small_mantissa(BID_UINT64 sgn,int expon,BID_UINT64 coeff)1184 very_fast_get_BID64_small_mantissa (BID_UINT64 sgn, int expon, BID_UINT64 coeff) {
1185   // no UF/OF
1186   BID_UINT64 r;
1187 
1188   r = expon;
1189   r <<= EXPONENT_SHIFT_SMALL64;
1190   r |= (coeff | sgn);
1191   return r;
1192 }
1193 
1194 
1195 //
1196 // This pack macro is used when underflow is known to occur
1197 //
1198 __BID_INLINE__ BID_UINT64
get_BID64_UF(BID_UINT64 sgn,int expon,BID_UINT64 coeff,BID_UINT64 R,int rmode,unsigned * fpsc)1199 get_BID64_UF (BID_UINT64 sgn, int expon, BID_UINT64 coeff, BID_UINT64 R, int rmode,
1200 	      unsigned *fpsc) {
1201   BID_UINT128 C128, Q_low, Stemp;
1202   BID_UINT64 _C64, remainder_h, QH, carry, CY;
1203   int extra_digits, amount, amount2;
1204   unsigned status;
1205 
1206   // underflow
1207   if (expon + MAX_FORMAT_DIGITS < 0) {
1208 #ifdef BID_SET_STATUS_FLAGS
1209     __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
1210 #endif
1211 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1212 #ifndef IEEE_ROUND_NEAREST
1213     if (rmode == BID_ROUNDING_DOWN && sgn)
1214       return 0x8000000000000001ull;
1215     if (rmode == BID_ROUNDING_UP && !sgn)
1216       return 1ull;
1217 #endif
1218 #endif
1219     // result is 0
1220     return sgn;
1221   }
1222   // 10*coeff
1223   coeff = (coeff << 3) + (coeff << 1);
1224 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1225 #ifndef IEEE_ROUND_NEAREST
1226   if (sgn && (unsigned) (rmode - 1) < 2)
1227     rmode = 3 - rmode;
1228 #endif
1229 #endif
1230   if (R)
1231     coeff |= 1;
1232   // get digits to be shifted out
1233   extra_digits = 1 - expon;
1234   C128.w[0] = coeff + bid_round_const_table[rmode][extra_digits];
1235 
1236   // get coeff*(2^M[extra_digits])/10^extra_digits
1237   __mul_64x128_full (QH, Q_low, C128.w[0],
1238 		     bid_reciprocals10_128[extra_digits]);
1239 
1240   // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1241   amount = bid_recip_scale[extra_digits];
1242 
1243   _C64 = QH >> amount;
1244   //__shr_128(C128, Q_high, amount);
1245 
1246 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1247 #ifndef IEEE_ROUND_NEAREST
1248   if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
1249 #endif
1250     if (_C64 & 1) {
1251       // check whether fractional part of initial_P/10^extra_digits is exactly .5
1252 
1253       // get remainder
1254       amount2 = 64 - amount;
1255       remainder_h = 0;
1256       remainder_h--;
1257       remainder_h >>= amount2;
1258       remainder_h = remainder_h & QH;
1259 
1260       if (!remainder_h
1261 	  && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
1262 	      || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
1263 		  && Q_low.w[0] <
1264 		  bid_reciprocals10_128[extra_digits].w[0]))) {
1265 	_C64--;
1266       }
1267     }
1268 #endif
1269 
1270 #ifdef BID_SET_STATUS_FLAGS
1271 
1272   if (is_inexact (fpsc))
1273     __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
1274   else {
1275     status = BID_INEXACT_EXCEPTION;
1276     // get remainder
1277     remainder_h = QH << (64 - amount);
1278 
1279     switch (rmode) {
1280     case BID_ROUNDING_TO_NEAREST:
1281     case BID_ROUNDING_TIES_AWAY:
1282       // test whether fractional part is 0
1283       if (remainder_h == 0x8000000000000000ull
1284 	  && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
1285 	      || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
1286 		  && Q_low.w[0] <
1287 		  bid_reciprocals10_128[extra_digits].w[0])))
1288 	status = BID_EXACT_STATUS;
1289       break;
1290     case BID_ROUNDING_DOWN:
1291     case BID_ROUNDING_TO_ZERO:
1292       if (!remainder_h
1293 	  && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
1294 	      || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
1295 		  && Q_low.w[0] <
1296 		  bid_reciprocals10_128[extra_digits].w[0])))
1297 	status = BID_EXACT_STATUS;
1298       break;
1299     default:
1300       // round up
1301       __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1302 		       bid_reciprocals10_128[extra_digits].w[0]);
1303       __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1304 			  bid_reciprocals10_128[extra_digits].w[1], CY);
1305       if ((remainder_h >> (64 - amount)) + carry >=
1306 	  (((BID_UINT64) 1) << amount))
1307 	status = BID_EXACT_STATUS;
1308     }
1309 
1310     if (status != BID_EXACT_STATUS)
1311       __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
1312   }
1313 
1314 #endif
1315 
1316   return sgn | _C64;
1317 
1318 }
1319 
1320 
1321 
1322 //
1323 //   This pack macro doesnot check for coefficients above 2^53
1324 //
1325 __BID_INLINE__ BID_UINT64
get_BID64_small_mantissa(BID_UINT64 sgn,int expon,BID_UINT64 coeff,int rmode,unsigned * fpsc)1326 get_BID64_small_mantissa (BID_UINT64 sgn, int expon, BID_UINT64 coeff,
1327 			  int rmode, unsigned *fpsc) {
1328   BID_UINT128 C128, Q_low, Stemp;
1329   BID_UINT64 r, mask, _C64, remainder_h, QH, carry, CY;
1330   int extra_digits, amount, amount2;
1331   unsigned status;
1332 
1333   // check for possible underflow/overflow
1334   if (((unsigned) expon) >= 3 * 256) {
1335     if (expon < 0) {
1336       // underflow
1337       if (expon + MAX_FORMAT_DIGITS < 0) {
1338 #ifdef BID_SET_STATUS_FLAGS
1339 	__set_status_flags (fpsc,
1340 			    BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
1341 #endif
1342 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1343 #ifndef IEEE_ROUND_NEAREST
1344 	if (rmode == BID_ROUNDING_DOWN && sgn)
1345 	  return 0x8000000000000001ull;
1346 	if (rmode == BID_ROUNDING_UP && !sgn)
1347 	  return 1ull;
1348 #endif
1349 #endif
1350 	// result is 0
1351 	return sgn;
1352       }
1353 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1354 #ifndef IEEE_ROUND_NEAREST
1355       if (sgn && (unsigned) (rmode - 1) < 2)
1356 	rmode = 3 - rmode;
1357 #endif
1358 #endif
1359       // get digits to be shifted out
1360       extra_digits = -expon;
1361       C128.w[0] = coeff + bid_round_const_table[rmode][extra_digits];
1362 
1363       // get coeff*(2^M[extra_digits])/10^extra_digits
1364       __mul_64x128_full (QH, Q_low, C128.w[0],
1365 			 bid_reciprocals10_128[extra_digits]);
1366 
1367       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1368       amount = bid_recip_scale[extra_digits];
1369 
1370       _C64 = QH >> amount;
1371 
1372 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1373 #ifndef IEEE_ROUND_NEAREST
1374       if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
1375 #endif
1376 	if (_C64 & 1) {
1377 	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
1378 
1379 	  // get remainder
1380 	  amount2 = 64 - amount;
1381 	  remainder_h = 0;
1382 	  remainder_h--;
1383 	  remainder_h >>= amount2;
1384 	  remainder_h = remainder_h & QH;
1385 
1386 	  if (!remainder_h
1387 	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
1388 		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
1389 		      && Q_low.w[0] <
1390 		      bid_reciprocals10_128[extra_digits].w[0]))) {
1391 	    _C64--;
1392 	  }
1393 	}
1394 #endif
1395 
1396 #ifdef BID_SET_STATUS_FLAGS
1397 
1398       if (is_inexact (fpsc))
1399 	__set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
1400       else {
1401 	status = BID_INEXACT_EXCEPTION;
1402 	// get remainder
1403 	remainder_h = QH << (64 - amount);
1404 
1405 	switch (rmode) {
1406 	case BID_ROUNDING_TO_NEAREST:
1407 	case BID_ROUNDING_TIES_AWAY:
1408 	  // test whether fractional part is 0
1409 	  if (remainder_h == 0x8000000000000000ull
1410 	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
1411 		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
1412 		      && Q_low.w[0] <
1413 		      bid_reciprocals10_128[extra_digits].w[0])))
1414 	    status = BID_EXACT_STATUS;
1415 	  break;
1416 	case BID_ROUNDING_DOWN:
1417 	case BID_ROUNDING_TO_ZERO:
1418 	  if (!remainder_h
1419 	      && (Q_low.w[1] < bid_reciprocals10_128[extra_digits].w[1]
1420 		  || (Q_low.w[1] == bid_reciprocals10_128[extra_digits].w[1]
1421 		      && Q_low.w[0] <
1422 		      bid_reciprocals10_128[extra_digits].w[0])))
1423 	    status = BID_EXACT_STATUS;
1424 	  break;
1425 	default:
1426 	  // round up
1427 	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1428 			   bid_reciprocals10_128[extra_digits].w[0]);
1429 	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1430 			      bid_reciprocals10_128[extra_digits].w[1], CY);
1431 	  if ((remainder_h >> (64 - amount)) + carry >=
1432 	      (((BID_UINT64) 1) << amount))
1433 	    status = BID_EXACT_STATUS;
1434 	}
1435 
1436 	if (status != BID_EXACT_STATUS)
1437 	  __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
1438       }
1439 
1440 #endif
1441 
1442       return sgn | _C64;
1443     }
1444 
1445     while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1446       expon--;
1447       coeff = (coeff << 3) + (coeff << 1);
1448     }
1449     if (expon > DECIMAL_MAX_EXPON_64) {
1450 #ifdef BID_SET_STATUS_FLAGS
1451       __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
1452 #endif
1453       // overflow
1454       r = sgn | INFINITY_MASK64;
1455       switch (rmode) {
1456       case BID_ROUNDING_DOWN:
1457 	if (!sgn)
1458 	  r = LARGEST_BID64;
1459 	break;
1460       case BID_ROUNDING_TO_ZERO:
1461 	r = sgn | LARGEST_BID64;
1462 	break;
1463       case BID_ROUNDING_UP:
1464 	// round up
1465 	if (sgn)
1466 	  r = SMALLEST_BID64;
1467       }
1468       return r;
1469     } else {
1470       mask = 1;
1471       mask <<= EXPONENT_SHIFT_SMALL64;
1472       if (coeff >= mask) {
1473 	r = expon;
1474 	r <<= EXPONENT_SHIFT_LARGE64;
1475 	r |= (sgn | SPECIAL_ENCODING_MASK64);
1476 	// add coeff, without leading bits
1477 	mask = (mask >> 2) - 1;
1478 	coeff &= mask;
1479 	r |= coeff;
1480 	return r;
1481       }
1482     }
1483   }
1484 
1485   r = expon;
1486   r <<= EXPONENT_SHIFT_SMALL64;
1487   r |= (coeff | sgn);
1488 
1489   return r;
1490 }
1491 
1492 
1493 /*****************************************************************************
1494 *
1495 *    BID128 pack/unpack macros
1496 *
1497 *****************************************************************************/
1498 
1499 //
1500 //   Macro for handling BID128 underflow
1501 //         sticky bit given as additional argument
1502 //
1503 __BID_INLINE__ BID_UINT128 *
bid_handle_UF_128_rem(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 CQ,BID_UINT64 R,unsigned * prounding_mode,unsigned * fpsc)1504 bid_handle_UF_128_rem (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 CQ,
1505 		   BID_UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
1506   BID_UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
1507   BID_UINT64 carry, CY;
1508   int ed2, amount;
1509   unsigned rmode, status;
1510 
1511   // UF occurs
1512   if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1513 #ifdef BID_SET_STATUS_FLAGS
1514     __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
1515 #endif
1516     pres->w[1] = sgn;
1517     pres->w[0] = 0;
1518 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1519 #ifndef IEEE_ROUND_NEAREST
1520     if ((sgn && *prounding_mode == BID_ROUNDING_DOWN)
1521 	|| (!sgn && *prounding_mode == BID_ROUNDING_UP))
1522       pres->w[0] = 1ull;
1523 #endif
1524 #endif
1525     return pres;
1526   }
1527   // CQ *= 10
1528   CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
1529   CQ2.w[0] = CQ.w[0] << 1;
1530   CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
1531   CQ8.w[0] = CQ.w[0] << 3;
1532   __add_128_128 (CQ, CQ2, CQ8);
1533 
1534   // add remainder
1535   if (R)
1536     CQ.w[0] |= 1;
1537 
1538   ed2 = 1 - expon;
1539   // add rounding constant to CQ
1540 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1541 #ifndef IEEE_ROUND_NEAREST
1542   rmode = *prounding_mode;
1543   if (sgn && (unsigned) (rmode - 1) < 2)
1544     rmode = 3 - rmode;
1545 #else
1546   rmode = 0;
1547 #endif
1548 #else
1549   rmode = 0;
1550 #endif
1551   T128 = bid_round_const_table_128[rmode][ed2];
1552   __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1553   CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1554 
1555   TP128 = bid_reciprocals10_128[ed2];
1556   __mul_128x128_full (Qh, Ql, CQ, TP128);
1557   amount = bid_recip_scale[ed2];
1558 
1559   if (amount >= 64) {
1560     CQ.w[0] = Qh.w[1] >> (amount - 64);
1561     CQ.w[1] = 0;
1562   } else {
1563     __shr_128 (CQ, Qh, amount);
1564   }
1565 
1566   expon = 0;
1567 
1568 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1569 #ifndef IEEE_ROUND_NEAREST
1570   if (!(*prounding_mode))
1571 #endif
1572     if (CQ.w[0] & 1) {
1573       // check whether fractional part of initial_P/10^ed1 is exactly .5
1574 
1575       // get remainder
1576       __shl_128_long (Qh1, Qh, (128 - amount));
1577 
1578       if (!Qh1.w[1] && !Qh1.w[0]
1579 	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
1580 	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
1581 		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0]))) {
1582 	CQ.w[0]--;
1583       }
1584     }
1585 #endif
1586 
1587 #ifdef BID_SET_STATUS_FLAGS
1588 
1589   if (is_inexact (fpsc))
1590     __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
1591   else {
1592     status = BID_INEXACT_EXCEPTION;
1593     // get remainder
1594     __shl_128_long (Qh1, Qh, (128 - amount));
1595 
1596     switch (rmode) {
1597     case BID_ROUNDING_TO_NEAREST:
1598     case BID_ROUNDING_TIES_AWAY:
1599       // test whether fractional part is 0
1600       if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1601 	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
1602 	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
1603 		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0])))
1604 	status = BID_EXACT_STATUS;
1605       break;
1606     case BID_ROUNDING_DOWN:
1607     case BID_ROUNDING_TO_ZERO:
1608       if ((!Qh1.w[1]) && (!Qh1.w[0])
1609 	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
1610 	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
1611 		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0])))
1612 	status = BID_EXACT_STATUS;
1613       break;
1614     default:
1615       // round up
1616       __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1617 		       bid_reciprocals10_128[ed2].w[0]);
1618       __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1619 			  bid_reciprocals10_128[ed2].w[1], CY);
1620       __shr_128_long (Qh, Qh1, (128 - amount));
1621       Tmp.w[0] = 1;
1622       Tmp.w[1] = 0;
1623       __shl_128_long (Tmp1, Tmp, amount);
1624       Qh.w[0] += carry;
1625       if (Qh.w[0] < carry)
1626 	Qh.w[1]++;
1627       if (__unsigned_compare_ge_128 (Qh, Tmp1))
1628 	status = BID_EXACT_STATUS;
1629     }
1630 
1631     if (status != BID_EXACT_STATUS)
1632       __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
1633   }
1634 
1635 #endif
1636 
1637   pres->w[1] = sgn | CQ.w[1];
1638   pres->w[0] = CQ.w[0];
1639 
1640   return pres;
1641 
1642 }
1643 
1644 
1645 //
1646 //   Macro for handling BID128 underflow
1647 //
1648 __BID_INLINE__ BID_UINT128 *
handle_UF_128(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 CQ,unsigned * prounding_mode,unsigned * fpsc)1649 handle_UF_128 (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 CQ,
1650 	       unsigned *prounding_mode, unsigned *fpsc) {
1651   BID_UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
1652   BID_UINT64 carry, CY;
1653   int ed2, amount;
1654   unsigned rmode, status;
1655 
1656   // UF occurs
1657   if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1658 #ifdef BID_SET_STATUS_FLAGS
1659     __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
1660 #endif
1661     pres->w[1] = sgn;
1662     pres->w[0] = 0;
1663 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1664 #ifndef IEEE_ROUND_NEAREST
1665     if ((sgn && *prounding_mode == BID_ROUNDING_DOWN)
1666 	|| (!sgn && *prounding_mode == BID_ROUNDING_UP))
1667       pres->w[0] = 1ull;
1668 #endif
1669 #endif
1670     return pres;
1671   }
1672 
1673   ed2 = 0 - expon;
1674   // add rounding constant to CQ
1675 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1676 #ifndef IEEE_ROUND_NEAREST
1677   rmode = *prounding_mode;
1678   if (sgn && (unsigned) (rmode - 1) < 2)
1679     rmode = 3 - rmode;
1680 #else
1681   rmode = 0;
1682 #endif
1683 #else
1684   rmode = 0;
1685 #endif
1686 
1687   T128 = bid_round_const_table_128[rmode][ed2];
1688   __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1689   CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1690 
1691   TP128 = bid_reciprocals10_128[ed2];
1692   __mul_128x128_full (Qh, Ql, CQ, TP128);
1693   amount = bid_recip_scale[ed2];
1694 
1695   if (amount >= 64) {
1696     CQ.w[0] = Qh.w[1] >> (amount - 64);
1697     CQ.w[1] = 0;
1698   } else {
1699     __shr_128 (CQ, Qh, amount);
1700   }
1701 
1702   expon = 0;
1703 
1704 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1705 #ifndef IEEE_ROUND_NEAREST
1706   if (!(*prounding_mode))
1707 #endif
1708     if (CQ.w[0] & 1) {
1709       // check whether fractional part of initial_P/10^ed1 is exactly .5
1710 
1711       // get remainder
1712       __shl_128_long (Qh1, Qh, (128 - amount));
1713 
1714       if (!Qh1.w[1] && !Qh1.w[0]
1715 	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
1716 	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
1717 		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0]))) {
1718 	CQ.w[0]--;
1719       }
1720     }
1721 #endif
1722 
1723 #ifdef BID_SET_STATUS_FLAGS
1724 
1725   if (is_inexact (fpsc))
1726     __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
1727   else {
1728     status = BID_INEXACT_EXCEPTION;
1729     // get remainder
1730     __shl_128_long (Qh1, Qh, (128 - amount));
1731 
1732     switch (rmode) {
1733     case BID_ROUNDING_TO_NEAREST:
1734     case BID_ROUNDING_TIES_AWAY:
1735       // test whether fractional part is 0
1736       if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1737 	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
1738 	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
1739 		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0])))
1740 	status = BID_EXACT_STATUS;
1741       break;
1742     case BID_ROUNDING_DOWN:
1743     case BID_ROUNDING_TO_ZERO:
1744       if ((!Qh1.w[1]) && (!Qh1.w[0])
1745 	  && (Ql.w[1] < bid_reciprocals10_128[ed2].w[1]
1746 	      || (Ql.w[1] == bid_reciprocals10_128[ed2].w[1]
1747 		  && Ql.w[0] < bid_reciprocals10_128[ed2].w[0])))
1748 	status = BID_EXACT_STATUS;
1749       break;
1750     default:
1751       // round up
1752       __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1753 		       bid_reciprocals10_128[ed2].w[0]);
1754       __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1755 			  bid_reciprocals10_128[ed2].w[1], CY);
1756       __shr_128_long (Qh, Qh1, (128 - amount));
1757       Tmp.w[0] = 1;
1758       Tmp.w[1] = 0;
1759       __shl_128_long (Tmp1, Tmp, amount);
1760       Qh.w[0] += carry;
1761       if (Qh.w[0] < carry)
1762 	Qh.w[1]++;
1763       if (__unsigned_compare_ge_128 (Qh, Tmp1))
1764 	status = BID_EXACT_STATUS;
1765     }
1766 
1767     if (status != BID_EXACT_STATUS)
1768       __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
1769   }
1770 
1771 #endif
1772 
1773   pres->w[1] = sgn | CQ.w[1];
1774   pres->w[0] = CQ.w[0];
1775 
1776   return pres;
1777 
1778 }
1779 
1780 
1781 
1782 //
1783 //  BID128 unpack, input passed by value  (used in transcendental functions only)
1784 //
1785 __BID_INLINE__ BID_UINT64
unpack_BID128_value_BLE(BID_UINT64 * psign_x,int * pexponent_x,BID_UINT128 * pcoefficient_x,BID_UINT128 x)1786 unpack_BID128_value_BLE (BID_UINT64 * psign_x, int *pexponent_x,
1787 		     BID_UINT128 * pcoefficient_x, BID_UINT128 x) {
1788   BID_UINT128 coeff, T33, T34;
1789   BID_UINT64 ex;
1790 
1791   *psign_x = (x.w[BID_HIGH_128W]) & 0x8000000000000000ull;
1792 
1793   // special encodings
1794   if ((x.w[BID_HIGH_128W] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1795     if ((x.w[BID_HIGH_128W] & INFINITY_MASK64) < INFINITY_MASK64) {
1796       // non-canonical input
1797       pcoefficient_x->w[BID_LOW_128W] = 0;
1798       pcoefficient_x->w[BID_HIGH_128W] = 0;
1799       ex = (x.w[BID_HIGH_128W]) >> 47;
1800       *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1801       return 0;
1802     }
1803     // 10^33
1804     T33 = bid_power10_table_128[33];
1805 
1806     pcoefficient_x->w[BID_LOW_128W] = x.w[BID_LOW_128W];
1807     pcoefficient_x->w[BID_HIGH_128W] = (x.w[BID_HIGH_128W]) & 0x00003fffffffffffull;
1808 	if ((pcoefficient_x->w[BID_HIGH_128W]>T33.w[1]) || ((pcoefficient_x->w[BID_HIGH_128W]==T33.w[1]) && (pcoefficient_x->w[BID_LOW_128W]>=T33.w[0]))) // non-canonical
1809     {
1810       pcoefficient_x->w[BID_HIGH_128W] = (x.w[BID_HIGH_128W]) & 0xfe00000000000000ull;
1811       pcoefficient_x->w[BID_LOW_128W] = 0;
1812     } else
1813       pcoefficient_x->w[BID_HIGH_128W] = (x.w[BID_HIGH_128W]) & 0xfe003fffffffffffull;
1814     if ((x.w[BID_HIGH_128W] & NAN_MASK64) == INFINITY_MASK64) {
1815       pcoefficient_x->w[BID_LOW_128W] = 0;
1816       pcoefficient_x->w[BID_HIGH_128W] = x.w[BID_HIGH_128W] & SINFINITY_MASK64;
1817     }
1818     *pexponent_x = 0;
1819     return 0;	// NaN or Infinity
1820   }
1821 
1822   coeff.w[BID_LOW_128W] = x.w[BID_LOW_128W];
1823   coeff.w[BID_HIGH_128W] = (x.w[BID_HIGH_128W]) & SMALL_COEFF_MASK128;
1824 
1825   // 10^34
1826   T34 = bid_power10_table_128[34];
1827   // check for non-canonical values
1828   if ((coeff.w[BID_HIGH_128W]>T34.w[1]) || ((coeff.w[BID_HIGH_128W]==T34.w[1]) && (coeff.w[BID_LOW_128W]>=T34.w[0]))) {
1829 	  coeff.w[BID_LOW_128W] = coeff.w[BID_HIGH_128W] = 0;}
1830 
1831   pcoefficient_x->w[BID_LOW_128W] = coeff.w[BID_LOW_128W];
1832   pcoefficient_x->w[BID_HIGH_128W] = coeff.w[BID_HIGH_128W];
1833 
1834   ex = (x.w[BID_HIGH_128W]) >> 49;
1835   *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1836 
1837   return coeff.w[BID_LOW_128W] | coeff.w[BID_HIGH_128W];
1838 }
1839 
1840 
1841 
1842 //
1843 //  BID128 unpack, input passed by value
1844 //
1845 __BID_INLINE__ BID_UINT64
unpack_BID128_value(BID_UINT64 * psign_x,int * pexponent_x,BID_UINT128 * pcoefficient_x,BID_UINT128 x)1846 unpack_BID128_value (BID_UINT64 * psign_x, int *pexponent_x,
1847 		     BID_UINT128 * pcoefficient_x, BID_UINT128 x) {
1848   BID_UINT128 coeff, T33, T34;
1849   BID_UINT64 ex;
1850 
1851   *psign_x = (x.w[1]) & 0x8000000000000000ull;
1852 
1853   // special encodings
1854   if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1855     if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1856       // non-canonical input
1857       pcoefficient_x->w[0] = 0;
1858       pcoefficient_x->w[1] = 0;
1859       ex = (x.w[1]) >> 47;
1860       *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1861       return 0;
1862     }
1863     // 10^33
1864     T33 = bid_power10_table_128[33];
1865     /*coeff.w[0] = x.w[0];
1866        coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
1867        pcoefficient_x->w[0] = x.w[0];
1868        pcoefficient_x->w[1] = x.w[1];
1869        if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
1870        pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */
1871 
1872     pcoefficient_x->w[0] = x.w[0];
1873     pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
1874     if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
1875     {
1876       pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
1877       pcoefficient_x->w[0] = 0;
1878     } else
1879       pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
1880     if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
1881       pcoefficient_x->w[0] = 0;
1882       pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
1883     }
1884     *pexponent_x = 0;
1885     return 0;	// NaN or Infinity
1886   }
1887 
1888   coeff.w[0] = x.w[0];
1889   coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;
1890 
1891   // 10^34
1892   T34 = bid_power10_table_128[34];
1893   // check for non-canonical values
1894   if (__unsigned_compare_ge_128 (coeff, T34))
1895     coeff.w[0] = coeff.w[1] = 0;
1896 
1897   pcoefficient_x->w[0] = coeff.w[0];
1898   pcoefficient_x->w[1] = coeff.w[1];
1899 
1900   ex = (x.w[1]) >> 49;
1901   *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1902 
1903   return coeff.w[0] | coeff.w[1];
1904 }
1905 
1906 //
1907 //  BID128 unpack, input passed by value
1908 //
1909 __BID_INLINE__ void
quick_unpack_BID128_em(int * pexponent_x,BID_UINT128 * pcoefficient_x,BID_UINT128 x)1910 quick_unpack_BID128_em (int *pexponent_x,
1911 		     BID_UINT128 * pcoefficient_x, BID_UINT128 x) {
1912   BID_UINT64 ex;
1913 
1914   pcoefficient_x->w[0] = x.w[0];
1915   pcoefficient_x->w[1] = (x.w[1]) & SMALL_COEFF_MASK128;
1916 
1917   ex = (x.w[1]) >> 49;
1918   *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1919 
1920 }
1921 
1922 
1923 //
1924 //  BID128 unpack, input pased by reference
1925 //
1926 __BID_INLINE__ BID_UINT64
unpack_BID128(BID_UINT64 * psign_x,int * pexponent_x,BID_UINT128 * pcoefficient_x,BID_UINT128 * px)1927 unpack_BID128 (BID_UINT64 * psign_x, int *pexponent_x,
1928 	       BID_UINT128 * pcoefficient_x, BID_UINT128 * px) {
1929   BID_UINT128 coeff, T33, T34;
1930   BID_UINT64 ex;
1931 
1932   *psign_x = (px->w[1]) & 0x8000000000000000ull;
1933 
1934   // special encodings
1935   if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1936     if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1937       // non-canonical input
1938       pcoefficient_x->w[0] = 0;
1939       pcoefficient_x->w[1] = 0;
1940       ex = (px->w[1]) >> 47;
1941       *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1942       return 0;
1943     }
1944     // 10^33
1945     T33 = bid_power10_table_128[33];
1946     coeff.w[0] = px->w[0];
1947     coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
1948     pcoefficient_x->w[0] = px->w[0];
1949     pcoefficient_x->w[1] = px->w[1];
1950     if (__unsigned_compare_ge_128 (coeff, T33)) {	// non-canonical
1951       pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
1952       pcoefficient_x->w[0] = 0;
1953     }
1954     *pexponent_x = 0;
1955     return 0;	// NaN or Infinity
1956   }
1957 
1958   coeff.w[0] = px->w[0];
1959   coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;
1960 
1961   // 10^34
1962   T34 = bid_power10_table_128[34];
1963   // check for non-canonical values
1964   if (__unsigned_compare_ge_128 (coeff, T34))
1965     coeff.w[0] = coeff.w[1] = 0;
1966 
1967   pcoefficient_x->w[0] = coeff.w[0];
1968   pcoefficient_x->w[1] = coeff.w[1];
1969 
1970   ex = (px->w[1]) >> 49;
1971   *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1972 
1973   return coeff.w[0] | coeff.w[1];
1974 }
1975 
1976 //
1977 //   Pack macro checks for overflow, but not underflow
1978 //
1979 __BID_INLINE__ BID_UINT128 *
bid_get_BID128_very_fast_OF(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff,unsigned * prounding_mode,unsigned * fpsc)1980 bid_get_BID128_very_fast_OF (BID_UINT128 * pres, BID_UINT64 sgn, int expon,
1981 			 BID_UINT128 coeff, unsigned *prounding_mode,
1982 			 unsigned *fpsc) {
1983   BID_UINT128 T;
1984   BID_UINT64 tmp, tmp2;
1985 
1986   if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1987 
1988     if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
1989       T = bid_power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
1990       while (__unsigned_compare_gt_128 (T, coeff)
1991 	     && expon > DECIMAL_MAX_EXPON_128) {
1992 	coeff.w[1] =
1993 	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
1994 	  (coeff.w[0] >> 63);
1995 	tmp2 = coeff.w[0] << 3;
1996 	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
1997 	if (coeff.w[0] < tmp2)
1998 	  coeff.w[1]++;
1999 
2000 	expon--;
2001       }
2002     }
2003     if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
2004       // OF
2005 #ifdef BID_SET_STATUS_FLAGS
2006       __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
2007 #endif
2008 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2009 #ifndef IEEE_ROUND_NEAREST
2010       if (*prounding_mode == BID_ROUNDING_TO_ZERO
2011 	  || (sgn && *prounding_mode == BID_ROUNDING_UP) || (!sgn
2012 							 &&
2013 							 *prounding_mode
2014 							 ==
2015 							 BID_ROUNDING_DOWN))
2016       {
2017 	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2018 	pres->w[0] = LARGEST_BID128_LOW;
2019       } else
2020 #endif
2021 #endif
2022       {
2023 	pres->w[1] = sgn | INFINITY_MASK64;
2024 	pres->w[0] = 0;
2025       }
2026       return pres;
2027     }
2028   }
2029 
2030   pres->w[0] = coeff.w[0];
2031   tmp = expon;
2032   tmp <<= 49;
2033   pres->w[1] = sgn | tmp | coeff.w[1];
2034 
2035   return pres;
2036 }
2037 
2038 
2039 //
2040 //   No overflow/underflow checks
2041 //   No checking for coefficient == 10^34 (rounding artifact)
2042 //
2043 __BID_INLINE__ BID_UINT128 *
bid_get_BID128_very_fast(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff)2044 bid_get_BID128_very_fast (BID_UINT128 * pres, BID_UINT64 sgn, int expon,
2045 		      BID_UINT128 coeff) {
2046   BID_UINT64 tmp;
2047 
2048   pres->w[0] = coeff.w[0];
2049   tmp = expon;
2050   tmp <<= 49;
2051   pres->w[1] = sgn | tmp | coeff.w[1];
2052 
2053   return pres;
2054 }
2055 
2056 // same as above, but for either endian mode
2057 __BID_INLINE__ BID_UINT128 *
bid_get_BID128_very_fast_BLE(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff)2058 bid_get_BID128_very_fast_BLE (BID_UINT128 * pres, BID_UINT64 sgn, int expon,
2059 		      BID_UINT128 coeff) {
2060   BID_UINT64 tmp;
2061 
2062   pres->w[BID_LOW_128W] = coeff.w[BID_LOW_128W];
2063   tmp = expon;
2064   tmp <<= 49;
2065   pres->w[BID_HIGH_128W] = sgn | tmp | coeff.w[BID_HIGH_128W];
2066 
2067   return pres;
2068 }
2069 
2070 //
2071 //   No overflow/underflow checks
2072 //
2073 __BID_INLINE__ BID_UINT128 *
bid_get_BID128_fast(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff)2074 bid_get_BID128_fast (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 coeff) {
2075   BID_UINT64 tmp;
2076 
2077   // coeff==10^34?
2078   if (coeff.w[1] == 0x0001ed09bead87c0ull
2079       && coeff.w[0] == 0x378d8e6400000000ull) {
2080     expon++;
2081     // set coefficient to 10^33
2082     coeff.w[1] = 0x0000314dc6448d93ull;
2083     coeff.w[0] = 0x38c15b0a00000000ull;
2084   }
2085 
2086   pres->w[0] = coeff.w[0];
2087   tmp = expon;
2088   tmp <<= 49;
2089   pres->w[1] = sgn | tmp | coeff.w[1];
2090 
2091   return pres;
2092 }
2093 
2094 //
2095 //   General BID128 pack macro
2096 //
2097 __BID_INLINE__ BID_UINT128 *
bid_get_BID128(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff,unsigned * prounding_mode,unsigned * fpsc)2098 bid_get_BID128 (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 coeff,
2099 	    unsigned *prounding_mode, unsigned *fpsc) {
2100   BID_UINT128 T;
2101   BID_UINT64 tmp, tmp2;
2102 
2103   // coeff==10^34?
2104   if (coeff.w[1] == 0x0001ed09bead87c0ull
2105       && coeff.w[0] == 0x378d8e6400000000ull) {
2106     expon++;
2107     // set coefficient to 10^33
2108     coeff.w[1] = 0x0000314dc6448d93ull;
2109     coeff.w[0] = 0x38c15b0a00000000ull;
2110   }
2111   // check OF, UF
2112   if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
2113     // check UF
2114     if (expon < 0) {
2115       return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
2116 			    fpsc);
2117     }
2118 
2119     if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
2120       T = bid_power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
2121       while (__unsigned_compare_gt_128 (T, coeff)
2122 	     && expon > DECIMAL_MAX_EXPON_128) {
2123 	coeff.w[1] =
2124 	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
2125 	  (coeff.w[0] >> 63);
2126 	tmp2 = coeff.w[0] << 3;
2127 	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
2128 	if (coeff.w[0] < tmp2)
2129 	  coeff.w[1]++;
2130 
2131 	expon--;
2132       }
2133     }
2134     if (expon > DECIMAL_MAX_EXPON_128) {
2135       if (!(coeff.w[1] | coeff.w[0])) {
2136 	pres->w[1] = sgn | (((BID_UINT64) DECIMAL_MAX_EXPON_128) << 49);
2137 	pres->w[0] = 0;
2138 	return pres;
2139       }
2140       // OF
2141 #ifdef BID_SET_STATUS_FLAGS
2142       __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
2143 #endif
2144 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2145 #ifndef IEEE_ROUND_NEAREST
2146       if (*prounding_mode == BID_ROUNDING_TO_ZERO
2147 	  || (sgn && *prounding_mode == BID_ROUNDING_UP) || (!sgn
2148 							 &&
2149 							 *prounding_mode
2150 							 ==
2151 							 BID_ROUNDING_DOWN))
2152       {
2153 	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2154 	pres->w[0] = LARGEST_BID128_LOW;
2155       } else
2156 #endif
2157 #endif
2158       {
2159 	pres->w[1] = sgn | INFINITY_MASK64;
2160 	pres->w[0] = 0;
2161       }
2162       return pres;
2163     }
2164   }
2165 
2166   pres->w[0] = coeff.w[0];
2167   tmp = expon;
2168   tmp <<= 49;
2169   pres->w[1] = sgn | tmp | coeff.w[1];
2170 
2171   return pres;
2172 }
2173 
2174 
2175 //
2176 //  Macro used for conversions from string
2177 //        (no additional arguments given for rounding mode, status flags)
2178 //
2179 __BID_INLINE__ BID_UINT128 *
bid_get_BID128_string(BID_UINT128 * pres,BID_UINT64 sgn,int expon,BID_UINT128 coeff)2180 bid_get_BID128_string (BID_UINT128 * pres, BID_UINT64 sgn, int expon, BID_UINT128 coeff) {
2181   BID_UINT128 D2, D8;
2182   BID_UINT64 tmp;
2183   unsigned rmode = 0, status;
2184 
2185   // coeff==10^34?
2186   if (coeff.w[1] == 0x0001ed09bead87c0ull
2187       && coeff.w[0] == 0x378d8e6400000000ull) {
2188     expon++;
2189     // set coefficient to 10^33
2190     coeff.w[1] = 0x0000314dc6448d93ull;
2191     coeff.w[0] = 0x38c15b0a00000000ull;
2192   }
2193   // check OF, UF
2194   if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
2195     // check UF
2196     if (expon < 0)
2197       return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);
2198 
2199     // OF
2200 
2201     if (expon < DECIMAL_MAX_EXPON_128 + 34) {
2202       while (expon > DECIMAL_MAX_EXPON_128 &&
2203 	     (coeff.w[1] < bid_power10_table_128[33].w[1] ||
2204 	      (coeff.w[1] == bid_power10_table_128[33].w[1]
2205 	       && coeff.w[0] < bid_power10_table_128[33].w[0]))) {
2206 	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
2207 	D2.w[0] = coeff.w[0] << 1;
2208 	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
2209 	D8.w[0] = coeff.w[0] << 3;
2210 
2211 	__add_128_128 (coeff, D2, D8);
2212 	expon--;
2213       }
2214     } else if (!(coeff.w[0] | coeff.w[1]))
2215       expon = DECIMAL_MAX_EXPON_128;
2216 
2217     if (expon > DECIMAL_MAX_EXPON_128) {
2218       pres->w[1] = sgn | INFINITY_MASK64;
2219       pres->w[0] = 0;
2220       switch (rmode) {
2221       case BID_ROUNDING_DOWN:
2222 	if (!sgn) {
2223 	  pres->w[1] = LARGEST_BID128_HIGH;
2224 	  pres->w[0] = LARGEST_BID128_LOW;
2225 	}
2226 	break;
2227       case BID_ROUNDING_TO_ZERO:
2228 	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2229 	pres->w[0] = LARGEST_BID128_LOW;
2230 	break;
2231       case BID_ROUNDING_UP:
2232 	// round up
2233 	if (sgn) {
2234 	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
2235 	  pres->w[0] = LARGEST_BID128_LOW;
2236 	}
2237 	break;
2238       }
2239 
2240       return pres;
2241     }
2242   }
2243 
2244   pres->w[0] = coeff.w[0];
2245   tmp = expon;
2246   tmp <<= 49;
2247   pres->w[1] = sgn | tmp | coeff.w[1];
2248 
2249   return pres;
2250 }
2251 
2252 
2253 
2254 /*****************************************************************************
2255 *
2256 *    BID32 pack/unpack macros
2257 *
2258 *****************************************************************************/
2259 
2260 
2261 __BID_INLINE__ BID_UINT32
unpack_BID32(BID_UINT32 * psign_x,int * pexponent_x,BID_UINT32 * pcoefficient_x,BID_UINT32 x)2262 unpack_BID32 (BID_UINT32 * psign_x, int *pexponent_x,
2263 	      BID_UINT32 * pcoefficient_x, BID_UINT32 x) {
2264   BID_UINT32 tmp;
2265 
2266   *psign_x = x & 0x80000000;
2267 
2268   if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
2269     // special encodings
2270     if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
2271       *pcoefficient_x = x & 0xfe0fffff;
2272       if ((x & 0x000fffff) >= 1000000)
2273 	*pcoefficient_x = x & 0xfe000000;
2274       if ((x & NAN_MASK32) == INFINITY_MASK32)
2275 	*pcoefficient_x = x & 0xf8000000;
2276       *pexponent_x = 0;
2277       return 0;	// NaN or Infinity
2278     }
2279     // coefficient
2280     *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
2281     // check for non-canonical value
2282     if (*pcoefficient_x >= 10000000)
2283       *pcoefficient_x = 0;
2284     // get exponent
2285     tmp = x >> 21;
2286     *pexponent_x = tmp & EXPONENT_MASK32;
2287     return *pcoefficient_x;
2288   }
2289   // exponent
2290   tmp = x >> 23;
2291   *pexponent_x = tmp & EXPONENT_MASK32;
2292   // coefficient
2293   *pcoefficient_x = (x & LARGE_COEFF_MASK32);
2294 
2295   return *pcoefficient_x;
2296 }
2297 
2298 //
2299 //   General pack macro for BID32
2300 //
2301 __BID_INLINE__ BID_UINT32
get_BID32(BID_UINT32 sgn,int expon,BID_UINT64 coeff,int rmode,unsigned * fpsc)2302 get_BID32 (BID_UINT32 sgn, int expon, BID_UINT64 coeff, int rmode,
2303 	   unsigned *fpsc) {
2304   BID_UINT128 Q;
2305   BID_UINT64 _C64, remainder_h, carry, Stemp;
2306   BID_UINT32 r, mask;
2307   int extra_digits, amount, amount2;
2308   unsigned status;
2309 
2310   if (coeff > 9999999ull) {
2311     expon++;
2312     coeff = 1000000ull;
2313   }
2314   // check for possible underflow/overflow
2315   if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2316     if (expon < 0) {
2317       // underflow
2318       if (expon + MAX_FORMAT_DIGITS_32 < 0) {
2319 #ifdef BID_SET_STATUS_FLAGS
2320 	__set_status_flags (fpsc,
2321 			    BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
2322 #endif
2323 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2324 #ifndef IEEE_ROUND_NEAREST
2325 	if (rmode == BID_ROUNDING_DOWN && sgn)
2326 	  return 0x80000001;
2327 	if (rmode == BID_ROUNDING_UP && !sgn)
2328 	  return 1;
2329 #endif
2330 #endif
2331 	// result is 0
2332 	return sgn;
2333       }
2334       // get digits to be shifted out
2335 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2336       rmode = 0;
2337 #endif
2338 #ifdef IEEE_ROUND_NEAREST
2339       rmode = 0;
2340 #endif
2341 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2342 #ifndef IEEE_ROUND_NEAREST
2343       if (sgn && (unsigned) (rmode - 1) < 2)
2344 	rmode = 3 - rmode;
2345 #endif
2346 #endif
2347 
2348       extra_digits = -expon;
2349       coeff += bid_round_const_table[rmode][extra_digits];
2350 
2351       // get coeff*(2^M[extra_digits])/10^extra_digits
2352       __mul_64x64_to_128 (Q, coeff, bid_reciprocals10_64[extra_digits]);
2353 
2354       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
2355       amount = bid_short_recip_scale[extra_digits];
2356 
2357       _C64 = Q.w[1] >> amount;
2358 
2359 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2360 #ifndef IEEE_ROUND_NEAREST
2361       if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
2362 #endif
2363 	if (_C64 & 1) {
2364 	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
2365 
2366 	  // get remainder
2367 	  amount2 = 64 - amount;
2368 	  remainder_h = 0;
2369 	  remainder_h--;
2370 	  remainder_h >>= amount2;
2371 	  remainder_h = remainder_h & Q.w[1];
2372 
2373 	  if (!remainder_h && (Q.w[0] < bid_reciprocals10_64[extra_digits])) {
2374 	    _C64--;
2375 	  }
2376 	}
2377 #endif
2378 
2379 #ifdef BID_SET_STATUS_FLAGS
2380 
2381       if (is_inexact (fpsc))
2382 	__set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);
2383       else {
2384 	status = BID_INEXACT_EXCEPTION;
2385 	// get remainder
2386 	remainder_h = Q.w[1] << (64 - amount);
2387 
2388 	switch (rmode) {
2389 	case BID_ROUNDING_TO_NEAREST:
2390 	case BID_ROUNDING_TIES_AWAY:
2391 	  // test whether fractional part is 0
2392 	  if (remainder_h == 0x8000000000000000ull
2393 	      && (Q.w[0] < bid_reciprocals10_64[extra_digits]))
2394 	    status = BID_EXACT_STATUS;
2395 	  break;
2396 	case BID_ROUNDING_DOWN:
2397 	case BID_ROUNDING_TO_ZERO:
2398 	  if (!remainder_h && (Q.w[0] < bid_reciprocals10_64[extra_digits]))
2399 	    status = BID_EXACT_STATUS;
2400 	  break;
2401 	default:
2402 	  // round up
2403 	  __add_carry_out (Stemp, carry, Q.w[0],
2404 			   bid_reciprocals10_64[extra_digits]);
2405 	  if ((remainder_h >> (64 - amount)) + carry >=
2406 	      (((BID_UINT64) 1) << amount))
2407 	    status = BID_EXACT_STATUS;
2408 	}
2409 
2410 	if (status != BID_EXACT_STATUS)
2411 	  __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION | status);
2412       }
2413 
2414 #endif
2415 
2416       return sgn | (BID_UINT32) _C64;
2417     }
2418 
2419     if(!coeff)  { if(expon > DECIMAL_MAX_EXPON_32) expon = DECIMAL_MAX_EXPON_32; }
2420     while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
2421       coeff = (coeff << 3) + (coeff << 1);
2422       expon--;
2423     }
2424     if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2425       __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
2426       // overflow
2427       r = sgn | INFINITY_MASK32;
2428       switch (rmode) {
2429       case BID_ROUNDING_DOWN:
2430 	if (!sgn)
2431 	  r = LARGEST_BID32;
2432 	break;
2433       case BID_ROUNDING_TO_ZERO:
2434 	r = sgn | LARGEST_BID32;
2435 	break;
2436       case BID_ROUNDING_UP:
2437 	// round up
2438 	if (sgn)
2439 	  r = sgn | LARGEST_BID32;
2440       }
2441       return r;
2442     }
2443   }
2444 
2445   mask = 1 << 23;
2446 
2447   // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2448   if (coeff < mask) {
2449     r = expon;
2450     r <<= 23;
2451     r |= ((BID_UINT32) coeff | sgn);
2452     return r;
2453   }
2454   // special format
2455 
2456   r = expon;
2457   r <<= 21;
2458   r |= (sgn | SPECIAL_ENCODING_MASK32);
2459   // add coeff, without leading bits
2460   mask = (1 << 21) - 1;
2461   r |= (((BID_UINT32) coeff) & mask);
2462 
2463   return r;
2464 }
2465 
2466 
2467 
2468 
2469 
2470 //
2471 //   General pack macro for BID32
2472 //
2473 __BID_INLINE__ BID_UINT32
get_BID32_UF(BID_UINT32 sgn,int expon,BID_UINT64 coeff,BID_UINT32 R,int rmode,unsigned * fpsc)2474 get_BID32_UF (BID_UINT32 sgn, int expon, BID_UINT64 coeff, BID_UINT32 R, int rmode,
2475 	   unsigned *fpsc) {
2476   BID_UINT128 Q;
2477   BID_UINT64 _C64, remainder_h, carry, Stemp;
2478   BID_UINT32 r, mask;
2479   int extra_digits, amount, amount2;
2480   unsigned status;
2481 
2482   if (coeff > 9999999ull) {
2483     expon++;
2484     coeff = 1000000ull;
2485   }
2486   // check for possible underflow/overflow
2487   if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2488    if (expon < 0) {
2489       // underflow
2490       if (expon + MAX_FORMAT_DIGITS_32 < 0) {
2491 #ifdef BID_SET_STATUS_FLAGS
2492 	__set_status_flags (fpsc,
2493 			    BID_UNDERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
2494 #endif
2495 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2496 #ifndef IEEE_ROUND_NEAREST
2497 	if (rmode == BID_ROUNDING_DOWN && sgn)
2498 	  return 0x80000001;
2499 	if (rmode == BID_ROUNDING_UP && !sgn)
2500 	  return 1;
2501 #endif
2502 #endif
2503 	// result is 0
2504 	return sgn;
2505       }
2506       // get digits to be shifted out
2507 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2508       rmode = 0;
2509 #endif
2510 #ifdef IEEE_ROUND_NEAREST
2511       rmode = 0;
2512 #endif
2513 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2514 #ifndef IEEE_ROUND_NEAREST
2515       if (sgn && (unsigned) (rmode - 1) < 2)
2516 	rmode = 3 - rmode;
2517 #endif
2518 #endif
2519 
2520   // 10*coeff
2521   coeff = (coeff << 3) + (coeff << 1);
2522   if (R)
2523     coeff |= 1;
2524 
2525       extra_digits = 1-expon;
2526       coeff += bid_round_const_table[rmode][extra_digits];
2527 
2528       // get coeff*(2^M[extra_digits])/10^extra_digits
2529       __mul_64x64_to_128 (Q, coeff, bid_reciprocals10_64[extra_digits]);
2530 
2531       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
2532       amount = bid_short_recip_scale[extra_digits];
2533 
2534       _C64 = Q.w[1] >> amount;
2535 
2536 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2537 #ifndef IEEE_ROUND_NEAREST
2538       if (rmode == 0)	//BID_ROUNDING_TO_NEAREST
2539 #endif
2540 	if (_C64 & 1) {
2541 	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
2542 
2543 	  // get remainder
2544 	  amount2 = 64 - amount;
2545 	  remainder_h = 0;
2546 	  remainder_h--;
2547 	  remainder_h >>= amount2;
2548 	  remainder_h = remainder_h & Q.w[1];
2549 
2550 	  if (!remainder_h && (Q.w[0] < bid_reciprocals10_64[extra_digits])) {
2551 	    _C64--;
2552 	  }
2553 	}
2554 #endif
2555 
2556 #ifdef BID_SET_STATUS_FLAGS
2557 	  if (is_inexact (fpsc)){
2558 			  __set_status_flags (fpsc, BID_UNDERFLOW_EXCEPTION);}
2559       else {
2560 	status = BID_INEXACT_EXCEPTION;
2561 	// get remainder
2562 	remainder_h = Q.w[1] << (64 - amount);
2563 
2564 	switch (rmode) {
2565 	case BID_ROUNDING_TO_NEAREST:
2566 	case BID_ROUNDING_TIES_AWAY:
2567 	  // test whether fractional part is 0
2568 	  if (remainder_h == 0x8000000000000000ull
2569 	      && (Q.w[0] < bid_reciprocals10_64[extra_digits]))
2570 	    status = BID_EXACT_STATUS;
2571 	  break;
2572 	case BID_ROUNDING_DOWN:
2573 	case BID_ROUNDING_TO_ZERO:
2574 	  if (!remainder_h && (Q.w[0] < bid_reciprocals10_64[extra_digits]))
2575 	    status = BID_EXACT_STATUS;
2576 	  break;
2577 	default:
2578 	  // round up
2579 	  __add_carry_out (Stemp, carry, Q.w[0],
2580 			   bid_reciprocals10_64[extra_digits]);
2581 	  if ((remainder_h >> (64 - amount)) + carry >=
2582 	      (((BID_UINT64) 1) << amount))
2583 	    status = BID_EXACT_STATUS;
2584 	}
2585 
2586 	if (status != BID_EXACT_STATUS) {
2587 	  __set_status_flags (fpsc,  BID_UNDERFLOW_EXCEPTION|status);
2588 	}
2589  }
2590 #endif
2591 
2592       return sgn | (BID_UINT32) _C64;
2593     }
2594 
2595     while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
2596       coeff = (coeff << 3) + (coeff << 1);
2597       expon--;
2598     }
2599     if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2600       __set_status_flags (fpsc, BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION);
2601       // overflow
2602       r = sgn | INFINITY_MASK32;
2603       switch (rmode) {
2604       case BID_ROUNDING_DOWN:
2605 	if (!sgn)
2606 	  r = LARGEST_BID32;
2607 	break;
2608       case BID_ROUNDING_TO_ZERO:
2609 	r = sgn | LARGEST_BID32;
2610 	break;
2611       case BID_ROUNDING_UP:
2612 	// round up
2613 	if (sgn)
2614 	  r = sgn | LARGEST_BID32;
2615       }
2616       return r;
2617     }
2618   }
2619 
2620   mask = 1 << 23;
2621 
2622   // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2623   if (coeff < mask) {
2624     r = expon;
2625     r <<= 23;
2626     r |= ((BID_UINT32) coeff | sgn);
2627     return r;
2628   }
2629   // special format
2630 
2631   r = expon;
2632   r <<= 21;
2633   r |= (sgn | SPECIAL_ENCODING_MASK32);
2634   // add coeff, without leading bits
2635   mask = (1 << 21) - 1;
2636   r |= (((BID_UINT32) coeff) & mask);
2637 
2638   return r;
2639 }
2640 
2641 
2642 
2643 
2644 
2645 
2646 //
2647 //   no overflow/underflow checks
2648 //
2649 __BID_INLINE__ BID_UINT32
very_fast_get_BID32(BID_UINT32 sgn,int expon,BID_UINT32 coeff)2650 very_fast_get_BID32 (BID_UINT32 sgn, int expon, BID_UINT32 coeff) {
2651   BID_UINT32 r, mask;
2652 
2653   mask = 1 << 23;
2654 
2655   // check whether coefficient fits in 10*2+3 bits
2656   if (coeff < mask) {
2657     r = expon;
2658     r <<= 23;
2659     r |= (coeff | sgn);
2660     return r;
2661   }
2662   // special format
2663   r = expon;
2664   r <<= 21;
2665   r |= (sgn | SPECIAL_ENCODING_MASK32);
2666   // add coeff, without leading bits
2667   mask = (1 << 21) - 1;
2668   coeff &= mask;
2669   r |= coeff;
2670 
2671   return r;
2672 }
2673 
2674 __BID_INLINE__ BID_UINT32
fast_get_BID32(BID_UINT32 sgn,int expon,BID_UINT32 coeff)2675 fast_get_BID32 (BID_UINT32 sgn, int expon, BID_UINT32 coeff) {
2676   BID_UINT32 r, mask;
2677 
2678   mask = 1 << 23;
2679 
2680   if (coeff > 9999999ul) {
2681     expon++;
2682     coeff = 1000000ul;
2683   }
2684   // check whether coefficient fits in 10*2+3 bits
2685   if (coeff < mask) {
2686     r = expon;
2687     r <<= 23;
2688     r |= (coeff | sgn);
2689     return r;
2690   }
2691   // special format
2692   r = expon;
2693   r <<= 21;
2694   r |= (sgn | SPECIAL_ENCODING_MASK32);
2695   // add coeff, without leading bits
2696   mask = (1 << 21) - 1;
2697   coeff &= mask;
2698   r |= coeff;
2699 
2700   return r;
2701 }
2702 
2703 
2704 
2705 /*************************************************************
2706  *
2707  *************************************************************/
2708 typedef struct BID_ALIGN (16)
2709      {
2710        BID_UINT64 w[6];
2711      } BID_UINT384;
2712      typedef struct BID_ALIGN (16)
2713      {
2714        BID_UINT64 w[8];
2715      } BID_UINT512;
2716 
2717 // #define P                               34
2718 #define MASK_STEERING_BITS              0x6000000000000000ull
2719 #define MASK_BINARY_EXPONENT1           0x7fe0000000000000ull
2720 #define MASK_BINARY_SIG1                0x001fffffffffffffull
2721 #define MASK_BINARY_EXPONENT2           0x1ff8000000000000ull
2722     //used to take G[2:w+3] (sec 3.3)
2723 #define MASK_BINARY_SIG2                0x0007ffffffffffffull
2724     //used to mask out G4:T0 (sec 3.3)
2725 #define MASK_BINARY_OR2                 0x0020000000000000ull
2726     //used to prefix 8+G4 to T (sec 3.3)
2727 #define UPPER_EXPON_LIMIT               51
2728 #define MASK_EXP                        0x7ffe000000000000ull
2729 #define MASK_EXP2                       0x1fff800000000000ull
2730 #define MASK_SPECIAL                    0x7800000000000000ull
2731 #define MASK_NAN                        0x7c00000000000000ull
2732 #define MASK_SNAN                       0x7e00000000000000ull
2733 #define MASK_ANY_INF                    0x7c00000000000000ull
2734 #define MASK_INF                        0x7800000000000000ull
2735 #define MASK_SIGN                       0x8000000000000000ull
2736 #define MASK_COEFF                      0x0001ffffffffffffull
2737 #define BIN_EXP_BIAS                    (0x1820ull << 49)
2738 
2739 #define EXP_MIN32                       0x00000000
2740 #define EXP_MAX32                       0x5f800000
2741 #define EXP_MIN                         0x0000000000000000ull
2742    // EXP_MIN = (-6176 + 6176) << 49
2743 #define EXP_MAX                         0x5ffe000000000000ull
2744   // EXP_MAX = (6111 + 6176) << 49
2745 #define EXP_MAX_P1                      0x6000000000000000ull
2746   // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2747 #define EXP_P1                          0x0002000000000000ull
2748   // EXP_ P1= 1 << 49
2749 #define expmin                            -6176
2750   // min unbiased exponent
2751 #define expmax                            6111
2752   // max unbiased exponent
2753 #define expmin16                          -398
2754   // min unbiased exponent
2755 #define expmax16                          369
2756   // max unbiased exponent
2757 #define expmin7                           -101
2758   // min unbiased exponent
2759 #define expmax7                           90
2760   // max unbiased exponent
2761 
2762 #define MASK_INF32                        0x78000000
2763 #define MASK_ANY_INF32                    0x7c000000
2764 #define MASK_SIGN32                       0x80000000
2765 #define MASK_NAN32                        0x7c000000
2766 #define MASK_SNAN32                       0x7e000000
2767 #define SIGNMASK32                        0x80000000
2768 #define BID32_SIG_MAX                     0x0098967f
2769 #define BID64_SIG_MAX                     0x002386F26FC0ffffull
2770 #define SIGNMASK64                        0x8000000000000000ull
2771 #define MASK_STEERING_BITS32              0x60000000
2772 #define MASK_BINARY_EXPONENT1_32          0x7f800000
2773 #define MASK_BINARY_SIG1_32               0x007fffff
2774 #define MASK_BINARY_EXPONENT2_32          0x1fe00000
2775     //used to take G[2:w+3] (sec 3.3)
2776 #define MASK_BINARY_SIG2_32               0x001fffff
2777     //used to mask out G4:T0 (sec 3.3)
2778 #define MASK_BINARY_OR2_32                0x00800000
2779 #define MASK_SPECIAL32                    0x78000000
2780 
2781 // typedef unsigned int BID_FPSC; // floating-point status and control
2782 	// bit31:
2783 	// bit30:
2784 	// bit29:
2785 	// bit28:
2786 	// bit27:
2787 	// bit26:
2788 	// bit25:
2789 	// bit24:
2790 	// bit23:
2791 	// bit22:
2792 	// bit21:
2793 	// bit20:
2794 	// bit19:
2795 	// bit18:
2796 	// bit17:
2797 	// bit16:
2798 	// bit15:
2799 	// bit14: RC:2
2800 	// bit13: RC:1
2801 	// bit12: RC:0
2802 	// bit11: PM
2803 	// bit10: UM
2804 	// bit9:  OM
2805 	// bit8:  ZM
2806 	// bit7:  DM
2807 	// bit6:  IM
2808 	// bit5:  PE
2809 	// bit4:  UE
2810 	// bit3:  OE
2811 	// bit2:  ZE
2812 	// bit1:  DE
2813 	// bit0:  IE
2814 
2815 #define ROUNDING_BID_MODE_MASK	0x00007000
2816 
2817      typedef struct _DEC_DIGITS {
2818        unsigned int digits;
2819        BID_UINT64 threshold_hi;
2820        BID_UINT64 threshold_lo;
2821        unsigned int digits1;
2822      } DEC_DIGITS;
2823 
2824      BID_EXTERN_C DEC_DIGITS bid_nr_digits[];
2825      BID_EXTERN_C BID_UINT64 bid_midpoint64[];
2826      BID_EXTERN_C BID_UINT128 bid_midpoint128[];
2827      BID_EXTERN_C BID_UINT192 bid_midpoint192[];
2828      BID_EXTERN_C BID_UINT256 bid_midpoint256[];
2829      BID_EXTERN_C BID_UINT64 bid_ten2k64[];
2830      BID_EXTERN_C BID_UINT128 bid_ten2k128[];
2831      BID_EXTERN_C BID_UINT256 bid_ten2k256[];
2832      BID_EXTERN_C BID_UINT128 bid_ten2mk128[];
2833      BID_EXTERN_C BID_UINT64 bid_ten2mk64[];
2834      BID_EXTERN_C BID_UINT128 bid_ten2mk128trunc[];
2835      BID_EXTERN_C int bid_shiftright128[];
2836      BID_EXTERN_C BID_UINT64 bid_maskhigh128[];
2837      BID_EXTERN_C BID_UINT64 bid_maskhigh128M[];
2838      BID_EXTERN_C BID_UINT64 bid_maskhigh192M[];
2839      BID_EXTERN_C BID_UINT64 bid_maskhigh256M[];
2840      BID_EXTERN_C BID_UINT64 bid_onehalf128[];
2841      BID_EXTERN_C BID_UINT64 bid_onehalf128M[];
2842      BID_EXTERN_C BID_UINT64 bid_onehalf192M[];
2843      BID_EXTERN_C BID_UINT64 bid_onehalf256M[];
2844      BID_EXTERN_C BID_UINT128 bid_ten2mk128M[];
2845      BID_EXTERN_C BID_UINT128 bid_ten2mk128truncM[];
2846      BID_EXTERN_C BID_UINT192 bid_ten2mk192truncM[];
2847      BID_EXTERN_C BID_UINT256 bid_ten2mk256truncM[];
2848      BID_EXTERN_C int bid_shiftright128M[];
2849      BID_EXTERN_C int bid_shiftright192M[];
2850      BID_EXTERN_C int bid_shiftright256M[];
2851      BID_EXTERN_C BID_UINT192 bid_ten2mk192M[];
2852      BID_EXTERN_C BID_UINT256 bid_ten2mk256M[];
2853      BID_EXTERN_C unsigned char bid_char_table2[];
2854      BID_EXTERN_C unsigned char bid_char_table3[];
2855 
2856      BID_EXTERN_C BID_UINT64 bid_ten2m3k64[];
2857      BID_EXTERN_C unsigned int bid_shift_ten2m3k64[];
2858      BID_EXTERN_C BID_UINT128 bid_ten2m3k128[];
2859      BID_EXTERN_C unsigned int bid_shift_ten2m3k128[];
2860 
2861 
2862 
2863 /***************************************************************************
2864  *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2865  ***************************************************************************/
2866 
2867      BID_EXTERN_C BID_UINT64 bid_Kx64[];
2868      BID_EXTERN_C unsigned int bid_Ex64m64[];
2869      BID_EXTERN_C BID_UINT64 bid_half64[];
2870      BID_EXTERN_C BID_UINT64 bid_mask64[];
2871      BID_EXTERN_C BID_UINT64 bid_ten2mxtrunc64[];
2872 
2873      BID_EXTERN_C BID_UINT128 bid_Kx128[];
2874      BID_EXTERN_C unsigned int bid_Ex128m128[];
2875      BID_EXTERN_C BID_UINT64 bid_half128[];
2876      BID_EXTERN_C BID_UINT64 bid_mask128[];
2877      BID_EXTERN_C BID_UINT128 bid_ten2mxtrunc128[];
2878 
2879      BID_EXTERN_C BID_UINT192 bid_Kx192[];
2880      BID_EXTERN_C unsigned int bid_Ex192m192[];
2881      BID_EXTERN_C BID_UINT64 bid_half192[];
2882      BID_EXTERN_C BID_UINT64 bid_mask192[];
2883      BID_EXTERN_C BID_UINT192 bid_ten2mxtrunc192[];
2884 
2885      BID_EXTERN_C BID_UINT256 bid_Kx256[];
2886      BID_EXTERN_C unsigned int bid_Ex256m256[];
2887      BID_EXTERN_C BID_UINT64 bid_half256[];
2888      BID_EXTERN_C BID_UINT64 bid_mask256[];
2889      BID_EXTERN_C BID_UINT256 bid_ten2mxtrunc256[];
2890 
2891      typedef union BID_ALIGN (16) __bid64_128 {
2892        BID_UINT64 b64;
2893        BID_UINT128 b128;
2894      } BID64_128;
2895 
2896      BID64_128 bid_fma (unsigned int P0,
2897 			BID64_128 x1, unsigned int P1,
2898 			BID64_128 y1, unsigned int P2,
2899 			BID64_128 z1, unsigned int P3,
2900 			unsigned int rnd_mode, BID_FPSC * fpsc);
2901 
2902 #define         P7      7
2903 #define         P16     16
2904 #define         P34     34
2905 
2906      union __int_double {
2907        BID_UINT64 i;
2908        double d;
2909      };
2910      typedef union __int_double int_double;
2911 
2912 
2913      union __int_float {
2914        BID_UINT32 i;
2915        float d;
2916      };
2917      typedef union __int_float int_float;
2918 
2919 #define SWAP(A,B,T) {\
2920         T = A; \
2921         A = B; \
2922         B = T; \
2923 }
2924 
2925 // this macro will find coefficient_x to be in [2^A, 2^(A+1) )
2926 // ie it knows that it is A bits long
2927 #define NUMBITS(A, coefficient_x, tempx){\
2928       temp_x.d=(float)coefficient_x;\
2929       A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
2930 }
2931 
2932 enum class_types {
2933   signalingNaN,
2934   quietNaN,
2935   negativeInfinity,
2936   negativeNormal,
2937   negativeSubnormal,
2938   negativeZero,
2939   positiveZero,
2940   positiveSubnormal,
2941   positiveNormal,
2942   positiveInfinity
2943 };
2944 
2945 typedef union {
2946   BID_UINT32 ui32;
2947   float f;
2948 } BID_UI32FLOAT;
2949 
2950 typedef union {
2951   BID_UINT64 ui64;
2952   double d;
2953 } BID_UI64DOUBLE;
2954 
2955 
2956 //////////////////////////////////////////////////
2957 // Macros for raising exceptions / setting flags
2958 /////////////////////////////////////////////////
2959 
2960 #define bid_raise_except(x)  __set_status_flags (pfpsf, (x))
2961 #define get_bid_sw()         (*pfpsf)
2962 #define set_bid_sw(x)        (*pfpsf) = (x)
2963 
2964 #endif
2965