1 /* Copyright (C) 2007-2014 Free Software Foundation, Inc.
2 
3 This file is part of GCC.
4 
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9 
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13 for more details.
14 
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18 
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22 <http://www.gnu.org/licenses/>.  */
23 
24 /*****************************************************************************
25  *
26  *  BID128 fma   x * y + z
27  *
28  ****************************************************************************/
29 
30 #include "bid_internal.h"
31 
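// Note on the BID128 encoding relied upon below: for a canonical finite
// operand v (G0 G1 != 11) the fields are
//   sign  = v.w[1] & MASK_SIGN;                        // bit 63
//   exp   = (int)((v.w[1] & MASK_EXP) >> 49) - 6176;   // expmin = -6176 ... expmax = 6111
//   coeff = (v.w[1] & MASK_COEFF) * 2^64 + v.w[0];     // at most 10^34 - 1 (P34 = 34 digits)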
32 static void
33 rounding_correction (unsigned int rnd_mode,
34         	     unsigned int is_inexact_lt_midpoint,
35         	     unsigned int is_inexact_gt_midpoint,
36         	     unsigned int is_midpoint_lt_even,
37         	     unsigned int is_midpoint_gt_even,
38         	     int unbexp,
39         	     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
40   // unbiased true exponent unbexp may be larger than emax
41 
42   UINT128 res = *ptrres; // expected to have the correct sign and coefficient
43   // (the exponent field is ignored, as unbexp is used instead)
44   UINT64 sign, exp;
45   UINT64 C_hi, C_lo;
46 
47   // general correction from RN to RA, RM, RP, RZ
48   // Note: if the result is negative, then is_inexact_lt_midpoint,
49   // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
50   // have to be considered as if determined for the absolute value of the
51   // result (so they seem to be reversed)
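  // Example: for a negative result with is_inexact_lt_midpoint set, rounding
  // to nearest rounded the magnitude down; under ROUNDING_DOWN (toward -inf)
  // the magnitude must be one ulp larger, which is why that combination falls
  // into the "C = C + 1" branch below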
52 
53   if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
54       is_midpoint_lt_even || is_midpoint_gt_even) {
55     *ptrfpsf |= INEXACT_EXCEPTION;
56   }
57   // apply correction to result calculated with unbounded exponent
58   sign = res.w[1] & MASK_SIGN;
59   exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
60   C_hi = res.w[1] & MASK_COEFF;
61   C_lo = res.w[0];
62   if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
63       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
64       is_midpoint_gt_even))) ||
65       (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
66       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
67       is_midpoint_gt_even)))) {
68     // C = C + 1
69     C_lo = C_lo + 1;
70     if (C_lo == 0)
71       C_hi = C_hi + 1;
72     if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
73       // C = 10^34 => rounding overflow
74       C_hi = 0x0000314dc6448d93ull;
75       C_lo = 0x38c15b0a00000000ull; // 10^33
76       // exp = exp + EXP_P1;
77       unbexp = unbexp + 1;
78       exp = (UINT64) (unbexp + 6176) << 49;
79     }
80   } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
81       ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
82       (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
83     // C = C - 1
84     C_lo = C_lo - 1;
85     if (C_lo == 0xffffffffffffffffull)
86       C_hi--;
87     // check if we crossed into the lower decade
88     if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
89       // C = 10^33 - 1
90       if (exp > 0) {
91         C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
92         C_lo = 0x378d8e63ffffffffull;
93         // exp = exp - EXP_P1;
94         unbexp = unbexp - 1;
95         exp = (UINT64) (unbexp + 6176) << 49;
96       } else { // if exp = 0
97         if (is_midpoint_lt_even || is_midpoint_gt_even ||
98             is_inexact_lt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
99           *ptrfpsf |= UNDERFLOW_EXCEPTION;
100       }
101     }
102   } else {
103     ; // the result is already correct
104   }
105   if (unbexp > expmax) { // 6111
106     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
107     exp = 0;
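    // exp is cleared because the encodings assigned to C_hi below (infinity,
    // or MAXFP with the emax exponent field) already contain the exponent and
    // indicator bits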
108     if (!sign) { // result is positive
109       if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
110         C_hi = 0x7800000000000000ull;
111         C_lo = 0x0000000000000000ull;
112       } else { // res = +MAXFP = (10^34-1) * 10^emax
113         C_hi = 0x5fffed09bead87c0ull;
114         C_lo = 0x378d8e63ffffffffull;
115       }
116     } else { // result is negative
117       if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
118         C_hi = 0xf800000000000000ull;
119         C_lo = 0x0000000000000000ull;
120       } else { // res = -MAXFP = -(10^34-1) * 10^emax
121         C_hi = 0xdfffed09bead87c0ull;
122         C_lo = 0x378d8e63ffffffffull;
123       }
124     }
125   }
126   // assemble the result
127   res.w[1] = sign | exp | C_hi;
128   res.w[0] = C_lo;
129   *ptrres = res;
130 }
131 
132 static void
133 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
134   // *pz = x + y; assume the sum fits in 256 bits
135   UINT256 z;
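  // a carry out of each 64-bit addition is detected via unsigned wrap-around
  // (the sum is smaller than an operand) and propagated by incrementing the
  // next higher word of x before it is added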
136   z.w[0] = x.w[0] + y.w[0];
137   if (z.w[0] < x.w[0]) {
138     x.w[1]++;
139     if (x.w[1] == 0x0000000000000000ull) {
140       x.w[2]++;
141       if (x.w[2] == 0x0000000000000000ull) {
142         x.w[3]++;
143       }
144     }
145   }
146   z.w[1] = x.w[1] + y.w[1];
147   if (z.w[1] < x.w[1]) {
148     x.w[2]++;
149     if (x.w[2] == 0x0000000000000000ull) {
150       x.w[3]++;
151     }
152   }
153   z.w[2] = x.w[2] + y.w[2];
154   if (z.w[2] < x.w[2]) {
155     x.w[3]++;
156   }
157   z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
158   *pz = z;
159 }
160 
161 static void
162 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
163   // *pz = x - y; assume x >= y
164   UINT256 z;
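  // a borrow out of each 64-bit subtraction is detected via unsigned
  // wrap-around (the difference is larger than the minuend) and propagated by
  // decrementing the next higher word of x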
165   z.w[0] = x.w[0] - y.w[0];
166   if (z.w[0] > x.w[0]) {
167     x.w[1]--;
168     if (x.w[1] == 0xffffffffffffffffull) {
169       x.w[2]--;
170       if (x.w[2] == 0xffffffffffffffffull) {
171         x.w[3]--;
172       }
173     }
174   }
175   z.w[1] = x.w[1] - y.w[1];
176   if (z.w[1] > x.w[1]) {
177     x.w[2]--;
178     if (x.w[2] == 0xffffffffffffffffull) {
179       x.w[3]--;
180     }
181   }
182   z.w[2] = x.w[2] - y.w[2];
183   if (z.w[2] > x.w[2]) {
184     x.w[3]--;
185   }
186   z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
187   *pz = z;
188 }
189 
190 
191 static int
192 nr_digits256 (UINT256 R256) {
193   int ind;
194   // determine the number of decimal digits in R256
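  // the digit count is obtained by comparing R256 against tables of powers of
  // ten: as used here, ten2k64[i] = 10^i, ten2k128[i] = 10^(i+20), and
  // ten2k256[i] = 10^(i+39); the loop stops at the smallest power of ten that
  // exceeds R256, which gives the number of digits directly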
195   if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
196     // between 1 and 19 digits
197     for (ind = 1; ind <= 19; ind++) {
198       if (R256.w[0] < ten2k64[ind]) {
199         break;
200       }
201     }
202     // ind digits
203   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
204              (R256.w[1] < ten2k128[0].w[1] ||
205               (R256.w[1] == ten2k128[0].w[1]
206                && R256.w[0] < ten2k128[0].w[0]))) {
207     // 20 digits
208     ind = 20;
209   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
210     // between 21 and 38 digits
211     for (ind = 1; ind <= 18; ind++) {
212       if (R256.w[1] < ten2k128[ind].w[1] ||
213           (R256.w[1] == ten2k128[ind].w[1] &&
214            R256.w[0] < ten2k128[ind].w[0])) {
215         break;
216       }
217     }
218     // ind + 20 digits
219     ind = ind + 20;
220   } else if (R256.w[3] == 0x0 &&
221              (R256.w[2] < ten2k256[0].w[2] ||
222               (R256.w[2] == ten2k256[0].w[2] &&
223                R256.w[1] < ten2k256[0].w[1]) ||
224               (R256.w[2] == ten2k256[0].w[2] &&
225                R256.w[1] == ten2k256[0].w[1] &&
226                R256.w[0] < ten2k256[0].w[0]))) {
227     // 39 digits
228     ind = 39;
229   } else {
230     // between 40 and 68 digits
231     for (ind = 1; ind <= 29; ind++) {
232       if (R256.w[3] < ten2k256[ind].w[3] ||
233           (R256.w[3] == ten2k256[ind].w[3] &&
234            R256.w[2] < ten2k256[ind].w[2]) ||
235           (R256.w[3] == ten2k256[ind].w[3] &&
236            R256.w[2] == ten2k256[ind].w[2] &&
237            R256.w[1] < ten2k256[ind].w[1]) ||
238           (R256.w[3] == ten2k256[ind].w[3] &&
239            R256.w[2] == ten2k256[ind].w[2] &&
240            R256.w[1] == ten2k256[ind].w[1] &&
241            R256.w[0] < ten2k256[ind].w[0])) {
242         break;
243       }
244     }
245     // ind + 39 digits
246     ind = ind + 39;
247   }
248   return (ind);
249 }
250 
251 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
252 // use the rounding information from ptr_is_* to avoid a double rounding error
253 static void
254 add_and_round (int q3,
255                int q4,
256                int e4,
257                int delta,
258                int p34,
259                UINT64 z_sign,
260                UINT64 p_sign,
261                UINT128 C3,
262                UINT256 C4,
263                int rnd_mode,
264                int *ptr_is_midpoint_lt_even,
265                int *ptr_is_midpoint_gt_even,
266                int *ptr_is_inexact_lt_midpoint,
267                int *ptr_is_inexact_gt_midpoint,
268                _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
269 
270   int scale;
271   int x0;
272   int ind;
273   UINT64 R64;
274   UINT128 P128, R128;
275   UINT192 P192, R192;
276   UINT256 R256;
277   int is_midpoint_lt_even = 0;
278   int is_midpoint_gt_even = 0;
279   int is_inexact_lt_midpoint = 0;
280   int is_inexact_gt_midpoint = 0;
281   int is_midpoint_lt_even0 = 0;
282   int is_midpoint_gt_even0 = 0;
283   int is_inexact_lt_midpoint0 = 0;
284   int is_inexact_gt_midpoint0 = 0;
285   int incr_exp = 0;
286   int is_tiny = 0;
287   int lt_half_ulp = 0;
288   int eq_half_ulp = 0;
289   // int gt_half_ulp = 0;
290   UINT128 res = *ptrres;
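  // outline of add_and_round: (1) scale C3 by 10^scale so that it is aligned
  // with C4 at exponent e4, (2) add or subtract the aligned coefficients,
  // (3) count the decimal digits of the exact sum/difference, (4) round it to
  // at most p34 digits with unbounded exponent, (5) handle overflow/underflow,
  // and (6) apply the correction for rounding modes other than to-nearest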
291 
292   // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
293   scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
294   // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
295 
296   // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
297   // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
298   if (scale == 0) {
299     R256.w[3] = 0x0ull;
300     R256.w[2] = 0x0ull;
301     R256.w[1] = C3.w[1];
302     R256.w[0] = C3.w[0];
303   } else if (scale <= 19) { // 10^scale fits in 64 bits
304     P128.w[1] = 0;
305     P128.w[0] = ten2k64[scale];
306     __mul_128x128_to_256 (R256, P128, C3);
307   } else if (scale <= 38) { // 10^scale fits in 128 bits
308     __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
309   } else if (scale <= 57) { // 39 <= scale <= 57
310     // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
311     // (10^67 has 223 bits; 10^69 has 230 bits);
312     // must split the computation:
313     // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
314     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
315     // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
316     __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
317     // now multiply R128 by 10^38
318     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
319   } else { // 58 <= scale <= 66
320     // 10^scale takes between 193 and 220 bits,
321     // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
322     // must split the computation:
323     // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
324     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
325     // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
326     // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
327     // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
328     __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
329     // now calculate 10^38 * 10^(scale-38) * C3
330     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
331   }
332   // C3 * 10^scale is now in R256
333 
334   // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
335   // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
336   // possible
337   // add/subtract C4 and C3 * 10^scale; the exponent is e4
338   if (p_sign == z_sign) { // R256 = C4 + R256
339     // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
340     // but may require rounding
341     add256 (C4, R256, &R256);
342   } else { // if (p_sign != z_sign) { // R256 = C4 - R256
343     // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
344     // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
345     // but may require rounding
346 
347     // compare first R256 = C3 * 10^scale and C4
348     if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
349         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
350         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
351         R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
352       // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
353       // but may require rounding
354       sub256 (R256, C4, &R256);
355       // flip p_sign too, because the result has the sign of z
356       p_sign = z_sign;
357     } else { // if C4 > C3 * 10^scale
358       // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
359       // but may require rounding
360       sub256 (C4, R256, &R256);
361     }
362     // if the result is pure zero, the sign depends on the rounding mode
363     // (x*y and z had opposite signs)
364     if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
365         R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
366       if (rnd_mode != ROUNDING_DOWN)
367         p_sign = 0x0000000000000000ull;
368       else
369         p_sign = 0x8000000000000000ull;
370       // the exponent is max (e4, expmin)
371       if (e4 < -6176)
372         e4 = expmin;
373       // assemble result
374       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
375       res.w[0] = 0x0;
376       *ptrres = res;
377       return;
378     }
379   }
380 
381   // determine the number of decimal digits in R256
382   ind = nr_digits256 (R256);
383 
384   // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
385   // round to the destination precision, with unbounded exponent
386 
387   if (ind <= p34) {
388     // result rounded to the destination precision with unbounded exponent
389     // is exact
390     if (ind + e4 < p34 + expmin) {
391       is_tiny = 1; // applies to all rounding modes
392     }
393     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
394     res.w[0] = R256.w[0];
395     // Note: res is correct only if expmin <= e4 <= expmax
396   } else { // if (ind > p34)
397     // if more than P digits, round to nearest to P digits
398     // round R256 to p34 digits
399     x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
400     if (ind <= 38) {
401       P128.w[1] = R256.w[1];
402       P128.w[0] = R256.w[0];
403       round128_19_38 (ind, x0, P128, &R128, &incr_exp,
404         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
405         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
406     } else if (ind <= 57) {
407       P192.w[2] = R256.w[2];
408       P192.w[1] = R256.w[1];
409       P192.w[0] = R256.w[0];
410       round192_39_57 (ind, x0, P192, &R192, &incr_exp,
411         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
412         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
413       R128.w[1] = R192.w[1];
414       R128.w[0] = R192.w[0];
415     } else { // if (ind <= 68)
416       round256_58_76 (ind, x0, R256, &R256, &incr_exp,
417         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
418         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
419       R128.w[1] = R256.w[1];
420       R128.w[0] = R256.w[0];
421     }
422     // the rounded result has p34 = 34 digits
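    // it is held in R128; incr_exp = 1 indicates that the rounding carried the
    // coefficient into the next decade, which is absorbed into the exponent below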
423     e4 = e4 + x0 + incr_exp;
424     if (rnd_mode == ROUNDING_TO_NEAREST) {
425       if (e4 < expmin) {
426         is_tiny = 1; // for other rounding modes apply correction
427       }
428     } else {
429       // for RM, RP, RZ, RA apply correction in order to determine tininess
430       // but do not save the result; apply the correction to
431       // (-1)^p_sign * significand * 10^0
432       P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
433       P128.w[0] = R128.w[0];
434       rounding_correction (rnd_mode,
435         		   is_inexact_lt_midpoint,
436         		   is_inexact_gt_midpoint, is_midpoint_lt_even,
437         		   is_midpoint_gt_even, 0, &P128, ptrfpsf);
438       scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
439       // the number of digits in the significand is p34 = 34
440       if (e4 + scale < expmin) {
441         is_tiny = 1;
442       }
443     }
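    // for directed rounding modes tininess is determined on the corrected
    // value: the correction applied at exponent 0 can move the coefficient
    // across a decade boundary, and scale (-1, 0, or +1) captures the
    // resulting exponent adjustment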
444     ind = p34; // the number of decimal digits in the significand of res
445     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
446     res.w[0] = R128.w[0];
447     // Note: res is correct only if expmin <= e4 <= expmax
448     // set the inexact flag after rounding with bounded exponent, if any
449   }
450   // at this point we have the result rounded with unbounded exponent in
451   // res and we know its tininess:
452   // res = (-1)^p_sign * significand * 10^e4,
453   // where q (significand) = ind <= p34
454   // Note: res is correct only if expmin <= e4 <= expmax
455 
456   // check for overflow if RN
457   if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
458     res.w[1] = p_sign | 0x7800000000000000ull;
459     res.w[0] = 0x0000000000000000ull;
460     *ptrres = res;
461     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
462     return; // BID_RETURN (res)
463   } // else not overflow or not RN, so continue
464 
465   // if (e4 >= expmin) we have the result rounded with bounded exponent
466   if (e4 < expmin) {
467     x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
468     // where the result rounded [at most] once is
469     //   (-1)^p_sign * significand_res * 10^e4
470 
471     // avoid double rounding error
472     is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
473     is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
474     is_midpoint_lt_even0 = is_midpoint_lt_even;
475     is_midpoint_gt_even0 = is_midpoint_gt_even;
476     is_inexact_lt_midpoint = 0;
477     is_inexact_gt_midpoint = 0;
478     is_midpoint_lt_even = 0;
479     is_midpoint_gt_even = 0;
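    // the flags of the first rounding are saved in the *0 variables and the
    // working flags are cleared, so that after the second rounding below a
    // double rounding error can be recognized and compensated for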
480 
481     if (x0 > ind) {
482       // nothing is left of res when moving the decimal point left x0 digits
483       is_inexact_lt_midpoint = 1;
484       res.w[1] = p_sign | 0x0000000000000000ull;
485       res.w[0] = 0x0000000000000000ull;
486       e4 = expmin;
487     } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
488       // this is <, =, or > 1/2 ulp
489       // compare the ind-digit value in the significand of res with
490       // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
491       // less than, equal to, or greater than 1/2 ulp (significand of res)
492       R128.w[1] = res.w[1] & MASK_COEFF;
493       R128.w[0] = res.w[0];
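      // as used here, midpoint64[i] = 5 * 10^i and midpoint128[i] = 5 * 10^(i+19),
      // i.e. one half unit in the last place of an (i+1)- resp. (i+20)-digit value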
494       if (ind <= 19) {
495         if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
496           lt_half_ulp = 1;
497           is_inexact_lt_midpoint = 1;
498         } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
499           eq_half_ulp = 1;
500           is_midpoint_gt_even = 1;
501         } else { // > 1/2 ulp
502           // gt_half_ulp = 1;
503           is_inexact_gt_midpoint = 1;
504         }
505       } else { // if (ind <= 38) {
506         if (R128.w[1] < midpoint128[ind - 20].w[1] ||
507             (R128.w[1] == midpoint128[ind - 20].w[1] &&
508             R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
509           lt_half_ulp = 1;
510           is_inexact_lt_midpoint = 1;
511         } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
512             R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
513           eq_half_ulp = 1;
514           is_midpoint_gt_even = 1;
515         } else { // > 1/2 ulp
516           // gt_half_ulp = 1;
517           is_inexact_gt_midpoint = 1;
518         }
519       }
520       if (lt_half_ulp || eq_half_ulp) {
521         // res = +0.0 * 10^expmin
522         res.w[1] = 0x0000000000000000ull;
523         res.w[0] = 0x0000000000000000ull;
524       } else { // if (gt_half_ulp)
525         // res = +1 * 10^expmin
526         res.w[1] = 0x0000000000000000ull;
527         res.w[0] = 0x0000000000000001ull;
528       }
529       res.w[1] = p_sign | res.w[1];
530       e4 = expmin;
531     } else { // if (1 <= x0 <= ind - 1 <= 33)
532       // round the ind-digit result to ind - x0 digits
533 
534       if (ind <= 18) { // 2 <= ind <= 18
535         round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
536         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
537         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
538         res.w[1] = 0x0;
539         res.w[0] = R64;
540       } else if (ind <= 38) {
541         P128.w[1] = res.w[1] & MASK_COEFF;
542         P128.w[0] = res.w[0];
543         round128_19_38 (ind, x0, P128, &res, &incr_exp,
544         		&is_midpoint_lt_even, &is_midpoint_gt_even,
545         		&is_inexact_lt_midpoint,
546         		&is_inexact_gt_midpoint);
547       }
548       e4 = e4 + x0; // expmin
549       // we want the exponent to be expmin, so if incr_exp = 1 then
550       // multiply the rounded result by 10 - it will still fit in 113 bits
551       if (incr_exp) {
552         // 64 x 128 -> 128
553         P128.w[1] = res.w[1] & MASK_COEFF;
554         P128.w[0] = res.w[0];
555         __mul_64x128_to_128 (res, ten2k64[1], P128);
556       }
557       res.w[1] =
558         p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
559       // avoid a double rounding error
560       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
561           is_midpoint_lt_even) { // double rounding error upward
562         // res = res - 1
563         res.w[0]--;
564         if (res.w[0] == 0xffffffffffffffffull)
565           res.w[1]--;
566         // Note: a double rounding error upward is not possible; for this
567         // the result after the first rounding would have to be 99...95
568         // (35 digits in all), possibly followed by a number of zeros; this
569         // is not possible in Cases (2)-(6) or (15)-(17) which may get here
570         is_midpoint_lt_even = 0;
571         is_inexact_lt_midpoint = 1;
572       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
573           is_midpoint_gt_even) { // double rounding error downward
574         // res = res + 1
575         res.w[0]++;
576         if (res.w[0] == 0)
577           res.w[1]++;
578         is_midpoint_gt_even = 0;
579         is_inexact_gt_midpoint = 1;
580       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
581         	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
582         // if this second rounding was exact the result may still be
583         // inexact because of the first rounding
584         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
585           is_inexact_gt_midpoint = 1;
586         }
587         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
588           is_inexact_lt_midpoint = 1;
589         }
590       } else if (is_midpoint_gt_even &&
591         	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
592         // pulled up to a midpoint
593         is_inexact_lt_midpoint = 1;
594         is_inexact_gt_midpoint = 0;
595         is_midpoint_lt_even = 0;
596         is_midpoint_gt_even = 0;
597       } else if (is_midpoint_lt_even &&
598         	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
599         // pulled down to a midpoint
600         is_inexact_lt_midpoint = 0;
601         is_inexact_gt_midpoint = 1;
602         is_midpoint_lt_even = 0;
603         is_midpoint_gt_even = 0;
604       } else {
605         ;
606       }
607     }
608   }
609   // res contains the correct result
610   // apply correction if not rounding to nearest
611   if (rnd_mode != ROUNDING_TO_NEAREST) {
612     rounding_correction (rnd_mode,
613         		 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
614         		 is_midpoint_lt_even, is_midpoint_gt_even,
615         		 e4, &res, ptrfpsf);
616   }
617   if (is_midpoint_lt_even || is_midpoint_gt_even ||
618       is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
619     // set the inexact flag
620     *ptrfpsf |= INEXACT_EXCEPTION;
621     if (is_tiny)
622       *ptrfpsf |= UNDERFLOW_EXCEPTION;
623   }
624 
625   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
626   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
627   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
628   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
629   *ptrres = res;
630   return;
631 }
632 
633 
634 #if DECIMAL_CALL_BY_REFERENCE
635 static void
636 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
637         	int *ptr_is_midpoint_gt_even,
638         	int *ptr_is_inexact_lt_midpoint,
639         	int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
640         	UINT128 * px, UINT128 * py,
641         	UINT128 *
642         	pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
643         	_EXC_INFO_PARAM) {
644   UINT128 x = *px, y = *py, z = *pz;
645 #if !DECIMAL_GLOBAL_ROUNDING
646   unsigned int rnd_mode = *prnd_mode;
647 #endif
648 #else
649 static UINT128
650 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
651         	int *ptr_is_midpoint_gt_even,
652         	int *ptr_is_inexact_lt_midpoint,
653         	int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
654         	UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
655         	_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
656 #endif
657 
658   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
659   UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
660   UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
661   int true_p_exp;
662   UINT128 C1, C2, C3;
663   UINT256 C4;
664   int q1 = 0, q2 = 0, q3 = 0, q4;
665   int e1, e2, e3, e4;
666   int scale, ind, delta, x0;
667   int p34 = P34; // used to modify the limit on the number of digits
668   BID_UI64DOUBLE tmp;
669   int x_nr_bits, y_nr_bits, z_nr_bits;
670   unsigned int save_fpsf;
671   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
672   int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
673   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
674   int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
675   int incr_exp = 0;
676   int lsb;
677   int lt_half_ulp = 0;
678   int eq_half_ulp = 0;
679   int gt_half_ulp = 0;
680   int is_tiny = 0;
681   UINT64 R64, tmp64;
682   UINT128 P128, R128;
683   UINT192 P192, R192;
684   UINT256 R256;
685 
686   // the following are based on the table of special cases for fma; the NaN
687   // behavior is similar to that of the IA-64 Architecture fma
688 
689   // identify cases where at least one operand is NaN
690 
691   BID_SWAP128 (x);
692   BID_SWAP128 (y);
693   BID_SWAP128 (z);
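  // NaN operands are handled in the order y, z, x: the first NaN encountered
  // (with any non-canonical payload cleared) supplies the result, quieted, and
  // the invalid exception is raised if any operand is a signaling NaN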
694   if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
695     // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
696     // check first for non-canonical NaN payload
697     if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
698         (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
699          (y.w[0] > 0x38c15b09ffffffffull))) {
700       y.w[1] = y.w[1] & 0xffffc00000000000ull;
701       y.w[0] = 0x0ull;
702     }
703     if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
704       // set invalid flag
705       *pfpsf |= INVALID_EXCEPTION;
706       // return quiet (y)
707       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
708       res.w[0] = y.w[0];
709     } else { // y is QNaN
710       // return y
711       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
712       res.w[0] = y.w[0];
713       // if z = SNaN or x = SNaN signal invalid exception
714       if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
715           (x.w[1] & MASK_SNAN) == MASK_SNAN) {
716         // set invalid flag
717         *pfpsf |= INVALID_EXCEPTION;
718       }
719     }
720     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
721     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
722     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
723     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
724     BID_SWAP128 (res);
725     BID_RETURN (res)
726   } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
727     // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
728     // check first for non-canonical NaN payload
729     if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
730         (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
731          (z.w[0] > 0x38c15b09ffffffffull))) {
732       z.w[1] = z.w[1] & 0xffffc00000000000ull;
733       z.w[0] = 0x0ull;
734     }
735     if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
736       // set invalid flag
737       *pfpsf |= INVALID_EXCEPTION;
738       // return quiet (z)
739       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
740       res.w[0] = z.w[0];
741     } else { // z is QNaN
742       // return z
743       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
744       res.w[0] = z.w[0];
745       // if x = SNaN signal invalid exception
746       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
747         // set invalid flag
748         *pfpsf |= INVALID_EXCEPTION;
749       }
750     }
751     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
752     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
753     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
754     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
755     BID_SWAP128 (res);
756     BID_RETURN (res)
757   } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
758     // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
759     // check first for non-canonical NaN payload
760     if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
761         (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
762          (x.w[0] > 0x38c15b09ffffffffull))) {
763       x.w[1] = x.w[1] & 0xffffc00000000000ull;
764       x.w[0] = 0x0ull;
765     }
766     if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
767       // set invalid flag
768       *pfpsf |= INVALID_EXCEPTION;
769       // return quiet (x)
770       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
771       res.w[0] = x.w[0];
772     } else { // x is QNaN
773       // return x
774       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
775       res.w[0] = x.w[0];
776     }
777     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
778     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
779     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
780     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
781     BID_SWAP128 (res);
782     BID_RETURN (res)
783   }
784   // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
785   // for non-canonical values
786 
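  // unpacking below: operands whose combination field starts with G0 G1 = 11
  // would encode a coefficient that for BID128 always exceeds 10^34 - 1; such
  // operands are non-canonical and their coefficient is treated as zero (the
  // exponent field sits two bits lower in the word, hence the << 2)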
787   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
788   C1.w[1] = x.w[1] & MASK_COEFF;
789   C1.w[0] = x.w[0];
790   if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
791     // if x is not infinity check for non-canonical values - treated as zero
792     if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
793       // non-canonical
794       x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
795       C1.w[1] = 0; // significand high
796       C1.w[0] = 0; // significand low
797     } else { // G0_G1 != 11
798       x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
799       if (C1.w[1] > 0x0001ed09bead87c0ull ||
800           (C1.w[1] == 0x0001ed09bead87c0ull &&
801            C1.w[0] > 0x378d8e63ffffffffull)) {
802         // x is non-canonical if coefficient is larger than 10^34 -1
803         C1.w[1] = 0;
804         C1.w[0] = 0;
805       } else { // canonical
806         ;
807       }
808     }
809   }
810   y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
811   C2.w[1] = y.w[1] & MASK_COEFF;
812   C2.w[0] = y.w[0];
813   if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
814     // if y is not infinity check for non-canonical values - treated as zero
815     if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
816       // non-canonical
817       y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
818       C2.w[1] = 0; // significand high
819       C2.w[0] = 0; // significand low
820     } else { // G0_G1 != 11
821       y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
822       if (C2.w[1] > 0x0001ed09bead87c0ull ||
823           (C2.w[1] == 0x0001ed09bead87c0ull &&
824            C2.w[0] > 0x378d8e63ffffffffull)) {
825         // y is non-canonical if coefficient is larger than 10^34 -1
826         C2.w[1] = 0;
827         C2.w[0] = 0;
828       } else { // canonical
829         ;
830       }
831     }
832   }
833   z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
834   C3.w[1] = z.w[1] & MASK_COEFF;
835   C3.w[0] = z.w[0];
836   if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
837     // if z is not infinity check for non-canonical values - treated as zero
838     if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
839       // non-canonical
840       z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
841       C3.w[1] = 0; // significand high
842       C3.w[0] = 0; // significand low
843     } else { // G0_G1 != 11
844       z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
845       if (C3.w[1] > 0x0001ed09bead87c0ull ||
846           (C3.w[1] == 0x0001ed09bead87c0ull &&
847            C3.w[0] > 0x378d8e63ffffffffull)) {
848         // z is non-canonical if coefficient is larger than 10^34 -1
849         C3.w[1] = 0;
850         C3.w[0] = 0;
851       } else { // canonical
852         ;
853       }
854     }
855   }
856 
857   p_sign = x_sign ^ y_sign; // sign of the product
858 
859   // identify cases where at least one operand is infinity
860 
861   if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
862     if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
863       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
864         if (p_sign == z_sign) {
865           res.w[1] = z_sign | MASK_INF;
866           res.w[0] = 0x0;
867         } else {
868           // return QNaN Indefinite
869           res.w[1] = 0x7c00000000000000ull;
870           res.w[0] = 0x0000000000000000ull;
871           // set invalid flag
872           *pfpsf |= INVALID_EXCEPTION;
873         }
874       } else { // z = 0 or z = f
875         res.w[1] = p_sign | MASK_INF;
876         res.w[0] = 0x0;
877       }
878     } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
879       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
880         if (p_sign == z_sign) {
881           res.w[1] = z_sign | MASK_INF;
882           res.w[0] = 0x0;
883         } else {
884           // return QNaN Indefinite
885           res.w[1] = 0x7c00000000000000ull;
886           res.w[0] = 0x0000000000000000ull;
887           // set invalid flag
888           *pfpsf |= INVALID_EXCEPTION;
889         }
890       } else { // z = 0 or z = f
891         res.w[1] = p_sign | MASK_INF;
892         res.w[0] = 0x0;
893       }
894     } else { // y = 0
895       // return QNaN Indefinite
896       res.w[1] = 0x7c00000000000000ull;
897       res.w[0] = 0x0000000000000000ull;
898       // set invalid flag
899       *pfpsf |= INVALID_EXCEPTION;
900     }
901     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
902     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
903     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
904     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
905     BID_SWAP128 (res);
906     BID_RETURN (res)
907   } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
908     if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
909       // x = f, necessarily
910       if ((p_sign != z_sign)
911           || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
912         // return QNaN Indefinite
913         res.w[1] = 0x7c00000000000000ull;
914         res.w[0] = 0x0000000000000000ull;
915         // set invalid flag
916         *pfpsf |= INVALID_EXCEPTION;
917       } else {
918         res.w[1] = z_sign | MASK_INF;
919         res.w[0] = 0x0;
920       }
921     } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
922       // z = 0, f, inf
923       // return QNaN Indefinite
924       res.w[1] = 0x7c00000000000000ull;
925       res.w[0] = 0x0000000000000000ull;
926       // set invalid flag
927       *pfpsf |= INVALID_EXCEPTION;
928     } else {
929       // x = f and z = 0, f, necessarily
930       res.w[1] = p_sign | MASK_INF;
931       res.w[0] = 0x0;
932     }
933     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
934     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
935     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
936     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
937     BID_SWAP128 (res);
938     BID_RETURN (res)
939   } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
940     // x = 0, f and y = 0, f, necessarily
941     res.w[1] = z_sign | MASK_INF;
942     res.w[0] = 0x0;
943     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
944     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
945     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
946     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
947     BID_SWAP128 (res);
948     BID_RETURN (res)
949   }
950 
951   true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
952   if (true_p_exp < -6176)
953     p_exp = 0; // cannot be less than EXP_MIN
954   else
955     p_exp = (UINT64) (true_p_exp + 6176) << 49;
956 
957   if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
958     // the result is 0
959     if (p_exp < z_exp)
960       res.w[1] = p_exp; // preferred exponent
961     else
962       res.w[1] = z_exp; // preferred exponent
963     if (p_sign == z_sign) {
964       res.w[1] |= z_sign;
965       res.w[0] = 0x0;
966     } else { // x * y and z have opposite signs
967       if (rnd_mode == ROUNDING_DOWN) {
968         // res = -0.0
969         res.w[1] |= MASK_SIGN;
970         res.w[0] = 0x0;
971       } else {
972         // res = +0.0
973         // res.w[1] |= 0x0;
974         res.w[0] = 0x0;
975       }
976     }
977     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
978     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
979     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
980     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
981     BID_SWAP128 (res);
982     BID_RETURN (res)
983   }
984   // from this point on, we may need to know the number of decimal digits
985   // in the significands of x, y, z when x, y, z != 0
986 
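  // the number of decimal digits in each non-zero coefficient is derived from
  // its bit length: converting the (partial) coefficient to double and reading
  // the biased binary exponent from the IEEE-754 encoding gives the bit count,
  // and the nr_digits[] table maps that to a digit count, with a threshold
  // comparison when a bit length can correspond to two different digit counts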
987   if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
988     // q1 = nr. of decimal digits in x
989     // determine first the nr. of bits in x
990     if (C1.w[1] == 0) {
991       if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
992         // split the 64-bit value in two 32-bit halves to avoid rounding errors
993         if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
994           tmp.d = (double) (C1.w[0] >> 32); // exact conversion
995           x_nr_bits =
996             33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
997         } else { // x < 2^32
998           tmp.d = (double) (C1.w[0]); // exact conversion
999           x_nr_bits =
1000             1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1001         }
1002       } else { // if x < 2^53
1003         tmp.d = (double) C1.w[0]; // exact conversion
1004         x_nr_bits =
1005           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1006       }
1007     } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1008       tmp.d = (double) C1.w[1]; // exact conversion
1009       x_nr_bits =
1010         65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1011     }
1012     q1 = nr_digits[x_nr_bits - 1].digits;
1013     if (q1 == 0) {
1014       q1 = nr_digits[x_nr_bits - 1].digits1;
1015       if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1016           (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1017            C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1018         q1++;
1019     }
1020   }
1021 
1022   if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1023     if (C2.w[1] == 0) {
1024       if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1025         // split the 64-bit value in two 32-bit halves to avoid rounding errors
1026         if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1027           tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1028           y_nr_bits =
1029             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1030         } else { // y < 2^32
1031           tmp.d = (double) C2.w[0]; // exact conversion
1032           y_nr_bits =
1033             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1034         }
1035       } else { // if y < 2^53
1036         tmp.d = (double) C2.w[0]; // exact conversion
1037         y_nr_bits =
1038           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1039       }
1040     } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1041       tmp.d = (double) C2.w[1]; // exact conversion
1042       y_nr_bits =
1043         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1044     }
1045 
1046     q2 = nr_digits[y_nr_bits].digits;
1047     if (q2 == 0) {
1048       q2 = nr_digits[y_nr_bits].digits1;
1049       if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1050           (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1051            C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
1052         q2++;
1053     }
1054   }
1055 
1056   if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1057     if (C3.w[1] == 0) {
1058       if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1059         // split the 64-bit value in two 32-bit halves to avoid rounding errors
1060         if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1061           tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1062           z_nr_bits =
1063             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1064         } else { // z < 2^32
1065           tmp.d = (double) C3.w[0]; // exact conversion
1066           z_nr_bits =
1067             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1068         }
1069       } else { // if z < 2^53
1070         tmp.d = (double) C3.w[0]; // exact conversion
1071         z_nr_bits =
1072           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1073       }
1074     } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1075       tmp.d = (double) C3.w[1]; // exact conversion
1076       z_nr_bits =
1077         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1078     }
1079 
1080     q3 = nr_digits[z_nr_bits].digits;
1081     if (q3 == 0) {
1082       q3 = nr_digits[z_nr_bits].digits1;
1083       if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1084           (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1085            C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
1086         q3++;
1087     }
1088   }
1089 
1090   if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
1091       (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
1092     // x = 0 or y = 0
1093     // z = f, necessarily; for 0 + z return z, with the preferred exponent
1094     // the result is z, but need to get the preferred exponent
1095     if (z_exp <= p_exp) { // the preferred exponent is z_exp
1096       res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1097       res.w[0] = C3.w[0];
1098     } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1099       // return (C3 * 10^scale) * 10^(z_exp - scale)
1100       // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1101       scale = p34 - q3;
1102       ind = (z_exp - p_exp) >> 49;
1103       if (ind < scale)
1104         scale = ind;
1105       if (scale == 0) {
1106         res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1107         res.w[0] = z.w[0];
1108       } else if (q3 <= 19) { // z fits in 64 bits
1109         if (scale <= 19) { // 10^scale fits in 64 bits
1110           // 64 x 64 C3.w[0] * ten2k64[scale]
1111           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1112         } else { // 10^scale fits in 128 bits
1113           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1114           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1115         }
1116       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1117         // 64 x 128 ten2k64[scale] * C3
1118         __mul_128x64_to_128 (res, ten2k64[scale], C3);
1119       }
1120       // subtract scale from the exponent
1121       z_exp = z_exp - ((UINT64) scale << 49);
1122       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1123     }
1124     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1125     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1126     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1127     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1128     BID_SWAP128 (res);
1129     BID_RETURN (res)
1130   } else {
1131     ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1132   }
1133 
1134   e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
1135   e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
1136   e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1137   e4 = e1 + e2; // unbiased exponent of the exact x * y
1138 
1139   // calculate C1 * C2 and its number of decimal digits, q4
1140 
1141   // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1142   // where 2 <= q1 + q2 <= 68
1143   // calculate C4 = C1 * C2 and determine q
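  // the multiplication is dispatched on q1 + q2 so that the narrowest helper
  // that can hold the product is used (64x64, 128x64, 64x128 or 128x128), and
  // q4 is then fixed up by comparing C4 against 10^(q1+q2-1)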
1144   C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1145   if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1146     C4.w[0] = C1.w[0] * C2.w[0];
1147     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1148     if (C4.w[0] < ten2k64[q1 + q2 - 1])
1149       q4 = q1 + q2 - 1; // q4 in [1, 18]
1150     else
1151       q4 = q1 + q2; // q4 in [2, 19]
1152     // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1153   } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1154     // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1155     __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1156     // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1157     if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
1158       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1159       q4 = 19; // 19 = q1 + q2 - 1
1160     } else {
1161       // if (C4.w[1] == 0)
1162       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1163       // else
1164       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1165       q4 = 20; // 20 = q1 + q2
1166     }
1167   } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1168     // C4 = C1 * C2 fits in 64 or 128 bits
1169     // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1170     // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1171     if (q1 <= 19) {
1172       __mul_128x64_to_128 (C4, C1.w[0], C2);
1173     } else { // q2 <= 19
1174       __mul_128x64_to_128 (C4, C2.w[0], C1);
1175     }
1176     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1177     if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1178         (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1179          C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
1180       // if (C4.w[1] == 0) // q4 = 20, necessarily
1181       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1182       // else
1183       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1184       q4 = q1 + q2 - 1; // q4 in [20, 37]
1185     } else {
1186       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1187       q4 = q1 + q2; // q4 in [21, 38]
1188     }
1189   } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1190     // both C1 and C2 fit in 128 bits (actually in 113 bits)
1191     // may replace this by 128x128_to192
1192     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1193     // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1194     if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1195         		 (C4.w[1] == ten2k128[18].w[1]
1196         		  && C4.w[0] < ten2k128[18].w[0]))) {
1197       // 18 = 38 - 20 = q1+q2-1 - 20
1198       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1199       q4 = 38; // 38 = q1 + q2 - 1
1200     } else {
1201       // if (C4.w[2] == 0)
1202       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1203       // else
1204       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1205       q4 = 39; // 39 = q1 + q2
1206     }
1207   } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1208     // C4 = C1 * C2 fits in 128 or 192 bits
1209     // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1210     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1211     // may fit in 64 bits
1212     if (C1.w[1] == 0) { // C1 fits in 64 bits
1213       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1214       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1215     } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1216       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1217       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1218     } else { // both C1 and C2 require 128 bits
1219       // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1220       __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1221     }
1222     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1223     if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1224         (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1225          (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1226           (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1227            C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
1228       // if (C4.w[2] == 0) // q4 = 39, necessarily
1229       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1230       // else
1231       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1232       q4 = q1 + q2 - 1; // q4 in [39, 56]
1233     } else {
1234       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1235       q4 = q1 + q2; // q4 in [40, 57]
1236     }
1237   } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1238     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1239     // may fit in 64 bits
1240     if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1241       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1242     } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1243       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1244     } else { // C1 * C2 will fit in 192 bits or in 256 bits
1245       __mul_128x128_to_256 (C4, C1, C2);
1246     }
1247     // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1248     if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1249         		 (C4.w[2] == ten2k256[18].w[2]
1250         		  && (C4.w[1] < ten2k256[18].w[1]
1251         		      || (C4.w[1] == ten2k256[18].w[1]
1252         			  && C4.w[0] < ten2k256[18].w[0]))))) {
1253       // 18 = 57 - 39 = q1+q2-1 - 39
1254       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1255       q4 = 57; // 57 = q1 + q2 - 1
1256     } else {
1257       // if (C4.w[3] == 0)
1258       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1259       // else
1260       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1261       q4 = 58; // 58 = q1 + q2
1262     }
1263   } else { // if 59 <= q1 + q2 <= 68
1264     // C4 = C1 * C2 fits in 192 or 256 bits
1265     // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1266     // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1267     // 64 bits
1268     // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1269     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1270     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1271     if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1272         (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1273          (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1274           (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1275            (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1276             (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1277              C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
1278       // if (C4.w[3] == 0) // q4 = 58, necessarily
1279       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1280       // else
1281       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1282       q4 = q1 + q2 - 1; // q4 in [58, 67]
1283     } else {
1284       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1285       q4 = q1 + q2; // q4 in [59, 68]
1286     }
1287   }
1288 
1289   if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1290     save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1291     *pfpsf = 0;
1292 
1293     if (q4 > p34) {
1294 
1295       // truncate C4 to p34 digits into res
1296       // x0 = q4 - p34, 1 <= x0 <= 34 because 35 <= q4 <= 68
1297       x0 = q4 - p34;
1298       if (q4 <= 38) {
1299         P128.w[1] = C4.w[1];
1300         P128.w[0] = C4.w[0];
1301         round128_19_38 (q4, x0, P128, &res, &incr_exp,
1302         		&is_midpoint_lt_even, &is_midpoint_gt_even,
1303         		&is_inexact_lt_midpoint,
1304         		&is_inexact_gt_midpoint);
1305       } else if (q4 <= 57) { // 35 <= q4 <= 57
1306         P192.w[2] = C4.w[2];
1307         P192.w[1] = C4.w[1];
1308         P192.w[0] = C4.w[0];
1309         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1310         		&is_midpoint_lt_even, &is_midpoint_gt_even,
1311         		&is_inexact_lt_midpoint,
1312         		&is_inexact_gt_midpoint);
1313         res.w[0] = R192.w[0];
1314         res.w[1] = R192.w[1];
1315       } else { // if (q4 <= 68)
1316         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1317         		&is_midpoint_lt_even, &is_midpoint_gt_even,
1318         		&is_inexact_lt_midpoint,
1319         		&is_inexact_gt_midpoint);
1320         res.w[0] = R256.w[0];
1321         res.w[1] = R256.w[1];
1322       }
1323       e4 = e4 + x0;
1324       if (incr_exp) {
1325         e4 = e4 + 1;
1326       }
1327       q4 = p34;
1328       // res is now the coefficient of the result rounded to the destination
1329       // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1330     } else { // if (q4 <= p34)
1331       // C4 * 10^e4 is the result rounded to the destination precision, with
1332       // unbounded exponent (which is exact)
1333 
1334       if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1335         // e4 is too large, but can be brought within range by scaling up C4
1336         scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1337         // res = (C4 * 10^scale) * 10^expmax
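        // e.g. if C4 = 7 with q4 = 1 and e4 = expmax + 3, then scale = 3 and
        // the coefficient becomes 7000 with exponent expmax - the same value,
        // now with a representable exponent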
1338         if (q4 <= 19) { // C4 fits in 64 bits
1339           if (scale <= 19) { // 10^scale fits in 64 bits
1340             // 64 x 64 C4.w[0] * ten2k64[scale]
1341             __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
1342           } else { // 10^scale fits in 128 bits
1343             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1344             __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
1345           }
1346         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1347           // 64 x 128 ten2k64[scale] * C4
1348           __mul_128x64_to_128 (res, ten2k64[scale], C4);
1349         }
1350         e4 = e4 - scale; // expmax
1351         q4 = q4 + scale;
1352       } else {
1353         res.w[1] = C4.w[1];
1354         res.w[0] = C4.w[0];
1355       }
1356       // res is the coefficient of the result rounded to the destination
1357       // precision, with unbounded exponent (it has q4 digits); the exponent
1358       // is e4 (exact result)
1359     }
1360 
1361     // check for overflow
1362     if (q4 + e4 > p34 + expmax) {
1363       if (rnd_mode == ROUNDING_TO_NEAREST) {
1364         res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1365         res.w[0] = 0x0000000000000000ull;
1366         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1367       } else {
1368         res.w[1] = p_sign | res.w[1];
1369         rounding_correction (rnd_mode,
1370         		     is_inexact_lt_midpoint,
1371         		     is_inexact_gt_midpoint,
1372         		     is_midpoint_lt_even, is_midpoint_gt_even,
1373         		     e4, &res, pfpsf);
1374       }
1375       *pfpsf |= save_fpsf;
1376       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1377       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1378       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1379       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1380       BID_SWAP128 (res);
1381       BID_RETURN (res)
1382     }
1383     // check for underflow
1384     if (q4 + e4 < expmin + P34) {
1385       is_tiny = 1; // the result is tiny
1386       if (e4 < expmin) {
1387         // if e4 < expmin, we must truncate more of res
1388         x0 = expmin - e4; // x0 >= 1
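        // e.g. if e4 = expmin - 2 and q4 > 2, then x0 = 2: the two lowest
        // decimal digits of res are rounded off below and the exponent is
        // raised to expmin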
1389         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1390         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1391         is_midpoint_lt_even0 = is_midpoint_lt_even;
1392         is_midpoint_gt_even0 = is_midpoint_gt_even;
1393         is_inexact_lt_midpoint = 0;
1394         is_inexact_gt_midpoint = 0;
1395         is_midpoint_lt_even = 0;
1396         is_midpoint_gt_even = 0;
1397         // the number of decimal digits in res is q4
1398         if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1399           if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1400             round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1401         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
1402         		  &is_inexact_lt_midpoint,
1403         		  &is_inexact_gt_midpoint);
1404             if (incr_exp) {
1405               // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1406               R64 = ten2k64[q4 - x0];
1407             }
1408             // res.w[1] = 0; (from above)
1409             res.w[0] = R64;
1410           } else { // if (q4 <= 34)
1411             // 19 <= q4 <= 34
1412             P128.w[1] = res.w[1];
1413             P128.w[0] = res.w[0];
1414             round128_19_38 (q4, x0, P128, &res, &incr_exp,
1415         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
1416         		    &is_inexact_lt_midpoint,
1417         		    &is_inexact_gt_midpoint);
1418             if (incr_exp) {
1419               // increase coefficient by a factor of 10; this will be <= 10^33
1420               // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1421               if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
1422         	// res.w[1] = 0;
1423         	res.w[0] = ten2k64[q4 - x0];
1424               } else { // 20 <= q4 - x0 <= 37
1425         	res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1426         	res.w[1] = ten2k128[q4 - x0 - 20].w[1];
1427               }
1428             }
1429           }
1430           e4 = e4 + x0; // expmin
1431         } else if (x0 == q4) {
1432           // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1433           // determine relationship with 1/2 ulp
1434           if (q4 <= 19) {
1435             if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1436               lt_half_ulp = 1;
1437               is_inexact_lt_midpoint = 1;
1438             } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1439               eq_half_ulp = 1;
1440               is_midpoint_gt_even = 1;
1441             } else { // > 1/2 ulp
1442               // gt_half_ulp = 1;
1443               is_inexact_gt_midpoint = 1;
1444             }
1445           } else { // if (q4 <= 34)
1446             if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1447                 (res.w[1] == midpoint128[q4 - 20].w[1] &&
1448                 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
1449               lt_half_ulp = 1;
1450               is_inexact_lt_midpoint = 1;
1451             } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1452                 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1453               eq_half_ulp = 1;
1454               is_midpoint_gt_even = 1;
1455             } else { // > 1/2 ulp
1456               // gt_half_ulp = 1;
1457               is_inexact_gt_midpoint = 1;
1458             }
1459           }
1460           if (lt_half_ulp || eq_half_ulp) {
1461             // res = +0.0 * 10^expmin
1462             res.w[1] = 0x0000000000000000ull;
1463             res.w[0] = 0x0000000000000000ull;
1464           } else { // if (gt_half_ulp)
1465             // res = +1 * 10^expmin
1466             res.w[1] = 0x0000000000000000ull;
1467             res.w[0] = 0x0000000000000001ull;
1468           }
1469           e4 = expmin;
1470         } else { // if (x0 > q4)
1471           // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1472           res.w[1] = 0;
1473           res.w[0] = 0;
1474           e4 = expmin;
1475           is_inexact_lt_midpoint = 1;
1476         }
1477         // avoid a double rounding error
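        // (illustration: if the first rounding went up (is_inexact_gt_midpoint0
        //  or is_midpoint_lt_even0) and this second rounding lands exactly on a
        //  midpoint and also rounds up, the true value lies just below that
        //  midpoint, so the correct result is one ulp smaller; the branches
        //  below decrement or increment res and fix the rounding indicators)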
1478         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
1479             is_midpoint_lt_even) { // double rounding error upward
1480           // res = res - 1
1481           res.w[0]--;
1482           if (res.w[0] == 0xffffffffffffffffull)
1483             res.w[1]--;
1484           // Note: a double rounding error upward is not possible; for this
1485           // the result after the first rounding would have to be 99...95
1486           // (35 digits in all), possibly followed by a number of zeros; this
1487           // is not possible for f * f + 0
1488           is_midpoint_lt_even = 0;
1489           is_inexact_lt_midpoint = 1;
1490         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1491             is_midpoint_gt_even) { // double rounding error downward
1492           // res = res + 1
1493           res.w[0]++;
1494           if (res.w[0] == 0)
1495             res.w[1]++;
1496           is_midpoint_gt_even = 0;
1497           is_inexact_gt_midpoint = 1;
1498         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
1499         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
1500           // if this second rounding was exact the result may still be
1501           // inexact because of the first rounding
1502           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1503             is_inexact_gt_midpoint = 1;
1504           }
1505           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1506             is_inexact_lt_midpoint = 1;
1507           }
1508         } else if (is_midpoint_gt_even &&
1509         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
1510           // pulled up to a midpoint
1511           is_inexact_lt_midpoint = 1;
1512           is_inexact_gt_midpoint = 0;
1513           is_midpoint_lt_even = 0;
1514           is_midpoint_gt_even = 0;
1515         } else if (is_midpoint_lt_even &&
1516         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
1517           // pulled down to a midpoint
1518           is_inexact_lt_midpoint = 0;
1519           is_inexact_gt_midpoint = 1;
1520           is_midpoint_lt_even = 0;
1521           is_midpoint_gt_even = 0;
1522         } else {
1523           ;
1524         }
1525       } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1526         if (e3 < e4) {
1527           // if (e3 < e4) the preferred exponent is e3
1528           // return (C4 * 10^scale) * 10^(e4 - scale)
1529           // where scale = min (p34-q4, (e4 - e3))
1530           scale = p34 - q4;
1531           ind = e4 - e3;
1532           if (ind < scale)
1533             scale = ind;
1534           if (scale == 0) {
1535             ; // res and e4 are unchanged
1536           } else if (q4 <= 19) { // C4 fits in 64 bits
1537             if (scale <= 19) { // 10^scale fits in 64 bits
1538               // 64 x 64 res.w[0] * ten2k64[scale]
1539               __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
1540             } else { // 10^scale fits in 128 bits
1541               // 64 x 128 res.w[0] * ten2k128[scale - 20]
1542               __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
1543             }
1544           } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1545             // 64 x 128 ten2k64[scale] * res
1546             __mul_128x64_to_128 (res, ten2k64[scale], res);
1547           }
1548           // subtract scale from the exponent
1549           e4 = e4 - scale;
1550         }
1551       }
1552 
1553       // check for inexact result
1554       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1555           is_midpoint_lt_even || is_midpoint_gt_even) {
1556         // set the inexact flag and the underflow flag
1557         *pfpsf |= INEXACT_EXCEPTION;
1558         *pfpsf |= UNDERFLOW_EXCEPTION;
1559       }
1560       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1561       if (rnd_mode != ROUNDING_TO_NEAREST) {
1562         rounding_correction (rnd_mode,
1563         		     is_inexact_lt_midpoint,
1564         		     is_inexact_gt_midpoint,
1565         		     is_midpoint_lt_even, is_midpoint_gt_even,
1566         		     e4, &res, pfpsf);
1567       }
1568       *pfpsf |= save_fpsf;
1569       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1570       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1571       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1572       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1573       BID_SWAP128 (res);
1574       BID_RETURN (res)
1575     }
1576     // no overflow, and no underflow for rounding to nearest
1577     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1578 
1579     if (rnd_mode != ROUNDING_TO_NEAREST) {
1580       rounding_correction (rnd_mode,
1581         		   is_inexact_lt_midpoint,
1582         		   is_inexact_gt_midpoint,
1583         		   is_midpoint_lt_even, is_midpoint_gt_even,
1584         		   e4, &res, pfpsf);
1585       // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1586       if (e4 == expmin) {
1587         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1588             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1589              res.w[0] < 0x38c15b0a00000000ull)) {
1590           is_tiny = 1;
1591         }
1592       }
1593     }
1594 
1595     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1596         is_midpoint_lt_even || is_midpoint_gt_even) {
1597       // set the inexact flag
1598       *pfpsf |= INEXACT_EXCEPTION;
1599       if (is_tiny)
1600         *pfpsf |= UNDERFLOW_EXCEPTION;
1601     }
1602 
1603     if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1604       // need to ensure that the result has the preferred exponent
1605       p_exp = res.w[1] & MASK_EXP;
1606       if (z_exp < p_exp) { // the preferred exponent is z_exp
1607         // significand of res in C3
1608         C3.w[1] = res.w[1] & MASK_COEFF;
1609         C3.w[0] = res.w[0];
1610         // the number of decimal digits of x * y is q4 <= 34
1611         // Note: the coefficient fits in 128 bits
1612 
1613         // return (C3 * 10^scale) * 10^(p_exp - scale)
1614         // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
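        // e.g. if the exact product has q4 = 3 digits, its coefficient can be
        // multiplied by at most 10^31 = 10^(p34-q4), moving the exponent down
        // toward the preferred (smaller) exponent z_exp without exceeding
        // p34 = 34 digits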
1615         scale = p34 - q4;
1616         ind = (p_exp - z_exp) >> 49;
1617         if (ind < scale)
1618           scale = ind;
1619         // subtract scale from the exponent
1620         p_exp = p_exp - ((UINT64) scale << 49);
1621         if (scale == 0) {
1622           ; // leave res unchanged
1623         } else if (q4 <= 19) { // x * y fits in 64 bits
1624           if (scale <= 19) { // 10^scale fits in 64 bits
1625             // 64 x 64 C3.w[0] * ten2k64[scale]
1626             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1627           } else { // 10^scale fits in 128 bits
1628             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1629             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1630           }
1631           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1632         } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1633           // 64 x 128 ten2k64[scale] * C3
1634           __mul_128x64_to_128 (res, ten2k64[scale], C3);
1635           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1636         }
1637       } // else leave the result as it is, because p_exp <= z_exp
1638     }
1639     *pfpsf |= save_fpsf;
1640     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1641     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1642     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1643     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1644     BID_SWAP128 (res);
1645     BID_RETURN (res)
1646   } // else we have f * f + f
1647 
1648   // continue with x = f, y = f, z = f
1649 
1650   delta = q3 + e3 - q4 - e4;
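  // delta is the gap, in decimal positions, between the most significant digit
  // of z = C3 * 10^e3 and the most significant digit of x * y = C4 * 10^e4;
  // when delta is large, z dominates the sum and x * y can affect at most the
  // rounding of z (Cases (1') and (1''))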
1651 delta_ge_zero:
1652   if (delta >= 0) {
1653 
1654     if (p34 <= delta - 1 ||	// Case (1')
1655         (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1656       // check for overflow, which can occur only in Case (1')
1657       if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1658         // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1659         // condition for (q3 + e3) > (p34 + expmax)
1660         if (rnd_mode == ROUNDING_TO_NEAREST) {
1661           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1662           res.w[0] = 0x0000000000000000ull;
1663           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1664         } else {
1665           if (p_sign == z_sign) {
1666             is_inexact_lt_midpoint = 1;
1667           } else {
1668             is_inexact_gt_midpoint = 1;
1669           }
1670           // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1671           scale = p34 - q3;
1672           if (scale == 0) {
1673             res.w[1] = z_sign | C3.w[1];
1674             res.w[0] = C3.w[0];
1675           } else {
1676             if (q3 <= 19) { // C3 fits in 64 bits
1677               if (scale <= 19) { // 10^scale fits in 64 bits
1678         	// 64 x 64 C3.w[0] * ten2k64[scale]
1679         	__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1680               } else { // 10^scale fits in 128 bits
1681         	// 64 x 128 C3.w[0] * ten2k128[scale - 20]
1682         	__mul_128x64_to_128 (res, C3.w[0],
1683         			     ten2k128[scale - 20]);
1684               }
1685             } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1686               // 64 x 128 ten2k64[scale] * C3
1687               __mul_128x64_to_128 (res, ten2k64[scale], C3);
1688             }
1689             // the coefficient in res has q3 + scale = p34 digits
1690           }
1691           e3 = e3 - scale;
1692           res.w[1] = z_sign | res.w[1];
1693           rounding_correction (rnd_mode,
1694         		       is_inexact_lt_midpoint,
1695         		       is_inexact_gt_midpoint,
1696         		       is_midpoint_lt_even, is_midpoint_gt_even,
1697         		       e3, &res, pfpsf);
1698         }
1699         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1700         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1701         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1702         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1703         BID_SWAP128 (res);
1704         BID_RETURN (res)
1705       }
1706       // res = z
1707       if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1708         // return (C3 * 10^scale) * 10^(z_exp - scale)
1709         // where scale = min (p34-q3, z_exp-EMIN)
1710         scale = p34 - q3;
1711         ind = e3 + 6176;
1712         if (ind < scale)
1713           scale = ind;
1714         if (scale == 0) {
1715           res.w[1] = C3.w[1];
1716           res.w[0] = C3.w[0];
1717         } else if (q3 <= 19) { // z fits in 64 bits
1718           if (scale <= 19) { // 10^scale fits in 64 bits
1719             // 64 x 64 C3.w[0] * ten2k64[scale]
1720             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1721           } else { // 10^scale fits in 128 bits
1722             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1723             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1724           }
1725         } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1726           // 64 x 128 ten2k64[scale] * C3
1727           __mul_128x64_to_128 (res, ten2k64[scale], C3);
1728         }
1729         // the coefficient in res has q3 + scale digits
1730         // subtract scale from the exponent
1731         z_exp = z_exp - ((UINT64) scale << 49);
1732         e3 = e3 - scale;
1733         res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1734         if (scale + q3 < p34)
1735           *pfpsf |= UNDERFLOW_EXCEPTION;
1736       } else {
1737         scale = 0;
1738         res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1739         res.w[0] = C3.w[0];
1740       }
1741 
1742       // use the following to avoid double rounding errors when operating on
1743       // mixed formats in rounding to nearest, and for correcting the result
1744       // if not rounding to nearest
1745       if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1746         // there is a gap of exactly one digit between the scaled C3 and C4
1747         // C3 * 10^scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is a special case
1748         if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1749             (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1750             (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1751         		  C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1752           // C3 != 10^(q3-1), i.e. C3 * 10^scale != 10^(q3+scale-1)
1753           // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1754           // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1755           is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1756         } else { // if C3 * 10^scale = 10^(q3+scale-1)
1757           // ok from above e3 = (z_exp >> 49) - 6176;
1758           // the result is always inexact
1759           if (q4 == 1) {
1760             R64 = C4.w[0];
1761           } else {
1762             // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1763             // x = q4-1, 1 <= x <= 67 and check if this operation is exact
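            // e.g. C4 = 407 (q4 = 3) gives R64 = 4 with is_inexact_lt_midpoint
            // set, since the discarded part 07 is below the midpoint 50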
1764             if (q4 <= 18) { // 2 <= q4 <= 18
1765               round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1766         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
1767         		    &is_inexact_lt_midpoint,
1768         		    &is_inexact_gt_midpoint);
1769             } else if (q4 <= 38) {
1770               P128.w[1] = C4.w[1];
1771               P128.w[0] = C4.w[0];
1772               round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1773         		      &is_midpoint_lt_even,
1774         		      &is_midpoint_gt_even,
1775         		      &is_inexact_lt_midpoint,
1776         		      &is_inexact_gt_midpoint);
1777               R64 = R128.w[0]; // one decimal digit
1778             } else if (q4 <= 57) {
1779               P192.w[2] = C4.w[2];
1780               P192.w[1] = C4.w[1];
1781               P192.w[0] = C4.w[0];
1782               round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1783         		      &is_midpoint_lt_even,
1784         		      &is_midpoint_gt_even,
1785         		      &is_inexact_lt_midpoint,
1786         		      &is_inexact_gt_midpoint);
1787               R64 = R192.w[0]; // one decimal digit
1788             } else { // if (q4 <= 68)
1789               round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1790         		      &is_midpoint_lt_even,
1791         		      &is_midpoint_gt_even,
1792         		      &is_inexact_lt_midpoint,
1793         		      &is_inexact_gt_midpoint);
1794               R64 = R256.w[0]; // one decimal digit
1795             }
1796             if (incr_exp) {
1797               R64 = 10;
1798             }
1799           }
1800           if (q4 == 1 && C4.w[0] == 5) {
1801             is_inexact_lt_midpoint = 0;
1802             is_inexact_gt_midpoint = 0;
1803             is_midpoint_lt_even = 1;
1804             is_midpoint_gt_even = 0;
1805           } else if ((e3 == expmin) ||
1806         	     R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
1807             // result does not change
1808             is_inexact_lt_midpoint = 0;
1809             is_inexact_gt_midpoint = 1;
1810             is_midpoint_lt_even = 0;
1811             is_midpoint_gt_even = 0;
1812           } else {
1813             is_inexact_lt_midpoint = 1;
1814             is_inexact_gt_midpoint = 0;
1815             is_midpoint_lt_even = 0;
1816             is_midpoint_gt_even = 0;
1817             // result decremented is 10^(q3+scale) - 1
1818             if ((q3 + scale) <= 19) {
1819               res.w[1] = 0;
1820               res.w[0] = ten2k64[q3 + scale];
1821             } else { // if ((q3 + scale + 1) <= 35)
1822               res.w[1] = ten2k128[q3 + scale - 20].w[1];
1823               res.w[0] = ten2k128[q3 + scale - 20].w[0];
1824             }
1825             res.w[0] = res.w[0] - 1; // borrow never occurs
1826             z_exp = z_exp - EXP_P1;
1827             e3 = e3 - 1;
1828             res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1829           }
1830           if (e3 == expmin) {
1831             if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1832               ; // result not tiny (in round-to-nearest mode)
1833             } else {
1834               *pfpsf |= UNDERFLOW_EXCEPTION;
1835             }
1836           }
1837         } // end 10^(q3+scale-1)
1838         // set the inexact flag
1839         *pfpsf |= INEXACT_EXCEPTION;
1840       } else {
1841         if (p_sign == z_sign) {
1842           // if (z_sign), set as if for absolute value
1843           is_inexact_lt_midpoint = 1;
1844         } else { // if (p_sign != z_sign)
1845           // if (z_sign), set as if for absolute value
1846           is_inexact_gt_midpoint = 1;
1847         }
1848         *pfpsf |= INEXACT_EXCEPTION;
1849       }
1850       // the result is always inexact => set the inexact flag
1851       // Determine tininess:
1852       //    if (exp > expmin)
1853       //      the result is not tiny
1854       //    else // if exp = emin
1855       //      if (q3 + scale < p34)
1856       //        the result is tiny
1857       //      else // if (q3 + scale = p34)
1858       //        if (C3 * 10^scale > 10^33)
1859       //          the result is not tiny
1860       //        else // if C3 * 10^scale = 10^33
1861       //          if (xy * z > 0)
1862       //            the result is not tiny
1863       //          else // if (xy * z < 0)
1864       //            if (z > 0)
1865       //              if rnd_mode != RP
1866       //                the result is tiny
1867       //              else // if RP
1868       //                the result is not tiny
1869       //            else // if (z < 0)
1870       //              if rnd_mode != RM
1871       //                the result is tiny
1872       //              else // if RM
1873       //                the result is not tiny
1874       //              endif
1875       //            endif
1876       //          endif
1877       //        endif
1878       //      endif
1879       //    endif
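      // e.g. with z > 0, x * y < 0, e3 = expmin, q3 + scale = p34 and
      // C3 * 10^scale = 10^33: subtracting |x * y| pulls the magnitude just
      // below 10^33 * 10^expmin, so the result is tiny unless rounding up
      // (RP for positive results) brings it back to 10^33 * 10^expmin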
1880       if ((e3 == expmin && (q3 + scale) < p34) ||
1881           (e3 == expmin && (q3 + scale) == p34 &&
1882           (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&	// 10^33_high
1883           res.w[0] == 0x38c15b0a00000000ull &&	// 10^33_low
1884           z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
1885           (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1886         *pfpsf |= UNDERFLOW_EXCEPTION;
1887       }
1888       if (rnd_mode != ROUNDING_TO_NEAREST) {
1889         rounding_correction (rnd_mode,
1890         		     is_inexact_lt_midpoint,
1891         		     is_inexact_gt_midpoint,
1892         		     is_midpoint_lt_even, is_midpoint_gt_even,
1893         		     e3, &res, pfpsf);
1894       }
1895       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1896       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1897       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1898       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1899       BID_SWAP128 (res);
1900       BID_RETURN (res)
1901 
1902     } else if (p34 == delta) { // Case (1''B)
1903 
1904       // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1905       // and C3 can be scaled up to p34 digits if needed
1906 
1907       // scale C3 to p34 digits if needed
1908       scale = p34 - q3; // 0 <= scale <= p34 - 1
1909       if (scale == 0) {
1910         res.w[1] = C3.w[1];
1911         res.w[0] = C3.w[0];
1912       } else if (q3 <= 19) { // z fits in 64 bits
1913         if (scale <= 19) { // 10^scale fits in 64 bits
1914           // 64 x 64 C3.w[0] * ten2k64[scale]
1915           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1916         } else { // 10^scale fits in 128 bits
1917           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1918           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1919         }
1920       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1921         // 64 x 128 ten2k64[scale] * C3
1922         __mul_128x64_to_128 (res, ten2k64[scale], C3);
1923       }
1924       // subtract scale from the exponent
1925       z_exp = z_exp - ((UINT64) scale << 49);
1926       e3 = e3 - scale;
1927       // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1928 
1929       // determine whether x * y is less than, equal to, or greater than
1930       // 1/2 ulp (z)
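      // here e3 = q4 + e4 (because delta = p34), so ulp (z) = 10^e3 and
      // 1/2 ulp (z) corresponds to 5 * 10^(q4-1) in units of 10^e4; this is
      // the value C4 is compared against via the midpoint tables below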
1931       if (q4 <= 19) {
1932         if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1933           lt_half_ulp = 1;
1934         } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1935           eq_half_ulp = 1;
1936         } else { // > 1/2 ulp
1937           gt_half_ulp = 1;
1938         }
1939       } else if (q4 <= 38) {
1940         if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1941             (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1942             C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
1943           lt_half_ulp = 1;
1944         } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1945             C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1946           eq_half_ulp = 1;
1947         } else { // > 1/2 ulp
1948           gt_half_ulp = 1;
1949         }
1950       } else if (q4 <= 58) {
1951         if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1952             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1953             C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1954             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1955             C4.w[1] == midpoint192[q4 - 39].w[1] &&
1956             C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
1957           lt_half_ulp = 1;
1958         } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1959             C4.w[1] == midpoint192[q4 - 39].w[1] &&
1960             C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
1961           eq_half_ulp = 1;
1962         } else { // > 1/2 ulp
1963           gt_half_ulp = 1;
1964         }
1965       } else {
1966         if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1967             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1968             C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1969             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1970             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1971             C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1972             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1973             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1974             C4.w[1] == midpoint256[q4 - 59].w[1] &&
1975             C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
1976           lt_half_ulp = 1;
1977         } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1978             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1979             C4.w[1] == midpoint256[q4 - 59].w[1] &&
1980             C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
1981           eq_half_ulp = 1;
1982         } else { // > 1/2 ulp
1983           gt_half_ulp = 1;
1984         }
1985       }
1986 
1987       if (p_sign == z_sign) {
1988         if (lt_half_ulp) {
1989           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1990           // use the following to avoid double rounding errors when operating on
1991           // mixed formats in rounding to nearest
1992           is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1993         } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1994           // add 1 ulp to the significand
1995           res.w[0]++;
1996           if (res.w[0] == 0x0ull)
1997             res.w[1]++;
1998           // check for rounding overflow, when coeff == 10^34
1999           if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
2000               res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
2001             e3 = e3 + 1;
2002             // coeff = 10^33
2003             z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2004             res.w[1] = 0x0000314dc6448d93ull;
2005             res.w[0] = 0x38c15b0a00000000ull;
2006           }
2007           // end add 1 ulp
2008           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2009           if (eq_half_ulp) {
2010             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2011           } else {
2012             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2013           }
2014         } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2015           // leave unchanged
2016           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2017           is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2018         }
2019         // the result is always inexact, and never tiny
2020         // set the inexact flag
2021         *pfpsf |= INEXACT_EXCEPTION;
2022         // check for overflow
2023         if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2024           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2025           res.w[0] = 0x0000000000000000ull;
2026           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2027           *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2028           *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2029           *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2030           *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2031           BID_SWAP128 (res);
2032           BID_RETURN (res)
2033         }
2034         if (rnd_mode != ROUNDING_TO_NEAREST) {
2035           rounding_correction (rnd_mode,
2036         		       is_inexact_lt_midpoint,
2037         		       is_inexact_gt_midpoint,
2038         		       is_midpoint_lt_even, is_midpoint_gt_even,
2039         		       e3, &res, pfpsf);
2040           z_exp = res.w[1] & MASK_EXP;
2041         }
2042       } else { // if (p_sign != z_sign)
2043         // consider two cases, because C3 * 10^scale = 10^33 is a special case
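        // (10^33 is the smallest 34-digit coefficient: subtracting from it can
        //  drop the result to 33 digits, which requires decreasing the exponent
        //  and re-examining the rounding; that is handled in the second branch)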
2044         if (res.w[1] != 0x0000314dc6448d93ull ||
2045             res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2046           if (lt_half_ulp) {
2047             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2048             // use the following to avoid double rounding errors when operating
2049             // on mixed formats in rounding to nearest
2050             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2051           } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2052             // subtract 1 ulp from the significand
2053             res.w[0]--;
2054             if (res.w[0] == 0xffffffffffffffffull)
2055               res.w[1]--;
2056             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2057             if (eq_half_ulp) {
2058               is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2059             } else {
2060               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2061             }
2062           } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2063             // leave unchanged
2064             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2065             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2066           }
2067           // the result is always inexact, and never tiny
2068           // check for overflow for RN
2069           if (e3 > expmax) {
2070             if (rnd_mode == ROUNDING_TO_NEAREST) {
2071               res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2072               res.w[0] = 0x0000000000000000ull;
2073               *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2074             } else {
2075               rounding_correction (rnd_mode,
2076         			   is_inexact_lt_midpoint,
2077         			   is_inexact_gt_midpoint,
2078         			   is_midpoint_lt_even,
2079         			   is_midpoint_gt_even, e3, &res,
2080         			   pfpsf);
2081             }
2082             *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2083             *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2084             *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2085             *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2086             BID_SWAP128 (res);
2087             BID_RETURN (res)
2088           }
2089           // set the inexact flag
2090           *pfpsf |= INEXACT_EXCEPTION;
2091           if (rnd_mode != ROUNDING_TO_NEAREST) {
2092             rounding_correction (rnd_mode,
2093         			 is_inexact_lt_midpoint,
2094         			 is_inexact_gt_midpoint,
2095         			 is_midpoint_lt_even,
2096         			 is_midpoint_gt_even, e3, &res, pfpsf);
2097           }
2098           z_exp = res.w[1] & MASK_EXP;
2099         } else { // if C3 * 10^scale = 10^33
2100           e3 = (z_exp >> 49) - 6176;
2101           if (e3 > expmin) {
2102             // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2103             // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2104             if (q4 == 1) {
2105               // if q4 = 1 the result is exact
2106               // result coefficient = 10^34 - C4
2107               res.w[1] = 0x0001ed09bead87c0ull;
2108               res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2109               z_exp = z_exp - EXP_P1;
2110               e3 = e3 - 1;
2111               res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2112             } else {
2113               // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2114               // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2115               if (q4 <= 18) { // 2 <= q4 <= 18
2116         	round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2117         		      &is_midpoint_lt_even,
2118         		      &is_midpoint_gt_even,
2119         		      &is_inexact_lt_midpoint,
2120         		      &is_inexact_gt_midpoint);
2121               } else if (q4 <= 38) {
2122         	P128.w[1] = C4.w[1];
2123         	P128.w[0] = C4.w[0];
2124         	round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2125         			&is_midpoint_lt_even,
2126         			&is_midpoint_gt_even,
2127         			&is_inexact_lt_midpoint,
2128         			&is_inexact_gt_midpoint);
2129         	R64 = R128.w[0]; // one decimal digit
2130               } else if (q4 <= 57) {
2131         	P192.w[2] = C4.w[2];
2132         	P192.w[1] = C4.w[1];
2133         	P192.w[0] = C4.w[0];
2134         	round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2135         			&is_midpoint_lt_even,
2136         			&is_midpoint_gt_even,
2137         			&is_inexact_lt_midpoint,
2138         			&is_inexact_gt_midpoint);
2139         	R64 = R192.w[0]; // one decimal digit
2140               } else { // if (q4 <= 68)
2141         	round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2142         			&is_midpoint_lt_even,
2143         			&is_midpoint_gt_even,
2144         			&is_inexact_lt_midpoint,
2145         			&is_inexact_gt_midpoint);
2146         	R64 = R256.w[0]; // one decimal digit
2147               }
2148               if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2149         	  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2150         	// the result is exact: 10^34 - R64
2151         	// incr_exp = 0 with certainty
2152         	z_exp = z_exp - EXP_P1;
2153         	e3 = e3 - 1;
2154         	res.w[1] =
2155         	  z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2156         	res.w[0] = 0x378d8e6400000000ull - R64;
2157               } else {
2158         	// We want R64 to be the top digit of C4, but we actually
2159         	// obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2160         	// because the top digit is (C4 * 10^(-q4+1))RZ
2161         	// however, if incr_exp = 1 then R64 = 10 with certainty
2162         	if (incr_exp) {
2163         	  R64 = 10;
2164         	}
2165         	// the result is inexact as C4 has more than 1 significant digit
2166         	// and C3 * 10^scale = 10^33
2167         	// example of case that is treated here:
2168         	// 100...0 * 10^e3 - 0.41 * 10^e3 =
2169         	// 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2170         	// note that (e3 > expmin)
2171         	// in order to round, subtract R64 from 10^34 and then compare
2172         	// C4 - R64 * 10^(q4-1) with 1/2 ulp
2173         	// calculate 10^34 - R64
2174         	res.w[1] = 0x0001ed09bead87c0ull;
2175         	res.w[0] = 0x378d8e6400000000ull - R64;
2176         	z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2177         	// calculate C4 - R64 * 10^(q4-1); this is a rare case and
2178         	// R64 is small, 1 <= R64 <= 9
2179         	e3 = e3 - 1;
2180         	if (is_inexact_lt_midpoint) {
2181         	  is_inexact_lt_midpoint = 0;
2182         	  is_inexact_gt_midpoint = 1;
2183         	} else if (is_inexact_gt_midpoint) {
2184         	  is_inexact_gt_midpoint = 0;
2185         	  is_inexact_lt_midpoint = 1;
2186         	} else if (is_midpoint_lt_even) {
2187         	  is_midpoint_lt_even = 0;
2188         	  is_midpoint_gt_even = 1;
2189         	} else if (is_midpoint_gt_even) {
2190         	  is_midpoint_gt_even = 0;
2191         	  is_midpoint_lt_even = 1;
2192         	} else {
2193         	  ;
2194         	}
2195         	// the result is always inexact, and never tiny
2196         	// check for overflow for RN
2197         	if (e3 > expmax) {
2198         	  if (rnd_mode == ROUNDING_TO_NEAREST) {
2199         	    res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2200         	    res.w[0] = 0x0000000000000000ull;
2201         	    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2202         	  } else {
2203         	    rounding_correction (rnd_mode,
2204         				 is_inexact_lt_midpoint,
2205         				 is_inexact_gt_midpoint,
2206         				 is_midpoint_lt_even,
2207         				 is_midpoint_gt_even, e3, &res,
2208         				 pfpsf);
2209         	  }
2210         	  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2211         	  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2212         	  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2213         	  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2214         	  BID_SWAP128 (res);
2215         	  BID_RETURN (res)
2216         	}
2217         	// set the inexact flag
2218         	*pfpsf |= INEXACT_EXCEPTION;
2219         	res.w[1] =
2220         	  z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2221         	if (rnd_mode != ROUNDING_TO_NEAREST) {
2222         	  rounding_correction (rnd_mode,
2223         			       is_inexact_lt_midpoint,
2224         			       is_inexact_gt_midpoint,
2225         			       is_midpoint_lt_even,
2226         			       is_midpoint_gt_even, e3, &res,
2227         			       pfpsf);
2228         	}
2229         	z_exp = res.w[1] & MASK_EXP;
2230               } // end result is inexact
2231             } // end q4 > 1
2232           } else { // if (e3 = emin)
2233             // if e3 = expmin the result is also tiny (the condition for
2234             // tininess is C4 > 050...0 [q4 digits] which is met because
2235             // the msd of C4 is not zero)
2236             // the result is tiny and inexact in all rounding modes;
2237             // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2238             // gt_half_ulp to calculate)
2239             // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2240 
2241             // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2242             if (gt_half_ulp) { // res = 10^33 - 1
2243               res.w[1] = 0x0000314dc6448d93ull;
2244               res.w[0] = 0x38c15b09ffffffffull;
2245             } else {
2246               res.w[1] = 0x0000314dc6448d93ull;
2247               res.w[0] = 0x38c15b0a00000000ull;
2248             }
2249             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2250             *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2251 
2252             if (eq_half_ulp) {
2253               is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2254             } else if (lt_half_ulp) {
2255               is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2256             } else { // if (gt_half_ulp)
2257               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2258             }
2259 
2260             if (rnd_mode != ROUNDING_TO_NEAREST) {
2261               rounding_correction (rnd_mode,
2262                   is_inexact_lt_midpoint,
2263                   is_inexact_gt_midpoint,
2264                   is_midpoint_lt_even,
2265                   is_midpoint_gt_even, e3, &res,
2266                   pfpsf);
2267               z_exp = res.w[1] & MASK_EXP;
2268             }
2269           } // end e3 = emin
2270           // set the inexact flag (if the result was not exact)
2271           if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2272               is_midpoint_lt_even || is_midpoint_gt_even)
2273             *pfpsf |= INEXACT_EXCEPTION;
2274         } // end 10^33
2275       } // end if (p_sign != z_sign)
2276       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2277       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2278       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2279       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2280       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2281       BID_SWAP128 (res);
2282       BID_RETURN (res)
2283 
2284     } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2285         (q3 <= delta && delta + q4 <= p34) || // Case (3)
2286         (delta < q3 && p34 < delta + q4) || // Case (4)
2287         (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
2288         (delta + q4 < q3)) && // Case (6)
2289         !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2290 
2291       // the result has the sign of z
2292 
2293       if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2294           (delta < q3 && p34 < delta + q4)) { // Case (4)
2295         // round first the sum x * y + z with unbounded exponent
2296         // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2297         // 1 <= scale <= 33
2298         // calculate res = C3 * 10^scale
2299         scale = p34 - q3;
2300         x0 = delta + q4 - p34;
2301       } else if (delta + q4 < q3) { // Case (6)
2302         // make Case (6) look like Case (3) or Case (5) with scale = 0
2303         // by scaling up C4 by 10^(q3 - delta - q4)
2304         scale = q3 - delta - q4; // 1 <= scale <= 33
2305         if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2306           if (scale <= 19) { // 10^scale fits in 64 bits
2307             // 64 x 64 C4.w[0] * ten2k64[scale]
2308             __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
2309           } else { // 10^scale fits in 128 bits
2310             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2311             __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
2312           }
2313         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2314           // 64 x 128 ten2k64[scale] * C4
2315           __mul_128x64_to_128 (P128, ten2k64[scale], C4);
2316         }
2317         C4.w[0] = P128.w[0];
2318         C4.w[1] = P128.w[1];
2319         // e4 does not need adjustment, as it is not used from this point on
2320         scale = 0;
2321         x0 = 0;
2322         // now Case (6) looks like Case (3) or Case (5) with scale = 0
2323       } else { // if Case (3) or Case (5)
2324         // Note: Case (3) is similar to Case (2), but scale differs and the
2325         // result is exact, unless it is tiny (so x0 = 0 when calculating the
2326         // result with unbounded exponent)
2327 
2328         // calculate first the sum x * y + z with unbounded exponent (exact)
2329         // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2330         // 1 <= scale <= 33
2331         // calculate res = C3 * 10^scale
2332         scale = delta + q4 - q3;
2333         x0 = 0;
2334         // Note: the comments which follow refer [mainly] to Case (2)
2335       }
2336 
2337     case2_repeat:
2338       if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2339         // or in Case (4)
2340         res.w[1] = C3.w[1];
2341         res.w[0] = C3.w[0];
2342       } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2343         if (scale <= 19) { // 10^scale fits in 64 bits
2344           // 64 x 64 C3.w[0] * ten2k64[scale]
2345           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
2346         } else { // 10^scale fits in 128 bits
2347           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2348           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
2349         }
2350       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2351         // 64 x 128 ten2k64[scale] * C3
2352         __mul_128x64_to_128 (res, ten2k64[scale], C3);
2353       }
2354       // e3 is already calculated
2355       e3 = e3 - scale;
2356       // now res = C3 * 10^scale and e3 = e3 - scale
2357       // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2358       // because the result was too small
2359 
2360       // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2361       // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2362       // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2363       // the rounding fits in 128 bits!)
2364       // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2365       // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
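      // e.g. in Case (2) with q3 = 30, q4 = 20, delta = 32: scale = 4 brings
      // C3 to p34 = 34 digits and x0 = delta + q4 - p34 = 18, so C4 is rounded
      // from 20 digits to q4 - x0 = 2 digits, whose weight lines up with the
      // two lowest digits of the scaled C3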
2366       if (x0 == 0) { // this could happen only if we return to case2_repeat, or
2367         // for Case (3) or Case (6)
2368         R128.w[1] = C4.w[1];
2369         R128.w[0] = C4.w[0];
2370       } else if (q4 <= 18) {
2371         // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2372         round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2373             &is_midpoint_lt_even, &is_midpoint_gt_even,
2374             &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
2375         if (incr_exp) {
2376           // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2377           R64 = ten2k64[q4 - x0];
2378         }
2379         R128.w[1] = 0;
2380         R128.w[0] = R64;
2381       } else if (q4 <= 38) {
2382         // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2383         P128.w[1] = C4.w[1];
2384         P128.w[0] = C4.w[0];
2385         round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2386             &is_midpoint_lt_even, &is_midpoint_gt_even,
2387             &is_inexact_lt_midpoint,
2388             &is_inexact_gt_midpoint);
2389         if (incr_exp) {
2390           // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2391           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2392             R128.w[0] = ten2k64[q4 - x0];
2393             // R128.w[1] stays 0
2394           } else { // 20 <= q4 - x0 <= 37
2395             R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2396             R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
2397           }
2398         }
2399       } else if (q4 <= 57) {
2400         // 39 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 6 <= x0 <= 56
2401         P192.w[2] = C4.w[2];
2402         P192.w[1] = C4.w[1];
2403         P192.w[0] = C4.w[0];
2404         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2405             &is_midpoint_lt_even, &is_midpoint_gt_even,
2406             &is_inexact_lt_midpoint,
2407             &is_inexact_gt_midpoint);
2408         // R192.w[2] is always 0
2409         if (incr_exp) {
2410           // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2411           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2412             R192.w[0] = ten2k64[q4 - x0];
2413             // R192.w[1] stays 0
2414             // R192.w[2] stays 0
2415           } else { // 20 <= q4 - x0 <= 33
2416             R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2417             R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
2418             // R192.w[2] stays 0
2419           }
2420         }
2421         R128.w[1] = R192.w[1];
2422         R128.w[0] = R192.w[0];
2423       } else {
2424         // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2425         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2426             &is_midpoint_lt_even, &is_midpoint_gt_even,
2427             &is_inexact_lt_midpoint,
2428             &is_inexact_gt_midpoint);
2429         // R256.w[3] and R256.w[2] are always 0
2430         if (incr_exp) {
2431           // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2432           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2433             R256.w[0] = ten2k64[q4 - x0];
2434             // R256.w[1] stays 0
2435             // R256.w[2] stays 0
2436             // R256.w[3] stays 0
2437           } else { // 20 <= q4 - x0 <= 33
2438             R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2439             R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
2440             // R256.w[2] stays 0
2441             // R256.w[3] stays 0
2442           }
2443         }
2444         R128.w[1] = R256.w[1];
2445         R128.w[0] = R256.w[0];
2446       }
2447       // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2448       // rounded to nearest, which were copied into R128
2449       if (z_sign == p_sign) {
2450         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2451         // the sum can result in [up to] p34 or p34 + 1 digits
2452         res.w[0] = res.w[0] + R128.w[0];
2453         res.w[1] = res.w[1] + R128.w[1];
2454         if (res.w[0] < R128.w[0])
2455           res.w[1]++; // carry
2456         // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
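        // (the 34-digit scaled C3 plus the rounded top digits of C4 may carry
        //  into a 35th digit; in that case the sum is rounded by one more digit
        //  below and e3 is incremented)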
2457         if (res.w[1] > 0x0001ed09bead87c0ull ||
2458             (res.w[1] == 0x0001ed09bead87c0ull &&
2459              res.w[0] > 0x378d8e63ffffffffull)) {
2460           // avoid double rounding error
2461           is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2462           is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2463           is_midpoint_lt_even0 = is_midpoint_lt_even;
2464           is_midpoint_gt_even0 = is_midpoint_gt_even;
2465           is_inexact_lt_midpoint = 0;
2466           is_inexact_gt_midpoint = 0;
2467           is_midpoint_lt_even = 0;
2468           is_midpoint_gt_even = 0;
2469           P128.w[1] = res.w[1];
2470           P128.w[0] = res.w[0];
2471           round128_19_38 (35, 1, P128, &res, &incr_exp,
2472               &is_midpoint_lt_even, &is_midpoint_gt_even,
2473               &is_inexact_lt_midpoint,
2474               &is_inexact_gt_midpoint);
2475           // incr_exp is 0 with certainty in this case
2476           // avoid a double rounding error
2477           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2478               is_midpoint_lt_even) { // double rounding error upward
2479             // res = res - 1
2480             res.w[0]--;
2481             if (res.w[0] == 0xffffffffffffffffull)
2482               res.w[1]--;
2483             // Note: a double rounding error upward is not possible; for this
2484             // the result after the first rounding would have to be 99...95
2485             // (35 digits in all), possibly followed by a number of zeros; this
2486             // is not possible in Cases (2)-(6) or (15)-(17) which may get here
2487             is_midpoint_lt_even = 0;
2488             is_inexact_lt_midpoint = 1;
2489           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2490               is_midpoint_gt_even) { // double rounding error downward
2491             // res = res + 1
2492             res.w[0]++;
2493             if (res.w[0] == 0)
2494               res.w[1]++;
2495             is_midpoint_gt_even = 0;
2496             is_inexact_gt_midpoint = 1;
2497           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2498         	     !is_inexact_lt_midpoint
2499         	     && !is_inexact_gt_midpoint) {
2500             // if this second rounding was exact the result may still be
2501             // inexact because of the first rounding
2502             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2503               is_inexact_gt_midpoint = 1;
2504             }
2505             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2506               is_inexact_lt_midpoint = 1;
2507             }
2508           } else if (is_midpoint_gt_even &&
2509         	     (is_inexact_gt_midpoint0
2510         	      || is_midpoint_lt_even0)) {
2511             // pulled up to a midpoint
2512             is_inexact_lt_midpoint = 1;
2513             is_inexact_gt_midpoint = 0;
2514             is_midpoint_lt_even = 0;
2515             is_midpoint_gt_even = 0;
2516           } else if (is_midpoint_lt_even &&
2517         	     (is_inexact_lt_midpoint0
2518         	      || is_midpoint_gt_even0)) {
2519             // pulled down to a midpoint
2520             is_inexact_lt_midpoint = 0;
2521             is_inexact_gt_midpoint = 1;
2522             is_midpoint_lt_even = 0;
2523             is_midpoint_gt_even = 0;
2524           } else {
2525             ;
2526           }
2527           // adjust exponent
2528           e3 = e3 + 1;
2529           if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2530               !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2531             if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2532         	is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2533               is_inexact_lt_midpoint = 1;
2534             }
2535           }
2536         } else {
2537           // this is the result rounded with unbounded exponent, unless a
2538           // correction is needed
2539           res.w[1] = res.w[1] & MASK_COEFF;
2540           if (lsb == 1) {
2541             if (is_midpoint_gt_even) {
2542               // res = res + 1
2543               is_midpoint_gt_even = 0;
2544               is_midpoint_lt_even = 1;
2545               res.w[0]++;
2546               if (res.w[0] == 0x0)
2547         	res.w[1]++;
2548               // check for rounding overflow
2549               if (res.w[1] == 0x0001ed09bead87c0ull &&
2550         	  res.w[0] == 0x378d8e6400000000ull) {
2551         	// res = 10^34 => rounding overflow
2552         	res.w[1] = 0x0000314dc6448d93ull;
2553         	res.w[0] = 0x38c15b0a00000000ull; // 10^33
2554         	e3++;
2555               }
2556             } else if (is_midpoint_lt_even) {
2557               // res = res - 1
2558               is_midpoint_lt_even = 0;
2559               is_midpoint_gt_even = 1;
2560               res.w[0]--;
2561               if (res.w[0] == 0xffffffffffffffffull)
2562         	res.w[1]--;
2563               // if the result is pure zero, the sign depends on the rounding
2564               // mode (x*y and z had opposite signs)
2565               if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2566         	if (rnd_mode != ROUNDING_DOWN)
2567         	  z_sign = 0x0000000000000000ull;
2568         	else
2569         	  z_sign = 0x8000000000000000ull;
2570         	// the exponent is max (e3, expmin)
2571         	res.w[1] = 0x0;
2572         	res.w[0] = 0x0;
2573         	*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2574         	*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2575         	*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2576         	*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2577         	BID_SWAP128 (res);
2578         	BID_RETURN (res)
2579               }
2580             } else {
2581               ;
2582             }
2583           }
2584         }
2585       } else { // if (z_sign != p_sign)
2586         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2587         // used to swap rounding indicators if p_sign != z_sign
2588         // the sum can result in [up to] p34 or p34 - 1 digits
2589         tmp64 = res.w[0];
2590         res.w[0] = res.w[0] - R128.w[0];
2591         res.w[1] = res.w[1] - R128.w[1];
2592         if (res.w[0] > tmp64)
2593           res.w[1]--; // borrow
2594         // if res < 10^33 and exp > expmin need to decrease x0 and
2595         // increase scale by 1
2596         if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2597         		     (res.w[1] == 0x0000314dc6448d93ull &&
2598         		      res.w[0] < 0x38c15b0a00000000ull)) ||
2599         		    (is_inexact_lt_midpoint
2600         		     && res.w[1] == 0x0000314dc6448d93ull
2601         		     && res.w[0] == 0x38c15b0a00000000ull))
2602             && x0 >= 1) {
2603           x0 = x0 - 1;
2604           // first restore e3, otherwise it will be too small
2605           e3 = e3 + scale;
2606           scale = scale + 1;
2607           is_inexact_lt_midpoint = 0;
2608           is_inexact_gt_midpoint = 0;
2609           is_midpoint_lt_even = 0;
2610           is_midpoint_gt_even = 0;
2611           incr_exp = 0;
2612           goto case2_repeat;
2613         }
2614         // else this is the result rounded with unbounded exponent;
2615         // because the result has opposite sign to that of C4 which was
2616         // rounded, need to change the rounding indicators
2617         if (is_inexact_lt_midpoint) {
2618           is_inexact_lt_midpoint = 0;
2619           is_inexact_gt_midpoint = 1;
2620         } else if (is_inexact_gt_midpoint) {
2621           is_inexact_gt_midpoint = 0;
2622           is_inexact_lt_midpoint = 1;
2623         } else if (lsb == 0) {
2624           if (is_midpoint_lt_even) {
2625             is_midpoint_lt_even = 0;
2626             is_midpoint_gt_even = 1;
2627           } else if (is_midpoint_gt_even) {
2628             is_midpoint_gt_even = 0;
2629             is_midpoint_lt_even = 1;
2630           } else {
2631             ;
2632           }
2633         } else if (lsb == 1) {
2634           if (is_midpoint_lt_even) {
2635             // res = res + 1
2636             res.w[0]++;
2637             if (res.w[0] == 0x0)
2638               res.w[1]++;
2639             // check for rounding overflow
2640             if (res.w[1] == 0x0001ed09bead87c0ull &&
2641         	res.w[0] == 0x378d8e6400000000ull) {
2642               // res = 10^34 => rounding overflow
2643               res.w[1] = 0x0000314dc6448d93ull;
2644               res.w[0] = 0x38c15b0a00000000ull; // 10^33
2645               e3++;
2646             }
2647           } else if (is_midpoint_gt_even) {
2648             // res = res - 1
2649             res.w[0]--;
2650             if (res.w[0] == 0xffffffffffffffffull)
2651               res.w[1]--;
2652             // if the result is pure zero, the sign depends on the rounding
2653             // mode (x*y and z had opposite signs)
2654             if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2655               if (rnd_mode != ROUNDING_DOWN)
2656         	z_sign = 0x0000000000000000ull;
2657               else
2658         	z_sign = 0x8000000000000000ull;
2659               // the exponent is max (e3, expmin)
2660               res.w[1] = 0x0;
2661               res.w[0] = 0x0;
2662               *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2663               *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2664               *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2665               *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2666               BID_SWAP128 (res);
2667               BID_RETURN (res)
2668             }
2669           } else {
2670             ;
2671           }
2672         } else {
2673           ;
2674         }
2675       }
2676       // check for underflow
2677       if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2678         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2679             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2680              res.w[0] < 0x38c15b0a00000000ull)) {
2681           is_tiny = 1;
2682         }
2683       } else if (e3 < expmin) {
2684         // the result is tiny, so we must truncate more of res
2685         is_tiny = 1;
2686         x0 = expmin - e3;
2687         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2688         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2689         is_midpoint_lt_even0 = is_midpoint_lt_even;
2690         is_midpoint_gt_even0 = is_midpoint_gt_even;
2691         is_inexact_lt_midpoint = 0;
2692         is_inexact_gt_midpoint = 0;
2693         is_midpoint_lt_even = 0;
2694         is_midpoint_gt_even = 0;
2695         // determine the number of decimal digits in res
2696         if (res.w[1] == 0x0) {
2697           // between 1 and 19 digits
2698           for (ind = 1; ind <= 19; ind++) {
2699             if (res.w[0] < ten2k64[ind]) {
2700               break;
2701             }
2702           }
2703           // ind digits
2704         } else if (res.w[1] < ten2k128[0].w[1] ||
2705         	   (res.w[1] == ten2k128[0].w[1]
2706         	    && res.w[0] < ten2k128[0].w[0])) {
2707           // 20 digits
2708           ind = 20;
2709         } else { // between 21 and 38 digits
2710           for (ind = 1; ind <= 18; ind++) {
2711             if (res.w[1] < ten2k128[ind].w[1] ||
2712         	(res.w[1] == ten2k128[ind].w[1] &&
2713         	 res.w[0] < ten2k128[ind].w[0])) {
2714               break;
2715             }
2716           }
2717           // ind + 20 digits
2718           ind = ind + 20;
2719         }
2720 
2721         // at this point ind >= x0; because delta >= 2 on this path, the case
2722         // ind = x0 can occur only in Cases (2) or (3), when C3 has one
2723         // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2724         // the signs of x * y and z are opposite, and through cancellation
2725         // the most significant decimal digit in res has the weight
2726         // 10^(emin-1); however, it is clear that in this case the most
2727         // significant digit is 9, so the result before rounding is
2728         // 0.9... * 10^emin
2729         // Otherwise, ind > x0 because there are non-zero decimal digits in the
2730         // result with weight of at least 10^emin, and correction for underflow
2731         // can be carried out using the round*_*_2_* () routines
2732         if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2733           res.w[1] = 0x0;
2734           res.w[0] = 0x1;
2735           is_inexact_gt_midpoint = 1;
2736         } else if (ind <= 18) { // check that 2 <= ind
2737           // 2 <= ind <= 18, 1 <= x0 <= 17
2738           round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2739         		&is_midpoint_lt_even, &is_midpoint_gt_even,
2740         		&is_inexact_lt_midpoint,
2741         		&is_inexact_gt_midpoint);
2742           if (incr_exp) {
2743             // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2744             R64 = ten2k64[ind - x0];
2745           }
2746           res.w[1] = 0;
2747           res.w[0] = R64;
2748         } else if (ind <= 38) {
2749           // 19 <= ind <= 38
2750           P128.w[1] = res.w[1];
2751           P128.w[0] = res.w[0];
2752           round128_19_38 (ind, x0, P128, &res, &incr_exp,
2753         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
2754         		  &is_inexact_lt_midpoint,
2755         		  &is_inexact_gt_midpoint);
2756           if (incr_exp) {
2757             // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2758             if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
2759               res.w[0] = ten2k64[ind - x0];
2760               // res.w[1] stays 0
2761             } else { // 20 <= ind - x0 <= 37
2762               res.w[0] = ten2k128[ind - x0 - 20].w[0];
2763               res.w[1] = ten2k128[ind - x0 - 20].w[1];
2764             }
2765           }
2766         }
2767         // avoid a double rounding error
2768         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2769             is_midpoint_lt_even) { // double rounding error upward
2770           // res = res - 1
2771           res.w[0]--;
2772           if (res.w[0] == 0xffffffffffffffffull)
2773             res.w[1]--;
2774           // Note: a double rounding error upward is not possible; for this
2775           // the result after the first rounding would have to be 99...95
2776           // (35 digits in all), possibly followed by a number of zeros; this
2777           // is not possible in Cases (2)-(6) which may get here
2778           is_midpoint_lt_even = 0;
2779           is_inexact_lt_midpoint = 1;
2780         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2781             is_midpoint_gt_even) { // double rounding error downward
2782           // res = res + 1
2783           res.w[0]++;
2784           if (res.w[0] == 0)
2785             res.w[1]++;
2786           is_midpoint_gt_even = 0;
2787           is_inexact_gt_midpoint = 1;
2788         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2789         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2790           // if this second rounding was exact the result may still be
2791           // inexact because of the first rounding
2792           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2793             is_inexact_gt_midpoint = 1;
2794           }
2795           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2796             is_inexact_lt_midpoint = 1;
2797           }
2798         } else if (is_midpoint_gt_even &&
2799         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
2800           // pulled up to a midpoint
2801           is_inexact_lt_midpoint = 1;
2802           is_inexact_gt_midpoint = 0;
2803           is_midpoint_lt_even = 0;
2804           is_midpoint_gt_even = 0;
2805         } else if (is_midpoint_lt_even &&
2806         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
2807           // pulled down to a midpoint
2808           is_inexact_lt_midpoint = 0;
2809           is_inexact_gt_midpoint = 1;
2810           is_midpoint_lt_even = 0;
2811           is_midpoint_gt_even = 0;
2812         } else {
2813           ;
2814         }
2815         // adjust exponent
2816         e3 = e3 + x0;
2817         if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2818             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2819           if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2820               is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2821             is_inexact_lt_midpoint = 1;
2822           }
2823         }
2824       } else {
2825         ; // not underflow
2826       }
2827       // check for inexact result
2828       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2829           is_midpoint_lt_even || is_midpoint_gt_even) {
2830         // set the inexact flag
2831         *pfpsf |= INEXACT_EXCEPTION;
2832         if (is_tiny)
2833           *pfpsf |= UNDERFLOW_EXCEPTION;
2834       }
2835       // now check for significand = 10^34 (may have resulted from going
2836       // back to case2_repeat)
2837       if (res.w[1] == 0x0001ed09bead87c0ull &&
2838           res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
2839         res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2840         res.w[0] = 0x38c15b0a00000000ull;
2841         e3 = e3 + 1;
2842       }
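      // assemble the result: sign bit, biased exponent (bias = 6176), and
      // the high part of the coefficient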
2843       res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2844       // check for overflow
2845       if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2846         res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2847         res.w[0] = 0x0000000000000000ull;
2848         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2849       }
2850       if (rnd_mode != ROUNDING_TO_NEAREST) {
2851         rounding_correction (rnd_mode,
2852         		     is_inexact_lt_midpoint,
2853         		     is_inexact_gt_midpoint,
2854         		     is_midpoint_lt_even, is_midpoint_gt_even,
2855         		     e3, &res, pfpsf);
2856       }
2857       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2858       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2859       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2860       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2861       BID_SWAP128 (res);
2862       BID_RETURN (res)
2863 
2864     } else {
2865 
2866       // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2867       // the signs of x*y and z are opposite; in these cases massive
2868       // cancellation can occur, so it is better to scale either C3 or C4 and
2869       // to perform the subtraction before rounding; rounding is performed
2870       // next, depending on the number of decimal digits in the result and on
2871       // the exponent value
2872       // Note: overflow is not possible in this case
2873       // this is similar to Cases (15), (16), and (17)
2874 
2875       if (delta + q4 < q3) { // from Case (6)
2876         // Case (6) with 0 <= delta <= 1 is similar to Cases (15), (16), and
2877         // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2878         // and call add_and_round; delta stays positive
2879         // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2880         P128.w[1] = C3.w[1];
2881         P128.w[0] = C3.w[0];
2882         C3.w[1] = C4.w[1];
2883         C3.w[0] = C4.w[0];
2884         C4.w[1] = P128.w[1];
2885         C4.w[0] = P128.w[0];
2886         ind = q3;
2887         q3 = q4;
2888         q4 = ind;
2889         ind = e3;
2890         e3 = e4;
2891         e4 = ind;
2892         tmp_sign = z_sign;
2893         z_sign = p_sign;
2894         p_sign = tmp_sign;
2895       } else { // from Cases (2), (3), (4), (5)
2896         // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2897         // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2898         // (16), and (17) if we just change the sign of delta
2899         delta = -delta;
2900       }
2901       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
2902         	     rnd_mode, &is_midpoint_lt_even,
2903         	     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2904         	     &is_inexact_gt_midpoint, pfpsf, &res);
2905       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2906       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2907       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2908       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2909       BID_SWAP128 (res);
2910       BID_RETURN (res)
2911 
2912     }
2913 
2914   } else { // if delta < 0
2915 
2916     delta = -delta;
2917 
2918     if (p34 < q4 && q4 <= delta) { // Case (7)
2919 
2920       // truncate C4 to p34 digits into res
2921       // x0 = q4 - p34, 1 <= x0 <= 34 because 35 <= q4 <= 68
2922       x0 = q4 - p34;
2923       if (q4 <= 38) {
2924         P128.w[1] = C4.w[1];
2925         P128.w[0] = C4.w[0];
2926         round128_19_38 (q4, x0, P128, &res, &incr_exp,
2927         		&is_midpoint_lt_even, &is_midpoint_gt_even,
2928         		&is_inexact_lt_midpoint,
2929         		&is_inexact_gt_midpoint);
2930       } else if (q4 <= 57) { // 39 <= q4 <= 57
2931         P192.w[2] = C4.w[2];
2932         P192.w[1] = C4.w[1];
2933         P192.w[0] = C4.w[0];
2934         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2935         		&is_midpoint_lt_even, &is_midpoint_gt_even,
2936         		&is_inexact_lt_midpoint,
2937         		&is_inexact_gt_midpoint);
2938         res.w[0] = R192.w[0];
2939         res.w[1] = R192.w[1];
2940       } else { // if (q4 <= 68)
2941         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2942         		&is_midpoint_lt_even, &is_midpoint_gt_even,
2943         		&is_inexact_lt_midpoint,
2944         		&is_inexact_gt_midpoint);
2945         res.w[0] = R256.w[0];
2946         res.w[1] = R256.w[1];
2947       }
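      // adjust the exponent for the x0 digits removed from C4; if rounding
      // carried into an extra digit, incr_exp adds one more below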
2948       e4 = e4 + x0;
2949       if (incr_exp) {
2950         e4 = e4 + 1;
2951       }
2952       if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2953           !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2954         // if C4 rounded to p34 digits is exact then the result is inexact,
2955         // in a way that depends on the signs of x * y and z
2956         if (p_sign == z_sign) {
2957           is_inexact_lt_midpoint = 1;
2958         } else { // if (p_sign != z_sign)
2959           if (res.w[1] != 0x0000314dc6448d93ull ||
2960               res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2961             is_inexact_gt_midpoint = 1;
2962           } else { // res = 10^33 and the rounding was exact: a special case
2963             // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2964             // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2965             // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2966             // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2967             if (delta > p34 + 1) { // C3 < 1/2
2968               // res = 10^33, unchanged
2969               is_inexact_gt_midpoint = 1;
2970             } else { // if (delta == p34 + 1)
2971               if (q3 <= 19) {
2972         	if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2973         	  // res = 10^33, unchanged
2974         	  is_inexact_gt_midpoint = 1;
2975         	} else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2976         	  // res = 10^33, unchanged
2977         	  is_midpoint_lt_even = 1;
2978         	} else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2979         	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2980         	  res.w[0] = 0x378d8e63ffffffffull;
2981         	  e4 = e4 - 1;
2982         	  is_inexact_lt_midpoint = 1;
2983         	}
2984               } else { // if (20 <= q3 <=34)
2985         	if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2986                     (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2987                     C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2988         	  // res = 10^33, unchanged
2989         	  is_inexact_gt_midpoint = 1;
2990         	} else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2991                     C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2992         	  // res = 10^33, unchanged
2993         	  is_midpoint_lt_even = 1;
2994         	} else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2995         	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2996         	  res.w[0] = 0x378d8e63ffffffffull;
2997         	  e4 = e4 - 1;
2998         	  is_inexact_lt_midpoint = 1;
2999         	}
3000               }
3001             }
3002           }
3003         }
3004       } else if (is_midpoint_lt_even) {
3005         if (z_sign != p_sign) {
3006           // needs correction: res = res - 1
3007           res.w[0] = res.w[0] - 1;
3008           if (res.w[0] == 0xffffffffffffffffull)
3009             res.w[1]--;
3010           // if it is (10^33-1)*10^e4 then the correct result is
3011           // (10^34-1)*10^(e4-1)
3012           if (res.w[1] == 0x0000314dc6448d93ull &&
3013               res.w[0] == 0x38c15b09ffffffffull) {
3014             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3015             res.w[0] = 0x378d8e63ffffffffull;
3016             e4 = e4 - 1;
3017           }
3018           is_midpoint_lt_even = 0;
3019           is_inexact_lt_midpoint = 1;
3020         } else { // if (z_sign == p_sign)
3021           is_midpoint_lt_even = 0;
3022           is_inexact_gt_midpoint = 1;
3023         }
3024       } else if (is_midpoint_gt_even) {
3025         if (z_sign == p_sign) {
3026           // needs correction: res = res + 1 (cannot cross into the next binade)
3027           res.w[0] = res.w[0] + 1;
3028           if (res.w[0] == 0x0000000000000000ull)
3029             res.w[1]++;
3030           is_midpoint_gt_even = 0;
3031           is_inexact_gt_midpoint = 1;
3032         } else { // if (z_sign != p_sign)
3033           is_midpoint_gt_even = 0;
3034           is_inexact_lt_midpoint = 1;
3035         }
3036       } else {
3037         ; // the rounded result is already correct
3038       }
3039       // check for overflow
3040       if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3041         res.w[1] = p_sign | 0x7800000000000000ull;
3042         res.w[0] = 0x0000000000000000ull;
3043         *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3044       } else { // no overflow or not RN
3045         p_exp = ((UINT64) (e4 + 6176) << 49);
3046         res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3047       }
3048       if (rnd_mode != ROUNDING_TO_NEAREST) {
3049         rounding_correction (rnd_mode,
3050         		     is_inexact_lt_midpoint,
3051         		     is_inexact_gt_midpoint,
3052         		     is_midpoint_lt_even, is_midpoint_gt_even,
3053         		     e4, &res, pfpsf);
3054       }
3055       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3056           is_midpoint_lt_even || is_midpoint_gt_even) {
3057         // set the inexact flag
3058         *pfpsf |= INEXACT_EXCEPTION;
3059       }
3060       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3061       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3062       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3063       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3064       BID_SWAP128 (res);
3065       BID_RETURN (res)
3066 
3067     } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3068         (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3069         (q4 <= delta && delta + q3 <= p34) || // Case (10)
3070         (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3071         (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3072         (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3073 
3074       // Case (8) is similar to Case (1), with C3 and C4 swapped
3075       // Case (9) is similar to Case (2), with C3 and C4 swapped
3076       // Case (10) is similar to Case (3), with C3 and C4 swapped
3077       // Case (13) is similar to Case (4), with C3 and C4 swapped
3078       // Case (14) is similar to Case (5), with C3 and C4 swapped
3079       // Case (18) is similar to Case (6), with C3 and C4 swapped
3080 
3081       // swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign), (z_exp, p_exp)
3082       // and go back to delta_ge_zero
3083       // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3084       P128.w[1] = C3.w[1];
3085       P128.w[0] = C3.w[0];
3086       C3.w[1] = C4.w[1];
3087       C3.w[0] = C4.w[0];
3088       C4.w[1] = P128.w[1];
3089       C4.w[0] = P128.w[0];
3090       ind = q3;
3091       q3 = q4;
3092       q4 = ind;
3093       ind = e3;
3094       e3 = e4;
3095       e4 = ind;
3096       tmp_sign = z_sign;
3097       z_sign = p_sign;
3098       p_sign = tmp_sign;
3099       tmp.ui64 = z_exp;
3100       z_exp = p_exp;
3101       p_exp = tmp.ui64;
3102       goto delta_ge_zero;
3103 
3104     } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3105                (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3106 
3107       // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3108       // 1 <= x0 <= q3 - 1 <= p34 - 1
3109       x0 = e4 - e3; // or x0 = delta + q3 - q4
3110       if (q3 <= 18) { // 2 <= q3 <= 18
3111         round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3112         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
3113         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
3114         // C3.w[1] = 0;
3115         C3.w[0] = R64;
3116       } else if (q3 <= 38) {
3117         round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3118         		&is_midpoint_lt_even, &is_midpoint_gt_even,
3119         		&is_inexact_lt_midpoint,
3120         		&is_inexact_gt_midpoint);
3121         C3.w[1] = R128.w[1];
3122         C3.w[0] = R128.w[0];
3123       }
3124       // the rounded result has q3 - x0 digits
3125       // we want the exponent to be e4, so if incr_exp = 1 then
3126       // multiply the rounded result by 10 - it will still fit in 113 bits
3127       if (incr_exp) {
3128         // 64 x 128 -> 128
3129         P128.w[1] = C3.w[1];
3130         P128.w[0] = C3.w[0];
3131         __mul_64x128_to_128 (C3, ten2k64[1], P128);
3132       }
3133       e3 = e3 + x0; // this is e4
3134       // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3135       // the result will have the sign of x * y; the exponent is e4
3136       R256.w[3] = 0;
3137       R256.w[2] = 0;
3138       R256.w[1] = C3.w[1];
3139       R256.w[0] = C3.w[0];
3140       if (p_sign == z_sign) { // R256 = C4 + R256
3141         add256 (C4, R256, &R256);
3142       } else { // if (p_sign != z_sign): R256 = C4 - R256
3143         sub256 (C4, R256, &R256); // the result cannot be pure zero
3144         // because the result has opposite sign to that of R256 which was
3145         // rounded, need to change the rounding indicators
3146         lsb = C4.w[0] & 0x01;
3147         if (is_inexact_lt_midpoint) {
3148           is_inexact_lt_midpoint = 0;
3149           is_inexact_gt_midpoint = 1;
3150         } else if (is_inexact_gt_midpoint) {
3151           is_inexact_gt_midpoint = 0;
3152           is_inexact_lt_midpoint = 1;
3153         } else if (lsb == 0) {
3154           if (is_midpoint_lt_even) {
3155             is_midpoint_lt_even = 0;
3156             is_midpoint_gt_even = 1;
3157           } else if (is_midpoint_gt_even) {
3158             is_midpoint_gt_even = 0;
3159             is_midpoint_lt_even = 1;
3160           } else {
3161             ;
3162           }
3163         } else if (lsb == 1) {
3164           if (is_midpoint_lt_even) {
3165             // res = res + 1
3166             R256.w[0]++;
3167             if (R256.w[0] == 0x0) {
3168               R256.w[1]++;
3169               if (R256.w[1] == 0x0) {
3170         	R256.w[2]++;
3171         	if (R256.w[2] == 0x0) {
3172         	  R256.w[3]++;
3173         	}
3174               }
3175             }
3176             // no check for rounding overflow - R256 was a difference
3177           } else if (is_midpoint_gt_even) {
3178             // res = res - 1
3179             R256.w[0]--;
3180             if (R256.w[0] == 0xffffffffffffffffull) {
3181               R256.w[1]--;
3182               if (R256.w[1] == 0xffffffffffffffffull) {
3183         	R256.w[2]--;
3184         	if (R256.w[2] == 0xffffffffffffffffull) {
3185         	  R256.w[3]--;
3186         	}
3187               }
3188             }
3189           } else {
3190             ;
3191           }
3192         } else {
3193           ;
3194         }
3195       }
3196       // determine the number of decimal digits in R256
3197       ind = nr_digits256 (R256); // ind >= p34
3198       // if R256 is a sum, then ind > p34; if R256 is a difference, then
3199       // ind >= p34; this means that we can calculate the result rounded to
3200       // the destination precision, with unbounded exponent, starting from R256
3201       // and using the indicators from the rounding of C3 to avoid a double
3202       // rounding error
3203 
3204       if (ind < p34) {
3205         ;
3206       } else if (ind == p34) {
3207         // the result rounded to the destination precision with
3208         // unbounded exponent
3209         // is (-1)^p_sign * R256 * 10^e4
3210         res.w[1] = R256.w[1];
3211         res.w[0] = R256.w[0];
3212       } else { // if (ind > p34)
3213         // if more than P digits, round to nearest to P digits
3214         // round R256 to p34 digits
3215         x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3216         // save C3 rounding indicators to help avoid double rounding error
3217         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3218         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3219         is_midpoint_lt_even0 = is_midpoint_lt_even;
3220         is_midpoint_gt_even0 = is_midpoint_gt_even;
3221         // initialize rounding indicators
3222         is_inexact_lt_midpoint = 0;
3223         is_inexact_gt_midpoint = 0;
3224         is_midpoint_lt_even = 0;
3225         is_midpoint_gt_even = 0;
3226         // round to p34 digits; the result fits in 113 bits
3227         if (ind <= 38) {
3228           P128.w[1] = R256.w[1];
3229           P128.w[0] = R256.w[0];
3230           round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3231         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
3232         		  &is_inexact_lt_midpoint,
3233         		  &is_inexact_gt_midpoint);
3234         } else if (ind <= 57) {
3235           P192.w[2] = R256.w[2];
3236           P192.w[1] = R256.w[1];
3237           P192.w[0] = R256.w[0];
3238           round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3239         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
3240         		  &is_inexact_lt_midpoint,
3241         		  &is_inexact_gt_midpoint);
3242           R128.w[1] = R192.w[1];
3243           R128.w[0] = R192.w[0];
3244         } else { // if (ind <= 68)
3245           round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3246         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
3247         		  &is_inexact_lt_midpoint,
3248         		  &is_inexact_gt_midpoint);
3249           R128.w[1] = R256.w[1];
3250           R128.w[0] = R256.w[0];
3251         }
3252         // the rounded result has p34 = 34 digits
3253         e4 = e4 + x0 + incr_exp;
3254 
3255         res.w[1] = R128.w[1];
3256         res.w[0] = R128.w[0];
3257 
3258         // avoid a double rounding error
3259         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3260             is_midpoint_lt_even) { // double rounding error upward
3261           // res = res - 1
3262           res.w[0]--;
3263           if (res.w[0] == 0xffffffffffffffffull)
3264             res.w[1]--;
3265           is_midpoint_lt_even = 0;
3266           is_inexact_lt_midpoint = 1;
3267           // Note: a double rounding error upward is not possible; for this
3268           // the result after the first rounding would have to be 99...95
3269           // (35 digits in all), possibly followed by a number of zeros; this
3270           // is not possible in Cases (2)-(6) or (15)-(17) which may get here
3271           // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3272           if (res.w[1] == 0x0000314dc6448d93ull &&
3273             res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
3274             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3275             res.w[0] = 0x378d8e63ffffffffull;
3276             e4--;
3277           }
3278         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3279             is_midpoint_gt_even) { // double rounding error downward
3280           // res = res + 1
3281           res.w[0]++;
3282           if (res.w[0] == 0)
3283             res.w[1]++;
3284           is_midpoint_gt_even = 0;
3285           is_inexact_gt_midpoint = 1;
3286         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3287         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
3288           // if this second rounding was exact the result may still be
3289           // inexact because of the first rounding
3290           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3291             is_inexact_gt_midpoint = 1;
3292           }
3293           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3294             is_inexact_lt_midpoint = 1;
3295           }
3296         } else if (is_midpoint_gt_even &&
3297         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
3298           // pulled up to a midpoint
3299           is_inexact_lt_midpoint = 1;
3300           is_inexact_gt_midpoint = 0;
3301           is_midpoint_lt_even = 0;
3302           is_midpoint_gt_even = 0;
3303         } else if (is_midpoint_lt_even &&
3304         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
3305           // pulled down to a midpoint
3306           is_inexact_lt_midpoint = 0;
3307           is_inexact_gt_midpoint = 1;
3308           is_midpoint_lt_even = 0;
3309           is_midpoint_gt_even = 0;
3310         } else {
3311           ;
3312         }
3313       }
3314 
3315       // determine tininess
3316       if (rnd_mode == ROUNDING_TO_NEAREST) {
3317         if (e4 < expmin) {
3318           is_tiny = 1; // for other rounding modes apply correction
3319         }
3320       } else {
3321         // for RM, RP, RZ, RA apply correction in order to determine tininess
3322         // but do not save the result; apply the correction to
3323         // (-1)^p_sign * res * 10^0
3324         P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3325         P128.w[0] = res.w[0];
3326         rounding_correction (rnd_mode,
3327         		     is_inexact_lt_midpoint,
3328         		     is_inexact_gt_midpoint,
3329         		     is_midpoint_lt_even, is_midpoint_gt_even,
3330         		     0, &P128, pfpsf);
3331         scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3332         // the number of digits in the significand is p34 = 34
3333         if (e4 + scale < expmin) {
3334           is_tiny = 1;
3335         }
3336       }
3337 
3338       // the result rounded to the destination precision with unbounded exponent
3339       // is (-1)^p_sign * res * 10^e4
3340       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3341       // res.w[0] unchanged;
3342       // Note: res is correct only if expmin <= e4 <= expmax
3343       ind = p34; // the number of decimal digits in the significand of res
3344 
3345       // at this point we have the result rounded with unbounded exponent in
3346       // res and we know its tininess:
3347       // res = (-1)^p_sign * significand * 10^e4,
3348       // where q (significand) = ind = p34
3349       // Note: res is correct only if expmin <= e4 <= expmax
3350 
3351       // check for overflow if RN
3352       if (rnd_mode == ROUNDING_TO_NEAREST
3353           && (ind + e4) > (p34 + expmax)) {
3354         res.w[1] = p_sign | 0x7800000000000000ull;
3355         res.w[0] = 0x0000000000000000ull;
3356         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
3357         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3358         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3359         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3360         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3361         BID_SWAP128 (res);
3362         BID_RETURN (res)
3363       } // else not overflow or not RN, so continue
3364 
3365       // from this point on this is similar to the last part of the computation
3366       // for Cases (15), (16), (17)
3367 
3368       // if (e4 >= expmin) we have the result rounded with bounded exponent
3369       if (e4 < expmin) {
3370         x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3371         // where the result rounded [at most] once is
3372         //   (-1)^p_sign * significand_res * 10^e4
3373 
3374         // avoid double rounding error
3375         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3376         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3377         is_midpoint_lt_even0 = is_midpoint_lt_even;
3378         is_midpoint_gt_even0 = is_midpoint_gt_even;
3379         is_inexact_lt_midpoint = 0;
3380         is_inexact_gt_midpoint = 0;
3381         is_midpoint_lt_even = 0;
3382         is_midpoint_gt_even = 0;
3383 
3384         if (x0 > ind) {
3385           // nothing is left of res when moving the decimal point left x0 digits
3386           is_inexact_lt_midpoint = 1;
3387           res.w[1] = p_sign | 0x0000000000000000ull;
3388           res.w[0] = 0x0000000000000000ull;
3389           e4 = expmin;
3390         } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3391           // this is <, =, or > 1/2 ulp
3392           // compare the ind-digit value in the significand of res with
3393           // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3394           // less than, equal to, or greater than 1/2 ulp (significand of res)
3395           R128.w[1] = res.w[1] & MASK_COEFF;
3396           R128.w[0] = res.w[0];
3397           if (ind <= 19) {
3398             if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
3399               lt_half_ulp = 1;
3400               is_inexact_lt_midpoint = 1;
3401             } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
3402               eq_half_ulp = 1;
3403               is_midpoint_gt_even = 1;
3404             } else { // > 1/2 ulp
3405               gt_half_ulp = 1;
3406               is_inexact_gt_midpoint = 1;
3407             }
3408           } else { // if (ind <= 38)
3409             if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3410                 (R128.w[1] == midpoint128[ind - 20].w[1] &&
3411                 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
3412               lt_half_ulp = 1;
3413               is_inexact_lt_midpoint = 1;
3414             } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3415                 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
3416               eq_half_ulp = 1;
3417               is_midpoint_gt_even = 1;
3418             } else { // > 1/2 ulp
3419               gt_half_ulp = 1;
3420               is_inexact_gt_midpoint = 1;
3421             }
3422           }
3423           if (lt_half_ulp || eq_half_ulp) {
3424             // res = +0.0 * 10^expmin
3425             res.w[1] = 0x0000000000000000ull;
3426             res.w[0] = 0x0000000000000000ull;
3427           } else { // if (gt_half_ulp)
3428             // res = +1 * 10^expmin
3429             res.w[1] = 0x0000000000000000ull;
3430             res.w[0] = 0x0000000000000001ull;
3431           }
3432           res.w[1] = p_sign | res.w[1];
3433           e4 = expmin;
3434         } else { // if (1 <= x0 <= ind - 1 <= 33)
3435           // round the ind-digit result to ind - x0 digits
3436 
3437           if (ind <= 18) { // 2 <= ind <= 18
3438             round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3439         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
3440         		  &is_inexact_lt_midpoint,
3441         		  &is_inexact_gt_midpoint);
3442             res.w[1] = 0x0;
3443             res.w[0] = R64;
3444           } else if (ind <= 38) {
3445             P128.w[1] = res.w[1] & MASK_COEFF;
3446             P128.w[0] = res.w[0];
3447             round128_19_38 (ind, x0, P128, &res, &incr_exp,
3448         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
3449         		    &is_inexact_lt_midpoint,
3450         		    &is_inexact_gt_midpoint);
3451           }
3452           e4 = e4 + x0; // expmin
3453           // we want the exponent to be expmin, so if incr_exp = 1 then
3454           // multiply the rounded result by 10 - it will still fit in 113 bits
3455           if (incr_exp) {
3456             // 64 x 128 -> 128
3457             P128.w[1] = res.w[1] & MASK_COEFF;
3458             P128.w[0] = res.w[0];
3459             __mul_64x128_to_128 (res, ten2k64[1], P128);
3460           }
3461           res.w[1] = p_sign |
3462                      ((UINT64) (e4 + 6176) << 49) |
3463                      (res.w[1] & MASK_COEFF);
3464           // avoid a double rounding error
3465           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3466                 is_midpoint_lt_even) { // double rounding error upward
3467             // res = res - 1
3468             res.w[0]--;
3469             if (res.w[0] == 0xffffffffffffffffull)
3470               res.w[1]--;
3471             // Note: a double rounding error upward is not possible; for this
3472             // the result after the first rounding would have to be 99...95
3473             // (35 digits in all), possibly followed by a number of zeros; this
3474             // is not possible in this underflow case
3475             is_midpoint_lt_even = 0;
3476             is_inexact_lt_midpoint = 1;
3477           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3478                 is_midpoint_gt_even) { // double rounding error downward
3479             // res = res + 1
3480             res.w[0]++;
3481             if (res.w[0] == 0)
3482               res.w[1]++;
3483             is_midpoint_gt_even = 0;
3484             is_inexact_gt_midpoint = 1;
3485           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3486         	     !is_inexact_lt_midpoint
3487         	     && !is_inexact_gt_midpoint) {
3488             // if this second rounding was exact the result may still be
3489             // inexact because of the first rounding
3490             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3491               is_inexact_gt_midpoint = 1;
3492             }
3493             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3494               is_inexact_lt_midpoint = 1;
3495             }
3496           } else if (is_midpoint_gt_even &&
3497         	     (is_inexact_gt_midpoint0
3498         	      || is_midpoint_lt_even0)) {
3499             // pulled up to a midpoint
3500             is_inexact_lt_midpoint = 1;
3501             is_inexact_gt_midpoint = 0;
3502             is_midpoint_lt_even = 0;
3503             is_midpoint_gt_even = 0;
3504           } else if (is_midpoint_lt_even &&
3505         	     (is_inexact_lt_midpoint0
3506         	      || is_midpoint_gt_even0)) {
3507             // pulled down to a midpoint
3508             is_inexact_lt_midpoint = 0;
3509             is_inexact_gt_midpoint = 1;
3510             is_midpoint_lt_even = 0;
3511             is_midpoint_gt_even = 0;
3512           } else {
3513             ;
3514           }
3515         }
3516       }
3517       // res contains the correct result
3518       // apply correction if not rounding to nearest
3519       if (rnd_mode != ROUNDING_TO_NEAREST) {
3520         rounding_correction (rnd_mode,
3521         		     is_inexact_lt_midpoint,
3522         		     is_inexact_gt_midpoint,
3523         		     is_midpoint_lt_even, is_midpoint_gt_even,
3524         		     e4, &res, pfpsf);
3525       }
3526       if (is_midpoint_lt_even || is_midpoint_gt_even ||
3527           is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3528         // set the inexact flag
3529         *pfpsf |= INEXACT_EXCEPTION;
3530         if (is_tiny)
3531           *pfpsf |= UNDERFLOW_EXCEPTION;
3532       }
3533       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3534       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3535       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3536       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3537       BID_SWAP128 (res);
3538       BID_RETURN (res)
3539 
3540     } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3541         (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3542         (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3543 
3544       // calculate first the result rounded to the destination precision, with
3545       // unbounded exponent
3546 
3547       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
3548               rnd_mode, &is_midpoint_lt_even,
3549               &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3550               &is_inexact_gt_midpoint, pfpsf, &res);
3551       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3552       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3553       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3554       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3555       BID_SWAP128 (res);
3556       BID_RETURN (res)
3557 
3558     } else {
3559       ;
3560     }
3561 
3562   } // end if delta < 0
3563 
3564   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3565   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3566   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3567   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3568   BID_SWAP128 (res);
3569   BID_RETURN (res)
3570 
3571 }
3572 
3573 
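// bid128_fma: user-level fused multiply-add x * y + z on three 128-bit
// decimal operands; wraps bid128_ext_fma and discards the midpoint/inexact
// indicators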
3574 #if DECIMAL_CALL_BY_REFERENCE
3575 void
3576 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3577             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3578             _EXC_INFO_PARAM) {
3579   UINT128 x = *px, y = *py, z = *pz;
3580 #if !DECIMAL_GLOBAL_ROUNDING
3581   unsigned int rnd_mode = *prnd_mode;
3582 #endif
3583 #else
3584 UINT128
3585 bid128_fma (UINT128 x, UINT128 y, UINT128 z
3586             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3587             _EXC_INFO_PARAM) {
3588 #endif
3589   int is_midpoint_lt_even, is_midpoint_gt_even,
3590     is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3591   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3592 
3593 #if DECIMAL_CALL_BY_REFERENCE
3594   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3595         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3596         	  &res, &x, &y, &z
3597         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3598         	  _EXC_INFO_ARG);
3599 #else
3600   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3601         		&is_inexact_lt_midpoint,
3602         		&is_inexact_gt_midpoint, x, y,
3603         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3604         		_EXC_INFO_ARG);
3605 #endif
3606   BID_RETURN (res);
3607 }
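
// Usage sketch (assuming a call-by-value build, i.e.
// DECIMAL_CALL_BY_REFERENCE == 0, with global rounding mode and status
// flags):
//
//   UINT128 x, y, z, r;
//   ...
//   r = bid128_fma (x, y, z);
//
// In a call-by-reference build the operands and the result are passed
// through pointers instead, e.g. bid128_fma (&r, &x, &y, &z).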
3608 
3609 
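// bid128ddd_fma: x, y, z are 64-bit decimal operands; they are widened to
// 128-bit format and the result is computed by bid128_ext_fma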
3610 #if DECIMAL_CALL_BY_REFERENCE
3611 void
3612 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3613                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3614                _EXC_INFO_PARAM) {
3615   UINT64 x = *px, y = *py, z = *pz;
3616 #if !DECIMAL_GLOBAL_ROUNDING
3617   unsigned int rnd_mode = *prnd_mode;
3618 #endif
3619 #else
3620 UINT128
3621 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3622                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3623                _EXC_INFO_PARAM) {
3624 #endif
3625   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3626     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3627   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3628   UINT128 x1, y1, z1;
3629 
3630 #if DECIMAL_CALL_BY_REFERENCE
3631   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3632   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3633   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3634   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3635         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3636         	  &res, &x1, &y1, &z1
3637         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3638         	  _EXC_INFO_ARG);
3639 #else
3640   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3641   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3642   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3643   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3644         		&is_inexact_lt_midpoint,
3645         		&is_inexact_gt_midpoint, x1, y1,
3646         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3647         		_EXC_INFO_ARG);
3648 #endif
3649   BID_RETURN (res);
3650 }
3651 
3652 
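// bid128ddq_fma: 64-bit x and y, 128-bit z; x and y are widened to 128-bit
// format before calling bid128_ext_fma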
3653 #if DECIMAL_CALL_BY_REFERENCE
3654 void
3655 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3656                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3657                _EXC_INFO_PARAM) {
3658   UINT64 x = *px, y = *py;
3659   UINT128 z = *pz;
3660 #if !DECIMAL_GLOBAL_ROUNDING
3661   unsigned int rnd_mode = *prnd_mode;
3662 #endif
3663 #else
3664 UINT128
3665 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3666                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3667                _EXC_INFO_PARAM) {
3668 #endif
3669   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3670     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3671   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3672   UINT128 x1, y1;
3673 
3674 #if DECIMAL_CALL_BY_REFERENCE
3675   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3676   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3677   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3678         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3679         	  &res, &x1, &y1, &z
3680         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3681         	  _EXC_INFO_ARG);
3682 #else
3683   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3684   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3685   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3686         		&is_inexact_lt_midpoint,
3687         		&is_inexact_gt_midpoint, x1, y1,
3688         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3689         		_EXC_INFO_ARG);
3690 #endif
3691   BID_RETURN (res);
3692 }
3693 
3694 
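// bid128dqd_fma: 64-bit x and z, 128-bit y; x and z are widened to 128-bit
// format before calling bid128_ext_fma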
3695 #if DECIMAL_CALL_BY_REFERENCE
3696 void
3697 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3698                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3699                _EXC_INFO_PARAM) {
3700   UINT64 x = *px, z = *pz;
3701 #if !DECIMAL_GLOBAL_ROUNDING
3702   unsigned int rnd_mode = *prnd_mode;
3703 #endif
3704 #else
3705 UINT128
3706 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3707                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3708                _EXC_INFO_PARAM) {
3709 #endif
3710   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3711     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3712   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3713   UINT128 x1, z1;
3714 
3715 #if DECIMAL_CALL_BY_REFERENCE
3716   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3717   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3718   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3719         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3720         	  &res, &x1, py, &z1
3721         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3722         	  _EXC_INFO_ARG);
3723 #else
3724   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3725   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3726   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3727         		&is_inexact_lt_midpoint,
3728         		&is_inexact_gt_midpoint, x1, y,
3729         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3730         		_EXC_INFO_ARG);
3731 #endif
3732   BID_RETURN (res);
3733 }
3734 
3735 
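// bid128dqq_fma: 64-bit x, 128-bit y and z; x is widened to 128-bit format
// before calling bid128_ext_fma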
3736 #if DECIMAL_CALL_BY_REFERENCE
3737 void
3738 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3739                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3740                _EXC_INFO_PARAM) {
3741   UINT64 x = *px;
3742 #if !DECIMAL_GLOBAL_ROUNDING
3743   unsigned int rnd_mode = *prnd_mode;
3744 #endif
3745 #else
3746 UINT128
3747 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3748                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3749                _EXC_INFO_PARAM) {
3750 #endif
3751   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3752     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3753   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3754   UINT128 x1;
3755 
3756 #if DECIMAL_CALL_BY_REFERENCE
3757   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3758   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3759         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3760         	  &res, &x1, py, pz
3761         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3762         	  _EXC_INFO_ARG);
3763 #else
3764   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3765   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3766         		&is_inexact_lt_midpoint,
3767         		&is_inexact_gt_midpoint, x1, y,
3768         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3769         		_EXC_INFO_ARG);
3770 #endif
3771   BID_RETURN (res);
3772 }
3773 
3774 
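// bid128qdd_fma: 128-bit x, 64-bit y and z; y and z are widened to 128-bit
// format before calling bid128_ext_fma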
3775 #if DECIMAL_CALL_BY_REFERENCE
3776 void
3777 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3778                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3779                _EXC_INFO_PARAM) {
3780   UINT64 y = *py, z = *pz;
3781 #if !DECIMAL_GLOBAL_ROUNDING
3782   unsigned int rnd_mode = *prnd_mode;
3783 #endif
3784 #else
3785 UINT128
3786 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3787                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3788                _EXC_INFO_PARAM) {
3789 #endif
3790   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3791     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3792   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3793   UINT128 y1, z1;
3794 
3795 #if DECIMAL_CALL_BY_REFERENCE
3796   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3797   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3798   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3799         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3800         	  &res, px, &y1, &z1
3801         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3802         	  _EXC_INFO_ARG);
3803 #else
3804   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3805   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3806   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3807         		&is_inexact_lt_midpoint,
3808         		&is_inexact_gt_midpoint, x, y1,
3809         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3810         		_EXC_INFO_ARG);
3811 #endif
3812   BID_RETURN (res);
3813 }
3814 
3815 
3816 #if DECIMAL_CALL_BY_REFERENCE
3817 void
3818 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3819                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3820                _EXC_INFO_PARAM) {
3821   UINT64 y = *py;
3822 #if !DECIMAL_GLOBAL_ROUNDING
3823   unsigned int rnd_mode = *prnd_mode;
3824 #endif
3825 #else
3826 UINT128
3827 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3828                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3829                _EXC_INFO_PARAM) {
3830 #endif
3831   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3832     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3833   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3834   UINT128 y1;
3835 
3836 #if DECIMAL_CALL_BY_REFERENCE
3837   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3838   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3839         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3840         	  &res, px, &y1, pz
3841         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3842         	  _EXC_INFO_ARG);
3843 #else
3844   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3845   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3846         		&is_inexact_lt_midpoint,
3847         		&is_inexact_gt_midpoint, x, y1,
3848         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3849         		_EXC_INFO_ARG);
3850 #endif
3851   BID_RETURN (res);
3852 }
3853 
3854 
3855 #if DECIMAL_CALL_BY_REFERENCE
3856 void
3857 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3858                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3859                _EXC_INFO_PARAM) {
3860   UINT64 z = *pz;
3861 #if !DECIMAL_GLOBAL_ROUNDING
3862   unsigned int rnd_mode = *prnd_mode;
3863 #endif
3864 #else
3865 UINT128
3866 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3867                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3868                _EXC_INFO_PARAM) {
3869 #endif
3870   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3871     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3872   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3873   UINT128 z1;
3874 
3875 #if DECIMAL_CALL_BY_REFERENCE
3876   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3877   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3878         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3879         	  &res, px, py, &z1
3880         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3881         	  _EXC_INFO_ARG);
3882 #else
3883   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3884   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3885         		&is_inexact_lt_midpoint,
3886         		&is_inexact_gt_midpoint, x, y,
3887         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3888         		_EXC_INFO_ARG);
3889 #endif
3890   BID_RETURN (res);
3891 }
3892 
3893 // Note: bid128qqq_fma is represented by bid128_fma
3894 
3895 // Note: bid64ddd_fma is represented by bid64_fma
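// Note (added for orientation): in the names that follow, the three letters
// before "_fma" give the formats of the operands x, y, z (d = 64-bit BID64,
// q = 128-bit BID128) and the bid64/bid128 prefix gives the format of the
// result; narrower operands are first widened (exactly) to BID128 and the
// 128-bit FMA does the actual work.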
3896 
3897 #if DECIMAL_CALL_BY_REFERENCE
3898 void
3899 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3900               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3901               _EXC_INFO_PARAM) {
3902   UINT64 x = *px, y = *py;
3903 #if !DECIMAL_GLOBAL_ROUNDING
3904   unsigned int rnd_mode = *prnd_mode;
3905 #endif
3906 #else
3907 UINT64
3908 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3909               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3910               _EXC_INFO_PARAM) {
3911 #endif
3912   UINT64 res1 = 0xbaddbaddbaddbaddull;
3913   UINT128 x1, y1;
3914 
3915 #if DECIMAL_CALL_BY_REFERENCE
3916   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3917   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3918   bid64qqq_fma (&res1, &x1, &y1, pz
3919         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3920         	_EXC_INFO_ARG);
3921 #else
3922   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3923   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3924   res1 = bid64qqq_fma (x1, y1, z
3925         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3926         	       _EXC_INFO_ARG);
3927 #endif
3928   BID_RETURN (res1);
3929 }
3930 
3931 
3932 #if DECIMAL_CALL_BY_REFERENCE
3933 void
3934 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3935               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3936               _EXC_INFO_PARAM) {
3937   UINT64 x = *px, z = *pz;
3938 #if !DECIMAL_GLOBAL_ROUNDING
3939   unsigned int rnd_mode = *prnd_mode;
3940 #endif
3941 #else
3942 UINT64
3943 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3944               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3945               _EXC_INFO_PARAM) {
3946 #endif
3947   UINT64 res1 = 0xbaddbaddbaddbaddull;
3948   UINT128 x1, z1;
3949 
3950 #if DECIMAL_CALL_BY_REFERENCE
3951   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3952   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3953   bid64qqq_fma (&res1, &x1, py, &z1
3954         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3955         	_EXC_INFO_ARG);
3956 #else
3957   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3958   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3959   res1 = bid64qqq_fma (x1, y, z1
3960         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3961         	       _EXC_INFO_ARG);
3962 #endif
3963   BID_RETURN (res1);
3964 }
3965 
3966 
3967 #if DECIMAL_CALL_BY_REFERENCE
3968 void
3969 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3970               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3971               _EXC_INFO_PARAM) {
3972   UINT64 x = *px;
3973 #if !DECIMAL_GLOBAL_ROUNDING
3974   unsigned int rnd_mode = *prnd_mode;
3975 #endif
3976 #else
3977 UINT64
3978 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3979               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3980               _EXC_INFO_PARAM) {
3981 #endif
3982   UINT64 res1 = 0xbaddbaddbaddbaddull;
3983   UINT128 x1;
3984 
3985 #if DECIMAL_CALL_BY_REFERENCE
3986   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3987   bid64qqq_fma (&res1, &x1, py, pz
3988         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3989         	_EXC_INFO_ARG);
3990 #else
3991   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3992   res1 = bid64qqq_fma (x1, y, z
3993         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3994         	       _EXC_INFO_ARG);
3995 #endif
3996   BID_RETURN (res1);
3997 }
3998 
3999 
4000 #if DECIMAL_CALL_BY_REFERENCE
4001 void
4002 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4003               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4004               _EXC_INFO_PARAM) {
4005   UINT64 y = *py, z = *pz;
4006 #if !DECIMAL_GLOBAL_ROUNDING
4007   unsigned int rnd_mode = *prnd_mode;
4008 #endif
4009 #else
4010 UINT64
4011 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4012               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4013               _EXC_INFO_PARAM) {
4014 #endif
4015   UINT64 res1 = 0xbaddbaddbaddbaddull;
4016   UINT128 y1, z1;
4017 
4018 #if DECIMAL_CALL_BY_REFERENCE
4019   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4020   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4021   bid64qqq_fma (&res1, px, &y1, &z1
4022         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4023         	_EXC_INFO_ARG);
4024 #else
4025   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4026   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4027   res1 = bid64qqq_fma (x, y1, z1
4028         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4029         	       _EXC_INFO_ARG);
4030 #endif
4031   BID_RETURN (res1);
4032 }
4033 
4034 
4035 #if DECIMAL_CALL_BY_REFERENCE
4036 void
4037 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4038               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4039               _EXC_INFO_PARAM) {
4040   UINT64 y = *py;
4041 #if !DECIMAL_GLOBAL_ROUNDING
4042   unsigned int rnd_mode = *prnd_mode;
4043 #endif
4044 #else
4045 UINT64
4046 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4047               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4048               _EXC_INFO_PARAM) {
4049 #endif
4050   UINT64 res1 = 0xbaddbaddbaddbaddull;
4051   UINT128 y1;
4052 
4053 #if DECIMAL_CALL_BY_REFERENCE
4054   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4055   bid64qqq_fma (&res1, px, &y1, pz
4056         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4057         	_EXC_INFO_ARG);
4058 #else
4059   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4060   res1 = bid64qqq_fma (x, y1, z
4061         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4062         	       _EXC_INFO_ARG);
4063 #endif
4064   BID_RETURN (res1);
4065 }
4066 
4067 
4068 #if DECIMAL_CALL_BY_REFERENCE
4069 void
4070 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4071               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4072               _EXC_INFO_PARAM) {
4073   UINT64 z = *pz;
4074 #if !DECIMAL_GLOBAL_ROUNDING
4075   unsigned int rnd_mode = *prnd_mode;
4076 #endif
4077 #else
4078 UINT64
4079 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4080               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4081               _EXC_INFO_PARAM) {
4082 #endif
4083   UINT64 res1 = 0xbaddbaddbaddbaddull;
4084   UINT128 z1;
4085 
4086 #if DECIMAL_CALL_BY_REFERENCE
4087   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4088   bid64qqq_fma (&res1, px, py, &z1
4089         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4090         	_EXC_INFO_ARG);
4091 #else
4092   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4093   res1 = bid64qqq_fma (x, y, z1
4094         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4095         	       _EXC_INFO_ARG);
4096 #endif
4097   BID_RETURN (res1);
4098 }
4099 
4100 
4101 #if DECIMAL_CALL_BY_REFERENCE
4102 void
4103 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4104               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4105               _EXC_INFO_PARAM) {
4106   UINT128 x = *px, y = *py, z = *pz;
4107 #if !DECIMAL_GLOBAL_ROUNDING
4108   unsigned int rnd_mode = *prnd_mode;
4109 #endif
4110 #else
4111 UINT64
4112 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4113               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4114               _EXC_INFO_PARAM) {
4115 #endif
4116   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4117     is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4118   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4119     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4120   int incr_exp;
4121   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4122   UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4123   UINT64 res1 = 0xbaddbaddbaddbaddull;
4124   unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4125   UINT64 sign;
4126   UINT64 exp;
4127   int unbexp;
4128   UINT128 C;
4129   BID_UI64DOUBLE tmp;
4130   int nr_bits;
4131   int q, x0;
4132   int scale;
4133   int lt_half_ulp = 0, eq_half_ulp = 0;
4134 
4135   // Note: for rounding modes other than RN or RA, the result can be obtained
4136   // by rounding first to BID128 and then to BID64
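  // (Illustration of why the two-step rounding is safe for directed modes:
  // truncating 1.23456789012345678901234567890123456789 first to 34 and then
  // to 16 significant digits yields the same 16 digits, 1.234567890123456,
  // as a single truncation; the analogous monotonicity argument covers
  // rounding down and rounding up.)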
4137 
4138   save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4139   *pfpsf = 0;
4140 
4141 #if DECIMAL_CALL_BY_REFERENCE
4142   bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4143         	  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4144         	  &res, &x, &y, &z
4145         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4146         	  _EXC_INFO_ARG);
4147 #else
4148   res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4149         		&is_inexact_lt_midpoint0,
4150         		&is_inexact_gt_midpoint0, x, y,
4151         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4152         		_EXC_INFO_ARG);
4153 #endif
4154 
4155   if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4156       (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4157       ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4158       ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
4159 #if DECIMAL_CALL_BY_REFERENCE
4160     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4161 #else
4162     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4163 #endif
    // determine the unbiased exponent of the result, assuming the
    // small-coefficient layout; a canonical subnormal always uses that
    // layout, and for the steering-bit layout this extraction can never
    // yield -398
    unbexp = ((res1 >> 53) & 0x3ff) - 398;
4166 
    // if the result is subnormal, res1 must have exp = -398
    // if it is tiny and inexact, set the underflow and inexact status flags
4169     if (!((res1 & MASK_NAN) == MASK_NAN) &&	// res1 not NaN
4170         (unbexp == -398)
4171         && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4172         && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4173             || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4174       // set the inexact flag and the underflow flag
4175       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4176     } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4177                is_midpoint_lt_even0 || is_midpoint_gt_even0) {
      // set only the inexact flag (the result is not tiny)
4179       *pfpsf |= INEXACT_EXCEPTION;
4180     }
4181 
4182     *pfpsf |= save_fpsf;
4183     BID_RETURN (res1);
4184   } // else continue, and use rounding to nearest to round to 16 digits
4185 
  // at this point the result is rounded to nearest (even or away) to 34 digits
  // (or fewer if exact), and it is either zero or a finite, non-zero,
  // canonical [sub]normal
4188   sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4189   exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4190   unbexp = (exp >> 49) - 6176;
4191   C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4192   C.w[0] = res.w[LOW_128W];
4193 
4194   if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||	// result is zero
4195       (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
      // the result is zero, or underflow/overflow is certain;
      // bid128_to_bid64 produces the correct BID64 result directly
4197 #if DECIMAL_CALL_BY_REFERENCE
4198     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4199 #else
4200     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4201 #endif
4202     *pfpsf |= save_fpsf;
4203     BID_RETURN (res1);
4204   } // else continue
4205 
4206   // -398 - 34 <= unbexp <= 369 + 15
4207   if (rnd_mode == ROUNDING_TIES_AWAY) {
4208     // apply correction, if needed, to make the result rounded to nearest-even
4209     if (is_midpoint_gt_even) {
4210       // res = res - 1
4211       res1--; // res1 is now even
4212     } // else the result is already correctly rounded to nearest-even
4213   }
4214   // at this point the result is finite, non-zero canonical normal or subnormal,
4215   // and in most cases overflow or underflow will not occur
4216 
  // determine the number of digits q in the result
  // q = nr. of decimal digits in the coefficient C
  // determine first the nr. of bits in C
  if (C.w[1] == 0) {
    if (C.w[0] >= 0x0020000000000000ull) { // C.w[0] >= 2^53
      // split the 64-bit value in two 32-bit halves to avoid rounding errors
      if (C.w[0] >= 0x0000000100000000ull) { // C.w[0] >= 2^32
        tmp.d = (double) (C.w[0] >> 32); // exact conversion
        nr_bits =
          33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      } else { // C.w[0] < 2^32
        tmp.d = (double) (C.w[0]); // exact conversion
        nr_bits =
          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
    } else { // if C.w[0] < 2^53
      tmp.d = (double) C.w[0]; // exact conversion
      nr_bits =
        1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
    }
  } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
    tmp.d = (double) C.w[1]; // exact conversion
    nr_bits =
      65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
  }
4242   q = nr_digits[nr_bits - 1].digits;
4243   if (q == 0) {
4244     q = nr_digits[nr_bits - 1].digits1;
4245     if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4246         (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4247          C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4248       q++;
4249   }
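  // (Worked example: C.w[1] = 0, C.w[0] = 1000: the conversion to double
  // above yields a biased binary exponent of 0x408, so
  // nr_bits = 1 + (0x408 - 0x3ff) = 10; a 10-bit coefficient lies in
  // [512, 1023] and can have either 3 or 4 decimal digits, which is the
  // ambiguous case resolved by the threshold comparison; 1000 reaches the
  // 4-digit threshold, so q = 4.)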
4250   // if q > 16, round to nearest even to 16 digits (but for underflow it may
4251   // have to be truncated even more)
4252   if (q > 16) {
4253     x0 = q - 16;
4254     if (q <= 18) {
4255       round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4256         	    &is_midpoint_lt_even, &is_midpoint_gt_even,
4257         	    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4258     } else { // 19 <= q <= 34
4259       round128_19_38 (q, x0, C, &res128, &incr_exp,
4260         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
4261         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4262       res1 = res128.w[0]; // the result fits in 64 bits
4263     }
4264     unbexp = unbexp + x0;
4265     if (incr_exp)
4266       unbexp++;
4267     q = 16; // need to set in case denormalization is necessary
4268   } else {
    // the result does not require a second rounding (and the first rounding
    // must have been exact: an inexact first rounding always leaves a full
    // 34-digit coefficient, while here q <= 16)
4271     res1 = C.w[0];
4272   }
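  // (Illustration of incr_exp: with q = 34 and x0 = 18, a coefficient of
  // thirty-four 9s rounds up when its low 18 digits are removed; the rounding
  // helper reports this carry through incr_exp, and the exponent is
  // incremented by one so that the value is preserved.)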
4273 
4274   // avoid a double rounding error
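  // (Scaled-down illustration with 2 digits, then 1 digit: the exact value
  // 3.4999 first rounds up to 3.5 (the is_inexact_gt_midpoint0 situation),
  // and 3.5 is then a midpoint that rounds to the even value 4
  // (is_midpoint_lt_even), whereas a single rounding of 3.4999 gives 3;
  // the correction below subtracts 1 in exactly this situation.)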
4275   if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4276       is_midpoint_lt_even) { // double rounding error upward
4277     // res = res - 1
4278     res1--; // res1 becomes odd
4279     is_midpoint_lt_even = 0;
4280     is_inexact_lt_midpoint = 1;
4281     if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4282       res1 = 0x002386f26fc0ffffull; // 10^16 - 1
4283       unbexp--;
4284     }
4285   } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4286       is_midpoint_gt_even) { // double rounding error downward
4287     // res = res + 1
4288     res1++; // res1 becomes odd (so it cannot be 10^16)
4289     is_midpoint_gt_even = 0;
4290     is_inexact_gt_midpoint = 1;
4291   } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4292              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4293     // if this second rounding was exact the result may still be
4294     // inexact because of the first rounding
4295     if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4296       is_inexact_gt_midpoint = 1;
4297     }
4298     if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4299       is_inexact_lt_midpoint = 1;
4300     }
4301   } else if (is_midpoint_gt_even &&
4302              (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4303     // pulled up to a midpoint
4304     is_inexact_lt_midpoint = 1;
4305     is_inexact_gt_midpoint = 0;
4306     is_midpoint_lt_even = 0;
4307     is_midpoint_gt_even = 0;
4308   } else if (is_midpoint_lt_even &&
4309              (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4310     // pulled down to a midpoint
4311     is_inexact_lt_midpoint = 0;
4312     is_inexact_gt_midpoint = 1;
4313     is_midpoint_lt_even = 0;
4314     is_midpoint_gt_even = 0;
4315   } else {
4316     ;
4317   }
4318   // this is the result rounded correctly to nearest even, with unbounded exp.
4319 
4320   // check for overflow
4321   if (q + unbexp > P16 + expmax16) {
4322     res1 = sign | 0x7800000000000000ull;
4323     *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4324     *pfpsf |= save_fpsf;
    BID_RETURN (res1);
4326   } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4327     // not overflow; the result must be exact, and we can multiply res1 by
4328     // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4329     scale = unbexp - expmax16;
4330     res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4331     unbexp = expmax16; // unbexp - scale
4332   } else {
4333     ; // continue
4334   }
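  // (Illustration: q = 1 and unbexp = 380 fall in the second case above,
  // since 1 + 380 <= 385 = P16 + expmax16 but 380 > expmax16; res1 is
  // multiplied by 10^11 and unbexp is clamped to 369, leaving an exact
  // 12-digit coefficient.)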
4335 
4336   // check for underflow
4337   if (q + unbexp < P16 + expmin16) {
4338     if (unbexp < expmin16) {
4339       // we must truncate more of res
4340       x0 = expmin16 - unbexp; // x0 >= 1
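      // (Illustration: q = 5 and unbexp = -400 give x0 = 2, so res1 is
      // rounded below to 3 digits and the exponent becomes expmin16 = -398.)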
4341       is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4342       is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4343       is_midpoint_lt_even0 = is_midpoint_lt_even;
4344       is_midpoint_gt_even0 = is_midpoint_gt_even;
4345       is_inexact_lt_midpoint = 0;
4346       is_inexact_gt_midpoint = 0;
4347       is_midpoint_lt_even = 0;
4348       is_midpoint_gt_even = 0;
4349       // the number of decimal digits in res1 is q
4350       if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4351         // 2 <= q <= 16, 1 <= x0 <= 15
4352         round64_2_18 (q, x0, res1, &res1, &incr_exp,
4353         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
4354         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4355         if (incr_exp) {
4356           // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4357           res1 = ten2k64[q - x0];
4358         }
4359         unbexp = unbexp + x0; // expmin16
4360       } else if (x0 == q) {
4361         // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4362         // determine relationship with 1/2 ulp
4363         // q <= 16
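        // (Illustration for q = 3, x0 = 3: the value is
        // 0.(res1) * 10^expmin16; res1 = 499 is below the midpoint 500 and
        // underflows to +0, res1 = 500 is a midpoint that ties to the even
        // value 0, and res1 = 501 rounds up to 1 * 10^expmin16.)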
4364         if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4365           lt_half_ulp = 1;
4366           is_inexact_lt_midpoint = 1;
4367         } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4368           eq_half_ulp = 1;
4369           is_midpoint_gt_even = 1;
4370         } else { // > 1/2 ulp
4371           // gt_half_ulp = 1;
4372           is_inexact_gt_midpoint = 1;
4373         }
4374         if (lt_half_ulp || eq_half_ulp) {
4375           // res = +0.0 * 10^expmin16
4376           res1 = 0x0000000000000000ull;
4377         } else { // if (gt_half_ulp)
4378           // res = +1 * 10^expmin16
4379           res1 = 0x0000000000000001ull;
4380         }
4381         unbexp = expmin16;
4382       } else { // if (x0 > q)
4383         // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4384         res1 = 0x0000000000000000ull;
4385         unbexp = expmin16;
4386         is_inexact_lt_midpoint = 1;
4387       }
4388       // avoid a double rounding error
4389       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4390           is_midpoint_lt_even) { // double rounding error upward
4391         // res = res - 1
4392         res1--; // res1 becomes odd
4393         is_midpoint_lt_even = 0;
4394         is_inexact_lt_midpoint = 1;
4395       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4396           is_midpoint_gt_even) { // double rounding error downward
4397         // res = res + 1
4398         res1++; // res1 becomes odd
4399         is_midpoint_gt_even = 0;
4400         is_inexact_gt_midpoint = 1;
4401       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4402         	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4403         // if this rounding was exact the result may still be
4404         // inexact because of the previous roundings
4405         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4406           is_inexact_gt_midpoint = 1;
4407         }
4408         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4409           is_inexact_lt_midpoint = 1;
4410         }
4411       } else if (is_midpoint_gt_even &&
4412         	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4413         // pulled up to a midpoint
4414         is_inexact_lt_midpoint = 1;
4415         is_inexact_gt_midpoint = 0;
4416         is_midpoint_lt_even = 0;
4417         is_midpoint_gt_even = 0;
4418       } else if (is_midpoint_lt_even &&
4419         	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4420         // pulled down to a midpoint
4421         is_inexact_lt_midpoint = 0;
4422         is_inexact_gt_midpoint = 1;
4423         is_midpoint_lt_even = 0;
4424         is_midpoint_gt_even = 0;
4425       } else {
4426         ;
4427       }
4428     }
    // else, if unbexp >= expmin16 then q < P16 (because q + unbexp <
    // P16 + expmin16), so the result is tiny and exact
4431 
4432     // check for inexact result
4433     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4434         is_midpoint_lt_even || is_midpoint_gt_even ||
4435         is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4436         is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4437       // set the inexact flag and the underflow flag
4438       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4439     }
4440   } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4441              is_midpoint_lt_even || is_midpoint_gt_even) {
4442     *pfpsf |= INEXACT_EXCEPTION;
4443   }
4444   // this is the result rounded correctly to nearest, with bounded exponent
4445 
4446   if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4447     // res = res + 1
4448     res1++; // res1 is now odd
4449   } // else the result is already correct
4450 
4451   // assemble the result
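  // (BID64 layout used below: a coefficient below 2^53 is stored as
  // sign | 10-bit exponent | 53-bit coefficient; a larger coefficient, still
  // below 10^16 < 2^54, sets the two steering bits and is stored as
  // sign | 11 | 10-bit exponent | low 51 coefficient bits, the leading bits
  // 100 being implicit. E.g. +1 * 10^0 is encoded as 0x31c0000000000001,
  // with biased exponent 0 + 398 = 398 in the first form.)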
  if (res1 < 0x0020000000000000ull) { // res1 < 2^53
4453     res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4454   } else { // res1 >= 2^53
4455     res1 = sign | MASK_STEERING_BITS |
4456       ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4457   }
4458   *pfpsf |= save_fpsf;
4459   BID_RETURN (res1);
4460 }
4461