1 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 /*****************************************************************************
25 *
26 * BID128 fma x * y + z
27 *
28 ****************************************************************************/
29
30 #include "bid_internal.h"
31
32 static void
rounding_correction(unsigned int rnd_mode,unsigned int is_inexact_lt_midpoint,unsigned int is_inexact_gt_midpoint,unsigned int is_midpoint_lt_even,unsigned int is_midpoint_gt_even,int unbexp,UINT128 * ptrres,_IDEC_flags * ptrfpsf)33 rounding_correction (unsigned int rnd_mode,
34 unsigned int is_inexact_lt_midpoint,
35 unsigned int is_inexact_gt_midpoint,
36 unsigned int is_midpoint_lt_even,
37 unsigned int is_midpoint_gt_even,
38 int unbexp,
39 UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
40 // unbiased true exponent unbexp may be larger than emax
41
42 UINT128 res = *ptrres; // expected to have the correct sign and coefficient
43 // (the exponent field is ignored, as unbexp is used instead)
44 UINT64 sign, exp;
45 UINT64 C_hi, C_lo;
46
47 // general correction from RN to RA, RM, RP, RZ
48 // Note: if the result is negative, then is_inexact_lt_midpoint,
49 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
50 // have to be considered as if determined for the absolute value of the
51 // result (so they seem to be reversed)
52
53 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
54 is_midpoint_lt_even || is_midpoint_gt_even) {
55 *ptrfpsf |= INEXACT_EXCEPTION;
56 }
57 // apply correction to result calculated with unbounded exponent
58 sign = res.w[1] & MASK_SIGN;
59 exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
60 C_hi = res.w[1] & MASK_COEFF;
61 C_lo = res.w[0];
62 if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
63 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
64 is_midpoint_gt_even))) ||
65 (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
66 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
67 is_midpoint_gt_even)))) {
68 // C = C + 1
69 C_lo = C_lo + 1;
70 if (C_lo == 0)
71 C_hi = C_hi + 1;
72 if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
73 // C = 10^34 => rounding overflow
74 C_hi = 0x0000314dc6448d93ull;
75 C_lo = 0x38c15b0a00000000ull; // 10^33
76 // exp = exp + EXP_P1;
77 unbexp = unbexp + 1;
78 exp = (UINT64) (unbexp + 6176) << 49;
79 }
80 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
81 ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
82 (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
83 // C = C - 1
84 C_lo = C_lo - 1;
85 if (C_lo == 0xffffffffffffffffull)
86 C_hi--;
87 // check if we crossed into the lower decade
88 if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
89 // C = 10^33 - 1
90 if (exp > 0) {
91 C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
92 C_lo = 0x378d8e63ffffffffull;
93 // exp = exp - EXP_P1;
94 unbexp = unbexp - 1;
95 exp = (UINT64) (unbexp + 6176) << 49;
96 } else { // if exp = 0 the result is tiny & inexact
97 *ptrfpsf |= UNDERFLOW_EXCEPTION;
98 }
99 }
100 } else {
101 ; // the result is already correct
102 }
103 if (unbexp > expmax) { // 6111
104 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
105 exp = 0;
106 if (!sign) { // result is positive
107 if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
108 C_hi = 0x7800000000000000ull;
109 C_lo = 0x0000000000000000ull;
110 } else { // res = +MAXFP = (10^34-1) * 10^emax
111 C_hi = 0x5fffed09bead87c0ull;
112 C_lo = 0x378d8e63ffffffffull;
113 }
114 } else { // result is negative
115 if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
116 C_hi = 0xf800000000000000ull;
117 C_lo = 0x0000000000000000ull;
118 } else { // res = -MAXFP = -(10^34-1) * 10^emax
119 C_hi = 0xdfffed09bead87c0ull;
120 C_lo = 0x378d8e63ffffffffull;
121 }
122 }
123 }
124 // assemble the result
125 res.w[1] = sign | exp | C_hi;
126 res.w[0] = C_lo;
127 *ptrres = res;
128 }
129
130 static void
add256(UINT256 x,UINT256 y,UINT256 * pz)131 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
132 // *z = x + yl assume the sum fits in 256 bits
133 UINT256 z;
134 z.w[0] = x.w[0] + y.w[0];
135 if (z.w[0] < x.w[0]) {
136 x.w[1]++;
137 if (x.w[1] == 0x0000000000000000ull) {
138 x.w[2]++;
139 if (x.w[2] == 0x0000000000000000ull) {
140 x.w[3]++;
141 }
142 }
143 }
144 z.w[1] = x.w[1] + y.w[1];
145 if (z.w[1] < x.w[1]) {
146 x.w[2]++;
147 if (x.w[2] == 0x0000000000000000ull) {
148 x.w[3]++;
149 }
150 }
151 z.w[2] = x.w[2] + y.w[2];
152 if (z.w[2] < x.w[2]) {
153 x.w[3]++;
154 }
155 z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
156 *pz = z;
157 }
158
159 static void
sub256(UINT256 x,UINT256 y,UINT256 * pz)160 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
161 // *z = x - y; assume x >= y
162 UINT256 z;
163 z.w[0] = x.w[0] - y.w[0];
164 if (z.w[0] > x.w[0]) {
165 x.w[1]--;
166 if (x.w[1] == 0xffffffffffffffffull) {
167 x.w[2]--;
168 if (x.w[2] == 0xffffffffffffffffull) {
169 x.w[3]--;
170 }
171 }
172 }
173 z.w[1] = x.w[1] - y.w[1];
174 if (z.w[1] > x.w[1]) {
175 x.w[2]--;
176 if (x.w[2] == 0xffffffffffffffffull) {
177 x.w[3]--;
178 }
179 }
180 z.w[2] = x.w[2] - y.w[2];
181 if (z.w[2] > x.w[2]) {
182 x.w[3]--;
183 }
184 z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
185 *pz = z;
186 }
187
188
189 static int
nr_digits256(UINT256 R256)190 nr_digits256 (UINT256 R256) {
191 int ind;
192 // determine the number of decimal digits in R256
193 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
194 // between 1 and 19 digits
195 for (ind = 1; ind <= 19; ind++) {
196 if (R256.w[0] < ten2k64[ind]) {
197 break;
198 }
199 }
200 // ind digits
201 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
202 (R256.w[1] < ten2k128[0].w[1] ||
203 (R256.w[1] == ten2k128[0].w[1]
204 && R256.w[0] < ten2k128[0].w[0]))) {
205 // 20 digits
206 ind = 20;
207 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
208 // between 21 and 38 digits
209 for (ind = 1; ind <= 18; ind++) {
210 if (R256.w[1] < ten2k128[ind].w[1] ||
211 (R256.w[1] == ten2k128[ind].w[1] &&
212 R256.w[0] < ten2k128[ind].w[0])) {
213 break;
214 }
215 }
216 // ind + 20 digits
217 ind = ind + 20;
218 } else if (R256.w[3] == 0x0 &&
219 (R256.w[2] < ten2k256[0].w[2] ||
220 (R256.w[2] == ten2k256[0].w[2] &&
221 R256.w[1] < ten2k256[0].w[1]) ||
222 (R256.w[2] == ten2k256[0].w[2] &&
223 R256.w[1] == ten2k256[0].w[1] &&
224 R256.w[0] < ten2k256[0].w[0]))) {
225 // 39 digits
226 ind = 39;
227 } else {
228 // between 40 and 68 digits
229 for (ind = 1; ind <= 29; ind++) {
230 if (R256.w[3] < ten2k256[ind].w[3] ||
231 (R256.w[3] == ten2k256[ind].w[3] &&
232 R256.w[2] < ten2k256[ind].w[2]) ||
233 (R256.w[3] == ten2k256[ind].w[3] &&
234 R256.w[2] == ten2k256[ind].w[2] &&
235 R256.w[1] < ten2k256[ind].w[1]) ||
236 (R256.w[3] == ten2k256[ind].w[3] &&
237 R256.w[2] == ten2k256[ind].w[2] &&
238 R256.w[1] == ten2k256[ind].w[1] &&
239 R256.w[0] < ten2k256[ind].w[0])) {
240 break;
241 }
242 }
243 // ind + 39 digits
244 ind = ind + 39;
245 }
246 return (ind);
247 }
248
249 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
250 // use the rounding information from ptr_is_* to avoid a double rounding error
251 static void
add_and_round(int q3,int q4,int e4,int delta,int p34,UINT64 z_sign,UINT64 p_sign,UINT128 C3,UINT256 C4,int rnd_mode,int * ptr_is_midpoint_lt_even,int * ptr_is_midpoint_gt_even,int * ptr_is_inexact_lt_midpoint,int * ptr_is_inexact_gt_midpoint,_IDEC_flags * ptrfpsf,UINT128 * ptrres)252 add_and_round (int q3,
253 int q4,
254 int e4,
255 int delta,
256 int p34,
257 UINT64 z_sign,
258 UINT64 p_sign,
259 UINT128 C3,
260 UINT256 C4,
261 int rnd_mode,
262 int *ptr_is_midpoint_lt_even,
263 int *ptr_is_midpoint_gt_even,
264 int *ptr_is_inexact_lt_midpoint,
265 int *ptr_is_inexact_gt_midpoint,
266 _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
267
268 int scale;
269 int x0;
270 int ind;
271 UINT64 R64;
272 UINT128 P128, R128;
273 UINT192 P192, R192;
274 UINT256 R256;
275 int is_midpoint_lt_even = 0;
276 int is_midpoint_gt_even = 0;
277 int is_inexact_lt_midpoint = 0;
278 int is_inexact_gt_midpoint = 0;
279 int is_midpoint_lt_even0 = 0;
280 int is_midpoint_gt_even0 = 0;
281 int is_inexact_lt_midpoint0 = 0;
282 int is_inexact_gt_midpoint0 = 0;
283 int incr_exp = 0;
284 int is_tiny = 0;
285 int lt_half_ulp = 0;
286 int eq_half_ulp = 0;
287 // int gt_half_ulp = 0;
288 UINT128 res = *ptrres;
289
290 // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
291 scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
292 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
293
294 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
295 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
296 if (scale == 0) {
297 R256.w[3] = 0x0ull;
298 R256.w[2] = 0x0ull;
299 R256.w[1] = C3.w[1];
300 R256.w[0] = C3.w[0];
301 } else if (scale <= 19) { // 10^scale fits in 64 bits
302 P128.w[1] = 0;
303 P128.w[0] = ten2k64[scale];
304 __mul_128x128_to_256 (R256, P128, C3);
305 } else if (scale <= 38) { // 10^scale fits in 128 bits
306 __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
307 } else if (scale <= 57) { // 39 <= scale <= 57
308 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
309 // (10^67 has 223 bits; 10^69 has 230 bits);
310 // must split the computation:
311 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
312 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
313 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
314 __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
315 // now multiply R128 by 10^38
316 __mul_128x128_to_256 (R256, R128, ten2k128[18]);
317 } else { // 58 <= scale <= 66
318 // 10^scale takes between 193 and 220 bits,
319 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
320 // must split the computation:
321 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
322 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
323 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
324 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
325 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
326 __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
327 // now calculate 10*38 * 10^(scale-38) * C3
328 __mul_128x128_to_256 (R256, R128, ten2k128[18]);
329 }
330 // C3 * 10^scale is now in R256
331
332 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
333 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
334 // possible
335 // add/subtract C4 and C3 * 10^scale; the exponent is e4
336 if (p_sign == z_sign) { // R256 = C4 + R256
337 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
338 // but may require rounding
339 add256 (C4, R256, &R256);
340 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
341 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
342 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
343 // but may require rounding
344
345 // compare first R256 = C3 * 10^scale and C4
346 if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
347 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
348 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
349 R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
350 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
351 // but may require rounding
352 sub256 (R256, C4, &R256);
353 // flip p_sign too, because the result has the sign of z
354 p_sign = z_sign;
355 } else { // if C4 > C3 * 10^scale
356 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
357 // but may require rounding
358 sub256 (C4, R256, &R256);
359 }
360 // if the result is pure zero, the sign depends on the rounding mode
361 // (x*y and z had opposite signs)
362 if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
363 R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
364 if (rnd_mode != ROUNDING_DOWN)
365 p_sign = 0x0000000000000000ull;
366 else
367 p_sign = 0x8000000000000000ull;
368 // the exponent is max (e4, expmin)
369 if (e4 < -6176)
370 e4 = expmin;
371 // assemble result
372 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
373 res.w[0] = 0x0;
374 *ptrres = res;
375 return;
376 }
377 }
378
379 // determine the number of decimal digits in R256
380 ind = nr_digits256 (R256);
381
382 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
383 // round to the destination precision, with unbounded exponent
384
385 if (ind <= p34) {
386 // result rounded to the destination precision with unbounded exponent
387 // is exact
388 if (ind + e4 < p34 + expmin) {
389 is_tiny = 1; // applies to all rounding modes
390 }
391 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
392 res.w[0] = R256.w[0];
393 // Note: res is correct only if expmin <= e4 <= expmax
394 } else { // if (ind > p34)
395 // if more than P digits, round to nearest to P digits
396 // round R256 to p34 digits
397 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
398 if (ind <= 38) {
399 P128.w[1] = R256.w[1];
400 P128.w[0] = R256.w[0];
401 round128_19_38 (ind, x0, P128, &R128, &incr_exp,
402 &is_midpoint_lt_even, &is_midpoint_gt_even,
403 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
404 } else if (ind <= 57) {
405 P192.w[2] = R256.w[2];
406 P192.w[1] = R256.w[1];
407 P192.w[0] = R256.w[0];
408 round192_39_57 (ind, x0, P192, &R192, &incr_exp,
409 &is_midpoint_lt_even, &is_midpoint_gt_even,
410 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
411 R128.w[1] = R192.w[1];
412 R128.w[0] = R192.w[0];
413 } else { // if (ind <= 68)
414 round256_58_76 (ind, x0, R256, &R256, &incr_exp,
415 &is_midpoint_lt_even, &is_midpoint_gt_even,
416 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
417 R128.w[1] = R256.w[1];
418 R128.w[0] = R256.w[0];
419 }
420 // the rounded result has p34 = 34 digits
421 e4 = e4 + x0 + incr_exp;
422 if (rnd_mode == ROUNDING_TO_NEAREST) {
423 if (e4 < expmin) {
424 is_tiny = 1; // for other rounding modes apply correction
425 }
426 } else {
427 // for RM, RP, RZ, RA apply correction in order to determine tininess
428 // but do not save the result; apply the correction to
429 // (-1)^p_sign * significand * 10^0
430 P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
431 P128.w[0] = R128.w[0];
432 rounding_correction (rnd_mode,
433 is_inexact_lt_midpoint,
434 is_inexact_gt_midpoint, is_midpoint_lt_even,
435 is_midpoint_gt_even, 0, &P128, ptrfpsf);
436 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
437 // the number of digits in the significand is p34 = 34
438 if (e4 + scale < expmin) {
439 is_tiny = 1;
440 }
441 }
442 ind = p34; // the number of decimal digits in the signifcand of res
443 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
444 res.w[0] = R128.w[0];
445 // Note: res is correct only if expmin <= e4 <= expmax
446 // set the inexact flag after rounding with bounded exponent, if any
447 }
448 // at this point we have the result rounded with unbounded exponent in
449 // res and we know its tininess:
450 // res = (-1)^p_sign * significand * 10^e4,
451 // where q (significand) = ind <= p34
452 // Note: res is correct only if expmin <= e4 <= expmax
453
454 // check for overflow if RN
455 if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
456 res.w[1] = p_sign | 0x7800000000000000ull;
457 res.w[0] = 0x0000000000000000ull;
458 *ptrres = res;
459 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
460 return; // BID_RETURN (res)
461 } // else not overflow or not RN, so continue
462
463 // if (e4 >= expmin) we have the result rounded with bounded exponent
464 if (e4 < expmin) {
465 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
466 // where the result rounded [at most] once is
467 // (-1)^p_sign * significand_res * 10^e4
468
469 // avoid double rounding error
470 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
471 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
472 is_midpoint_lt_even0 = is_midpoint_lt_even;
473 is_midpoint_gt_even0 = is_midpoint_gt_even;
474 is_inexact_lt_midpoint = 0;
475 is_inexact_gt_midpoint = 0;
476 is_midpoint_lt_even = 0;
477 is_midpoint_gt_even = 0;
478
479 if (x0 > ind) {
480 // nothing is left of res when moving the decimal point left x0 digits
481 is_inexact_lt_midpoint = 1;
482 res.w[1] = p_sign | 0x0000000000000000ull;
483 res.w[0] = 0x0000000000000000ull;
484 e4 = expmin;
485 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
486 // this is <, =, or > 1/2 ulp
487 // compare the ind-digit value in the significand of res with
488 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
489 // less than, equal to, or greater than 1/2 ulp (significand of res)
490 R128.w[1] = res.w[1] & MASK_COEFF;
491 R128.w[0] = res.w[0];
492 if (ind <= 19) {
493 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
494 lt_half_ulp = 1;
495 is_inexact_lt_midpoint = 1;
496 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
497 eq_half_ulp = 1;
498 is_midpoint_gt_even = 1;
499 } else { // > 1/2 ulp
500 // gt_half_ulp = 1;
501 is_inexact_gt_midpoint = 1;
502 }
503 } else { // if (ind <= 38) {
504 if (R128.w[1] < midpoint128[ind - 20].w[1] ||
505 (R128.w[1] == midpoint128[ind - 20].w[1] &&
506 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
507 lt_half_ulp = 1;
508 is_inexact_lt_midpoint = 1;
509 } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
510 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
511 eq_half_ulp = 1;
512 is_midpoint_gt_even = 1;
513 } else { // > 1/2 ulp
514 // gt_half_ulp = 1;
515 is_inexact_gt_midpoint = 1;
516 }
517 }
518 if (lt_half_ulp || eq_half_ulp) {
519 // res = +0.0 * 10^expmin
520 res.w[1] = 0x0000000000000000ull;
521 res.w[0] = 0x0000000000000000ull;
522 } else { // if (gt_half_ulp)
523 // res = +1 * 10^expmin
524 res.w[1] = 0x0000000000000000ull;
525 res.w[0] = 0x0000000000000001ull;
526 }
527 res.w[1] = p_sign | res.w[1];
528 e4 = expmin;
529 } else { // if (1 <= x0 <= ind - 1 <= 33)
530 // round the ind-digit result to ind - x0 digits
531
532 if (ind <= 18) { // 2 <= ind <= 18
533 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
534 &is_midpoint_lt_even, &is_midpoint_gt_even,
535 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
536 res.w[1] = 0x0;
537 res.w[0] = R64;
538 } else if (ind <= 38) {
539 P128.w[1] = res.w[1] & MASK_COEFF;
540 P128.w[0] = res.w[0];
541 round128_19_38 (ind, x0, P128, &res, &incr_exp,
542 &is_midpoint_lt_even, &is_midpoint_gt_even,
543 &is_inexact_lt_midpoint,
544 &is_inexact_gt_midpoint);
545 }
546 e4 = e4 + x0; // expmin
547 // we want the exponent to be expmin, so if incr_exp = 1 then
548 // multiply the rounded result by 10 - it will still fit in 113 bits
549 if (incr_exp) {
550 // 64 x 128 -> 128
551 P128.w[1] = res.w[1] & MASK_COEFF;
552 P128.w[0] = res.w[0];
553 __mul_64x128_to_128 (res, ten2k64[1], P128);
554 }
555 res.w[1] =
556 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
557 // avoid a double rounding error
558 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
559 is_midpoint_lt_even) { // double rounding error upward
560 // res = res - 1
561 res.w[0]--;
562 if (res.w[0] == 0xffffffffffffffffull)
563 res.w[1]--;
564 // Note: a double rounding error upward is not possible; for this
565 // the result after the first rounding would have to be 99...95
566 // (35 digits in all), possibly followed by a number of zeros; this
567 // is not possible in Cases (2)-(6) or (15)-(17) which may get here
568 is_midpoint_lt_even = 0;
569 is_inexact_lt_midpoint = 1;
570 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
571 is_midpoint_gt_even) { // double rounding error downward
572 // res = res + 1
573 res.w[0]++;
574 if (res.w[0] == 0)
575 res.w[1]++;
576 is_midpoint_gt_even = 0;
577 is_inexact_gt_midpoint = 1;
578 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
579 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
580 // if this second rounding was exact the result may still be
581 // inexact because of the first rounding
582 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
583 is_inexact_gt_midpoint = 1;
584 }
585 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
586 is_inexact_lt_midpoint = 1;
587 }
588 } else if (is_midpoint_gt_even &&
589 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
590 // pulled up to a midpoint
591 is_inexact_lt_midpoint = 1;
592 is_inexact_gt_midpoint = 0;
593 is_midpoint_lt_even = 0;
594 is_midpoint_gt_even = 0;
595 } else if (is_midpoint_lt_even &&
596 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
597 // pulled down to a midpoint
598 is_inexact_lt_midpoint = 0;
599 is_inexact_gt_midpoint = 1;
600 is_midpoint_lt_even = 0;
601 is_midpoint_gt_even = 0;
602 } else {
603 ;
604 }
605 }
606 }
607 // res contains the correct result
608 // apply correction if not rounding to nearest
609 if (rnd_mode != ROUNDING_TO_NEAREST) {
610 rounding_correction (rnd_mode,
611 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
612 is_midpoint_lt_even, is_midpoint_gt_even,
613 e4, &res, ptrfpsf);
614 }
615 if (is_midpoint_lt_even || is_midpoint_gt_even ||
616 is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
617 // set the inexact flag
618 *ptrfpsf |= INEXACT_EXCEPTION;
619 if (is_tiny)
620 *ptrfpsf |= UNDERFLOW_EXCEPTION;
621 }
622
623 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
624 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
625 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
626 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
627 *ptrres = res;
628 return;
629 }
630
631
632 #if DECIMAL_CALL_BY_REFERENCE
633 static void
bid128_ext_fma(int * ptr_is_midpoint_lt_even,int * ptr_is_midpoint_gt_even,int * ptr_is_inexact_lt_midpoint,int * ptr_is_inexact_gt_midpoint,UINT128 * pres,UINT128 * px,UINT128 * py,UINT128 * pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM)634 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
635 int *ptr_is_midpoint_gt_even,
636 int *ptr_is_inexact_lt_midpoint,
637 int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
638 UINT128 * px, UINT128 * py,
639 UINT128 *
640 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
641 _EXC_INFO_PARAM) {
642 UINT128 x = *px, y = *py, z = *pz;
643 #if !DECIMAL_GLOBAL_ROUNDING
644 unsigned int rnd_mode = *prnd_mode;
645 #endif
646 #else
647 static UINT128
648 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
649 int *ptr_is_midpoint_gt_even,
650 int *ptr_is_inexact_lt_midpoint,
651 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
652 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
653 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
654 #endif
655
656 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
657 UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
658 UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
659 int true_p_exp;
660 UINT128 C1, C2, C3;
661 UINT256 C4;
662 int q1 = 0, q2 = 0, q3 = 0, q4;
663 int e1, e2, e3, e4;
664 int scale, ind, delta, x0;
665 int p34 = P34; // used to modify the limit on the number of digits
666 BID_UI64DOUBLE tmp;
667 int x_nr_bits, y_nr_bits, z_nr_bits;
668 unsigned int save_fpsf;
669 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
670 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
671 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
672 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
673 int incr_exp = 0;
674 int lsb;
675 int lt_half_ulp = 0;
676 int eq_half_ulp = 0;
677 int gt_half_ulp = 0;
678 int is_tiny = 0;
679 UINT64 R64, tmp64;
680 UINT128 P128, R128;
681 UINT192 P192, R192;
682 UINT256 R256;
683
684 // the following are based on the table of special cases for fma; the NaN
685 // behavior is similar to that of the IA-64 Architecture fma
686
687 // identify cases where at least one operand is NaN
688
689 BID_SWAP128 (x);
690 BID_SWAP128 (y);
691 BID_SWAP128 (z);
692 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
693 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
694 // check first for non-canonical NaN payload
695 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
696 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
697 (y.w[0] > 0x38c15b09ffffffffull))) {
698 y.w[1] = y.w[1] & 0xffffc00000000000ull;
699 y.w[0] = 0x0ull;
700 }
701 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
702 // set invalid flag
703 *pfpsf |= INVALID_EXCEPTION;
704 // return quiet (y)
705 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
706 res.w[0] = y.w[0];
707 } else { // y is QNaN
708 // return y
709 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
710 res.w[0] = y.w[0];
711 // if z = SNaN or x = SNaN signal invalid exception
712 if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
713 (x.w[1] & MASK_SNAN) == MASK_SNAN) {
714 // set invalid flag
715 *pfpsf |= INVALID_EXCEPTION;
716 }
717 }
718 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
719 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
720 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
721 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
722 BID_SWAP128 (res);
723 BID_RETURN (res)
724 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
725 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
726 // check first for non-canonical NaN payload
727 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
728 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
729 (z.w[0] > 0x38c15b09ffffffffull))) {
730 z.w[1] = z.w[1] & 0xffffc00000000000ull;
731 z.w[0] = 0x0ull;
732 }
733 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
734 // set invalid flag
735 *pfpsf |= INVALID_EXCEPTION;
736 // return quiet (z)
737 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
738 res.w[0] = z.w[0];
739 } else { // z is QNaN
740 // return z
741 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
742 res.w[0] = z.w[0];
743 // if x = SNaN signal invalid exception
744 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
745 // set invalid flag
746 *pfpsf |= INVALID_EXCEPTION;
747 }
748 }
749 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
750 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
751 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
752 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
753 BID_SWAP128 (res);
754 BID_RETURN (res)
755 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
756 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
757 // check first for non-canonical NaN payload
758 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
759 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
760 (x.w[0] > 0x38c15b09ffffffffull))) {
761 x.w[1] = x.w[1] & 0xffffc00000000000ull;
762 x.w[0] = 0x0ull;
763 }
764 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
765 // set invalid flag
766 *pfpsf |= INVALID_EXCEPTION;
767 // return quiet (x)
768 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
769 res.w[0] = x.w[0];
770 } else { // x is QNaN
771 // return x
772 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
773 res.w[0] = x.w[0];
774 }
775 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
776 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
777 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
778 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
779 BID_SWAP128 (res);
780 BID_RETURN (res)
781 }
782 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
783 // for non-canonical values
784
785 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
786 C1.w[1] = x.w[1] & MASK_COEFF;
787 C1.w[0] = x.w[0];
788 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
789 // if x is not infinity check for non-canonical values - treated as zero
790 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
791 // non-canonical
792 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
793 C1.w[1] = 0; // significand high
794 C1.w[0] = 0; // significand low
795 } else { // G0_G1 != 11
796 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
797 if (C1.w[1] > 0x0001ed09bead87c0ull ||
798 (C1.w[1] == 0x0001ed09bead87c0ull &&
799 C1.w[0] > 0x378d8e63ffffffffull)) {
800 // x is non-canonical if coefficient is larger than 10^34 -1
801 C1.w[1] = 0;
802 C1.w[0] = 0;
803 } else { // canonical
804 ;
805 }
806 }
807 }
808 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
809 C2.w[1] = y.w[1] & MASK_COEFF;
810 C2.w[0] = y.w[0];
811 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
812 // if y is not infinity check for non-canonical values - treated as zero
813 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
814 // non-canonical
815 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
816 C2.w[1] = 0; // significand high
817 C2.w[0] = 0; // significand low
818 } else { // G0_G1 != 11
819 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
820 if (C2.w[1] > 0x0001ed09bead87c0ull ||
821 (C2.w[1] == 0x0001ed09bead87c0ull &&
822 C2.w[0] > 0x378d8e63ffffffffull)) {
823 // y is non-canonical if coefficient is larger than 10^34 -1
824 C2.w[1] = 0;
825 C2.w[0] = 0;
826 } else { // canonical
827 ;
828 }
829 }
830 }
831 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
832 C3.w[1] = z.w[1] & MASK_COEFF;
833 C3.w[0] = z.w[0];
834 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
835 // if z is not infinity check for non-canonical values - treated as zero
836 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
837 // non-canonical
838 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
839 C3.w[1] = 0; // significand high
840 C3.w[0] = 0; // significand low
841 } else { // G0_G1 != 11
842 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
843 if (C3.w[1] > 0x0001ed09bead87c0ull ||
844 (C3.w[1] == 0x0001ed09bead87c0ull &&
845 C3.w[0] > 0x378d8e63ffffffffull)) {
846 // z is non-canonical if coefficient is larger than 10^34 -1
847 C3.w[1] = 0;
848 C3.w[0] = 0;
849 } else { // canonical
850 ;
851 }
852 }
853 }
854
855 p_sign = x_sign ^ y_sign; // sign of the product
856
857 // identify cases where at least one operand is infinity
858
859 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
860 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
861 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
862 if (p_sign == z_sign) {
863 res.w[1] = z_sign | MASK_INF;
864 res.w[0] = 0x0;
865 } else {
866 // return QNaN Indefinite
867 res.w[1] = 0x7c00000000000000ull;
868 res.w[0] = 0x0000000000000000ull;
869 // set invalid flag
870 *pfpsf |= INVALID_EXCEPTION;
871 }
872 } else { // z = 0 or z = f
873 res.w[1] = p_sign | MASK_INF;
874 res.w[0] = 0x0;
875 }
876 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
877 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
878 if (p_sign == z_sign) {
879 res.w[1] = z_sign | MASK_INF;
880 res.w[0] = 0x0;
881 } else {
882 // return QNaN Indefinite
883 res.w[1] = 0x7c00000000000000ull;
884 res.w[0] = 0x0000000000000000ull;
885 // set invalid flag
886 *pfpsf |= INVALID_EXCEPTION;
887 }
888 } else { // z = 0 or z = f
889 res.w[1] = p_sign | MASK_INF;
890 res.w[0] = 0x0;
891 }
892 } else { // y = 0
893 // return QNaN Indefinite
894 res.w[1] = 0x7c00000000000000ull;
895 res.w[0] = 0x0000000000000000ull;
896 // set invalid flag
897 *pfpsf |= INVALID_EXCEPTION;
898 }
899 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
900 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
901 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
902 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
903 BID_SWAP128 (res);
904 BID_RETURN (res)
905 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
906 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
907 // x = f, necessarily
908 if ((p_sign != z_sign)
909 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
910 // return QNaN Indefinite
911 res.w[1] = 0x7c00000000000000ull;
912 res.w[0] = 0x0000000000000000ull;
913 // set invalid flag
914 *pfpsf |= INVALID_EXCEPTION;
915 } else {
916 res.w[1] = z_sign | MASK_INF;
917 res.w[0] = 0x0;
918 }
919 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
920 // z = 0, f, inf
921 // return QNaN Indefinite
922 res.w[1] = 0x7c00000000000000ull;
923 res.w[0] = 0x0000000000000000ull;
924 // set invalid flag
925 *pfpsf |= INVALID_EXCEPTION;
926 } else {
927 // x = f and z = 0, f, necessarily
928 res.w[1] = p_sign | MASK_INF;
929 res.w[0] = 0x0;
930 }
931 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
932 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
933 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
934 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
935 BID_SWAP128 (res);
936 BID_RETURN (res)
937 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
938 // x = 0, f and y = 0, f, necessarily
939 res.w[1] = z_sign | MASK_INF;
940 res.w[0] = 0x0;
941 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
942 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
943 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
944 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
945 BID_SWAP128 (res);
946 BID_RETURN (res)
947 }
948
949 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
950 if (true_p_exp < -6176)
951 p_exp = 0; // cannot be less than EXP_MIN
952 else
953 p_exp = (UINT64) (true_p_exp + 6176) << 49;
954
955 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
956 // the result is 0
957 if (p_exp < z_exp)
958 res.w[1] = p_exp; // preferred exponent
959 else
960 res.w[1] = z_exp; // preferred exponent
961 if (p_sign == z_sign) {
962 res.w[1] |= z_sign;
963 res.w[0] = 0x0;
964 } else { // x * y and z have opposite signs
965 if (rnd_mode == ROUNDING_DOWN) {
966 // res = -0.0
967 res.w[1] |= MASK_SIGN;
968 res.w[0] = 0x0;
969 } else {
970 // res = +0.0
971 // res.w[1] |= 0x0;
972 res.w[0] = 0x0;
973 }
974 }
975 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
976 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
977 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
978 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
979 BID_SWAP128 (res);
980 BID_RETURN (res)
981 }
982 // from this point on, we may need to know the number of decimal digits
983 // in the significands of x, y, z when x, y, z != 0
984
985 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
986 // q1 = nr. of decimal digits in x
987 // determine first the nr. of bits in x
988 if (C1.w[1] == 0) {
989 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
990 // split the 64-bit value in two 32-bit halves to avoid rounding errors
991 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
992 tmp.d = (double) (C1.w[0] >> 32); // exact conversion
993 x_nr_bits =
994 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
995 } else { // x < 2^32
996 tmp.d = (double) (C1.w[0]); // exact conversion
997 x_nr_bits =
998 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
999 }
1000 } else { // if x < 2^53
1001 tmp.d = (double) C1.w[0]; // exact conversion
1002 x_nr_bits =
1003 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1004 }
1005 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1006 tmp.d = (double) C1.w[1]; // exact conversion
1007 x_nr_bits =
1008 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1009 }
1010 q1 = nr_digits[x_nr_bits - 1].digits;
1011 if (q1 == 0) {
1012 q1 = nr_digits[x_nr_bits - 1].digits1;
1013 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1014 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1015 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1016 q1++;
1017 }
1018 }
1019
1020 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1021 if (C2.w[1] == 0) {
1022 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1023 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1024 if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1025 tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1026 y_nr_bits =
1027 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1028 } else { // y < 2^32
1029 tmp.d = (double) C2.w[0]; // exact conversion
1030 y_nr_bits =
1031 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1032 }
1033 } else { // if y < 2^53
1034 tmp.d = (double) C2.w[0]; // exact conversion
1035 y_nr_bits =
1036 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1037 }
1038 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1039 tmp.d = (double) C2.w[1]; // exact conversion
1040 y_nr_bits =
1041 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1042 }
1043
1044 q2 = nr_digits[y_nr_bits].digits;
1045 if (q2 == 0) {
1046 q2 = nr_digits[y_nr_bits].digits1;
1047 if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1048 (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1049 C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
1050 q2++;
1051 }
1052 }
1053
1054 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1055 if (C3.w[1] == 0) {
1056 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1057 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1058 if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1059 tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1060 z_nr_bits =
1061 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1062 } else { // z < 2^32
1063 tmp.d = (double) C3.w[0]; // exact conversion
1064 z_nr_bits =
1065 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1066 }
1067 } else { // if z < 2^53
1068 tmp.d = (double) C3.w[0]; // exact conversion
1069 z_nr_bits =
1070 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1071 }
1072 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1073 tmp.d = (double) C3.w[1]; // exact conversion
1074 z_nr_bits =
1075 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1076 }
1077
1078 q3 = nr_digits[z_nr_bits].digits;
1079 if (q3 == 0) {
1080 q3 = nr_digits[z_nr_bits].digits1;
1081 if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1082 (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1083 C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
1084 q3++;
1085 }
1086 }
1087
1088 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
1089 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
1090 // x = 0 or y = 0
1091 // z = f, necessarily; for 0 + z return z, with the preferred exponent
1092 // the result is z, but need to get the preferred exponent
1093 if (z_exp <= p_exp) { // the preferred exponent is z_exp
1094 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1095 res.w[0] = C3.w[0];
1096 } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1097 // return (C3 * 10^scale) * 10^(z_exp - scale)
1098 // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1099 scale = p34 - q3;
1100 ind = (z_exp - p_exp) >> 49;
1101 if (ind < scale)
1102 scale = ind;
1103 if (scale == 0) {
1104 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1105 res.w[0] = z.w[0];
1106 } else if (q3 <= 19) { // z fits in 64 bits
1107 if (scale <= 19) { // 10^scale fits in 64 bits
1108 // 64 x 64 C3.w[0] * ten2k64[scale]
1109 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1110 } else { // 10^scale fits in 128 bits
1111 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1112 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1113 }
1114 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1115 // 64 x 128 ten2k64[scale] * C3
1116 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1117 }
1118 // subtract scale from the exponent
1119 z_exp = z_exp - ((UINT64) scale << 49);
1120 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1121 }
1122 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1123 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1124 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1125 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1126 BID_SWAP128 (res);
1127 BID_RETURN (res)
1128 } else {
1129 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1130 }
1131
1132 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
1133 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
1134 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1135 e4 = e1 + e2; // unbiased exponent of the exact x * y
1136
1137 // calculate C1 * C2 and its number of decimal digits, q4
1138
1139 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1140 // where 2 <= q1 + q2 <= 68
1141 // calculate C4 = C1 * C2 and determine q
1142 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1143 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1144 C4.w[0] = C1.w[0] * C2.w[0];
1145 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1146 if (C4.w[0] < ten2k64[q1 + q2 - 1])
1147 q4 = q1 + q2 - 1; // q4 in [1, 18]
1148 else
1149 q4 = q1 + q2; // q4 in [2, 19]
1150 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1151 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1152 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1153 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1154 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1155 if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
1156 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1157 q4 = 19; // 19 = q1 + q2 - 1
1158 } else {
1159 // if (C4.w[1] == 0)
1160 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1161 // else
1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1163 q4 = 20; // 20 = q1 + q2
1164 }
1165 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1166 // C4 = C1 * C2 fits in 64 or 128 bits
1167 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1168 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1169 if (q1 <= 19) {
1170 __mul_128x64_to_128 (C4, C1.w[0], C2);
1171 } else { // q2 <= 19
1172 __mul_128x64_to_128 (C4, C2.w[0], C1);
1173 }
1174 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1175 if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1176 (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1177 C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
1178 // if (C4.w[1] == 0) // q4 = 20, necessarily
1179 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1180 // else
1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1182 q4 = q1 + q2 - 1; // q4 in [20, 37]
1183 } else {
1184 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1185 q4 = q1 + q2; // q4 in [21, 38]
1186 }
1187 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1188 // both C1 and C2 fit in 128 bits (actually in 113 bits)
1189 // may replace this by 128x128_to192
1190 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1191 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1192 if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1193 (C4.w[1] == ten2k128[18].w[1]
1194 && C4.w[0] < ten2k128[18].w[0]))) {
1195 // 18 = 38 - 20 = q1+q2-1 - 20
1196 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1197 q4 = 38; // 38 = q1 + q2 - 1
1198 } else {
1199 // if (C4.w[2] == 0)
1200 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1201 // else
1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1203 q4 = 39; // 39 = q1 + q2
1204 }
1205 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1206 // C4 = C1 * C2 fits in 128 or 192 bits
1207 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1208 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1209 // may fit in 64 bits
1210 if (C1.w[1] == 0) { // C1 fits in 64 bits
1211 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1212 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1213 } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1214 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1215 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1216 } else { // both C1 and C2 require 128 bits
1217 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1218 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1219 }
1220 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1221 if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1222 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1223 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1224 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1225 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
1226 // if (C4.w[2] == 0) // q4 = 39, necessarily
1227 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1228 // else
1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1230 q4 = q1 + q2 - 1; // q4 in [39, 56]
1231 } else {
1232 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1233 q4 = q1 + q2; // q4 in [40, 57]
1234 }
1235 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1236 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1237 // may fit in 64 bits
1238 if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1239 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1240 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1241 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1242 } else { // C1 * C2 will fit in 192 bits or in 256 bits
1243 __mul_128x128_to_256 (C4, C1, C2);
1244 }
1245 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1246 if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1247 (C4.w[2] == ten2k256[18].w[2]
1248 && (C4.w[1] < ten2k256[18].w[1]
1249 || (C4.w[1] == ten2k256[18].w[1]
1250 && C4.w[0] < ten2k256[18].w[0]))))) {
1251 // 18 = 57 - 39 = q1+q2-1 - 39
1252 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1253 q4 = 57; // 57 = q1 + q2 - 1
1254 } else {
1255 // if (C4.w[3] == 0)
1256 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1257 // else
1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1259 q4 = 58; // 58 = q1 + q2
1260 }
1261 } else { // if 59 <= q1 + q2 <= 68
1262 // C4 = C1 * C2 fits in 192 or 256 bits
1263 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1264 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1265 // 64 bits
1266 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1267 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1268 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1269 if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1270 (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1271 (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1272 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1273 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1274 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1275 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
1276 // if (C4.w[3] == 0) // q4 = 58, necessarily
1277 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1278 // else
1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1280 q4 = q1 + q2 - 1; // q4 in [58, 67]
1281 } else {
1282 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1283 q4 = q1 + q2; // q4 in [59, 68]
1284 }
1285 }
1286
1287 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1288 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1289 *pfpsf = 0;
1290
1291 if (q4 > p34) {
1292
1293 // truncate C4 to p34 digits into res
1294 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1295 x0 = q4 - p34;
1296 if (q4 <= 38) {
1297 P128.w[1] = C4.w[1];
1298 P128.w[0] = C4.w[0];
1299 round128_19_38 (q4, x0, P128, &res, &incr_exp,
1300 &is_midpoint_lt_even, &is_midpoint_gt_even,
1301 &is_inexact_lt_midpoint,
1302 &is_inexact_gt_midpoint);
1303 } else if (q4 <= 57) { // 35 <= q4 <= 57
1304 P192.w[2] = C4.w[2];
1305 P192.w[1] = C4.w[1];
1306 P192.w[0] = C4.w[0];
1307 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1308 &is_midpoint_lt_even, &is_midpoint_gt_even,
1309 &is_inexact_lt_midpoint,
1310 &is_inexact_gt_midpoint);
1311 res.w[0] = R192.w[0];
1312 res.w[1] = R192.w[1];
1313 } else { // if (q4 <= 68)
1314 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1315 &is_midpoint_lt_even, &is_midpoint_gt_even,
1316 &is_inexact_lt_midpoint,
1317 &is_inexact_gt_midpoint);
1318 res.w[0] = R256.w[0];
1319 res.w[1] = R256.w[1];
1320 }
1321 e4 = e4 + x0;
1322 if (incr_exp) {
1323 e4 = e4 + 1;
1324 }
1325 q4 = p34;
1326 // res is now the coefficient of the result rounded to the destination
1327 // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1328 } else { // if (q4 <= p34)
1329 // C4 * 10^e4 is the result rounded to the destination precision, with
1330 // unbounded exponent (which is exact)
1331
1332 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1333 // e4 is too large, but can be brought within range by scaling up C4
1334 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1335 // res = (C4 * 10^scale) * 10^expmax
1336 if (q4 <= 19) { // C4 fits in 64 bits
1337 if (scale <= 19) { // 10^scale fits in 64 bits
1338 // 64 x 64 C4.w[0] * ten2k64[scale]
1339 __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
1340 } else { // 10^scale fits in 128 bits
1341 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1342 __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
1343 }
1344 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1345 // 64 x 128 ten2k64[scale] * CC43
1346 __mul_128x64_to_128 (res, ten2k64[scale], C4);
1347 }
1348 e4 = e4 - scale; // expmax
1349 q4 = q4 + scale;
1350 } else {
1351 res.w[1] = C4.w[1];
1352 res.w[0] = C4.w[0];
1353 }
1354 // res is the coefficient of the result rounded to the destination
1355 // precision, with unbounded exponent (it has q4 digits); the exponent
1356 // is e4 (exact result)
1357 }
1358
1359 // check for overflow
1360 if (q4 + e4 > p34 + expmax) {
1361 if (rnd_mode == ROUNDING_TO_NEAREST) {
1362 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1363 res.w[0] = 0x0000000000000000ull;
1364 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1365 } else {
1366 res.w[1] = p_sign | res.w[1];
1367 rounding_correction (rnd_mode,
1368 is_inexact_lt_midpoint,
1369 is_inexact_gt_midpoint,
1370 is_midpoint_lt_even, is_midpoint_gt_even,
1371 e4, &res, pfpsf);
1372 }
1373 *pfpsf |= save_fpsf;
1374 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1375 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1376 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1377 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1378 BID_SWAP128 (res);
1379 BID_RETURN (res)
1380 }
1381 // check for underflow
1382 if (q4 + e4 < expmin + P34) {
1383 is_tiny = 1; // the result is tiny
1384 if (e4 < expmin) {
1385 // if e4 < expmin, we must truncate more of res
1386 x0 = expmin - e4; // x0 >= 1
1387 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1388 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1389 is_midpoint_lt_even0 = is_midpoint_lt_even;
1390 is_midpoint_gt_even0 = is_midpoint_gt_even;
1391 is_inexact_lt_midpoint = 0;
1392 is_inexact_gt_midpoint = 0;
1393 is_midpoint_lt_even = 0;
1394 is_midpoint_gt_even = 0;
1395 // the number of decimal digits in res is q4
1396 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1397 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1398 round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1399 &is_midpoint_lt_even, &is_midpoint_gt_even,
1400 &is_inexact_lt_midpoint,
1401 &is_inexact_gt_midpoint);
1402 if (incr_exp) {
1403 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1404 R64 = ten2k64[q4 - x0];
1405 }
1406 // res.w[1] = 0; (from above)
1407 res.w[0] = R64;
1408 } else { // if (q4 <= 34)
1409 // 19 <= q4 <= 38
1410 P128.w[1] = res.w[1];
1411 P128.w[0] = res.w[0];
1412 round128_19_38 (q4, x0, P128, &res, &incr_exp,
1413 &is_midpoint_lt_even, &is_midpoint_gt_even,
1414 &is_inexact_lt_midpoint,
1415 &is_inexact_gt_midpoint);
1416 if (incr_exp) {
1417 // increase coefficient by a factor of 10; this will be <= 10^33
1418 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1419 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
1420 // res.w[1] = 0;
1421 res.w[0] = ten2k64[q4 - x0];
1422 } else { // 20 <= q4 - x0 <= 37
1423 res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1424 res.w[1] = ten2k128[q4 - x0 - 20].w[1];
1425 }
1426 }
1427 }
1428 e4 = e4 + x0; // expmin
1429 } else if (x0 == q4) {
1430 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1431 // determine relationship with 1/2 ulp
1432 if (q4 <= 19) {
1433 if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1434 lt_half_ulp = 1;
1435 is_inexact_lt_midpoint = 1;
1436 } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1437 eq_half_ulp = 1;
1438 is_midpoint_gt_even = 1;
1439 } else { // > 1/2 ulp
1440 // gt_half_ulp = 1;
1441 is_inexact_gt_midpoint = 1;
1442 }
1443 } else { // if (q4 <= 34)
1444 if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1445 (res.w[1] == midpoint128[q4 - 20].w[1] &&
1446 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
1447 lt_half_ulp = 1;
1448 is_inexact_lt_midpoint = 1;
1449 } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1450 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1451 eq_half_ulp = 1;
1452 is_midpoint_gt_even = 1;
1453 } else { // > 1/2 ulp
1454 // gt_half_ulp = 1;
1455 is_inexact_gt_midpoint = 1;
1456 }
1457 }
1458 if (lt_half_ulp || eq_half_ulp) {
1459 // res = +0.0 * 10^expmin
1460 res.w[1] = 0x0000000000000000ull;
1461 res.w[0] = 0x0000000000000000ull;
1462 } else { // if (gt_half_ulp)
1463 // res = +1 * 10^expmin
1464 res.w[1] = 0x0000000000000000ull;
1465 res.w[0] = 0x0000000000000001ull;
1466 }
1467 e4 = expmin;
1468 } else { // if (x0 > q4)
1469 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1470 res.w[1] = 0;
1471 res.w[0] = 0;
1472 e4 = expmin;
1473 is_inexact_lt_midpoint = 1;
1474 }
1475 // avoid a double rounding error
1476 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
1477 is_midpoint_lt_even) { // double rounding error upward
1478 // res = res - 1
1479 res.w[0]--;
1480 if (res.w[0] == 0xffffffffffffffffull)
1481 res.w[1]--;
1482 // Note: a double rounding error upward is not possible; for this
1483 // the result after the first rounding would have to be 99...95
1484 // (35 digits in all), possibly followed by a number of zeros; this
1485 // not possible for f * f + 0
1486 is_midpoint_lt_even = 0;
1487 is_inexact_lt_midpoint = 1;
1488 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1489 is_midpoint_gt_even) { // double rounding error downward
1490 // res = res + 1
1491 res.w[0]++;
1492 if (res.w[0] == 0)
1493 res.w[1]++;
1494 is_midpoint_gt_even = 0;
1495 is_inexact_gt_midpoint = 1;
1496 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
1497 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
1498 // if this second rounding was exact the result may still be
1499 // inexact because of the first rounding
1500 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1501 is_inexact_gt_midpoint = 1;
1502 }
1503 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1504 is_inexact_lt_midpoint = 1;
1505 }
1506 } else if (is_midpoint_gt_even &&
1507 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
1508 // pulled up to a midpoint
1509 is_inexact_lt_midpoint = 1;
1510 is_inexact_gt_midpoint = 0;
1511 is_midpoint_lt_even = 0;
1512 is_midpoint_gt_even = 0;
1513 } else if (is_midpoint_lt_even &&
1514 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
1515 // pulled down to a midpoint
1516 is_inexact_lt_midpoint = 0;
1517 is_inexact_gt_midpoint = 1;
1518 is_midpoint_lt_even = 0;
1519 is_midpoint_gt_even = 0;
1520 } else {
1521 ;
1522 }
1523 } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1524 if (e3 < e4) {
1525 // if (e3 < e4) the preferred exponent is e3
1526 // return (C4 * 10^scale) * 10^(e4 - scale)
1527 // where scale = min (p34-q4, (e4 - e3))
1528 scale = p34 - q4;
1529 ind = e4 - e3;
1530 if (ind < scale)
1531 scale = ind;
1532 if (scale == 0) {
1533 ; // res and e4 are unchanged
1534 } else if (q4 <= 19) { // C4 fits in 64 bits
1535 if (scale <= 19) { // 10^scale fits in 64 bits
1536 // 64 x 64 res.w[0] * ten2k64[scale]
1537 __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
1538 } else { // 10^scale fits in 128 bits
1539 // 64 x 128 res.w[0] * ten2k128[scale - 20]
1540 __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
1541 }
1542 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1543 // 64 x 128 ten2k64[scale] * C3
1544 __mul_128x64_to_128 (res, ten2k64[scale], res);
1545 }
1546 // subtract scale from the exponent
1547 e4 = e4 - scale;
1548 }
1549 }
1550
1551 // check for inexact result
1552 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1553 is_midpoint_lt_even || is_midpoint_gt_even) {
1554 // set the inexact flag and the underflow flag
1555 *pfpsf |= INEXACT_EXCEPTION;
1556 *pfpsf |= UNDERFLOW_EXCEPTION;
1557 }
1558 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1559 if (rnd_mode != ROUNDING_TO_NEAREST) {
1560 rounding_correction (rnd_mode,
1561 is_inexact_lt_midpoint,
1562 is_inexact_gt_midpoint,
1563 is_midpoint_lt_even, is_midpoint_gt_even,
1564 e4, &res, pfpsf);
1565 }
1566 *pfpsf |= save_fpsf;
1567 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1568 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1569 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1570 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1571 BID_SWAP128 (res);
1572 BID_RETURN (res)
1573 }
1574 // no overflow, and no underflow for rounding to nearest
1575 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1576
1577 if (rnd_mode != ROUNDING_TO_NEAREST) {
1578 rounding_correction (rnd_mode,
1579 is_inexact_lt_midpoint,
1580 is_inexact_gt_midpoint,
1581 is_midpoint_lt_even, is_midpoint_gt_even,
1582 e4, &res, pfpsf);
1583 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1584 if (e4 == expmin) {
1585 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1586 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1587 res.w[0] < 0x38c15b0a00000000ull)) {
1588 is_tiny = 1;
1589 }
1590 }
1591 }
1592
1593 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1594 is_midpoint_lt_even || is_midpoint_gt_even) {
1595 // set the inexact flag
1596 *pfpsf |= INEXACT_EXCEPTION;
1597 if (is_tiny)
1598 *pfpsf |= UNDERFLOW_EXCEPTION;
1599 }
1600
1601 if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1602 // need to ensure that the result has the preferred exponent
1603 p_exp = res.w[1] & MASK_EXP;
1604 if (z_exp < p_exp) { // the preferred exponent is z_exp
1605 // signficand of res in C3
1606 C3.w[1] = res.w[1] & MASK_COEFF;
1607 C3.w[0] = res.w[0];
1608 // the number of decimal digits of x * y is q4 <= 34
1609 // Note: the coefficient fits in 128 bits
1610
1611 // return (C3 * 10^scale) * 10^(p_exp - scale)
1612 // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1613 scale = p34 - q4;
1614 ind = (p_exp - z_exp) >> 49;
1615 if (ind < scale)
1616 scale = ind;
1617 // subtract scale from the exponent
1618 p_exp = p_exp - ((UINT64) scale << 49);
1619 if (scale == 0) {
1620 ; // leave res unchanged
1621 } else if (q4 <= 19) { // x * y fits in 64 bits
1622 if (scale <= 19) { // 10^scale fits in 64 bits
1623 // 64 x 64 C3.w[0] * ten2k64[scale]
1624 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1625 } else { // 10^scale fits in 128 bits
1626 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1627 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1628 }
1629 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1630 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1631 // 64 x 128 ten2k64[scale] * C3
1632 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1633 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1634 }
1635 } // else leave the result as it is, because p_exp <= z_exp
1636 }
1637 *pfpsf |= save_fpsf;
1638 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1639 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1640 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1641 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1642 BID_SWAP128 (res);
1643 BID_RETURN (res)
1644 } // else we have f * f + f
1645
1646 // continue with x = f, y = f, z = f
1647
1648 delta = q3 + e3 - q4 - e4;
1649 delta_ge_zero:
1650 if (delta >= 0) {
1651
1652 if (p34 <= delta - 1 || // Case (1')
1653 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1654 // check for overflow, which can occur only in Case (1')
1655 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1656 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1657 // condition for (q3 + e3) > (p34 + expmax)
1658 if (rnd_mode == ROUNDING_TO_NEAREST) {
1659 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1660 res.w[0] = 0x0000000000000000ull;
1661 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1662 } else {
1663 if (p_sign == z_sign) {
1664 is_inexact_lt_midpoint = 1;
1665 } else {
1666 is_inexact_gt_midpoint = 1;
1667 }
1668 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1669 scale = p34 - q3;
1670 if (scale == 0) {
1671 res.w[1] = z_sign | C3.w[1];
1672 res.w[0] = C3.w[0];
1673 } else {
1674 if (q3 <= 19) { // C3 fits in 64 bits
1675 if (scale <= 19) { // 10^scale fits in 64 bits
1676 // 64 x 64 C3.w[0] * ten2k64[scale]
1677 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1678 } else { // 10^scale fits in 128 bits
1679 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1680 __mul_128x64_to_128 (res, C3.w[0],
1681 ten2k128[scale - 20]);
1682 }
1683 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1684 // 64 x 128 ten2k64[scale] * C3
1685 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1686 }
1687 // the coefficient in res has q3 + scale = p34 digits
1688 }
1689 e3 = e3 - scale;
1690 res.w[1] = z_sign | res.w[1];
1691 rounding_correction (rnd_mode,
1692 is_inexact_lt_midpoint,
1693 is_inexact_gt_midpoint,
1694 is_midpoint_lt_even, is_midpoint_gt_even,
1695 e3, &res, pfpsf);
1696 }
1697 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1698 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1699 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1700 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1701 BID_SWAP128 (res);
1702 BID_RETURN (res)
1703 }
1704 // res = z
1705 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1706 // return (C3 * 10^scale) * 10^(z_exp - scale)
1707 // where scale = min (p34-q3, z_exp-EMIN)
1708 scale = p34 - q3;
1709 ind = e3 + 6176;
1710 if (ind < scale)
1711 scale = ind;
1712 if (scale == 0) {
1713 res.w[1] = C3.w[1];
1714 res.w[0] = C3.w[0];
1715 } else if (q3 <= 19) { // z fits in 64 bits
1716 if (scale <= 19) { // 10^scale fits in 64 bits
1717 // 64 x 64 C3.w[0] * ten2k64[scale]
1718 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1719 } else { // 10^scale fits in 128 bits
1720 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1721 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1722 }
1723 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1724 // 64 x 128 ten2k64[scale] * C3
1725 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1726 }
1727 // the coefficient in res has q3 + scale digits
1728 // subtract scale from the exponent
1729 z_exp = z_exp - ((UINT64) scale << 49);
1730 e3 = e3 - scale;
1731 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1732 if (scale + q3 < p34)
1733 *pfpsf |= UNDERFLOW_EXCEPTION;
1734 } else {
1735 scale = 0;
1736 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1737 res.w[0] = C3.w[0];
1738 }
1739
1740 // use the following to avoid double rounding errors when operating on
1741 // mixed formats in rounding to nearest, and for correcting the result
1742 // if not rounding to nearest
1743 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1744 // there is a gap of exactly one digit between the scaled C3 and C4
1745 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1746 if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1747 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1748 (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1749 C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1750 // C3 * 10^ scale != 10^(q3-1)
1751 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1752 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1753 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1754 } else { // if C3 * 10^scale = 10^(q3+scale-1)
1755 // ok from above e3 = (z_exp >> 49) - 6176;
1756 // the result is always inexact
1757 if (q4 == 1) {
1758 R64 = C4.w[0];
1759 } else {
1760 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1761 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1762 if (q4 <= 18) { // 2 <= q4 <= 18
1763 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1764 &is_midpoint_lt_even, &is_midpoint_gt_even,
1765 &is_inexact_lt_midpoint,
1766 &is_inexact_gt_midpoint);
1767 } else if (q4 <= 38) {
1768 P128.w[1] = C4.w[1];
1769 P128.w[0] = C4.w[0];
1770 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1771 &is_midpoint_lt_even,
1772 &is_midpoint_gt_even,
1773 &is_inexact_lt_midpoint,
1774 &is_inexact_gt_midpoint);
1775 R64 = R128.w[0]; // one decimal digit
1776 } else if (q4 <= 57) {
1777 P192.w[2] = C4.w[2];
1778 P192.w[1] = C4.w[1];
1779 P192.w[0] = C4.w[0];
1780 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1781 &is_midpoint_lt_even,
1782 &is_midpoint_gt_even,
1783 &is_inexact_lt_midpoint,
1784 &is_inexact_gt_midpoint);
1785 R64 = R192.w[0]; // one decimal digit
1786 } else { // if (q4 <= 68)
1787 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1788 &is_midpoint_lt_even,
1789 &is_midpoint_gt_even,
1790 &is_inexact_lt_midpoint,
1791 &is_inexact_gt_midpoint);
1792 R64 = R256.w[0]; // one decimal digit
1793 }
1794 if (incr_exp) {
1795 R64 = 10;
1796 }
1797 }
1798 if (q4 == 1 && C4.w[0] == 5) {
1799 is_inexact_lt_midpoint = 0;
1800 is_inexact_gt_midpoint = 0;
1801 is_midpoint_lt_even = 1;
1802 is_midpoint_gt_even = 0;
1803 } else if ((e3 == expmin) ||
1804 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
1805 // result does not change
1806 is_inexact_lt_midpoint = 0;
1807 is_inexact_gt_midpoint = 1;
1808 is_midpoint_lt_even = 0;
1809 is_midpoint_gt_even = 0;
1810 } else {
1811 is_inexact_lt_midpoint = 1;
1812 is_inexact_gt_midpoint = 0;
1813 is_midpoint_lt_even = 0;
1814 is_midpoint_gt_even = 0;
1815 // result decremented is 10^(q3+scale) - 1
1816 if ((q3 + scale) <= 19) {
1817 res.w[1] = 0;
1818 res.w[0] = ten2k64[q3 + scale];
1819 } else { // if ((q3 + scale + 1) <= 35)
1820 res.w[1] = ten2k128[q3 + scale - 20].w[1];
1821 res.w[0] = ten2k128[q3 + scale - 20].w[0];
1822 }
1823 res.w[0] = res.w[0] - 1; // borrow never occurs
1824 z_exp = z_exp - EXP_P1;
1825 e3 = e3 - 1;
1826 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1827 }
1828 if (e3 == expmin) {
1829 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1830 ; // result not tiny (in round-to-nearest mode)
1831 } else {
1832 *pfpsf |= UNDERFLOW_EXCEPTION;
1833 }
1834 }
1835 } // end 10^(q3+scale-1)
1836 // set the inexact flag
1837 *pfpsf |= INEXACT_EXCEPTION;
1838 } else {
1839 if (p_sign == z_sign) {
1840 // if (z_sign), set as if for absolute value
1841 is_inexact_lt_midpoint = 1;
1842 } else { // if (p_sign != z_sign)
1843 // if (z_sign), set as if for absolute value
1844 is_inexact_gt_midpoint = 1;
1845 }
1846 *pfpsf |= INEXACT_EXCEPTION;
1847 }
1848 // the result is always inexact => set the inexact flag
1849 // Determine tininess:
1850 // if (exp > expmin)
1851 // the result is not tiny
1852 // else // if exp = emin
1853 // if (q3 + scale < p34)
1854 // the result is tiny
1855 // else // if (q3 + scale = p34)
1856 // if (C3 * 10^scale > 10^33)
1857 // the result is not tiny
1858 // else // if C3 * 10^scale = 10^33
1859 // if (xy * z > 0)
1860 // the result is not tiny
1861 // else // if (xy * z < 0)
1862 // if (z > 0)
1863 // if rnd_mode != RP
1864 // the result is tiny
1865 // else // if RP
1866 // the result is not tiny
1867 // else // if (z < 0)
1868 // if rnd_mode != RM
1869 // the result is tiny
1870 // else // if RM
1871 // the result is not tiny
1872 // endif
1873 // endif
1874 // endif
1875 // endif
1876 // endif
1877 // endif
1878 if ((e3 == expmin && (q3 + scale) < p34) ||
1879 (e3 == expmin && (q3 + scale) == p34 &&
1880 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high
1881 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low
1882 z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
1883 (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1884 *pfpsf |= UNDERFLOW_EXCEPTION;
1885 }
1886 if (rnd_mode != ROUNDING_TO_NEAREST) {
1887 rounding_correction (rnd_mode,
1888 is_inexact_lt_midpoint,
1889 is_inexact_gt_midpoint,
1890 is_midpoint_lt_even, is_midpoint_gt_even,
1891 e3, &res, pfpsf);
1892 }
1893 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1894 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1895 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1896 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1897 BID_SWAP128 (res);
1898 BID_RETURN (res)
1899
1900 } else if (p34 == delta) { // Case (1''B)
1901
1902 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1903 // and C3 can be scaled up to p34 digits if needed
1904
1905 // scale C3 to p34 digits if needed
1906 scale = p34 - q3; // 0 <= scale <= p34 - 1
1907 if (scale == 0) {
1908 res.w[1] = C3.w[1];
1909 res.w[0] = C3.w[0];
1910 } else if (q3 <= 19) { // z fits in 64 bits
1911 if (scale <= 19) { // 10^scale fits in 64 bits
1912 // 64 x 64 C3.w[0] * ten2k64[scale]
1913 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1914 } else { // 10^scale fits in 128 bits
1915 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1916 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1917 }
1918 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1919 // 64 x 128 ten2k64[scale] * C3
1920 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1921 }
1922 // subtract scale from the exponent
1923 z_exp = z_exp - ((UINT64) scale << 49);
1924 e3 = e3 - scale;
1925 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1926
1927 // determine whether x * y is less than, equal to, or greater than
1928 // 1/2 ulp (z)
1929 if (q4 <= 19) {
1930 if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1931 lt_half_ulp = 1;
1932 } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1933 eq_half_ulp = 1;
1934 } else { // > 1/2 ulp
1935 gt_half_ulp = 1;
1936 }
1937 } else if (q4 <= 38) {
1938 if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1939 (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1940 C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
1941 lt_half_ulp = 1;
1942 } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1943 C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1944 eq_half_ulp = 1;
1945 } else { // > 1/2 ulp
1946 gt_half_ulp = 1;
1947 }
1948 } else if (q4 <= 58) {
1949 if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1950 (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1951 C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1952 (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1953 C4.w[1] == midpoint192[q4 - 39].w[1] &&
1954 C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
1955 lt_half_ulp = 1;
1956 } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1957 C4.w[1] == midpoint192[q4 - 39].w[1] &&
1958 C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
1959 eq_half_ulp = 1;
1960 } else { // > 1/2 ulp
1961 gt_half_ulp = 1;
1962 }
1963 } else {
1964 if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1965 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1966 C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1967 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1968 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1969 C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1970 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1971 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1972 C4.w[1] == midpoint256[q4 - 59].w[1] &&
1973 C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
1974 lt_half_ulp = 1;
1975 } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1976 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1977 C4.w[1] == midpoint256[q4 - 59].w[1] &&
1978 C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
1979 eq_half_ulp = 1;
1980 } else { // > 1/2 ulp
1981 gt_half_ulp = 1;
1982 }
1983 }
1984
1985 if (p_sign == z_sign) {
1986 if (lt_half_ulp) {
1987 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1988 // use the following to avoid double rounding errors when operating on
1989 // mixed formats in rounding to nearest
1990 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1991 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1992 // add 1 ulp to the significand
1993 res.w[0]++;
1994 if (res.w[0] == 0x0ull)
1995 res.w[1]++;
1996 // check for rounding overflow, when coeff == 10^34
1997 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
1998 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
1999 e3 = e3 + 1;
2000 // coeff = 10^33
2001 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2002 res.w[1] = 0x0000314dc6448d93ull;
2003 res.w[0] = 0x38c15b0a00000000ull;
2004 }
2005 // end add 1 ulp
2006 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2007 if (eq_half_ulp) {
2008 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2009 } else {
2010 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2011 }
2012 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2013 // leave unchanged
2014 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2015 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2016 }
2017 // the result is always inexact, and never tiny
2018 // set the inexact flag
2019 *pfpsf |= INEXACT_EXCEPTION;
2020 // check for overflow
2021 if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2022 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2023 res.w[0] = 0x0000000000000000ull;
2024 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2025 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2026 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2027 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2028 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2029 BID_SWAP128 (res);
2030 BID_RETURN (res)
2031 }
2032 if (rnd_mode != ROUNDING_TO_NEAREST) {
2033 rounding_correction (rnd_mode,
2034 is_inexact_lt_midpoint,
2035 is_inexact_gt_midpoint,
2036 is_midpoint_lt_even, is_midpoint_gt_even,
2037 e3, &res, pfpsf);
2038 z_exp = res.w[1] & MASK_EXP;
2039 }
2040 } else { // if (p_sign != z_sign)
2041 // consider two cases, because C3 * 10^scale = 10^33 is a special case
2042 if (res.w[1] != 0x0000314dc6448d93ull ||
2043 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2044 if (lt_half_ulp) {
2045 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2046 // use the following to avoid double rounding errors when operating
2047 // on mixed formats in rounding to nearest
2048 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2049 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2050 // subtract 1 ulp from the significand
2051 res.w[0]--;
2052 if (res.w[0] == 0xffffffffffffffffull)
2053 res.w[1]--;
2054 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2055 if (eq_half_ulp) {
2056 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2057 } else {
2058 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2059 }
2060 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2061 // leave unchanged
2062 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2063 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2064 }
2065 // the result is always inexact, and never tiny
2066 // check for overflow for RN
2067 if (e3 > expmax) {
2068 if (rnd_mode == ROUNDING_TO_NEAREST) {
2069 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2070 res.w[0] = 0x0000000000000000ull;
2071 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2072 } else {
2073 rounding_correction (rnd_mode,
2074 is_inexact_lt_midpoint,
2075 is_inexact_gt_midpoint,
2076 is_midpoint_lt_even,
2077 is_midpoint_gt_even, e3, &res,
2078 pfpsf);
2079 }
2080 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2081 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2082 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2083 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2084 BID_SWAP128 (res);
2085 BID_RETURN (res)
2086 }
2087 // set the inexact flag
2088 *pfpsf |= INEXACT_EXCEPTION;
2089 if (rnd_mode != ROUNDING_TO_NEAREST) {
2090 rounding_correction (rnd_mode,
2091 is_inexact_lt_midpoint,
2092 is_inexact_gt_midpoint,
2093 is_midpoint_lt_even,
2094 is_midpoint_gt_even, e3, &res, pfpsf);
2095 }
2096 z_exp = res.w[1] & MASK_EXP;
2097 } else { // if C3 * 10^scale = 10^33
2098 e3 = (z_exp >> 49) - 6176;
2099 if (e3 > expmin) {
2100 // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2101 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2102 if (q4 == 1) {
2103 // if q4 = 1 the result is exact
2104 // result coefficient = 10^34 - C4
2105 res.w[1] = 0x0001ed09bead87c0ull;
2106 res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2107 z_exp = z_exp - EXP_P1;
2108 e3 = e3 - 1;
2109 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2110 } else {
2111 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2112 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2113 if (q4 <= 18) { // 2 <= q4 <= 18
2114 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2115 &is_midpoint_lt_even,
2116 &is_midpoint_gt_even,
2117 &is_inexact_lt_midpoint,
2118 &is_inexact_gt_midpoint);
2119 } else if (q4 <= 38) {
2120 P128.w[1] = C4.w[1];
2121 P128.w[0] = C4.w[0];
2122 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2123 &is_midpoint_lt_even,
2124 &is_midpoint_gt_even,
2125 &is_inexact_lt_midpoint,
2126 &is_inexact_gt_midpoint);
2127 R64 = R128.w[0]; // one decimal digit
2128 } else if (q4 <= 57) {
2129 P192.w[2] = C4.w[2];
2130 P192.w[1] = C4.w[1];
2131 P192.w[0] = C4.w[0];
2132 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2133 &is_midpoint_lt_even,
2134 &is_midpoint_gt_even,
2135 &is_inexact_lt_midpoint,
2136 &is_inexact_gt_midpoint);
2137 R64 = R192.w[0]; // one decimal digit
2138 } else { // if (q4 <= 68)
2139 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2140 &is_midpoint_lt_even,
2141 &is_midpoint_gt_even,
2142 &is_inexact_lt_midpoint,
2143 &is_inexact_gt_midpoint);
2144 R64 = R256.w[0]; // one decimal digit
2145 }
2146 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2147 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2148 // the result is exact: 10^34 - R64
2149 // incr_exp = 0 with certainty
2150 z_exp = z_exp - EXP_P1;
2151 e3 = e3 - 1;
2152 res.w[1] =
2153 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2154 res.w[0] = 0x378d8e6400000000ull - R64;
2155 } else {
2156 // We want R64 to be the top digit of C4, but we actually
2157 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2158 // because the top digit is (C4 * 10^(-q4+1))RZ
2159 // however, if incr_exp = 1 then R64 = 10 with certainty
2160 if (incr_exp) {
2161 R64 = 10;
2162 }
2163 // the result is inexact as C4 has more than 1 significant digit
2164 // and C3 * 10^scale = 10^33
2165 // example of case that is treated here:
2166 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2167 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2168 // note that (e3 > expmin}
2169 // in order to round, subtract R64 from 10^34 and then compare
2170 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2171 // calculate 10^34 - R64
2172 res.w[1] = 0x0001ed09bead87c0ull;
2173 res.w[0] = 0x378d8e6400000000ull - R64;
2174 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2175 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2176 // R64 is small, 1 <= R64 <= 9
2177 e3 = e3 - 1;
2178 if (is_inexact_lt_midpoint) {
2179 is_inexact_lt_midpoint = 0;
2180 is_inexact_gt_midpoint = 1;
2181 } else if (is_inexact_gt_midpoint) {
2182 is_inexact_gt_midpoint = 0;
2183 is_inexact_lt_midpoint = 1;
2184 } else if (is_midpoint_lt_even) {
2185 is_midpoint_lt_even = 0;
2186 is_midpoint_gt_even = 1;
2187 } else if (is_midpoint_gt_even) {
2188 is_midpoint_gt_even = 0;
2189 is_midpoint_lt_even = 1;
2190 } else {
2191 ;
2192 }
2193 // the result is always inexact, and never tiny
2194 // check for overflow for RN
2195 if (e3 > expmax) {
2196 if (rnd_mode == ROUNDING_TO_NEAREST) {
2197 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2198 res.w[0] = 0x0000000000000000ull;
2199 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2200 } else {
2201 rounding_correction (rnd_mode,
2202 is_inexact_lt_midpoint,
2203 is_inexact_gt_midpoint,
2204 is_midpoint_lt_even,
2205 is_midpoint_gt_even, e3, &res,
2206 pfpsf);
2207 }
2208 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2209 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2210 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2211 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2212 BID_SWAP128 (res);
2213 BID_RETURN (res)
2214 }
2215 // set the inexact flag
2216 *pfpsf |= INEXACT_EXCEPTION;
2217 res.w[1] =
2218 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2219 if (rnd_mode != ROUNDING_TO_NEAREST) {
2220 rounding_correction (rnd_mode,
2221 is_inexact_lt_midpoint,
2222 is_inexact_gt_midpoint,
2223 is_midpoint_lt_even,
2224 is_midpoint_gt_even, e3, &res,
2225 pfpsf);
2226 }
2227 z_exp = res.w[1] & MASK_EXP;
2228 } // end result is inexact
2229 } // end q4 > 1
2230 } else { // if (e3 = emin)
2231 // if e3 = expmin the result is also tiny (the condition for
2232 // tininess is C4 > 050...0 [q4 digits] which is met because
2233 // the msd of C4 is not zero)
2234 // the result is tiny and inexact in all rounding modes;
2235 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2236 // gt_half_ulp to calculate)
2237 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2238
2239 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2240 if (gt_half_ulp) { // res = 10^33 - 1
2241 res.w[1] = 0x0000314dc6448d93ull;
2242 res.w[0] = 0x38c15b09ffffffffull;
2243 } else {
2244 res.w[1] = 0x0000314dc6448d93ull;
2245 res.w[0] = 0x38c15b0a00000000ull;
2246 }
2247 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2248 *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2249
2250 if (eq_half_ulp) {
2251 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2252 } else if (lt_half_ulp) {
2253 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2254 } else { // if (gt_half_ulp)
2255 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2256 }
2257
2258 if (rnd_mode != ROUNDING_TO_NEAREST) {
2259 rounding_correction (rnd_mode,
2260 is_inexact_lt_midpoint,
2261 is_inexact_gt_midpoint,
2262 is_midpoint_lt_even,
2263 is_midpoint_gt_even, e3, &res,
2264 pfpsf);
2265 z_exp = res.w[1] & MASK_EXP;
2266 }
2267 } // end e3 = emin
2268 // set the inexact flag (if the result was not exact)
2269 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2270 is_midpoint_lt_even || is_midpoint_gt_even)
2271 *pfpsf |= INEXACT_EXCEPTION;
2272 } // end 10^33
2273 } // end if (p_sign != z_sign)
2274 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2275 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2276 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2277 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2278 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2279 BID_SWAP128 (res);
2280 BID_RETURN (res)
2281
2282 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2283 (q3 <= delta && delta + q4 <= p34) || // Case (3)
2284 (delta < q3 && p34 < delta + q4) || // Case (4)
2285 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
2286 (delta + q4 < q3)) && // Case (6)
2287 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2288
2289 // the result has the sign of z
2290
2291 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2292 (delta < q3 && p34 < delta + q4)) { // Case (4)
2293 // round first the sum x * y + z with unbounded exponent
2294 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2295 // 1 <= scale <= 33
2296 // calculate res = C3 * 10^scale
2297 scale = p34 - q3;
2298 x0 = delta + q4 - p34;
2299 } else if (delta + q4 < q3) { // Case (6)
2300 // make Case (6) look like Case (3) or Case (5) with scale = 0
2301 // by scaling up C4 by 10^(q3 - delta - q4)
2302 scale = q3 - delta - q4; // 1 <= scale <= 33
2303 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2304 if (scale <= 19) { // 10^scale fits in 64 bits
2305 // 64 x 64 C4.w[0] * ten2k64[scale]
2306 __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
2307 } else { // 10^scale fits in 128 bits
2308 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2309 __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
2310 }
2311 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2312 // 64 x 128 ten2k64[scale] * C4
2313 __mul_128x64_to_128 (P128, ten2k64[scale], C4);
2314 }
2315 C4.w[0] = P128.w[0];
2316 C4.w[1] = P128.w[1];
2317 // e4 does not need adjustment, as it is not used from this point on
2318 scale = 0;
2319 x0 = 0;
2320 // now Case (6) looks like Case (3) or Case (5) with scale = 0
2321 } else { // if Case (3) or Case (5)
2322 // Note: Case (3) is similar to Case (2), but scale differs and the
2323 // result is exact, unless it is tiny (so x0 = 0 when calculating the
2324 // result with unbounded exponent)
2325
2326 // calculate first the sum x * y + z with unbounded exponent (exact)
2327 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2328 // 1 <= scale <= 33
2329 // calculate res = C3 * 10^scale
2330 scale = delta + q4 - q3;
2331 x0 = 0;
2332 // Note: the comments which follow refer [mainly] to Case (2)]
2333 }
2334
2335 case2_repeat:
2336 if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2337 // or in Case (4)
2338 res.w[1] = C3.w[1];
2339 res.w[0] = C3.w[0];
2340 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2341 if (scale <= 19) { // 10^scale fits in 64 bits
2342 // 64 x 64 C3.w[0] * ten2k64[scale]
2343 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
2344 } else { // 10^scale fits in 128 bits
2345 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2346 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
2347 }
2348 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2349 // 64 x 128 ten2k64[scale] * C3
2350 __mul_128x64_to_128 (res, ten2k64[scale], C3);
2351 }
2352 // e3 is already calculated
2353 e3 = e3 - scale;
2354 // now res = C3 * 10^scale and e3 = e3 - scale
2355 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2356 // because the result was too small
2357
2358 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2359 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2360 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2361 // the rounding fits in 128 bits!)
2362 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2363 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2364 if (x0 == 0) { // this could happen only if we return to case2_repeat, or
2365 // for Case (3) or Case (6)
2366 R128.w[1] = C4.w[1];
2367 R128.w[0] = C4.w[0];
2368 } else if (q4 <= 18) {
2369 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2370 round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2371 &is_midpoint_lt_even, &is_midpoint_gt_even,
2372 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
2373 if (incr_exp) {
2374 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2375 R64 = ten2k64[q4 - x0];
2376 }
2377 R128.w[1] = 0;
2378 R128.w[0] = R64;
2379 } else if (q4 <= 38) {
2380 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2381 P128.w[1] = C4.w[1];
2382 P128.w[0] = C4.w[0];
2383 round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2384 &is_midpoint_lt_even, &is_midpoint_gt_even,
2385 &is_inexact_lt_midpoint,
2386 &is_inexact_gt_midpoint);
2387 if (incr_exp) {
2388 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2389 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2390 R128.w[0] = ten2k64[q4 - x0];
2391 // R128.w[1] stays 0
2392 } else { // 20 <= q4 - x0 <= 37
2393 R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2394 R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
2395 }
2396 }
2397 } else if (q4 <= 57) {
2398 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2399 P192.w[2] = C4.w[2];
2400 P192.w[1] = C4.w[1];
2401 P192.w[0] = C4.w[0];
2402 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2403 &is_midpoint_lt_even, &is_midpoint_gt_even,
2404 &is_inexact_lt_midpoint,
2405 &is_inexact_gt_midpoint);
2406 // R192.w[2] is always 0
2407 if (incr_exp) {
2408 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2409 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2410 R192.w[0] = ten2k64[q4 - x0];
2411 // R192.w[1] stays 0
2412 // R192.w[2] stays 0
2413 } else { // 20 <= q4 - x0 <= 33
2414 R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2415 R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
2416 // R192.w[2] stays 0
2417 }
2418 }
2419 R128.w[1] = R192.w[1];
2420 R128.w[0] = R192.w[0];
2421 } else {
2422 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2423 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2424 &is_midpoint_lt_even, &is_midpoint_gt_even,
2425 &is_inexact_lt_midpoint,
2426 &is_inexact_gt_midpoint);
2427 // R256.w[3] and R256.w[2] are always 0
2428 if (incr_exp) {
2429 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2430 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2431 R256.w[0] = ten2k64[q4 - x0];
2432 // R256.w[1] stays 0
2433 // R256.w[2] stays 0
2434 // R256.w[3] stays 0
2435 } else { // 20 <= q4 - x0 <= 33
2436 R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2437 R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
2438 // R256.w[2] stays 0
2439 // R256.w[3] stays 0
2440 }
2441 }
2442 R128.w[1] = R256.w[1];
2443 R128.w[0] = R256.w[0];
2444 }
2445 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2446 // rounded to nearest, which were copied into R128
2447 if (z_sign == p_sign) {
2448 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2449 // the sum can result in [up to] p34 or p34 + 1 digits
2450 res.w[0] = res.w[0] + R128.w[0];
2451 res.w[1] = res.w[1] + R128.w[1];
2452 if (res.w[0] < R128.w[0])
2453 res.w[1]++; // carry
2454 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2455 if (res.w[1] > 0x0001ed09bead87c0ull ||
2456 (res.w[1] == 0x0001ed09bead87c0ull &&
2457 res.w[0] > 0x378d8e63ffffffffull)) {
2458 // avoid double rounding error
2459 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2460 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2461 is_midpoint_lt_even0 = is_midpoint_lt_even;
2462 is_midpoint_gt_even0 = is_midpoint_gt_even;
2463 is_inexact_lt_midpoint = 0;
2464 is_inexact_gt_midpoint = 0;
2465 is_midpoint_lt_even = 0;
2466 is_midpoint_gt_even = 0;
2467 P128.w[1] = res.w[1];
2468 P128.w[0] = res.w[0];
2469 round128_19_38 (35, 1, P128, &res, &incr_exp,
2470 &is_midpoint_lt_even, &is_midpoint_gt_even,
2471 &is_inexact_lt_midpoint,
2472 &is_inexact_gt_midpoint);
2473 // incr_exp is 0 with certainty in this case
2474 // avoid a double rounding error
2475 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2476 is_midpoint_lt_even) { // double rounding error upward
2477 // res = res - 1
2478 res.w[0]--;
2479 if (res.w[0] == 0xffffffffffffffffull)
2480 res.w[1]--;
2481 // Note: a double rounding error upward is not possible; for this
2482 // the result after the first rounding would have to be 99...95
2483 // (35 digits in all), possibly followed by a number of zeros; this
2484 // not possible in Cases (2)-(6) or (15)-(17) which may get here
2485 is_midpoint_lt_even = 0;
2486 is_inexact_lt_midpoint = 1;
2487 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2488 is_midpoint_gt_even) { // double rounding error downward
2489 // res = res + 1
2490 res.w[0]++;
2491 if (res.w[0] == 0)
2492 res.w[1]++;
2493 is_midpoint_gt_even = 0;
2494 is_inexact_gt_midpoint = 1;
2495 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2496 !is_inexact_lt_midpoint
2497 && !is_inexact_gt_midpoint) {
2498 // if this second rounding was exact the result may still be
2499 // inexact because of the first rounding
2500 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2501 is_inexact_gt_midpoint = 1;
2502 }
2503 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2504 is_inexact_lt_midpoint = 1;
2505 }
2506 } else if (is_midpoint_gt_even &&
2507 (is_inexact_gt_midpoint0
2508 || is_midpoint_lt_even0)) {
2509 // pulled up to a midpoint
2510 is_inexact_lt_midpoint = 1;
2511 is_inexact_gt_midpoint = 0;
2512 is_midpoint_lt_even = 0;
2513 is_midpoint_gt_even = 0;
2514 } else if (is_midpoint_lt_even &&
2515 (is_inexact_lt_midpoint0
2516 || is_midpoint_gt_even0)) {
2517 // pulled down to a midpoint
2518 is_inexact_lt_midpoint = 0;
2519 is_inexact_gt_midpoint = 1;
2520 is_midpoint_lt_even = 0;
2521 is_midpoint_gt_even = 0;
2522 } else {
2523 ;
2524 }
2525 // adjust exponent
2526 e3 = e3 + 1;
2527 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2528 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2529 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2530 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2531 is_inexact_lt_midpoint = 1;
2532 }
2533 }
2534 } else {
2535 // this is the result rounded with unbounded exponent, unless a
2536 // correction is needed
2537 res.w[1] = res.w[1] & MASK_COEFF;
2538 if (lsb == 1) {
2539 if (is_midpoint_gt_even) {
2540 // res = res + 1
2541 is_midpoint_gt_even = 0;
2542 is_midpoint_lt_even = 1;
2543 res.w[0]++;
2544 if (res.w[0] == 0x0)
2545 res.w[1]++;
2546 // check for rounding overflow
2547 if (res.w[1] == 0x0001ed09bead87c0ull &&
2548 res.w[0] == 0x378d8e6400000000ull) {
2549 // res = 10^34 => rounding overflow
2550 res.w[1] = 0x0000314dc6448d93ull;
2551 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2552 e3++;
2553 }
2554 } else if (is_midpoint_lt_even) {
2555 // res = res - 1
2556 is_midpoint_lt_even = 0;
2557 is_midpoint_gt_even = 1;
2558 res.w[0]--;
2559 if (res.w[0] == 0xffffffffffffffffull)
2560 res.w[1]--;
2561 // if the result is pure zero, the sign depends on the rounding
2562 // mode (x*y and z had opposite signs)
2563 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2564 if (rnd_mode != ROUNDING_DOWN)
2565 z_sign = 0x0000000000000000ull;
2566 else
2567 z_sign = 0x8000000000000000ull;
2568 // the exponent is max (e3, expmin)
2569 res.w[1] = 0x0;
2570 res.w[0] = 0x0;
2571 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2572 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2573 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2574 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2575 BID_SWAP128 (res);
2576 BID_RETURN (res)
2577 }
2578 } else {
2579 ;
2580 }
2581 }
2582 }
2583 } else { // if (z_sign != p_sign)
2584 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2585 // used to swap rounding indicators if p_sign != z_sign
2586 // the sum can result in [up to] p34 or p34 - 1 digits
2587 tmp64 = res.w[0];
2588 res.w[0] = res.w[0] - R128.w[0];
2589 res.w[1] = res.w[1] - R128.w[1];
2590 if (res.w[0] > tmp64)
2591 res.w[1]--; // borrow
2592 // if res < 10^33 and exp > expmin need to decrease x0 and
2593 // increase scale by 1
2594 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2595 (res.w[1] == 0x0000314dc6448d93ull &&
2596 res.w[0] < 0x38c15b0a00000000ull)) ||
2597 (is_inexact_lt_midpoint
2598 && res.w[1] == 0x0000314dc6448d93ull
2599 && res.w[0] == 0x38c15b0a00000000ull))
2600 && x0 >= 1) {
2601 x0 = x0 - 1;
2602 // first restore e3, otherwise it will be too small
2603 e3 = e3 + scale;
2604 scale = scale + 1;
2605 is_inexact_lt_midpoint = 0;
2606 is_inexact_gt_midpoint = 0;
2607 is_midpoint_lt_even = 0;
2608 is_midpoint_gt_even = 0;
2609 incr_exp = 0;
2610 goto case2_repeat;
2611 }
2612 // else this is the result rounded with unbounded exponent;
2613 // because the result has opposite sign to that of C4 which was
2614 // rounded, need to change the rounding indicators
2615 if (is_inexact_lt_midpoint) {
2616 is_inexact_lt_midpoint = 0;
2617 is_inexact_gt_midpoint = 1;
2618 } else if (is_inexact_gt_midpoint) {
2619 is_inexact_gt_midpoint = 0;
2620 is_inexact_lt_midpoint = 1;
2621 } else if (lsb == 0) {
2622 if (is_midpoint_lt_even) {
2623 is_midpoint_lt_even = 0;
2624 is_midpoint_gt_even = 1;
2625 } else if (is_midpoint_gt_even) {
2626 is_midpoint_gt_even = 0;
2627 is_midpoint_lt_even = 1;
2628 } else {
2629 ;
2630 }
2631 } else if (lsb == 1) {
2632 if (is_midpoint_lt_even) {
2633 // res = res + 1
2634 res.w[0]++;
2635 if (res.w[0] == 0x0)
2636 res.w[1]++;
2637 // check for rounding overflow
2638 if (res.w[1] == 0x0001ed09bead87c0ull &&
2639 res.w[0] == 0x378d8e6400000000ull) {
2640 // res = 10^34 => rounding overflow
2641 res.w[1] = 0x0000314dc6448d93ull;
2642 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2643 e3++;
2644 }
2645 } else if (is_midpoint_gt_even) {
2646 // res = res - 1
2647 res.w[0]--;
2648 if (res.w[0] == 0xffffffffffffffffull)
2649 res.w[1]--;
2650 // if the result is pure zero, the sign depends on the rounding
2651 // mode (x*y and z had opposite signs)
2652 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2653 if (rnd_mode != ROUNDING_DOWN)
2654 z_sign = 0x0000000000000000ull;
2655 else
2656 z_sign = 0x8000000000000000ull;
2657 // the exponent is max (e3, expmin)
2658 res.w[1] = 0x0;
2659 res.w[0] = 0x0;
2660 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2661 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2662 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2663 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2664 BID_SWAP128 (res);
2665 BID_RETURN (res)
2666 }
2667 } else {
2668 ;
2669 }
2670 } else {
2671 ;
2672 }
2673 }
2674 // check for underflow
2675 if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2676 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2677 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2678 res.w[0] < 0x38c15b0a00000000ull)) {
2679 is_tiny = 1;
2680 }
2681 } else if (e3 < expmin) {
2682 // the result is tiny, so we must truncate more of res
2683 is_tiny = 1;
2684 x0 = expmin - e3;
2685 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2686 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2687 is_midpoint_lt_even0 = is_midpoint_lt_even;
2688 is_midpoint_gt_even0 = is_midpoint_gt_even;
2689 is_inexact_lt_midpoint = 0;
2690 is_inexact_gt_midpoint = 0;
2691 is_midpoint_lt_even = 0;
2692 is_midpoint_gt_even = 0;
2693 // determine the number of decimal digits in res
2694 if (res.w[1] == 0x0) {
2695 // between 1 and 19 digits
2696 for (ind = 1; ind <= 19; ind++) {
2697 if (res.w[0] < ten2k64[ind]) {
2698 break;
2699 }
2700 }
2701 // ind digits
2702 } else if (res.w[1] < ten2k128[0].w[1] ||
2703 (res.w[1] == ten2k128[0].w[1]
2704 && res.w[0] < ten2k128[0].w[0])) {
2705 // 20 digits
2706 ind = 20;
2707 } else { // between 21 and 38 digits
2708 for (ind = 1; ind <= 18; ind++) {
2709 if (res.w[1] < ten2k128[ind].w[1] ||
2710 (res.w[1] == ten2k128[ind].w[1] &&
2711 res.w[0] < ten2k128[ind].w[0])) {
2712 break;
2713 }
2714 }
2715 // ind + 20 digits
2716 ind = ind + 20;
2717 }
2718
2719 // at this point ind >= x0; because delta >= 2 on this path, the case
2720 // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2721 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2722 // the signs of x * y and z are opposite, and through cancellation
2723 // the most significant decimal digit in res has the weight
2724 // 10^(emin-1); however, it is clear that in this case the most
2725 // significant digit is 9, so the result before rounding is
2726 // 0.9... * 10^emin
2727 // Otherwise, ind > x0 because there are non-zero decimal digits in the
2728 // result with weight of at least 10^emin, and correction for underflow
2729 // can be carried out using the round*_*_2_* () routines
2730 if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2731 res.w[1] = 0x0;
2732 res.w[0] = 0x1;
2733 is_inexact_gt_midpoint = 1;
2734 } else if (ind <= 18) { // check that 2 <= ind
2735 // 2 <= ind <= 18, 1 <= x0 <= 17
2736 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2737 &is_midpoint_lt_even, &is_midpoint_gt_even,
2738 &is_inexact_lt_midpoint,
2739 &is_inexact_gt_midpoint);
2740 if (incr_exp) {
2741 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2742 R64 = ten2k64[ind - x0];
2743 }
2744 res.w[1] = 0;
2745 res.w[0] = R64;
2746 } else if (ind <= 38) {
2747 // 19 <= ind <= 38
2748 P128.w[1] = res.w[1];
2749 P128.w[0] = res.w[0];
2750 round128_19_38 (ind, x0, P128, &res, &incr_exp,
2751 &is_midpoint_lt_even, &is_midpoint_gt_even,
2752 &is_inexact_lt_midpoint,
2753 &is_inexact_gt_midpoint);
2754 if (incr_exp) {
2755 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2756 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
2757 res.w[0] = ten2k64[ind - x0];
2758 // res.w[1] stays 0
2759 } else { // 20 <= ind - x0 <= 37
2760 res.w[0] = ten2k128[ind - x0 - 20].w[0];
2761 res.w[1] = ten2k128[ind - x0 - 20].w[1];
2762 }
2763 }
2764 }
2765 // avoid a double rounding error
2766 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2767 is_midpoint_lt_even) { // double rounding error upward
2768 // res = res - 1
2769 res.w[0]--;
2770 if (res.w[0] == 0xffffffffffffffffull)
2771 res.w[1]--;
2772 // Note: a double rounding error upward is not possible; for this
2773 // the result after the first rounding would have to be 99...95
2774 // (35 digits in all), possibly followed by a number of zeros; this
2775 // not possible in Cases (2)-(6) which may get here
2776 is_midpoint_lt_even = 0;
2777 is_inexact_lt_midpoint = 1;
2778 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2779 is_midpoint_gt_even) { // double rounding error downward
2780 // res = res + 1
2781 res.w[0]++;
2782 if (res.w[0] == 0)
2783 res.w[1]++;
2784 is_midpoint_gt_even = 0;
2785 is_inexact_gt_midpoint = 1;
2786 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2787 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2788 // if this second rounding was exact the result may still be
2789 // inexact because of the first rounding
2790 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2791 is_inexact_gt_midpoint = 1;
2792 }
2793 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2794 is_inexact_lt_midpoint = 1;
2795 }
2796 } else if (is_midpoint_gt_even &&
2797 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
2798 // pulled up to a midpoint
2799 is_inexact_lt_midpoint = 1;
2800 is_inexact_gt_midpoint = 0;
2801 is_midpoint_lt_even = 0;
2802 is_midpoint_gt_even = 0;
2803 } else if (is_midpoint_lt_even &&
2804 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
2805 // pulled down to a midpoint
2806 is_inexact_lt_midpoint = 0;
2807 is_inexact_gt_midpoint = 1;
2808 is_midpoint_lt_even = 0;
2809 is_midpoint_gt_even = 0;
2810 } else {
2811 ;
2812 }
2813 // adjust exponent
2814 e3 = e3 + x0;
2815 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2816 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2817 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2818 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2819 is_inexact_lt_midpoint = 1;
2820 }
2821 }
2822 } else {
2823 ; // not underflow
2824 }
2825 // check for inexact result
2826 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2827 is_midpoint_lt_even || is_midpoint_gt_even) {
2828 // set the inexact flag
2829 *pfpsf |= INEXACT_EXCEPTION;
2830 if (is_tiny)
2831 *pfpsf |= UNDERFLOW_EXCEPTION;
2832 }
2833 // now check for significand = 10^34 (may have resulted from going
2834 // back to case2_repeat)
2835 if (res.w[1] == 0x0001ed09bead87c0ull &&
2836 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34
2837 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2838 res.w[0] = 0x38c15b0a00000000ull;
2839 e3 = e3 + 1;
2840 }
2841 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2842 // check for overflow
2843 if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2844 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2845 res.w[0] = 0x0000000000000000ull;
2846 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2847 }
2848 if (rnd_mode != ROUNDING_TO_NEAREST) {
2849 rounding_correction (rnd_mode,
2850 is_inexact_lt_midpoint,
2851 is_inexact_gt_midpoint,
2852 is_midpoint_lt_even, is_midpoint_gt_even,
2853 e3, &res, pfpsf);
2854 }
2855 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2856 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2857 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2858 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2859 BID_SWAP128 (res);
2860 BID_RETURN (res)
2861
2862 } else {
2863
2864 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2865 // the signs of x*y and z are opposite; in these cases massive
2866 // cancellation can occur, so it is better to scale either C3 or C4 and
2867 // to perform the subtraction before rounding; rounding is performed
2868 // next, depending on the number of decimal digits in the result and on
2869 // the exponent value
2870 // Note: overlow is not possible in this case
2871 // this is similar to Cases (15), (16), and (17)
2872
2873 if (delta + q4 < q3) { // from Case (6)
2874 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2875 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2876 // and call add_and_round; delta stays positive
2877 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2878 P128.w[1] = C3.w[1];
2879 P128.w[0] = C3.w[0];
2880 C3.w[1] = C4.w[1];
2881 C3.w[0] = C4.w[0];
2882 C4.w[1] = P128.w[1];
2883 C4.w[0] = P128.w[0];
2884 ind = q3;
2885 q3 = q4;
2886 q4 = ind;
2887 ind = e3;
2888 e3 = e4;
2889 e4 = ind;
2890 tmp_sign = z_sign;
2891 z_sign = p_sign;
2892 p_sign = tmp_sign;
2893 } else { // from Cases (2), (3), (4), (5)
2894 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2895 // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2896 // (16), and (17) if we just change the sign of delta
2897 delta = -delta;
2898 }
2899 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
2900 rnd_mode, &is_midpoint_lt_even,
2901 &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2902 &is_inexact_gt_midpoint, pfpsf, &res);
2903 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2904 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2905 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2906 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2907 BID_SWAP128 (res);
2908 BID_RETURN (res)
2909
2910 }
2911
2912 } else { // if delta < 0
2913
2914 delta = -delta;
2915
2916 if (p34 < q4 && q4 <= delta) { // Case (7)
2917
2918 // truncate C4 to p34 digits into res
2919 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2920 x0 = q4 - p34;
2921 if (q4 <= 38) {
2922 P128.w[1] = C4.w[1];
2923 P128.w[0] = C4.w[0];
2924 round128_19_38 (q4, x0, P128, &res, &incr_exp,
2925 &is_midpoint_lt_even, &is_midpoint_gt_even,
2926 &is_inexact_lt_midpoint,
2927 &is_inexact_gt_midpoint);
2928 } else if (q4 <= 57) { // 35 <= q4 <= 57
2929 P192.w[2] = C4.w[2];
2930 P192.w[1] = C4.w[1];
2931 P192.w[0] = C4.w[0];
2932 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2933 &is_midpoint_lt_even, &is_midpoint_gt_even,
2934 &is_inexact_lt_midpoint,
2935 &is_inexact_gt_midpoint);
2936 res.w[0] = R192.w[0];
2937 res.w[1] = R192.w[1];
2938 } else { // if (q4 <= 68)
2939 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2940 &is_midpoint_lt_even, &is_midpoint_gt_even,
2941 &is_inexact_lt_midpoint,
2942 &is_inexact_gt_midpoint);
2943 res.w[0] = R256.w[0];
2944 res.w[1] = R256.w[1];
2945 }
2946 e4 = e4 + x0;
2947 if (incr_exp) {
2948 e4 = e4 + 1;
2949 }
2950 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2951 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2952 // if C4 rounded to p34 digits is exact then the result is inexact,
2953 // in a way that depends on the signs of x * y and z
2954 if (p_sign == z_sign) {
2955 is_inexact_lt_midpoint = 1;
2956 } else { // if (p_sign != z_sign)
2957 if (res.w[1] != 0x0000314dc6448d93ull ||
2958 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2959 is_inexact_gt_midpoint = 1;
2960 } else { // res = 10^33 and exact is a special case
2961 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2962 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2963 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2964 // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2965 if (delta > p34 + 1) { // C3 < 1/2
2966 // res = 10^33, unchanged
2967 is_inexact_gt_midpoint = 1;
2968 } else { // if (delta == p34 + 1)
2969 if (q3 <= 19) {
2970 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2971 // res = 10^33, unchanged
2972 is_inexact_gt_midpoint = 1;
2973 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2974 // res = 10^33, unchanged
2975 is_midpoint_lt_even = 1;
2976 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2977 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2978 res.w[0] = 0x378d8e63ffffffffull;
2979 e4 = e4 - 1;
2980 is_inexact_lt_midpoint = 1;
2981 }
2982 } else { // if (20 <= q3 <=34)
2983 if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2984 (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2985 C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2986 // res = 10^33, unchanged
2987 is_inexact_gt_midpoint = 1;
2988 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2989 C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2990 // res = 10^33, unchanged
2991 is_midpoint_lt_even = 1;
2992 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2993 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2994 res.w[0] = 0x378d8e63ffffffffull;
2995 e4 = e4 - 1;
2996 is_inexact_lt_midpoint = 1;
2997 }
2998 }
2999 }
3000 }
3001 }
3002 } else if (is_midpoint_lt_even) {
3003 if (z_sign != p_sign) {
3004 // needs correction: res = res - 1
3005 res.w[0] = res.w[0] - 1;
3006 if (res.w[0] == 0xffffffffffffffffull)
3007 res.w[1]--;
3008 // if it is (10^33-1)*10^e4 then the corect result is
3009 // (10^34-1)*10(e4-1)
3010 if (res.w[1] == 0x0000314dc6448d93ull &&
3011 res.w[0] == 0x38c15b09ffffffffull) {
3012 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3013 res.w[0] = 0x378d8e63ffffffffull;
3014 e4 = e4 - 1;
3015 }
3016 is_midpoint_lt_even = 0;
3017 is_inexact_lt_midpoint = 1;
3018 } else { // if (z_sign == p_sign)
3019 is_midpoint_lt_even = 0;
3020 is_inexact_gt_midpoint = 1;
3021 }
3022 } else if (is_midpoint_gt_even) {
3023 if (z_sign == p_sign) {
3024 // needs correction: res = res + 1 (cannot cross in the next binade)
3025 res.w[0] = res.w[0] + 1;
3026 if (res.w[0] == 0x0000000000000000ull)
3027 res.w[1]++;
3028 is_midpoint_gt_even = 0;
3029 is_inexact_gt_midpoint = 1;
3030 } else { // if (z_sign != p_sign)
3031 is_midpoint_gt_even = 0;
3032 is_inexact_lt_midpoint = 1;
3033 }
3034 } else {
3035 ; // the rounded result is already correct
3036 }
3037 // check for overflow
3038 if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3039 res.w[1] = p_sign | 0x7800000000000000ull;
3040 res.w[0] = 0x0000000000000000ull;
3041 *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3042 } else { // no overflow or not RN
3043 p_exp = ((UINT64) (e4 + 6176) << 49);
3044 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3045 }
3046 if (rnd_mode != ROUNDING_TO_NEAREST) {
3047 rounding_correction (rnd_mode,
3048 is_inexact_lt_midpoint,
3049 is_inexact_gt_midpoint,
3050 is_midpoint_lt_even, is_midpoint_gt_even,
3051 e4, &res, pfpsf);
3052 }
3053 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3054 is_midpoint_lt_even || is_midpoint_gt_even) {
3055 // set the inexact flag
3056 *pfpsf |= INEXACT_EXCEPTION;
3057 }
3058 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3059 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3060 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3061 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3062 BID_SWAP128 (res);
3063 BID_RETURN (res)
3064
3065 } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3066 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3067 (q4 <= delta && delta + q3 <= p34) || // Case (10)
3068 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3069 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3070 (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3071
3072 // Case (8) is similar to Case (1), with C3 and C4 swapped
3073 // Case (9) is similar to Case (2), with C3 and C4 swapped
3074 // Case (10) is similar to Case (3), with C3 and C4 swapped
3075 // Case (13) is similar to Case (4), with C3 and C4 swapped
3076 // Case (14) is similar to Case (5), with C3 and C4 swapped
3077 // Case (18) is similar to Case (6), with C3 and C4 swapped
3078
3079 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3080 // and go back to delta_ge_zero
3081 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3082 P128.w[1] = C3.w[1];
3083 P128.w[0] = C3.w[0];
3084 C3.w[1] = C4.w[1];
3085 C3.w[0] = C4.w[0];
3086 C4.w[1] = P128.w[1];
3087 C4.w[0] = P128.w[0];
3088 ind = q3;
3089 q3 = q4;
3090 q4 = ind;
3091 ind = e3;
3092 e3 = e4;
3093 e4 = ind;
3094 tmp_sign = z_sign;
3095 z_sign = p_sign;
3096 p_sign = tmp_sign;
3097 tmp.ui64 = z_exp;
3098 z_exp = p_exp;
3099 p_exp = tmp.ui64;
3100 goto delta_ge_zero;
3101
3102 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3103 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3104
3105 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3106 // 1 <= x0 <= q3 - 1 <= p34 - 1
3107 x0 = e4 - e3; // or x0 = delta + q3 - q4
3108 if (q3 <= 18) { // 2 <= q3 <= 18
3109 round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3110 &is_midpoint_lt_even, &is_midpoint_gt_even,
3111 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
3112 // C3.w[1] = 0;
3113 C3.w[0] = R64;
3114 } else if (q3 <= 38) {
3115 round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3116 &is_midpoint_lt_even, &is_midpoint_gt_even,
3117 &is_inexact_lt_midpoint,
3118 &is_inexact_gt_midpoint);
3119 C3.w[1] = R128.w[1];
3120 C3.w[0] = R128.w[0];
3121 }
3122 // the rounded result has q3 - x0 digits
3123 // we want the exponent to be e4, so if incr_exp = 1 then
3124 // multiply the rounded result by 10 - it will still fit in 113 bits
3125 if (incr_exp) {
3126 // 64 x 128 -> 128
3127 P128.w[1] = C3.w[1];
3128 P128.w[0] = C3.w[0];
3129 __mul_64x128_to_128 (C3, ten2k64[1], P128);
3130 }
3131 e3 = e3 + x0; // this is e4
3132 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3133 // the result will have the sign of x * y; the exponent is e4
3134 R256.w[3] = 0;
3135 R256.w[2] = 0;
3136 R256.w[1] = C3.w[1];
3137 R256.w[0] = C3.w[0];
3138 if (p_sign == z_sign) { // R256 = C4 + R256
3139 add256 (C4, R256, &R256);
3140 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3141 sub256 (C4, R256, &R256); // the result cannot be pure zero
3142 // because the result has opposite sign to that of R256 which was
3143 // rounded, need to change the rounding indicators
3144 lsb = C4.w[0] & 0x01;
3145 if (is_inexact_lt_midpoint) {
3146 is_inexact_lt_midpoint = 0;
3147 is_inexact_gt_midpoint = 1;
3148 } else if (is_inexact_gt_midpoint) {
3149 is_inexact_gt_midpoint = 0;
3150 is_inexact_lt_midpoint = 1;
3151 } else if (lsb == 0) {
3152 if (is_midpoint_lt_even) {
3153 is_midpoint_lt_even = 0;
3154 is_midpoint_gt_even = 1;
3155 } else if (is_midpoint_gt_even) {
3156 is_midpoint_gt_even = 0;
3157 is_midpoint_lt_even = 1;
3158 } else {
3159 ;
3160 }
3161 } else if (lsb == 1) {
3162 if (is_midpoint_lt_even) {
3163 // res = res + 1
3164 R256.w[0]++;
3165 if (R256.w[0] == 0x0) {
3166 R256.w[1]++;
3167 if (R256.w[1] == 0x0) {
3168 R256.w[2]++;
3169 if (R256.w[2] == 0x0) {
3170 R256.w[3]++;
3171 }
3172 }
3173 }
3174 // no check for rounding overflow - R256 was a difference
3175 } else if (is_midpoint_gt_even) {
3176 // res = res - 1
3177 R256.w[0]--;
3178 if (R256.w[0] == 0xffffffffffffffffull) {
3179 R256.w[1]--;
3180 if (R256.w[1] == 0xffffffffffffffffull) {
3181 R256.w[2]--;
3182 if (R256.w[2] == 0xffffffffffffffffull) {
3183 R256.w[3]--;
3184 }
3185 }
3186 }
3187 } else {
3188 ;
3189 }
3190 } else {
3191 ;
3192 }
3193 }
3194 // determine the number of decimal digits in R256
3195 ind = nr_digits256 (R256); // ind >= p34
3196 // if R256 is sum, then ind > p34; if R256 is a difference, then
3197 // ind >= p34; this means that we can calculate the result rounded to
3198 // the destination precision, with unbounded exponent, starting from R256
3199 // and using the indicators from the rounding of C3 to avoid a double
3200 // rounding error
3201
3202 if (ind < p34) {
3203 ;
3204 } else if (ind == p34) {
3205 // the result rounded to the destination precision with
3206 // unbounded exponent
3207 // is (-1)^p_sign * R256 * 10^e4
3208 res.w[1] = R256.w[1];
3209 res.w[0] = R256.w[0];
3210 } else { // if (ind > p34)
3211 // if more than P digits, round to nearest to P digits
3212 // round R256 to p34 digits
3213 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3214 // save C3 rounding indicators to help avoid double rounding error
3215 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3216 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3217 is_midpoint_lt_even0 = is_midpoint_lt_even;
3218 is_midpoint_gt_even0 = is_midpoint_gt_even;
3219 // initialize rounding indicators
3220 is_inexact_lt_midpoint = 0;
3221 is_inexact_gt_midpoint = 0;
3222 is_midpoint_lt_even = 0;
3223 is_midpoint_gt_even = 0;
3224 // round to p34 digits; the result fits in 113 bits
3225 if (ind <= 38) {
3226 P128.w[1] = R256.w[1];
3227 P128.w[0] = R256.w[0];
3228 round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3229 &is_midpoint_lt_even, &is_midpoint_gt_even,
3230 &is_inexact_lt_midpoint,
3231 &is_inexact_gt_midpoint);
3232 } else if (ind <= 57) {
3233 P192.w[2] = R256.w[2];
3234 P192.w[1] = R256.w[1];
3235 P192.w[0] = R256.w[0];
3236 round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3237 &is_midpoint_lt_even, &is_midpoint_gt_even,
3238 &is_inexact_lt_midpoint,
3239 &is_inexact_gt_midpoint);
3240 R128.w[1] = R192.w[1];
3241 R128.w[0] = R192.w[0];
3242 } else { // if (ind <= 68)
3243 round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3244 &is_midpoint_lt_even, &is_midpoint_gt_even,
3245 &is_inexact_lt_midpoint,
3246 &is_inexact_gt_midpoint);
3247 R128.w[1] = R256.w[1];
3248 R128.w[0] = R256.w[0];
3249 }
3250 // the rounded result has p34 = 34 digits
3251 e4 = e4 + x0 + incr_exp;
3252
3253 res.w[1] = R128.w[1];
3254 res.w[0] = R128.w[0];
3255
3256 // avoid a double rounding error
3257 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3258 is_midpoint_lt_even) { // double rounding error upward
3259 // res = res - 1
3260 res.w[0]--;
3261 if (res.w[0] == 0xffffffffffffffffull)
3262 res.w[1]--;
3263 is_midpoint_lt_even = 0;
3264 is_inexact_lt_midpoint = 1;
3265 // Note: a double rounding error upward is not possible; for this
3266 // the result after the first rounding would have to be 99...95
3267 // (35 digits in all), possibly followed by a number of zeros; this
3268 // not possible in Cases (2)-(6) or (15)-(17) which may get here
3269 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3270 if (res.w[1] == 0x0000314dc6448d93ull &&
3271 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
3272 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3273 res.w[0] = 0x378d8e63ffffffffull;
3274 e4--;
3275 }
3276 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3277 is_midpoint_gt_even) { // double rounding error downward
3278 // res = res + 1
3279 res.w[0]++;
3280 if (res.w[0] == 0)
3281 res.w[1]++;
3282 is_midpoint_gt_even = 0;
3283 is_inexact_gt_midpoint = 1;
3284 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3285 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
3286 // if this second rounding was exact the result may still be
3287 // inexact because of the first rounding
3288 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3289 is_inexact_gt_midpoint = 1;
3290 }
3291 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3292 is_inexact_lt_midpoint = 1;
3293 }
3294 } else if (is_midpoint_gt_even &&
3295 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
3296 // pulled up to a midpoint
3297 is_inexact_lt_midpoint = 1;
3298 is_inexact_gt_midpoint = 0;
3299 is_midpoint_lt_even = 0;
3300 is_midpoint_gt_even = 0;
3301 } else if (is_midpoint_lt_even &&
3302 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
3303 // pulled down to a midpoint
3304 is_inexact_lt_midpoint = 0;
3305 is_inexact_gt_midpoint = 1;
3306 is_midpoint_lt_even = 0;
3307 is_midpoint_gt_even = 0;
3308 } else {
3309 ;
3310 }
3311 }
3312
3313 // determine tininess
3314 if (rnd_mode == ROUNDING_TO_NEAREST) {
3315 if (e4 < expmin) {
3316 is_tiny = 1; // for other rounding modes apply correction
3317 }
3318 } else {
3319 // for RM, RP, RZ, RA apply correction in order to determine tininess
3320 // but do not save the result; apply the correction to
3321 // (-1)^p_sign * res * 10^0
3322 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3323 P128.w[0] = res.w[0];
3324 rounding_correction (rnd_mode,
3325 is_inexact_lt_midpoint,
3326 is_inexact_gt_midpoint,
3327 is_midpoint_lt_even, is_midpoint_gt_even,
3328 0, &P128, pfpsf);
3329 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3330 // the number of digits in the significand is p34 = 34
3331 if (e4 + scale < expmin) {
3332 is_tiny = 1;
3333 }
3334 }
3335
3336 // the result rounded to the destination precision with unbounded exponent
3337 // is (-1)^p_sign * res * 10^e4
3338 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3339 // res.w[0] unchanged;
3340 // Note: res is correct only if expmin <= e4 <= expmax
3341 ind = p34; // the number of decimal digits in the signifcand of res
3342
3343 // at this point we have the result rounded with unbounded exponent in
3344 // res and we know its tininess:
3345 // res = (-1)^p_sign * significand * 10^e4,
3346 // where q (significand) = ind = p34
3347 // Note: res is correct only if expmin <= e4 <= expmax
3348
3349 // check for overflow if RN
3350 if (rnd_mode == ROUNDING_TO_NEAREST
3351 && (ind + e4) > (p34 + expmax)) {
3352 res.w[1] = p_sign | 0x7800000000000000ull;
3353 res.w[0] = 0x0000000000000000ull;
3354 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
3355 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3356 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3357 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3358 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3359 BID_SWAP128 (res);
3360 BID_RETURN (res)
3361 } // else not overflow or not RN, so continue
3362
3363 // from this point on this is similar to the last part of the computation
3364 // for Cases (15), (16), (17)
3365
3366 // if (e4 >= expmin) we have the result rounded with bounded exponent
3367 if (e4 < expmin) {
3368 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3369 // where the result rounded [at most] once is
3370 // (-1)^p_sign * significand_res * 10^e4
3371
3372 // avoid double rounding error
3373 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3374 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3375 is_midpoint_lt_even0 = is_midpoint_lt_even;
3376 is_midpoint_gt_even0 = is_midpoint_gt_even;
3377 is_inexact_lt_midpoint = 0;
3378 is_inexact_gt_midpoint = 0;
3379 is_midpoint_lt_even = 0;
3380 is_midpoint_gt_even = 0;
3381
3382 if (x0 > ind) {
3383 // nothing is left of res when moving the decimal point left x0 digits
3384 is_inexact_lt_midpoint = 1;
3385 res.w[1] = p_sign | 0x0000000000000000ull;
3386 res.w[0] = 0x0000000000000000ull;
3387 e4 = expmin;
3388 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3389 // this is <, =, or > 1/2 ulp
3390 // compare the ind-digit value in the significand of res with
3391 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3392 // less than, equal to, or greater than 1/2 ulp (significand of res)
3393 R128.w[1] = res.w[1] & MASK_COEFF;
3394 R128.w[0] = res.w[0];
3395 if (ind <= 19) {
3396 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
3397 lt_half_ulp = 1;
3398 is_inexact_lt_midpoint = 1;
3399 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
3400 eq_half_ulp = 1;
3401 is_midpoint_gt_even = 1;
3402 } else { // > 1/2 ulp
3403 gt_half_ulp = 1;
3404 is_inexact_gt_midpoint = 1;
3405 }
3406 } else { // if (ind <= 38)
3407 if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3408 (R128.w[1] == midpoint128[ind - 20].w[1] &&
3409 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
3410 lt_half_ulp = 1;
3411 is_inexact_lt_midpoint = 1;
3412 } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3413 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
3414 eq_half_ulp = 1;
3415 is_midpoint_gt_even = 1;
3416 } else { // > 1/2 ulp
3417 gt_half_ulp = 1;
3418 is_inexact_gt_midpoint = 1;
3419 }
3420 }
3421 if (lt_half_ulp || eq_half_ulp) {
3422 // res = +0.0 * 10^expmin
3423 res.w[1] = 0x0000000000000000ull;
3424 res.w[0] = 0x0000000000000000ull;
3425 } else { // if (gt_half_ulp)
3426 // res = +1 * 10^expmin
3427 res.w[1] = 0x0000000000000000ull;
3428 res.w[0] = 0x0000000000000001ull;
3429 }
3430 res.w[1] = p_sign | res.w[1];
3431 e4 = expmin;
3432 } else { // if (1 <= x0 <= ind - 1 <= 33)
3433 // round the ind-digit result to ind - x0 digits
3434
3435 if (ind <= 18) { // 2 <= ind <= 18
3436 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3437 &is_midpoint_lt_even, &is_midpoint_gt_even,
3438 &is_inexact_lt_midpoint,
3439 &is_inexact_gt_midpoint);
3440 res.w[1] = 0x0;
3441 res.w[0] = R64;
3442 } else if (ind <= 38) {
3443 P128.w[1] = res.w[1] & MASK_COEFF;
3444 P128.w[0] = res.w[0];
3445 round128_19_38 (ind, x0, P128, &res, &incr_exp,
3446 &is_midpoint_lt_even, &is_midpoint_gt_even,
3447 &is_inexact_lt_midpoint,
3448 &is_inexact_gt_midpoint);
3449 }
3450 e4 = e4 + x0; // expmin
3451 // we want the exponent to be expmin, so if incr_exp = 1 then
3452 // multiply the rounded result by 10 - it will still fit in 113 bits
3453 if (incr_exp) {
3454 // 64 x 128 -> 128
3455 P128.w[1] = res.w[1] & MASK_COEFF;
3456 P128.w[0] = res.w[0];
3457 __mul_64x128_to_128 (res, ten2k64[1], P128);
3458 }
3459 res.w[1] =
3460 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
3461 w[1] & MASK_COEFF);
3462 // avoid a double rounding error
3463 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3464 is_midpoint_lt_even) { // double rounding error upward
3465 // res = res - 1
3466 res.w[0]--;
3467 if (res.w[0] == 0xffffffffffffffffull)
3468 res.w[1]--;
3469 // Note: a double rounding error upward is not possible; for this
3470 // the result after the first rounding would have to be 99...95
3471 // (35 digits in all), possibly followed by a number of zeros; this
3472 // not possible in this underflow case
3473 is_midpoint_lt_even = 0;
3474 is_inexact_lt_midpoint = 1;
3475 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3476 is_midpoint_gt_even) { // double rounding error downward
3477 // res = res + 1
3478 res.w[0]++;
3479 if (res.w[0] == 0)
3480 res.w[1]++;
3481 is_midpoint_gt_even = 0;
3482 is_inexact_gt_midpoint = 1;
3483 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3484 !is_inexact_lt_midpoint
3485 && !is_inexact_gt_midpoint) {
3486 // if this second rounding was exact the result may still be
3487 // inexact because of the first rounding
3488 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3489 is_inexact_gt_midpoint = 1;
3490 }
3491 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3492 is_inexact_lt_midpoint = 1;
3493 }
3494 } else if (is_midpoint_gt_even &&
3495 (is_inexact_gt_midpoint0
3496 || is_midpoint_lt_even0)) {
3497 // pulled up to a midpoint
3498 is_inexact_lt_midpoint = 1;
3499 is_inexact_gt_midpoint = 0;
3500 is_midpoint_lt_even = 0;
3501 is_midpoint_gt_even = 0;
3502 } else if (is_midpoint_lt_even &&
3503 (is_inexact_lt_midpoint0
3504 || is_midpoint_gt_even0)) {
3505 // pulled down to a midpoint
3506 is_inexact_lt_midpoint = 0;
3507 is_inexact_gt_midpoint = 1;
3508 is_midpoint_lt_even = 0;
3509 is_midpoint_gt_even = 0;
3510 } else {
3511 ;
3512 }
3513 }
3514 }
3515 // res contains the correct result
3516 // apply correction if not rounding to nearest
3517 if (rnd_mode != ROUNDING_TO_NEAREST) {
3518 rounding_correction (rnd_mode,
3519 is_inexact_lt_midpoint,
3520 is_inexact_gt_midpoint,
3521 is_midpoint_lt_even, is_midpoint_gt_even,
3522 e4, &res, pfpsf);
3523 }
3524 if (is_midpoint_lt_even || is_midpoint_gt_even ||
3525 is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3526 // set the inexact flag
3527 *pfpsf |= INEXACT_EXCEPTION;
3528 if (is_tiny)
3529 *pfpsf |= UNDERFLOW_EXCEPTION;
3530 }
3531 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3532 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3533 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3534 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3535 BID_SWAP128 (res);
3536 BID_RETURN (res)
3537
3538 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3539 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3540 (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3541
3542 // calculate first the result rounded to the destination precision, with
3543 // unbounded exponent
3544
3545 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
3546 rnd_mode, &is_midpoint_lt_even,
3547 &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3548 &is_inexact_gt_midpoint, pfpsf, &res);
3549 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3550 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3551 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3552 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3553 BID_SWAP128 (res);
3554 BID_RETURN (res)
3555
3556 } else {
3557 ;
3558 }
3559
3560 } // end if delta < 0
3561
3562 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3563 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3564 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3565 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3566 BID_SWAP128 (res);
3567 BID_RETURN (res)
3568
3569 }
3570
3571
3572 #if DECIMAL_CALL_BY_REFERENCE
3573 void
3574 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3575 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3576 _EXC_INFO_PARAM) {
3577 UINT128 x = *px, y = *py, z = *pz;
3578 #if !DECIMAL_GLOBAL_ROUNDING
3579 unsigned int rnd_mode = *prnd_mode;
3580 #endif
3581 #else
3582 UINT128
3583 bid128_fma (UINT128 x, UINT128 y, UINT128 z
3584 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3585 _EXC_INFO_PARAM) {
3586 #endif
3587 int is_midpoint_lt_even, is_midpoint_gt_even,
3588 is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3589 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3590
3591 #if DECIMAL_CALL_BY_REFERENCE
3592 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3593 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3594 &res, &x, &y, &z
3595 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3596 _EXC_INFO_ARG);
3597 #else
3598 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3599 &is_inexact_lt_midpoint,
3600 &is_inexact_gt_midpoint, x, y,
3601 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3602 _EXC_INFO_ARG);
3603 #endif
3604 BID_RETURN (res);
3605 }
3606
3607
3608 #if DECIMAL_CALL_BY_REFERENCE
3609 void
3610 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3611 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3612 _EXC_INFO_PARAM) {
3613 UINT64 x = *px, y = *py, z = *pz;
3614 #if !DECIMAL_GLOBAL_ROUNDING
3615 unsigned int rnd_mode = *prnd_mode;
3616 #endif
3617 #else
3618 UINT128
3619 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3620 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3621 _EXC_INFO_PARAM) {
3622 #endif
3623 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3624 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3625 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3626 UINT128 x1, y1, z1;
3627
3628 #if DECIMAL_CALL_BY_REFERENCE
3629 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3630 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3631 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3632 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3633 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3634 &res, &x1, &y1, &z1
3635 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3636 _EXC_INFO_ARG);
3637 #else
3638 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3639 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3640 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3641 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3642 &is_inexact_lt_midpoint,
3643 &is_inexact_gt_midpoint, x1, y1,
3644 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3645 _EXC_INFO_ARG);
3646 #endif
3647 BID_RETURN (res);
3648 }
3649
3650
3651 #if DECIMAL_CALL_BY_REFERENCE
3652 void
3653 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3654 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3655 _EXC_INFO_PARAM) {
3656 UINT64 x = *px, y = *py;
3657 UINT128 z = *pz;
3658 #if !DECIMAL_GLOBAL_ROUNDING
3659 unsigned int rnd_mode = *prnd_mode;
3660 #endif
3661 #else
3662 UINT128
3663 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3664 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3665 _EXC_INFO_PARAM) {
3666 #endif
3667 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3668 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3669 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3670 UINT128 x1, y1;
3671
3672 #if DECIMAL_CALL_BY_REFERENCE
3673 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3674 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3675 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3676 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3677 &res, &x1, &y1, &z
3678 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3679 _EXC_INFO_ARG);
3680 #else
3681 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3682 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3683 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3684 &is_inexact_lt_midpoint,
3685 &is_inexact_gt_midpoint, x1, y1,
3686 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3687 _EXC_INFO_ARG);
3688 #endif
3689 BID_RETURN (res);
3690 }
3691
3692
3693 #if DECIMAL_CALL_BY_REFERENCE
3694 void
3695 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3696 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3697 _EXC_INFO_PARAM) {
3698 UINT64 x = *px, z = *pz;
3699 #if !DECIMAL_GLOBAL_ROUNDING
3700 unsigned int rnd_mode = *prnd_mode;
3701 #endif
3702 #else
3703 UINT128
3704 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3705 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3706 _EXC_INFO_PARAM) {
3707 #endif
3708 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3709 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3710 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3711 UINT128 x1, z1;
3712
3713 #if DECIMAL_CALL_BY_REFERENCE
3714 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3715 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3716 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3717 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3718 &res, &x1, py, &z1
3719 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3720 _EXC_INFO_ARG);
3721 #else
3722 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3723 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3724 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3725 &is_inexact_lt_midpoint,
3726 &is_inexact_gt_midpoint, x1, y,
3727 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3728 _EXC_INFO_ARG);
3729 #endif
3730 BID_RETURN (res);
3731 }
3732
3733
3734 #if DECIMAL_CALL_BY_REFERENCE
3735 void
3736 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3737 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3738 _EXC_INFO_PARAM) {
3739 UINT64 x = *px;
3740 #if !DECIMAL_GLOBAL_ROUNDING
3741 unsigned int rnd_mode = *prnd_mode;
3742 #endif
3743 #else
3744 UINT128
3745 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3746 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3747 _EXC_INFO_PARAM) {
3748 #endif
3749 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3750 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3751 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3752 UINT128 x1;
3753
3754 #if DECIMAL_CALL_BY_REFERENCE
3755 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3756 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3757 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3758 &res, &x1, py, pz
3759 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3760 _EXC_INFO_ARG);
3761 #else
3762 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3763 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3764 &is_inexact_lt_midpoint,
3765 &is_inexact_gt_midpoint, x1, y,
3766 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3767 _EXC_INFO_ARG);
3768 #endif
3769 BID_RETURN (res);
3770 }
3771
3772
3773 #if DECIMAL_CALL_BY_REFERENCE
3774 void
3775 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3776 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3777 _EXC_INFO_PARAM) {
3778 UINT64 y = *py, z = *pz;
3779 #if !DECIMAL_GLOBAL_ROUNDING
3780 unsigned int rnd_mode = *prnd_mode;
3781 #endif
3782 #else
3783 UINT128
3784 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3785 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3786 _EXC_INFO_PARAM) {
3787 #endif
3788 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3789 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3790 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3791 UINT128 y1, z1;
3792
3793 #if DECIMAL_CALL_BY_REFERENCE
3794 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3795 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3796 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3797 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3798 &res, px, &y1, &z1
3799 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3800 _EXC_INFO_ARG);
3801 #else
3802 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3803 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3804 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3805 &is_inexact_lt_midpoint,
3806 &is_inexact_gt_midpoint, x, y1,
3807 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3808 _EXC_INFO_ARG);
3809 #endif
3810 BID_RETURN (res);
3811 }
3812
3813
3814 #if DECIMAL_CALL_BY_REFERENCE
3815 void
3816 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3817 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3818 _EXC_INFO_PARAM) {
3819 UINT64 y = *py;
3820 #if !DECIMAL_GLOBAL_ROUNDING
3821 unsigned int rnd_mode = *prnd_mode;
3822 #endif
3823 #else
3824 UINT128
3825 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3826 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3827 _EXC_INFO_PARAM) {
3828 #endif
3829 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3830 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3831 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3832 UINT128 y1;
3833
3834 #if DECIMAL_CALL_BY_REFERENCE
3835 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3836 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3837 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3838 &res, px, &y1, pz
3839 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3840 _EXC_INFO_ARG);
3841 #else
3842 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3843 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3844 &is_inexact_lt_midpoint,
3845 &is_inexact_gt_midpoint, x, y1,
3846 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3847 _EXC_INFO_ARG);
3848 #endif
3849 BID_RETURN (res);
3850 }
3851
3852
3853 #if DECIMAL_CALL_BY_REFERENCE
3854 void
3855 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3856 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3857 _EXC_INFO_PARAM) {
3858 UINT64 z = *pz;
3859 #if !DECIMAL_GLOBAL_ROUNDING
3860 unsigned int rnd_mode = *prnd_mode;
3861 #endif
3862 #else
3863 UINT128
3864 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3865 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3866 _EXC_INFO_PARAM) {
3867 #endif
3868 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3869 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3870 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3871 UINT128 z1;
3872
3873 #if DECIMAL_CALL_BY_REFERENCE
3874 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3875 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3876 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3877 &res, px, py, &z1
3878 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3879 _EXC_INFO_ARG);
3880 #else
3881 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3882 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3883 &is_inexact_lt_midpoint,
3884 &is_inexact_gt_midpoint, x, y,
3885 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3886 _EXC_INFO_ARG);
3887 #endif
3888 BID_RETURN (res);
3889 }
3890
3891 // Note: bid128qqq_fma is represented by bid128_fma
3892
3893 // Note: bid64ddd_fma is represented by bid64_fma
3894
3895 #if DECIMAL_CALL_BY_REFERENCE
3896 void
3897 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3898 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3899 _EXC_INFO_PARAM) {
3900 UINT64 x = *px, y = *py;
3901 #if !DECIMAL_GLOBAL_ROUNDING
3902 unsigned int rnd_mode = *prnd_mode;
3903 #endif
3904 #else
3905 UINT64
3906 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3907 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3908 _EXC_INFO_PARAM) {
3909 #endif
3910 UINT64 res1 = 0xbaddbaddbaddbaddull;
3911 UINT128 x1, y1;
3912
3913 #if DECIMAL_CALL_BY_REFERENCE
3914 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3915 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3916 bid64qqq_fma (&res1, &x1, &y1, pz
3917 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3918 _EXC_INFO_ARG);
3919 #else
3920 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3921 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3922 res1 = bid64qqq_fma (x1, y1, z
3923 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3924 _EXC_INFO_ARG);
3925 #endif
3926 BID_RETURN (res1);
3927 }
3928
3929
3930 #if DECIMAL_CALL_BY_REFERENCE
3931 void
3932 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3933 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3934 _EXC_INFO_PARAM) {
3935 UINT64 x = *px, z = *pz;
3936 #if !DECIMAL_GLOBAL_ROUNDING
3937 unsigned int rnd_mode = *prnd_mode;
3938 #endif
3939 #else
3940 UINT64
3941 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3942 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3943 _EXC_INFO_PARAM) {
3944 #endif
3945 UINT64 res1 = 0xbaddbaddbaddbaddull;
3946 UINT128 x1, z1;
3947
3948 #if DECIMAL_CALL_BY_REFERENCE
3949 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3950 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3951 bid64qqq_fma (&res1, &x1, py, &z1
3952 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3953 _EXC_INFO_ARG);
3954 #else
3955 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3956 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3957 res1 = bid64qqq_fma (x1, y, z1
3958 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3959 _EXC_INFO_ARG);
3960 #endif
3961 BID_RETURN (res1);
3962 }
3963
3964
3965 #if DECIMAL_CALL_BY_REFERENCE
3966 void
3967 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3968 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3969 _EXC_INFO_PARAM) {
3970 UINT64 x = *px;
3971 #if !DECIMAL_GLOBAL_ROUNDING
3972 unsigned int rnd_mode = *prnd_mode;
3973 #endif
3974 #else
3975 UINT64
3976 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3977 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3978 _EXC_INFO_PARAM) {
3979 #endif
3980 UINT64 res1 = 0xbaddbaddbaddbaddull;
3981 UINT128 x1;
3982
3983 #if DECIMAL_CALL_BY_REFERENCE
3984 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3985 bid64qqq_fma (&res1, &x1, py, pz
3986 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3987 _EXC_INFO_ARG);
3988 #else
3989 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3990 res1 = bid64qqq_fma (x1, y, z
3991 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3992 _EXC_INFO_ARG);
3993 #endif
3994 BID_RETURN (res1);
3995 }
3996
3997
3998 #if DECIMAL_CALL_BY_REFERENCE
3999 void
4000 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4001 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4002 _EXC_INFO_PARAM) {
4003 UINT64 y = *py, z = *pz;
4004 #if !DECIMAL_GLOBAL_ROUNDING
4005 unsigned int rnd_mode = *prnd_mode;
4006 #endif
4007 #else
4008 UINT64
4009 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4010 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4011 _EXC_INFO_PARAM) {
4012 #endif
4013 UINT64 res1 = 0xbaddbaddbaddbaddull;
4014 UINT128 y1, z1;
4015
4016 #if DECIMAL_CALL_BY_REFERENCE
4017 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4018 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4019 bid64qqq_fma (&res1, px, &y1, &z1
4020 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4021 _EXC_INFO_ARG);
4022 #else
4023 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4024 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4025 res1 = bid64qqq_fma (x, y1, z1
4026 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4027 _EXC_INFO_ARG);
4028 #endif
4029 BID_RETURN (res1);
4030 }
4031
4032
4033 #if DECIMAL_CALL_BY_REFERENCE
4034 void
4035 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4036 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4037 _EXC_INFO_PARAM) {
4038 UINT64 y = *py;
4039 #if !DECIMAL_GLOBAL_ROUNDING
4040 unsigned int rnd_mode = *prnd_mode;
4041 #endif
4042 #else
4043 UINT64
4044 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4045 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4046 _EXC_INFO_PARAM) {
4047 #endif
4048 UINT64 res1 = 0xbaddbaddbaddbaddull;
4049 UINT128 y1;
4050
4051 #if DECIMAL_CALL_BY_REFERENCE
4052 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4053 bid64qqq_fma (&res1, px, &y1, pz
4054 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4055 _EXC_INFO_ARG);
4056 #else
4057 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4058 res1 = bid64qqq_fma (x, y1, z
4059 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4060 _EXC_INFO_ARG);
4061 #endif
4062 BID_RETURN (res1);
4063 }
4064
4065
4066 #if DECIMAL_CALL_BY_REFERENCE
4067 void
4068 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4069 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4070 _EXC_INFO_PARAM) {
4071 UINT64 z = *pz;
4072 #if !DECIMAL_GLOBAL_ROUNDING
4073 unsigned int rnd_mode = *prnd_mode;
4074 #endif
4075 #else
4076 UINT64
4077 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4078 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4079 _EXC_INFO_PARAM) {
4080 #endif
4081 UINT64 res1 = 0xbaddbaddbaddbaddull;
4082 UINT128 z1;
4083
4084 #if DECIMAL_CALL_BY_REFERENCE
4085 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4086 bid64qqq_fma (&res1, px, py, &z1
4087 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4088 _EXC_INFO_ARG);
4089 #else
4090 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4091 res1 = bid64qqq_fma (x, y, z1
4092 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4093 _EXC_INFO_ARG);
4094 #endif
4095 BID_RETURN (res1);
4096 }
4097
4098
4099 #if DECIMAL_CALL_BY_REFERENCE
4100 void
4101 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4102 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4103 _EXC_INFO_PARAM) {
4104 UINT128 x = *px, y = *py, z = *pz;
4105 #if !DECIMAL_GLOBAL_ROUNDING
4106 unsigned int rnd_mode = *prnd_mode;
4107 #endif
4108 #else
4109 UINT64
4110 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4111 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4112 _EXC_INFO_PARAM) {
4113 #endif
4114 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4115 is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4116 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4117 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4118 int incr_exp;
4119 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4120 UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4121 UINT64 res1 = 0xbaddbaddbaddbaddull;
4122 unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4123 UINT64 sign;
4124 UINT64 exp;
4125 int unbexp;
4126 UINT128 C;
4127 BID_UI64DOUBLE tmp;
4128 int nr_bits;
4129 int q, x0;
4130 int scale;
4131 int lt_half_ulp = 0, eq_half_ulp = 0;
4132
4133 // Note: for rounding modes other than RN or RA, the result can be obtained
4134 // by rounding first to BID128 and then to BID64
4135
4136 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4137 *pfpsf = 0;
4138
4139 #if DECIMAL_CALL_BY_REFERENCE
4140 bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4141 &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4142 &res, &x, &y, &z
4143 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4144 _EXC_INFO_ARG);
4145 #else
4146 res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4147 &is_inexact_lt_midpoint0,
4148 &is_inexact_gt_midpoint0, x, y,
4149 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4150 _EXC_INFO_ARG);
4151 #endif
4152
4153 if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4154 (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4155 ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4156 ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
4157 #if DECIMAL_CALL_BY_REFERENCE
4158 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4159 #else
4160 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4161 #endif
4162 // determine the unbiased exponent of the result
4163 unbexp = ((res1 >> 53) & 0x3ff) - 398;
4164
4165 // if subnormal, res1 must have exp = -398
4166 // if tiny and inexact set underflow and inexact status flags
4167 if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN
4168 (unbexp == -398)
4169 && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4170 && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4171 || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4172 // set the inexact flag and the underflow flag
4173 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4174 } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4175 is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4176 // set the inexact flag and the underflow flag
4177 *pfpsf |= INEXACT_EXCEPTION;
4178 }
4179
4180 *pfpsf |= save_fpsf;
4181 BID_RETURN (res1);
4182 } // else continue, and use rounding to nearest to round to 16 digits
4183
4184 // at this point the result is rounded to nearest (even or away) to 34 digits
4185 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4186 sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4187 exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4188 unbexp = (exp >> 49) - 6176;
4189 C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4190 C.w[0] = res.w[LOW_128W];
4191
4192 if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero
4193 (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
4194 // clear under/overflow
4195 #if DECIMAL_CALL_BY_REFERENCE
4196 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4197 #else
4198 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4199 #endif
4200 *pfpsf |= save_fpsf;
4201 BID_RETURN (res1);
4202 } // else continue
4203
4204 // -398 - 34 <= unbexp <= 369 + 15
4205 if (rnd_mode == ROUNDING_TIES_AWAY) {
4206 // apply correction, if needed, to make the result rounded to nearest-even
4207 if (is_midpoint_gt_even) {
4208 // res = res - 1
4209 res1--; // res1 is now even
4210 } // else the result is already correctly rounded to nearest-even
4211 }
4212 // at this point the result is finite, non-zero canonical normal or subnormal,
4213 // and in most cases overflow or underflow will not occur
4214
4215 // determine the number of digits q in the result
4216 // q = nr. of decimal digits in x
4217 // determine first the nr. of bits in x
4218 if (C.w[1] == 0) {
4219 if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
4220 // split the 64-bit value in two 32-bit halves to avoid rounding errors
4221 if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
4222 tmp.d = (double) (C.w[0] >> 32); // exact conversion
4223 nr_bits =
4224 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4225 } else { // x < 2^32
4226 tmp.d = (double) (C.w[0]); // exact conversion
4227 nr_bits =
4228 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4229 }
4230 } else { // if x < 2^53
4231 tmp.d = (double) C.w[0]; // exact conversion
4232 nr_bits =
4233 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4234 }
4235 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4236 tmp.d = (double) C.w[1]; // exact conversion
4237 nr_bits =
4238 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4239 }
4240 q = nr_digits[nr_bits - 1].digits;
4241 if (q == 0) {
4242 q = nr_digits[nr_bits - 1].digits1;
4243 if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4244 (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4245 C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4246 q++;
4247 }
4248 // if q > 16, round to nearest even to 16 digits (but for underflow it may
4249 // have to be truncated even more)
4250 if (q > 16) {
4251 x0 = q - 16;
4252 if (q <= 18) {
4253 round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4254 &is_midpoint_lt_even, &is_midpoint_gt_even,
4255 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4256 } else { // 19 <= q <= 34
4257 round128_19_38 (q, x0, C, &res128, &incr_exp,
4258 &is_midpoint_lt_even, &is_midpoint_gt_even,
4259 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4260 res1 = res128.w[0]; // the result fits in 64 bits
4261 }
4262 unbexp = unbexp + x0;
4263 if (incr_exp)
4264 unbexp++;
4265 q = 16; // need to set in case denormalization is necessary
4266 } else {
4267 // the result does not require a second rounding (and it must have
4268 // been exact in the first rounding, since q <= 16)
4269 res1 = C.w[0];
4270 }
4271
4272 // avoid a double rounding error
4273 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4274 is_midpoint_lt_even) { // double rounding error upward
4275 // res = res - 1
4276 res1--; // res1 becomes odd
4277 is_midpoint_lt_even = 0;
4278 is_inexact_lt_midpoint = 1;
4279 if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4280 res1 = 0x002386f26fc0ffffull; // 10^16 - 1
4281 unbexp--;
4282 }
4283 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4284 is_midpoint_gt_even) { // double rounding error downward
4285 // res = res + 1
4286 res1++; // res1 becomes odd (so it cannot be 10^16)
4287 is_midpoint_gt_even = 0;
4288 is_inexact_gt_midpoint = 1;
4289 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4290 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4291 // if this second rounding was exact the result may still be
4292 // inexact because of the first rounding
4293 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4294 is_inexact_gt_midpoint = 1;
4295 }
4296 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4297 is_inexact_lt_midpoint = 1;
4298 }
4299 } else if (is_midpoint_gt_even &&
4300 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4301 // pulled up to a midpoint
4302 is_inexact_lt_midpoint = 1;
4303 is_inexact_gt_midpoint = 0;
4304 is_midpoint_lt_even = 0;
4305 is_midpoint_gt_even = 0;
4306 } else if (is_midpoint_lt_even &&
4307 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4308 // pulled down to a midpoint
4309 is_inexact_lt_midpoint = 0;
4310 is_inexact_gt_midpoint = 1;
4311 is_midpoint_lt_even = 0;
4312 is_midpoint_gt_even = 0;
4313 } else {
4314 ;
4315 }
4316 // this is the result rounded correctly to nearest even, with unbounded exp.
4317
4318 // check for overflow
4319 if (q + unbexp > P16 + expmax16) {
4320 res1 = sign | 0x7800000000000000ull;
4321 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4322 *pfpsf |= save_fpsf;
4323 BID_RETURN (res1)
4324 } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4325 // not overflow; the result must be exact, and we can multiply res1 by
4326 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4327 scale = unbexp - expmax16;
4328 res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4329 unbexp = expmax16; // unbexp - scale
4330 } else {
4331 ; // continue
4332 }
4333
4334 // check for underflow
4335 if (q + unbexp < P16 + expmin16) {
4336 if (unbexp < expmin16) {
4337 // we must truncate more of res
4338 x0 = expmin16 - unbexp; // x0 >= 1
4339 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4340 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4341 is_midpoint_lt_even0 = is_midpoint_lt_even;
4342 is_midpoint_gt_even0 = is_midpoint_gt_even;
4343 is_inexact_lt_midpoint = 0;
4344 is_inexact_gt_midpoint = 0;
4345 is_midpoint_lt_even = 0;
4346 is_midpoint_gt_even = 0;
4347 // the number of decimal digits in res1 is q
4348 if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4349 // 2 <= q <= 16, 1 <= x0 <= 15
4350 round64_2_18 (q, x0, res1, &res1, &incr_exp,
4351 &is_midpoint_lt_even, &is_midpoint_gt_even,
4352 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4353 if (incr_exp) {
4354 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4355 res1 = ten2k64[q - x0];
4356 }
4357 unbexp = unbexp + x0; // expmin16
4358 } else if (x0 == q) {
4359 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4360 // determine relationship with 1/2 ulp
4361 // q <= 16
4362 if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4363 lt_half_ulp = 1;
4364 is_inexact_lt_midpoint = 1;
4365 } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4366 eq_half_ulp = 1;
4367 is_midpoint_gt_even = 1;
4368 } else { // > 1/2 ulp
4369 // gt_half_ulp = 1;
4370 is_inexact_gt_midpoint = 1;
4371 }
4372 if (lt_half_ulp || eq_half_ulp) {
4373 // res = +0.0 * 10^expmin16
4374 res1 = 0x0000000000000000ull;
4375 } else { // if (gt_half_ulp)
4376 // res = +1 * 10^expmin16
4377 res1 = 0x0000000000000001ull;
4378 }
4379 unbexp = expmin16;
4380 } else { // if (x0 > q)
4381 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4382 res1 = 0x0000000000000000ull;
4383 unbexp = expmin16;
4384 is_inexact_lt_midpoint = 1;
4385 }
4386 // avoid a double rounding error
4387 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4388 is_midpoint_lt_even) { // double rounding error upward
4389 // res = res - 1
4390 res1--; // res1 becomes odd
4391 is_midpoint_lt_even = 0;
4392 is_inexact_lt_midpoint = 1;
4393 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4394 is_midpoint_gt_even) { // double rounding error downward
4395 // res = res + 1
4396 res1++; // res1 becomes odd
4397 is_midpoint_gt_even = 0;
4398 is_inexact_gt_midpoint = 1;
4399 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4400 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4401 // if this rounding was exact the result may still be
4402 // inexact because of the previous roundings
4403 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4404 is_inexact_gt_midpoint = 1;
4405 }
4406 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4407 is_inexact_lt_midpoint = 1;
4408 }
4409 } else if (is_midpoint_gt_even &&
4410 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4411 // pulled up to a midpoint
4412 is_inexact_lt_midpoint = 1;
4413 is_inexact_gt_midpoint = 0;
4414 is_midpoint_lt_even = 0;
4415 is_midpoint_gt_even = 0;
4416 } else if (is_midpoint_lt_even &&
4417 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4418 // pulled down to a midpoint
4419 is_inexact_lt_midpoint = 0;
4420 is_inexact_gt_midpoint = 1;
4421 is_midpoint_lt_even = 0;
4422 is_midpoint_gt_even = 0;
4423 } else {
4424 ;
4425 }
4426 }
4427 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4428 // and the result is tiny and exact
4429
4430 // check for inexact result
4431 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4432 is_midpoint_lt_even || is_midpoint_gt_even ||
4433 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4434 is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4435 // set the inexact flag and the underflow flag
4436 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4437 }
4438 } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4439 is_midpoint_lt_even || is_midpoint_gt_even) {
4440 *pfpsf |= INEXACT_EXCEPTION;
4441 }
4442 // this is the result rounded correctly to nearest, with bounded exponent
4443
4444 if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4445 // res = res + 1
4446 res1++; // res1 is now odd
4447 } // else the result is already correct
4448
4449 // assemble the result
4450 if (res1 < 0x0020000000000000ull) { // res < 2^53
4451 res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4452 } else { // res1 >= 2^53
4453 res1 = sign | MASK_STEERING_BITS |
4454 ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4455 }
4456 *pfpsf |= save_fpsf;
4457 BID_RETURN (res1);
4458 }
4459