1 /******************************************************************************
2   Copyright (c) 2007-2011, Intel Corp.
3   All rights reserved.
4 
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions are met:
7 
8     * Redistributions of source code must retain the above copyright notice,
9       this list of conditions and the following disclaimer.
10     * Redistributions in binary form must reproduce the above copyright
11       notice, this list of conditions and the following disclaimer in the
12       documentation and/or other materials provided with the distribution.
13     * Neither the name of Intel Corporation nor the names of its contributors
14       may be used to endorse or promote products derived from this software
15       without specific prior written permission.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27   THE POSSIBILITY OF SUCH DAMAGE.
28 ******************************************************************************/
29 
30 /*****************************************************************************
31  *    BID64 fma
32  *****************************************************************************
33  *
34  *  Algorithm description:
35  *
36  *  if multiplication is guranteed exact (short coefficients)
37  *     call the unpacked arg. equivalent of bid64_add(x*y, z)
38  *  else
39  *     get full coefficient_x*coefficient_y product
40  *     call subroutine to perform addition of 64-bit argument
41  *                                         to 128-bit product
42  *
43  ****************************************************************************/
44 
45 #define BID_FUNCTION_SETS_BINARY_FLAGS
46 
47 #include "bid_inline_add.h"
48 
49 #if DECIMAL_CALL_BY_REFERENCE
50 BID_EXTERN_C void bid64_mul (BID_UINT64 * pres, BID_UINT64 * px,
51 		       BID_UINT64 *
52 		       py _RND_MODE_PARAM _EXC_FLAGS_PARAM
53 		       _EXC_MASKS_PARAM _EXC_INFO_PARAM);
54 #else
55 
56 BID_EXTERN_C BID_UINT64 bid64_mul (BID_UINT64 x,
57 			 BID_UINT64 y _RND_MODE_PARAM
58 			 _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
59 			 _EXC_INFO_PARAM);
60 #endif
61 
62 
63 BID_TYPE0_FUNCTION_ARGTYPE1_ARGTYPE2_ARGTYPE3(BID_UINT64, bid64_fma, BID_UINT64, x, BID_UINT64, y, BID_UINT64, z)
64   BID_UINT128 P, CT, CZ;
65 #if DECIMAL_TINY_DETECTION_AFTER_ROUNDING
66   BID_UINT128 PU;
67 #endif
68   BID_UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z,
69     coefficient_z;
70   BID_UINT64 C64, remainder_y, res;
71   BID_UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z;
72   int_double tempx, tempy;
73   int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy,
74     bin_expon_product;
75 #if DECIMAL_TINY_DETECTION_AFTER_ROUNDING
76   int rmode;
77 #endif
78   int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey,
79     scale_z, uf_status;
80 
81   BID_OPT_SAVE_BINARY_FLAGS()
82 
83   valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
84   valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
85   valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z);
86 
87   // unpack arguments, check for NaN, Infinity, or 0
88   if (!valid_x || !valid_y || !valid_z) {
89 
90     if ((y & MASK_NAN) == MASK_NAN) {	// y is NAN
91       // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
92       // check first for non-canonical NaN payload
93       y = y & 0xfe03ffffffffffffull;	// clear G6-G12
94       if ((y & 0x0003ffffffffffffull) > 999999999999999ull) {
95 	y = y & 0xfe00000000000000ull;	// clear G6-G12 and the payload bits
96       }
97       if ((y & MASK_SNAN) == MASK_SNAN) {	// y is SNAN
98 	// set invalid flag
99 	*pfpsf |= BID_INVALID_EXCEPTION;
100 	// return quiet (y)
101 	res = y & 0xfdffffffffffffffull;
102       } else {	// y is QNaN
103 	// return y
104 	res = y;
105 	// if z = SNaN or x = SNaN signal invalid exception
106 	if ((z & MASK_SNAN) == MASK_SNAN
107 	    || (x & MASK_SNAN) == MASK_SNAN) {
108 	  // set invalid flag
109 	  *pfpsf |= BID_INVALID_EXCEPTION;
110 	}
111       }
112       BID_RETURN (res)
113     } else if ((z & MASK_NAN) == MASK_NAN) {	// z is NAN
114       // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
115       // check first for non-canonical NaN payload
116       z = z & 0xfe03ffffffffffffull;	// clear G6-G12
117       if ((z & 0x0003ffffffffffffull) > 999999999999999ull) {
118 	z = z & 0xfe00000000000000ull;	// clear G6-G12 and the payload bits
119       }
120       if ((z & MASK_SNAN) == MASK_SNAN) {	// z is SNAN
121 	// set invalid flag
122 	*pfpsf |= BID_INVALID_EXCEPTION;
123 	// return quiet (z)
124 	res = z & 0xfdffffffffffffffull;
125       } else {	// z is QNaN
126 	// return z
127 	res = z;
128 	// if x = SNaN signal invalid exception
129 	if ((x & MASK_SNAN) == MASK_SNAN) {
130 	  // set invalid flag
131 	  *pfpsf |= BID_INVALID_EXCEPTION;
132 	}
133       }
134       BID_RETURN (res)
135     } else if ((x & MASK_NAN) == MASK_NAN) {	// x is NAN
136       // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
137       // check first for non-canonical NaN payload
138       x = x & 0xfe03ffffffffffffull;	// clear G6-G12
139       if ((x & 0x0003ffffffffffffull) > 999999999999999ull) {
140 	x = x & 0xfe00000000000000ull;	// clear G6-G12 and the payload bits
141       }
142       if ((x & MASK_SNAN) == MASK_SNAN) {	// x is SNAN
143 	// set invalid flag
144 	*pfpsf |= BID_INVALID_EXCEPTION;
145 	// return quiet (x)
146 	res = x & 0xfdffffffffffffffull;
147       } else {	// x is QNaN
148 	// return x
149 	res = x;	// clear out G[6]-G[16]
150       }
151       BID_RETURN (res)
152     }
153 
154     if (!valid_x) {
155       // x is Inf. or 0
156 
157       // x is Infinity?
158       if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
159 	// check if y is 0
160 	if (!coefficient_y) {
161 	  // y==0, return NaN
162 #ifdef BID_SET_STATUS_FLAGS
163 	  if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull)
164 	    __set_status_flags (pfpsf, BID_INVALID_EXCEPTION);
165 #endif
166 	  BID_RETURN (0x7c00000000000000ull);
167 	}
168 	// test if z is Inf of oposite sign
169 	if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
170 	    && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
171 	  // return NaN
172 #ifdef BID_SET_STATUS_FLAGS
173 	  __set_status_flags (pfpsf, BID_INVALID_EXCEPTION);
174 #endif
175 	  BID_RETURN (0x7c00000000000000ull);
176 	}
177 	// otherwise return +/-Inf
178 	BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
179 		    0x7800000000000000ull);
180       }
181       // x is 0
182       if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)
183 	  && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
184 
185 	if (coefficient_z) {
186 	  exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y;
187 
188 	  sign_z = z & 0x8000000000000000ull;
189 
190 	  if (exponent_y >= exponent_z)
191 	    BID_RETURN (z);
192 	  res =
193 	    add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
194 			&rnd_mode, pfpsf);
195 	  BID_RETURN (res);
196 	}
197       }
198     }
199     if (!valid_y) {
200       // y is Inf. or 0
201 
202       // y is Infinity?
203       if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) {
204 	// check if x is 0
205 	if (!coefficient_x) {
206 	  // y==0, return NaN
207 #ifdef BID_SET_STATUS_FLAGS
208 	  __set_status_flags (pfpsf, BID_INVALID_EXCEPTION);
209 #endif
210 	  BID_RETURN (0x7c00000000000000ull);
211 	}
212 	// test if z is Inf of oposite sign
213 	if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
214 	    && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
215 #ifdef BID_SET_STATUS_FLAGS
216 	  __set_status_flags (pfpsf, BID_INVALID_EXCEPTION);
217 #endif
218 	  // return NaN
219 	  BID_RETURN (0x7c00000000000000ull);
220 	}
221 	// otherwise return +/-Inf
222 	BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
223 		    0x7800000000000000ull);
224       }
225       // y is 0
226       if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
227 
228 	if (coefficient_z) {
229 	  exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS;
230 
231 	  sign_z = z & 0x8000000000000000ull;
232 
233 	  if (exponent_y >= exponent_z)
234 	    BID_RETURN (z);
235 	  res =
236 	    add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
237 			&rnd_mode, pfpsf);
238 	  BID_RETURN (res);
239 	}
240       }
241     }
242 
243     if (!valid_z) {
244       // y is Inf. or 0
245 
246       // test if y is NaN/Inf
247       if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) {
248 	BID_RETURN (coefficient_z & QUIET_MASK64);
249       }
250       // z is 0, return x*y
251       if ((!coefficient_x) || (!coefficient_y)) {
252 	//0+/-0
253 	exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS;
254 	if (exponent_x > DECIMAL_MAX_EXPON_64)
255 	  exponent_x = DECIMAL_MAX_EXPON_64;
256 	else if (exponent_x < 0)
257 	  exponent_x = 0;
258 	if (exponent_x <= exponent_z)
259 	  res = ((BID_UINT64) exponent_x) << 53;
260 	else
261 	  res = ((BID_UINT64) exponent_z) << 53;
262 	if ((sign_x ^ sign_y) == sign_z)
263 	  res |= sign_z;
264 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
265 #ifndef IEEE_ROUND_NEAREST
266 	else if (rnd_mode == BID_ROUNDING_DOWN)
267 	  res |= 0x8000000000000000ull;
268 #endif
269 #endif
270 	BID_RETURN (res);
271       }
272     }
273   }
274 
275   /* get binary coefficients of x and y */
276 
277   //--- get number of bits in the coefficients of x and y ---
278   // version 2 (original)
279   tempx.d = (double) coefficient_x;
280   bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52);
281 
282   tempy.d = (double) coefficient_y;
283   bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52);
284 
285   // magnitude estimate for coefficient_x*coefficient_y is
286   //        2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx)
287   bin_expon_product = bin_expon_cx + bin_expon_cy;
288 
289   // check if coefficient_x*coefficient_y<2^(10*k+3)
290   // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1
291   if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) {
292     //  easy multiply
293     C64 = coefficient_x * coefficient_y;
294     final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS;
295     if ((final_exponent > 0) || (!coefficient_z)) {
296       res =
297 	bid_get_add64 (sign_x ^ sign_y,
298 		   final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf);
299       BID_RETURN (res);
300     } else {
301       P.w[0] = C64;
302       P.w[1] = 0;
303       extra_digits = 0;
304     }
305   } else {
306     if (!coefficient_z) {
307 #if DECIMAL_CALL_BY_REFERENCE
308       bid64_mul (&res, &x,
309 		 &y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
310 		 _EXC_INFO_ARG);
311 #else
312       res =
313 	bid64_mul (x,
314 		   y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
315 		   _EXC_INFO_ARG);
316 #endif
317       BID_RETURN (res);
318     }
319     // get 128-bit product: coefficient_x*coefficient_y
320     __mul_64x64_to_128 (P, coefficient_x, coefficient_y);
321 
322     // tighten binary range of P:  leading bit is 2^bp
323     // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1
324     bin_expon_product -= 2 * BINARY_EXPONENT_BIAS;
325     __tight_bin_range_128 (bp, P, bin_expon_product);
326 
327     // get number of decimal digits in the product
328     digits_p = bid_estimate_decimal_digits[bp];
329     if (!(__unsigned_compare_gt_128 (bid_power10_table_128[digits_p], P)))
330       digits_p++;	// if bid_power10_table_128[digits_p] <= P
331 
332     // determine number of decimal digits to be rounded out
333     extra_digits = digits_p - MAX_FORMAT_DIGITS;
334     final_exponent =
335       exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS;
336   }
337 
338   if (((unsigned) final_exponent) >= 3 * 256) {
339     if (final_exponent < 0) {
340       //--- get number of bits in the coefficients of z  ---
341       tempx.d = (double) coefficient_z;
342       bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
343       // get number of decimal digits in the coeff_x
344       digits_z = bid_estimate_decimal_digits[bin_expon_cx];
345       if (coefficient_z >= bid_power10_table_128[digits_z].w[0])
346 	digits_z++;
347       // underflow
348       if ((final_exponent + 16 < 0)
349 	  || (exponent_z + digits_z > 33 + final_exponent)) {
350 	res =
351 	  BID_normalize (sign_z, exponent_z, coefficient_z,
352 			 sign_x ^ sign_y, 1, rnd_mode, pfpsf);
353 	BID_RETURN (res);
354       }
355 
356       ez = exponent_z + digits_z - 16;
357       if (ez < 0)
358 	ez = 0;
359       scale_z = exponent_z - ez;
360       coefficient_z *= bid_power10_table_128[scale_z].w[0];
361       ey = final_exponent - extra_digits;
362       extra_digits = ez - ey;
363 
364       if (extra_digits > 17) {
365 	CYh = __truncate (P, 16);
366 	// get remainder
367 	T = bid_power10_table_128[16].w[0];
368 	__mul_64x64_to_64 (CY0L, CYh, T);
369 	remainder_y = P.w[0] - CY0L;
370 
371 	extra_digits -= 16;
372 	P.w[0] = CYh;
373 	P.w[1] = 0;
374       } else
375 	remainder_y = 0;
376 
377       // align coeff_x, CYh
378       __mul_64x64_to_128 (CZ, coefficient_z,
379 			  bid_power10_table_128[extra_digits].w[0]);
380 
381       if (sign_z == (sign_y ^ sign_x)) {
382 	__add_128_128 (CT, CZ, P);
383 	if (__unsigned_compare_ge_128
384 	    (CT, bid_power10_table_128[16 + extra_digits])) {
385 	  extra_digits++;
386 	  ez++;
387 	}
388       } else {
389 	if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) {
390 	  P.w[0]++;
391 	  if (!P.w[0])
392 	    P.w[1]++;
393 	}
394 	__sub_128_128 (CT, CZ, P);
395 	if (((BID_SINT64) CT.w[1]) < 0) {
396 	  sign_z = sign_y ^ sign_x;
397 	  CT.w[0] = 0 - CT.w[0];
398 	  CT.w[1] = 0 - CT.w[1];
399 	  if (CT.w[0])
400 	    CT.w[1]--;
401 	} else if(!(CT.w[1]|CT.w[0]))
402 		sign_z = (rnd_mode!=BID_ROUNDING_DOWN)? 0: 0x8000000000000000ull;
403 	if (ez
404 	    &&
405 	    (__unsigned_compare_gt_128
406 	     (bid_power10_table_128[15 + extra_digits], CT))) {
407 	  extra_digits--;
408 	  ez--;
409 	}
410       }
411 
412 #ifdef BID_SET_STATUS_FLAGS
413       uf_status = 0;
414       if ((!ez)
415 	  &&
416 	  __unsigned_compare_gt_128 (bid_power10_table_128
417 				     [extra_digits + 15], CT)) {
418 #if DECIMAL_TINY_DETECTION_AFTER_ROUNDING
419 	rmode = rnd_mode;
420 	if (sign_z && (unsigned) (rmode - 1) < 2)
421 	  rmode = 3 - rmode;
422 	PU = bid_power10_table_128[extra_digits + 15];
423 	PU.w[0]--;
424 	if (__unsigned_compare_gt_128 (PU, CT)
425 	    || (rmode == BID_ROUNDING_DOWN)
426 	    || (rmode == BID_ROUNDING_TO_ZERO))
427 	  uf_status = BID_UNDERFLOW_EXCEPTION;
428 	else if (extra_digits < 2) {
429 	  if ((rmode == BID_ROUNDING_UP)) {
430 	    if (!extra_digits)
431 	      uf_status = BID_UNDERFLOW_EXCEPTION;
432 	    else {
433 	      if (remainder_y && (sign_z != (sign_y ^ sign_x)))
434 		remainder_y = bid_power10_table_128[16].w[0] - remainder_y;
435 
436 	      if (bid_power10_table_128[15].w[0] > remainder_y)
437 		uf_status = BID_UNDERFLOW_EXCEPTION;
438 	    }
439 	  } else	// RN or RN_away
440 	  {
441 	    if (remainder_y && (sign_z != (sign_y ^ sign_x)))
442 	      remainder_y = bid_power10_table_128[16].w[0] - remainder_y;
443 
444 	    if (!extra_digits) {
445 	      remainder_y += bid_round_const_table[rmode][15];
446 	      if (remainder_y < bid_power10_table_128[16].w[0])
447 		uf_status = BID_UNDERFLOW_EXCEPTION;
448 	    } else {
449 	      if (remainder_y < bid_round_const_table[rmode][16])
450 		uf_status = BID_UNDERFLOW_EXCEPTION;
451 	    }
452 	  }
453 	  //__set_status_flags (pfpsf, uf_status);
454 	}
455 #else  // DECIMAL_TINY_DETECTION_AFTER_ROUNDING
456 		uf_status = BID_UNDERFLOW_EXCEPTION;
457 #endif
458       }
459 #endif
460       res =
461 	__bid_full_round64_remainder (sign_z, ez - extra_digits, CT,
462 				      extra_digits, remainder_y,
463 				      rnd_mode, pfpsf, uf_status);
464       BID_RETURN (res);
465 
466     } else {
467       if ((sign_z == (sign_x ^ sign_y))
468 	  || (final_exponent > 3 * 256 + 15)) {
469 	res =
470 	  fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent,
471 				   1000000000000000ull, rnd_mode,
472 				   pfpsf);
473 	BID_RETURN (res);
474       }
475     }
476   }
477 
478 
479   if (extra_digits > 0) {
480     res =
481       bid_get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y,
482 		  final_exponent, P, extra_digits, rnd_mode, pfpsf);
483     BID_RETURN (res);
484   }
485   // go to convert_format and exit
486   else {
487     C64 = __low_64 (P);
488 
489     res =
490       bid_get_add64 (sign_x ^ sign_y,
491 		 exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64,
492 		 sign_z, exponent_z, coefficient_z,
493 		 rnd_mode, pfpsf);
494     BID_RETURN (res);
495   }
496 }
497 
498 
499