1 /* Software floating-point emulation.
2    Definitions for IEEE Extended Precision.
3    Copyright (C) 1999-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Jakub Jelinek (jj@ultra.linux.cz).
6 
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Lesser General Public
9    License as published by the Free Software Foundation; either
10    version 2.1 of the License, or (at your option) any later version.
11 
12    In addition to the permissions in the GNU Lesser General Public
13    License, the Free Software Foundation gives you unlimited
14    permission to link the compiled version of this file into
15    combinations with other programs, and to distribute those
16    combinations without any restriction coming from the use of this
17    file.  (The Lesser General Public License restrictions do apply in
18    other respects; for example, they cover modification of the file,
19    and distribution when not linked into a combine executable.)
20 
21    The GNU C Library is distributed in the hope that it will be useful,
22    but WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    Lesser General Public License for more details.
25 
26    You should have received a copy of the GNU Lesser General Public
27    License along with the GNU C Library; if not, see
28    <http://www.gnu.org/licenses/>.  */
29 
/* This header only provides definitions for word sizes of 32 bits and
   up; stop the build for anything narrower.  */
#if _FP_W_TYPE_SIZE < 32
# error "Here's a nickel, kid. Go buy yourself a real computer."
#endif
33 
/* Number of bits in the words reserved for one fraction
   (_FP_FRACTBITS_E) and for one double-width fraction
   (_FP_FRACTBITS_DW_E).  With 64-bit (or wider) words that is two and
   four words respectively; with narrower words it is four and eight.  */
#if _FP_W_TYPE_SIZE >= 64
# define _FP_FRACTBITS_E	(2 * _FP_W_TYPE_SIZE)
# define _FP_FRACTBITS_DW_E	(4 * _FP_W_TYPE_SIZE)
#else
# define _FP_FRACTBITS_E	(4 * _FP_W_TYPE_SIZE)
# define _FP_FRACTBITS_DW_E	(8 * _FP_W_TYPE_SIZE)
#endif
41 
/* Parameters of the extended format.  */

/* Width of the stored fraction in bits.  Its top bit (_FP_IMPLBIT_E) is
   written explicitly by the FP_PACK_RAW_E* macros.  */
#define _FP_FRACBITS_E		64
/* Bits of the fraction words that lie above the fraction itself.  */
#define _FP_FRACXBITS_E		(_FP_FRACTBITS_E - _FP_FRACBITS_E)
/* Fraction width once the _FP_WORKBITS working bits are added.  */
#define _FP_WFRACBITS_E		(_FP_WORKBITS + _FP_FRACBITS_E)
/* Bits of the fraction words that lie above the working fraction.  */
#define _FP_WFRACXBITS_E	(_FP_FRACTBITS_E - _FP_WFRACBITS_E)
/* Width of the exponent field (also the width of the `exp' bit-field).  */
#define _FP_EXPBITS_E		15
/* Exponent bias.  */
#define _FP_EXPBIAS_E		16383
/* Largest exponent field value (all _FP_EXPBITS_E bits set).  */
#define _FP_EXPMAX_E		32767
49 
/* Single-word masks for distinguished fraction bits.  Each mask is
   relative to the word that contains the bit, hence the reduction of
   the bit position modulo _FP_W_TYPE_SIZE.  The _SH variants address
   the same bit after the fraction has been shifted up by _FP_WORKBITS.  */

/* Second-highest fraction bit.  */
#define _FP_QNANBIT_E							\
  ((_FP_W_TYPE) 1 << ((_FP_FRACBITS_E - 2) % _FP_W_TYPE_SIZE))
#define _FP_QNANBIT_SH_E						\
  ((_FP_W_TYPE) 1 << ((_FP_FRACBITS_E - 2 + _FP_WORKBITS) % _FP_W_TYPE_SIZE))

/* Highest fraction bit.  */
#define _FP_IMPLBIT_E							\
  ((_FP_W_TYPE) 1 << ((_FP_FRACBITS_E - 1) % _FP_W_TYPE_SIZE))
#define _FP_IMPLBIT_SH_E						\
  ((_FP_W_TYPE) 1 << ((_FP_FRACBITS_E - 1 + _FP_WORKBITS) % _FP_W_TYPE_SIZE))

/* First bit above the working fraction.  */
#define _FP_OVERFLOW_E							\
  ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
60 
/* Double-width counterparts: a double-width working fraction has twice
   the bits of a working fraction; _FP_WFRACXBITS_DW_E is what remains
   of the double-width fraction words above it, and _FP_HIGHBIT_DW_E is
   the mask of its top bit within the word that contains it.  */
#define _FP_WFRACBITS_DW_E	(2 * (_FP_WFRACBITS_E))
#define _FP_WFRACXBITS_DW_E	((_FP_FRACTBITS_DW_E) - (_FP_WFRACBITS_DW_E))
#define _FP_HIGHBIT_DW_E						\
  ((_FP_W_TYPE) 1 << ((_FP_WFRACBITS_DW_E - 1) % _FP_W_TYPE_SIZE))
65 
/* Floating type that the unions below overlay with bit-fields.  The
   machine mode is selected with the GCC `mode (XF)' attribute rather
   than by naming a C type.  */
typedef float XFtype __attribute__ ((mode (XF)));
67 
68 #if _FP_W_TYPE_SIZE < 64
69 
/* Overlay of an XFtype value with its sign, exponent and fraction
   fields, used when a word is narrower than 64 bits.  The fraction is
   held in two _FP_W_TYPE_SIZE-bit fields: frac0 is copied to/from word 0
   of the unpacked fraction and frac1 to/from word 1 by the
   FP_{UNPACK,PACK}_RAW_E* macros.  */
union _FP_UNION_E
{
  XFtype flt;
  /* _FP_STRUCT_LAYOUT is not defined in this file -- presumably a
     configuration hook supplied by the includer; TODO confirm.  */
  struct _FP_STRUCT_LAYOUT
  {
# if __BYTE_ORDER == __BIG_ENDIAN
    /* Big-endian order: padding first, then sign and exponent, then the
       fraction with its high part (frac1) before the low part.  */
    unsigned long pad1 : _FP_W_TYPE_SIZE;
    unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
    unsigned long sign : 1;
    unsigned long exp : _FP_EXPBITS_E;
    unsigned long frac1 : _FP_W_TYPE_SIZE;
    unsigned long frac0 : _FP_W_TYPE_SIZE;
# else
    /* Otherwise the fields are declared in the reverse order and no
       padding fields are declared.  */
    unsigned long frac0 : _FP_W_TYPE_SIZE;
    unsigned long frac1 : _FP_W_TYPE_SIZE;
    unsigned exp : _FP_EXPBITS_E;
    unsigned sign : 1;
# endif /* not bigendian */
  } bits __attribute__ ((packed));
};
90 
91 
/* Declare the unpacked variable X through the generic _FP_DECL,
   requesting a four-word fraction.  */
# define FP_DECL_E(X)		_FP_DECL (4, X)
93 
/* Split the XFtype value VAL into the raw fields of X without any
   interpretation: the two fraction fields go to words 0 and 1 of
   X##_f, words 2 and 3 are cleared, and the exponent and sign fields
   go to X##_e and X##_s.

   The temporary is named after the macro so that it cannot capture an
   identifier appearing in VAL (a caller variable called `_flo' would
   otherwise be shadowed by the macro's own local).  */
# define FP_UNPACK_RAW_E(X, val)			\
  do							\
    {							\
      union _FP_UNION_E FP_UNPACK_RAW_E_flo;		\
      FP_UNPACK_RAW_E_flo.flt = (val);			\
							\
      X##_f[2] = 0;					\
      X##_f[3] = 0;					\
      X##_f[0] = FP_UNPACK_RAW_E_flo.bits.frac0;	\
      X##_f[1] = FP_UNPACK_RAW_E_flo.bits.frac1;	\
      X##_e  = FP_UNPACK_RAW_E_flo.bits.exp;		\
      X##_s  = FP_UNPACK_RAW_E_flo.bits.sign;		\
    }							\
  while (0)
108 
/* Like FP_UNPACK_RAW_E, but VAL is a pointer; the object it points to is
   read in place through union _FP_UNION_E.

   The temporary pointer is named after the macro so that it cannot
   capture an identifier appearing in VAL: with a local called `_flo', a
   caller passing its own `_flo' would initialise the pointer from
   itself.  */
# define FP_UNPACK_RAW_EP(X, val)				\
  do								\
    {								\
      union _FP_UNION_E *FP_UNPACK_RAW_EP_flo			\
	= (union _FP_UNION_E *) (val);				\
								\
      X##_f[2] = 0;						\
      X##_f[3] = 0;						\
      X##_f[0] = FP_UNPACK_RAW_EP_flo->bits.frac0;		\
      X##_f[1] = FP_UNPACK_RAW_EP_flo->bits.frac1;		\
      X##_e  = FP_UNPACK_RAW_EP_flo->bits.exp;			\
      X##_s  = FP_UNPACK_RAW_EP_flo->bits.sign;			\
    }								\
  while (0)
122 
/* Assemble the raw fields of X into an XFtype value and assign it to
   VAL.  The top fraction bit (_FP_IMPLBIT_E, in word 1) is stored
   explicitly: it is set when the exponent is non-zero and cleared
   otherwise.  Note that this adjustment is made to X##_f[1] itself, so
   X is modified.

   The temporary is named after the macro so that it cannot capture an
   identifier appearing in VAL (e.g. a destination variable called
   `_flo').  */
# define FP_PACK_RAW_E(val, X)				\
  do							\
    {							\
      union _FP_UNION_E FP_PACK_RAW_E_flo;		\
							\
      if (X##_e)					\
	X##_f[1] |= _FP_IMPLBIT_E;			\
      else						\
	X##_f[1] &= ~(_FP_IMPLBIT_E);			\
      FP_PACK_RAW_E_flo.bits.frac0 = X##_f[0];		\
      FP_PACK_RAW_E_flo.bits.frac1 = X##_f[1];		\
      FP_PACK_RAW_E_flo.bits.exp   = X##_e;		\
      FP_PACK_RAW_E_flo.bits.sign  = X##_s;		\
							\
      (val) = FP_PACK_RAW_E_flo.flt;			\
    }							\
  while (0)
140 
/* Like FP_PACK_RAW_E, but VAL is a pointer and the fields are written
   in place through union _FP_UNION_E.  Nothing is stored (and X is left
   untouched) when FP_INHIBIT_RESULTS is non-zero.  Otherwise the top
   fraction bit in X##_f[1] is set or cleared according to whether the
   exponent is non-zero, which modifies X.

   The temporary pointer is named after the macro so that it cannot
   capture an identifier appearing in VAL.  */
# define FP_PACK_RAW_EP(val, X)					\
  do								\
    {								\
      if (!FP_INHIBIT_RESULTS)					\
	{							\
	  union _FP_UNION_E *FP_PACK_RAW_EP_flo			\
	    = (union _FP_UNION_E *) (val);			\
								\
	  if (X##_e)						\
	    X##_f[1] |= _FP_IMPLBIT_E;				\
	  else							\
	    X##_f[1] &= ~(_FP_IMPLBIT_E);			\
	  FP_PACK_RAW_EP_flo->bits.frac0 = X##_f[0];		\
	  FP_PACK_RAW_EP_flo->bits.frac1 = X##_f[1];		\
	  FP_PACK_RAW_EP_flo->bits.exp   = X##_e;		\
	  FP_PACK_RAW_EP_flo->bits.sign  = X##_s;		\
	}							\
    }								\
  while (0)
159 
/* Unpacking entry points.  Each one first splits the input into raw
   fields (FP_UNPACK_RAW_E for a value, FP_UNPACK_RAW_EP for a pointer)
   and then hands X to the generic post-processing step for a four-word
   fraction: _FP_UNPACK_CANONICAL for the plain forms and
   _FP_UNPACK_SEMIRAW for the SEMIRAW forms.  */

/* Unpack the value VAL into X in canonical form.  */
# define FP_UNPACK_E(X, val)			\
  do						\
    {						\
      FP_UNPACK_RAW_E (X, val);			\
      _FP_UNPACK_CANONICAL (E, 4, X);		\
    }						\
  while (0)

/* Unpack the object VAL points to into X in canonical form.  */
# define FP_UNPACK_EP(X, val)			\
  do						\
    {						\
      FP_UNPACK_RAW_EP (X, val);		\
      _FP_UNPACK_CANONICAL (E, 4, X);		\
    }						\
  while (0)

/* Unpack the value VAL into X in semi-raw form.  */
# define FP_UNPACK_SEMIRAW_E(X, val)		\
  do						\
    {						\
      FP_UNPACK_RAW_E (X, val);			\
      _FP_UNPACK_SEMIRAW (E, 4, X);		\
    }						\
  while (0)

/* Unpack the object VAL points to into X in semi-raw form.  */
# define FP_UNPACK_SEMIRAW_EP(X, val)		\
  do						\
    {						\
      FP_UNPACK_RAW_EP (X, val);		\
      _FP_UNPACK_SEMIRAW (E, 4, X);		\
    }						\
  while (0)
191 
/* Packing entry points, the inverse of the unpacking ones: X is first
   run through the generic step for a four-word fraction
   (_FP_PACK_CANONICAL or _FP_PACK_SEMIRAW) and then stored with
   FP_PACK_RAW_E (VAL is an lvalue) or FP_PACK_RAW_EP (VAL is a
   pointer).  The raw packers modify X##_f[1], so X should not be relied
   on afterwards.  */

/* Pack canonical X into the lvalue VAL.  */
# define FP_PACK_E(val, X)			\
  do						\
    {						\
      _FP_PACK_CANONICAL (E, 4, X);		\
      FP_PACK_RAW_E (val, X);			\
    }						\
  while (0)

/* Pack canonical X into the object VAL points to.  */
# define FP_PACK_EP(val, X)			\
  do						\
    {						\
      _FP_PACK_CANONICAL (E, 4, X);		\
      FP_PACK_RAW_EP (val, X);			\
    }						\
  while (0)

/* Pack semi-raw X into the lvalue VAL.  */
# define FP_PACK_SEMIRAW_E(val, X)		\
  do						\
    {						\
      _FP_PACK_SEMIRAW (E, 4, X);		\
      FP_PACK_RAW_E (val, X);			\
    }						\
  while (0)

/* Pack semi-raw X into the object VAL points to.  */
# define FP_PACK_SEMIRAW_EP(val, X)		\
  do						\
    {						\
      _FP_PACK_SEMIRAW (E, 4, X);		\
      FP_PACK_RAW_EP (val, X);			\
    }						\
  while (0)
223 
/* Operations on unpacked extended values.  Each forwards to the generic
   macro of the same name, passing the format suffix E and the fraction
   size of four words.  FP_FMA_E additionally passes 8 -- presumably the
   word count of the double-width intermediate (_FP_FRACTBITS_DW_E is
   eight words in this configuration); TODO confirm against _FP_FMA.  */
# define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN (E, 4, X)
# define FP_NEG_E(R, X)		_FP_NEG (E, 4, R, X)
# define FP_ADD_E(R, X, Y)	_FP_ADD (E, 4, R, X, Y)
# define FP_SUB_E(R, X, Y)	_FP_SUB (E, 4, R, X, Y)
# define FP_MUL_E(R, X, Y)	_FP_MUL (E, 4, R, X, Y)
# define FP_DIV_E(R, X, Y)	_FP_DIV (E, 4, R, X, Y)
# define FP_SQRT_E(R, X)	_FP_SQRT (E, 4, R, X)
# define FP_FMA_E(R, X, Y, Z)	_FP_FMA (E, 4, 8, R, X, Y, Z)
232 
/*
 * Square root algorithms:
 * We have just one right now, maybe Newton approximation
 * should be added for those machines where division is fast.
 * This has special _E version because standard _4 square
 * root would not work (it has to start normally with the
 * second word and not the first), but as we have to do it
 * anyway, we optimize it by doing most of the calculations
 * in two UWtype registers instead of four.
 *
 * _FP_SQRT_MEAT_E (R, S, T, X, q) develops the root one bit per
 * iteration.  X is the operand and is consumed as the running
 * remainder, R accumulates the result, S and T are scratch fractions
 * and q is a scratch word holding the bit currently being tried.
 * Only words 0 and 1 of S, T and X take part in the loops:
 *   - X is first shifted right by _FP_WORKBITS;
 *   - the first loop walks q over every bit of word 1, from the top
 *     bit down, and produces R##_f[1];
 *   - the second loop does the same for word 0, comparing and
 *     subtracting across both words and carrying from S##_f[0] into
 *     S##_f[1], and produces R##_f[0];
 *   - R is then shifted left by _FP_WORKBITS again and, if a remainder
 *     is left in X, _FP_WORK_STICKY is set in R##_f[0], together with
 *     _FP_WORK_ROUND when the two-word S is smaller than the two-word X.
 * R and S are only ever added to here, so the caller is presumably
 * expected to have cleared them -- TODO confirm against _FP_SQRT.
 *
 * NOTE(review): X is accessed as an array (X##_f[1], X##_f[0]) here,
 * yet it is also passed to _FP_FRAC_SLL_2 and _FP_FRAC_DEC_2.  The
 * two-word code in the other branch of this file names its words
 * X##_f0/X##_f1, so confirm that those _2 helpers can operate on the
 * array representation used in this branch.
 */

# define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
  do							\
    {							\
      q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
      _FP_FRAC_SRL_4 (X, (_FP_WORKBITS));		\
      while (q)						\
	{						\
	  T##_f[1] = S##_f[1] + q;			\
	  if (T##_f[1] <= X##_f[1])			\
	    {						\
	      S##_f[1] = T##_f[1] + q;			\
	      X##_f[1] -= T##_f[1];			\
	      R##_f[1] += q;				\
	    }						\
	  _FP_FRAC_SLL_2 (X, 1);			\
	  q >>= 1;					\
	}						\
      q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
      while (q)						\
	{						\
	  T##_f[0] = S##_f[0] + q;			\
	  T##_f[1] = S##_f[1];				\
	  if (T##_f[1] < X##_f[1]			\
	      || (T##_f[1] == X##_f[1]			\
		  && T##_f[0] <= X##_f[0]))		\
	    {						\
	      S##_f[0] = T##_f[0] + q;			\
	      S##_f[1] += (T##_f[0] > S##_f[0]);	\
	      _FP_FRAC_DEC_2 (X, T);			\
	      R##_f[0] += q;				\
	    }						\
	  _FP_FRAC_SLL_2 (X, 1);			\
	  q >>= 1;					\
	}						\
      _FP_FRAC_SLL_4 (R, (_FP_WORKBITS));		\
      if (X##_f[0] | X##_f[1])				\
	{						\
	  if (S##_f[1] < X##_f[1]			\
	      || (S##_f[1] == X##_f[1]			\
		  && S##_f[0] < X##_f[0]))		\
	    R##_f[0] |= _FP_WORK_ROUND;			\
	  R##_f[0] |= _FP_WORK_STICKY;			\
	}						\
    }							\
  while (0)
289 
/* Comparisons of unpacked extended values, forwarded to the generic
   macros with format suffix E and a four-word fraction.  The result is
   delivered in r.  */
# define FP_CMP_E(r, X, Y, un)		_FP_CMP (E, 4, r, X, Y, un)
# define FP_CMP_EQ_E(r, X, Y)		_FP_CMP_EQ (E, 4, r, X, Y)
# define FP_CMP_UNORD_E(r, X, Y)	_FP_CMP_UNORD (E, 4, r, X, Y)

/* Conversions between unpacked extended values and integers, forwarded
   likewise; the remaining arguments are passed through unchanged.  */
# define FP_TO_INT_E(r, X, rsz, rsg)	_FP_TO_INT (E, 4, r, X, rsz, rsg)
# define FP_FROM_INT_E(X, r, rs, rt)	_FP_FROM_INT (E, 4, X, r, rs, rt)
296 
/* Word of X treated as the "high" word: index 2 for the working
   fraction, and index 1 for the raw fraction (the word in which
   FP_PACK_RAW_E sets or clears _FP_IMPLBIT_E).  */
# define _FP_FRAC_HIGH_E(X)	(X##_f[2])
# define _FP_FRAC_HIGH_RAW_E(X)	(X##_f[1])

/* High word of a double-width fraction: index 4.  */
# define _FP_FRAC_HIGH_DW_E(X)	(X##_f[4])
301 
302 #else   /* not _FP_W_TYPE_SIZE < 64 */
/* Overlay of an XFtype value with its sign, exponent and fraction
   fields, used when a word has at least 64 bits.  The fraction is a
   single _FP_W_TYPE_SIZE-bit field; the FP_{UNPACK,PACK}_RAW_E* macros
   copy it to/from X##_f0.  */
union _FP_UNION_E
{
  XFtype flt;
  /* _FP_STRUCT_LAYOUT is not defined in this file -- presumably a
     configuration hook supplied by the includer; TODO confirm.  */
  struct _FP_STRUCT_LAYOUT
  {
# if __BYTE_ORDER == __BIG_ENDIAN
    /* Big-endian order: padding, sign and exponent, then the fraction.  */
    _FP_W_TYPE pad  : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
    unsigned sign   : 1;
    unsigned exp    : _FP_EXPBITS_E;
    _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
# else
    /* Otherwise the fraction comes first and no padding is declared.  */
    _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
    unsigned exp    : _FP_EXPBITS_E;
    unsigned sign   : 1;
# endif
  } bits;
};
320 
/* Declare the unpacked variable X through the generic _FP_DECL,
   requesting a two-word fraction.  */
# define FP_DECL_E(X)		_FP_DECL (2, X)
322 
/* Split the XFtype value VAL into the raw fields of X without any
   interpretation: the fraction field goes to X##_f0, X##_f1 is cleared,
   and the exponent and sign fields go to X##_e and X##_s.

   The temporary is named after the macro so that it cannot capture an
   identifier appearing in VAL (a caller variable called `_flo' would
   otherwise be shadowed by the macro's own local).  */
# define FP_UNPACK_RAW_E(X, val)			\
  do							\
    {							\
      union _FP_UNION_E FP_UNPACK_RAW_E_flo;		\
      FP_UNPACK_RAW_E_flo.flt = (val);			\
							\
      X##_f0 = FP_UNPACK_RAW_E_flo.bits.frac;		\
      X##_f1 = 0;					\
      X##_e = FP_UNPACK_RAW_E_flo.bits.exp;		\
      X##_s = FP_UNPACK_RAW_E_flo.bits.sign;		\
    }							\
  while (0)
335 
/* Like FP_UNPACK_RAW_E, but VAL is a pointer; the object it points to is
   read in place through union _FP_UNION_E.

   The temporary pointer is named after the macro so that it cannot
   capture an identifier appearing in VAL: with a local called `_flo', a
   caller passing its own `_flo' would initialise the pointer from
   itself.  */
# define FP_UNPACK_RAW_EP(X, val)				\
  do								\
    {								\
      union _FP_UNION_E *FP_UNPACK_RAW_EP_flo			\
	= (union _FP_UNION_E *) (val);				\
								\
      X##_f0 = FP_UNPACK_RAW_EP_flo->bits.frac;			\
      X##_f1 = 0;						\
      X##_e = FP_UNPACK_RAW_EP_flo->bits.exp;			\
      X##_s = FP_UNPACK_RAW_EP_flo->bits.sign;			\
    }								\
  while (0)
347 
/* Assemble the raw fields of X into an XFtype value and assign it to
   VAL.  The top fraction bit (_FP_IMPLBIT_E) is stored explicitly: it
   is set when the exponent is non-zero and cleared otherwise.  Note
   that this adjustment is made to X##_f0 itself, so X is modified.

   The temporary is named after the macro so that it cannot capture an
   identifier appearing in VAL (e.g. a destination variable called
   `_flo').  */
# define FP_PACK_RAW_E(val, X)				\
  do							\
    {							\
      union _FP_UNION_E FP_PACK_RAW_E_flo;		\
							\
      if (X##_e)					\
	X##_f0 |= _FP_IMPLBIT_E;			\
      else						\
	X##_f0 &= ~(_FP_IMPLBIT_E);			\
      FP_PACK_RAW_E_flo.bits.frac = X##_f0;		\
      FP_PACK_RAW_E_flo.bits.exp  = X##_e;		\
      FP_PACK_RAW_E_flo.bits.sign = X##_s;		\
							\
      (val) = FP_PACK_RAW_E_flo.flt;			\
    }							\
  while (0)
364 
/* Like FP_PACK_RAW_E, but VAL is a pointer and the fields are written
   in place through union _FP_UNION_E.  Nothing is stored (and X is left
   untouched) when FP_INHIBIT_RESULTS is non-zero.  Otherwise the top
   fraction bit in X##_f0 is set or cleared according to whether the
   exponent is non-zero, which modifies X.

   The macro takes exactly (val, X).  It used to be declared with an
   extra, unused leading parameter, which made the two-argument
   invocations in FP_PACK_EP and FP_PACK_SEMIRAW_EP ill-formed and
   disagreed with the definition used for narrower words.

   The temporary pointer is named after the macro so that it cannot
   capture an identifier appearing in VAL.  */
# define FP_PACK_RAW_EP(val, X)					\
  do								\
    {								\
      if (!FP_INHIBIT_RESULTS)					\
	{							\
	  union _FP_UNION_E *FP_PACK_RAW_EP_flo			\
	    = (union _FP_UNION_E *) (val);			\
								\
	  if (X##_e)						\
	    X##_f0 |= _FP_IMPLBIT_E;				\
	  else							\
	    X##_f0 &= ~(_FP_IMPLBIT_E);				\
	  FP_PACK_RAW_EP_flo->bits.frac = X##_f0;		\
	  FP_PACK_RAW_EP_flo->bits.exp  = X##_e;		\
	  FP_PACK_RAW_EP_flo->bits.sign = X##_s;		\
	}							\
    }								\
  while (0)
382 
383 
/* Unpacking entry points.  Each one first splits the input into raw
   fields (FP_UNPACK_RAW_E for a value, FP_UNPACK_RAW_EP for a pointer)
   and then hands X to the generic post-processing step for a two-word
   fraction: _FP_UNPACK_CANONICAL for the plain forms and
   _FP_UNPACK_SEMIRAW for the SEMIRAW forms.  */

/* Unpack the value VAL into X in canonical form.  */
# define FP_UNPACK_E(X, val)			\
  do						\
    {						\
      FP_UNPACK_RAW_E (X, val);			\
      _FP_UNPACK_CANONICAL (E, 2, X);		\
    }						\
  while (0)

/* Unpack the object VAL points to into X in canonical form.  */
# define FP_UNPACK_EP(X, val)			\
  do						\
    {						\
      FP_UNPACK_RAW_EP (X, val);		\
      _FP_UNPACK_CANONICAL (E, 2, X);		\
    }						\
  while (0)

/* Unpack the value VAL into X in semi-raw form.  */
# define FP_UNPACK_SEMIRAW_E(X, val)		\
  do						\
    {						\
      FP_UNPACK_RAW_E (X, val);			\
      _FP_UNPACK_SEMIRAW (E, 2, X);		\
    }						\
  while (0)

/* Unpack the object VAL points to into X in semi-raw form.  */
# define FP_UNPACK_SEMIRAW_EP(X, val)		\
  do						\
    {						\
      FP_UNPACK_RAW_EP (X, val);		\
      _FP_UNPACK_SEMIRAW (E, 2, X);		\
    }						\
  while (0)
415 
/* Packing entry points, the inverse of the unpacking ones: X is first
   run through the generic step for a two-word fraction
   (_FP_PACK_CANONICAL or _FP_PACK_SEMIRAW) and then stored with
   FP_PACK_RAW_E (VAL is an lvalue) or FP_PACK_RAW_EP (VAL is a
   pointer).  The raw packers modify X##_f0, so X should not be relied
   on afterwards.  The pointer forms invoke FP_PACK_RAW_EP with exactly
   two arguments, (val, X).  */

/* Pack canonical X into the lvalue VAL.  */
# define FP_PACK_E(val, X)			\
  do						\
    {						\
      _FP_PACK_CANONICAL (E, 2, X);		\
      FP_PACK_RAW_E (val, X);			\
    }						\
  while (0)

/* Pack canonical X into the object VAL points to.  */
# define FP_PACK_EP(val, X)			\
  do						\
    {						\
      _FP_PACK_CANONICAL (E, 2, X);		\
      FP_PACK_RAW_EP (val, X);			\
    }						\
  while (0)

/* Pack semi-raw X into the lvalue VAL.  */
# define FP_PACK_SEMIRAW_E(val, X)		\
  do						\
    {						\
      _FP_PACK_SEMIRAW (E, 2, X);		\
      FP_PACK_RAW_E (val, X);			\
    }						\
  while (0)

/* Pack semi-raw X into the object VAL points to.  */
# define FP_PACK_SEMIRAW_EP(val, X)		\
  do						\
    {						\
      _FP_PACK_SEMIRAW (E, 2, X);		\
      FP_PACK_RAW_EP (val, X);			\
    }						\
  while (0)
447 
/* Operations on unpacked extended values.  Each forwards to the generic
   macro of the same name, passing the format suffix E and the fraction
   size of two words.  FP_FMA_E additionally passes 4 -- presumably the
   word count of the double-width intermediate (_FP_FRACTBITS_DW_E is
   four words in this configuration); TODO confirm against _FP_FMA.  */
# define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN (E, 2, X)
# define FP_NEG_E(R, X)		_FP_NEG (E, 2, R, X)
# define FP_ADD_E(R, X, Y)	_FP_ADD (E, 2, R, X, Y)
# define FP_SUB_E(R, X, Y)	_FP_SUB (E, 2, R, X, Y)
# define FP_MUL_E(R, X, Y)	_FP_MUL (E, 2, R, X, Y)
# define FP_DIV_E(R, X, Y)	_FP_DIV (E, 2, R, X, Y)
# define FP_SQRT_E(R, X)	_FP_SQRT (E, 2, R, X)
# define FP_FMA_E(R, X, Y, Z)	_FP_FMA (E, 2, 4, R, X, Y, Z)
456 
/*
 * Square root algorithms:
 * We have just one right now, maybe Newton approximation
 * should be added for those machines where division is fast.
 * We optimize it by doing most of the calculations
 * in one UWtype register instead of two, although we don't
 * have to.
 *
 * _FP_SQRT_MEAT_E (R, S, T, X, q) develops the root one bit per
 * iteration.  X is the operand and is consumed as the running
 * remainder, R accumulates the result, S and T are scratch fractions
 * and q is a scratch word holding the bit currently being tried.
 * Only the low words (..._f0) take part in the loop:
 *   - X is first shifted right by _FP_WORKBITS;
 *   - the loop walks q over every bit of the word, from the top bit
 *     down, and produces R##_f0;
 *   - R is then shifted left by _FP_WORKBITS again and, if a remainder
 *     is left in X##_f0, _FP_WORK_STICKY is set in R##_f0, together
 *     with _FP_WORK_ROUND when S##_f0 is smaller than X##_f0.
 * R and S are only ever added to here, so the caller is presumably
 * expected to have cleared them -- TODO confirm against _FP_SQRT.
 */
# define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
  do							\
    {							\
      q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
      _FP_FRAC_SRL_2 (X, (_FP_WORKBITS));		\
      while (q)						\
	{						\
	  T##_f0 = S##_f0 + q;				\
	  if (T##_f0 <= X##_f0)				\
	    {						\
	      S##_f0 = T##_f0 + q;			\
	      X##_f0 -= T##_f0;				\
	      R##_f0 += q;				\
	    }						\
	  _FP_FRAC_SLL_1 (X, 1);			\
	  q >>= 1;					\
	}						\
      _FP_FRAC_SLL_2 (R, (_FP_WORKBITS));		\
      if (X##_f0)					\
	{						\
	  if (S##_f0 < X##_f0)				\
	    R##_f0 |= _FP_WORK_ROUND;			\
	  R##_f0 |= _FP_WORK_STICKY;			\
	}						\
    }							\
  while (0)
491 
/* Comparisons of unpacked extended values, forwarded to the generic
   macros with format suffix E and a two-word fraction.  The result is
   delivered in r.  */
# define FP_CMP_E(r, X, Y, un)		_FP_CMP (E, 2, r, X, Y, un)
# define FP_CMP_EQ_E(r, X, Y)		_FP_CMP_EQ (E, 2, r, X, Y)
# define FP_CMP_UNORD_E(r, X, Y)	_FP_CMP_UNORD (E, 2, r, X, Y)

/* Conversions between unpacked extended values and integers, forwarded
   likewise; the remaining arguments are passed through unchanged.  */
# define FP_TO_INT_E(r, X, rsz, rsg)	_FP_TO_INT (E, 2, r, X, rsz, rsg)
# define FP_FROM_INT_E(X, r, rs, rt)	_FP_FROM_INT (E, 2, X, r, rs, rt)
498 
/* Word of X treated as the "high" word: X##_f1 for the working
   fraction, and X##_f0 for the raw fraction (the word in which
   FP_PACK_RAW_E sets or clears _FP_IMPLBIT_E).  */
# define _FP_FRAC_HIGH_E(X)	(X##_f1)
# define _FP_FRAC_HIGH_RAW_E(X)	(X##_f0)

/* High word of a double-width fraction: index 2.  Unlike the two-word
   fractions above, this one is accessed as an array.  */
# define _FP_FRAC_HIGH_DW_E(X)	(X##_f[2])
503 
504 #endif /* not _FP_W_TYPE_SIZE < 64 */
505