1 /* Software floating-point emulation.
2    Definitions for IEEE Extended Precision.
3    Copyright (C) 1999-2016 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Jakub Jelinek (jj@ultra.linux.cz).
6 
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Lesser General Public
9    License as published by the Free Software Foundation; either
10    version 2.1 of the License, or (at your option) any later version.
11 
12    In addition to the permissions in the GNU Lesser General Public
13    License, the Free Software Foundation gives you unlimited
14    permission to link the compiled version of this file into
15    combinations with other programs, and to distribute those
16    combinations without any restriction coming from the use of this
17    file.  (The Lesser General Public License restrictions do apply in
18    other respects; for example, they cover modification of the file,
19    and distribution when not linked into a combine executable.)
20 
21    The GNU C Library is distributed in the hope that it will be useful,
22    but WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    Lesser General Public License for more details.
25 
26    You should have received a copy of the GNU Lesser General Public
27    License along with the GNU C Library; if not, see
28    <http://www.gnu.org/licenses/>.  */
29 
30 #ifndef SOFT_FP_EXTENDED_H
31 #define SOFT_FP_EXTENDED_H	1
32 
33 #if _FP_W_TYPE_SIZE < 32
34 # error "Here's a nickel, kid. Go buy yourself a real computer."
35 #endif
36 
37 #if _FP_W_TYPE_SIZE < 64
38 # define _FP_FRACTBITS_E	(4*_FP_W_TYPE_SIZE)
39 # define _FP_FRACTBITS_DW_E	(8*_FP_W_TYPE_SIZE)
40 #else
41 # define _FP_FRACTBITS_E	(2*_FP_W_TYPE_SIZE)
42 # define _FP_FRACTBITS_DW_E	(4*_FP_W_TYPE_SIZE)
43 #endif
44 
45 #define _FP_FRACBITS_E		64
46 #define _FP_FRACXBITS_E		(_FP_FRACTBITS_E - _FP_FRACBITS_E)
47 #define _FP_WFRACBITS_E		(_FP_WORKBITS + _FP_FRACBITS_E)
48 #define _FP_WFRACXBITS_E	(_FP_FRACTBITS_E - _FP_WFRACBITS_E)
49 #define _FP_EXPBITS_E		15
50 #define _FP_EXPBIAS_E		16383
51 #define _FP_EXPMAX_E		32767
52 
53 #define _FP_QNANBIT_E		\
54 	((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
55 #define _FP_QNANBIT_SH_E		\
56 	((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
57 #define _FP_IMPLBIT_E		\
58 	((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
59 #define _FP_IMPLBIT_SH_E		\
60 	((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
61 #define _FP_OVERFLOW_E		\
62 	((_FP_W_TYPE) 1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
63 
64 #define _FP_WFRACBITS_DW_E	(2 * _FP_WFRACBITS_E)
65 #define _FP_WFRACXBITS_DW_E	(_FP_FRACTBITS_DW_E - _FP_WFRACBITS_DW_E)
66 #define _FP_HIGHBIT_DW_E	\
67   ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_E - 1) % _FP_W_TYPE_SIZE)
68 
69 typedef float XFtype __attribute__ ((mode (XF)));
70 
71 #if _FP_W_TYPE_SIZE < 64
72 
73 union _FP_UNION_E
74 {
75   XFtype flt;
76   struct _FP_STRUCT_LAYOUT
77   {
78 # if __BYTE_ORDER == __BIG_ENDIAN
79     unsigned long pad1 : _FP_W_TYPE_SIZE;
80     unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
81     unsigned long sign : 1;
82     unsigned long exp : _FP_EXPBITS_E;
83     unsigned long frac1 : _FP_W_TYPE_SIZE;
84     unsigned long frac0 : _FP_W_TYPE_SIZE;
85 # else
86     unsigned long frac0 : _FP_W_TYPE_SIZE;
87     unsigned long frac1 : _FP_W_TYPE_SIZE;
88     unsigned exp : _FP_EXPBITS_E;
89     unsigned sign : 1;
90 # endif /* not bigendian */
91   } bits __attribute__ ((packed));
92 };
93 
94 
95 # define FP_DECL_E(X)		_FP_DECL (4, X)
96 
97 # define FP_UNPACK_RAW_E(X, val)			\
98   do							\
99     {							\
100       union _FP_UNION_E FP_UNPACK_RAW_E_flo;		\
101       FP_UNPACK_RAW_E_flo.flt = (val);			\
102 							\
103       X##_f[2] = 0;					\
104       X##_f[3] = 0;					\
105       X##_f[0] = FP_UNPACK_RAW_E_flo.bits.frac0;	\
106       X##_f[1] = FP_UNPACK_RAW_E_flo.bits.frac1;	\
107       X##_f[1] &= ~_FP_IMPLBIT_E;			\
108       X##_e  = FP_UNPACK_RAW_E_flo.bits.exp;		\
109       X##_s  = FP_UNPACK_RAW_E_flo.bits.sign;		\
110     }							\
111   while (0)
112 
113 # define FP_UNPACK_RAW_EP(X, val)			\
114   do							\
115     {							\
116       union _FP_UNION_E *FP_UNPACK_RAW_EP_flo		\
117 	= (union _FP_UNION_E *) (val);			\
118 							\
119       X##_f[2] = 0;					\
120       X##_f[3] = 0;					\
121       X##_f[0] = FP_UNPACK_RAW_EP_flo->bits.frac0;	\
122       X##_f[1] = FP_UNPACK_RAW_EP_flo->bits.frac1;	\
123       X##_f[1] &= ~_FP_IMPLBIT_E;			\
124       X##_e  = FP_UNPACK_RAW_EP_flo->bits.exp;		\
125       X##_s  = FP_UNPACK_RAW_EP_flo->bits.sign;		\
126     }							\
127   while (0)
128 
129 # define FP_PACK_RAW_E(val, X)			\
130   do						\
131     {						\
132       union _FP_UNION_E FP_PACK_RAW_E_flo;	\
133 						\
134       if (X##_e)				\
135 	X##_f[1] |= _FP_IMPLBIT_E;		\
136       else					\
137 	X##_f[1] &= ~(_FP_IMPLBIT_E);		\
138       FP_PACK_RAW_E_flo.bits.frac0 = X##_f[0];	\
139       FP_PACK_RAW_E_flo.bits.frac1 = X##_f[1];	\
140       FP_PACK_RAW_E_flo.bits.exp   = X##_e;	\
141       FP_PACK_RAW_E_flo.bits.sign  = X##_s;	\
142 						\
143       (val) = FP_PACK_RAW_E_flo.flt;		\
144     }						\
145   while (0)
146 
147 # define FP_PACK_RAW_EP(val, X)				\
148   do							\
149     {							\
150       if (!FP_INHIBIT_RESULTS)				\
151 	{						\
152 	  union _FP_UNION_E *FP_PACK_RAW_EP_flo		\
153 	    = (union _FP_UNION_E *) (val);		\
154 							\
155 	  if (X##_e)					\
156 	    X##_f[1] |= _FP_IMPLBIT_E;			\
157 	  else						\
158 	    X##_f[1] &= ~(_FP_IMPLBIT_E);		\
159 	  FP_PACK_RAW_EP_flo->bits.frac0 = X##_f[0];	\
160 	  FP_PACK_RAW_EP_flo->bits.frac1 = X##_f[1];	\
161 	  FP_PACK_RAW_EP_flo->bits.exp   = X##_e;	\
162 	  FP_PACK_RAW_EP_flo->bits.sign  = X##_s;	\
163 	}						\
164     }							\
165   while (0)
166 
167 # define FP_UNPACK_E(X, val)			\
168   do						\
169     {						\
170       FP_UNPACK_RAW_E (X, (val));		\
171       _FP_UNPACK_CANONICAL (E, 4, X);		\
172     }						\
173   while (0)
174 
175 # define FP_UNPACK_EP(X, val)			\
176   do						\
177     {						\
178       FP_UNPACK_RAW_EP (X, (val));		\
179       _FP_UNPACK_CANONICAL (E, 4, X);		\
180     }						\
181   while (0)
182 
183 # define FP_UNPACK_SEMIRAW_E(X, val)		\
184   do						\
185     {						\
186       FP_UNPACK_RAW_E (X, (val));		\
187       _FP_UNPACK_SEMIRAW (E, 4, X);		\
188     }						\
189   while (0)
190 
191 # define FP_UNPACK_SEMIRAW_EP(X, val)		\
192   do						\
193     {						\
194       FP_UNPACK_RAW_EP (X, (val));		\
195       _FP_UNPACK_SEMIRAW (E, 4, X);		\
196     }						\
197   while (0)
198 
199 # define FP_PACK_E(val, X)			\
200   do						\
201     {						\
202       _FP_PACK_CANONICAL (E, 4, X);		\
203       FP_PACK_RAW_E ((val), X);			\
204     }						\
205   while (0)
206 
207 # define FP_PACK_EP(val, X)			\
208   do						\
209     {						\
210       _FP_PACK_CANONICAL (E, 4, X);		\
211       FP_PACK_RAW_EP ((val), X);		\
212     }						\
213   while (0)
214 
215 # define FP_PACK_SEMIRAW_E(val, X)		\
216   do						\
217     {						\
218       _FP_PACK_SEMIRAW (E, 4, X);		\
219       FP_PACK_RAW_E ((val), X);			\
220     }						\
221   while (0)
222 
223 # define FP_PACK_SEMIRAW_EP(val, X)		\
224   do						\
225     {						\
226       _FP_PACK_SEMIRAW (E, 4, X);		\
227       FP_PACK_RAW_EP ((val), X);		\
228     }						\
229   while (0)
230 
231 # define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN (E, 4, X)
232 # define FP_NEG_E(R, X)		_FP_NEG (E, 4, R, X)
233 # define FP_ADD_E(R, X, Y)	_FP_ADD (E, 4, R, X, Y)
234 # define FP_SUB_E(R, X, Y)	_FP_SUB (E, 4, R, X, Y)
235 # define FP_MUL_E(R, X, Y)	_FP_MUL (E, 4, R, X, Y)
236 # define FP_DIV_E(R, X, Y)	_FP_DIV (E, 4, R, X, Y)
237 # define FP_SQRT_E(R, X)	_FP_SQRT (E, 4, R, X)
238 # define FP_FMA_E(R, X, Y, Z)	_FP_FMA (E, 4, 8, R, X, Y, Z)
239 
240 /* Square root algorithms:
241    We have just one right now, maybe Newton approximation
242    should be added for those machines where division is fast.
243    This has special _E version because standard _4 square
244    root would not work (it has to start normally with the
245    second word and not the first), but as we have to do it
246    anyway, we optimize it by doing most of the calculations
247    in two UWtype registers instead of four.  */
248 
249 # define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
250   do							\
251     {							\
252       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
253       _FP_FRAC_SRL_4 (X, (_FP_WORKBITS));		\
254       while (q)						\
255 	{						\
256 	  T##_f[1] = S##_f[1] + (q);			\
257 	  if (T##_f[1] <= X##_f[1])			\
258 	    {						\
259 	      S##_f[1] = T##_f[1] + (q);		\
260 	      X##_f[1] -= T##_f[1];			\
261 	      R##_f[1] += (q);				\
262 	    }						\
263 	  _FP_FRAC_SLL_2 (X, 1);			\
264 	  (q) >>= 1;					\
265 	}						\
266       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
267       while (q)						\
268 	{						\
269 	  T##_f[0] = S##_f[0] + (q);			\
270 	  T##_f[1] = S##_f[1];				\
271 	  if (T##_f[1] < X##_f[1]			\
272 	      || (T##_f[1] == X##_f[1]			\
273 		  && T##_f[0] <= X##_f[0]))		\
274 	    {						\
275 	      S##_f[0] = T##_f[0] + (q);		\
276 	      S##_f[1] += (T##_f[0] > S##_f[0]);	\
277 	      _FP_FRAC_DEC_2 (X, T);			\
278 	      R##_f[0] += (q);				\
279 	    }						\
280 	  _FP_FRAC_SLL_2 (X, 1);			\
281 	  (q) >>= 1;					\
282 	}						\
283       _FP_FRAC_SLL_4 (R, (_FP_WORKBITS));		\
284       if (X##_f[0] | X##_f[1])				\
285 	{						\
286 	  if (S##_f[1] < X##_f[1]			\
287 	      || (S##_f[1] == X##_f[1]			\
288 		  && S##_f[0] < X##_f[0]))		\
289 	    R##_f[0] |= _FP_WORK_ROUND;			\
290 	  R##_f[0] |= _FP_WORK_STICKY;			\
291 	}						\
292     }							\
293   while (0)
294 
295 # define FP_CMP_E(r, X, Y, un, ex)	_FP_CMP (E, 4, (r), X, Y, (un), (ex))
296 # define FP_CMP_EQ_E(r, X, Y, ex)	_FP_CMP_EQ (E, 4, (r), X, Y, (ex))
297 # define FP_CMP_UNORD_E(r, X, Y, ex)	_FP_CMP_UNORD (E, 4, (r), X, Y, (ex))
298 
299 # define FP_TO_INT_E(r, X, rsz, rsg)	_FP_TO_INT (E, 4, (r), X, (rsz), (rsg))
300 # define FP_TO_INT_ROUND_E(r, X, rsz, rsg)	\
301   _FP_TO_INT_ROUND (E, 4, (r), X, (rsz), (rsg))
302 # define FP_FROM_INT_E(X, r, rs, rt)	_FP_FROM_INT (E, 4, X, (r), (rs), rt)
303 
304 # define _FP_FRAC_HIGH_E(X)	(X##_f[2])
305 # define _FP_FRAC_HIGH_RAW_E(X)	(X##_f[1])
306 
307 # define _FP_FRAC_HIGH_DW_E(X)	(X##_f[4])
308 
309 #else   /* not _FP_W_TYPE_SIZE < 64 */
310 union _FP_UNION_E
311 {
312   XFtype flt;
313   struct _FP_STRUCT_LAYOUT
314   {
315 # if __BYTE_ORDER == __BIG_ENDIAN
316     _FP_W_TYPE pad  : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
317     unsigned sign   : 1;
318     unsigned exp    : _FP_EXPBITS_E;
319     _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
320 # else
321     _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
322     unsigned exp    : _FP_EXPBITS_E;
323     unsigned sign   : 1;
324 # endif
325   } bits;
326 };
327 
328 # define FP_DECL_E(X)		_FP_DECL (2, X)
329 
330 # define FP_UNPACK_RAW_E(X, val)		\
331   do						\
332     {						\
333       union _FP_UNION_E FP_UNPACK_RAW_E_flo;	\
334       FP_UNPACK_RAW_E_flo.flt = (val);		\
335 						\
336       X##_f0 = FP_UNPACK_RAW_E_flo.bits.frac;	\
337       X##_f0 &= ~_FP_IMPLBIT_E;			\
338       X##_f1 = 0;				\
339       X##_e = FP_UNPACK_RAW_E_flo.bits.exp;	\
340       X##_s = FP_UNPACK_RAW_E_flo.bits.sign;	\
341     }						\
342   while (0)
343 
344 # define FP_UNPACK_RAW_EP(X, val)		\
345   do						\
346     {						\
347       union _FP_UNION_E *FP_UNPACK_RAW_EP_flo	\
348 	= (union _FP_UNION_E *) (val);		\
349 						\
350       X##_f0 = FP_UNPACK_RAW_EP_flo->bits.frac;	\
351       X##_f0 &= ~_FP_IMPLBIT_E;			\
352       X##_f1 = 0;				\
353       X##_e = FP_UNPACK_RAW_EP_flo->bits.exp;	\
354       X##_s = FP_UNPACK_RAW_EP_flo->bits.sign;	\
355     }						\
356   while (0)
357 
358 # define FP_PACK_RAW_E(val, X)			\
359   do						\
360     {						\
361       union _FP_UNION_E FP_PACK_RAW_E_flo;	\
362 						\
363       if (X##_e)				\
364 	X##_f0 |= _FP_IMPLBIT_E;		\
365       else					\
366 	X##_f0 &= ~(_FP_IMPLBIT_E);		\
367       FP_PACK_RAW_E_flo.bits.frac = X##_f0;	\
368       FP_PACK_RAW_E_flo.bits.exp  = X##_e;	\
369       FP_PACK_RAW_E_flo.bits.sign = X##_s;	\
370 						\
371       (val) = FP_PACK_RAW_E_flo.flt;		\
372     }						\
373   while (0)
374 
375 # define FP_PACK_RAW_EP(fs, val, X)			\
376   do							\
377     {							\
378       if (!FP_INHIBIT_RESULTS)				\
379 	{						\
380 	  union _FP_UNION_E *FP_PACK_RAW_EP_flo		\
381 	    = (union _FP_UNION_E *) (val);		\
382 							\
383 	  if (X##_e)					\
384 	    X##_f0 |= _FP_IMPLBIT_E;			\
385 	  else						\
386 	    X##_f0 &= ~(_FP_IMPLBIT_E);			\
387 	  FP_PACK_RAW_EP_flo->bits.frac = X##_f0;	\
388 	  FP_PACK_RAW_EP_flo->bits.exp  = X##_e;	\
389 	  FP_PACK_RAW_EP_flo->bits.sign = X##_s;	\
390 	}						\
391     }							\
392   while (0)
393 
394 
395 # define FP_UNPACK_E(X, val)			\
396   do						\
397     {						\
398       FP_UNPACK_RAW_E (X, (val));		\
399       _FP_UNPACK_CANONICAL (E, 2, X);		\
400     }						\
401   while (0)
402 
403 # define FP_UNPACK_EP(X, val)			\
404   do						\
405     {						\
406       FP_UNPACK_RAW_EP (X, (val));		\
407       _FP_UNPACK_CANONICAL (E, 2, X);		\
408     }						\
409   while (0)
410 
411 # define FP_UNPACK_SEMIRAW_E(X, val)		\
412   do						\
413     {						\
414       FP_UNPACK_RAW_E (X, (val));		\
415       _FP_UNPACK_SEMIRAW (E, 2, X);		\
416     }						\
417   while (0)
418 
419 # define FP_UNPACK_SEMIRAW_EP(X, val)		\
420   do						\
421     {						\
422       FP_UNPACK_RAW_EP (X, (val));		\
423       _FP_UNPACK_SEMIRAW (E, 2, X);		\
424     }						\
425   while (0)
426 
427 # define FP_PACK_E(val, X)			\
428   do						\
429     {						\
430       _FP_PACK_CANONICAL (E, 2, X);		\
431       FP_PACK_RAW_E ((val), X);			\
432     }						\
433   while (0)
434 
435 # define FP_PACK_EP(val, X)			\
436   do						\
437     {						\
438       _FP_PACK_CANONICAL (E, 2, X);		\
439       FP_PACK_RAW_EP ((val), X);		\
440     }						\
441   while (0)
442 
443 # define FP_PACK_SEMIRAW_E(val, X)		\
444   do						\
445     {						\
446       _FP_PACK_SEMIRAW (E, 2, X);		\
447       FP_PACK_RAW_E ((val), X);			\
448     }						\
449   while (0)
450 
451 # define FP_PACK_SEMIRAW_EP(val, X)		\
452   do						\
453     {						\
454       _FP_PACK_SEMIRAW (E, 2, X);		\
455       FP_PACK_RAW_EP ((val), X);		\
456     }						\
457   while (0)
458 
459 # define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN (E, 2, X)
460 # define FP_NEG_E(R, X)		_FP_NEG (E, 2, R, X)
461 # define FP_ADD_E(R, X, Y)	_FP_ADD (E, 2, R, X, Y)
462 # define FP_SUB_E(R, X, Y)	_FP_SUB (E, 2, R, X, Y)
463 # define FP_MUL_E(R, X, Y)	_FP_MUL (E, 2, R, X, Y)
464 # define FP_DIV_E(R, X, Y)	_FP_DIV (E, 2, R, X, Y)
465 # define FP_SQRT_E(R, X)	_FP_SQRT (E, 2, R, X)
466 # define FP_FMA_E(R, X, Y, Z)	_FP_FMA (E, 2, 4, R, X, Y, Z)
467 
468 /* Square root algorithms:
469    We have just one right now, maybe Newton approximation
470    should be added for those machines where division is fast.
471    We optimize it by doing most of the calculations
472    in one UWtype registers instead of two, although we don't
473    have to.  */
474 # define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
475   do							\
476     {							\
477       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
478       _FP_FRAC_SRL_2 (X, (_FP_WORKBITS));		\
479       while (q)						\
480 	{						\
481 	  T##_f0 = S##_f0 + (q);			\
482 	  if (T##_f0 <= X##_f0)				\
483 	    {						\
484 	      S##_f0 = T##_f0 + (q);			\
485 	      X##_f0 -= T##_f0;				\
486 	      R##_f0 += (q);				\
487 	    }						\
488 	  _FP_FRAC_SLL_1 (X, 1);			\
489 	  (q) >>= 1;					\
490 	}						\
491       _FP_FRAC_SLL_2 (R, (_FP_WORKBITS));		\
492       if (X##_f0)					\
493 	{						\
494 	  if (S##_f0 < X##_f0)				\
495 	    R##_f0 |= _FP_WORK_ROUND;			\
496 	  R##_f0 |= _FP_WORK_STICKY;			\
497 	}						\
498     }							\
499   while (0)
500 
501 # define FP_CMP_E(r, X, Y, un, ex)	_FP_CMP (E, 2, (r), X, Y, (un), (ex))
502 # define FP_CMP_EQ_E(r, X, Y, ex)	_FP_CMP_EQ (E, 2, (r), X, Y, (ex))
503 # define FP_CMP_UNORD_E(r, X, Y, ex)	_FP_CMP_UNORD (E, 2, (r), X, Y, (ex))
504 
505 # define FP_TO_INT_E(r, X, rsz, rsg)	_FP_TO_INT (E, 2, (r), X, (rsz), (rsg))
506 # define FP_TO_INT_ROUND_E(r, X, rsz, rsg)	\
507   _FP_TO_INT_ROUND (E, 2, (r), X, (rsz), (rsg))
508 # define FP_FROM_INT_E(X, r, rs, rt)	_FP_FROM_INT (E, 2, X, (r), (rs), rt)
509 
510 # define _FP_FRAC_HIGH_E(X)	(X##_f1)
511 # define _FP_FRAC_HIGH_RAW_E(X)	(X##_f0)
512 
513 # define _FP_FRAC_HIGH_DW_E(X)	(X##_f[2])
514 
515 #endif /* not _FP_W_TYPE_SIZE < 64 */
516 
517 #endif /* !SOFT_FP_EXTENDED_H */
518