1 /* Software floating-point emulation.
2    Basic one-word fraction declaration and manipulation.
3    Copyright (C) 1997-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Richard Henderson (rth@cygnus.com),
6 		  Jakub Jelinek (jj@ultra.linux.cz),
7 		  David S. Miller (davem@redhat.com) and
8 		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9 
10    The GNU C Library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14 
15    In addition to the permissions in the GNU Lesser General Public
16    License, the Free Software Foundation gives you unlimited
17    permission to link the compiled version of this file into
18    combinations with other programs, and to distribute those
19    combinations without any restriction coming from the use of this
20    file.  (The Lesser General Public License restrictions do apply in
21    other respects; for example, they cover modification of the file,
22    and distribution when not linked into a combine executable.)
23 
24    The GNU C Library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28 
29    You should have received a copy of the GNU Lesser General Public
30    License along with the GNU C Library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32 
33 #define _FP_FRAC_DECL_1(X)	_FP_W_TYPE X##_f
34 #define _FP_FRAC_COPY_1(D, S)	(D##_f = S##_f)
35 #define _FP_FRAC_SET_1(X, I)	(X##_f = I)
36 #define _FP_FRAC_HIGH_1(X)	(X##_f)
37 #define _FP_FRAC_LOW_1(X)	(X##_f)
38 #define _FP_FRAC_WORD_1(X, w)	(X##_f)
39 
40 #define _FP_FRAC_ADDI_1(X, I)	(X##_f += I)
41 #define _FP_FRAC_SLL_1(X, N)			\
42   do						\
43     {						\
44       if (__builtin_constant_p (N) && (N) == 1)	\
45 	X##_f += X##_f;				\
46       else					\
47 	X##_f <<= (N);				\
48     }						\
49   while (0)
50 #define _FP_FRAC_SRL_1(X, N)	(X##_f >>= N)
51 
52 /* Right shift with sticky-lsb.  */
53 #define _FP_FRAC_SRST_1(X, S, N, sz)	__FP_FRAC_SRST_1 (X##_f, S, N, sz)
54 #define _FP_FRAC_SRS_1(X, N, sz)	__FP_FRAC_SRS_1 (X##_f, N, sz)
55 
56 #define __FP_FRAC_SRST_1(X, S, N, sz)			\
57   do							\
58     {							\
59       S = (__builtin_constant_p (N) && (N) == 1		\
60 	   ? X & 1					\
61 	   : (X << (_FP_W_TYPE_SIZE - (N))) != 0);	\
62       X = X >> (N);					\
63     }							\
64   while (0)
65 
66 #define __FP_FRAC_SRS_1(X, N, sz)				\
67   (X = (X >> (N) | (__builtin_constant_p (N) && (N) == 1	\
68 		    ? X & 1					\
69 		    : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
70 
71 #define _FP_FRAC_ADD_1(R, X, Y)	(R##_f = X##_f + Y##_f)
72 #define _FP_FRAC_SUB_1(R, X, Y)	(R##_f = X##_f - Y##_f)
73 #define _FP_FRAC_DEC_1(X, Y)	(X##_f -= Y##_f)
74 #define _FP_FRAC_CLZ_1(z, X)	__FP_CLZ (z, X##_f)
75 
76 /* Predicates */
77 #define _FP_FRAC_NEGP_1(X)	((_FP_WS_TYPE) X##_f < 0)
78 #define _FP_FRAC_ZEROP_1(X)	(X##_f == 0)
79 #define _FP_FRAC_OVERP_1(fs, X)	(X##_f & _FP_OVERFLOW_##fs)
80 #define _FP_FRAC_CLEAR_OVERP_1(fs, X)	(X##_f &= ~_FP_OVERFLOW_##fs)
81 #define _FP_FRAC_HIGHBIT_DW_1(fs, X)	(X##_f & _FP_HIGHBIT_DW_##fs)
82 #define _FP_FRAC_EQ_1(X, Y)	(X##_f == Y##_f)
83 #define _FP_FRAC_GE_1(X, Y)	(X##_f >= Y##_f)
84 #define _FP_FRAC_GT_1(X, Y)	(X##_f > Y##_f)
85 
86 #define _FP_ZEROFRAC_1		0
87 #define _FP_MINFRAC_1		1
88 #define _FP_MAXFRAC_1		(~(_FP_WS_TYPE) 0)
89 
90 /*
91  * Unpack the raw bits of a native fp value.  Do not classify or
92  * normalize the data.
93  */
94 
95 #define _FP_UNPACK_RAW_1(fs, X, val)		\
96   do						\
97     {						\
98       union _FP_UNION_##fs _flo;		\
99       _flo.flt = (val);				\
100 						\
101       X##_f = _flo.bits.frac;			\
102       X##_e = _flo.bits.exp;			\
103       X##_s = _flo.bits.sign;			\
104     }						\
105   while (0)
106 
107 #define _FP_UNPACK_RAW_1_P(fs, X, val)					\
108   do									\
109     {									\
110       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);	\
111 									\
112       X##_f = _flo->bits.frac;						\
113       X##_e = _flo->bits.exp;						\
114       X##_s = _flo->bits.sign;						\
115     }									\
116   while (0)
117 
118 /*
119  * Repack the raw bits of a native fp value.
120  */
121 
122 #define _FP_PACK_RAW_1(fs, val, X)		\
123   do						\
124     {						\
125       union _FP_UNION_##fs _flo;		\
126 						\
127       _flo.bits.frac = X##_f;			\
128       _flo.bits.exp  = X##_e;			\
129       _flo.bits.sign = X##_s;			\
130 						\
131       (val) = _flo.flt;				\
132     }						\
133   while (0)
134 
135 #define _FP_PACK_RAW_1_P(fs, val, X)					\
136   do									\
137     {									\
138       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);	\
139 									\
140       _flo->bits.frac = X##_f;						\
141       _flo->bits.exp  = X##_e;						\
142       _flo->bits.sign = X##_s;						\
143     }									\
144   while (0)
145 
146 
147 /*
148  * Multiplication algorithms:
149  */
150 
151 /* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
152    multiplication immediately.  */
153 
154 #define _FP_MUL_MEAT_DW_1_imm(wfracbits, R, X, Y)	\
155   do							\
156     {							\
157       R##_f = X##_f * Y##_f;				\
158     }							\
159   while (0)
160 
161 #define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y)				\
162   do									\
163     {									\
164       _FP_MUL_MEAT_DW_1_imm (wfracbits, R, X, Y);			\
165       /* Normalize since we know where the msb of the multiplicands	\
166 	 were (bit B), we know that the msb of the of the product is	\
167 	 at either 2B or 2B-1.  */					\
168       _FP_FRAC_SRS_1 (R, wfracbits-1, 2*wfracbits);			\
169     }									\
170   while (0)
171 
172 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
173 
174 #define _FP_MUL_MEAT_DW_1_wide(wfracbits, R, X, Y, doit)	\
175   do								\
176     {								\
177       doit (R##_f1, R##_f0, X##_f, Y##_f);			\
178     }								\
179   while (0)
180 
181 #define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit)			\
182   do									\
183     {									\
184       _FP_FRAC_DECL_2 (_Z);						\
185       _FP_MUL_MEAT_DW_1_wide (wfracbits, _Z, X, Y, doit);		\
186       /* Normalize since we know where the msb of the multiplicands	\
187 	 were (bit B), we know that the msb of the of the product is	\
188 	 at either 2B or 2B-1.  */					\
189       _FP_FRAC_SRS_2 (_Z, wfracbits-1, 2*wfracbits);			\
190       R##_f = _Z_f0;							\
191     }									\
192   while (0)
193 
194 /* Finally, a simple widening multiply algorithm.  What fun!  */
195 
196 #define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y)			\
197   do									\
198     {									\
199       _FP_W_TYPE _xh, _xl, _yh, _yl;					\
200       _FP_FRAC_DECL_2 (_a);						\
201 									\
202       /* split the words in half */					\
203       _xh = X##_f >> (_FP_W_TYPE_SIZE/2);				\
204       _xl = X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1);	\
205       _yh = Y##_f >> (_FP_W_TYPE_SIZE/2);				\
206       _yl = Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1);	\
207 									\
208       /* multiply the pieces */						\
209       R##_f0 = _xl * _yl;						\
210       _a_f0 = _xh * _yl;						\
211       _a_f1 = _xl * _yh;						\
212       R##_f1 = _xh * _yh;						\
213 									\
214       /* reassemble into two full words */				\
215       if ((_a_f0 += _a_f1) < _a_f1)					\
216 	R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2);		\
217       _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2);				\
218       _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2);				\
219       _FP_FRAC_ADD_2 (R, R, _a);					\
220     }									\
221   while (0)
222 
223 #define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y)		\
224   do							\
225     {							\
226       _FP_FRAC_DECL_2 (_z);				\
227       _FP_MUL_MEAT_DW_1_hard (wfracbits, _z, X, Y);	\
228 							\
229       /* normalize */					\
230       _FP_FRAC_SRS_2 (_z, wfracbits - 1, 2*wfracbits);	\
231       R##_f = _z_f0;					\
232     }							\
233   while (0)
234 
235 
236 /*
237  * Division algorithms:
238  */
239 
240 /* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
241    division immediately.  Give this macro either _FP_DIV_HELP_imm for
242    C primitives or _FP_DIV_HELP_ldiv for the ISO function.  Which you
243    choose will depend on what the compiler does with divrem4.  */
244 
245 #define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit)	\
246   do						\
247     {						\
248       _FP_W_TYPE _q, _r;			\
249       X##_f <<= (X##_f < Y##_f			\
250 		 ? R##_e--, _FP_WFRACBITS_##fs	\
251 		 : _FP_WFRACBITS_##fs - 1);	\
252       doit (_q, _r, X##_f, Y##_f);		\
253       R##_f = _q | (_r != 0);			\
254     }						\
255   while (0)
256 
257 /* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
258    that may be useful in this situation.  This first is for a primitive
259    that requires normalization, the second for one that does not.  Look
260    for UDIV_NEEDS_NORMALIZATION to tell which your machine needs.  */
261 
262 #define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y)				\
263   do									\
264     {									\
265       _FP_W_TYPE _nh, _nl, _q, _r, _y;					\
266 									\
267       /* Normalize Y -- i.e. make the most significant bit set.  */	\
268       _y = Y##_f << _FP_WFRACXBITS_##fs;				\
269 									\
270       /* Shift X op correspondingly high, that is, up one full word.  */ \
271       if (X##_f < Y##_f)						\
272 	{								\
273 	  R##_e--;							\
274 	  _nl = 0;							\
275 	  _nh = X##_f;							\
276 	}								\
277       else								\
278 	{								\
279 	  _nl = X##_f << (_FP_W_TYPE_SIZE - 1);				\
280 	  _nh = X##_f >> 1;						\
281 	}								\
282 									\
283       udiv_qrnnd (_q, _r, _nh, _nl, _y);				\
284       R##_f = _q | (_r != 0);						\
285     }									\
286   while (0)
287 
288 #define _FP_DIV_MEAT_1_udiv(fs, R, X, Y)		\
289   do							\
290     {							\
291       _FP_W_TYPE _nh, _nl, _q, _r;			\
292       if (X##_f < Y##_f)				\
293 	{						\
294 	  R##_e--;					\
295 	  _nl = X##_f << _FP_WFRACBITS_##fs;		\
296 	  _nh = X##_f >> _FP_WFRACXBITS_##fs;		\
297 	}						\
298       else						\
299 	{						\
300 	  _nl = X##_f << (_FP_WFRACBITS_##fs - 1);	\
301 	  _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1);	\
302 	}						\
303       udiv_qrnnd (_q, _r, _nh, _nl, Y##_f);		\
304       R##_f = _q | (_r != 0);				\
305     }							\
306   while (0)
307 
308 
309 /*
310  * Square root algorithms:
311  * We have just one right now, maybe Newton approximation
312  * should be added for those machines where division is fast.
313  */
314 
315 #define _FP_SQRT_MEAT_1(R, S, T, X, q)		\
316   do						\
317     {						\
318       while (q != _FP_WORK_ROUND)		\
319 	{					\
320 	  T##_f = S##_f + q;			\
321 	  if (T##_f <= X##_f)			\
322 	    {					\
323 	      S##_f = T##_f + q;		\
324 	      X##_f -= T##_f;			\
325 	      R##_f += q;			\
326 	    }					\
327 	  _FP_FRAC_SLL_1 (X, 1);		\
328 	  q >>= 1;				\
329 	}					\
330       if (X##_f)				\
331 	{					\
332 	  if (S##_f < X##_f)			\
333 	    R##_f |= _FP_WORK_ROUND;		\
334 	  R##_f |= _FP_WORK_STICKY;		\
335 	}					\
336     }						\
337   while (0)
338 
339 /*
340  * Assembly/disassembly for converting to/from integral types.
341  * No shifting or overflow handled here.
342  */
343 
344 #define _FP_FRAC_ASSEMBLE_1(r, X, rsize)	(r = X##_f)
345 #define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)	(X##_f = r)
346 
347 
348 /*
349  * Convert FP values between word sizes
350  */
351 
352 #define _FP_FRAC_COPY_1_1(D, S)		(D##_f = S##_f)
353