1 /* Software floating-point emulation.
2    Basic two-word fraction declaration and manipulation.
3    Copyright (C) 1997-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Richard Henderson (rth@cygnus.com),
6 		  Jakub Jelinek (jj@ultra.linux.cz),
7 		  David S. Miller (davem@redhat.com) and
8 		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9 
10    The GNU C Library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14 
15    In addition to the permissions in the GNU Lesser General Public
16    License, the Free Software Foundation gives you unlimited
17    permission to link the compiled version of this file into
18    combinations with other programs, and to distribute those
19    combinations without any restriction coming from the use of this
20    file.  (The Lesser General Public License restrictions do apply in
21    other respects; for example, they cover modification of the file,
22    and distribution when not linked into a combine executable.)
23 
24    The GNU C Library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28 
29    You should have received a copy of the GNU Lesser General Public
30    License along with the GNU C Library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32 
33 #define _FP_FRAC_DECL_2(X)	_FP_W_TYPE X##_f0, X##_f1
34 #define _FP_FRAC_COPY_2(D, S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
35 #define _FP_FRAC_SET_2(X, I)	__FP_FRAC_SET_2 (X, I)
36 #define _FP_FRAC_HIGH_2(X)	(X##_f1)
37 #define _FP_FRAC_LOW_2(X)	(X##_f0)
38 #define _FP_FRAC_WORD_2(X, w)	(X##_f##w)
39 
40 #define _FP_FRAC_SLL_2(X, N)						\
41   (void) (((N) < _FP_W_TYPE_SIZE)					\
42 	  ? ({								\
43 	      if (__builtin_constant_p (N) && (N) == 1)			\
44 		{							\
45 		  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
46 		  X##_f0 += X##_f0;					\
47 		}							\
48 	      else							\
49 		{							\
50 		  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51 		  X##_f0 <<= (N);					\
52 		}							\
53 	      0;							\
54 	    })								\
55 	  : ({								\
56 	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);		\
57 	      X##_f0 = 0;						\
58 	    }))
59 
60 
61 #define _FP_FRAC_SRL_2(X, N)						\
62   (void) (((N) < _FP_W_TYPE_SIZE)					\
63 	  ? ({								\
64 	      X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65 	      X##_f1 >>= (N);						\
66 	    })								\
67 	  : ({								\
68 	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);		\
69 	      X##_f1 = 0;						\
70 	    }))
71 
72 /* Right shift with sticky-lsb.  */
73 #define _FP_FRAC_SRST_2(X, S, N, sz)					\
74   (void) (((N) < _FP_W_TYPE_SIZE)					\
75 	  ? ({								\
76 	      S = (__builtin_constant_p (N) && (N) == 1			\
77 		   ? X##_f0 & 1						\
78 		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);		\
79 	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80 	      X##_f1 >>= (N);						\
81 	    })								\
82 	  : ({								\
83 	      S = ((((N) == _FP_W_TYPE_SIZE				\
84 		     ? 0						\
85 		     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))		\
86 		    | X##_f0) != 0);					\
87 	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));		\
88 	      X##_f1 = 0;						\
89 	    }))
90 
91 #define _FP_FRAC_SRS_2(X, N, sz)					\
92   (void) (((N) < _FP_W_TYPE_SIZE)					\
93 	  ? ({								\
94 	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
95 			| (__builtin_constant_p (N) && (N) == 1		\
96 			   ? X##_f0 & 1					\
97 			   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98 	      X##_f1 >>= (N);						\
99 	    })								\
100 	  : ({								\
101 	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)		\
102 			| ((((N) == _FP_W_TYPE_SIZE			\
103 			     ? 0					\
104 			     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))	\
105 			    | X##_f0) != 0));				\
106 	      X##_f1 = 0;						\
107 	    }))
108 
109 #define _FP_FRAC_ADDI_2(X, I)	\
110   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
111 
112 #define _FP_FRAC_ADD_2(R, X, Y)	\
113   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
114 
115 #define _FP_FRAC_SUB_2(R, X, Y)	\
116   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
117 
118 #define _FP_FRAC_DEC_2(X, Y)	\
119   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
120 
121 #define _FP_FRAC_CLZ_2(R, X)			\
122   do						\
123     {						\
124       if (X##_f1)				\
125 	__FP_CLZ (R, X##_f1);			\
126       else					\
127 	{					\
128 	  __FP_CLZ (R, X##_f0);			\
129 	  R += _FP_W_TYPE_SIZE;			\
130 	}					\
131     }						\
132   while (0)
133 
134 /* Predicates */
135 #define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE) X##_f1 < 0)
136 #define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
137 #define _FP_FRAC_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X)	\
140   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
141 #define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
142 #define _FP_FRAC_GT_2(X, Y)	\
143   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
144 #define _FP_FRAC_GE_2(X, Y)	\
145   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
146 
147 #define _FP_ZEROFRAC_2		0, 0
148 #define _FP_MINFRAC_2		0, 1
149 #define _FP_MAXFRAC_2		(~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
150 
151 /*
152  * Internals
153  */
154 
155 #define __FP_FRAC_SET_2(X, I1, I0)	(X##_f0 = I0, X##_f1 = I1)
156 
157 #define __FP_CLZ_2(R, xh, xl)			\
158   do						\
159     {						\
160       if (xh)					\
161 	__FP_CLZ (R, xh);			\
162       else					\
163 	{					\
164 	  __FP_CLZ (R, xl);			\
165 	  R += _FP_W_TYPE_SIZE;			\
166 	}					\
167     }						\
168   while (0)
169 
170 #if 0
171 
172 # ifndef __FP_FRAC_ADDI_2
173 #  define __FP_FRAC_ADDI_2(xh, xl, i)	\
174   (xh += ((xl += i) < i))
175 # endif
176 # ifndef __FP_FRAC_ADD_2
177 #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)	\
178   (rh = xh + yh + ((rl = xl + yl) < xl))
179 # endif
180 # ifndef __FP_FRAC_SUB_2
181 #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)	\
182   (rh = xh - yh - ((rl = xl - yl) > xl))
183 # endif
184 # ifndef __FP_FRAC_DEC_2
185 #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
186   do						\
187     {						\
188       UWtype _t = xl;				\
189       xh -= yh + ((xl -= yl) > _t);		\
190     }						\
191   while (0)
192 # endif
193 
194 #else
195 
196 # undef __FP_FRAC_ADDI_2
197 # define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa (xh, xl, xh, xl, 0, i)
198 # undef __FP_FRAC_ADD_2
199 # define __FP_FRAC_ADD_2		add_ssaaaa
200 # undef __FP_FRAC_SUB_2
201 # define __FP_FRAC_SUB_2		sub_ddmmss
202 # undef __FP_FRAC_DEC_2
203 # define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
204   sub_ddmmss (xh, xl, xh, xl, yh, yl)
205 
206 #endif
207 
208 /*
209  * Unpack the raw bits of a native fp value.  Do not classify or
210  * normalize the data.
211  */
212 
213 #define _FP_UNPACK_RAW_2(fs, X, val)		\
214   do						\
215     {						\
216       union _FP_UNION_##fs _flo;		\
217       _flo.flt = (val);				\
218 						\
219       X##_f0 = _flo.bits.frac0;			\
220       X##_f1 = _flo.bits.frac1;			\
221       X##_e  = _flo.bits.exp;			\
222       X##_s  = _flo.bits.sign;			\
223     }						\
224   while (0)
225 
226 #define _FP_UNPACK_RAW_2_P(fs, X, val)					\
227   do									\
228     {									\
229       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);	\
230 									\
231       X##_f0 = _flo->bits.frac0;					\
232       X##_f1 = _flo->bits.frac1;					\
233       X##_e  = _flo->bits.exp;						\
234       X##_s  = _flo->bits.sign;						\
235     }									\
236   while (0)
237 
238 
239 /*
240  * Repack the raw bits of a native fp value.
241  */
242 
243 #define _FP_PACK_RAW_2(fs, val, X)		\
244   do						\
245     {						\
246       union _FP_UNION_##fs _flo;		\
247 						\
248       _flo.bits.frac0 = X##_f0;			\
249       _flo.bits.frac1 = X##_f1;			\
250       _flo.bits.exp   = X##_e;			\
251       _flo.bits.sign  = X##_s;			\
252 						\
253       (val) = _flo.flt;				\
254     }						\
255   while (0)
256 
257 #define _FP_PACK_RAW_2_P(fs, val, X)					\
258   do									\
259     {									\
260       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);	\
261 									\
262       _flo->bits.frac0 = X##_f0;					\
263       _flo->bits.frac1 = X##_f1;					\
264       _flo->bits.exp   = X##_e;						\
265       _flo->bits.sign  = X##_s;						\
266     }									\
267   while (0)
268 
269 
270 /*
271  * Multiplication algorithms:
272  */
273 
274 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
275 
276 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)		\
277   do									\
278     {									\
279       _FP_FRAC_DECL_2 (_b);						\
280       _FP_FRAC_DECL_2 (_c);						\
281 									\
282       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \
283       doit (_b_f1, _b_f0, X##_f0, Y##_f1);				\
284       doit (_c_f1, _c_f0, X##_f1, Y##_f0);				\
285       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), X##_f1, Y##_f1); \
286 									\
287       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
288 		       _FP_FRAC_WORD_4 (R, 1), 0, _b_f1, _b_f0,		\
289 		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
290 		       _FP_FRAC_WORD_4 (R, 1));				\
291       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
292 		       _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0,		\
293 		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
294 		       _FP_FRAC_WORD_4 (R, 1));				\
295     }									\
296   while (0)
297 
298 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
299   do									\
300     {									\
301       _FP_FRAC_DECL_4 (_z);						\
302 									\
303       _FP_MUL_MEAT_DW_2_wide (wfracbits, _z, X, Y, doit);		\
304 									\
305       /* Normalize since we know where the msb of the multiplicands	\
306 	 were (bit B), we know that the msb of the of the product is	\
307 	 at either 2B or 2B-1.  */					\
308       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);			\
309       R##_f0 = _FP_FRAC_WORD_4 (_z, 0);					\
310       R##_f1 = _FP_FRAC_WORD_4 (_z, 1);					\
311     }									\
312   while (0)
313 
314 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
315    Do only 3 multiplications instead of four. This one is for machines
316    where multiplication is much more expensive than subtraction.  */
317 
318 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)		\
319   do									\
320     {									\
321       _FP_FRAC_DECL_2 (_b);						\
322       _FP_FRAC_DECL_2 (_c);						\
323       _FP_W_TYPE _d;							\
324       int _c1, _c2;							\
325 									\
326       _b_f0 = X##_f0 + X##_f1;						\
327       _c1 = _b_f0 < X##_f0;						\
328       _b_f1 = Y##_f0 + Y##_f1;						\
329       _c2 = _b_f1 < Y##_f0;						\
330       doit (_d, _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0);		\
331       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), _b_f0, _b_f1); \
332       doit (_c_f1, _c_f0, X##_f1, Y##_f1);				\
333 									\
334       _b_f0 &= -_c2;							\
335       _b_f1 &= -_c1;							\
336       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
337 		       _FP_FRAC_WORD_4 (R, 1), (_c1 & _c2), 0, _d,	\
338 		       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
339       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
340 			_b_f0);						\
341       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
342 			_b_f1);						\
343       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
344 		       _FP_FRAC_WORD_4 (R, 1),				\
345 		       0, _d, _FP_FRAC_WORD_4 (R, 0));			\
346       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
347 		       _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0);	\
348       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
349 		       _c_f1, _c_f0,					\
350 		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2));	\
351     }									\
352   while (0)
353 
354 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
355   do									\
356     {									\
357       _FP_FRAC_DECL_4 (_z);						\
358 									\
359       _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, _z, X, Y, doit);		\
360 									\
361       /* Normalize since we know where the msb of the multiplicands	\
362 	 were (bit B), we know that the msb of the of the product is	\
363 	 at either 2B or 2B-1.  */					\
364       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);			\
365       R##_f0 = _FP_FRAC_WORD_4 (_z, 0);					\
366       R##_f1 = _FP_FRAC_WORD_4 (_z, 1);					\
367     }									\
368   while (0)
369 
370 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)	\
371   do							\
372     {							\
373       _FP_W_TYPE _x[2], _y[2];				\
374       _x[0] = X##_f0;					\
375       _x[1] = X##_f1;					\
376       _y[0] = Y##_f0;					\
377       _y[1] = Y##_f1;					\
378 							\
379       mpn_mul_n (R##_f, _x, _y, 2);			\
380     }							\
381   while (0)
382 
383 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
384   do									\
385     {									\
386       _FP_FRAC_DECL_4 (_z);						\
387 									\
388       _FP_MUL_MEAT_DW_2_gmp (wfracbits, _z, X, Y);			\
389 									\
390       /* Normalize since we know where the msb of the multiplicands	\
391 	 were (bit B), we know that the msb of the of the product is	\
392 	 at either 2B or 2B-1.  */					\
393       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);			\
394       R##_f0 = _z_f[0];							\
395       R##_f1 = _z_f[1];							\
396     }									\
397   while (0)
398 
399 /* Do at most 120x120=240 bits multiplication using double floating
400    point multiplication.  This is useful if floating point
401    multiplication has much bigger throughput than integer multiply.
402    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
403    between 106 and 120 only.
404    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
405    SETFETZ is a macro which will disable all FPU exceptions and set rounding
406    towards zero,  RESETFE should optionally reset it back.  */
407 
408 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
409   do									\
410     {									\
411       static const double _const[] =					\
412 	{								\
413 	  /* 2^-24 */ 5.9604644775390625e-08,				\
414 	  /* 2^-48 */ 3.5527136788005009e-15,				\
415 	  /* 2^-72 */ 2.1175823681357508e-22,				\
416 	  /* 2^-96 */ 1.2621774483536189e-29,				\
417 	  /* 2^28 */ 2.68435456e+08,					\
418 	  /* 2^4 */ 1.600000e+01,					\
419 	  /* 2^-20 */ 9.5367431640625e-07,				\
420 	  /* 2^-44 */ 5.6843418860808015e-14,				\
421 	  /* 2^-68 */ 3.3881317890172014e-21,				\
422 	  /* 2^-92 */ 2.0194839173657902e-28,				\
423 	  /* 2^-116 */ 1.2037062152420224e-35				\
424 	};								\
425       double _a240, _b240, _c240, _d240, _e240, _f240,			\
426 	_g240, _h240, _i240, _j240, _k240;				\
427       union { double d; UDItype i; } _l240, _m240, _n240, _o240,	\
428 				       _p240, _q240, _r240, _s240;	\
429       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;		\
430 									\
431       if (wfracbits < 106 || wfracbits > 120)				\
432 	abort ();							\
433 									\
434       setfetz;								\
435 									\
436       _e240 = (double) (long) (X##_f0 & 0xffffff);			\
437       _j240 = (double) (long) (Y##_f0 & 0xffffff);			\
438       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);		\
439       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);		\
440       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
441       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
442       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);		\
443       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);		\
444       _a240 = (double) (long) (X##_f1 >> 32);				\
445       _f240 = (double) (long) (Y##_f1 >> 32);				\
446       _e240 *= _const[3];						\
447       _j240 *= _const[3];						\
448       _d240 *= _const[2];						\
449       _i240 *= _const[2];						\
450       _c240 *= _const[1];						\
451       _h240 *= _const[1];						\
452       _b240 *= _const[0];						\
453       _g240 *= _const[0];						\
454       _s240.d =							      _e240*_j240; \
455       _r240.d =						_d240*_j240 + _e240*_i240; \
456       _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240; \
457       _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
458       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
459       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;	\
460       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;		\
461       _l240.d = _a240*_g240 + _b240*_f240;				\
462       _k240 =   _a240*_f240;						\
463       _r240.d += _s240.d;						\
464       _q240.d += _r240.d;						\
465       _p240.d += _q240.d;						\
466       _o240.d += _p240.d;						\
467       _n240.d += _o240.d;						\
468       _m240.d += _n240.d;						\
469       _l240.d += _m240.d;						\
470       _k240 += _l240.d;							\
471       _s240.d -= ((_const[10]+_s240.d)-_const[10]);			\
472       _r240.d -= ((_const[9]+_r240.d)-_const[9]);			\
473       _q240.d -= ((_const[8]+_q240.d)-_const[8]);			\
474       _p240.d -= ((_const[7]+_p240.d)-_const[7]);			\
475       _o240.d += _const[7];						\
476       _n240.d += _const[6];						\
477       _m240.d += _const[5];						\
478       _l240.d += _const[4];						\
479       if (_s240.d != 0.0)						\
480 	_y240 = 1;							\
481       if (_r240.d != 0.0)						\
482 	_y240 = 1;							\
483       if (_q240.d != 0.0)						\
484 	_y240 = 1;							\
485       if (_p240.d != 0.0)						\
486 	_y240 = 1;							\
487       _t240 = (DItype) _k240;						\
488       _u240 = _l240.i;							\
489       _v240 = _m240.i;							\
490       _w240 = _n240.i;							\
491       _x240 = _o240.i;							\
492       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))			\
493 		| ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));	\
494       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))		\
495 		| ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))	\
496 		| ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))	\
497 		| ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))	\
498 		| _y240);						\
499       resetfe;								\
500     }									\
501   while (0)
502 
503 /*
504  * Division algorithms:
505  */
506 
507 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
508   do									\
509     {									\
510       _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;	\
511       if (_FP_FRAC_GE_2 (X, Y))						\
512 	{								\
513 	  _n_f2 = X##_f1 >> 1;						\
514 	  _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;	\
515 	  _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
516 	}								\
517       else								\
518 	{								\
519 	  R##_e--;							\
520 	  _n_f2 = X##_f1;						\
521 	  _n_f1 = X##_f0;						\
522 	  _n_f0 = 0;							\
523 	}								\
524 									\
525       /* Normalize, i.e. make the most significant bit of the		\
526 	 denominator set. */						\
527       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);				\
528 									\
529       udiv_qrnnd (R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);			\
530       umul_ppmm (_m_f1, _m_f0, R##_f1, Y##_f0);				\
531       _r_f0 = _n_f0;							\
532       if (_FP_FRAC_GT_2 (_m, _r))					\
533 	{								\
534 	  R##_f1--;							\
535 	  _FP_FRAC_ADD_2 (_r, Y, _r);					\
536 	  if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r))		\
537 	    {								\
538 	      R##_f1--;							\
539 	      _FP_FRAC_ADD_2 (_r, Y, _r);				\
540 	    }								\
541 	}								\
542       _FP_FRAC_DEC_2 (_r, _m);						\
543 									\
544       if (_r_f1 == Y##_f1)						\
545 	{								\
546 	  /* This is a special case, not an optimization		\
547 	     (_r/Y##_f1 would not fit into UWtype).			\
548 	     As _r is guaranteed to be < Y,  R##_f0 can be either	\
549 	     (UWtype)-1 or (UWtype)-2.  But as we know what kind	\
550 	     of bits it is (sticky, guard, round),  we don't care.	\
551 	     We also don't care what the reminder is,  because the	\
552 	     guard bit will be set anyway.  -jj */			\
553 	  R##_f0 = -1;							\
554 	}								\
555       else								\
556 	{								\
557 	  udiv_qrnnd (R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);		\
558 	  umul_ppmm (_m_f1, _m_f0, R##_f0, Y##_f0);			\
559 	  _r_f0 = 0;							\
560 	  if (_FP_FRAC_GT_2 (_m, _r))					\
561 	    {								\
562 	      R##_f0--;							\
563 	      _FP_FRAC_ADD_2 (_r, Y, _r);				\
564 	      if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r))	\
565 		{							\
566 		  R##_f0--;						\
567 		  _FP_FRAC_ADD_2 (_r, Y, _r);				\
568 		}							\
569 	    }								\
570 	  if (!_FP_FRAC_EQ_2 (_r, _m))					\
571 	    R##_f0 |= _FP_WORK_STICKY;					\
572 	}								\
573     }									\
574   while (0)
575 
576 
577 /*
578  * Square root algorithms:
579  * We have just one right now, maybe Newton approximation
580  * should be added for those machines where division is fast.
581  */
582 
583 #define _FP_SQRT_MEAT_2(R, S, T, X, q)				\
584   do								\
585     {								\
586       while (q)							\
587 	{							\
588 	  T##_f1 = S##_f1 + q;					\
589 	  if (T##_f1 <= X##_f1)					\
590 	    {							\
591 	      S##_f1 = T##_f1 + q;				\
592 	      X##_f1 -= T##_f1;					\
593 	      R##_f1 += q;					\
594 	    }							\
595 	  _FP_FRAC_SLL_2 (X, 1);				\
596 	  q >>= 1;						\
597 	}							\
598       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);		\
599       while (q != _FP_WORK_ROUND)				\
600 	{							\
601 	  T##_f0 = S##_f0 + q;					\
602 	  T##_f1 = S##_f1;					\
603 	  if (T##_f1 < X##_f1					\
604 	      || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))	\
605 	    {							\
606 	      S##_f0 = T##_f0 + q;				\
607 	      S##_f1 += (T##_f0 > S##_f0);			\
608 	      _FP_FRAC_DEC_2 (X, T);				\
609 	      R##_f0 += q;					\
610 	    }							\
611 	  _FP_FRAC_SLL_2 (X, 1);				\
612 	  q >>= 1;						\
613 	}							\
614       if (X##_f0 | X##_f1)					\
615 	{							\
616 	  if (S##_f1 < X##_f1					\
617 	      || (S##_f1 == X##_f1 && S##_f0 < X##_f0))		\
618 	    R##_f0 |= _FP_WORK_ROUND;				\
619 	  R##_f0 |= _FP_WORK_STICKY;				\
620 	}							\
621     }								\
622   while (0)
623 
624 
625 /*
626  * Assembly/disassembly for converting to/from integral types.
627  * No shifting or overflow handled here.
628  */
629 
630 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
631   (void) ((rsize <= _FP_W_TYPE_SIZE)		\
632 	  ? ({ r = X##_f0; })			\
633 	  : ({					\
634 	      r = X##_f1;			\
635 	      r <<= _FP_W_TYPE_SIZE;		\
636 	      r += X##_f0;			\
637 	    }))
638 
639 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)				\
640   do									\
641     {									\
642       X##_f0 = r;							\
643       X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
644     }									\
645   while (0)
646 
647 /*
648  * Convert FP values between word sizes
649  */
650 
651 #define _FP_FRAC_COPY_1_2(D, S)		(D##_f = S##_f0)
652 
653 #define _FP_FRAC_COPY_2_1(D, S)		((D##_f0 = S##_f), (D##_f1 = 0))
654 
655 #define _FP_FRAC_COPY_2_2(D, S)		_FP_FRAC_COPY_2 (D, S)
656