xref: /netbsd/sys/lib/libkern/softfloat-macros.h (revision bde95653)
1 /* $NetBSD: softfloat-macros.h,v 1.3 2020/09/01 15:45:20 thorpej Exp $ */
2 
3 /*============================================================================
4 
5 This C source fragment is part of the Berkeley SoftFloat IEEE Floating-Point
6 Arithmetic Package, Release 2c, by John R. Hauser.
7 
8 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
9 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
10 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
11 AND ORGANIZATIONS WHO CAN AND WILL TOLERATE ALL LOSSES, COSTS, OR OTHER
12 PROBLEMS THEY INCUR DUE TO THE SOFTWARE WITHOUT RECOMPENSE FROM JOHN HAUSER OR
13 THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE, AND WHO FURTHERMORE EFFECTIVELY
14 INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE
15 (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR OTHER
16 PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE, OR
17 INCURRED BY ANYONE DUE TO A DERIVATIVE WORK THEY CREATE USING ANY PART OF THE
18 SOFTWARE.
19 
20 Derivative works require also that (1) the source code for the derivative work
21 includes prominent notice that the work is derivative, and (2) the source code
22 includes prominent notice of these three paragraphs for those parts of this
23 code that are retained.
24 
25 =============================================================================*/
26 
27 /*----------------------------------------------------------------------------
28 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
29 | bits are shifted off, they are "jammed" into the least significant bit of
30 | the result by setting the least significant bit to 1.  The value of `count'
31 | can be arbitrarily large; in particular, if `count' is greater than 32, the
32 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
33 | The result is stored in the location pointed to by `zPtr'.
34 *----------------------------------------------------------------------------*/
35 
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)36 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
37 {
38     bits32 z;
39 
40     if ( count == 0 ) {
41         z = a;
42     }
43     else if ( count < 32 ) {
44         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
45     }
46     else {
47         z = ( a != 0 );
48     }
49     *zPtr = z;
50 
51 }
52 
53 /*----------------------------------------------------------------------------
54 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
55 | bits are shifted off, they are "jammed" into the least significant bit of
56 | the result by setting the least significant bit to 1.  The value of `count'
57 | can be arbitrarily large; in particular, if `count' is greater than 64, the
58 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
59 | The result is stored in the location pointed to by `zPtr'.
60 *----------------------------------------------------------------------------*/
61 
shift64RightJamming(bits64 a,int16 count,bits64 * zPtr)62 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
63 {
64     bits64 z;
65 
66     if ( count == 0 ) {
67         z = a;
68     }
69     else if ( count < 64 ) {
70         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
71     }
72     else {
73         z = ( a != 0 );
74     }
75     *zPtr = z;
76 
77 }
78 
79 /*----------------------------------------------------------------------------
80 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
81 | _plus_ the number of bits given in `count'.  The shifted result is at most
82 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
83 | bits shifted off form a second 64-bit result as follows:  The _last_ bit
84 | shifted off is the most-significant bit of the extra result, and the other
85 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
86 | bits shifted off were all zero.  This extra result is stored in the location
87 | pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
88 |     (This routine makes more sense if `a0' and `a1' are considered to form
89 | a fixed-point value with binary point between `a0' and `a1'.  This fixed-
90 | point value is shifted right by the number of bits given in `count', and
91 | the integer part of the result is returned at the location pointed to by
92 | `z0Ptr'.  The fractional part of the result may be slightly corrupted as
93 | described above, and is returned at the location pointed to by `z1Ptr'.)
94 *----------------------------------------------------------------------------*/
95 
96 INLINE void
shift64ExtraRightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)97  shift64ExtraRightJamming(
98      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
99 {
100     bits64 z0, z1;
101     int8 negCount = ( - count ) & 63;
102 
103     if ( count == 0 ) {
104         z1 = a1;
105         z0 = a0;
106     }
107     else if ( count < 64 ) {
108         z1 = ( a0<<negCount ) | ( a1 != 0 );
109         z0 = a0>>count;
110     }
111     else {
112         if ( count == 64 ) {
113             z1 = a0 | ( a1 != 0 );
114         }
115         else {
116             z1 = ( ( a0 | a1 ) != 0 );
117         }
118         z0 = 0;
119     }
120     *z1Ptr = z1;
121     *z0Ptr = z0;
122 
123 }
124 
125 /*----------------------------------------------------------------------------
126 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
127 | number of bits given in `count'.  Any bits shifted off are lost.  The value
128 | of `count' can be arbitrarily large; in particular, if `count' is greater
129 | than 128, the result will be 0.  The result is broken into two 64-bit pieces
130 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
131 *----------------------------------------------------------------------------*/
132 
133 INLINE void
shift128Right(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)134  shift128Right(
135      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
136 {
137     bits64 z0, z1;
138     int8 negCount = ( - count ) & 63;
139 
140     if ( count == 0 ) {
141         z1 = a1;
142         z0 = a0;
143     }
144     else if ( count < 64 ) {
145         z1 = ( a0<<negCount ) | ( a1>>count );
146         z0 = a0>>count;
147     }
148     else {
149         z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
150         z0 = 0;
151     }
152     *z1Ptr = z1;
153     *z0Ptr = z0;
154 
155 }
156 
157 /*----------------------------------------------------------------------------
158 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
159 | number of bits given in `count'.  If any nonzero bits are shifted off, they
160 | are "jammed" into the least significant bit of the result by setting the
161 | least significant bit to 1.  The value of `count' can be arbitrarily large;
162 | in particular, if `count' is greater than 128, the result will be either
163 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
164 | nonzero.  The result is broken into two 64-bit pieces which are stored at
165 | the locations pointed to by `z0Ptr' and `z1Ptr'.
166 *----------------------------------------------------------------------------*/
167 
168 INLINE void
shift128RightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)169  shift128RightJamming(
170      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
171 {
172     bits64 z0, z1;
173     int8 negCount = ( - count ) & 63;
174 
175     if ( count == 0 ) {
176         z1 = a1;
177         z0 = a0;
178     }
179     else if ( count < 64 ) {
180         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
181         z0 = a0>>count;
182     }
183     else {
184         if ( count == 64 ) {
185             z1 = a0 | ( a1 != 0 );
186         }
187         else if ( count < 128 ) {
188             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
189         }
190         else {
191             z1 = ( ( a0 | a1 ) != 0 );
192         }
193         z0 = 0;
194     }
195     *z1Ptr = z1;
196     *z0Ptr = z0;
197 
198 }
199 
200 /*----------------------------------------------------------------------------
201 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
202 | by 64 _plus_ the number of bits given in `count'.  The shifted result is
203 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
204 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
205 | off form a third 64-bit result as follows:  The _last_ bit shifted off is
206 | the most-significant bit of the extra result, and the other 63 bits of the
207 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
208 | were all zero.  This extra result is stored in the location pointed to by
209 | `z2Ptr'.  The value of `count' can be arbitrarily large.
210 |     (This routine makes more sense if `a0', `a1', and `a2' are considered
211 | to form a fixed-point value with binary point between `a1' and `a2'.  This
212 | fixed-point value is shifted right by the number of bits given in `count',
213 | and the integer part of the result is returned at the locations pointed to
214 | by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
215 | corrupted as described above, and is returned at the location pointed to by
216 | `z2Ptr'.)
217 *----------------------------------------------------------------------------*/
218 
219 INLINE void
shift128ExtraRightJamming(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)220  shift128ExtraRightJamming(
221      bits64 a0,
222      bits64 a1,
223      bits64 a2,
224      int16 count,
225      bits64 *z0Ptr,
226      bits64 *z1Ptr,
227      bits64 *z2Ptr
228  )
229 {
230     bits64 z0, z1, z2;
231     int8 negCount = ( - count ) & 63;
232 
233     if ( count == 0 ) {
234         z2 = a2;
235         z1 = a1;
236         z0 = a0;
237     }
238     else {
239         if ( count < 64 ) {
240             z2 = a1<<negCount;
241             z1 = ( a0<<negCount ) | ( a1>>count );
242             z0 = a0>>count;
243         }
244         else {
245             if ( count == 64 ) {
246                 z2 = a1;
247                 z1 = a0;
248             }
249             else {
250                 a2 |= a1;
251                 if ( count < 128 ) {
252                     z2 = a0<<negCount;
253                     z1 = a0>>( count & 63 );
254                 }
255                 else {
256                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
257                     z1 = 0;
258                 }
259             }
260             z0 = 0;
261         }
262         z2 |= ( a2 != 0 );
263     }
264     *z2Ptr = z2;
265     *z1Ptr = z1;
266     *z0Ptr = z0;
267 
268 }
269 
270 /*----------------------------------------------------------------------------
271 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
272 | number of bits given in `count'.  Any bits shifted off are lost.  The value
273 | of `count' must be less than 64.  The result is broken into two 64-bit
274 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
275 *----------------------------------------------------------------------------*/
276 
277 INLINE void
shortShift128Left(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)278  shortShift128Left(
279      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
280 {
281 
282     *z1Ptr = a1<<count;
283     *z0Ptr =
284         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
285 
286 }
287 
288 /*----------------------------------------------------------------------------
289 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
290 | by the number of bits given in `count'.  Any bits shifted off are lost.
291 | The value of `count' must be less than 64.  The result is broken into three
292 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
293 | `z1Ptr', and `z2Ptr'.
294 *----------------------------------------------------------------------------*/
295 
296 INLINE void
shortShift192Left(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)297  shortShift192Left(
298      bits64 a0,
299      bits64 a1,
300      bits64 a2,
301      int16 count,
302      bits64 *z0Ptr,
303      bits64 *z1Ptr,
304      bits64 *z2Ptr
305  )
306 {
307     bits64 z0, z1, z2;
308     int8 negCount;
309 
310     z2 = a2<<count;
311     z1 = a1<<count;
312     z0 = a0<<count;
313     if ( 0 < count ) {
314         negCount = ( ( - count ) & 63 );
315         z1 |= a2>>negCount;
316         z0 |= a1>>negCount;
317     }
318     *z2Ptr = z2;
319     *z1Ptr = z1;
320     *z0Ptr = z0;
321 
322 }
323 
324 /*----------------------------------------------------------------------------
325 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
326 | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
327 | any carry out is lost.  The result is broken into two 64-bit pieces which
328 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
329 *----------------------------------------------------------------------------*/
330 
331 INLINE void
add128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)332  add128(
333      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
334 {
335     bits64 z1;
336 
337     z1 = a1 + b1;
338     *z1Ptr = z1;
339     *z0Ptr = a0 + b0 + ( z1 < a1 );
340 
341 }
342 
343 /*----------------------------------------------------------------------------
344 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
345 | 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
346 | modulo 2^192, so any carry out is lost.  The result is broken into three
347 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
348 | `z1Ptr', and `z2Ptr'.
349 *----------------------------------------------------------------------------*/
350 
351 INLINE void
add192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)352  add192(
353      bits64 a0,
354      bits64 a1,
355      bits64 a2,
356      bits64 b0,
357      bits64 b1,
358      bits64 b2,
359      bits64 *z0Ptr,
360      bits64 *z1Ptr,
361      bits64 *z2Ptr
362  )
363 {
364     bits64 z0, z1, z2;
365     int8 carry0, carry1;
366 
367     z2 = a2 + b2;
368     carry1 = ( z2 < a2 );
369     z1 = a1 + b1;
370     carry0 = ( z1 < a1 );
371     z0 = a0 + b0;
372     z1 += carry1;
373     z0 += ( z1 < carry1 );
374     z0 += carry0;
375     *z2Ptr = z2;
376     *z1Ptr = z1;
377     *z0Ptr = z0;
378 
379 }
380 
381 /*----------------------------------------------------------------------------
382 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
383 | 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
384 | 2^128, so any borrow out (carry out) is lost.  The result is broken into two
385 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
386 | `z1Ptr'.
387 *----------------------------------------------------------------------------*/
388 
389 INLINE void
sub128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)390  sub128(
391      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
392 {
393 
394     *z1Ptr = a1 - b1;
395     *z0Ptr = a0 - b0 - ( a1 < b1 );
396 
397 }
398 
399 /*----------------------------------------------------------------------------
400 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
401 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
402 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
403 | result is broken into three 64-bit pieces which are stored at the locations
404 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
405 *----------------------------------------------------------------------------*/
406 
407 INLINE void
sub192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)408  sub192(
409      bits64 a0,
410      bits64 a1,
411      bits64 a2,
412      bits64 b0,
413      bits64 b1,
414      bits64 b2,
415      bits64 *z0Ptr,
416      bits64 *z1Ptr,
417      bits64 *z2Ptr
418  )
419 {
420     bits64 z0, z1, z2;
421     int8 borrow0, borrow1;
422 
423     z2 = a2 - b2;
424     borrow1 = ( a2 < b2 );
425     z1 = a1 - b1;
426     borrow0 = ( a1 < b1 );
427     z0 = a0 - b0;
428     z0 -= ( z1 < borrow1 );
429     z1 -= borrow1;
430     z0 -= borrow0;
431     *z2Ptr = z2;
432     *z1Ptr = z1;
433     *z0Ptr = z0;
434 
435 }
436 
437 /*----------------------------------------------------------------------------
438 | Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
439 | into two 64-bit pieces which are stored at the locations pointed to by
440 | `z0Ptr' and `z1Ptr'.
441 *----------------------------------------------------------------------------*/
442 
mul64To128(bits64 a,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr)443 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
444 {
445     bits32 aHigh, aLow, bHigh, bLow;
446     bits64 z0, zMiddleA, zMiddleB, z1;
447 
448     aLow = a;
449     aHigh = a>>32;
450     bLow = b;
451     bHigh = b>>32;
452     z1 = ( (bits64) aLow ) * bLow;
453     zMiddleA = ( (bits64) aLow ) * bHigh;
454     zMiddleB = ( (bits64) aHigh ) * bLow;
455     z0 = ( (bits64) aHigh ) * bHigh;
456     zMiddleA += zMiddleB;
457     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
458     zMiddleA <<= 32;
459     z1 += zMiddleA;
460     z0 += ( z1 < zMiddleA );
461     *z1Ptr = z1;
462     *z0Ptr = z0;
463 
464 }
465 
466 /*----------------------------------------------------------------------------
467 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
468 | `b' to obtain a 192-bit product.  The product is broken into three 64-bit
469 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
470 | `z2Ptr'.
471 *----------------------------------------------------------------------------*/
472 
473 INLINE void
mul128By64To192(bits64 a0,bits64 a1,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)474  mul128By64To192(
475      bits64 a0,
476      bits64 a1,
477      bits64 b,
478      bits64 *z0Ptr,
479      bits64 *z1Ptr,
480      bits64 *z2Ptr
481  )
482 {
483     bits64 z0, z1, z2, more1;
484 
485     mul64To128( a1, b, &z1, &z2 );
486     mul64To128( a0, b, &z0, &more1 );
487     add128( z0, more1, 0, z1, &z0, &z1 );
488     *z2Ptr = z2;
489     *z1Ptr = z1;
490     *z0Ptr = z0;
491 
492 }
493 
494 /*----------------------------------------------------------------------------
495 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
496 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
497 | product.  The product is broken into four 64-bit pieces which are stored at
498 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
499 *----------------------------------------------------------------------------*/
500 
501 INLINE void
mul128To256(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr,bits64 * z3Ptr)502  mul128To256(
503      bits64 a0,
504      bits64 a1,
505      bits64 b0,
506      bits64 b1,
507      bits64 *z0Ptr,
508      bits64 *z1Ptr,
509      bits64 *z2Ptr,
510      bits64 *z3Ptr
511  )
512 {
513     bits64 z0, z1, z2, z3;
514     bits64 more1, more2;
515 
516     mul64To128( a1, b1, &z2, &z3 );
517     mul64To128( a1, b0, &z1, &more2 );
518     add128( z1, more2, 0, z2, &z1, &z2 );
519     mul64To128( a0, b0, &z0, &more1 );
520     add128( z0, more1, 0, z1, &z0, &z1 );
521     mul64To128( a0, b1, &more1, &more2 );
522     add128( more1, more2, 0, z2, &more1, &z2 );
523     add128( z0, z1, 0, more1, &z0, &z1 );
524     *z3Ptr = z3;
525     *z2Ptr = z2;
526     *z1Ptr = z1;
527     *z0Ptr = z0;
528 
529 }
530 
531 /*----------------------------------------------------------------------------
532 | Returns an approximation to the 64-bit integer quotient obtained by dividing
533 | `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
534 | divisor `b' must be at least 2^63.  If q is the exact quotient truncated
535 | toward zero, the approximation returned lies between q and q + 2 inclusive.
536 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
537 | unsigned integer is returned.
538 *----------------------------------------------------------------------------*/
539 
estimateDiv128To64(bits64 a0,bits64 a1,bits64 b)540 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
541 {
542     bits64 b0, b1;
543     bits64 rem0, rem1, term0, term1;
544     bits64 z;
545 
546     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
547     b0 = b>>32;
548     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
549     mul64To128( b, z, &term0, &term1 );
550     sub128( a0, a1, term0, term1, &rem0, &rem1 );
551     while ( ( (sbits64) rem0 ) < 0 ) {
552         z -= LIT64( 0x100000000 );
553         b1 = b<<32;
554         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
555     }
556     rem0 = ( rem0<<32 ) | ( rem1>>32 );
557     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
558     return z;
559 
560 }
561 
562 #ifndef SOFTFLOAT_FOR_GCC /* Not used */
563 /*----------------------------------------------------------------------------
564 | Returns an approximation to the square root of the 32-bit significand given
565 | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
566 | `aExp' (the least significant bit) is 1, the integer returned approximates
567 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
568 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
569 | case, the approximation returned lies strictly within +/-2 of the exact
570 | value.
571 *----------------------------------------------------------------------------*/
572 
estimateSqrt32(int16 aExp,bits32 a)573 static bits32 estimateSqrt32( int16 aExp, bits32 a )
574 {
575     static const bits16 sqrtOddAdjustments[] = {
576         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
577         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
578     };
579     static const bits16 sqrtEvenAdjustments[] = {
580         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
581         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
582     };
583     int8 index;
584     bits32 z;
585 
586     index = ( a>>27 ) & 15;
587     if ( aExp & 1 ) {
588         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
589         z = ( ( a / z )<<14 ) + ( z<<15 );
590         a >>= 1;
591     }
592     else {
593         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
594         z = a / z + z;
595         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
596         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
597     }
598     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
599 
600 }
601 #endif
602 
603 /*----------------------------------------------------------------------------
604 | Returns the number of leading 0 bits before the most-significant 1 bit of
605 | `a'.  If `a' is zero, 32 is returned.
606 *----------------------------------------------------------------------------*/
607 
countLeadingZeros32(bits32 a)608 static int8 countLeadingZeros32( bits32 a )
609 {
610     static const int8 countLeadingZerosHigh[] = {
611         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
612         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
613         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
614         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
615         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
616         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
617         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
618         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
619         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
621         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
622         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
623         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
624         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
625         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
626         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
627     };
628     int8 shiftCount;
629 
630     shiftCount = 0;
631     if ( a < 0x10000 ) {
632         shiftCount += 16;
633         a <<= 16;
634     }
635     if ( a < 0x1000000 ) {
636         shiftCount += 8;
637         a <<= 8;
638     }
639     shiftCount += countLeadingZerosHigh[ a>>24 ];
640     return shiftCount;
641 
642 }
643 
644 /*----------------------------------------------------------------------------
645 | Returns the number of leading 0 bits before the most-significant 1 bit of
646 | `a'.  If `a' is zero, 64 is returned.
647 *----------------------------------------------------------------------------*/
648 
countLeadingZeros64(bits64 a)649 static int8 countLeadingZeros64( bits64 a )
650 {
651     int8 shiftCount;
652 
653     shiftCount = 0;
654     if ( a < ( (bits64) 1 )<<32 ) {
655         shiftCount += 32;
656     }
657     else {
658         a >>= 32;
659     }
660     shiftCount += countLeadingZeros32( a );
661     return shiftCount;
662 
663 }
664 
665 /*----------------------------------------------------------------------------
666 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
667 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
668 | Otherwise, returns 0.
669 *----------------------------------------------------------------------------*/
670 
eq128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)671 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
672 {
673 
674     return ( a0 == b0 ) && ( a1 == b1 );
675 
676 }
677 
678 /*----------------------------------------------------------------------------
679 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
680 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
681 | Otherwise, returns 0.
682 *----------------------------------------------------------------------------*/
683 
le128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)684 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
685 {
686 
687     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
688 
689 }
690 
691 /*----------------------------------------------------------------------------
692 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
693 | than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
694 | returns 0.
695 *----------------------------------------------------------------------------*/
696 
lt128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)697 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
698 {
699 
700     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
701 
702 }
703 
704 /*----------------------------------------------------------------------------
705 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
706 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
707 | Otherwise, returns 0.
708 *----------------------------------------------------------------------------*/
709 
ne128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)710 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
711 {
712 
713     return ( a0 != b0 ) || ( a1 != b1 );
714 
715 }
716 
717