xref: /openbsd/sys/lib/libkern/softfloat-macros.h (revision 82d799bf)
1 /*	$OpenBSD: softfloat-macros.h,v 1.3 2007/12/29 17:43:14 miod Exp $	*/
2 /*	$NetBSD: softfloat-macros.h,v 1.1 2001/04/26 03:10:47 ross Exp $	*/
3 
4 /*
5 ===============================================================================
6 
7 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
8 Arithmetic Package, Release 2a.
9 
10 Written by John R. Hauser.  This work was made possible in part by the
11 International Computer Science Institute, located at Suite 600, 1947 Center
12 Street, Berkeley, California 94704.  Funding was partially provided by the
13 National Science Foundation under grant MIP-9311980.  The original version
14 of this code was written as part of a project to build a fixed-point vector
15 processor in collaboration with the University of California at Berkeley,
16 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
17 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
18 arithmetic/SoftFloat.html'.
19 
20 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable
21 effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT
22 WILL AT TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS
23 RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL
24 RESPONSIBILITY FOR ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM
25 THEIR OWN USE OF THE SOFTWARE, AND WHO ALSO EFFECTIVELY INDEMNIFY
26 (possibly via similar legal warning) JOHN HAUSER AND THE INTERNATIONAL
27 COMPUTER SCIENCE INSTITUTE AGAINST ALL LOSSES, COSTS, OR OTHER PROBLEMS
28 ARISING FROM THE USE OF THE SOFTWARE BY THEIR CUSTOMERS AND CLIENTS.
29 
30 Derivative works are acceptable, even for commercial purposes, so long as
31 (1) they include prominent notice that the work is derivative, and (2) they
32 include prominent notice akin to these four paragraphs for those parts of
33 this code that are retained.
34 
35 ===============================================================================
36 */
37 
38 /*
39 -------------------------------------------------------------------------------
40 Shifts `a' right by the number of bits given in `count'.  If any nonzero
41 bits are shifted off, they are ``jammed'' into the least significant bit of
42 the result by setting the least significant bit to 1.  The value of `count'
43 can be arbitrarily large; in particular, if `count' is greater than 32, the
44 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
45 The result is stored in the location pointed to by `zPtr'.
46 -------------------------------------------------------------------------------
47 */
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)48 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
49 {
50     bits32 z;
51 
52     if ( count == 0 ) {
53         z = a;
54     }
55     else if ( count < 32 ) {
56         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
57     }
58     else {
59         z = ( a != 0 );
60     }
61     *zPtr = z;
62 
63 }
64 
65 /*
66 -------------------------------------------------------------------------------
67 Shifts `a' right by the number of bits given in `count'.  If any nonzero
68 bits are shifted off, they are ``jammed'' into the least significant bit of
69 the result by setting the least significant bit to 1.  The value of `count'
70 can be arbitrarily large; in particular, if `count' is greater than 64, the
71 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
72 The result is stored in the location pointed to by `zPtr'.
73 -------------------------------------------------------------------------------
74 */
shift64RightJamming(bits64 a,int16 count,bits64 * zPtr)75 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
76 {
77     bits64 z;
78 
79     if ( count == 0 ) {
80         z = a;
81     }
82     else if ( count < 64 ) {
83         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
84     }
85     else {
86         z = ( a != 0 );
87     }
88     *zPtr = z;
89 
90 }
91 
92 /*
93 -------------------------------------------------------------------------------
94 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
95 _plus_ the number of bits given in `count'.  The shifted result is at most
96 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
97 bits shifted off form a second 64-bit result as follows:  The _last_ bit
98 shifted off is the most-significant bit of the extra result, and the other
99 63 bits of the extra result are all zero if and only if _all_but_the_last_
100 bits shifted off were all zero.  This extra result is stored in the location
101 pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
102     (This routine makes more sense if `a0' and `a1' are considered to form a
103 fixed-point value with binary point between `a0' and `a1'.  This fixed-point
104 value is shifted right by the number of bits given in `count', and the
105 integer part of the result is returned at the location pointed to by
106 `z0Ptr'.  The fractional part of the result may be slightly corrupted as
107 described above, and is returned at the location pointed to by `z1Ptr'.)
108 -------------------------------------------------------------------------------
109 */
110 INLINE void
shift64ExtraRightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)111  shift64ExtraRightJamming(
112      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
113 {
114     bits64 z0, z1;
115     int8 negCount = ( - count ) & 63;
116 
117     if ( count == 0 ) {
118         z1 = a1;
119         z0 = a0;
120     }
121     else if ( count < 64 ) {
122         z1 = ( a0<<negCount ) | ( a1 != 0 );
123         z0 = a0>>count;
124     }
125     else {
126         if ( count == 64 ) {
127             z1 = a0 | ( a1 != 0 );
128         }
129         else {
130             z1 = ( ( a0 | a1 ) != 0 );
131         }
132         z0 = 0;
133     }
134     *z1Ptr = z1;
135     *z0Ptr = z0;
136 
137 }
138 
139 #if defined(FLOATX80) || defined(FLOAT128)
140 
141 /*
142 -------------------------------------------------------------------------------
143 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
144 number of bits given in `count'.  Any bits shifted off are lost.  The value
145 of `count' can be arbitrarily large; in particular, if `count' is greater
146 than 128, the result will be 0.  The result is broken into two 64-bit pieces
147 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
148 -------------------------------------------------------------------------------
149 */
150 INLINE void
shift128Right(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)151  shift128Right(
152      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
153 {
154     bits64 z0, z1;
155     int8 negCount = ( - count ) & 63;
156 
157     if ( count == 0 ) {
158         z1 = a1;
159         z0 = a0;
160     }
161     else if ( count < 64 ) {
162         z1 = ( a0<<negCount ) | ( a1>>count );
163         z0 = a0>>count;
164     }
165     else {
166         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
167         z0 = 0;
168     }
169     *z1Ptr = z1;
170     *z0Ptr = z0;
171 
172 }
173 
174 /*
175 -------------------------------------------------------------------------------
176 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
177 number of bits given in `count'.  If any nonzero bits are shifted off, they
178 are ``jammed'' into the least significant bit of the result by setting the
179 least significant bit to 1.  The value of `count' can be arbitrarily large;
180 in particular, if `count' is greater than 128, the result will be either
181 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
182 nonzero.  The result is broken into two 64-bit pieces which are stored at
183 the locations pointed to by `z0Ptr' and `z1Ptr'.
184 -------------------------------------------------------------------------------
185 */
186 INLINE void
shift128RightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)187  shift128RightJamming(
188      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
189 {
190     bits64 z0, z1;
191     int8 negCount = ( - count ) & 63;
192 
193     if ( count == 0 ) {
194         z1 = a1;
195         z0 = a0;
196     }
197     else if ( count < 64 ) {
198         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
199         z0 = a0>>count;
200     }
201     else {
202         if ( count == 64 ) {
203             z1 = a0 | ( a1 != 0 );
204         }
205         else if ( count < 128 ) {
206             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
207         }
208         else {
209             z1 = ( ( a0 | a1 ) != 0 );
210         }
211         z0 = 0;
212     }
213     *z1Ptr = z1;
214     *z0Ptr = z0;
215 
216 }
217 
218 /*
219 -------------------------------------------------------------------------------
220 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
221 by 64 _plus_ the number of bits given in `count'.  The shifted result is
222 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
223 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
224 off form a third 64-bit result as follows:  The _last_ bit shifted off is
225 the most-significant bit of the extra result, and the other 63 bits of the
226 extra result are all zero if and only if _all_but_the_last_ bits shifted off
227 were all zero.  This extra result is stored in the location pointed to by
228 `z2Ptr'.  The value of `count' can be arbitrarily large.
229     (This routine makes more sense if `a0', `a1', and `a2' are considered
230 to form a fixed-point value with binary point between `a1' and `a2'.  This
231 fixed-point value is shifted right by the number of bits given in `count',
232 and the integer part of the result is returned at the locations pointed to
233 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
234 corrupted as described above, and is returned at the location pointed to by
235 `z2Ptr'.)
236 -------------------------------------------------------------------------------
237 */
238 INLINE void
shift128ExtraRightJamming(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)239  shift128ExtraRightJamming(
240      bits64 a0,
241      bits64 a1,
242      bits64 a2,
243      int16 count,
244      bits64 *z0Ptr,
245      bits64 *z1Ptr,
246      bits64 *z2Ptr
247  )
248 {
249     bits64 z0, z1, z2;
250     int8 negCount = ( - count ) & 63;
251 
252     if ( count == 0 ) {
253         z2 = a2;
254         z1 = a1;
255         z0 = a0;
256     }
257     else {
258         if ( count < 64 ) {
259             z2 = a1<<negCount;
260             z1 = ( a0<<negCount ) | ( a1>>count );
261             z0 = a0>>count;
262         }
263         else {
264             if ( count == 64 ) {
265                 z2 = a1;
266                 z1 = a0;
267             }
268             else {
269                 a2 |= a1;
270                 if ( count < 128 ) {
271                     z2 = a0<<negCount;
272                     z1 = a0>>( count & 63 );
273                 }
274                 else {
275                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
276                     z1 = 0;
277                 }
278             }
279             z0 = 0;
280         }
281         z2 |= ( a2 != 0 );
282     }
283     *z2Ptr = z2;
284     *z1Ptr = z1;
285     *z0Ptr = z0;
286 
287 }
288 
289 /*
290 -------------------------------------------------------------------------------
291 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
292 number of bits given in `count'.  Any bits shifted off are lost.  The value
293 of `count' must be less than 64.  The result is broken into two 64-bit
294 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
295 -------------------------------------------------------------------------------
296 */
297 INLINE void
shortShift128Left(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)298  shortShift128Left(
299      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
300 {
301 
302     *z1Ptr = a1<<count;
303     *z0Ptr =
304         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
305 
306 }
307 
308 #endif	/* FLOATX80 || FLOAT128 */
309 
310 #ifdef FLOAT128
311 
312 /*
313 -------------------------------------------------------------------------------
314 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
315 by the number of bits given in `count'.  Any bits shifted off are lost.
316 The value of `count' must be less than 64.  The result is broken into three
317 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
318 `z1Ptr', and `z2Ptr'.
319 -------------------------------------------------------------------------------
320 */
321 INLINE void
shortShift192Left(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)322  shortShift192Left(
323      bits64 a0,
324      bits64 a1,
325      bits64 a2,
326      int16 count,
327      bits64 *z0Ptr,
328      bits64 *z1Ptr,
329      bits64 *z2Ptr
330  )
331 {
332     bits64 z0, z1, z2;
333     int8 negCount;
334 
335     z2 = a2<<count;
336     z1 = a1<<count;
337     z0 = a0<<count;
338     if ( 0 < count ) {
339         negCount = ( ( - count ) & 63 );
340         z1 |= a2>>negCount;
341         z0 |= a1>>negCount;
342     }
343     *z2Ptr = z2;
344     *z1Ptr = z1;
345     *z0Ptr = z0;
346 
347 }
348 
349 #endif	/* FLOAT128 */
350 
351 /*
352 -------------------------------------------------------------------------------
353 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
354 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
355 any carry out is lost.  The result is broken into two 64-bit pieces which
356 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
357 -------------------------------------------------------------------------------
358 */
359 INLINE void
add128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)360  add128(
361      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
362 {
363     bits64 z1;
364 
365     z1 = a1 + b1;
366     *z1Ptr = z1;
367     *z0Ptr = a0 + b0 + ( z1 < a1 );
368 
369 }
370 
371 #if defined(FLOATX80) || defined(FLOAT128)
372 
373 /*
374 -------------------------------------------------------------------------------
375 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
376 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
377 modulo 2^192, so any carry out is lost.  The result is broken into three
378 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
379 `z1Ptr', and `z2Ptr'.
380 -------------------------------------------------------------------------------
381 */
382 INLINE void
add192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)383  add192(
384      bits64 a0,
385      bits64 a1,
386      bits64 a2,
387      bits64 b0,
388      bits64 b1,
389      bits64 b2,
390      bits64 *z0Ptr,
391      bits64 *z1Ptr,
392      bits64 *z2Ptr
393  )
394 {
395     bits64 z0, z1, z2;
396     int8 carry0, carry1;
397 
398     z2 = a2 + b2;
399     carry1 = ( z2 < a2 );
400     z1 = a1 + b1;
401     carry0 = ( z1 < a1 );
402     z0 = a0 + b0;
403     z1 += carry1;
404     z0 += ( z1 < carry1 );
405     z0 += carry0;
406     *z2Ptr = z2;
407     *z1Ptr = z1;
408     *z0Ptr = z0;
409 
410 }
411 
412 #endif	/* FLOATX80 || FLOAT128 */
413 
414 /*
415 -------------------------------------------------------------------------------
416 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
417 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
418 2^128, so any borrow out (carry out) is lost.  The result is broken into two
419 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
420 `z1Ptr'.
421 -------------------------------------------------------------------------------
422 */
423 INLINE void
sub128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)424  sub128(
425      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
426 {
427 
428     *z1Ptr = a1 - b1;
429     *z0Ptr = a0 - b0 - ( a1 < b1 );
430 
431 }
432 
433 #if defined(FLOATX80) || defined(FLOAT128)
434 
435 /*
436 -------------------------------------------------------------------------------
437 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
438 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
439 Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
440 result is broken into three 64-bit pieces which are stored at the locations
441 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
442 -------------------------------------------------------------------------------
443 */
444 INLINE void
sub192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)445  sub192(
446      bits64 a0,
447      bits64 a1,
448      bits64 a2,
449      bits64 b0,
450      bits64 b1,
451      bits64 b2,
452      bits64 *z0Ptr,
453      bits64 *z1Ptr,
454      bits64 *z2Ptr
455  )
456 {
457     bits64 z0, z1, z2;
458     int8 borrow0, borrow1;
459 
460     z2 = a2 - b2;
461     borrow1 = ( a2 < b2 );
462     z1 = a1 - b1;
463     borrow0 = ( a1 < b1 );
464     z0 = a0 - b0;
465     z0 -= ( z1 < borrow1 );
466     z1 -= borrow1;
467     z0 -= borrow0;
468     *z2Ptr = z2;
469     *z1Ptr = z1;
470     *z0Ptr = z0;
471 
472 }
473 
474 #endif	/* FLOATX80 || FLOAT128 */
475 
476 /*
477 -------------------------------------------------------------------------------
478 Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
479 into two 64-bit pieces which are stored at the locations pointed to by
480 `z0Ptr' and `z1Ptr'.
481 -------------------------------------------------------------------------------
482 */
mul64To128(bits64 a,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr)483 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
484 {
485     bits32 aHigh, aLow, bHigh, bLow;
486     bits64 z0, zMiddleA, zMiddleB, z1;
487 
488     aLow = a;
489     aHigh = a>>32;
490     bLow = b;
491     bHigh = b>>32;
492     z1 = ( (bits64) aLow ) * bLow;
493     zMiddleA = ( (bits64) aLow ) * bHigh;
494     zMiddleB = ( (bits64) aHigh ) * bLow;
495     z0 = ( (bits64) aHigh ) * bHigh;
496     zMiddleA += zMiddleB;
497     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
498     zMiddleA <<= 32;
499     z1 += zMiddleA;
500     z0 += ( z1 < zMiddleA );
501     *z1Ptr = z1;
502     *z0Ptr = z0;
503 
504 }
505 
506 #ifdef FLOAT128
507 
508 /*
509 -------------------------------------------------------------------------------
510 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
511 `b' to obtain a 192-bit product.  The product is broken into three 64-bit
512 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
513 `z2Ptr'.
514 -------------------------------------------------------------------------------
515 */
516 INLINE void
mul128By64To192(bits64 a0,bits64 a1,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)517  mul128By64To192(
518      bits64 a0,
519      bits64 a1,
520      bits64 b,
521      bits64 *z0Ptr,
522      bits64 *z1Ptr,
523      bits64 *z2Ptr
524  )
525 {
526     bits64 z0, z1, z2, more1;
527 
528     mul64To128( a1, b, &z1, &z2 );
529     mul64To128( a0, b, &z0, &more1 );
530     add128( z0, more1, 0, z1, &z0, &z1 );
531     *z2Ptr = z2;
532     *z1Ptr = z1;
533     *z0Ptr = z0;
534 
535 }
536 
537 /*
538 -------------------------------------------------------------------------------
539 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
540 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
541 product.  The product is broken into four 64-bit pieces which are stored at
542 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
543 -------------------------------------------------------------------------------
544 */
545 INLINE void
mul128To256(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr,bits64 * z3Ptr)546  mul128To256(
547      bits64 a0,
548      bits64 a1,
549      bits64 b0,
550      bits64 b1,
551      bits64 *z0Ptr,
552      bits64 *z1Ptr,
553      bits64 *z2Ptr,
554      bits64 *z3Ptr
555  )
556 {
557     bits64 z0, z1, z2, z3;
558     bits64 more1, more2;
559 
560     mul64To128( a1, b1, &z2, &z3 );
561     mul64To128( a1, b0, &z1, &more2 );
562     add128( z1, more2, 0, z2, &z1, &z2 );
563     mul64To128( a0, b0, &z0, &more1 );
564     add128( z0, more1, 0, z1, &z0, &z1 );
565     mul64To128( a0, b1, &more1, &more2 );
566     add128( more1, more2, 0, z2, &more1, &z2 );
567     add128( z0, z1, 0, more1, &z0, &z1 );
568     *z3Ptr = z3;
569     *z2Ptr = z2;
570     *z1Ptr = z1;
571     *z0Ptr = z0;
572 
573 }
574 
575 #endif	/* FLOAT128 */
576 
577 /*
578 -------------------------------------------------------------------------------
579 Returns an approximation to the 64-bit integer quotient obtained by dividing
580 `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
581 divisor `b' must be at least 2^63.  If q is the exact quotient truncated
582 toward zero, the approximation returned lies between q and q + 2 inclusive.
583 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
584 unsigned integer is returned.
585 -------------------------------------------------------------------------------
586 */
estimateDiv128To64(bits64 a0,bits64 a1,bits64 b)587 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
588 {
589     bits64 b0, b1;
590     bits64 rem0, rem1, term0, term1;
591     bits64 z;
592 
593     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
594     b0 = b>>32;
595     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
596     mul64To128( b, z, &term0, &term1 );
597     sub128( a0, a1, term0, term1, &rem0, &rem1 );
598     while ( ( (sbits64) rem0 ) < 0 ) {
599         z -= LIT64( 0x100000000 );
600         b1 = b<<32;
601         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
602     }
603     rem0 = ( rem0<<32 ) | ( rem1>>32 );
604     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
605     return z;
606 
607 }
608 
609 #ifndef SOFTFLOAT_FOR_GCC /* Not used */
610 /*
611 -------------------------------------------------------------------------------
612 Returns an approximation to the square root of the 32-bit significand given
613 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
614 `aExp' (the least significant bit) is 1, the integer returned approximates
615 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
616 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
617 case, the approximation returned lies strictly within +/-2 of the exact
618 value.
619 -------------------------------------------------------------------------------
620 */
estimateSqrt32(int16 aExp,bits32 a)621 static bits32 estimateSqrt32( int16 aExp, bits32 a )
622 {
623     static const bits16 sqrtOddAdjustments[] = {
624         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
625         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
626     };
627     static const bits16 sqrtEvenAdjustments[] = {
628         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
629         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
630     };
631     int8 index;
632     bits32 z;
633 
634     index = ( a>>27 ) & 15;
635     if ( aExp & 1 ) {
636         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
637         z = ( ( a / z )<<14 ) + ( z<<15 );
638         a >>= 1;
639     }
640     else {
641         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
642         z = a / z + z;
643         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
644         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
645     }
646     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
647 
648 }
649 #endif
650 
651 /*
652 -------------------------------------------------------------------------------
653 Returns the number of leading 0 bits before the most-significant 1 bit of
654 `a'.  If `a' is zero, 32 is returned.
655 -------------------------------------------------------------------------------
656 */
657 #ifndef SOFTFLOAT_MD_CLZ
countLeadingZeros32(bits32 a)658 static int8 countLeadingZeros32( bits32 a )
659 {
660     static const int8 countLeadingZerosHigh[] = {
661         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
662         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
663         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
664         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
665         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
666         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
667         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
668         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
669         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
670         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
671         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
672         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
673         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
674         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
675         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
676         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
677     };
678     int8 shiftCount;
679 
680     shiftCount = 0;
681     if ( a < 0x10000 ) {
682         shiftCount += 16;
683         a <<= 16;
684     }
685     if ( a < 0x1000000 ) {
686         shiftCount += 8;
687         a <<= 8;
688     }
689     shiftCount += countLeadingZerosHigh[ a>>24 ];
690     return shiftCount;
691 
692 }
693 #endif
694 
695 /*
696 -------------------------------------------------------------------------------
697 Returns the number of leading 0 bits before the most-significant 1 bit of
698 `a'.  If `a' is zero, 64 is returned.
699 -------------------------------------------------------------------------------
700 */
countLeadingZeros64(bits64 a)701 static int8 countLeadingZeros64( bits64 a )
702 {
703     int8 shiftCount;
704 
705     shiftCount = 0;
706     if ( a < ( (bits64) 1 )<<32 ) {
707         shiftCount += 32;
708     }
709     else {
710         a >>= 32;
711     }
712     shiftCount += countLeadingZeros32( a );
713     return shiftCount;
714 
715 }
716 
717 #if defined(FLOATX80) || defined(FLOAT128)
718 
719 /*
720 -------------------------------------------------------------------------------
721 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
722 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
723 Otherwise, returns 0.
724 -------------------------------------------------------------------------------
725 */
eq128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)726 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
727 {
728 
729     return ( a0 == b0 ) && ( a1 == b1 );
730 
731 }
732 
733 /*
734 -------------------------------------------------------------------------------
735 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
736 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
737 Otherwise, returns 0.
738 -------------------------------------------------------------------------------
739 */
le128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)740 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
741 {
742 
743     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
744 
745 }
746 
747 /*
748 -------------------------------------------------------------------------------
749 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
750 than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
751 returns 0.
752 -------------------------------------------------------------------------------
753 */
lt128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)754 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
755 {
756 
757     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
758 
759 }
760 
761 #endif	/* FLOATX80 || FLOAT128 */
762 
#if 0

/*
-------------------------------------------------------------------------------
Tests two 128-bit values for inequality.  Returns 1 if `a0':`a1' differs
from `b0':`b1' (the `0' words are the high halves), and 0 otherwise.  This
routine is currently compiled out.
-------------------------------------------------------------------------------
*/
INLINE flag ne128(bits64 a0, bits64 a1, bits64 b0, bits64 b1)
{
    if (a0 != b0)
        return 1;
    return (a1 != b1);
}

#endif
780