1 /* $OpenBSD: softfloat-macros.h,v 1.3 2007/12/29 17:43:14 miod Exp $ */
2 /* $NetBSD: softfloat-macros.h,v 1.1 2001/04/26 03:10:47 ross Exp $ */
3
4 /*
5 ===============================================================================
6
7 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
8 Arithmetic Package, Release 2a.
9
10 Written by John R. Hauser. This work was made possible in part by the
11 International Computer Science Institute, located at Suite 600, 1947 Center
12 Street, Berkeley, California 94704. Funding was partially provided by the
13 National Science Foundation under grant MIP-9311980. The original version
14 of this code was written as part of a project to build a fixed-point vector
15 processor in collaboration with the University of California at Berkeley,
16 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
17 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
18 arithmetic/SoftFloat.html'.
19
20 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable
21 effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT
22 WILL AT TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS
23 RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL
24 RESPONSIBILITY FOR ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM
25 THEIR OWN USE OF THE SOFTWARE, AND WHO ALSO EFFECTIVELY INDEMNIFY
26 (possibly via similar legal warning) JOHN HAUSER AND THE INTERNATIONAL
27 COMPUTER SCIENCE INSTITUTE AGAINST ALL LOSSES, COSTS, OR OTHER PROBLEMS
28 ARISING FROM THE USE OF THE SOFTWARE BY THEIR CUSTOMERS AND CLIENTS.
29
30 Derivative works are acceptable, even for commercial purposes, so long as
31 (1) they include prominent notice that the work is derivative, and (2) they
32 include prominent notice akin to these four paragraphs for those parts of
33 this code that are retained.
34
35 ===============================================================================
36 */
37
38 /*
39 -------------------------------------------------------------------------------
40 Shifts `a' right by the number of bits given in `count'. If any nonzero
41 bits are shifted off, they are ``jammed'' into the least significant bit of
42 the result by setting the least significant bit to 1. The value of `count'
43 can be arbitrarily large; in particular, if `count' is greater than 32, the
44 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
45 The result is stored in the location pointed to by `zPtr'.
46 -------------------------------------------------------------------------------
47 */
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)48 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
49 {
50 bits32 z;
51
52 if ( count == 0 ) {
53 z = a;
54 }
55 else if ( count < 32 ) {
56 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
57 }
58 else {
59 z = ( a != 0 );
60 }
61 *zPtr = z;
62
63 }
64
65 /*
66 -------------------------------------------------------------------------------
67 Shifts `a' right by the number of bits given in `count'. If any nonzero
68 bits are shifted off, they are ``jammed'' into the least significant bit of
69 the result by setting the least significant bit to 1. The value of `count'
70 can be arbitrarily large; in particular, if `count' is greater than 64, the
71 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
72 The result is stored in the location pointed to by `zPtr'.
73 -------------------------------------------------------------------------------
74 */
shift64RightJamming(bits64 a,int16 count,bits64 * zPtr)75 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
76 {
77 bits64 z;
78
79 if ( count == 0 ) {
80 z = a;
81 }
82 else if ( count < 64 ) {
83 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
84 }
85 else {
86 z = ( a != 0 );
87 }
88 *zPtr = z;
89
90 }
91
92 /*
93 -------------------------------------------------------------------------------
94 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
95 _plus_ the number of bits given in `count'. The shifted result is at most
96 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
97 bits shifted off form a second 64-bit result as follows: The _last_ bit
98 shifted off is the most-significant bit of the extra result, and the other
99 63 bits of the extra result are all zero if and only if _all_but_the_last_
100 bits shifted off were all zero. This extra result is stored in the location
101 pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
102 (This routine makes more sense if `a0' and `a1' are considered to form a
103 fixed-point value with binary point between `a0' and `a1'. This fixed-point
104 value is shifted right by the number of bits given in `count', and the
105 integer part of the result is returned at the location pointed to by
106 `z0Ptr'. The fractional part of the result may be slightly corrupted as
107 described above, and is returned at the location pointed to by `z1Ptr'.)
108 -------------------------------------------------------------------------------
109 */
110 INLINE void
shift64ExtraRightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)111 shift64ExtraRightJamming(
112 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
113 {
114 bits64 z0, z1;
115 int8 negCount = ( - count ) & 63;
116
117 if ( count == 0 ) {
118 z1 = a1;
119 z0 = a0;
120 }
121 else if ( count < 64 ) {
122 z1 = ( a0<<negCount ) | ( a1 != 0 );
123 z0 = a0>>count;
124 }
125 else {
126 if ( count == 64 ) {
127 z1 = a0 | ( a1 != 0 );
128 }
129 else {
130 z1 = ( ( a0 | a1 ) != 0 );
131 }
132 z0 = 0;
133 }
134 *z1Ptr = z1;
135 *z0Ptr = z0;
136
137 }
138
139 #if defined(FLOATX80) || defined(FLOAT128)
140
141 /*
142 -------------------------------------------------------------------------------
143 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
144 number of bits given in `count'. Any bits shifted off are lost. The value
145 of `count' can be arbitrarily large; in particular, if `count' is greater
146 than 128, the result will be 0. The result is broken into two 64-bit pieces
147 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
148 -------------------------------------------------------------------------------
149 */
150 INLINE void
shift128Right(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)151 shift128Right(
152 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
153 {
154 bits64 z0, z1;
155 int8 negCount = ( - count ) & 63;
156
157 if ( count == 0 ) {
158 z1 = a1;
159 z0 = a0;
160 }
161 else if ( count < 64 ) {
162 z1 = ( a0<<negCount ) | ( a1>>count );
163 z0 = a0>>count;
164 }
165 else {
166 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
167 z0 = 0;
168 }
169 *z1Ptr = z1;
170 *z0Ptr = z0;
171
172 }
173
174 /*
175 -------------------------------------------------------------------------------
176 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
177 number of bits given in `count'. If any nonzero bits are shifted off, they
178 are ``jammed'' into the least significant bit of the result by setting the
179 least significant bit to 1. The value of `count' can be arbitrarily large;
180 in particular, if `count' is greater than 128, the result will be either
181 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
182 nonzero. The result is broken into two 64-bit pieces which are stored at
183 the locations pointed to by `z0Ptr' and `z1Ptr'.
184 -------------------------------------------------------------------------------
185 */
186 INLINE void
shift128RightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)187 shift128RightJamming(
188 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
189 {
190 bits64 z0, z1;
191 int8 negCount = ( - count ) & 63;
192
193 if ( count == 0 ) {
194 z1 = a1;
195 z0 = a0;
196 }
197 else if ( count < 64 ) {
198 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
199 z0 = a0>>count;
200 }
201 else {
202 if ( count == 64 ) {
203 z1 = a0 | ( a1 != 0 );
204 }
205 else if ( count < 128 ) {
206 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
207 }
208 else {
209 z1 = ( ( a0 | a1 ) != 0 );
210 }
211 z0 = 0;
212 }
213 *z1Ptr = z1;
214 *z0Ptr = z0;
215
216 }
217
218 /*
219 -------------------------------------------------------------------------------
220 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
221 by 64 _plus_ the number of bits given in `count'. The shifted result is
222 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
223 stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
224 off form a third 64-bit result as follows: The _last_ bit shifted off is
225 the most-significant bit of the extra result, and the other 63 bits of the
226 extra result are all zero if and only if _all_but_the_last_ bits shifted off
227 were all zero. This extra result is stored in the location pointed to by
228 `z2Ptr'. The value of `count' can be arbitrarily large.
229 (This routine makes more sense if `a0', `a1', and `a2' are considered
230 to form a fixed-point value with binary point between `a1' and `a2'. This
231 fixed-point value is shifted right by the number of bits given in `count',
232 and the integer part of the result is returned at the locations pointed to
233 by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
234 corrupted as described above, and is returned at the location pointed to by
235 `z2Ptr'.)
236 -------------------------------------------------------------------------------
237 */
238 INLINE void
shift128ExtraRightJamming(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)239 shift128ExtraRightJamming(
240 bits64 a0,
241 bits64 a1,
242 bits64 a2,
243 int16 count,
244 bits64 *z0Ptr,
245 bits64 *z1Ptr,
246 bits64 *z2Ptr
247 )
248 {
249 bits64 z0, z1, z2;
250 int8 negCount = ( - count ) & 63;
251
252 if ( count == 0 ) {
253 z2 = a2;
254 z1 = a1;
255 z0 = a0;
256 }
257 else {
258 if ( count < 64 ) {
259 z2 = a1<<negCount;
260 z1 = ( a0<<negCount ) | ( a1>>count );
261 z0 = a0>>count;
262 }
263 else {
264 if ( count == 64 ) {
265 z2 = a1;
266 z1 = a0;
267 }
268 else {
269 a2 |= a1;
270 if ( count < 128 ) {
271 z2 = a0<<negCount;
272 z1 = a0>>( count & 63 );
273 }
274 else {
275 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
276 z1 = 0;
277 }
278 }
279 z0 = 0;
280 }
281 z2 |= ( a2 != 0 );
282 }
283 *z2Ptr = z2;
284 *z1Ptr = z1;
285 *z0Ptr = z0;
286
287 }
288
289 /*
290 -------------------------------------------------------------------------------
291 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
292 number of bits given in `count'. Any bits shifted off are lost. The value
293 of `count' must be less than 64. The result is broken into two 64-bit
294 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
295 -------------------------------------------------------------------------------
296 */
297 INLINE void
shortShift128Left(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)298 shortShift128Left(
299 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
300 {
301
302 *z1Ptr = a1<<count;
303 *z0Ptr =
304 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
305
306 }
307
308 #endif /* FLOATX80 || FLOAT128 */
309
310 #ifdef FLOAT128
311
312 /*
313 -------------------------------------------------------------------------------
314 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
315 by the number of bits given in `count'. Any bits shifted off are lost.
316 The value of `count' must be less than 64. The result is broken into three
317 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
318 `z1Ptr', and `z2Ptr'.
319 -------------------------------------------------------------------------------
320 */
321 INLINE void
shortShift192Left(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)322 shortShift192Left(
323 bits64 a0,
324 bits64 a1,
325 bits64 a2,
326 int16 count,
327 bits64 *z0Ptr,
328 bits64 *z1Ptr,
329 bits64 *z2Ptr
330 )
331 {
332 bits64 z0, z1, z2;
333 int8 negCount;
334
335 z2 = a2<<count;
336 z1 = a1<<count;
337 z0 = a0<<count;
338 if ( 0 < count ) {
339 negCount = ( ( - count ) & 63 );
340 z1 |= a2>>negCount;
341 z0 |= a1>>negCount;
342 }
343 *z2Ptr = z2;
344 *z1Ptr = z1;
345 *z0Ptr = z0;
346
347 }
348
349 #endif /* FLOAT128 */
350
351 /*
352 -------------------------------------------------------------------------------
353 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
354 value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
355 any carry out is lost. The result is broken into two 64-bit pieces which
356 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
357 -------------------------------------------------------------------------------
358 */
359 INLINE void
add128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)360 add128(
361 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
362 {
363 bits64 z1;
364
365 z1 = a1 + b1;
366 *z1Ptr = z1;
367 *z0Ptr = a0 + b0 + ( z1 < a1 );
368
369 }
370
371 #if defined(FLOATX80) || defined(FLOAT128)
372
373 /*
374 -------------------------------------------------------------------------------
375 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
376 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
377 modulo 2^192, so any carry out is lost. The result is broken into three
378 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
379 `z1Ptr', and `z2Ptr'.
380 -------------------------------------------------------------------------------
381 */
382 INLINE void
add192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)383 add192(
384 bits64 a0,
385 bits64 a1,
386 bits64 a2,
387 bits64 b0,
388 bits64 b1,
389 bits64 b2,
390 bits64 *z0Ptr,
391 bits64 *z1Ptr,
392 bits64 *z2Ptr
393 )
394 {
395 bits64 z0, z1, z2;
396 int8 carry0, carry1;
397
398 z2 = a2 + b2;
399 carry1 = ( z2 < a2 );
400 z1 = a1 + b1;
401 carry0 = ( z1 < a1 );
402 z0 = a0 + b0;
403 z1 += carry1;
404 z0 += ( z1 < carry1 );
405 z0 += carry0;
406 *z2Ptr = z2;
407 *z1Ptr = z1;
408 *z0Ptr = z0;
409
410 }
411
412 #endif /* FLOATX80 || FLOAT128 */
413
414 /*
415 -------------------------------------------------------------------------------
416 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
417 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
418 2^128, so any borrow out (carry out) is lost. The result is broken into two
419 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
420 `z1Ptr'.
421 -------------------------------------------------------------------------------
422 */
423 INLINE void
sub128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)424 sub128(
425 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
426 {
427
428 *z1Ptr = a1 - b1;
429 *z0Ptr = a0 - b0 - ( a1 < b1 );
430
431 }
432
433 #if defined(FLOATX80) || defined(FLOAT128)
434
435 /*
436 -------------------------------------------------------------------------------
437 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
438 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
439 Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
440 result is broken into three 64-bit pieces which are stored at the locations
441 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
442 -------------------------------------------------------------------------------
443 */
444 INLINE void
sub192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)445 sub192(
446 bits64 a0,
447 bits64 a1,
448 bits64 a2,
449 bits64 b0,
450 bits64 b1,
451 bits64 b2,
452 bits64 *z0Ptr,
453 bits64 *z1Ptr,
454 bits64 *z2Ptr
455 )
456 {
457 bits64 z0, z1, z2;
458 int8 borrow0, borrow1;
459
460 z2 = a2 - b2;
461 borrow1 = ( a2 < b2 );
462 z1 = a1 - b1;
463 borrow0 = ( a1 < b1 );
464 z0 = a0 - b0;
465 z0 -= ( z1 < borrow1 );
466 z1 -= borrow1;
467 z0 -= borrow0;
468 *z2Ptr = z2;
469 *z1Ptr = z1;
470 *z0Ptr = z0;
471
472 }
473
474 #endif /* FLOATX80 || FLOAT128 */
475
476 /*
477 -------------------------------------------------------------------------------
478 Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
479 into two 64-bit pieces which are stored at the locations pointed to by
480 `z0Ptr' and `z1Ptr'.
481 -------------------------------------------------------------------------------
482 */
mul64To128(bits64 a,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr)483 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
484 {
485 bits32 aHigh, aLow, bHigh, bLow;
486 bits64 z0, zMiddleA, zMiddleB, z1;
487
488 aLow = a;
489 aHigh = a>>32;
490 bLow = b;
491 bHigh = b>>32;
492 z1 = ( (bits64) aLow ) * bLow;
493 zMiddleA = ( (bits64) aLow ) * bHigh;
494 zMiddleB = ( (bits64) aHigh ) * bLow;
495 z0 = ( (bits64) aHigh ) * bHigh;
496 zMiddleA += zMiddleB;
497 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
498 zMiddleA <<= 32;
499 z1 += zMiddleA;
500 z0 += ( z1 < zMiddleA );
501 *z1Ptr = z1;
502 *z0Ptr = z0;
503
504 }
505
506 #ifdef FLOAT128
507
508 /*
509 -------------------------------------------------------------------------------
510 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
511 `b' to obtain a 192-bit product. The product is broken into three 64-bit
512 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
513 `z2Ptr'.
514 -------------------------------------------------------------------------------
515 */
516 INLINE void
mul128By64To192(bits64 a0,bits64 a1,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)517 mul128By64To192(
518 bits64 a0,
519 bits64 a1,
520 bits64 b,
521 bits64 *z0Ptr,
522 bits64 *z1Ptr,
523 bits64 *z2Ptr
524 )
525 {
526 bits64 z0, z1, z2, more1;
527
528 mul64To128( a1, b, &z1, &z2 );
529 mul64To128( a0, b, &z0, &more1 );
530 add128( z0, more1, 0, z1, &z0, &z1 );
531 *z2Ptr = z2;
532 *z1Ptr = z1;
533 *z0Ptr = z0;
534
535 }
536
537 /*
538 -------------------------------------------------------------------------------
539 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
540 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
541 product. The product is broken into four 64-bit pieces which are stored at
542 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
543 -------------------------------------------------------------------------------
544 */
545 INLINE void
mul128To256(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr,bits64 * z3Ptr)546 mul128To256(
547 bits64 a0,
548 bits64 a1,
549 bits64 b0,
550 bits64 b1,
551 bits64 *z0Ptr,
552 bits64 *z1Ptr,
553 bits64 *z2Ptr,
554 bits64 *z3Ptr
555 )
556 {
557 bits64 z0, z1, z2, z3;
558 bits64 more1, more2;
559
560 mul64To128( a1, b1, &z2, &z3 );
561 mul64To128( a1, b0, &z1, &more2 );
562 add128( z1, more2, 0, z2, &z1, &z2 );
563 mul64To128( a0, b0, &z0, &more1 );
564 add128( z0, more1, 0, z1, &z0, &z1 );
565 mul64To128( a0, b1, &more1, &more2 );
566 add128( more1, more2, 0, z2, &more1, &z2 );
567 add128( z0, z1, 0, more1, &z0, &z1 );
568 *z3Ptr = z3;
569 *z2Ptr = z2;
570 *z1Ptr = z1;
571 *z0Ptr = z0;
572
573 }
574
575 #endif /* FLOAT128 */
576
577 /*
578 -------------------------------------------------------------------------------
579 Returns an approximation to the 64-bit integer quotient obtained by dividing
580 `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
581 divisor `b' must be at least 2^63. If q is the exact quotient truncated
582 toward zero, the approximation returned lies between q and q + 2 inclusive.
583 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
584 unsigned integer is returned.
585 -------------------------------------------------------------------------------
586 */
estimateDiv128To64(bits64 a0,bits64 a1,bits64 b)587 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
588 {
589 bits64 b0, b1;
590 bits64 rem0, rem1, term0, term1;
591 bits64 z;
592
593 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
594 b0 = b>>32;
595 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
596 mul64To128( b, z, &term0, &term1 );
597 sub128( a0, a1, term0, term1, &rem0, &rem1 );
598 while ( ( (sbits64) rem0 ) < 0 ) {
599 z -= LIT64( 0x100000000 );
600 b1 = b<<32;
601 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
602 }
603 rem0 = ( rem0<<32 ) | ( rem1>>32 );
604 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
605 return z;
606
607 }
608
609 #ifndef SOFTFLOAT_FOR_GCC /* Not used */
610 /*
611 -------------------------------------------------------------------------------
612 Returns an approximation to the square root of the 32-bit significand given
613 by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
614 `aExp' (the least significant bit) is 1, the integer returned approximates
615 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
616 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
617 case, the approximation returned lies strictly within +/-2 of the exact
618 value.
619 -------------------------------------------------------------------------------
620 */
estimateSqrt32(int16 aExp,bits32 a)621 static bits32 estimateSqrt32( int16 aExp, bits32 a )
622 {
623 static const bits16 sqrtOddAdjustments[] = {
624 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
625 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
626 };
627 static const bits16 sqrtEvenAdjustments[] = {
628 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
629 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
630 };
631 int8 index;
632 bits32 z;
633
634 index = ( a>>27 ) & 15;
635 if ( aExp & 1 ) {
636 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
637 z = ( ( a / z )<<14 ) + ( z<<15 );
638 a >>= 1;
639 }
640 else {
641 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
642 z = a / z + z;
643 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
644 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
645 }
646 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
647
648 }
649 #endif
650
651 /*
652 -------------------------------------------------------------------------------
653 Returns the number of leading 0 bits before the most-significant 1 bit of
654 `a'. If `a' is zero, 32 is returned.
655 -------------------------------------------------------------------------------
656 */
657 #ifndef SOFTFLOAT_MD_CLZ
countLeadingZeros32(bits32 a)658 static int8 countLeadingZeros32( bits32 a )
659 {
660 static const int8 countLeadingZerosHigh[] = {
661 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
662 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
663 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
664 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
665 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
666 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
667 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
668 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
677 };
678 int8 shiftCount;
679
680 shiftCount = 0;
681 if ( a < 0x10000 ) {
682 shiftCount += 16;
683 a <<= 16;
684 }
685 if ( a < 0x1000000 ) {
686 shiftCount += 8;
687 a <<= 8;
688 }
689 shiftCount += countLeadingZerosHigh[ a>>24 ];
690 return shiftCount;
691
692 }
693 #endif
694
695 /*
696 -------------------------------------------------------------------------------
697 Returns the number of leading 0 bits before the most-significant 1 bit of
698 `a'. If `a' is zero, 64 is returned.
699 -------------------------------------------------------------------------------
700 */
countLeadingZeros64(bits64 a)701 static int8 countLeadingZeros64( bits64 a )
702 {
703 int8 shiftCount;
704
705 shiftCount = 0;
706 if ( a < ( (bits64) 1 )<<32 ) {
707 shiftCount += 32;
708 }
709 else {
710 a >>= 32;
711 }
712 shiftCount += countLeadingZeros32( a );
713 return shiftCount;
714
715 }
716
717 #if defined(FLOATX80) || defined(FLOAT128)
718
719 /*
720 -------------------------------------------------------------------------------
721 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
722 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
723 Otherwise, returns 0.
724 -------------------------------------------------------------------------------
725 */
eq128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)726 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
727 {
728
729 return ( a0 == b0 ) && ( a1 == b1 );
730
731 }
732
733 /*
734 -------------------------------------------------------------------------------
735 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
736 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
737 Otherwise, returns 0.
738 -------------------------------------------------------------------------------
739 */
le128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)740 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
741 {
742
743 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
744
745 }
746
747 /*
748 -------------------------------------------------------------------------------
749 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
750 than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
751 returns 0.
752 -------------------------------------------------------------------------------
753 */
lt128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)754 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
755 {
756
757 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
758
759 }
760
761 #endif /* FLOATX80 || FLOAT128 */
762
763 #if 0
764
765 /*
766 -------------------------------------------------------------------------------
767 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
768 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
769 Otherwise, returns 0.
770 -------------------------------------------------------------------------------
771 */
772 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
773 {
774
775 return ( a0 != b0 ) || ( a1 != b1 );
776
777 }
778
779 #endif
780