1 
2 /*============================================================================
3 
4 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
5 Arithmetic Package, Release 2b.
6 
7 Written by John R. Hauser.  This work was made possible in part by the
8 International Computer Science Institute, located at Suite 600, 1947 Center
9 Street, Berkeley, California 94704.  Funding was partially provided by the
10 National Science Foundation under grant MIP-9311980.  The original version
11 of this code was written as part of a project to build a fixed-point vector
12 processor in collaboration with the University of California at Berkeley,
13 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
14 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 arithmetic/SoftFloat.html'.
16 
17 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
18 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
24 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
25 
26 Derivative works are acceptable, even for commercial purposes, so long as
27 (1) the source code for the derivative work includes prominent notice that
28 the work is derivative, and (2) the source code includes prominent notice with
29 these four paragraphs for those parts of this code that are retained.
30 
31 =============================================================================*/
32 
33 /*----------------------------------------------------------------------------
34 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
35 | bits are shifted off, they are ``jammed'' into the least significant bit of
36 | the result by setting the least significant bit to 1.  The value of `count'
37 | can be arbitrarily large; in particular, if `count' is greater than 32, the
38 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39 | The result is stored in the location pointed to by `zPtr'.
40 *----------------------------------------------------------------------------*/
41 
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)42 static INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43 {
44     bits32 z;
45 
46     if ( count == 0 ) {
47         z = a;
48     }
49     else if ( count < 32 ) {
50         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
51     }
52     else {
53         z = ( a != 0 );
54     }
55     *zPtr = z;
56 
57 }
58 
59 /*----------------------------------------------------------------------------
60 | Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
61 | number of bits given in `count'.  Any bits shifted off are lost.  The value
62 | of `count' must be less than 32.  The result is broken into two 32-bit
63 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
64 *----------------------------------------------------------------------------*/
65 
66 static INLINE void
shortShift64Left(bits32 a0,bits32 a1,int16 count,bits32 * z0Ptr,bits32 * z1Ptr)67  shortShift64Left(
68      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
69 {
70 
71     *z1Ptr = a1<<count;
72     *z0Ptr =
73         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
74 
75 }
76 
77 /*----------------------------------------------------------------------------
78 | Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
79 | by the number of bits given in `count'.  Any bits shifted off are lost.
80 | The value of `count' must be less than 32.  The result is broken into three
81 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
82 | `z1Ptr', and `z2Ptr'.
83 *----------------------------------------------------------------------------*/
84 
85 static INLINE void
shortShift96Left(bits32 a0,bits32 a1,bits32 a2,int16 count,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)86  shortShift96Left(
87      bits32 a0,
88      bits32 a1,
89      bits32 a2,
90      int16 count,
91      bits32 *z0Ptr,
92      bits32 *z1Ptr,
93      bits32 *z2Ptr
94  )
95 {
96     bits32 z0, z1, z2;
97     int8 negCount;
98 
99     z2 = a2<<count;
100     z1 = a1<<count;
101     z0 = a0<<count;
102     if ( 0 < count ) {
103         negCount = ( ( - count ) & 31 );
104         z1 |= a2>>negCount;
105         z0 |= a1>>negCount;
106     }
107     *z2Ptr = z2;
108     *z1Ptr = z1;
109     *z0Ptr = z0;
110 
111 }
112 
113 /*----------------------------------------------------------------------------
114 | Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
115 | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
116 | any carry out is lost.  The result is broken into two 32-bit pieces which
117 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
118 *----------------------------------------------------------------------------*/
119 
120 static INLINE void
add64(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr)121  add64(
122      bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
123 {
124     bits32 z1;
125 
126     z1 = a1 + b1;
127     *z1Ptr = z1;
128     *z0Ptr = a0 + b0 + ( z1 < a1 );
129 
130 }
131 
132 /*----------------------------------------------------------------------------
133 | Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
134 | 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
135 | modulo 2^96, so any carry out is lost.  The result is broken into three
136 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
137 | `z1Ptr', and `z2Ptr'.
138 *----------------------------------------------------------------------------*/
139 
140 static INLINE void
add96(bits32 a0,bits32 a1,bits32 a2,bits32 b0,bits32 b1,bits32 b2,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)141  add96(
142      bits32 a0,
143      bits32 a1,
144      bits32 a2,
145      bits32 b0,
146      bits32 b1,
147      bits32 b2,
148      bits32 *z0Ptr,
149      bits32 *z1Ptr,
150      bits32 *z2Ptr
151  )
152 {
153     bits32 z0, z1, z2;
154     int8 carry0, carry1;
155 
156     z2 = a2 + b2;
157     carry1 = ( z2 < a2 );
158     z1 = a1 + b1;
159     carry0 = ( z1 < a1 );
160     z0 = a0 + b0;
161     z1 += carry1;
162     z0 += ( z1 < carry1 );
163     z0 += carry0;
164     *z2Ptr = z2;
165     *z1Ptr = z1;
166     *z0Ptr = z0;
167 
168 }
169 
170 /*----------------------------------------------------------------------------
171 | Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
172 | 64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
173 | 2^64, so any borrow out (carry out) is lost.  The result is broken into two
174 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
175 | `z1Ptr'.
176 *----------------------------------------------------------------------------*/
177 
178 static INLINE void
sub64(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr)179  sub64(
180      bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
181 {
182 
183     *z1Ptr = a1 - b1;
184     *z0Ptr = a0 - b0 - ( a1 < b1 );
185 
186 }
187 
188 /*----------------------------------------------------------------------------
189 | Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
190 | the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
191 | is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
192 | into three 32-bit pieces which are stored at the locations pointed to by
193 | `z0Ptr', `z1Ptr', and `z2Ptr'.
194 *----------------------------------------------------------------------------*/
195 
196 static INLINE void
sub96(bits32 a0,bits32 a1,bits32 a2,bits32 b0,bits32 b1,bits32 b2,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)197  sub96(
198      bits32 a0,
199      bits32 a1,
200      bits32 a2,
201      bits32 b0,
202      bits32 b1,
203      bits32 b2,
204      bits32 *z0Ptr,
205      bits32 *z1Ptr,
206      bits32 *z2Ptr
207  )
208 {
209     bits32 z0, z1, z2;
210     int8 borrow0, borrow1;
211 
212     z2 = a2 - b2;
213     borrow1 = ( a2 < b2 );
214     z1 = a1 - b1;
215     borrow0 = ( a1 < b1 );
216     z0 = a0 - b0;
217     z0 -= ( z1 < borrow1 );
218     z1 -= borrow1;
219     z0 -= borrow0;
220     *z2Ptr = z2;
221     *z1Ptr = z1;
222     *z0Ptr = z0;
223 
224 }
225 
226 /*----------------------------------------------------------------------------
227 | Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
228 | into two 32-bit pieces which are stored at the locations pointed to by
229 | `z0Ptr' and `z1Ptr'.
230 *----------------------------------------------------------------------------*/
231 
mul32To64(bits32 a,bits32 b,bits32 * z0Ptr,bits32 * z1Ptr)232 static INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
233 {
234     bits16 aHigh, aLow, bHigh, bLow;
235     bits32 z0, zMiddleA, zMiddleB, z1;
236 
237     aLow = a;
238     aHigh = a>>16;
239     bLow = b;
240     bHigh = b>>16;
241     z1 = ( (bits32) aLow ) * bLow;
242     zMiddleA = ( (bits32) aLow ) * bHigh;
243     zMiddleB = ( (bits32) aHigh ) * bLow;
244     z0 = ( (bits32) aHigh ) * bHigh;
245     zMiddleA += zMiddleB;
246     z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
247     zMiddleA <<= 16;
248     z1 += zMiddleA;
249     z0 += ( z1 < zMiddleA );
250     *z1Ptr = z1;
251     *z0Ptr = z0;
252 
253 }
254 
255 /*----------------------------------------------------------------------------
256 | Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
257 | to obtain a 96-bit product.  The product is broken into three 32-bit pieces
258 | which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
259 | `z2Ptr'.
260 *----------------------------------------------------------------------------*/
261 
262 static INLINE void
mul64By32To96(bits32 a0,bits32 a1,bits32 b,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)263  mul64By32To96(
264      bits32 a0,
265      bits32 a1,
266      bits32 b,
267      bits32 *z0Ptr,
268      bits32 *z1Ptr,
269      bits32 *z2Ptr
270  )
271 {
272     bits32 z0, z1, z2, more1;
273 
274     mul32To64( a1, b, &z1, &z2 );
275     mul32To64( a0, b, &z0, &more1 );
276     add64( z0, more1, 0, z1, &z0, &z1 );
277     *z2Ptr = z2;
278     *z1Ptr = z1;
279     *z0Ptr = z0;
280 
281 }
282 
283 /*----------------------------------------------------------------------------
284 | Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
285 | 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
286 | product.  The product is broken into four 32-bit pieces which are stored at
287 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
288 *----------------------------------------------------------------------------*/
289 
290 static INLINE void
mul64To128(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr,bits32 * z3Ptr)291  mul64To128(
292      bits32 a0,
293      bits32 a1,
294      bits32 b0,
295      bits32 b1,
296      bits32 *z0Ptr,
297      bits32 *z1Ptr,
298      bits32 *z2Ptr,
299      bits32 *z3Ptr
300  )
301 {
302     bits32 z0, z1, z2, z3;
303     bits32 more1, more2;
304 
305     mul32To64( a1, b1, &z2, &z3 );
306     mul32To64( a1, b0, &z1, &more2 );
307     add64( z1, more2, 0, z2, &z1, &z2 );
308     mul32To64( a0, b0, &z0, &more1 );
309     add64( z0, more1, 0, z1, &z0, &z1 );
310     mul32To64( a0, b1, &more1, &more2 );
311     add64( more1, more2, 0, z2, &more1, &z2 );
312     add64( z0, z1, 0, more1, &z0, &z1 );
313     *z3Ptr = z3;
314     *z2Ptr = z2;
315     *z1Ptr = z1;
316     *z0Ptr = z0;
317 
318 }
319 
320 /*----------------------------------------------------------------------------
321 | Returns an approximation to the 32-bit integer quotient obtained by dividing
322 | `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
323 | divisor `b' must be at least 2^31.  If q is the exact quotient truncated
324 | toward zero, the approximation returned lies between q and q + 2 inclusive.
325 | If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
326 | unsigned integer is returned.
327 *----------------------------------------------------------------------------*/
328 
estimateDiv64To32(bits32 a0,bits32 a1,bits32 b)329 static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
330 {
331     bits32 b0, b1;
332     bits32 rem0, rem1, term0, term1;
333     bits32 z;
334 
335     if ( b <= a0 ) return 0xFFFFFFFF;
336     b0 = b>>16;
337     z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;
338     mul32To64( b, z, &term0, &term1 );
339     sub64( a0, a1, term0, term1, &rem0, &rem1 );
340     while ( ( (sbits32) rem0 ) < 0 ) {
341         z -= 0x10000;
342         b1 = b<<16;
343         add64( rem0, rem1, b0, b1, &rem0, &rem1 );
344     }
345     rem0 = ( rem0<<16 ) | ( rem1>>16 );
346     z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;
347     return z;
348 
349 }
350 
351 /*----------------------------------------------------------------------------
352 | Returns an approximation to the square root of the 32-bit significand given
353 | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
354 | `aExp' (the least significant bit) is 1, the integer returned approximates
355 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
356 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
357 | case, the approximation returned lies strictly within +/-2 of the exact
358 | value.
359 *----------------------------------------------------------------------------*/
360 
estimateSqrt32(int16 aExp,bits32 a)361 static bits32 estimateSqrt32( int16 aExp, bits32 a )
362 {
363     static const bits16 sqrtOddAdjustments[] = {
364         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
365         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
366     };
367     static const bits16 sqrtEvenAdjustments[] = {
368         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
369         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
370     };
371     int8 index;
372     bits32 z;
373 
374     index = ( a>>27 ) & 15;
375     if ( aExp & 1 ) {
376         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
377         z = ( ( a / z )<<14 ) + ( z<<15 );
378         a >>= 1;
379     }
380     else {
381         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
382         z = a / z + z;
383         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
384         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
385     }
386     return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );
387 
388 }
389 
390 /*----------------------------------------------------------------------------
391 | Returns the number of leading 0 bits before the most-significant 1 bit of
392 | `a'.  If `a' is zero, 32 is returned.
393 *----------------------------------------------------------------------------*/
394 
countLeadingZeros32(bits32 a)395 static int8 countLeadingZeros32( bits32 a )
396 {
397     static const int8 countLeadingZerosHigh[] = {
398         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
399         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
400         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
401         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
402         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
403         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
404         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
405         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
406         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
409         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
410         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
411         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
413         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
414     };
415     int8 shiftCount;
416 
417     shiftCount = 0;
418     if ( a < 0x10000 ) {
419         shiftCount += 16;
420         a <<= 16;
421     }
422     if ( a < 0x1000000 ) {
423         shiftCount += 8;
424         a <<= 8;
425     }
426     shiftCount += countLeadingZerosHigh[ a>>24 ];
427     return shiftCount;
428 
429 }
430 
431 /*----------------------------------------------------------------------------
432 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
433 | equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
434 | returns 0.
435 *----------------------------------------------------------------------------*/
436 
eq64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)437 static INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
438 {
439 
440     return ( a0 == b0 ) && ( a1 == b1 );
441 
442 }
443 
444 /*----------------------------------------------------------------------------
445 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
446 | than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
447 | Otherwise, returns 0.
448 *----------------------------------------------------------------------------*/
449 
le64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)450 static INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
451 {
452 
453     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
454 
455 }
456 
457 /*----------------------------------------------------------------------------
458 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
459 | than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
460 | returns 0.
461 *----------------------------------------------------------------------------*/
462 
lt64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)463 static INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
464 {
465 
466     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
467 
468 }
469 
470 /*----------------------------------------------------------------------------
471 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
472 | equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
473 | returns 0.
474 *----------------------------------------------------------------------------*/
475 
ne64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)476 static INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
477 {
478 
479     return ( a0 != b0 ) || ( a1 != b1 );
480 
481 }
482 
483