1
2 /*============================================================================
3
4 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
5 Arithmetic Package, Release 2b.
6
7 Written by John R. Hauser. This work was made possible in part by the
8 International Computer Science Institute, located at Suite 600, 1947 Center
9 Street, Berkeley, California 94704. Funding was partially provided by the
10 National Science Foundation under grant MIP-9311980. The original version
11 of this code was written as part of a project to build a fixed-point vector
12 processor in collaboration with the University of California at Berkeley,
13 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
14 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 arithmetic/SoftFloat.html'.
16
17 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
18 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
24 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
25
26 Derivative works are acceptable, even for commercial purposes, so long as
27 (1) the source code for the derivative work includes prominent notice that
28 the work is derivative, and (2) the source code includes prominent notice with
29 these four paragraphs for those parts of this code that are retained.
30
31 =============================================================================*/
32
33 /*----------------------------------------------------------------------------
34 | Shifts `a' right by the number of bits given in `count'. If any nonzero
35 | bits are shifted off, they are ``jammed'' into the least significant bit of
36 | the result by setting the least significant bit to 1. The value of `count'
37 | can be arbitrarily large; in particular, if `count' is greater than 32, the
38 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39 | The result is stored in the location pointed to by `zPtr'.
40 *----------------------------------------------------------------------------*/
41
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)42 static INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43 {
44 bits32 z;
45
46 if ( count == 0 ) {
47 z = a;
48 }
49 else if ( count < 32 ) {
50 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
51 }
52 else {
53 z = ( a != 0 );
54 }
55 *zPtr = z;
56
57 }
58
59 /*----------------------------------------------------------------------------
60 | Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
61 | number of bits given in `count'. Any bits shifted off are lost. The value
62 | of `count' must be less than 32. The result is broken into two 32-bit
63 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
64 *----------------------------------------------------------------------------*/
65
66 static INLINE void
shortShift64Left(bits32 a0,bits32 a1,int16 count,bits32 * z0Ptr,bits32 * z1Ptr)67 shortShift64Left(
68 bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
69 {
70
71 *z1Ptr = a1<<count;
72 *z0Ptr =
73 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
74
75 }
76
77 /*----------------------------------------------------------------------------
78 | Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
79 | by the number of bits given in `count'. Any bits shifted off are lost.
80 | The value of `count' must be less than 32. The result is broken into three
81 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
82 | `z1Ptr', and `z2Ptr'.
83 *----------------------------------------------------------------------------*/
84
85 static INLINE void
shortShift96Left(bits32 a0,bits32 a1,bits32 a2,int16 count,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)86 shortShift96Left(
87 bits32 a0,
88 bits32 a1,
89 bits32 a2,
90 int16 count,
91 bits32 *z0Ptr,
92 bits32 *z1Ptr,
93 bits32 *z2Ptr
94 )
95 {
96 bits32 z0, z1, z2;
97 int8 negCount;
98
99 z2 = a2<<count;
100 z1 = a1<<count;
101 z0 = a0<<count;
102 if ( 0 < count ) {
103 negCount = ( ( - count ) & 31 );
104 z1 |= a2>>negCount;
105 z0 |= a1>>negCount;
106 }
107 *z2Ptr = z2;
108 *z1Ptr = z1;
109 *z0Ptr = z0;
110
111 }
112
113 /*----------------------------------------------------------------------------
114 | Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
115 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
116 | any carry out is lost. The result is broken into two 32-bit pieces which
117 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
118 *----------------------------------------------------------------------------*/
119
120 static INLINE void
add64(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr)121 add64(
122 bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
123 {
124 bits32 z1;
125
126 z1 = a1 + b1;
127 *z1Ptr = z1;
128 *z0Ptr = a0 + b0 + ( z1 < a1 );
129
130 }
131
132 /*----------------------------------------------------------------------------
133 | Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
134 | 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
135 | modulo 2^96, so any carry out is lost. The result is broken into three
136 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
137 | `z1Ptr', and `z2Ptr'.
138 *----------------------------------------------------------------------------*/
139
140 static INLINE void
add96(bits32 a0,bits32 a1,bits32 a2,bits32 b0,bits32 b1,bits32 b2,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)141 add96(
142 bits32 a0,
143 bits32 a1,
144 bits32 a2,
145 bits32 b0,
146 bits32 b1,
147 bits32 b2,
148 bits32 *z0Ptr,
149 bits32 *z1Ptr,
150 bits32 *z2Ptr
151 )
152 {
153 bits32 z0, z1, z2;
154 int8 carry0, carry1;
155
156 z2 = a2 + b2;
157 carry1 = ( z2 < a2 );
158 z1 = a1 + b1;
159 carry0 = ( z1 < a1 );
160 z0 = a0 + b0;
161 z1 += carry1;
162 z0 += ( z1 < carry1 );
163 z0 += carry0;
164 *z2Ptr = z2;
165 *z1Ptr = z1;
166 *z0Ptr = z0;
167
168 }
169
170 /*----------------------------------------------------------------------------
171 | Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
172 | 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
173 | 2^64, so any borrow out (carry out) is lost. The result is broken into two
174 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
175 | `z1Ptr'.
176 *----------------------------------------------------------------------------*/
177
178 static INLINE void
sub64(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr)179 sub64(
180 bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
181 {
182
183 *z1Ptr = a1 - b1;
184 *z0Ptr = a0 - b0 - ( a1 < b1 );
185
186 }
187
188 /*----------------------------------------------------------------------------
189 | Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
190 | the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction
191 | is modulo 2^96, so any borrow out (carry out) is lost. The result is broken
192 | into three 32-bit pieces which are stored at the locations pointed to by
193 | `z0Ptr', `z1Ptr', and `z2Ptr'.
194 *----------------------------------------------------------------------------*/
195
196 static INLINE void
sub96(bits32 a0,bits32 a1,bits32 a2,bits32 b0,bits32 b1,bits32 b2,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)197 sub96(
198 bits32 a0,
199 bits32 a1,
200 bits32 a2,
201 bits32 b0,
202 bits32 b1,
203 bits32 b2,
204 bits32 *z0Ptr,
205 bits32 *z1Ptr,
206 bits32 *z2Ptr
207 )
208 {
209 bits32 z0, z1, z2;
210 int8 borrow0, borrow1;
211
212 z2 = a2 - b2;
213 borrow1 = ( a2 < b2 );
214 z1 = a1 - b1;
215 borrow0 = ( a1 < b1 );
216 z0 = a0 - b0;
217 z0 -= ( z1 < borrow1 );
218 z1 -= borrow1;
219 z0 -= borrow0;
220 *z2Ptr = z2;
221 *z1Ptr = z1;
222 *z0Ptr = z0;
223
224 }
225
226 /*----------------------------------------------------------------------------
227 | Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
228 | into two 32-bit pieces which are stored at the locations pointed to by
229 | `z0Ptr' and `z1Ptr'.
230 *----------------------------------------------------------------------------*/
231
mul32To64(bits32 a,bits32 b,bits32 * z0Ptr,bits32 * z1Ptr)232 static INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
233 {
234 bits16 aHigh, aLow, bHigh, bLow;
235 bits32 z0, zMiddleA, zMiddleB, z1;
236
237 aLow = a;
238 aHigh = a>>16;
239 bLow = b;
240 bHigh = b>>16;
241 z1 = ( (bits32) aLow ) * bLow;
242 zMiddleA = ( (bits32) aLow ) * bHigh;
243 zMiddleB = ( (bits32) aHigh ) * bLow;
244 z0 = ( (bits32) aHigh ) * bHigh;
245 zMiddleA += zMiddleB;
246 z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
247 zMiddleA <<= 16;
248 z1 += zMiddleA;
249 z0 += ( z1 < zMiddleA );
250 *z1Ptr = z1;
251 *z0Ptr = z0;
252
253 }
254
255 /*----------------------------------------------------------------------------
256 | Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
257 | to obtain a 96-bit product. The product is broken into three 32-bit pieces
258 | which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
259 | `z2Ptr'.
260 *----------------------------------------------------------------------------*/
261
262 static INLINE void
mul64By32To96(bits32 a0,bits32 a1,bits32 b,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)263 mul64By32To96(
264 bits32 a0,
265 bits32 a1,
266 bits32 b,
267 bits32 *z0Ptr,
268 bits32 *z1Ptr,
269 bits32 *z2Ptr
270 )
271 {
272 bits32 z0, z1, z2, more1;
273
274 mul32To64( a1, b, &z1, &z2 );
275 mul32To64( a0, b, &z0, &more1 );
276 add64( z0, more1, 0, z1, &z0, &z1 );
277 *z2Ptr = z2;
278 *z1Ptr = z1;
279 *z0Ptr = z0;
280
281 }
282
283 /*----------------------------------------------------------------------------
284 | Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
285 | 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
286 | product. The product is broken into four 32-bit pieces which are stored at
287 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
288 *----------------------------------------------------------------------------*/
289
290 static INLINE void
mul64To128(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr,bits32 * z3Ptr)291 mul64To128(
292 bits32 a0,
293 bits32 a1,
294 bits32 b0,
295 bits32 b1,
296 bits32 *z0Ptr,
297 bits32 *z1Ptr,
298 bits32 *z2Ptr,
299 bits32 *z3Ptr
300 )
301 {
302 bits32 z0, z1, z2, z3;
303 bits32 more1, more2;
304
305 mul32To64( a1, b1, &z2, &z3 );
306 mul32To64( a1, b0, &z1, &more2 );
307 add64( z1, more2, 0, z2, &z1, &z2 );
308 mul32To64( a0, b0, &z0, &more1 );
309 add64( z0, more1, 0, z1, &z0, &z1 );
310 mul32To64( a0, b1, &more1, &more2 );
311 add64( more1, more2, 0, z2, &more1, &z2 );
312 add64( z0, z1, 0, more1, &z0, &z1 );
313 *z3Ptr = z3;
314 *z2Ptr = z2;
315 *z1Ptr = z1;
316 *z0Ptr = z0;
317
318 }
319
320 /*----------------------------------------------------------------------------
321 | Returns an approximation to the 32-bit integer quotient obtained by dividing
322 | `b' into the 64-bit value formed by concatenating `a0' and `a1'. The
323 | divisor `b' must be at least 2^31. If q is the exact quotient truncated
324 | toward zero, the approximation returned lies between q and q + 2 inclusive.
325 | If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
326 | unsigned integer is returned.
327 *----------------------------------------------------------------------------*/
328
estimateDiv64To32(bits32 a0,bits32 a1,bits32 b)329 static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
330 {
331 bits32 b0, b1;
332 bits32 rem0, rem1, term0, term1;
333 bits32 z;
334
335 if ( b <= a0 ) return 0xFFFFFFFF;
336 b0 = b>>16;
337 z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;
338 mul32To64( b, z, &term0, &term1 );
339 sub64( a0, a1, term0, term1, &rem0, &rem1 );
340 while ( ( (sbits32) rem0 ) < 0 ) {
341 z -= 0x10000;
342 b1 = b<<16;
343 add64( rem0, rem1, b0, b1, &rem0, &rem1 );
344 }
345 rem0 = ( rem0<<16 ) | ( rem1>>16 );
346 z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;
347 return z;
348
349 }
350
351 /*----------------------------------------------------------------------------
352 | Returns an approximation to the square root of the 32-bit significand given
353 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
354 | `aExp' (the least significant bit) is 1, the integer returned approximates
355 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
356 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
357 | case, the approximation returned lies strictly within +/-2 of the exact
358 | value.
359 *----------------------------------------------------------------------------*/
360
estimateSqrt32(int16 aExp,bits32 a)361 static bits32 estimateSqrt32( int16 aExp, bits32 a )
362 {
363 static const bits16 sqrtOddAdjustments[] = {
364 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
365 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
366 };
367 static const bits16 sqrtEvenAdjustments[] = {
368 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
369 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
370 };
371 int8 index;
372 bits32 z;
373
374 index = ( a>>27 ) & 15;
375 if ( aExp & 1 ) {
376 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
377 z = ( ( a / z )<<14 ) + ( z<<15 );
378 a >>= 1;
379 }
380 else {
381 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
382 z = a / z + z;
383 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
384 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
385 }
386 return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );
387
388 }
389
390 /*----------------------------------------------------------------------------
391 | Returns the number of leading 0 bits before the most-significant 1 bit of
392 | `a'. If `a' is zero, 32 is returned.
393 *----------------------------------------------------------------------------*/
394
countLeadingZeros32(bits32 a)395 static int8 countLeadingZeros32( bits32 a )
396 {
397 static const int8 countLeadingZerosHigh[] = {
398 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
399 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
400 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
401 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
402 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
403 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
404 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
405 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
409 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
410 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
413 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
414 };
415 int8 shiftCount;
416
417 shiftCount = 0;
418 if ( a < 0x10000 ) {
419 shiftCount += 16;
420 a <<= 16;
421 }
422 if ( a < 0x1000000 ) {
423 shiftCount += 8;
424 a <<= 8;
425 }
426 shiftCount += countLeadingZerosHigh[ a>>24 ];
427 return shiftCount;
428
429 }
430
431 /*----------------------------------------------------------------------------
432 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
433 | equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
434 | returns 0.
435 *----------------------------------------------------------------------------*/
436
eq64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)437 static INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
438 {
439
440 return ( a0 == b0 ) && ( a1 == b1 );
441
442 }
443
444 /*----------------------------------------------------------------------------
445 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
446 | than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
447 | Otherwise, returns 0.
448 *----------------------------------------------------------------------------*/
449
le64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)450 static INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
451 {
452
453 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
454
455 }
456
457 /*----------------------------------------------------------------------------
458 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
459 | than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
460 | returns 0.
461 *----------------------------------------------------------------------------*/
462
lt64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)463 static INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
464 {
465
466 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
467
468 }
469
470 /*----------------------------------------------------------------------------
471 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
472 | equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
473 | returns 0.
474 *----------------------------------------------------------------------------*/
475
ne64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)476 static INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
477 {
478
479 return ( a0 != b0 ) || ( a1 != b1 );
480
481 }
482
483