1 /*	$OpenBSD: softfloat-macros.h,v 1.2 2018/01/19 16:16:09 kettenis Exp $	*/
2 /*
3 ===============================================================================
4 
5 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6 Arithmetic Package, Release 2a.
7 
8 Written by John R. Hauser.  This work was made possible in part by the
9 International Computer Science Institute, located at Suite 600, 1947 Center
10 Street, Berkeley, California 94704.  Funding was partially provided by the
11 National Science Foundation under grant MIP-9311980.  The original version
12 of this code was written as part of a project to build a fixed-point vector
13 processor in collaboration with the University of California at Berkeley,
14 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
15 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
16 arithmetic/SoftFloat.html'.
17 
18 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
19 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23 
24 Derivative works are acceptable, even for commercial purposes, so long as
25 (1) they include prominent notice that the work is derivative, and (2) they
26 include prominent notice akin to these four paragraphs for those parts of
27 this code that are retained.
28 
29 ===============================================================================
30 */
31 
32 /*
33 -------------------------------------------------------------------------------
34 Shifts `a' right by the number of bits given in `count'.  If any nonzero
35 bits are shifted off, they are ``jammed'' into the least significant bit of
36 the result by setting the least significant bit to 1.  The value of `count'
37 can be arbitrarily large; in particular, if `count' is greater than 32, the
38 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39 The result is stored in the location pointed to by `zPtr'.
40 -------------------------------------------------------------------------------
41 */
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)42 static __inline void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43 {
44     bits32 z;
45 
46     if ( count == 0 ) {
47         z = a;
48     }
49     else if ( count < 32 ) {
50         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
51     }
52     else {
53         z = ( a != 0 );
54     }
55     *zPtr = z;
56 
57 }
58 
59 /*
60 -------------------------------------------------------------------------------
61 Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
62 number of bits given in `count'.  Any bits shifted off are lost.  The value
63 of `count' can be arbitrarily large; in particular, if `count' is greater
64 than 64, the result will be 0.  The result is broken into two 32-bit pieces
65 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
66 -------------------------------------------------------------------------------
67 */
68 static __inline void
shift64Right(bits32 a0,bits32 a1,int16 count,bits32 * z0Ptr,bits32 * z1Ptr)69  shift64Right(
70      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
71 {
72     bits32 z0, z1;
73     int8 negCount = ( - count ) & 31;
74 
75     if ( count == 0 ) {
76         z1 = a1;
77         z0 = a0;
78     }
79     else if ( count < 32 ) {
80         z1 = ( a0<<negCount ) | ( a1>>count );
81         z0 = a0>>count;
82     }
83     else {
84         z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0;
85         z0 = 0;
86     }
87     *z1Ptr = z1;
88     *z0Ptr = z0;
89 
90 }
91 
92 /*
93 -------------------------------------------------------------------------------
94 Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
95 number of bits given in `count'.  If any nonzero bits are shifted off, they
96 are ``jammed'' into the least significant bit of the result by setting the
97 least significant bit to 1.  The value of `count' can be arbitrarily large;
98 in particular, if `count' is greater than 64, the result will be either 0
99 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
100 nonzero.  The result is broken into two 32-bit pieces which are stored at
101 the locations pointed to by `z0Ptr' and `z1Ptr'.
102 -------------------------------------------------------------------------------
103 */
104 static __inline void
shift64RightJamming(bits32 a0,bits32 a1,int16 count,bits32 * z0Ptr,bits32 * z1Ptr)105  shift64RightJamming(
106      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
107 {
108     bits32 z0, z1;
109     int8 negCount = ( - count ) & 31;
110 
111     if ( count == 0 ) {
112         z1 = a1;
113         z0 = a0;
114     }
115     else if ( count < 32 ) {
116         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
117         z0 = a0>>count;
118     }
119     else {
120         if ( count == 32 ) {
121             z1 = a0 | ( a1 != 0 );
122         }
123         else if ( count < 64 ) {
124             z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
125         }
126         else {
127             z1 = ( ( a0 | a1 ) != 0 );
128         }
129         z0 = 0;
130     }
131     *z1Ptr = z1;
132     *z0Ptr = z0;
133 
134 }
135 
136 /*
137 -------------------------------------------------------------------------------
138 Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
139 by 32 _plus_ the number of bits given in `count'.  The shifted result is
140 at most 64 nonzero bits; these are broken into two 32-bit pieces which are
141 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
142 off form a third 32-bit result as follows:  The _last_ bit shifted off is
143 the most-significant bit of the extra result, and the other 31 bits of the
144 extra result are all zero if and only if _all_but_the_last_ bits shifted off
145 were all zero.  This extra result is stored in the location pointed to by
146 `z2Ptr'.  The value of `count' can be arbitrarily large.
147     (This routine makes more sense if `a0', `a1', and `a2' are considered
148 to form a fixed-point value with binary point between `a1' and `a2'.  This
149 fixed-point value is shifted right by the number of bits given in `count',
150 and the integer part of the result is returned at the locations pointed to
151 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
152 corrupted as described above, and is returned at the location pointed to by
153 `z2Ptr'.)
154 -------------------------------------------------------------------------------
155 */
156 static __inline void
shift64ExtraRightJamming(bits32 a0,bits32 a1,bits32 a2,int16 count,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)157  shift64ExtraRightJamming(
158      bits32 a0,
159      bits32 a1,
160      bits32 a2,
161      int16 count,
162      bits32 *z0Ptr,
163      bits32 *z1Ptr,
164      bits32 *z2Ptr
165  )
166 {
167     bits32 z0, z1, z2;
168     int8 negCount = ( - count ) & 31;
169 
170     if ( count == 0 ) {
171         z2 = a2;
172         z1 = a1;
173         z0 = a0;
174     }
175     else {
176         if ( count < 32 ) {
177             z2 = a1<<negCount;
178             z1 = ( a0<<negCount ) | ( a1>>count );
179             z0 = a0>>count;
180         }
181         else {
182             if ( count == 32 ) {
183                 z2 = a1;
184                 z1 = a0;
185             }
186             else {
187                 a2 |= a1;
188                 if ( count < 64 ) {
189                     z2 = a0<<negCount;
190                     z1 = a0>>( count & 31 );
191                 }
192                 else {
193                     z2 = ( count == 64 ) ? a0 : ( a0 != 0 );
194                     z1 = 0;
195                 }
196             }
197             z0 = 0;
198         }
199         z2 |= ( a2 != 0 );
200     }
201     *z2Ptr = z2;
202     *z1Ptr = z1;
203     *z0Ptr = z0;
204 
205 }
206 
207 /*
208 -------------------------------------------------------------------------------
209 Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
210 number of bits given in `count'.  Any bits shifted off are lost.  The value
211 of `count' must be less than 32.  The result is broken into two 32-bit
212 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
213 -------------------------------------------------------------------------------
214 */
215 static __inline void
shortShift64Left(bits32 a0,bits32 a1,int16 count,bits32 * z0Ptr,bits32 * z1Ptr)216  shortShift64Left(
217      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
218 {
219 
220     *z1Ptr = a1<<count;
221     *z0Ptr =
222         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
223 
224 }
225 
226 /*
227 -------------------------------------------------------------------------------
228 Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
229 by the number of bits given in `count'.  Any bits shifted off are lost.
230 The value of `count' must be less than 32.  The result is broken into three
231 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
232 `z1Ptr', and `z2Ptr'.
233 -------------------------------------------------------------------------------
234 */
235 static __inline void
shortShift96Left(bits32 a0,bits32 a1,bits32 a2,int16 count,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)236  shortShift96Left(
237      bits32 a0,
238      bits32 a1,
239      bits32 a2,
240      int16 count,
241      bits32 *z0Ptr,
242      bits32 *z1Ptr,
243      bits32 *z2Ptr
244  )
245 {
246     bits32 z0, z1, z2;
247     int8 negCount;
248 
249     z2 = a2<<count;
250     z1 = a1<<count;
251     z0 = a0<<count;
252     if ( 0 < count ) {
253         negCount = ( ( - count ) & 31 );
254         z1 |= a2>>negCount;
255         z0 |= a1>>negCount;
256     }
257     *z2Ptr = z2;
258     *z1Ptr = z1;
259     *z0Ptr = z0;
260 
261 }
262 
263 /*
264 -------------------------------------------------------------------------------
265 Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
266 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
267 any carry out is lost.  The result is broken into two 32-bit pieces which
268 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
269 -------------------------------------------------------------------------------
270 */
271 static __inline void
add64(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr)272  add64(
273      bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
274 {
275     bits32 z1;
276 
277     z1 = a1 + b1;
278     *z1Ptr = z1;
279     *z0Ptr = a0 + b0 + ( z1 < a1 );
280 
281 }
282 
283 /*
284 -------------------------------------------------------------------------------
285 Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
286 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
287 modulo 2^96, so any carry out is lost.  The result is broken into three
288 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
289 `z1Ptr', and `z2Ptr'.
290 -------------------------------------------------------------------------------
291 */
292 static __inline void
add96(bits32 a0,bits32 a1,bits32 a2,bits32 b0,bits32 b1,bits32 b2,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)293  add96(
294      bits32 a0,
295      bits32 a1,
296      bits32 a2,
297      bits32 b0,
298      bits32 b1,
299      bits32 b2,
300      bits32 *z0Ptr,
301      bits32 *z1Ptr,
302      bits32 *z2Ptr
303  )
304 {
305     bits32 z0, z1, z2;
306     int8 carry0, carry1;
307 
308     z2 = a2 + b2;
309     carry1 = ( z2 < a2 );
310     z1 = a1 + b1;
311     carry0 = ( z1 < a1 );
312     z0 = a0 + b0;
313     z1 += carry1;
314     z0 += ( z1 < carry1 );
315     z0 += carry0;
316     *z2Ptr = z2;
317     *z1Ptr = z1;
318     *z0Ptr = z0;
319 
320 }
321 
322 /*
323 -------------------------------------------------------------------------------
324 Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
325 64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
326 2^64, so any borrow out (carry out) is lost.  The result is broken into two
327 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
328 `z1Ptr'.
329 -------------------------------------------------------------------------------
330 */
331 static __inline void
sub64(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr)332  sub64(
333      bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
334 {
335 
336     *z1Ptr = a1 - b1;
337     *z0Ptr = a0 - b0 - ( a1 < b1 );
338 
339 }
340 
341 /*
342 -------------------------------------------------------------------------------
343 Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
344 the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
345 is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
346 into three 32-bit pieces which are stored at the locations pointed to by
347 `z0Ptr', `z1Ptr', and `z2Ptr'.
348 -------------------------------------------------------------------------------
349 */
350 static __inline void
sub96(bits32 a0,bits32 a1,bits32 a2,bits32 b0,bits32 b1,bits32 b2,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)351  sub96(
352      bits32 a0,
353      bits32 a1,
354      bits32 a2,
355      bits32 b0,
356      bits32 b1,
357      bits32 b2,
358      bits32 *z0Ptr,
359      bits32 *z1Ptr,
360      bits32 *z2Ptr
361  )
362 {
363     bits32 z0, z1, z2;
364     int8 borrow0, borrow1;
365 
366     z2 = a2 - b2;
367     borrow1 = ( a2 < b2 );
368     z1 = a1 - b1;
369     borrow0 = ( a1 < b1 );
370     z0 = a0 - b0;
371     z0 -= ( z1 < borrow1 );
372     z1 -= borrow1;
373     z0 -= borrow0;
374     *z2Ptr = z2;
375     *z1Ptr = z1;
376     *z0Ptr = z0;
377 
378 }
379 
380 /*
381 -------------------------------------------------------------------------------
382 Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
383 into two 32-bit pieces which are stored at the locations pointed to by
384 `z0Ptr' and `z1Ptr'.
385 -------------------------------------------------------------------------------
386 */
387 static __inline void
mul32To64(bits32 a,bits32 b,bits32 * z0Ptr,bits32 * z1Ptr)388  mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
389 {
390     bits16 aHigh, aLow, bHigh, bLow;
391     bits32 z0, zMiddleA, zMiddleB, z1;
392 
393     aLow = a;
394     aHigh = a>>16;
395     bLow = b;
396     bHigh = b>>16;
397     z1 = ( (bits32) aLow ) * bLow;
398     zMiddleA = ( (bits32) aLow ) * bHigh;
399     zMiddleB = ( (bits32) aHigh ) * bLow;
400     z0 = ( (bits32) aHigh ) * bHigh;
401     zMiddleA += zMiddleB;
402     z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
403     zMiddleA <<= 16;
404     z1 += zMiddleA;
405     z0 += ( z1 < zMiddleA );
406     *z1Ptr = z1;
407     *z0Ptr = z0;
408 
409 }
410 
411 /*
412 -------------------------------------------------------------------------------
413 Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
414 to obtain a 96-bit product.  The product is broken into three 32-bit pieces
415 which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
416 `z2Ptr'.
417 -------------------------------------------------------------------------------
418 */
419 static __inline void
mul64By32To96(bits32 a0,bits32 a1,bits32 b,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr)420  mul64By32To96(
421      bits32 a0,
422      bits32 a1,
423      bits32 b,
424      bits32 *z0Ptr,
425      bits32 *z1Ptr,
426      bits32 *z2Ptr
427  )
428 {
429     bits32 z0, z1, z2, more1;
430 
431     mul32To64( a1, b, &z1, &z2 );
432     mul32To64( a0, b, &z0, &more1 );
433     add64( z0, more1, 0, z1, &z0, &z1 );
434     *z2Ptr = z2;
435     *z1Ptr = z1;
436     *z0Ptr = z0;
437 
438 }
439 
440 /*
441 -------------------------------------------------------------------------------
442 Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
443 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
444 product.  The product is broken into four 32-bit pieces which are stored at
445 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
446 -------------------------------------------------------------------------------
447 */
448 static __inline void
mul64To128(bits32 a0,bits32 a1,bits32 b0,bits32 b1,bits32 * z0Ptr,bits32 * z1Ptr,bits32 * z2Ptr,bits32 * z3Ptr)449  mul64To128(
450      bits32 a0,
451      bits32 a1,
452      bits32 b0,
453      bits32 b1,
454      bits32 *z0Ptr,
455      bits32 *z1Ptr,
456      bits32 *z2Ptr,
457      bits32 *z3Ptr
458  )
459 {
460     bits32 z0, z1, z2, z3;
461     bits32 more1, more2;
462 
463     mul32To64( a1, b1, &z2, &z3 );
464     mul32To64( a1, b0, &z1, &more2 );
465     add64( z1, more2, 0, z2, &z1, &z2 );
466     mul32To64( a0, b0, &z0, &more1 );
467     add64( z0, more1, 0, z1, &z0, &z1 );
468     mul32To64( a0, b1, &more1, &more2 );
469     add64( more1, more2, 0, z2, &more1, &z2 );
470     add64( z0, z1, 0, more1, &z0, &z1 );
471     *z3Ptr = z3;
472     *z2Ptr = z2;
473     *z1Ptr = z1;
474     *z0Ptr = z0;
475 
476 }
477 
478 /*
479 -------------------------------------------------------------------------------
480 Returns an approximation to the 32-bit integer quotient obtained by dividing
481 `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
482 divisor `b' must be at least 2^31.  If q is the exact quotient truncated
483 toward zero, the approximation returned lies between q and q + 2 inclusive.
484 If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
485 unsigned integer is returned.
486 -------------------------------------------------------------------------------
487 */
estimateDiv64To32(bits32 a0,bits32 a1,bits32 b)488 static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
489 {
490     bits32 b0, b1;
491     bits32 rem0, rem1, term0, term1;
492     bits32 z;
493 
494     if ( b <= a0 ) return 0xFFFFFFFF;
495     b0 = b>>16;
496     z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;
497     mul32To64( b, z, &term0, &term1 );
498     sub64( a0, a1, term0, term1, &rem0, &rem1 );
499     while ( ( (sbits32) rem0 ) < 0 ) {
500         z -= 0x10000;
501         b1 = b<<16;
502         add64( rem0, rem1, b0, b1, &rem0, &rem1 );
503     }
504     rem0 = ( rem0<<16 ) | ( rem1>>16 );
505     z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;
506     return z;
507 
508 }
509 
510 #ifndef SOFTFLOAT_FOR_GCC
511 /*
512 -------------------------------------------------------------------------------
513 Returns an approximation to the square root of the 32-bit significand given
514 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
515 `aExp' (the least significant bit) is 1, the integer returned approximates
516 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
517 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
518 case, the approximation returned lies strictly within +/-2 of the exact
519 value.
520 -------------------------------------------------------------------------------
521 */
estimateSqrt32(int16 aExp,bits32 a)522 static bits32 estimateSqrt32( int16 aExp, bits32 a )
523 {
524     static const bits16 sqrtOddAdjustments[] = {
525         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
526         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
527     };
528     static const bits16 sqrtEvenAdjustments[] = {
529         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
530         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
531     };
532     int8 index;
533     bits32 z;
534 
535     index = ( a>>27 ) & 15;
536     if ( aExp & 1 ) {
537         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
538         z = ( ( a / z )<<14 ) + ( z<<15 );
539         a >>= 1;
540     }
541     else {
542         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
543         z = a / z + z;
544         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
545         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
546     }
547     return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );
548 
549 }
550 #endif
551 
552 /*
553 -------------------------------------------------------------------------------
554 Returns the number of leading 0 bits before the most-significant 1 bit of
555 `a'.  If `a' is zero, 32 is returned.
556 -------------------------------------------------------------------------------
557 */
countLeadingZeros32(bits32 a)558 static int8 countLeadingZeros32( bits32 a )
559 {
560     static const int8 countLeadingZerosHigh[] = {
561         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
562         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
563         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
564         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
565         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
566         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
567         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
568         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
569         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
577     };
578     int8 shiftCount;
579 
580     shiftCount = 0;
581     if ( a < 0x10000 ) {
582         shiftCount += 16;
583         a <<= 16;
584     }
585     if ( a < 0x1000000 ) {
586         shiftCount += 8;
587         a <<= 8;
588     }
589     shiftCount += countLeadingZerosHigh[ a>>24 ];
590     return shiftCount;
591 
592 }
593 
594 /*
595 -------------------------------------------------------------------------------
596 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
597 equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
598 returns 0.
599 -------------------------------------------------------------------------------
600 */
eq64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)601 static __inline flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
602 {
603 
604     return ( a0 == b0 ) && ( a1 == b1 );
605 
606 }
607 
608 /*
609 -------------------------------------------------------------------------------
610 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
611 than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
612 Otherwise, returns 0.
613 -------------------------------------------------------------------------------
614 */
le64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)615 static __inline flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
616 {
617 
618     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
619 
620 }
621 
622 /*
623 -------------------------------------------------------------------------------
624 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
625 than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
626 returns 0.
627 -------------------------------------------------------------------------------
628 */
lt64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)629 static __inline flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
630 {
631 
632     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
633 
634 }
635 
636 /*
637 -------------------------------------------------------------------------------
638 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
639 equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
640 returns 0.
641 -------------------------------------------------------------------------------
642 */
ne64(bits32 a0,bits32 a1,bits32 b0,bits32 b1)643 static __inline flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
644 {
645 
646     return ( a0 != b0 ) || ( a1 != b1 );
647 
648 }
649 
650