1/* $NetBSD: softfloat-macros,v 1.2 2009/02/16 10:23:35 tron Exp $ */
2
3/*
4===============================================================================
5
6This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
7Arithmetic Package, Release 2a.
8
9Written by John R. Hauser.  This work was made possible in part by the
10International Computer Science Institute, located at Suite 600, 1947 Center
11Street, Berkeley, California 94704.  Funding was partially provided by the
12National Science Foundation under grant MIP-9311980.  The original version
13of this code was written as part of a project to build a fixed-point vector
14processor in collaboration with the University of California at Berkeley,
15overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
16is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
17arithmetic/SoftFloat.html'.
18
19THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
20has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
21TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
22PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
23AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
24
25Derivative works are acceptable, even for commercial purposes, so long as
26(1) they include prominent notice that the work is derivative, and (2) they
27include prominent notice akin to these four paragraphs for those parts of
28this code that are retained.
29
30===============================================================================
31*/
32
33/*
34-------------------------------------------------------------------------------
35Shifts `a' right by the number of bits given in `count'.  If any nonzero
36bits are shifted off, they are ``jammed'' into the least significant bit of
37the result by setting the least significant bit to 1.  The value of `count'
38can be arbitrarily large; in particular, if `count' is greater than 32, the
39result will be either 0 or 1, depending on whether `a' is zero or nonzero.
40The result is stored in the location pointed to by `zPtr'.
41-------------------------------------------------------------------------------
42*/
43INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
44{
45    bits32 z;
46
47    if ( count == 0 ) {
48        z = a;
49    }
50    else if ( count < 32 ) {
51        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
52    }
53    else {
54        z = ( a != 0 );
55    }
56    *zPtr = z;
57
58}
59
60/*
61-------------------------------------------------------------------------------
62Shifts `a' right by the number of bits given in `count'.  If any nonzero
63bits are shifted off, they are ``jammed'' into the least significant bit of
64the result by setting the least significant bit to 1.  The value of `count'
65can be arbitrarily large; in particular, if `count' is greater than 64, the
66result will be either 0 or 1, depending on whether `a' is zero or nonzero.
67The result is stored in the location pointed to by `zPtr'.
68-------------------------------------------------------------------------------
69*/
70INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
71{
72    bits64 z;
73
74    if ( count == 0 ) {
75        z = a;
76    }
77    else if ( count < 64 ) {
78        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
79    }
80    else {
81        z = ( a != 0 );
82    }
83    *zPtr = z;
84
85}
86
87/*
88-------------------------------------------------------------------------------
89Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
90_plus_ the number of bits given in `count'.  The shifted result is at most
9164 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
92bits shifted off form a second 64-bit result as follows:  The _last_ bit
93shifted off is the most-significant bit of the extra result, and the other
9463 bits of the extra result are all zero if and only if _all_but_the_last_
95bits shifted off were all zero.  This extra result is stored in the location
96pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
97    (This routine makes more sense if `a0' and `a1' are considered to form a
98fixed-point value with binary point between `a0' and `a1'.  This fixed-point
99value is shifted right by the number of bits given in `count', and the
100integer part of the result is returned at the location pointed to by
101`z0Ptr'.  The fractional part of the result may be slightly corrupted as
102described above, and is returned at the location pointed to by `z1Ptr'.)
103-------------------------------------------------------------------------------
104*/
105INLINE void
106 shift64ExtraRightJamming(
107     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
108{
109    bits64 z0, z1;
110    int8 negCount = ( - count ) & 63;
111
112    if ( count == 0 ) {
113        z1 = a1;
114        z0 = a0;
115    }
116    else if ( count < 64 ) {
117        z1 = ( a0<<negCount ) | ( a1 != 0 );
118        z0 = a0>>count;
119    }
120    else {
121        if ( count == 64 ) {
122            z1 = a0 | ( a1 != 0 );
123        }
124        else {
125            z1 = ( ( a0 | a1 ) != 0 );
126        }
127        z0 = 0;
128    }
129    *z1Ptr = z1;
130    *z0Ptr = z0;
131
132}
133
134/*
135-------------------------------------------------------------------------------
136Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
137number of bits given in `count'.  Any bits shifted off are lost.  The value
138of `count' can be arbitrarily large; in particular, if `count' is greater
139than 128, the result will be 0.  The result is broken into two 64-bit pieces
140which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
141-------------------------------------------------------------------------------
142*/
143INLINE void
144 shift128Right(
145     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
146{
147    bits64 z0, z1;
148    int8 negCount = ( - count ) & 63;
149
150    if ( count == 0 ) {
151        z1 = a1;
152        z0 = a0;
153    }
154    else if ( count < 64 ) {
155        z1 = ( a0<<negCount ) | ( a1>>count );
156        z0 = a0>>count;
157    }
158    else {
159        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
160        z0 = 0;
161    }
162    *z1Ptr = z1;
163    *z0Ptr = z0;
164
165}
166
167/*
168-------------------------------------------------------------------------------
169Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
170number of bits given in `count'.  If any nonzero bits are shifted off, they
171are ``jammed'' into the least significant bit of the result by setting the
172least significant bit to 1.  The value of `count' can be arbitrarily large;
173in particular, if `count' is greater than 128, the result will be either
1740 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
175nonzero.  The result is broken into two 64-bit pieces which are stored at
176the locations pointed to by `z0Ptr' and `z1Ptr'.
177-------------------------------------------------------------------------------
178*/
179INLINE void
180 shift128RightJamming(
181     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
182{
183    bits64 z0, z1;
184    int8 negCount = ( - count ) & 63;
185
186    if ( count == 0 ) {
187        z1 = a1;
188        z0 = a0;
189    }
190    else if ( count < 64 ) {
191        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
192        z0 = a0>>count;
193    }
194    else {
195        if ( count == 64 ) {
196            z1 = a0 | ( a1 != 0 );
197        }
198        else if ( count < 128 ) {
199            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
200        }
201        else {
202            z1 = ( ( a0 | a1 ) != 0 );
203        }
204        z0 = 0;
205    }
206    *z1Ptr = z1;
207    *z0Ptr = z0;
208
209}
210
211/*
212-------------------------------------------------------------------------------
213Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
214by 64 _plus_ the number of bits given in `count'.  The shifted result is
215at most 128 nonzero bits; these are broken into two 64-bit pieces which are
216stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
217off form a third 64-bit result as follows:  The _last_ bit shifted off is
218the most-significant bit of the extra result, and the other 63 bits of the
219extra result are all zero if and only if _all_but_the_last_ bits shifted off
220were all zero.  This extra result is stored in the location pointed to by
221`z2Ptr'.  The value of `count' can be arbitrarily large.
222    (This routine makes more sense if `a0', `a1', and `a2' are considered
223to form a fixed-point value with binary point between `a1' and `a2'.  This
224fixed-point value is shifted right by the number of bits given in `count',
225and the integer part of the result is returned at the locations pointed to
226by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
227corrupted as described above, and is returned at the location pointed to by
228`z2Ptr'.)
229-------------------------------------------------------------------------------
230*/
231INLINE void
232 shift128ExtraRightJamming(
233     bits64 a0,
234     bits64 a1,
235     bits64 a2,
236     int16 count,
237     bits64 *z0Ptr,
238     bits64 *z1Ptr,
239     bits64 *z2Ptr
240 )
241{
242    bits64 z0, z1, z2;
243    int8 negCount = ( - count ) & 63;
244
245    if ( count == 0 ) {
246        z2 = a2;
247        z1 = a1;
248        z0 = a0;
249    }
250    else {
251        if ( count < 64 ) {
252            z2 = a1<<negCount;
253            z1 = ( a0<<negCount ) | ( a1>>count );
254            z0 = a0>>count;
255        }
256        else {
257            if ( count == 64 ) {
258                z2 = a1;
259                z1 = a0;
260            }
261            else {
262                a2 |= a1;
263                if ( count < 128 ) {
264                    z2 = a0<<negCount;
265                    z1 = a0>>( count & 63 );
266                }
267                else {
268                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
269                    z1 = 0;
270                }
271            }
272            z0 = 0;
273        }
274        z2 |= ( a2 != 0 );
275    }
276    *z2Ptr = z2;
277    *z1Ptr = z1;
278    *z0Ptr = z0;
279
280}
281
282/*
283-------------------------------------------------------------------------------
284Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
285number of bits given in `count'.  Any bits shifted off are lost.  The value
286of `count' must be less than 64.  The result is broken into two 64-bit
287pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
288-------------------------------------------------------------------------------
289*/
290INLINE void
291 shortShift128Left(
292     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
293{
294
295    *z1Ptr = a1<<count;
296    *z0Ptr =
297        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
298
299}
300
301/*
302-------------------------------------------------------------------------------
303Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
304by the number of bits given in `count'.  Any bits shifted off are lost.
305The value of `count' must be less than 64.  The result is broken into three
30664-bit pieces which are stored at the locations pointed to by `z0Ptr',
307`z1Ptr', and `z2Ptr'.
308-------------------------------------------------------------------------------
309*/
310INLINE void
311 shortShift192Left(
312     bits64 a0,
313     bits64 a1,
314     bits64 a2,
315     int16 count,
316     bits64 *z0Ptr,
317     bits64 *z1Ptr,
318     bits64 *z2Ptr
319 )
320{
321    bits64 z0, z1, z2;
322    int8 negCount;
323
324    z2 = a2<<count;
325    z1 = a1<<count;
326    z0 = a0<<count;
327    if ( 0 < count ) {
328        negCount = ( ( - count ) & 63 );
329        z1 |= a2>>negCount;
330        z0 |= a1>>negCount;
331    }
332    *z2Ptr = z2;
333    *z1Ptr = z1;
334    *z0Ptr = z0;
335
336}
337
338/*
339-------------------------------------------------------------------------------
340Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
341value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
342any carry out is lost.  The result is broken into two 64-bit pieces which
343are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
344-------------------------------------------------------------------------------
345*/
346INLINE void
347 add128(
348     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
349{
350    bits64 z1;
351
352    z1 = a1 + b1;
353    *z1Ptr = z1;
354    *z0Ptr = a0 + b0 + ( z1 < a1 );
355
356}
357
358/*
359-------------------------------------------------------------------------------
360Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
361192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
362modulo 2^192, so any carry out is lost.  The result is broken into three
36364-bit pieces which are stored at the locations pointed to by `z0Ptr',
364`z1Ptr', and `z2Ptr'.
365-------------------------------------------------------------------------------
366*/
367INLINE void
368 add192(
369     bits64 a0,
370     bits64 a1,
371     bits64 a2,
372     bits64 b0,
373     bits64 b1,
374     bits64 b2,
375     bits64 *z0Ptr,
376     bits64 *z1Ptr,
377     bits64 *z2Ptr
378 )
379{
380    bits64 z0, z1, z2;
381    int8 carry0, carry1;
382
383    z2 = a2 + b2;
384    carry1 = ( z2 < a2 );
385    z1 = a1 + b1;
386    carry0 = ( z1 < a1 );
387    z0 = a0 + b0;
388    z1 += carry1;
389    z0 += ( z1 < (bits64)carry1 );
390    z0 += carry0;
391    *z2Ptr = z2;
392    *z1Ptr = z1;
393    *z0Ptr = z0;
394
395}
396
397/*
398-------------------------------------------------------------------------------
399Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
400128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
4012^128, so any borrow out (carry out) is lost.  The result is broken into two
40264-bit pieces which are stored at the locations pointed to by `z0Ptr' and
403`z1Ptr'.
404-------------------------------------------------------------------------------
405*/
406INLINE void
407 sub128(
408     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
409{
410
411    *z1Ptr = a1 - b1;
412    *z0Ptr = a0 - b0 - ( a1 < b1 );
413
414}
415
416/*
417-------------------------------------------------------------------------------
418Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
419from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
420Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
421result is broken into three 64-bit pieces which are stored at the locations
422pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
423-------------------------------------------------------------------------------
424*/
425INLINE void
426 sub192(
427     bits64 a0,
428     bits64 a1,
429     bits64 a2,
430     bits64 b0,
431     bits64 b1,
432     bits64 b2,
433     bits64 *z0Ptr,
434     bits64 *z1Ptr,
435     bits64 *z2Ptr
436 )
437{
438    bits64 z0, z1, z2;
439    int8 borrow0, borrow1;
440
441    z2 = a2 - b2;
442    borrow1 = ( a2 < b2 );
443    z1 = a1 - b1;
444    borrow0 = ( a1 < b1 );
445    z0 = a0 - b0;
446    z0 -= ( z1 < (bits64)borrow1 );
447    z1 -= borrow1;
448    z0 -= borrow0;
449    *z2Ptr = z2;
450    *z1Ptr = z1;
451    *z0Ptr = z0;
452
453}
454
455/*
456-------------------------------------------------------------------------------
457Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
458into two 64-bit pieces which are stored at the locations pointed to by
459`z0Ptr' and `z1Ptr'.
460-------------------------------------------------------------------------------
461*/
462INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
463{
464    bits32 aHigh, aLow, bHigh, bLow;
465    bits64 z0, zMiddleA, zMiddleB, z1;
466
467    aLow = a;
468    aHigh = a>>32;
469    bLow = b;
470    bHigh = b>>32;
471    z1 = ( (bits64) aLow ) * bLow;
472    zMiddleA = ( (bits64) aLow ) * bHigh;
473    zMiddleB = ( (bits64) aHigh ) * bLow;
474    z0 = ( (bits64) aHigh ) * bHigh;
475    zMiddleA += zMiddleB;
476    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
477    zMiddleA <<= 32;
478    z1 += zMiddleA;
479    z0 += ( z1 < zMiddleA );
480    *z1Ptr = z1;
481    *z0Ptr = z0;
482
483}
484
485/*
486-------------------------------------------------------------------------------
487Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
488`b' to obtain a 192-bit product.  The product is broken into three 64-bit
489pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
490`z2Ptr'.
491-------------------------------------------------------------------------------
492*/
493INLINE void
494 mul128By64To192(
495     bits64 a0,
496     bits64 a1,
497     bits64 b,
498     bits64 *z0Ptr,
499     bits64 *z1Ptr,
500     bits64 *z2Ptr
501 )
502{
503    bits64 z0, z1, z2, more1;
504
505    mul64To128( a1, b, &z1, &z2 );
506    mul64To128( a0, b, &z0, &more1 );
507    add128( z0, more1, 0, z1, &z0, &z1 );
508    *z2Ptr = z2;
509    *z1Ptr = z1;
510    *z0Ptr = z0;
511
512}
513
514/*
515-------------------------------------------------------------------------------
516Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
517128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
518product.  The product is broken into four 64-bit pieces which are stored at
519the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
520-------------------------------------------------------------------------------
521*/
522INLINE void
523 mul128To256(
524     bits64 a0,
525     bits64 a1,
526     bits64 b0,
527     bits64 b1,
528     bits64 *z0Ptr,
529     bits64 *z1Ptr,
530     bits64 *z2Ptr,
531     bits64 *z3Ptr
532 )
533{
534    bits64 z0, z1, z2, z3;
535    bits64 more1, more2;
536
537    mul64To128( a1, b1, &z2, &z3 );
538    mul64To128( a1, b0, &z1, &more2 );
539    add128( z1, more2, 0, z2, &z1, &z2 );
540    mul64To128( a0, b0, &z0, &more1 );
541    add128( z0, more1, 0, z1, &z0, &z1 );
542    mul64To128( a0, b1, &more1, &more2 );
543    add128( more1, more2, 0, z2, &more1, &z2 );
544    add128( z0, z1, 0, more1, &z0, &z1 );
545    *z3Ptr = z3;
546    *z2Ptr = z2;
547    *z1Ptr = z1;
548    *z0Ptr = z0;
549
550}
551
552/*
553-------------------------------------------------------------------------------
554Returns an approximation to the 64-bit integer quotient obtained by dividing
555`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
556divisor `b' must be at least 2^63.  If q is the exact quotient truncated
557toward zero, the approximation returned lies between q and q + 2 inclusive.
558If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
559unsigned integer is returned.
560-------------------------------------------------------------------------------
561*/
562static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
563{
564    bits64 b0, b1;
565    bits64 rem0, rem1, term0, term1;
566    bits64 z;
567
568    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
569    b0 = b>>32;
570    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
571    mul64To128( b, z, &term0, &term1 );
572    sub128( a0, a1, term0, term1, &rem0, &rem1 );
573    while ( ( (sbits64) rem0 ) < 0 ) {
574        z -= LIT64( 0x100000000 );
575        b1 = b<<32;
576        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
577    }
578    rem0 = ( rem0<<32 ) | ( rem1>>32 );
579    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
580    return z;
581
582}
583
584#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
585/*
586-------------------------------------------------------------------------------
587Returns an approximation to the square root of the 32-bit significand given
588by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
589`aExp' (the least significant bit) is 1, the integer returned approximates
5902^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
591is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
592case, the approximation returned lies strictly within +/-2 of the exact
593value.
594-------------------------------------------------------------------------------
595*/
596static bits32 estimateSqrt32( int16 aExp, bits32 a )
597{
598    static const bits16 sqrtOddAdjustments[] = {
599        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
600        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
601    };
602    static const bits16 sqrtEvenAdjustments[] = {
603        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
604        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
605    };
606    int8 idx;
607    bits32 z;
608
609    idx = ( a>>27 ) & 15;
610    if ( aExp & 1 ) {
611        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
612        z = ( ( a / z )<<14 ) + ( z<<15 );
613        a >>= 1;
614    }
615    else {
616        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
617        z = a / z + z;
618        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
619        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
620    }
621    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
622
623}
624#endif
625
626/*
627-------------------------------------------------------------------------------
628Returns the number of leading 0 bits before the most-significant 1 bit of
629`a'.  If `a' is zero, 32 is returned.
630-------------------------------------------------------------------------------
631*/
632static int8 countLeadingZeros32( bits32 a )
633{
634    static const int8 countLeadingZerosHigh[] = {
635        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
636        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
637        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
638        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
639        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
640        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
641        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
642        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
644        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
651    };
652    int8 shiftCount;
653
654    shiftCount = 0;
655    if ( a < 0x10000 ) {
656        shiftCount += 16;
657        a <<= 16;
658    }
659    if ( a < 0x1000000 ) {
660        shiftCount += 8;
661        a <<= 8;
662    }
663    shiftCount += countLeadingZerosHigh[ a>>24 ];
664    return shiftCount;
665
666}
667
668/*
669-------------------------------------------------------------------------------
670Returns the number of leading 0 bits before the most-significant 1 bit of
671`a'.  If `a' is zero, 64 is returned.
672-------------------------------------------------------------------------------
673*/
674static int8 countLeadingZeros64( bits64 a )
675{
676    int8 shiftCount;
677
678    shiftCount = 0;
679    if ( a < ( (bits64) 1 )<<32 ) {
680        shiftCount += 32;
681    }
682    else {
683        a >>= 32;
684    }
685    shiftCount += countLeadingZeros32( a );
686    return shiftCount;
687
688}
689
690/*
691-------------------------------------------------------------------------------
692Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
693is equal to the 128-bit value formed by concatenating `b0' and `b1'.
694Otherwise, returns 0.
695-------------------------------------------------------------------------------
696*/
697INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
698{
699
700    return ( a0 == b0 ) && ( a1 == b1 );
701
702}
703
704/*
705-------------------------------------------------------------------------------
706Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
707than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
708Otherwise, returns 0.
709-------------------------------------------------------------------------------
710*/
711INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
712{
713
714    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
715
716}
717
718/*
719-------------------------------------------------------------------------------
720Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
721than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
722returns 0.
723-------------------------------------------------------------------------------
724*/
725INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
726{
727
728    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
729
730}
731
732/*
733-------------------------------------------------------------------------------
734Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
735not equal to the 128-bit value formed by concatenating `b0' and `b1'.
736Otherwise, returns 0.
737-------------------------------------------------------------------------------
738*/
739INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
740{
741
742    return ( a0 != b0 ) || ( a1 != b1 );
743
744}
745
746