1/* $NetBSD: softfloat-macros,v 1.2 2009/02/16 10:23:35 tron Exp $ */
2/* $FreeBSD$ */
3
4/*
5===============================================================================
6
7This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
8Arithmetic Package, Release 2a.
9
10Written by John R. Hauser.  This work was made possible in part by the
11International Computer Science Institute, located at Suite 600, 1947 Center
12Street, Berkeley, California 94704.  Funding was partially provided by the
13National Science Foundation under grant MIP-9311980.  The original version
14of this code was written as part of a project to build a fixed-point vector
15processor in collaboration with the University of California at Berkeley,
16overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
17is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
18arithmetic/SoftFloat.html'.
19
20THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
21has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
22TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
23PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
24AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
25
26Derivative works are acceptable, even for commercial purposes, so long as
27(1) they include prominent notice that the work is derivative, and (2) they
28include prominent notice akin to these four paragraphs for those parts of
29this code that are retained.
30
31===============================================================================
32*/
33
34/*
35-------------------------------------------------------------------------------
36Shifts `a' right by the number of bits given in `count'.  If any nonzero
37bits are shifted off, they are ``jammed'' into the least significant bit of
38the result by setting the least significant bit to 1.  The value of `count'
39can be arbitrarily large; in particular, if `count' is greater than 32, the
40result will be either 0 or 1, depending on whether `a' is zero or nonzero.
41The result is stored in the location pointed to by `zPtr'.
42-------------------------------------------------------------------------------
43*/
44INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
45{
46    bits32 z;
47
48    if ( count == 0 ) {
49        z = a;
50    }
51    else if ( count < 32 ) {
52        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
53    }
54    else {
55        z = ( a != 0 );
56    }
57    *zPtr = z;
58
59}
60
61/*
62-------------------------------------------------------------------------------
63Shifts `a' right by the number of bits given in `count'.  If any nonzero
64bits are shifted off, they are ``jammed'' into the least significant bit of
65the result by setting the least significant bit to 1.  The value of `count'
66can be arbitrarily large; in particular, if `count' is greater than 64, the
67result will be either 0 or 1, depending on whether `a' is zero or nonzero.
68The result is stored in the location pointed to by `zPtr'.
69-------------------------------------------------------------------------------
70*/
71INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
72{
73    bits64 z;
74
75    if ( count == 0 ) {
76        z = a;
77    }
78    else if ( count < 64 ) {
79        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
80    }
81    else {
82        z = ( a != 0 );
83    }
84    *zPtr = z;
85
86}
87
88/*
89-------------------------------------------------------------------------------
90Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
91_plus_ the number of bits given in `count'.  The shifted result is at most
9264 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
93bits shifted off form a second 64-bit result as follows:  The _last_ bit
94shifted off is the most-significant bit of the extra result, and the other
9563 bits of the extra result are all zero if and only if _all_but_the_last_
96bits shifted off were all zero.  This extra result is stored in the location
97pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
98    (This routine makes more sense if `a0' and `a1' are considered to form a
99fixed-point value with binary point between `a0' and `a1'.  This fixed-point
100value is shifted right by the number of bits given in `count', and the
101integer part of the result is returned at the location pointed to by
102`z0Ptr'.  The fractional part of the result may be slightly corrupted as
103described above, and is returned at the location pointed to by `z1Ptr'.)
104-------------------------------------------------------------------------------
105*/
106INLINE void
107 shift64ExtraRightJamming(
108     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
109{
110    bits64 z0, z1;
111    int8 negCount = ( - count ) & 63;
112
113    if ( count == 0 ) {
114        z1 = a1;
115        z0 = a0;
116    }
117    else if ( count < 64 ) {
118        z1 = ( a0<<negCount ) | ( a1 != 0 );
119        z0 = a0>>count;
120    }
121    else {
122        if ( count == 64 ) {
123            z1 = a0 | ( a1 != 0 );
124        }
125        else {
126            z1 = ( ( a0 | a1 ) != 0 );
127        }
128        z0 = 0;
129    }
130    *z1Ptr = z1;
131    *z0Ptr = z0;
132
133}
134
135/*
136-------------------------------------------------------------------------------
137Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
138number of bits given in `count'.  Any bits shifted off are lost.  The value
139of `count' can be arbitrarily large; in particular, if `count' is greater
140than 128, the result will be 0.  The result is broken into two 64-bit pieces
141which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
142-------------------------------------------------------------------------------
143*/
144INLINE void
145 shift128Right(
146     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
147{
148    bits64 z0, z1;
149    int8 negCount = ( - count ) & 63;
150
151    if ( count == 0 ) {
152        z1 = a1;
153        z0 = a0;
154    }
155    else if ( count < 64 ) {
156        z1 = ( a0<<negCount ) | ( a1>>count );
157        z0 = a0>>count;
158    }
159    else {
160        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
161        z0 = 0;
162    }
163    *z1Ptr = z1;
164    *z0Ptr = z0;
165
166}
167
168/*
169-------------------------------------------------------------------------------
170Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
171number of bits given in `count'.  If any nonzero bits are shifted off, they
172are ``jammed'' into the least significant bit of the result by setting the
173least significant bit to 1.  The value of `count' can be arbitrarily large;
174in particular, if `count' is greater than 128, the result will be either
1750 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
176nonzero.  The result is broken into two 64-bit pieces which are stored at
177the locations pointed to by `z0Ptr' and `z1Ptr'.
178-------------------------------------------------------------------------------
179*/
180INLINE void
181 shift128RightJamming(
182     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
183{
184    bits64 z0, z1;
185    int8 negCount = ( - count ) & 63;
186
187    if ( count == 0 ) {
188        z1 = a1;
189        z0 = a0;
190    }
191    else if ( count < 64 ) {
192        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
193        z0 = a0>>count;
194    }
195    else {
196        if ( count == 64 ) {
197            z1 = a0 | ( a1 != 0 );
198        }
199        else if ( count < 128 ) {
200            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
201        }
202        else {
203            z1 = ( ( a0 | a1 ) != 0 );
204        }
205        z0 = 0;
206    }
207    *z1Ptr = z1;
208    *z0Ptr = z0;
209
210}
211
212/*
213-------------------------------------------------------------------------------
214Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
215by 64 _plus_ the number of bits given in `count'.  The shifted result is
216at most 128 nonzero bits; these are broken into two 64-bit pieces which are
217stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
218off form a third 64-bit result as follows:  The _last_ bit shifted off is
219the most-significant bit of the extra result, and the other 63 bits of the
220extra result are all zero if and only if _all_but_the_last_ bits shifted off
221were all zero.  This extra result is stored in the location pointed to by
222`z2Ptr'.  The value of `count' can be arbitrarily large.
223    (This routine makes more sense if `a0', `a1', and `a2' are considered
224to form a fixed-point value with binary point between `a1' and `a2'.  This
225fixed-point value is shifted right by the number of bits given in `count',
226and the integer part of the result is returned at the locations pointed to
227by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
228corrupted as described above, and is returned at the location pointed to by
229`z2Ptr'.)
230-------------------------------------------------------------------------------
231*/
232INLINE void
233 shift128ExtraRightJamming(
234     bits64 a0,
235     bits64 a1,
236     bits64 a2,
237     int16 count,
238     bits64 *z0Ptr,
239     bits64 *z1Ptr,
240     bits64 *z2Ptr
241 )
242{
243    bits64 z0, z1, z2;
244    int8 negCount = ( - count ) & 63;
245
246    if ( count == 0 ) {
247        z2 = a2;
248        z1 = a1;
249        z0 = a0;
250    }
251    else {
252        if ( count < 64 ) {
253            z2 = a1<<negCount;
254            z1 = ( a0<<negCount ) | ( a1>>count );
255            z0 = a0>>count;
256        }
257        else {
258            if ( count == 64 ) {
259                z2 = a1;
260                z1 = a0;
261            }
262            else {
263                a2 |= a1;
264                if ( count < 128 ) {
265                    z2 = a0<<negCount;
266                    z1 = a0>>( count & 63 );
267                }
268                else {
269                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
270                    z1 = 0;
271                }
272            }
273            z0 = 0;
274        }
275        z2 |= ( a2 != 0 );
276    }
277    *z2Ptr = z2;
278    *z1Ptr = z1;
279    *z0Ptr = z0;
280
281}
282
283/*
284-------------------------------------------------------------------------------
285Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
286number of bits given in `count'.  Any bits shifted off are lost.  The value
287of `count' must be less than 64.  The result is broken into two 64-bit
288pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
289-------------------------------------------------------------------------------
290*/
291INLINE void
292 shortShift128Left(
293     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
294{
295
296    *z1Ptr = a1<<count;
297    *z0Ptr =
298        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
299
300}
301
302/*
303-------------------------------------------------------------------------------
304Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
305by the number of bits given in `count'.  Any bits shifted off are lost.
306The value of `count' must be less than 64.  The result is broken into three
30764-bit pieces which are stored at the locations pointed to by `z0Ptr',
308`z1Ptr', and `z2Ptr'.
309-------------------------------------------------------------------------------
310*/
311INLINE void
312 shortShift192Left(
313     bits64 a0,
314     bits64 a1,
315     bits64 a2,
316     int16 count,
317     bits64 *z0Ptr,
318     bits64 *z1Ptr,
319     bits64 *z2Ptr
320 )
321{
322    bits64 z0, z1, z2;
323    int8 negCount;
324
325    z2 = a2<<count;
326    z1 = a1<<count;
327    z0 = a0<<count;
328    if ( 0 < count ) {
329        negCount = ( ( - count ) & 63 );
330        z1 |= a2>>negCount;
331        z0 |= a1>>negCount;
332    }
333    *z2Ptr = z2;
334    *z1Ptr = z1;
335    *z0Ptr = z0;
336
337}
338
339/*
340-------------------------------------------------------------------------------
341Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
342value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
343any carry out is lost.  The result is broken into two 64-bit pieces which
344are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
345-------------------------------------------------------------------------------
346*/
347INLINE void
348 add128(
349     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
350{
351    bits64 z1;
352
353    z1 = a1 + b1;
354    *z1Ptr = z1;
355    *z0Ptr = a0 + b0 + ( z1 < a1 );
356
357}
358
359/*
360-------------------------------------------------------------------------------
361Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
362192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
363modulo 2^192, so any carry out is lost.  The result is broken into three
36464-bit pieces which are stored at the locations pointed to by `z0Ptr',
365`z1Ptr', and `z2Ptr'.
366-------------------------------------------------------------------------------
367*/
368INLINE void
369 add192(
370     bits64 a0,
371     bits64 a1,
372     bits64 a2,
373     bits64 b0,
374     bits64 b1,
375     bits64 b2,
376     bits64 *z0Ptr,
377     bits64 *z1Ptr,
378     bits64 *z2Ptr
379 )
380{
381    bits64 z0, z1, z2;
382    int8 carry0, carry1;
383
384    z2 = a2 + b2;
385    carry1 = ( z2 < a2 );
386    z1 = a1 + b1;
387    carry0 = ( z1 < a1 );
388    z0 = a0 + b0;
389    z1 += carry1;
390    z0 += ( z1 < (bits64)carry1 );
391    z0 += carry0;
392    *z2Ptr = z2;
393    *z1Ptr = z1;
394    *z0Ptr = z0;
395
396}
397
398/*
399-------------------------------------------------------------------------------
400Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
401128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
4022^128, so any borrow out (carry out) is lost.  The result is broken into two
40364-bit pieces which are stored at the locations pointed to by `z0Ptr' and
404`z1Ptr'.
405-------------------------------------------------------------------------------
406*/
407INLINE void
408 sub128(
409     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
410{
411
412    *z1Ptr = a1 - b1;
413    *z0Ptr = a0 - b0 - ( a1 < b1 );
414
415}
416
417/*
418-------------------------------------------------------------------------------
419Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
420from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
421Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
422result is broken into three 64-bit pieces which are stored at the locations
423pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
424-------------------------------------------------------------------------------
425*/
426INLINE void
427 sub192(
428     bits64 a0,
429     bits64 a1,
430     bits64 a2,
431     bits64 b0,
432     bits64 b1,
433     bits64 b2,
434     bits64 *z0Ptr,
435     bits64 *z1Ptr,
436     bits64 *z2Ptr
437 )
438{
439    bits64 z0, z1, z2;
440    int8 borrow0, borrow1;
441
442    z2 = a2 - b2;
443    borrow1 = ( a2 < b2 );
444    z1 = a1 - b1;
445    borrow0 = ( a1 < b1 );
446    z0 = a0 - b0;
447    z0 -= ( z1 < (bits64)borrow1 );
448    z1 -= borrow1;
449    z0 -= borrow0;
450    *z2Ptr = z2;
451    *z1Ptr = z1;
452    *z0Ptr = z0;
453
454}
455
456/*
457-------------------------------------------------------------------------------
458Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
459into two 64-bit pieces which are stored at the locations pointed to by
460`z0Ptr' and `z1Ptr'.
461-------------------------------------------------------------------------------
462*/
463INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
464{
465    bits32 aHigh, aLow, bHigh, bLow;
466    bits64 z0, zMiddleA, zMiddleB, z1;
467
468    aLow = a;
469    aHigh = a>>32;
470    bLow = b;
471    bHigh = b>>32;
472    z1 = ( (bits64) aLow ) * bLow;
473    zMiddleA = ( (bits64) aLow ) * bHigh;
474    zMiddleB = ( (bits64) aHigh ) * bLow;
475    z0 = ( (bits64) aHigh ) * bHigh;
476    zMiddleA += zMiddleB;
477    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
478    zMiddleA <<= 32;
479    z1 += zMiddleA;
480    z0 += ( z1 < zMiddleA );
481    *z1Ptr = z1;
482    *z0Ptr = z0;
483
484}
485
486/*
487-------------------------------------------------------------------------------
488Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
489`b' to obtain a 192-bit product.  The product is broken into three 64-bit
490pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
491`z2Ptr'.
492-------------------------------------------------------------------------------
493*/
494INLINE void
495 mul128By64To192(
496     bits64 a0,
497     bits64 a1,
498     bits64 b,
499     bits64 *z0Ptr,
500     bits64 *z1Ptr,
501     bits64 *z2Ptr
502 )
503{
504    bits64 z0, z1, z2, more1;
505
506    mul64To128( a1, b, &z1, &z2 );
507    mul64To128( a0, b, &z0, &more1 );
508    add128( z0, more1, 0, z1, &z0, &z1 );
509    *z2Ptr = z2;
510    *z1Ptr = z1;
511    *z0Ptr = z0;
512
513}
514
515/*
516-------------------------------------------------------------------------------
517Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
518128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
519product.  The product is broken into four 64-bit pieces which are stored at
520the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
521-------------------------------------------------------------------------------
522*/
523INLINE void
524 mul128To256(
525     bits64 a0,
526     bits64 a1,
527     bits64 b0,
528     bits64 b1,
529     bits64 *z0Ptr,
530     bits64 *z1Ptr,
531     bits64 *z2Ptr,
532     bits64 *z3Ptr
533 )
534{
535    bits64 z0, z1, z2, z3;
536    bits64 more1, more2;
537
538    mul64To128( a1, b1, &z2, &z3 );
539    mul64To128( a1, b0, &z1, &more2 );
540    add128( z1, more2, 0, z2, &z1, &z2 );
541    mul64To128( a0, b0, &z0, &more1 );
542    add128( z0, more1, 0, z1, &z0, &z1 );
543    mul64To128( a0, b1, &more1, &more2 );
544    add128( more1, more2, 0, z2, &more1, &z2 );
545    add128( z0, z1, 0, more1, &z0, &z1 );
546    *z3Ptr = z3;
547    *z2Ptr = z2;
548    *z1Ptr = z1;
549    *z0Ptr = z0;
550
551}
552
553/*
554-------------------------------------------------------------------------------
555Returns an approximation to the 64-bit integer quotient obtained by dividing
556`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
557divisor `b' must be at least 2^63.  If q is the exact quotient truncated
558toward zero, the approximation returned lies between q and q + 2 inclusive.
559If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
560unsigned integer is returned.
561-------------------------------------------------------------------------------
562*/
563static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
564{
565    bits64 b0, b1;
566    bits64 rem0, rem1, term0, term1;
567    bits64 z;
568
569    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
570    b0 = b>>32;
571    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
572    mul64To128( b, z, &term0, &term1 );
573    sub128( a0, a1, term0, term1, &rem0, &rem1 );
574    while ( ( (sbits64) rem0 ) < 0 ) {
575        z -= LIT64( 0x100000000 );
576        b1 = b<<32;
577        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
578    }
579    rem0 = ( rem0<<32 ) | ( rem1>>32 );
580    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
581    return z;
582
583}
584
585#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
586/*
587-------------------------------------------------------------------------------
588Returns an approximation to the square root of the 32-bit significand given
589by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
590`aExp' (the least significant bit) is 1, the integer returned approximates
5912^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
592is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
593case, the approximation returned lies strictly within +/-2 of the exact
594value.
595-------------------------------------------------------------------------------
596*/
597static bits32 estimateSqrt32( int16 aExp, bits32 a )
598{
599    static const bits16 sqrtOddAdjustments[] = {
600        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
601        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
602    };
603    static const bits16 sqrtEvenAdjustments[] = {
604        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
605        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
606    };
607    int8 idx;
608    bits32 z;
609
610    idx = ( a>>27 ) & 15;
611    if ( aExp & 1 ) {
612        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
613        z = ( ( a / z )<<14 ) + ( z<<15 );
614        a >>= 1;
615    }
616    else {
617        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
618        z = a / z + z;
619        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
620        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
621    }
622    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
623
624}
625#endif
626
627/*
628-------------------------------------------------------------------------------
629Returns the number of leading 0 bits before the most-significant 1 bit of
630`a'.  If `a' is zero, 32 is returned.
631-------------------------------------------------------------------------------
632*/
633static int8 countLeadingZeros32( bits32 a )
634{
635    static const int8 countLeadingZerosHigh[] = {
636        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
637        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
638        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
639        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
640        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
641        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
642        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
644        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
652    };
653    int8 shiftCount;
654
655    shiftCount = 0;
656    if ( a < 0x10000 ) {
657        shiftCount += 16;
658        a <<= 16;
659    }
660    if ( a < 0x1000000 ) {
661        shiftCount += 8;
662        a <<= 8;
663    }
664    shiftCount += countLeadingZerosHigh[ a>>24 ];
665    return shiftCount;
666
667}
668
669/*
670-------------------------------------------------------------------------------
671Returns the number of leading 0 bits before the most-significant 1 bit of
672`a'.  If `a' is zero, 64 is returned.
673-------------------------------------------------------------------------------
674*/
675static int8 countLeadingZeros64( bits64 a )
676{
677    int8 shiftCount;
678
679    shiftCount = 0;
680    if ( a < ( (bits64) 1 )<<32 ) {
681        shiftCount += 32;
682    }
683    else {
684        a >>= 32;
685    }
686    shiftCount += countLeadingZeros32( a );
687    return shiftCount;
688
689}
690
691/*
692-------------------------------------------------------------------------------
693Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
694is equal to the 128-bit value formed by concatenating `b0' and `b1'.
695Otherwise, returns 0.
696-------------------------------------------------------------------------------
697*/
698INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
699{
700
701    return ( a0 == b0 ) && ( a1 == b1 );
702
703}
704
705/*
706-------------------------------------------------------------------------------
707Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
708than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
709Otherwise, returns 0.
710-------------------------------------------------------------------------------
711*/
712INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
713{
714
715    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
716
717}
718
719/*
720-------------------------------------------------------------------------------
721Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
722than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
723returns 0.
724-------------------------------------------------------------------------------
725*/
726INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
727{
728
729    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
730
731}
732
733/*
734-------------------------------------------------------------------------------
735Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
736not equal to the 128-bit value formed by concatenating `b0' and `b1'.
737Otherwise, returns 0.
738-------------------------------------------------------------------------------
739*/
740INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
741{
742
743    return ( a0 != b0 ) || ( a1 != b1 );
744
745}
746
747