1 /* $NetBSD: softfloat-macros.h,v 1.3 2020/09/01 15:45:20 thorpej Exp $ */
2
3 /*============================================================================
4
5 This C source fragment is part of the Berkeley SoftFloat IEEE Floating-Point
6 Arithmetic Package, Release 2c, by John R. Hauser.
7
8 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
9 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
10 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
11 AND ORGANIZATIONS WHO CAN AND WILL TOLERATE ALL LOSSES, COSTS, OR OTHER
12 PROBLEMS THEY INCUR DUE TO THE SOFTWARE WITHOUT RECOMPENSE FROM JOHN HAUSER OR
13 THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE, AND WHO FURTHERMORE EFFECTIVELY
14 INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE
15 (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR OTHER
16 PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE, OR
17 INCURRED BY ANYONE DUE TO A DERIVATIVE WORK THEY CREATE USING ANY PART OF THE
18 SOFTWARE.
19
20 Derivative works require also that (1) the source code for the derivative work
21 includes prominent notice that the work is derivative, and (2) the source code
22 includes prominent notice of these three paragraphs for those parts of this
23 code that are retained.
24
25 =============================================================================*/
26
27 /*----------------------------------------------------------------------------
28 | Shifts `a' right by the number of bits given in `count'. If any nonzero
29 | bits are shifted off, they are "jammed" into the least significant bit of
30 | the result by setting the least significant bit to 1. The value of `count'
31 | can be arbitrarily large; in particular, if `count' is greater than 32, the
32 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
33 | The result is stored in the location pointed to by `zPtr'.
34 *----------------------------------------------------------------------------*/
35
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)36 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
37 {
38 bits32 z;
39
40 if ( count == 0 ) {
41 z = a;
42 }
43 else if ( count < 32 ) {
44 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
45 }
46 else {
47 z = ( a != 0 );
48 }
49 *zPtr = z;
50
51 }
52
53 /*----------------------------------------------------------------------------
54 | Shifts `a' right by the number of bits given in `count'. If any nonzero
55 | bits are shifted off, they are "jammed" into the least significant bit of
56 | the result by setting the least significant bit to 1. The value of `count'
57 | can be arbitrarily large; in particular, if `count' is greater than 64, the
58 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
59 | The result is stored in the location pointed to by `zPtr'.
60 *----------------------------------------------------------------------------*/
61
shift64RightJamming(bits64 a,int16 count,bits64 * zPtr)62 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
63 {
64 bits64 z;
65
66 if ( count == 0 ) {
67 z = a;
68 }
69 else if ( count < 64 ) {
70 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
71 }
72 else {
73 z = ( a != 0 );
74 }
75 *zPtr = z;
76
77 }
78
79 /*----------------------------------------------------------------------------
80 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
81 | _plus_ the number of bits given in `count'. The shifted result is at most
82 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
83 | bits shifted off form a second 64-bit result as follows: The _last_ bit
84 | shifted off is the most-significant bit of the extra result, and the other
85 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
86 | bits shifted off were all zero. This extra result is stored in the location
87 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
88 | (This routine makes more sense if `a0' and `a1' are considered to form
89 | a fixed-point value with binary point between `a0' and `a1'. This fixed-
90 | point value is shifted right by the number of bits given in `count', and
91 | the integer part of the result is returned at the location pointed to by
92 | `z0Ptr'. The fractional part of the result may be slightly corrupted as
93 | described above, and is returned at the location pointed to by `z1Ptr'.)
94 *----------------------------------------------------------------------------*/
95
96 INLINE void
shift64ExtraRightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)97 shift64ExtraRightJamming(
98 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
99 {
100 bits64 z0, z1;
101 int8 negCount = ( - count ) & 63;
102
103 if ( count == 0 ) {
104 z1 = a1;
105 z0 = a0;
106 }
107 else if ( count < 64 ) {
108 z1 = ( a0<<negCount ) | ( a1 != 0 );
109 z0 = a0>>count;
110 }
111 else {
112 if ( count == 64 ) {
113 z1 = a0 | ( a1 != 0 );
114 }
115 else {
116 z1 = ( ( a0 | a1 ) != 0 );
117 }
118 z0 = 0;
119 }
120 *z1Ptr = z1;
121 *z0Ptr = z0;
122
123 }
124
125 /*----------------------------------------------------------------------------
126 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
127 | number of bits given in `count'. Any bits shifted off are lost. The value
128 | of `count' can be arbitrarily large; in particular, if `count' is greater
129 | than 128, the result will be 0. The result is broken into two 64-bit pieces
130 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
131 *----------------------------------------------------------------------------*/
132
133 INLINE void
shift128Right(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)134 shift128Right(
135 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
136 {
137 bits64 z0, z1;
138 int8 negCount = ( - count ) & 63;
139
140 if ( count == 0 ) {
141 z1 = a1;
142 z0 = a0;
143 }
144 else if ( count < 64 ) {
145 z1 = ( a0<<negCount ) | ( a1>>count );
146 z0 = a0>>count;
147 }
148 else {
149 z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
150 z0 = 0;
151 }
152 *z1Ptr = z1;
153 *z0Ptr = z0;
154
155 }
156
157 /*----------------------------------------------------------------------------
158 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
159 | number of bits given in `count'. If any nonzero bits are shifted off, they
160 | are "jammed" into the least significant bit of the result by setting the
161 | least significant bit to 1. The value of `count' can be arbitrarily large;
162 | in particular, if `count' is greater than 128, the result will be either
163 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
164 | nonzero. The result is broken into two 64-bit pieces which are stored at
165 | the locations pointed to by `z0Ptr' and `z1Ptr'.
166 *----------------------------------------------------------------------------*/
167
168 INLINE void
shift128RightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)169 shift128RightJamming(
170 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
171 {
172 bits64 z0, z1;
173 int8 negCount = ( - count ) & 63;
174
175 if ( count == 0 ) {
176 z1 = a1;
177 z0 = a0;
178 }
179 else if ( count < 64 ) {
180 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
181 z0 = a0>>count;
182 }
183 else {
184 if ( count == 64 ) {
185 z1 = a0 | ( a1 != 0 );
186 }
187 else if ( count < 128 ) {
188 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
189 }
190 else {
191 z1 = ( ( a0 | a1 ) != 0 );
192 }
193 z0 = 0;
194 }
195 *z1Ptr = z1;
196 *z0Ptr = z0;
197
198 }
199
200 /*----------------------------------------------------------------------------
201 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
202 | by 64 _plus_ the number of bits given in `count'. The shifted result is
203 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
204 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
205 | off form a third 64-bit result as follows: The _last_ bit shifted off is
206 | the most-significant bit of the extra result, and the other 63 bits of the
207 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
208 | were all zero. This extra result is stored in the location pointed to by
209 | `z2Ptr'. The value of `count' can be arbitrarily large.
210 | (This routine makes more sense if `a0', `a1', and `a2' are considered
211 | to form a fixed-point value with binary point between `a1' and `a2'. This
212 | fixed-point value is shifted right by the number of bits given in `count',
213 | and the integer part of the result is returned at the locations pointed to
214 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
215 | corrupted as described above, and is returned at the location pointed to by
216 | `z2Ptr'.)
217 *----------------------------------------------------------------------------*/
218
219 INLINE void
shift128ExtraRightJamming(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)220 shift128ExtraRightJamming(
221 bits64 a0,
222 bits64 a1,
223 bits64 a2,
224 int16 count,
225 bits64 *z0Ptr,
226 bits64 *z1Ptr,
227 bits64 *z2Ptr
228 )
229 {
230 bits64 z0, z1, z2;
231 int8 negCount = ( - count ) & 63;
232
233 if ( count == 0 ) {
234 z2 = a2;
235 z1 = a1;
236 z0 = a0;
237 }
238 else {
239 if ( count < 64 ) {
240 z2 = a1<<negCount;
241 z1 = ( a0<<negCount ) | ( a1>>count );
242 z0 = a0>>count;
243 }
244 else {
245 if ( count == 64 ) {
246 z2 = a1;
247 z1 = a0;
248 }
249 else {
250 a2 |= a1;
251 if ( count < 128 ) {
252 z2 = a0<<negCount;
253 z1 = a0>>( count & 63 );
254 }
255 else {
256 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
257 z1 = 0;
258 }
259 }
260 z0 = 0;
261 }
262 z2 |= ( a2 != 0 );
263 }
264 *z2Ptr = z2;
265 *z1Ptr = z1;
266 *z0Ptr = z0;
267
268 }
269
270 /*----------------------------------------------------------------------------
271 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
272 | number of bits given in `count'. Any bits shifted off are lost. The value
273 | of `count' must be less than 64. The result is broken into two 64-bit
274 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
275 *----------------------------------------------------------------------------*/
276
277 INLINE void
shortShift128Left(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)278 shortShift128Left(
279 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
280 {
281
282 *z1Ptr = a1<<count;
283 *z0Ptr =
284 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
285
286 }
287
288 /*----------------------------------------------------------------------------
289 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
290 | by the number of bits given in `count'. Any bits shifted off are lost.
291 | The value of `count' must be less than 64. The result is broken into three
292 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
293 | `z1Ptr', and `z2Ptr'.
294 *----------------------------------------------------------------------------*/
295
296 INLINE void
shortShift192Left(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)297 shortShift192Left(
298 bits64 a0,
299 bits64 a1,
300 bits64 a2,
301 int16 count,
302 bits64 *z0Ptr,
303 bits64 *z1Ptr,
304 bits64 *z2Ptr
305 )
306 {
307 bits64 z0, z1, z2;
308 int8 negCount;
309
310 z2 = a2<<count;
311 z1 = a1<<count;
312 z0 = a0<<count;
313 if ( 0 < count ) {
314 negCount = ( ( - count ) & 63 );
315 z1 |= a2>>negCount;
316 z0 |= a1>>negCount;
317 }
318 *z2Ptr = z2;
319 *z1Ptr = z1;
320 *z0Ptr = z0;
321
322 }
323
324 /*----------------------------------------------------------------------------
325 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
326 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
327 | any carry out is lost. The result is broken into two 64-bit pieces which
328 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
329 *----------------------------------------------------------------------------*/
330
331 INLINE void
add128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)332 add128(
333 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
334 {
335 bits64 z1;
336
337 z1 = a1 + b1;
338 *z1Ptr = z1;
339 *z0Ptr = a0 + b0 + ( z1 < a1 );
340
341 }
342
343 /*----------------------------------------------------------------------------
344 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
345 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
346 | modulo 2^192, so any carry out is lost. The result is broken into three
347 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
348 | `z1Ptr', and `z2Ptr'.
349 *----------------------------------------------------------------------------*/
350
351 INLINE void
add192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)352 add192(
353 bits64 a0,
354 bits64 a1,
355 bits64 a2,
356 bits64 b0,
357 bits64 b1,
358 bits64 b2,
359 bits64 *z0Ptr,
360 bits64 *z1Ptr,
361 bits64 *z2Ptr
362 )
363 {
364 bits64 z0, z1, z2;
365 int8 carry0, carry1;
366
367 z2 = a2 + b2;
368 carry1 = ( z2 < a2 );
369 z1 = a1 + b1;
370 carry0 = ( z1 < a1 );
371 z0 = a0 + b0;
372 z1 += carry1;
373 z0 += ( z1 < carry1 );
374 z0 += carry0;
375 *z2Ptr = z2;
376 *z1Ptr = z1;
377 *z0Ptr = z0;
378
379 }
380
381 /*----------------------------------------------------------------------------
382 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
383 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
384 | 2^128, so any borrow out (carry out) is lost. The result is broken into two
385 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
386 | `z1Ptr'.
387 *----------------------------------------------------------------------------*/
388
389 INLINE void
sub128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)390 sub128(
391 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
392 {
393
394 *z1Ptr = a1 - b1;
395 *z0Ptr = a0 - b0 - ( a1 < b1 );
396
397 }
398
399 /*----------------------------------------------------------------------------
400 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
401 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
402 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
403 | result is broken into three 64-bit pieces which are stored at the locations
404 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
405 *----------------------------------------------------------------------------*/
406
407 INLINE void
sub192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)408 sub192(
409 bits64 a0,
410 bits64 a1,
411 bits64 a2,
412 bits64 b0,
413 bits64 b1,
414 bits64 b2,
415 bits64 *z0Ptr,
416 bits64 *z1Ptr,
417 bits64 *z2Ptr
418 )
419 {
420 bits64 z0, z1, z2;
421 int8 borrow0, borrow1;
422
423 z2 = a2 - b2;
424 borrow1 = ( a2 < b2 );
425 z1 = a1 - b1;
426 borrow0 = ( a1 < b1 );
427 z0 = a0 - b0;
428 z0 -= ( z1 < borrow1 );
429 z1 -= borrow1;
430 z0 -= borrow0;
431 *z2Ptr = z2;
432 *z1Ptr = z1;
433 *z0Ptr = z0;
434
435 }
436
437 /*----------------------------------------------------------------------------
438 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
439 | into two 64-bit pieces which are stored at the locations pointed to by
440 | `z0Ptr' and `z1Ptr'.
441 *----------------------------------------------------------------------------*/
442
mul64To128(bits64 a,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr)443 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
444 {
445 bits32 aHigh, aLow, bHigh, bLow;
446 bits64 z0, zMiddleA, zMiddleB, z1;
447
448 aLow = a;
449 aHigh = a>>32;
450 bLow = b;
451 bHigh = b>>32;
452 z1 = ( (bits64) aLow ) * bLow;
453 zMiddleA = ( (bits64) aLow ) * bHigh;
454 zMiddleB = ( (bits64) aHigh ) * bLow;
455 z0 = ( (bits64) aHigh ) * bHigh;
456 zMiddleA += zMiddleB;
457 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
458 zMiddleA <<= 32;
459 z1 += zMiddleA;
460 z0 += ( z1 < zMiddleA );
461 *z1Ptr = z1;
462 *z0Ptr = z0;
463
464 }
465
466 /*----------------------------------------------------------------------------
467 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
468 | `b' to obtain a 192-bit product. The product is broken into three 64-bit
469 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
470 | `z2Ptr'.
471 *----------------------------------------------------------------------------*/
472
473 INLINE void
mul128By64To192(bits64 a0,bits64 a1,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)474 mul128By64To192(
475 bits64 a0,
476 bits64 a1,
477 bits64 b,
478 bits64 *z0Ptr,
479 bits64 *z1Ptr,
480 bits64 *z2Ptr
481 )
482 {
483 bits64 z0, z1, z2, more1;
484
485 mul64To128( a1, b, &z1, &z2 );
486 mul64To128( a0, b, &z0, &more1 );
487 add128( z0, more1, 0, z1, &z0, &z1 );
488 *z2Ptr = z2;
489 *z1Ptr = z1;
490 *z0Ptr = z0;
491
492 }
493
494 /*----------------------------------------------------------------------------
495 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
496 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
497 | product. The product is broken into four 64-bit pieces which are stored at
498 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
499 *----------------------------------------------------------------------------*/
500
501 INLINE void
mul128To256(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr,bits64 * z3Ptr)502 mul128To256(
503 bits64 a0,
504 bits64 a1,
505 bits64 b0,
506 bits64 b1,
507 bits64 *z0Ptr,
508 bits64 *z1Ptr,
509 bits64 *z2Ptr,
510 bits64 *z3Ptr
511 )
512 {
513 bits64 z0, z1, z2, z3;
514 bits64 more1, more2;
515
516 mul64To128( a1, b1, &z2, &z3 );
517 mul64To128( a1, b0, &z1, &more2 );
518 add128( z1, more2, 0, z2, &z1, &z2 );
519 mul64To128( a0, b0, &z0, &more1 );
520 add128( z0, more1, 0, z1, &z0, &z1 );
521 mul64To128( a0, b1, &more1, &more2 );
522 add128( more1, more2, 0, z2, &more1, &z2 );
523 add128( z0, z1, 0, more1, &z0, &z1 );
524 *z3Ptr = z3;
525 *z2Ptr = z2;
526 *z1Ptr = z1;
527 *z0Ptr = z0;
528
529 }
530
531 /*----------------------------------------------------------------------------
532 | Returns an approximation to the 64-bit integer quotient obtained by dividing
533 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
534 | divisor `b' must be at least 2^63. If q is the exact quotient truncated
535 | toward zero, the approximation returned lies between q and q + 2 inclusive.
536 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
537 | unsigned integer is returned.
538 *----------------------------------------------------------------------------*/
539
estimateDiv128To64(bits64 a0,bits64 a1,bits64 b)540 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
541 {
542 bits64 b0, b1;
543 bits64 rem0, rem1, term0, term1;
544 bits64 z;
545
546 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
547 b0 = b>>32;
548 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
549 mul64To128( b, z, &term0, &term1 );
550 sub128( a0, a1, term0, term1, &rem0, &rem1 );
551 while ( ( (sbits64) rem0 ) < 0 ) {
552 z -= LIT64( 0x100000000 );
553 b1 = b<<32;
554 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
555 }
556 rem0 = ( rem0<<32 ) | ( rem1>>32 );
557 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
558 return z;
559
560 }
561
562 #ifndef SOFTFLOAT_FOR_GCC /* Not used */
563 /*----------------------------------------------------------------------------
564 | Returns an approximation to the square root of the 32-bit significand given
565 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
566 | `aExp' (the least significant bit) is 1, the integer returned approximates
567 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
568 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
569 | case, the approximation returned lies strictly within +/-2 of the exact
570 | value.
571 *----------------------------------------------------------------------------*/
572
estimateSqrt32(int16 aExp,bits32 a)573 static bits32 estimateSqrt32( int16 aExp, bits32 a )
574 {
575 static const bits16 sqrtOddAdjustments[] = {
576 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
577 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
578 };
579 static const bits16 sqrtEvenAdjustments[] = {
580 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
581 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
582 };
583 int8 index;
584 bits32 z;
585
586 index = ( a>>27 ) & 15;
587 if ( aExp & 1 ) {
588 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
589 z = ( ( a / z )<<14 ) + ( z<<15 );
590 a >>= 1;
591 }
592 else {
593 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
594 z = a / z + z;
595 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
596 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
597 }
598 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
599
600 }
601 #endif
602
603 /*----------------------------------------------------------------------------
604 | Returns the number of leading 0 bits before the most-significant 1 bit of
605 | `a'. If `a' is zero, 32 is returned.
606 *----------------------------------------------------------------------------*/
607
countLeadingZeros32(bits32 a)608 static int8 countLeadingZeros32( bits32 a )
609 {
610 static const int8 countLeadingZerosHigh[] = {
611 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
612 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
613 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
614 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
615 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
616 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
617 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
618 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
621 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
623 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
624 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
625 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
627 };
628 int8 shiftCount;
629
630 shiftCount = 0;
631 if ( a < 0x10000 ) {
632 shiftCount += 16;
633 a <<= 16;
634 }
635 if ( a < 0x1000000 ) {
636 shiftCount += 8;
637 a <<= 8;
638 }
639 shiftCount += countLeadingZerosHigh[ a>>24 ];
640 return shiftCount;
641
642 }
643
644 /*----------------------------------------------------------------------------
645 | Returns the number of leading 0 bits before the most-significant 1 bit of
646 | `a'. If `a' is zero, 64 is returned.
647 *----------------------------------------------------------------------------*/
648
countLeadingZeros64(bits64 a)649 static int8 countLeadingZeros64( bits64 a )
650 {
651 int8 shiftCount;
652
653 shiftCount = 0;
654 if ( a < ( (bits64) 1 )<<32 ) {
655 shiftCount += 32;
656 }
657 else {
658 a >>= 32;
659 }
660 shiftCount += countLeadingZeros32( a );
661 return shiftCount;
662
663 }
664
665 /*----------------------------------------------------------------------------
666 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
667 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
668 | Otherwise, returns 0.
669 *----------------------------------------------------------------------------*/
670
eq128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)671 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
672 {
673
674 return ( a0 == b0 ) && ( a1 == b1 );
675
676 }
677
678 /*----------------------------------------------------------------------------
679 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
680 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
681 | Otherwise, returns 0.
682 *----------------------------------------------------------------------------*/
683
le128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)684 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
685 {
686
687 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
688
689 }
690
691 /*----------------------------------------------------------------------------
692 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
693 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
694 | returns 0.
695 *----------------------------------------------------------------------------*/
696
lt128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)697 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
698 {
699
700 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
701
702 }
703
704 /*----------------------------------------------------------------------------
705 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
706 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
707 | Otherwise, returns 0.
708 *----------------------------------------------------------------------------*/
709
ne128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)710 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
711 {
712
713 return ( a0 != b0 ) || ( a1 != b1 );
714
715 }
716
717