1 /******************************************************************************
2 Copyright (c) 2007-2011, Intel Corp.
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its contributors
14 may be used to endorse or promote products derived from this software
15 without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 THE POSSIBILITY OF SUCH DAMAGE.
28 ******************************************************************************/
29
30 #if defined(FAST_SQRT) + defined(SQRT) + defined(RSQRT) + defined(MAKE_INCLUDE) != 1
31 # error Exactly one of SQRT, FAST_SQRT, RSQRT, or MAKE_INCLUDE must be defined.
32 #endif
33
34 #if defined(FAST_SQRT)
35 # define __ENTRY_NAME F_FAST_SQRT_NAME
36 # define ___BASE_NAME FAST_SQRT_BASE_NAME
37 # define IF_SQRT(x) x
38 # define IF_RSQRT(x)
39 #elif defined(RSQRT)
40 # define __ENTRY_NAME F_RSQRT_NAME
41 # define ___BASE_NAME RSQRT_BASE_NAME
42 # define IF_SQRT(x)
43 # define IF_RSQRT(x) x
44 #elif defined(SQRT)
45 # define __ENTRY_NAME F_SQRT_NAME
46 # define ___BASE_NAME SQRT_BASE_NAME
47 # define IF_SQRT(x) x
48 # define IF_RSQRT(x)
49 #endif
50
51 #if !defined(F_ENTRY_NAME)
52 # define F_ENTRY_NAME __ENTRY_NAME
53 #endif
54 #if !defined(BASE_NAME)
55 # define BASE_NAME ___BASE_NAME
56 #endif
57
58 #if !defined(BUILD_FILE_EXTENSION)
59 # define BUILD_FILE_EXTENSION c
60 #endif
61
62
63 #include "dpml_private.h"
64
65 #if (DYNAMIC_ROUNDING_MODES) || (COMPILER == epc_cc)
66 # define ESTABLISH_ROUND_TO_ZERO(old_mode) \
67 INIT_FPU_STATE_AND_ROUND_TO_ZERO(old_mode)
68 # define RESTORE_ROUNDING_MODE(old_mode) \
69 RESTORE_FPU_STATE(old_mode)
70 #else
71 # define ESTABLISH_ROUND_TO_ZERO(old_mode)
72 # define RESTORE_ROUNDING_MODE(old_mode)
73 #endif
74
75
76 #if !defined(F_MUL_CHOPPED)
77
78 /* This definition of F_MUL_CHOPPED is used for dynamic
79 rounding modes and when no directed rounding is available.
80 In the later case results will not be correctly rounded. */
81
82 # define F_MUL_CHOPPED(x,y,z) (z) = (x) * (y)
83
84 #endif
85
86
87 /*
88 ** NUM_FRAC_BITS specifies the number of mantissa bits used for
89 ** indexing the table (the table index also includes the low-order
90 ** exponent bit). NUM_FRAC_BITS also affects the table size:
91 **
92 ** sizeof(D_SQRT_TABLE_NAME) = (1 << (NUM_FRAC_BITS + 1))
93 ** * (2*sizeof(float)+sizeof(double))
94 */
95
96 #define NUM_FRAC_BITS 7
97 #define INDEX_MASK MAKE_MASK((NUM_FRAC_BITS + 1), 0)
98
99
100 #if (IEEE_FLOATING)
101
102 /*
103 ** LOC_OF_EXPON is the bit offset within u.B_SIGNED_HI_32 of the
104 ** low-order exponent bit of u.f, where u is a B_UNION. (We assume
105 ** the highest bits of B_SIGNED_HI_32 hold the sign bit and exponent).
106 **
107 ** From LOC_OF_EXPON, EXP_BITS_OF_ONE_HALF and HI_EXP_BIT_MASK are derived.
108 */
109
110 # define LOC_OF_EXPON ((BITS_PER_LS_INT_TYPE - 1) - B_EXP_WIDTH)
111 # define EXP_BITS_OF_ONE_HALF ((U_LS_INT_TYPE)(B_EXP_BIAS-B_NORM-1) << LOC_OF_EXPON)
112 # define HI_EXP_BIT_MASK (MAKE_MASK(B_EXP_WIDTH-1, 1) << LOC_OF_EXPON)
113
114 # define GET_SQRT_TABLE_INDEX(exp,index) \
115 index = (exp >> (LOC_OF_EXPON - NUM_FRAC_BITS)); \
116 index &= INDEX_MASK
117
118 /*
119 ** SAVE_EXP saves the exponent in a temporary so it can be used in
120 ** the INPUT_IS_ABNORMAL macro
121 */
122
123 # define SAVE_EXP(exp) save_exp = (exp)
124 # define INPUT_IS_ABNORMAL \
125 ((U_LS_INT_TYPE)(save_exp-((LS_INT_TYPE)1 << LOC_OF_EXPON)) >= \
126 (U_LS_INT_TYPE)hi_exp_mask)
127 #endif
128
129 #if (VAX_FLOATING)
130
131 # define EXP_BITS_OF_ONE_HALF 0x4000
132 # define HI_EXP_BIT_MASK 0x7fe0
133
134 # define GET_SQRT_TABLE_INDEX(exp,index) \
135 index = ((exp << 3) | ((U_INT_32)exp >> 29)); \
136 index &= INDEX_MASK
137
138 # define SAVE_EXP(exp) /* INPUT_IS_ABNORMAL doesn't need it */
139 # define INPUT_IS_ABNORMAL (x <= (F_TYPE)0.0)
140
141 #endif
142
143
144 #if ((ARCHITECTURE == alpha) || (BITS_PER_WORD == 64))
145
146 /* We can do 64-bit stores */
147 /* This is an optimization of the 'else' clause below */
148 # if QUAD_PRECISION
149 # define STORE_EXP_TO_V_UNION \
150 V_UNION_128_BIT_STORE
151 # else
152 # define STORE_EXP_TO_V_UNION \
153 V_UNION_64_BIT_STORE
154 # endif
155
156 #else
157
158 /* Store it in 32-bits pieces */
159 # if QUAD_PRECISION
160 # define STORE_EXP_TO_V_UNION \
161 v.B_SIGNED_HI_32 = ((U_INT_32)exp) >> 1; \
162 v.B_SIGNED_LO1_32 = 0; \
163 v.B_SIGNED_LO2_32 = 0; \
164 v.B_SIGNED_LO3_32 = 0
165 # else
166 # define STORE_EXP_TO_V_UNION \
167 v.B_SIGNED_HI_32 = ((U_INT_32)exp) >> 1; \
168 v.B_SIGNED_LO_32 = 0
169 # endif
170
171 #endif
172
173
174 /* This condition is complicated. */
175
176 #if (VAX_FLOATING) == (ENDIANESS == little_endian)
177 # define V_UNION_64_BIT_STORE \
178 v.B_UNSIGNED_HI_64 = ((U_INT_64)(U_INT_32)exp) >> 1
179 # define V_UNION_128_BIT_STORE \
180 v.B_UNSIGNED_HI_64 = ((U_INT_64)(U_INT_32)exp) >> 1; \
181 v.B_UNSIGNED_LO_64 = 0
182 #elif ((ARCHITECTURE == alpha) && defined(HAS_LOAD_WRONG_STORE_SIZE_PENALTY))
183 # define V_UNION_64_BIT_STORE \
184 v.B_UNSIGNED_HI_64 = ((U_WORD)exp) >> 1
185 # define V_UNION_128_BIT_STORE \
186 v.B_UNSIGNED_HI_64 = ((U_WORD)exp) >> 1; \
187 v.B_UNSIGNED_LO_64 = 0
188 #else
189 # define V_UNION_64_BIT_STORE \
190 v.B_UNSIGNED_HI_64 = ((U_INT_64)(U_INT_32)exp) << 31
191 # define V_UNION_128_BIT_STORE \
192 v.B_UNSIGNED_HI_64 = ((U_INT_64)(U_INT_32)exp) << 31; \
193 v.B_UNSIGNED_LO_64 = 0
194 #endif
195
196
197 /*
198 ** The definitions of SQRT_COEF_STRUCT and D_SQRT_TABLE_NAME also
199 ** appear in the generated .c file for the table.
200 */
201 typedef struct {
202 float a, b;
203 double c;
204 } SQRT_COEF_STRUCT;
205
206 extern const SQRT_COEF_STRUCT D_SQRT_TABLE_NAME[(1<<(NUM_FRAC_BITS+1))];
207
208
209 /*
210 ** SCALE_AND_DO_INDEXED_POLY_APPROX
211 **
212 ** Inputs:
213 ** x any number
214 ** = f * 2^(2*i+j)
215 ** where 1/2 <= f < 1, integer i and j,
216 ** and j = 0 or 1
217 ** ignoring f <= 0
218 **
219 ** Outputs:
220 ** half_scale = 2^(i-1) (SQRT, F_SQRT)
221 **
222 ** flah_scale = 2^(1-i) (RSQRT)
223 ** (the name is clear, albeit cute)
224 **
225 ** scaled_x = f * 2^j
226 ** so 1/2 <= scaled_x < 2
227 **
228 ** y ~= 1/sqrt(scaled_x)
229 **
230 ** so sqrt(x) ~= y * scaled_x * 2 * half_scale
231 ** and 1/sqrt(x) ~= y / (2 * half_scale)
232 **
233 ** Temporaries:
234 ** u, a, b, c, index
235 */
236
237 #define SCALE_AND_DO_INDEXED_POLY_APPROX \
238 u.f = (B_TYPE)x; \
239 exp = u.B_HI_LS_INT_TYPE; \
240 B_COPY_SIGN_AND_EXP((B_TYPE)x, half, y); \
241 ASSERT( ((0.5 <= y) && (y < 1.0)) ); \
242 GET_SQRT_TABLE_INDEX(exp,index); \
243 b = (B_TYPE)D_SQRT_TABLE_NAME[index].b; \
244 b *= y; \
245 c = (B_TYPE)D_SQRT_TABLE_NAME[index].c; \
246 lo_exp_bit_and_hi_frac = exp & ~hi_exp_mask; \
247 u.B_HI_LS_INT_TYPE = (exp_of_one_half | lo_exp_bit_and_hi_frac); \
248 c += b; \
249 scaled_x = u.f; \
250 ASSERT( (((0.5 <= scaled_x) && (scaled_x < 2.0)) || (scaled_x < 0.0)) ); \
251 y *= y; \
252 a = (B_TYPE)D_SQRT_TABLE_NAME[index].a; \
253 SAVE_EXP(exp); \
254 IF_SQRT ({ \
255 exp ^= lo_exp_bit_and_hi_frac; \
256 exp += exp_of_one_half; \
257 }) \
258 IF_RSQRT({ \
259 exp ^= lo_exp_bit_and_hi_frac; \
260 exp = 3*exp_of_one_half - exp; \
261 }) \
262 y *= a; \
263 STORE_EXP_TO_V_UNION; \
264 y += c; \
265 IF_SQRT ( half_scale = v.f ); \
266 IF_RSQRT( flah_scale = v.f ); \
267 /* end of SCALE_AND_DO_INDEXED_POLY_APPROX */
268
269
270
271 /*----------------------------------------------------------------------------*/
272 /* Tuckerman's Rounding */
273 /*----------------------------------------------------------------------------*/
274
275 /*
276 ** Tuckerman's rounding is used to compute the correctly rounded sqrt(x).
277 ** It's 'good to the last bit', or more precisely 'to within 1/2 lsb(sqrt(x))'.
278 ** This is a short proof of Tuckerman's rounding.
279 **
280 ** Let z be a machine-precision approximation to sqrt(x); then z+lsb(z) is the
281 ** smallest representable number larger than z (NB: z-lsb(z) is the largest
282 ** representable number less than z, _except_ when z is a power of 2).
283 ** Within this proof, let [] represent _truncation_ to machine precision,
284 ** and {} represent _rounding_ to machine precision.
285 **
286 ** Note that for _any_ y (not necessarily representable in machine precision),
287 **
288 ** z + 1/2 lsb(z) <= y <==> z < {y}.
289 **
290 ** For sqrt(x), we never have equality:
291 ** z + 1/2 lsb(z) <= sqrt(x) ==> z + 1/2 lsb(z) < sqrt(x),
292 ** because if they were equal, we'd have:
293 ** (z + 1/2 lsb(z))^2 = x
294 ** which is impossible, because to represent the left hand side requires more
295 ** than twice the machine precision, while the right hand side is representable.
296 **
297 ** Now the following statements are equivalent in turn:
298 **
299 ** z < {sqrt(x)}
300 ** z + 1/2 lsb(z) <= sqrt(x)
301 ** z + 1/2 lsb(z) < sqrt(x)
302 ** (z + 1/2 lsb(z))^2 < x
303 ** z (z + 1/2 lsb(z)) < x (the reverse is proved below)
304 ** [ z (z + 1/2 lsb(z)) ] < x.
305 **
306 ** To complete the reverse of the third inference above, suppose it were false.
307 ** Then: z (z + 1/2 lsb(z)) < x <= (z + 1/2 lsb(z))^2. The left hand side is
308 ** some multiple of 1/2 lsb(z)^2. The right hand side is only larger by
309 ** d = 1/4 lsb(z)^2, so [rhs] = [rhs-d] = [lhs]. But the inequality implies
310 ** [lhs] < x <= [rhs], and we have a contradiction.
311 **
312 ** In conclusion,
313 ** z < {sqrt(x)} <==> [ z (z + 1/2 lsb(z)) ] < x.
314 */
315
316 /*
317 ** Here we cover another question: How closely must y approximate sqrt(x) to
318 ** ensure {y} = {sqrt(x)}, where x is a representable number? We state without
319 ** proof that the closest sqrt(x) approaches a value halfway between consecutive
320 ** representable numbers occurs either when x is just larger than a power of 4,
321 ** or just less than a power of 4. We have:
322 **
323 ** sqrt(4^k*(1+lsb( 1 ))) = 2^k*(1 + lsb( 1 )/2 - lsb( 1 )^2/8 + ...), and
324 ** sqrt(4^k*(1-lsb(1/2)) = 2^k*(1 - lsb(1/2)/2 - lsb(1/2)^2/8 - ...).
325 **
326 ** So if |y - sqrt(x)| < lsb(sqrt(x))^2/8 - O(lsb^3), {y} = {sqrt(x)}.
327 ** For our purposes, this means that 50-bit accuracy (barely) suffices to
328 ** produce a correctly-rounded 24-bit result, since (2^(1-24))^2/8 = 2^(1-50).
329 ** After our Newton's iteration, we have nearly 53-bit accuracy. All is well.
330 */
331
332 /*----------------------------------------------------------------------------*/
333 /* Computing 'x+' and 'x-' */
334 /*----------------------------------------------------------------------------*/
335
336 /*
337 ** For Tuckerman's rounding, we need to compute the (machine-)representable
338 ** numbers just after and before a representable x: 'x+' = x + lsb(x) and
339 ** 'x-' = x - lsb(x-lsb(x)). Letting '{}' denote rounding to machine precision,
340 ** we compute these by:
341 **
342 ** 'x+' = {x + {c x}} (1)
343 ** 'x-' = {x - {c x}} (2)
344 **
345 ** for some appropriate constant c, where neither x+{c x} nor x-{c x} are midway
346 ** between two consecutive representable numbers.
347 **
348 ** The weakest preconditions that satisfy the above are:
349 **
350 ** 1/2 lsb(x) < {c x} < 3/2 lsb(x) (1a), when x != 2^n(1-lsb(1/2))
351 ** 1/2 lsb(x) < {c x} < 2 lsb(x) (1b), when x = 2^n(1-lsb(1/2))
352 ** 1/2 lsb(x) < {c x} < 3/2 lsb(x) (2a), when x != 2^n
353 ** 1/4 lsb(x) < {c x} < 3/4 lsb(x) (2b), when x = 2^n
354 **
355 ** For (1a), (1b), and (2a), we can take:
356 **
357 ** 1/2 lsb(x)/x < c < 3/2 lsb(x)/x, which we can 'shrink' to simplify:
358 ** 1/2 lsb(1)/1 < c < 3/2 lsb(1)/2
359 ** 1/2 lsb(1) < c < 3/4 lsb(1)
360 **
361 ** For (2b), we require:
362 **
363 ** 1/4 lsb(1) < c < 3/4 lsb(1)
364 **
365 ** Thus, in any case, we can use any c in the range:
366 **
367 ** 1/2 lsb(1) < c < 3/4 lsb(1)
368 **
369 ** We choose the midpoint:
370 **
371 ** c = 5/8 lsb(1) = 5/8 2^(1-p) = 5/4 2^(-p)
372 **
373 ** FWIW: It's possibly to compute 'x-' by: 'x-' = {x * (1-lsb(1/2))},
374 ** but 'x+' isn't necessarily computed by: 'x+' = {x * (1+lsb(1))}.
375 */
376
377 #if defined(SQRT)
378 # if (F_PRECISION == 24)
379 # define ULP_FACTOR (F_TYPE)7.450580596923828125e-8
380 # elif (F_PRECISION == 53)
381 # define ULP_FACTOR (F_TYPE)1.387778780781445675529539585113525390625e-16
382 # elif (F_PRECISION == 56)
383 # define ULP_FACTOR (F_TYPE)1.7347234759768070944119244813919067382813e-17
384 # elif (F_PRECISION == 113)
385 # define ULP_FACTOR (F_TYPE)1.203706215242022408159986214115579574086314e-34
386 # else
387 # define ULP_FACTOR (F_TYPE)1.25/(F_POW_2(F_PRECISION))
388 # endif
389 #endif
390
391
392
393 /*----------------------------------------------------------------------------*/
394 /* Newton's Iteration */
395 /*----------------------------------------------------------------------------*/
396
397 /*
398 Newton's iteration for 1 / (nth root of x) is:
399
400 y' = y + [ (1 - x * y^n) * y / n ]
401
402 So, the iteration for 1 / sqrt(x) is:
403
404 y' = y + [ (1 - x * y^2) * y * 0.5 ]
405
406 If we want to do one iteration, multiply the result by x,
407 and multiply the result by a scale factor we get:
408
409 y' = scale * x * ( y + [ (1 - x * y^2) * y * 0.5 ] )
410 y' = scale * x * y * ( 1 + [ (1 - x * y^2) * 0.5 ] )
411 y' = scale/2 * x * y * ( 2 + [ (1 - x * y^2) ] ) gives about 5/4 lsb error
412 y' = scale/2 * x * y * ( 3 - x * y^2 ) gives about 8/4 lsb error
413
414 So iterate to get better 1/sqrt(x) and multiply by x to get sqrt(x).
415 */
416
417 /*
418 ** For quad precision, we need additional Newton's iterations.
419 ** For lower precisions, the iteration (if needed) is embedded
420 ** in the ITERATE_AND_MAYBE_CHECK_LAST_BIT macro.
421 */
422 #if QUAD_PRECISION
423
424 /*
425 ** NEWTONS_ITERATION
426 **
427 ** Inputs:
428 ** scaled_x any number
429 ** ignoring scaled_x <= 0
430 **
431 ** y ~= 1/sqrt(scaled_x)
432 **
433 ** Outputs:
434 ** y ~= 1/sqrt(scaled_x)
435 ** y becomes a better approximation
436 **
437 ** Temporaries:
438 ** a, b, c
439 */
440 # define NEWTONS_ITERATION \
441 a = y * scaled_x; \
442 b = a * y; \
443 b = one - b; \
444 b *= y; \
445 c = y + y; \
446 c += b; \
447 y = c * half
448
449 #else
450
451 # define NEWTONS_ITERATION
452
453 #endif
454
455
456
457 /*----------------------------------------------------------------------------*/
458 /* ITERATE_AND_MAYBE_CHECK_LAST_BIT */
459 /*----------------------------------------------------------------------------*/
460
461 #if 0 /* To make all arms 'elif's */
462 #elif FAST_SQRT && (F_PRECISION <= 24)
463
464 /* Don't do a Newton's iteration */
465
466 # define ITERATE_AND_MAYBE_CHECK_LAST_BIT \
467 a = y * scaled_x; \
468 b = half_scale + half_scale; \
469 f_type_y = (F_TYPE)(a * b)
470
471 # define RESULT f_type_y
472
473 #elif RSQRT && (F_PRECISION <= 24)
474
475 /* Don't do a Newton's iteration */
476
477 # define ITERATE_AND_MAYBE_CHECK_LAST_BIT \
478 b = flah_scale + flah_scale; \
479 f_type_y = (F_TYPE)(y * b)
480
481 # define RESULT f_type_y
482
483 #elif SQRT && (F_PRECISION <= 24) && (B_PRECISION < 2*F_PRECISION)
484
485 /* This case is unlikely enough that we will worry about it
486 when we need to (if ever). There is code in older versions of
487 sqrt that does a tuckermans rounding on single prec values. */
488
489 # error "We need to worry about it now."
490
491 #elif SQRT && (F_PRECISION <= 24) && (B_PRECISION >= 2*F_PRECISION)
492
493 /* Make sure the last bit is correctly rounded by computing
494 a double-precision result, and then rounding it to single. */
495
496 # define ITERATE_AND_MAYBE_CHECK_LAST_BIT \
497 a = y * scaled_x; \
498 b = a * y; \
499 c = a * half_scale; \
500 b = three - b; \
501 f_type_y = (F_TYPE)(c * b)
502
503 # define RESULT f_type_y
504
505 #elif RSQRT
506
507 /* Do more accurate iteration (about 1 lsb error) */
508
509 # define ITERATE_AND_MAYBE_CHECK_LAST_BIT \
510 c = y * flah_scale; \
511 f_type_y = (F_TYPE)((c+c)+c*(one-scaled_x*(y*y)));
512
513 # define RESULT f_type_y
514
515 #elif RSQRT
516
517 /* Do sloppy iteration (about 2 lsb error).
518 y = (y * flah_scale) * (three - (y*scaled_x) * y) */
519
520 # define ITERATE_AND_MAYBE_CHECK_LAST_BIT \
521 a = y * scaled_x; \
522 b = a * y; \
523 c = y * flah_scale; \
524 b = three - b; \
525 y = c * b
526
527 # define RESULT y
528
529 #elif FAST_SQRT
530
531 /* Do sloppy iteration (about 2 lsb error).
532 y = ((y*scaled_x) * half_scale) * (three - (y*scaled_x) * y) */
533
534 # define ITERATE_AND_MAYBE_CHECK_LAST_BIT \
535 a = y * scaled_x; \
536 b = a * y; \
537 c = a * half_scale; \
538 b = three - b; \
539 y = c * b
540
541 # define RESULT y
542
543 #elif SQRT
544
545 /* Do more accurate iteration and check last bit.
546 [ NB: we compute ulp = 2*ULP_FACTOR*c, because y ~= 2*c.] */
547
548 # define DECLARE_old_mode U_WORD old_mode;
549 # define DECLARE_ulp_stuff F_TYPE ulp, y_less_1_ulp, y_plus_1_ulp;
550 # define ITERATE_AND_MAYBE_CHECK_LAST_BIT \
551 a = y * scaled_x; \
552 ulp = 2.0*ULP_FACTOR; \
553 b = a * y; \
554 c = a * half_scale; \
555 b = one - b; \
556 a = c + c; \
557 b = c * b; \
558 ulp *= c; \
559 y = a + b; \
560 y_less_1_ulp = y - ulp; \
561 ASSERT( y_less_1_ulp < y ); \
562 y_plus_1_ulp = y + ulp; \
563 ASSERT( y_plus_1_ulp > y ); \
564 ESTABLISH_ROUND_TO_ZERO(old_mode); \
565 F_MUL_CHOPPED(y, y_less_1_ulp, a); \
566 F_MUL_CHOPPED(y, y_plus_1_ulp, b); \
567 RESTORE_ROUNDING_MODE(old_mode); \
568 y = ((a >= x) ? y_less_1_ulp : y); \
569 y = ((b < x) ? y_plus_1_ulp : y);
570
571 # define RESULT y
572
573 #else
574
575 error "Can't define ITERATE_AND_MAYBE_CHECK_LAST_BIT"
576
577 #endif
578
579
580 #ifndef DECLARE_old_mode
581 #define DECLARE_old_mode
582 #endif
583 #ifndef DECLARE_ulp_stuff
584 #define DECLARE_ulp_stuff
585 #endif
586
587
588
589 /*----------------------------------------------------------------------------*/
590 /* The Function Itself! */
591 /*----------------------------------------------------------------------------*/
592
593
594
595 F_TYPE F_ENTRY_NAME(F_TYPE x)
596 {
597 EXCEPTION_RECORD_DECLARATION
598 B_UNION u, v;
599
600 F_TYPE f_type_y;
601 B_TYPE y, a, b, c;
602 B_TYPE scaled_x;
603 B_TYPE IF_SQRT (half_scale)
604 IF_RSQRT(flah_scale);
605 const B_TYPE half = (B_TYPE)0.5;
606 const B_TYPE one = (B_TYPE)1.0;
607 const B_TYPE three = (B_TYPE)3.0;
608 DECLARE_old_mode
609 DECLARE_ulp_stuff
610
611 LS_INT_TYPE exp, save_exp;
612 U_LS_INT_TYPE index;
613 U_LS_INT_TYPE lo_exp_bit_and_hi_frac;
614 U_LS_INT_TYPE hi_exp_mask = HI_EXP_BIT_MASK;
615 U_LS_INT_TYPE exp_of_one_half = EXP_BITS_OF_ONE_HALF;
616
617 #if defined(HAS_SQRT_INSTRUCTION) && ( FAST_SQRT || SQRT ) && ( SINGLE_PRECISION || DOUBLE_PRECISION )
618 u.f = (B_TYPE)x;
619 save_exp = u.B_HI_LS_INT_TYPE;
620
621 if INPUT_IS_ABNORMAL
622 goto abnormal_input;
623
624 F_HW_SQRT(x,RESULT);
625
626 return RESULT;
627 #else
628 SCALE_AND_DO_INDEXED_POLY_APPROX;
629
630 if INPUT_IS_ABNORMAL
631 goto abnormal_input;
632
633 NEWTONS_ITERATION;
634 NEWTONS_ITERATION;
635
636 ITERATE_AND_MAYBE_CHECK_LAST_BIT;
637
638 return RESULT;
639 #endif
640
641
642 abnormal_input:
643
644 #if VAX_FLOATING
645
646 /* x is either 0 or negative */
647
648 if (x == (F_TYPE)0.0) {
649 #if RSQRT
650 GET_EXCEPTION_RESULT_1(RSQRT_OF_POS_ZERO, x, RESULT);
651 #else
652 RESULT = x;
653 #endif
654 } else {
655 GET_EXCEPTION_RESULT_1(SQRT_OF_NEGATIVE, x, RESULT);
656 }
657 return RESULT;
658
659
660 #elif (IEEE_FLOATING)
661
662 F_CLASSIFY(x, index);
663
664 switch (index) {
665
666 case F_C_SIG_NAN:
667 case F_C_QUIET_NAN:
668 RESULT = x;
669 return RESULT;
670 break;
671
672 #if RSQRT
673
674 case F_C_POS_INF:
675 RESULT = (F_TYPE)0.0;
676 return RESULT;
677 break;
678 case F_C_POS_ZERO:
679 GET_EXCEPTION_RESULT_1(RSQRT_OF_POS_ZERO, x, RESULT);
680 return RESULT;
681 break;
682 case F_C_NEG_ZERO:
683 GET_EXCEPTION_RESULT_1(RSQRT_OF_NEG_ZERO, x, RESULT);
684 return RESULT;
685 break;
686
687 #else
688
689 case F_C_POS_INF:
690 case F_C_POS_ZERO:
691 case F_C_NEG_ZERO:
692 RESULT = x;
693 return RESULT;
694 break;
695
696 #endif
697
698 case F_C_NEG_INF:
699 case F_C_NEG_NORM:
700 case F_C_NEG_DENORM:
701 GET_EXCEPTION_RESULT_1(SQRT_OF_NEGATIVE, x, RESULT);
702 return RESULT;
703 break;
704
705 default:
706
707 /* must be positive denorm */
708
709 F_MAKE_FLOAT(
710 ((WORD) (2*F_PRECISION + 1) << F_EXP_POS), f_type_y);
711 F_COPY_SIGN_AND_EXP(x, f_type_y, x);
712 x -= f_type_y;
713
714 #if defined(HAS_SQRT_INSTRUCTION) && ( FAST_SQRT || SQRT ) && ( SINGLE_PRECISION || DOUBLE_PRECISION )
715 F_HW_SQRT(x,RESULT);
716 #else
717 SCALE_AND_DO_INDEXED_POLY_APPROX;
718
719 NEWTONS_ITERATION;
720 NEWTONS_ITERATION;
721
722 ITERATE_AND_MAYBE_CHECK_LAST_BIT;
723 #endif
724
725 /* Scale down again (up for RSQRT) */
726
727 IF_SQRT ( SUB_FROM_EXP_FIELD(RESULT, F_PRECISION) );
728 IF_RSQRT( ADD_TO_EXP_FIELD(RESULT, F_PRECISION) );
729 return RESULT;
730 break;
731 }
732
733 #endif
734
735 } /* sqrt */
736
737
738
739 /*----------------------------------------------------------------------------*/
740 /* MPHOC code to generate the table */
741 /*----------------------------------------------------------------------------*/
742
743
744 #if MAKE_INCLUDE
745
746 #undef F_NAME_SUFFIX
747 #define F_NAME_SUFFIX TABLE_SUFFIX
748
749 @divert divertText
750
751
752 /*
753 ** Print header information.
754 */
755 print;
756 print "#include \"dpml_private.h\"";
757 print;
758 print "#define NUM_FRAC_BITS ", STR(NUM_FRAC_BITS);
759 print;
760 /*
761 ** The definitions of SQRT_COEF_STRUCT and D_SQRT_TABLE_NAME also
762 ** appear in the code.
763 */
764 print "typedef struct {";
765 print " float a, b;";
766 print " double c;";
767 print "} SQRT_COEF_STRUCT;";
768 print;
769 print "const SQRT_COEF_STRUCT D_SQRT_TABLE_NAME[(1<<(NUM_FRAC_BITS+1))] = {";
770 print;
771
772 /*
773 ** Generate and print the polynomial coefficients.
774 */
rsqrt_f(r)775 function rsqrt_f(r) { return 1/sqrt(r); }
776
777 precision = ceil( (D_PRECISION + 16)/MP_RADIX_BITS );
778
779 /*
780 ** For each half fo the table, ...
781 */
782 for (h = 1; h <= 2; h++) {
783
784 xaa = 0.5;
785 xbb = 1.0;
786 xkk = 1.0/h;
787 print;
788 printf("/*\n**\t");
789 printf("a*x^2 + b*x + c");
790 printf(" ~= sqrt(%5r/x),\t\t%5r <= x < %5r", xkk, xaa, xbb);
791 printf("\n*/\n");
792
793 for (i = 0; i < 2^NUM_FRAC_BITS; i++) {
794 xa = xaa + (xbb-xaa) * i /2^NUM_FRAC_BITS;
795 xb = xaa + (xbb-xaa) * (i+1)/2^NUM_FRAC_BITS;
796 /*
797 ** Determine a minimum-error quadratic approximation to
798 ** sqrt(xkk/x) in the range xa <= x <= xb. (This doesn't
799 ** minimize the error after a Newton's iteration; that'd
800 ** require a weighting function of x^(1/4), a needless
801 ** complication for this single-precision approximation).
802 */
803 tol = S_PRECISION+2;
804 flags = 0;
805 err = remes(flags, xa, xb, rsqrt_f, tol, °ree, &rsqrt_c);
806 if (degree != 2) print("*** degree = %i\n", degree);
807 for (j = 0; j <= degree; j++)
808 rsqrt_c[j] = rsqrt_c[j] * sqrt(xkk);
809 /*
810 ** Now round the x^2 and x coefficients to single precision,
811 ** by subtracting Chebyshev polynomials. The additional error
812 ** is negligible (less than 3%; e.g., if the polynomial was good to
813 ** 27 bits, it's degraded to only 27-log2(1.03) = 26.96 bits).
814 **
815 ** The algebra is simplified by expressing the range xa..xb in terms of
816 ** the range's midpoint and radius.
817 */
818 xm = (xb + xa)/2;
819 xr = (xb - xa)/2;
820 z = xm / xr;
821 /*
822 ** The Chebyshev polynomials we subtract are multiples of:
823 **
824 ** w <-> (x-xm)/xr
825 ** 1-2*w^2 <-> 1-2*((x-xm)/xr)^2
826 **
827 ** The x terms are collected, scaled (by t), and subtracted from the
828 ** polynomial coefficients.
829 **
830 ** First we subtract (a multiple of) the 2nd degree Chebyshev polynomial
831 ** to produce a new polynomial with the desired (representable in single
832 ** precision) 2nd degree polynomial coefficient. This minimizes the
833 ** maximum absolute error between the 'Remes' polynomial and the new
834 ** polynomial (since the difference is a Chebyshev polynomial, which
835 ** has the 'equal ripple' property).
836 **
837 ** Then we subtract (a multiple of) the 1st degree Chebyshev polynomial
838 ** to produce a new polynomial with the desired (representable in single
839 ** precision) 1st degree coefficient. This minimizes the maximum
840 ** absolute error between the previous polynomial and the newer one
841 ** (under the constraints of having the same 2nd degree coefficient,
842 ** and the desired 1st degree coefficient). The 0th degree coefficient
843 ** is rounded to double precision (somebody's got to!), and this has
844 ** no significant effect on the single precision result.
845 **
846 ** Is the resulting polynomial optimal? Nope; nobody claims it is.
847 ** Is it 'best' in some sense? Yes -- the theory is clear and the code
848 ** is short (disregarding this phillipic). Is it close enough? Yep.
849 ** Why? That's a good question....
850 **
851 ** To see why this works, consider the polynomial for 1/sqrt(x) for
852 ** 1 <= x < 1+2^-7,
853 **
854 ** 0.37... x^2 + -1.24... x + 1.87...
855 **
856 ** Simply rounding the x coefficient to 24 bits may corrupt the result
857 ** of the polynomial by as much as (1+2^-7) * 0.5*s_lsb(1.24), where
858 ** s_lsb(z) = 2^floor(log2(|z|) + 1 - 24) is the value of z's least
859 ** significant bit when z is expressed in single precision. This is
860 ** as much as 2^-24, which is 2*s_lsb(1/sqrt(x)) -- two single-precision
861 ** lsb of the result! Rounding the x^2 coefficient has similar effects,
862 ** affecting the result by 1/2 single-precision lsb. We can do better.
863 **
864 ** If rounding increases the x coefficient by t, |t| <= 0.5*lsb(1.24),
865 ** the corruption can be partly compensated by adjusting the constant
866 ** coefficient, decreasing it by (for example) t*(1 + 1+2^-7)/2.
867 ** The corruption is then:
868 **
869 ** t*( x - (1+1+2^-7)/2 )
870 **
871 ** Since 1 <= x < 1+2^-7, and |t| <= 0.5*lsb(1.24), we have:
872 **
873 ** | t*( x - (1+1+2^-7)/2 ) | <= 0.5*lsb(1.24) * 2^-8 = 2^(-24 -8)
874 **
875 ** which is only 0.0078125*s_lsb(1/sqrt(x)) -- a factor of 256 smaller
876 ** than the corruption from simply rounding the x coefficient.
877 **
878 ** To minimize the (absolute value of the) maximum corruption, we add
879 ** a multiple of a Chebyshev polynomial, for the particular range of x,
880 ** because Chebyshev polynomials are 'minimax' (or 'equal ripple')
881 ** polynomials.
882 ** For the range -1 <= w <= 1, the Chebyshev polynomials are:
883 **
884 ** 1, w, 2*w^2-1, 4*w^3-3*w, ....
885 **
886 ** To convert these to polynomials in x for the range a <= x <= b,
887 ** substitute (x-m)/r, with m = (b+a)/2, r = (b-a)/2, and z = m/r.
888 ** The Chebyshev polynomials become:
889 **
890 ** 1, x/r - z, 2*(x/r)^2 - 4*z*(x/r) + 2*z^2-1,
891 ** 4*(x/r)^3 - 12*z*(x/r)^2 + (12*z^2-3)*(x/r) - 4*z^3+3*z, ....
892 **
893 ** For 1 <= x < 1+2^-7, these are:
894 **
895 ** 1, 2^8*x - (2^8+1), 2^17*x^2 - (2^18+2^10)*x + (2^17+2^10+1),
896 ** 2^26*x^3 - 3*(2^26+2^18)*x^2 + 3*(2^26+2^19+3*2^8)*x
897 ** - (2^26+3*2^18+2^11+2^8+1), ....
898 **
899 ** Each of these are 'equal ripple', oscillating between +/-1. We see
900 ** our previous adjustment, ( x - (1+1+2^-7)/2 ), appear here with a
901 ** factor of 2^8. Scaling it by t*2^-8 gives our previous result; this
902 ** scaling also reduces the 'ripple' to +/-t*2^-8.
903 **
904 ** When we use the 2nd degree Chebyshev polynomial to round the 2nd
905 ** degree coefficient to single precision, we must scale the polynomial
906 ** by a factor of t*2^-17, where here |t| <= 0.5*lsb(0.37). This means
907 ** that the effect of this corruption, the size of the 'ripple', is less
908 ** than 0.5*lsb(0.37)*2^-17 = 2^-43, or 2^-18*s_lsb(1/sqrt(x)). This is
909 ** far better than the the 1/2 lsb we got when we simply rounded the x^2
910 ** coefficient.
911 **
912 ** Can this technique be applied to other polynomial coefficients?
913 ** It is an invention of my own conception developed outside the term
914 ** of my contract, and for which I've received no compensation.
915 */
916 t = rsqrt_c[2] - bround(rsqrt_c[2], S_PRECISION);
917 rsqrt_c[2] = rsqrt_c[2] - t;
918 rsqrt_c[1] = rsqrt_c[1] + t * 2*z * xr;
919 rsqrt_c[0] = rsqrt_c[0] + t * (0.5-z^2) * xr^2;
920 t = rsqrt_c[1] - bround(rsqrt_c[1], S_PRECISION);
921 rsqrt_c[2] = rsqrt_c[2];
922 rsqrt_c[1] = rsqrt_c[1] - t;
923 rsqrt_c[0] = rsqrt_c[0] + t * z * xr;
924 t = rsqrt_c[0] - bround(rsqrt_c[1], D_PRECISION);
925 printf("{\t%.10r,\t%.10r,\t%.20r\t},\n",
926 rsqrt_c[2], rsqrt_c[1], rsqrt_c[0]);
927 }
928 }
929
930 /*
931 ** Print the trailer.
932 */
933 print;
934 print "};";
935 print;
936
937
938 @end_divert
939 @eval my $outText = MphocEval( GetStream( "divertText" ) ); \
940 my $headerText = GetHeaderText( STR(BUILD_FILE_NAME), \
941 "Double precision square root table", __FILE__); \
942 print "$headerText\n\n$outText";
943
944
945 #endif /* MAKE_INCLUDE */
946
947
948 /*----------------------------------------------------------------------------*/
949 /* Testing */
950 /*----------------------------------------------------------------------------*/
951
952 #if MAKE_MTC
953
954
955 @divert > dpml_sqrt.mtc
956
957
958 build default = "sqrt.a";
959
960 function SINGLE_SQRT = F_CHAR F_SQRT_NAME(F_CHAR.v.r);
961 function FAST_SINGLE_SQRT = F_CHAR F_FAST_SQRT_NAME(F_CHAR.v.r);
962 function DOUBLE_SQRT = B_CHAR B_SQRT_NAME(B_CHAR.v.r);
963 function FAST_DOUBLE_SQRT = B_CHAR B_FAST_SQRT_NAME(B_CHAR.v.r);
964 function MP_SQRT = void mp_sqrt(m.r.r, m.r.w);
965
966
967
968 type SQRT_ACCURACY = accuracy
969 error = lsb;
970 stats = max;
971 points = 1024;
972 ;
973
974
975 domain SINGLE_SQRT_DENORMS = { [ 0.0 , 1e-37 ]:uniform:10001 } ;
976 domain DOUBLE_SQRT_DENORMS = { [ 0.0 , 1e-307 ]:uniform:10001 } ;
977 domain SINGLE_SQRT_ACCURACY = { [ 0.0 , 17.0 ]:uniform:100001 } ;
978 domain DOUBLE_SQRT_ACCURACY = { [ 0.0 , 17.0 ]:uniform:100001 } ;
979
980 domain SQRT_KEYPOINTS =
981 lsb = 0.5; { 2.0 | der } { 5.0 | der } { 10.0 | der }
982 lsb = 0.5; { MTC_POS_TINY | der } { MTC_POS_HUGE | der }
983 { 0.0 | 0.0 } { 1.0 | 1.0 } { MTC_NEG_ZERO | MTC_NEG_ZERO }
984 { MTC_POS_INFINITY | MTC_POS_INFINITY } { MTC_NAN | MTC_NAN }
985 ;
986
987 domain FAST_SINGLE_SQRT_KEYPOINTS =
988 lsb = 1.0; { 2.0 | der } { 5.0 | der } { 10.0 | der }
989 lsb = 1.0; { MTC_POS_TINY | der } { MTC_POS_HUGE | der }
990 { 0.0 | 0.0 } { 1.0 | 1.0 } { MTC_NEG_ZERO | MTC_NEG_ZERO }
991 { MTC_POS_INFINITY | MTC_POS_INFINITY } { MTC_NAN | MTC_NAN }
992 ;
993
994 domain FAST_DOUBLE_SQRT_KEYPOINTS =
995 lsb = 2.0; { 2.0 | der } { 5.0 | der } { 10.0 | der }
996 lsb = 2.0; { MTC_POS_TINY | der } { MTC_POS_HUGE | der }
997 { 0.0 | 0.0 } { 1.0 | 1.0 } { MTC_NEG_ZERO | MTC_NEG_ZERO }
998 { MTC_POS_INFINITY | MTC_POS_INFINITY } { MTC_NAN | MTC_NAN }
999 ;
1000
1001
1002 test sqrt_acc_sd =
1003 type = SQRT_ACCURACY;
1004 domain = SINGLE_SQRT_ACCURACY;
1005 function = SINGLE_SQRT;
1006 comparison_function = FAST_DOUBLE_SQRT;
1007 output =
1008 file = "sqrt_acc_sd.out";
1009 ;
1010 ;
1011
1012 test sqrt_denorm_acc_sd =
1013 type = SQRT_ACCURACY;
1014 domain = SINGLE_SQRT_DENORMS;
1015 function = SINGLE_SQRT;
1016 comparison_function = FAST_DOUBLE_SQRT;
1017 output =
1018 file = "sqrt_denorm_acc_sd.out";
1019 ;
1020 ;
1021
1022 test fast_sqrt_acc_sd =
1023 type = SQRT_ACCURACY;
1024 domain = SINGLE_SQRT_ACCURACY;
1025 function = FAST_SINGLE_SQRT;
1026 comparison_function = FAST_DOUBLE_SQRT;
1027 output =
1028 file = "fast_sqrt_acc_sd.out";
1029 ;
1030 ;
1031
1032
1033 test sqrt_acc_dm =
1034 type = SQRT_ACCURACY;
1035 domain = DOUBLE_SQRT_ACCURACY;
1036 function = DOUBLE_SQRT;
1037 comparison_function = MP_SQRT;
1038 output =
1039 file = "sqrt_acc_dm.out";
1040 ;
1041 ;
1042
1043 test sqrt_denorm_acc_dm =
1044 type = SQRT_ACCURACY;
1045 domain = DOUBLE_SQRT_DENORMS;
1046 function = DOUBLE_SQRT;
1047 comparison_function = MP_SQRT;
1048 output =
1049 file = "sqrt_denorm_acc_dm.out";
1050 ;
1051 ;
1052
1053 test fast_sqrt_acc_dm =
1054 type = SQRT_ACCURACY;
1055 domain = DOUBLE_SQRT_ACCURACY;
1056 function = FAST_DOUBLE_SQRT;
1057 comparison_function = MP_SQRT;
1058 output =
1059 file = "fast_sqrt_acc_dm.out";
1060 ;
1061 ;
1062
1063
1064 test sqrt_key_sd =
1065 type = key_point;
1066 domain = SQRT_KEYPOINTS;
1067 function = SINGLE_SQRT;
1068 comparison_function = DOUBLE_SQRT;
1069 output =
1070 file = "sqrt_key_sd.out" ;
1071 style = verbose;
1072 ;
1073 ;
1074
1075 test sqrt_key_dm =
1076 type = key_point;
1077 domain = SQRT_KEYPOINTS;
1078 function = DOUBLE_SQRT;
1079 comparison_function = MP_SQRT;
1080 output =
1081 file = "sqrt_key_dm.out" ;
1082 style = verbose;
1083 ;
1084 ;
1085
1086 test fast_sqrt_key_sd =
1087 type = key_point;
1088 domain = FAST_SINGLE_SQRT_KEYPOINTS;
1089 function = FAST_SINGLE_SQRT;
1090 comparison_function = FAST_DOUBLE_SQRT;
1091 output =
1092 file = "fast_sqrt_key_sd.out" ;
1093 style = verbose;
1094 ;
1095 ;
1096
1097 test fast_sqrt_key_dm =
1098 type = key_point;
1099 domain = FAST_DOUBLE_SQRT_KEYPOINTS;
1100 function = FAST_DOUBLE_SQRT;
1101 comparison_function = MP_SQRT;
1102 output =
1103 file = "fast_sqrt_key_dm.out" ;
1104 style = verbose;
1105 ;
1106 ;
1107
1108
1109 @end_divert
1110
1111
1112 #endif /* MAKE_MTC */
1113
1114
1115