/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * _D_cplx_div(z, w) returns z / w with infinities handled according
 * to C99.
 *
 * If z and w are both finite and w is nonzero, _D_cplx_div(z, w)
 * delivers the complex quotient q according to the usual formula:
 * let a = Re(z), b = Im(z), c = Re(w), and d = Im(w); then q = x +
 * I * y where x = (a * c + b * d) / r and y = (b * c - a * d) / r
 * with r = c * c + d * d.  This implementation scales to avoid
 * premature underflow or overflow.
 *
 * If z is neither NaN nor zero and w is zero, or if z is infinite
 * and w is finite and nonzero, _D_cplx_div delivers an infinite
 * result.  If z is finite and w is infinite, _D_cplx_div delivers
 * a zero result.
 *
 * If z and w are both zero or both infinite, or if either z or w is
 * a complex NaN, _D_cplx_div delivers NaN + I * NaN.  C99 doesn't
 * specify these cases.
 *
 * This implementation can raise spurious underflow, overflow, in-
 * valid operation, inexact, and division-by-zero exceptions.  C99
 * allows this.
 *
 * Warning: Do not attempt to "optimize" this code by removing multi-
 * plications by zero.
 *
 * The bit-level tests below look at the IEEE 754 binary64 encoding of
 * each operand through a 64-bit unsigned integer, so they do not depend
 * on the order in which the two 32-bit halves of a double are stored.
 */

#include <float.h>
#include <stdint.h>

#if FLT_RADIX != 2 || DBL_MANT_DIG != 53 || DBL_MAX_EXP != 1024
#error This code requires IEEE 754 double precision (binary64)
#endif

/* all bits of a binary64 encoding except the sign bit */
#define	DBL_ABS_MASK	UINT64_C(0x7fffffffffffffff)

/* encoding of +Inf: exponent field all ones, fraction zero */
#define	DBL_INF_BITS	UINT64_C(0x7ff0000000000000)

/*
 * +Inf, built from its encoding (the initializer sets the first
 * member, u).
 */
static const union {
	uint64_t u;
	double d;
} inf = {
	DBL_INF_BITS
};

/*
 * Return the 64-bit encoding of x: sign in bit 63, biased exponent in
 * bits 52-62, fraction in bits 0-51.
 */
static uint64_t
dbl_bits(double x)
{
	union {
		double d;
		uint64_t u;
	} xx;

	xx.d = x;
	return (xx.u);
}

/*
 * Return the high-order 32 bits of the encoding of x with the sign bit
 * cleared.  The result always fits in a non-negative int and orders
 * values by magnitude, so it serves as a cheap estimate of |x|.
 */
static int
dbl_himag(double x)
{
	return ((int)((dbl_bits(x) >> 32) & 0x7fffffff));
}

/*
 * Return 1 if the sign bit of x is set (this includes -0.0 and NaNs
 * whose sign bit is set) and 0 otherwise.
 */
static int
dbl_negative(double x)
{
	return ((int)(dbl_bits(x) >> 63));
}

/*
 * Return +1 if x is +Inf, -1 if x is -Inf, and 0 otherwise
 *
 * The test is carried out on the unsigned encoding so that no signed
 * shift is ever performed.
 */
static int
testinf(double x)
{
	uint64_t bits = dbl_bits(x);

	if ((bits & DBL_ABS_MASK) != DBL_INF_BITS)
		return (0);
	return ((bits >> 63) != 0 ? -1 : 1);
}

/*
 * Return the complex quotient z / w; see the comment at the top of this
 * file for the treatment of zeros, infinities, and NaNs.
 */
double _Complex
_D_cplx_div(double _Complex z, double _Complex w)
{
	double _Complex v;
	union {
		double d;
		uint64_t u;
	} ss;
	double a, b, c, d, r;
	int ha, hb, hc, hd, hz, hw, hs, i, j;

	/*
	 * The following is equivalent to
	 *
	 *	a = creal(z); b = cimag(z);
	 *	c = creal(w); d = cimag(w);
	 */
	a = ((double *)&z)[0];
	b = ((double *)&z)[1];
	c = ((double *)&w)[0];
	d = ((double *)&w)[1];

	/* extract high-order words to estimate |z| and |w| */
	ha = dbl_himag(a);
	hb = dbl_himag(b);
	hz = (ha > hb)? ha : hb;

	hc = dbl_himag(c);
	hd = dbl_himag(d);
	hw = (hc > hd)? hc : hd;

	/* check for special cases */
	if (hw >= 0x7ff00000) { /* w is inf or nan */
		r = 0.0;
		i = testinf(c);
		j = testinf(d);
		if (i | j) { /* w is infinite */
			/*
			 * "factor out" infinity, being careful to preserve
			 * signs of finite values
			 */
			c = i? i : (dbl_negative(c)? -0.0 : 0.0);
			d = j? j : (dbl_negative(d)? -0.0 : 0.0);
			if (hz >= 0x7fe00000) {
				/* scale to avoid overflow below */
				c *= 0.5;
				d *= 0.5;
			}
		}
		((double *)&v)[0] = (a * c + b * d) * r;
		((double *)&v)[1] = (b * c - a * d) * r;
		return (v);
	}

	if (hw < 0x00100000) {
		/*
		 * This nonsense is needed to work around some SPARC
		 * implementations of nonstandard mode; if both parts
		 * of w are subnormal, multiply them by one to force
		 * them to be flushed to zero when nonstandard mode
		 * is enabled.  Sheesh.
		 */
		c = c * 1.0;
		d = d * 1.0;
		hc = dbl_himag(c);
		hd = dbl_himag(d);
		hw = (hc > hd)? hc : hd;
	}

	/* w is zero iff every non-sign bit of both parts is clear */
	if (((dbl_bits(c) | dbl_bits(d)) & DBL_ABS_MASK) == 0) {
		/* w is zero; multiply z by 1/Re(w) - I * Im(w) */
		c = 1.0 / c;
		i = testinf(a);
		j = testinf(b);
		if (i | j) { /* z is infinite */
			a = i;
			b = j;
		}
		((double *)&v)[0] = a * c + b * d;
		((double *)&v)[1] = b * c - a * d;
		return (v);
	}

	if (hz >= 0x7ff00000) { /* z is inf or nan */
		r = 1.0;
		i = testinf(a);
		j = testinf(b);
		if (i | j) { /* z is infinite */
			a = i;
			b = j;
			r = inf.d;
		}
		((double *)&v)[0] = (a * c + b * d) * r;
		((double *)&v)[1] = (b * c - a * d) * r;
		return (v);
	}

	/*
	 * Scale c and d to compute 1/|w|^2 and the real and imaginary
	 * parts of the quotient.
	 *
	 * Note that for any s, if we let c' = sc, d' = sd, c'' = sc',
	 * and d'' = sd', then
	 *
	 *  (ac'' + bd'') / (c'^2 + d'^2) = (ac + bd) / (c^2 + d^2)
	 *
	 * and similarly for the imaginary part of the quotient.  We want
	 * to choose s such that (i) r := 1/(c'^2 + d'^2) can be computed
	 * without overflow or harmful underflow, and (ii) (ac'' + bd'')
	 * and (bc'' - ad'') can be computed without spurious overflow or
	 * harmful underflow.  To avoid unnecessary rounding, we restrict
	 * s to a power of two.
	 *
	 * To satisfy (i), we need to choose s such that max(|c'|,|d'|)
	 * is not too far from one.  To satisfy (ii), we need to choose
	 * s such that max(|c''|,|d''|) is also not too far from one.
	 * There is some leeway in our choice, but to keep the logic
	 * from getting overly complicated, we simply attempt to roughly
	 * balance these constraints by choosing s so as to make r about
	 * the same size as max(|c''|,|d''|).  This corresponds to choos-
	 * ing s to be a power of two near |w|^(-3/4).
	 *
	 * Regarding overflow, observe that if max(|c''|,|d''|) <= 1/2,
	 * then the computation of (ac'' + bd'') and (bc'' - ad'') can-
	 * not overflow; otherwise, the computation of either of these
	 * values can only incur overflow if the true result would be
	 * within a factor of two of the overflow threshold.  In other
	 * words, if we bias the choice of s such that at least one of
	 *
	 *  max(|c''|,|d''|) <= 1/2   or   r >= 2
	 *
	 * always holds, then no undeserved overflow can occur.
	 *
	 * To cope with underflow, note that if r < 2^-53, then any
	 * intermediate results that underflow are insignificant; either
	 * they will be added to normal results, rendering the under-
	 * flow no worse than ordinary roundoff, or they will contribute
	 * to a final result that is smaller than the smallest subnormal
	 * number.  Therefore, we need only modify the preceding logic
	 * when z is very small and w is not too far from one.  In that
	 * case, we can reduce the effect of any intermediate underflow
	 * to no worse than ordinary roundoff error by choosing s so as
	 * to make max(|c''|,|d''|) large enough that at least one of
	 * (ac'' + bd'') or (bc'' - ad'') is normal.
	 */
	hs = (((hw >> 2) - hw) + 0x6fd7ffff) & 0xfff00000;
	if (hz < 0x07200000) { /* |z| < 2^-909 */
		if (hw >= 0x32800000 && hw <= 0x47100000)
			hs = (((0x47100000 - hw) >> 1) & 0xfff00000)
			    + 0x3ff00000;
	}

	/*
	 * hs is the high-order word of the scale factor s (sign clear,
	 * fraction bits zero, so s is a power of two); the low-order
	 * word is zero.
	 */
	ss.u = (uint64_t)hs << 32;

	c *= ss.d;
	d *= ss.d;
	r = 1.0 / (c * c + d * d);

	c *= ss.d;
	d *= ss.d;
	((double *)&v)[0] = (a * c + b * d) * r;
	((double *)&v)[1] = (b * c - a * d) * r;
	return (v);
}