1 /* 2 * Copyright (c) 1992 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This software was developed by the Computer Systems Engineering group 6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7 * contributed to Berkeley. 8 * 9 * All advertising materials mentioning features or use of this software 10 * must display the following acknowledgement: 11 * This product includes software developed by the University of 12 * California, Lawrence Berkeley Laboratories. 13 * 14 * %sccs.include.redist.c% 15 * 16 * @(#)fpu_implode.c 7.3 (Berkeley) 10/11/92 17 * 18 * from: $Header: fpu_implode.c,v 1.4 92/06/17 05:41:33 torek Exp $ 19 */ 20 21 /* 22 * FPU subroutines: `implode' internal format numbers into the machine's 23 * `packed binary' format. 24 */ 25 26 #include <sys/types.h> 27 28 #include <machine/ieee.h> 29 #include <machine/instr.h> 30 #include <machine/reg.h> 31 32 #include <sparc/fpu/fpu_arith.h> 33 #include <sparc/fpu/fpu_emu.h> 34 35 /* 36 * Round a number (algorithm from Motorola MC68882 manual, modified for 37 * our internal format). Set inexact exception if rounding is required. 38 * Return true iff we rounded up. 39 * 40 * After rounding, we discard the guard and round bits by shifting right 41 * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky). 42 * This saves effort later. 43 * 44 * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's 45 * responsibility to fix this if necessary. 46 */ 47 static int 48 round(register struct fpemu *fe, register struct fpn *fp) 49 { 50 register u_int m0, m1, m2, m3; 51 register int gr, s, ret; 52 53 m0 = fp->fp_mant[0]; 54 m1 = fp->fp_mant[1]; 55 m2 = fp->fp_mant[2]; 56 m3 = fp->fp_mant[3]; 57 gr = m3 & 3; 58 s = fp->fp_sticky; 59 60 /* mant >>= FP_NG */ 61 m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG)); 62 m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG)); 63 m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG)); 64 m0 >>= FP_NG; 65 66 if ((gr | s) == 0) /* result is exact: no rounding needed */ 67 goto rounddown; 68 69 fe->fe_cx |= FSR_NX; /* inexact */ 70 71 /* Go to rounddown to round down; break to round up. */ 72 switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) { 73 74 case FSR_RD_RN: 75 default: 76 /* 77 * Round only if guard is set (gr & 2). If guard is set, 78 * but round & sticky both clear, then we want to round 79 * but have a tie, so round to even, i.e., add 1 iff odd. 80 */ 81 if ((gr & 2) == 0) 82 goto rounddown; 83 if ((gr & 1) || fp->fp_sticky || (m3 & 1)) 84 break; 85 goto rounddown; 86 87 case FSR_RD_RZ: 88 /* Round towards zero, i.e., down. */ 89 goto rounddown; 90 91 case FSR_RD_RM: 92 /* Round towards -Inf: up if negative, down if positive. */ 93 if (fp->fp_sign) 94 break; 95 goto rounddown; 96 97 case FSR_RD_RP: 98 /* Round towards +Inf: up if positive, down otherwise. */ 99 if (!fp->fp_sign) 100 break; 101 goto rounddown; 102 } 103 104 /* Bump low bit of mantissa, with carry. */ 105 #ifdef sparc /* ``cheating'' (left out FPU_DECL_CARRY; know this is faster) */ 106 FPU_ADDS(m3, m3, 1); 107 FPU_ADDCS(m2, m2, 0); 108 FPU_ADDCS(m1, m1, 0); 109 FPU_ADDC(m0, m0, 0); 110 #else 111 if (++m3 == 0 && ++m2 == 0 && ++m1 == 0) 112 m0++; 113 #endif 114 fp->fp_mant[0] = m0; 115 fp->fp_mant[1] = m1; 116 fp->fp_mant[2] = m2; 117 fp->fp_mant[3] = m3; 118 return (1); 119 120 rounddown: 121 fp->fp_mant[0] = m0; 122 fp->fp_mant[1] = m1; 123 fp->fp_mant[2] = m2; 124 fp->fp_mant[3] = m3; 125 return (0); 126 } 127 128 /* 129 * For overflow: return true if overflow is to go to +/-Inf, according 130 * to the sign of the overflowing result. If false, overflow is to go 131 * to the largest magnitude value instead. 132 */ 133 static int 134 toinf(struct fpemu *fe, int sign) 135 { 136 int inf; 137 138 /* look at rounding direction */ 139 switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) { 140 141 default: 142 case FSR_RD_RN: /* the nearest value is always Inf */ 143 inf = 1; 144 break; 145 146 case FSR_RD_RZ: /* toward 0 => never towards Inf */ 147 inf = 0; 148 break; 149 150 case FSR_RD_RP: /* toward +Inf iff positive */ 151 inf = sign == 0; 152 break; 153 154 case FSR_RD_RM: /* toward -Inf iff negative */ 155 inf = sign; 156 break; 157 } 158 return (inf); 159 } 160 161 /* 162 * fpn -> int (int value returned as return value). 163 * 164 * N.B.: this conversion always rounds towards zero (this is a peculiarity 165 * of the SPARC instruction set). 166 */ 167 u_int 168 fpu_ftoi(fe, fp) 169 struct fpemu *fe; 170 register struct fpn *fp; 171 { 172 register u_int i; 173 register int sign, exp; 174 175 sign = fp->fp_sign; 176 switch (fp->fp_class) { 177 178 case FPC_ZERO: 179 return (0); 180 181 case FPC_NUM: 182 /* 183 * If exp >= 2^32, overflow. Otherwise shift value right 184 * into last mantissa word (this will not exceed 0xffffffff), 185 * shifting any guard and round bits out into the sticky 186 * bit. Then ``round'' towards zero, i.e., just set an 187 * inexact exception if sticky is set (see round()). 188 * If the result is > 0x80000000, or is positive and equals 189 * 0x80000000, overflow; otherwise the last fraction word 190 * is the result. 191 */ 192 if ((exp = fp->fp_exp) >= 32) 193 break; 194 /* NB: the following includes exp < 0 cases */ 195 if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0) 196 fe->fe_cx |= FSR_NX; 197 i = fp->fp_mant[3]; 198 if (i >= ((u_int)0x80000000 + sign)) 199 break; 200 return (sign ? -i : i); 201 202 default: /* Inf, qNaN, sNaN */ 203 break; 204 } 205 /* overflow: replace any inexact exception with invalid */ 206 fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV; 207 return (0x7fffffff + sign); 208 } 209 210 /* 211 * fpn -> single (32 bit single returned as return value). 212 * We assume <= 29 bits in a single-precision fraction (1.f part). 213 */ 214 u_int 215 fpu_ftos(fe, fp) 216 struct fpemu *fe; 217 register struct fpn *fp; 218 { 219 register u_int sign = fp->fp_sign << 31; 220 register int exp; 221 222 #define SNG_EXP(e) ((e) << SNG_FRACBITS) /* makes e an exponent */ 223 #define SNG_MASK (SNG_EXP(1) - 1) /* mask for fraction */ 224 225 /* Take care of non-numbers first. */ 226 if (ISNAN(fp)) { 227 /* 228 * Preserve upper bits of NaN, per SPARC V8 appendix N. 229 * Note that fp->fp_mant[0] has the quiet bit set, 230 * even if it is classified as a signalling NaN. 231 */ 232 (void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS); 233 exp = SNG_EXP_INFNAN; 234 goto done; 235 } 236 if (ISINF(fp)) 237 return (sign | SNG_EXP(SNG_EXP_INFNAN)); 238 if (ISZERO(fp)) 239 return (sign); 240 241 /* 242 * Normals (including subnormals). Drop all the fraction bits 243 * (including the explicit ``implied'' 1 bit) down into the 244 * single-precision range. If the number is subnormal, move 245 * the ``implied'' 1 into the explicit range as well, and shift 246 * right to introduce leading zeroes. Rounding then acts 247 * differently for normals and subnormals: the largest subnormal 248 * may round to the smallest normal (1.0 x 2^minexp), or may 249 * remain subnormal. In the latter case, signal an underflow 250 * if the result was inexact or if underflow traps are enabled. 251 * 252 * Rounding a normal, on the other hand, always produces another 253 * normal (although either way the result might be too big for 254 * single precision, and cause an overflow). If rounding a 255 * normal produces 2.0 in the fraction, we need not adjust that 256 * fraction at all, since both 1.0 and 2.0 are zero under the 257 * fraction mask. 258 * 259 * Note that the guard and round bits vanish from the number after 260 * rounding. 261 */ 262 if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */ 263 /* -NG for g,r; -SNG_FRACBITS-exp for fraction */ 264 (void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp); 265 if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) 266 return (sign | SNG_EXP(1) | 0); 267 if ((fe->fe_cx & FSR_NX) || 268 (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 269 fe->fe_cx |= FSR_UF; 270 return (sign | SNG_EXP(0) | fp->fp_mant[3]); 271 } 272 /* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */ 273 (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS); 274 #ifdef DIAGNOSTIC 275 if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0) 276 panic("fpu_ftos"); 277 #endif 278 if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2)) 279 exp++; 280 if (exp >= SNG_EXP_INFNAN) { 281 /* overflow to inf or to max single */ 282 fe->fe_cx |= FSR_OF | FSR_NX; 283 if (toinf(fe, sign)) 284 return (sign | SNG_EXP(SNG_EXP_INFNAN)); 285 return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK); 286 } 287 done: 288 /* phew, made it */ 289 return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK)); 290 } 291 292 /* 293 * fpn -> double (32 bit high-order result returned; 32-bit low order result 294 * left in res[1]). Assumes <= 61 bits in double precision fraction. 295 * 296 * This code mimics fpu_ftos; see it for comments. 297 */ 298 u_int 299 fpu_ftod(fe, fp, res) 300 struct fpemu *fe; 301 register struct fpn *fp; 302 u_int *res; 303 { 304 register u_int sign = fp->fp_sign << 31; 305 register int exp; 306 307 #define DBL_EXP(e) ((e) << (DBL_FRACBITS & 31)) 308 #define DBL_MASK (DBL_EXP(1) - 1) 309 310 if (ISNAN(fp)) { 311 (void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS); 312 exp = DBL_EXP_INFNAN; 313 goto done; 314 } 315 if (ISINF(fp)) { 316 sign |= DBL_EXP(DBL_EXP_INFNAN); 317 goto zero; 318 } 319 if (ISZERO(fp)) { 320 zero: res[1] = 0; 321 return (sign); 322 } 323 324 if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) { 325 (void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp); 326 if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) { 327 res[1] = 0; 328 return (sign | DBL_EXP(1) | 0); 329 } 330 if ((fe->fe_cx & FSR_NX) || 331 (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 332 fe->fe_cx |= FSR_UF; 333 exp = 0; 334 goto done; 335 } 336 (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS); 337 if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2)) 338 exp++; 339 if (exp >= DBL_EXP_INFNAN) { 340 fe->fe_cx |= FSR_OF | FSR_NX; 341 if (toinf(fe, sign)) { 342 res[1] = 0; 343 return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0); 344 } 345 res[1] = ~0; 346 return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK); 347 } 348 done: 349 res[1] = fp->fp_mant[3]; 350 return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK)); 351 } 352 353 /* 354 * fpn -> extended (32 bit high-order result returned; low-order fraction 355 * words left in res[1]..res[3]). Like ftod, which is like ftos ... but 356 * our internal format *is* extended precision, plus 2 bits for guard/round, 357 * so we can avoid a small bit of work. 358 */ 359 u_int 360 fpu_ftox(fe, fp, res) 361 struct fpemu *fe; 362 register struct fpn *fp; 363 u_int *res; 364 { 365 register u_int sign = fp->fp_sign << 31; 366 register int exp; 367 368 #define EXT_EXP(e) ((e) << (EXT_FRACBITS & 31)) 369 #define EXT_MASK (EXT_EXP(1) - 1) 370 371 if (ISNAN(fp)) { 372 (void) fpu_shr(fp, 2); /* since we are not rounding */ 373 exp = EXT_EXP_INFNAN; 374 goto done; 375 } 376 if (ISINF(fp)) { 377 sign |= EXT_EXP(EXT_EXP_INFNAN); 378 goto zero; 379 } 380 if (ISZERO(fp)) { 381 zero: res[1] = res[2] = res[3] = 0; 382 return (sign); 383 } 384 385 if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) { 386 (void) fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp); 387 if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) { 388 res[1] = res[2] = res[3] = 0; 389 return (sign | EXT_EXP(1) | 0); 390 } 391 if ((fe->fe_cx & FSR_NX) || 392 (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 393 fe->fe_cx |= FSR_UF; 394 exp = 0; 395 goto done; 396 } 397 /* Since internal == extended, no need to shift here. */ 398 if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(2)) 399 exp++; 400 if (exp >= EXT_EXP_INFNAN) { 401 fe->fe_cx |= FSR_OF | FSR_NX; 402 if (toinf(fe, sign)) { 403 res[1] = res[2] = res[3] = 0; 404 return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0); 405 } 406 res[1] = res[2] = res[3] = ~0; 407 return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK); 408 } 409 done: 410 res[1] = fp->fp_mant[1]; 411 res[2] = fp->fp_mant[2]; 412 res[3] = fp->fp_mant[3]; 413 return (sign | EXT_EXP(exp) | (fp->fp_mant[0] & EXT_MASK)); 414 } 415 416 /* 417 * Implode an fpn, writing the result into the given space. 418 */ 419 void 420 fpu_implode(fe, fp, type, space) 421 struct fpemu *fe; 422 register struct fpn *fp; 423 int type; 424 register u_int *space; 425 { 426 427 switch (type) { 428 429 case FTYPE_INT: 430 space[0] = fpu_ftoi(fe, fp); 431 break; 432 433 case FTYPE_SNG: 434 space[0] = fpu_ftos(fe, fp); 435 break; 436 437 case FTYPE_DBL: 438 space[0] = fpu_ftod(fe, fp, space); 439 break; 440 441 case FTYPE_EXT: 442 /* funky rounding precision options ?? */ 443 space[0] = fpu_ftox(fe, fp, space); 444 break; 445 446 default: 447 panic("fpu_implode"); 448 } 449 } 450