1 /****************************************************************************** 2 Copyright (c) 2007-2011, Intel Corp. 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the distribution. 13 * Neither the name of Intel Corporation nor the names of its contributors 14 may be used to endorse or promote products derived from this software 15 without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 THE POSSIBILITY OF SUCH DAMAGE. 
28 ******************************************************************************/ 29 30 #ifndef DPML_PRIVATE_H 31 #define DPML_PRIVATE_H 32 33 34 #ifndef TRUE 35 # define TRUE 1 36 #endif 37 38 #ifndef FALSE 39 # define FALSE 0 40 #endif 41 42 43 #include "build.h" 44 #include "op_system.h" 45 #include "compiler.h" 46 #include "architecture.h" 47 #include "i_format.h" 48 #include "f_format.h" 49 50 #if NEW_DPML_MACROS == 1 51 52 # if MULTIPLE_ISSUE 53 # define PIPELINED 1 54 # else 55 # define PIPELINED 0 56 # endif 57 58 #endif 59 60 #define DPML_NULL_MACRO 61 #define DPML_NULL_MACRO_TOKEN 1 62 63 /* 64 * For values that are small powers of two, the follow macros are useful for 65 * generating the base two log of that values. For example, 66 * LOG2(BITS_PER_F_TYPE) will evaluate to 5, 6 or 7 for floating point 67 * types s/f, t/g or x. 68 */ 69 70 #define __LOG2(name) PASTE_2(__LOG2_,name) 71 #define __LOG2_1 0 72 #define __LOG2_2 1 73 #define __LOG2_4 2 74 #define __LOG2_8 3 75 #define __LOG2_16 4 76 #define __LOG2_32 5 77 #define __LOG2_64 6 78 #define __LOG2_128 7 79 #define __LOG2_256 8 80 #define __LOG2_512 9 81 #define __LOG2_1024 10 82 #define __LOG2_2048 11 83 #define __LOG2_4096 12 84 #define __LOG2_8192 13 85 #define __LOG2_16384 14 86 #define __LOG2_32768 15 87 #define __LOG2_65536 16 88 89 90 #if defined(F_TYPE) 91 # define GENERIC_TYPE F_TYPE 92 #else 93 # define GENERIC_TYPE I_TYPE 94 #endif 95 96 #ifndef TYPE1 97 # define TYPE1 GENERIC_TYPE 98 #endif 99 100 #ifndef TYPE2 101 # define TYPE2 GENERIC_TYPE 102 #endif 103 104 #ifndef TYPE3 105 # define TYPE3 GENERIC_TYPE 106 #endif 107 108 #if ((defined(ALPHA) || defined(alpha)) && (defined(wnt) || defined(vms))) 109 # define EXP_WORD_TYPE INT_64 110 #else 111 # define EXP_WORD_TYPE WORD 112 #endif 113 114 #if defined(MAKE_INCLUDE) || defined(MAKE_MTC) 115 # include "mtc_macros.h" 116 # include "mphoc_macros.h" 117 #endif 118 119 #include "poly_macros.h" 120 #include "assert.h" 121 #include 
"dpml_names.h" 122 #include "dpml_exception.h" 123 124 # define C_F_PROTO( name ) extern F_COMPLEX name( F_TYPE ) 125 # define C_FF_PROTO( name ) extern F_COMPLEX name( F_TYPE, F_TYPE ) 126 # define C_FI_PROTO( name ) extern F_COMPLEX name( F_TYPE, WORD ) 127 # define C_FFFF_PROTO( name ) extern F_COMPLEX name( F_TYPE, F_TYPE, F_TYPE, F_TYPE ) 128 # define C_p_PROTO( name ) extern F_COMPLEX name( F_COMPLEX * ) 129 # define C_s_PROTO( name ) extern F_COMPLEX name( F_COMPLEX ) 130 131 # define C_B_PROTO( name ) extern B_COMPLEX name( B_TYPE ) 132 # define C_BB_PROTO( name ) extern B_COMPLEX name( B_TYPE, B_TYPE ) 133 # define C_BBBB_PROTO( name ) extern B_COMPLEX name( B_TYPE, B_TYPE, B_TYPE, B_TYPE ) 134 135 # define F_F_PROTO( name ) extern F_TYPE name( F_TYPE ) 136 # define F_FF_PROTO( name ) extern F_TYPE name( F_TYPE, F_TYPE ) 137 # define F_FI_PROTO( name ) extern F_TYPE name( F_TYPE, WORD ) 138 # define F_FpI_PROTO( name ) extern F_TYPE name( F_TYPE, WORD* ) 139 # define F_IF_PROTO( name ) extern F_TYPE name( WORD, F_TYPE ) 140 141 # define B_B_PROTO( name ) extern B_TYPE name( B_TYPE ) 142 # define B_BB_PROTO( name ) extern B_TYPE name( B_TYPE, B_TYPE ) 143 # define B_BI_PROTO( name ) extern B_TYPE name( B_TYPE, WORD ) 144 # define B_BpI_PROTO( name ) extern B_TYPE name( B_TYPE, WORD* ) 145 # define B_IB_PROTO( name ) extern B_TYPE name( WORD, B_TYPE ) 146 147 # define I_F_PROTO( name ) extern WORD name( F_TYPE ) 148 # define I_FpF_PROTO( name ) extern WORD name( F_TYPE, F_TYPE* ) 149 # define I_FIpF_PROTO( name ) extern WORD name( F_TYPE, WORD, F_TYPE* ) 150 # define I_FIpFpF_PROTO( name ) extern WORD name( F_TYPE, WORD, F_TYPE*, F_TYPE* ) 151 152 # define I_B_PROTO( name ) extern WORD name( B_TYPE ) 153 # define I_BpB_PROTO( name ) extern WORD name( B_TYPE, B_TYPE* ) 154 # define I_BIpB_PROTO( name ) extern WORD name( B_TYPE, WORD, B_TYPE* ) 155 # define I_BIpBpB_PROTO( name ) extern WORD name( B_TYPE, WORD, B_TYPE*, B_TYPE* ) 156 157 # define I_II_PROTO( 
name ) extern WORD name( WORD, WORD ) 158 159 #define F_C_NAN 0 160 #define F_C_INF 1 161 #define F_C_NORM 2 162 #define F_C_DENORM 3 163 #define F_C_ZERO 4 164 165 #define F_C_POS_CLASS(n) ((n) << 1) 166 #define F_C_NEG_CLASS(n) (((n) << 1) | 1) 167 #define F_C_BASE_CLASS(c) ((c) >> 1) 168 #define F_C_IS_NEG_CLASS(c) ((c) & 1) 169 #define F_C_IS_POS_CLASS(c) (((c) & 1) == 0) 170 171 /* The F_C_* defs must be in the current order, enumerated from 0 to 9 */ 172 173 # define F_C_SIG_NAN F_C_POS_CLASS(F_C_NAN) /* 0 */ 174 # define F_C_QUIET_NAN F_C_NEG_CLASS(F_C_NAN) /* 1 */ 175 # define F_C_POS_INF F_C_POS_CLASS(F_C_INF) /* 2 */ 176 # define F_C_NEG_INF F_C_NEG_CLASS(F_C_INF) /* 3 */ 177 # define F_C_POS_NORM F_C_POS_CLASS(F_C_NORM) /* 4 */ 178 # define F_C_NEG_NORM F_C_NEG_CLASS(F_C_NORM) /* 5 */ 179 # define F_C_POS_DENORM F_C_POS_CLASS(F_C_DENORM) /* 6 */ 180 # define F_C_NEG_DENORM F_C_NEG_CLASS(F_C_DENORM) /* 7 */ 181 # define F_C_POS_ZERO F_C_POS_CLASS(F_C_ZERO) /* 8 */ 182 # define F_C_NEG_ZERO F_C_NEG_CLASS(F_C_ZERO) /* 9 */ 183 184 # define F_C_NUM_CLASSES 10 185 # define F_C_CLASS_BIT_WIDTH 4 186 187 188 #define AS_WORD(p) (*(WORD *)&(p)) 189 #define AS_CHAR(p) (*(char *)&(p)) 190 #define AS_SHORT(p) (*(short *)&(p)) 191 #define AS_INT(p) (*(int *)&(p)) 192 #define AS_LONG(p) (*(long *)&(p)) 193 #define AS_FLOAT(p) (*(float *)&(p)) 194 #define AS_DOUBLE(p) (*(double *)&(p)) 195 #define AS_F_TYPE(p) (*(F_TYPE *)&(p)) 196 #define AS_B_TYPE(p) (*(B_TYPE *)&(p)) 197 198 199 200 /* Environment specific macro definitions that pre-empt the generic 201 (and perhaps slow) definitions below are in include files per 202 ARCHITECTURE. The macros defined in these files should be a subset of 203 the macros defined below (i.e. if there is a specific version, there 204 should also be a generic version that will work with any ANSI C 205 compiler). [ In practice, we may not get around to writing the generic 206 versions until we need them. 
] */ 207 208 209 #if (ARCHITECTURE == vax) 210 211 # include "vax_macros.h" 212 213 #elif (ARCHITECTURE == mips) 214 215 # include "mips_macros.h" 216 217 #elif (ARCHITECTURE == hp_pa) 218 219 # include "ix86_macros.h" 220 221 #elif (ARCHITECTURE == cray) 222 223 # include "cray_macros.h" 224 225 #elif (ARCHITECTURE == alpha) 226 227 # include "alpha_macros.h" 228 229 #elif (ARCHITECTURE == ix86) 230 231 # include "ix86_macros.h" 232 233 #elif (ARCHITECTURE == merced) 234 235 #include "ix86_macros.h" 236 237 #elif (ARCHITECTURE == amd64 ) 238 239 # include "ix86_macros.h" 240 241 #elif (ARCHITECTURE == sparc ) 242 243 # include "ix86_macros.h" 244 245 #elif (ARCHITECTURE == s390 ) 246 247 # include "ix86_macros.h" 248 249 #else 250 251 # error Unknown ARCHITECTURE. 252 253 #endif 254 255 256 # if (defined( _WIN32 ) && defined( _M_IX86 )) || (defined(merced) && !defined(HPUX_OS)) 257 258 /* Disallow use of intrinsic math functions on Windows NT on Intel */ 259 260 double acos( double ) ; 261 # pragma function( acos ) 262 double asin( double ) ; 263 # pragma function( asin ) 264 double atan( double ) ; 265 # pragma function( atan ) 266 double atan2( double, double ) ; 267 # pragma function( atan2 ) 268 double cos( double ) ; 269 # pragma function( cos ) 270 double cosh( double ) ; 271 # pragma function( cosh ) 272 double exp( double ) ; 273 # pragma function( exp ) 274 double fabs( double ) ; 275 # pragma function( fabs ) 276 double fmod( double, double ) ; 277 # pragma function( fmod ) 278 double log( double ) ; 279 # pragma function( log ) 280 double log10( double ) ; 281 # pragma function( log10 ) 282 double pow( double, double ) ; 283 # pragma function( pow ) 284 double sin( double ) ; 285 # pragma function( sin ) 286 double sinh( double ) ; 287 # pragma function( sinh ) 288 double sqrt( double ) ; 289 # pragma function( sqrt ) 290 double tan( double ) ; 291 # pragma function( tan ) 292 double tanh( double ) ; 293 # pragma function( tanh ) 294 295 # endif /* 
defined( _WIN32 ) && defined( _M_IX86 ) */ 296 297 298 # if defined(merced) 299 300 float acosf( float ) ; 301 # pragma function( acosf ) 302 float asinf( float ) ; 303 # pragma function( asinf ) 304 float atanf( float ) ; 305 # pragma function( atanf ) 306 float atan2f( float, float ) ; 307 # pragma function( atan2f ) 308 float cosf( float ) ; 309 # pragma function( cosf ) 310 float coshf( float ) ; 311 # pragma function( coshf ) 312 float expf( float ) ; 313 # pragma function( expf ) 314 float fabsf( float ) ; 315 # pragma function( fabsf ) 316 float fmodf( float, float ) ; 317 # pragma function( fmodf ) 318 float logf( float ) ; 319 # pragma function( logf ) 320 float log10f( float ) ; 321 # pragma function( log10f ) 322 float powf( float, float ) ; 323 # pragma function( powf ) 324 float sinf( float ) ; 325 # pragma function( sinf ) 326 float sinhf( float ) ; 327 # pragma function( sinhf ) 328 float sqrtf( float ) ; 329 # pragma function( sqrtf ) 330 float tanf( float ) ; 331 # pragma function( tanf ) 332 float tanhf( float ) ; 333 # pragma function( tanhf ) 334 float ceilf( float ) ; 335 # pragma function( ceilf ) 336 float floorf( float ) ; 337 # pragma function( floorf ) 338 339 #endif 340 341 342 343 /* General macros and generic (though perhaps slow) versions of the 344 specific macro definitions included above. 
*/ 345 346 347 348 #ifndef F_IS_NAN 349 #define F_IS_NAN(x) (x != x) 350 #endif 351 352 #ifndef F_IS_ZERO 353 #define F_IS_ZERO(x) (x == 0.0) 354 #endif 355 356 #ifndef F_IS_NEG 357 #define F_IS_NEG(x) (x < 0.0) 358 #endif 359 360 #ifndef F_IS_POS 361 #define F_IS_POS(x) (x > 0.0) 362 #endif 363 364 #ifndef F_SET_FLAG_IF_ZERO 365 #define F_SET_FLAG_IF_ZERO(x,flag) { \ 366 (flag) = ((x) == 0.0); \ 367 } 368 #endif 369 370 #if 0 371 #ifndef F_SET_FLAG_IF_NEG 372 #define F_SET_FLAG_IF_NEG(x,flag) { \ 373 (flag) = ((x) < 0.0); \ 374 } 375 #endif 376 #endif 377 378 #ifndef F_SET_FLAG_IF_POS 379 #define F_SET_FLAG_IF_POS(x,flag) { \ 380 (flag) = ((x) > 0.0); \ 381 } 382 #endif 383 384 385 #if (VAX_FLOATING) 386 387 #ifndef F_EXP_WORD_IS_ABNORMAL 388 #define F_EXP_WORD_IS_ABNORMAL(exp_word) (!((exp_word) & F_EXP_MASK)) 389 #endif 390 391 #ifndef F_EXP_WORD_IS_ABNORMAL_OR_NEG 392 #define F_EXP_WORD_IS_ABNORMAL_OR_NEG(exp_word) \ 393 ((INT_16)((exp_word) & ((1 << 16) - 1)) < (INT_16)(1 << F_EXP_POS)) 394 #endif 395 #ifndef B_EXP_WORD_IS_ABNORMAL_OR_NEG 396 #define B_EXP_WORD_IS_ABNORMAL_OR_NEG(exp_word) \ 397 ((INT_16)((exp_word) & ((1 << 16) - 1)) < (INT_16)(1 << B_EXP_POS)) 398 #endif 399 400 #ifndef F_EXP_WORD_IS_INFINITE_OR_NAN 401 /* It is assumed that ROP detection has already been done */ 402 #define F_EXP_WORD_IS_INFINITE_OR_NAN(exp_word) (0) 403 #endif 404 405 #ifndef F_SET_FLAG_IF_ZERO_OR_DENORM 406 #define F_SET_FLAG_IF_ZERO_OR_DENORM(x,flag) { \ 407 F_UNION u; \ 408 u.f = (x); \ 409 (flag) = (!(u.F_HI_WORD & F_SIGN_EXP_MASK)); \ 410 } 411 #endif 412 413 #ifndef F_SET_FLAG_IF_DENORM 414 #define F_SET_FLAG_IF_DENORM(x,flag) { \ 415 (flag) = 0; \ 416 } 417 #endif 418 419 #ifndef F_SET_FLAG_IF_INF 420 #define F_SET_FLAG_IF_INF(x,flag) { \ 421 (flag) = 0; \ 422 } 423 #endif 424 425 #ifndef F_SET_FLAG_IF_FINITE 426 #define F_SET_FLAG_IF_FINITE(x,flag) { \ 427 (flag) = 1; \ 428 } 429 #endif 430 431 #ifndef F_SET_FLAG_IF_NAN 432 #define F_SET_FLAG_IF_NAN(x,flag) { \ 433 
F_UNION u; \ 434 u.f = (x); \ 435 (flag) = ((u.F_HI_WORD & F_SIGN_EXP_MASK) == F_SIGN_BIT_MASK); \ 436 } 437 #endif 438 439 #ifndef F_SET_FLAG_IF_NAN_OR_INF 440 #define F_SET_FLAG_IF_NAN_OR_INF(x,flag) { \ 441 F_UNION u; \ 442 u.f = (x); \ 443 (flag) = ((u.F_HI_WORD & F_SIGN_EXP_MASK) == F_SIGN_BIT_MASK); \ 444 } 445 #endif 446 447 #ifndef F_SET_FLAG_IF_NORM 448 #define F_SET_FLAG_IF_NORM(x,flag) { \ 449 F_UNION u; \ 450 u.f = (x); \ 451 (flag) = (u.F_HI_WORD & F_EXP_MASK); \ 452 } 453 #endif 454 455 #ifndef F_CLASSIFY 456 #define F_CLASSIFY(x,class) { \ 457 U_WORD exp; \ 458 F_UNION u; \ 459 u.f = (x); \ 460 (class) = (((U_WORD)u.F_HI_WORD >> F_SIGN_BIT_POS) & 0x1); \ 461 exp = (u.F_HI_WORD & F_EXP_MASK); \ 462 if (exp) \ 463 (class) += F_C_POS_NORM; \ 464 else \ 465 (class) = ((class) ? F_C_SIG_NAN : F_C_POS_ZERO); \ 466 } 467 #endif 468 469 #ifndef F_CLASSIFY_AND_GET_EXP_WORD 470 #define F_CLASSIFY_AND_GET_EXP_WORD(x,class,exp_word) { \ 471 U_WORD exp; \ 472 F_UNION u; \ 473 u.f = (x); \ 474 exp_word = u.F_HI_WORD; \ 475 (class) = (((U_WORD)u.F_HI_WORD >> F_SIGN_BIT_POS) & 0x1); \ 476 exp = (u.F_HI_WORD & F_EXP_MASK); \ 477 if (exp) \ 478 (class) += F_C_POS_NORM; \ 479 else \ 480 (class) = ((class) ? 
F_C_SIG_NAN : F_C_POS_ZERO); \ 481 } 482 #endif 483 484 485 486 #elif (IEEE_FLOATING) 487 488 489 490 #ifndef F_EXP_WORD_IS_ABNORMAL 491 #define F_EXP_WORD_IS_ABNORMAL(exp_word) \ 492 (((exp_word) & F_EXP_MASK) - ((U_WORD)1 << F_EXP_POS) \ 493 >= MAKE_MASK(F_EXP_WIDTH - 1, F_EXP_POS + 1)) 494 #endif 495 496 #ifndef F_EXP_WORD_IS_ABNORMAL_OR_NEG 497 #define F_EXP_WORD_IS_ABNORMAL_OR_NEG(exp_word) \ 498 ((exp_word) - ((U_WORD)1 << F_EXP_POS) \ 499 >= MAKE_MASK(F_EXP_WIDTH - 1, F_EXP_POS + 1)) 500 #endif 501 #ifndef B_EXP_WORD_IS_ABNORMAL_OR_NEG 502 #define B_EXP_WORD_IS_ABNORMAL_OR_NEG(exp_word) \ 503 ((exp_word) - ((U_WORD)1 << B_EXP_POS) \ 504 >= MAKE_MASK(B_EXP_WIDTH - 1, B_EXP_POS + 1)) 505 #endif 506 507 #ifndef F_EXP_WORD_IS_INFINITE_OR_NAN 508 #define F_EXP_WORD_IS_INFINITE_OR_NAN(exp_word) \ 509 (((exp_word) & F_EXP_MASK) == F_EXP_MASK) 510 #endif 511 512 #ifndef F_SET_FLAG_IF_ZERO_OR_DENORM 513 #define F_SET_FLAG_IF_ZERO_OR_DENORM(x,flag) { \ 514 F_UNION u; \ 515 u.f = (x); \ 516 flag = (!(u.F_HI_WORD & F_EXP_MASK)); \ 517 } 518 #endif 519 520 #ifndef F_SET_FLAG_IF_DENORM 521 #define F_SET_FLAG_IF_DENORM(x,flag) { \ 522 F_UNION u; \ 523 u.f = (x); \ 524 flag = (!(u.F_HI_WORD & F_EXP_MASK) \ 525 && ((u.F_HI_WORD & F_MANTISSA_MASK) OR_LOW_BITS_SET(u))); \ 526 } 527 #endif 528 529 #ifndef F_SET_FLAG_IF_INF 530 #define F_SET_FLAG_IF_INF(x,flag) { \ 531 F_UNION u; \ 532 u.f = (x); \ 533 (flag) = (((u.F_HI_WORD & F_EXP_MASK) == F_EXP_MASK) \ 534 && (!((u.F_HI_WORD & F_MANTISSA_MASK) OR_LOW_BITS_SET(u)))); \ 535 } 536 #endif 537 538 #ifndef F_SET_FLAG_IF_FINITE 539 #define F_SET_FLAG_IF_FINITE(x,flag) { \ 540 F_UNION u; \ 541 u.f = (x); \ 542 (flag) = ((u.F_HI_WORD & F_EXP_MASK) != F_EXP_MASK); \ 543 } 544 #endif 545 546 #ifndef F_SET_FLAG_IF_NAN 547 #define F_SET_FLAG_IF_NAN(x,flag) { \ 548 F_UNION u; \ 549 u.f = (x); \ 550 (flag) = (((u.F_HI_WORD & F_EXP_MASK) == F_EXP_MASK) \ 551 && ((u.F_HI_WORD & F_MANTISSA_MASK) OR_LOW_BITS_SET(u))); \ 552 } 553 #endif 554 
555 #ifndef F_SET_FLAG_IF_NAN_OR_INF 556 #define F_SET_FLAG_IF_NAN_OR_INF(x,flag) { \ 557 F_UNION u; \ 558 u.f = (x); \ 559 (flag) = ((u.F_HI_WORD & F_EXP_MASK) == F_EXP_MASK); \ 560 } 561 #endif 562 563 #ifndef F_SET_FLAG_IF_NORM 564 #define F_SET_FLAG_IF_NORM(x,flag) { \ 565 F_UNION u; \ 566 u.f = (x); \ 567 (flag) = (u.F_HI_WORD & F_EXP_MASK); \ 568 (flag) = ((flag) && (flag < F_EXP_MASK)); \ 569 } 570 #endif 571 572 #ifndef F_CLASSIFY 573 #define F_CLASSIFY(x,class) { \ 574 U_WORD exp; \ 575 F_UNION u; \ 576 u.f = (x); \ 577 (class) = (((U_WORD)u.F_HI_WORD >> F_SIGN_BIT_POS) & 0x1); \ 578 exp = (u.F_HI_WORD & F_EXP_MASK); \ 579 if (exp) { \ 580 if (exp < F_EXP_MASK) \ 581 (class) += F_C_POS_NORM; \ 582 else { \ 583 u.F_HI_WORD &= F_MANTISSA_MASK; \ 584 if (u.F_HI_WORD OR_LOW_BITS_SET(u)) { \ 585 (class) = (((U_WORD)u.F_HI_WORD >> F_MSB_POS) & 0x1); \ 586 } else \ 587 (class) += F_C_POS_INF; \ 588 } \ 589 } else { \ 590 u.F_HI_WORD &= F_MANTISSA_MASK; \ 591 (class) += \ 592 ((u.F_HI_WORD OR_LOW_BITS_SET(u)) ? F_C_POS_DENORM : F_C_POS_ZERO); \ 593 } \ 594 } 595 #endif 596 597 #ifndef F_CLASSIFY_AND_GET_EXP_WORD 598 #define F_CLASSIFY_AND_GET_EXP_WORD(x,class,exp_word) { \ 599 U_WORD exp; \ 600 F_UNION u; \ 601 u.f = (x); \ 602 exp_word = u.F_HI_WORD; \ 603 (class) = (((U_WORD)u.F_HI_WORD >> F_SIGN_BIT_POS) & 0x1); \ 604 exp = (u.F_HI_WORD & F_EXP_MASK); \ 605 if (exp) { \ 606 if (exp < F_EXP_MASK) \ 607 (class) += F_C_POS_NORM; \ 608 else { \ 609 u.F_HI_WORD &= F_MANTISSA_MASK; \ 610 if (u.F_HI_WORD OR_LOW_BITS_SET(u)) { \ 611 (class) = (((U_WORD)u.F_HI_WORD >> F_MSB_POS) & 0x1); \ 612 } else \ 613 (class) += F_C_POS_INF; \ 614 } \ 615 } else { \ 616 u.F_HI_WORD &= F_MANTISSA_MASK; \ 617 (class) += \ 618 ((u.F_HI_WORD OR_LOW_BITS_SET(u)) ? 
F_C_POS_DENORM : F_C_POS_ZERO); \ 619 } \ 620 } 621 #endif 622 623 624 625 626 #endif /* floating type */ 627 628 629 #ifndef F_SET_FLAG_IF_NEG 630 #define F_SET_FLAG_IF_NEG(x,flag) { \ 631 F_UNION u; \ 632 u.f = (x); \ 633 (flag) = ((u.F_HI_WORD) & F_SIGN_BIT_MASK); \ 634 } 635 #endif 636 637 638 #ifndef F_EXP_WORD_IS_ZERO_OR_DENORM 639 #define F_EXP_WORD_IS_ZERO_OR_DENORM(exp_word) \ 640 (!((exp_word) & F_EXP_MASK)) 641 #endif 642 643 #ifndef B_EXP_WORD_IS_ZERO_OR_DENORM 644 #define B_EXP_WORD_IS_ZERO_OR_DENORM(exp_word) \ 645 (!((exp_word) & B_EXP_MASK)) 646 #endif 647 648 #ifndef F_EXP_WORD_IS_NEG 649 #define F_EXP_WORD_IS_NEG(exp_word) \ 650 ((exp_word) & F_SIGN_BIT_MASK) 651 #endif 652 653 #ifndef B_EXP_WORD_IS_NEG 654 #define B_EXP_WORD_IS_NEG(exp_word) \ 655 ((exp_word) & B_SIGN_BIT_MASK) 656 #endif 657 658 #ifndef F_EXP_WORD_IS_POS 659 #define F_EXP_WORD_IS_POS(exp_word) \ 660 (!((exp_word) & F_SIGN_BIT_MASK)) 661 #endif 662 663 664 #ifndef SET_BIT 665 # define SET_BIT(pos) ((U_WORD)1 << (pos)) 666 #endif 667 668 #ifndef MAKE_MASK 669 # define MAKE_MASK(width,pos) ((((U_WORD)1 << (width)) - 1) << (pos)) 670 #endif 671 672 673 /* Rounding modes are done in an architecture specific way. If no 674 specific macros were defined, assume there are no rounding modes. 
*/ 675 676 #ifndef GET_ROUNDING_MODE 677 #define GET_ROUNDING_MODE(old) 678 #endif 679 680 #ifndef SET_ROUNDING_MODE 681 #define SET_ROUNDING_MODE(new) 682 #endif 683 684 #ifndef SWAP_ROUNDING_MODE 685 #define SWAP_ROUNDING_MODE(new,old) 686 #endif 687 688 #ifndef FPU_STATUS_WORD_TYPE 689 #define FPU_STATUS_WORD_TYPE WORD 690 #endif 691 692 #ifndef INIT_FPU_STATE_AND_ROUND_TO_NEAREST 693 #define INIT_FPU_STATE_AND_ROUND_TO_NEAREST(status_word) 694 #endif 695 696 #ifndef INIT_FPU_STATE_AND_ROUND_TO_ZERO 697 #define INIT_FPU_STATE_AND_ROUND_TO_ZERO(status_word) 698 #endif 699 700 #ifndef RESTORE_FPU_STATE 701 #define RESTORE_FPU_STATE(status_word) 702 #endif 703 704 705 706 /* Constants in bytes, for table indexing */ 707 708 #define BYTES_PER_S_TYPE (BITS_PER_S_TYPE/BITS_PER_CHAR) 709 #define BYTES_PER_D_TYPE (BITS_PER_D_TYPE/BITS_PER_CHAR) 710 #define BYTES_PER_Q_TYPE (BITS_PER_Q_TYPE/BITS_PER_CHAR) 711 #define BYTES_PER_B_TYPE (BITS_PER_B_TYPE/BITS_PER_CHAR) 712 #define BYTES_PER_R_TYPE (BITS_PER_R_TYPE/BITS_PER_CHAR) 713 714 715 716 /* Make_float primitives */ 717 718 #define S_MAKE_FLOAT(i,s) { \ 719 S_UNION u; \ 720 u.S_HI_WORD = (i); \ 721 s = u.f; \ 722 } 723 #if WORDS_PER_D_TYPE == 1 724 # define D_MAKE_FLOAT(i,s) { \ 725 D_UNION u; \ 726 u.D_HI_WORD = (i); \ 727 s = u.f; \ 728 } 729 #elif WORDS_PER_D_TYPE == 2 730 # define D_MAKE_FLOAT(i,s) { \ 731 D_UNION u; \ 732 u.D_HI_WORD = (i); \ 733 u.D_LO_WORD = 0; \ 734 s = u.f; \ 735 } 736 #else 737 # error Surprising number of words per D_FLOAT 738 #endif 739 740 #define D_MAKE_FLOAT_64(i,s) { \ 741 D_UNION u; \ 742 u.D_UNSIGNED_HI_64 = (i); \ 743 s = u.f; \ 744 } 745 746 #define Q_MAKE_FLOAT(i,s) { \ 747 Q_UNION u; \ 748 u.f = 0.0; \ 749 u.Q_HI_WORD = (i); \ 750 s = u.f; \ 751 } 752 753 #define F_EXP_MAKE_FLOAT PASTE_2(F_PREC_CHAR,_MAKE_FLOAT) 754 #define B_EXP_MAKE_FLOAT PASTE_2(B_PREC_CHAR,_MAKE_FLOAT) 755 756 #define F_MAKE_FLOAT(i,s) F_EXP_MAKE_FLOAT(i,s) 757 #define B_MAKE_FLOAT(i,s) B_EXP_MAKE_FLOAT(i,s) 

/* The following several macros are intended to be used as a set.  It
   is the combination of F_SAVE_SIGN_AND_GET_ABS and F_RESTORE_SIGN (or
   F_NEGATE_IF_SIGN_NEG) that should be efficient (i.e. if slowing one of
   them down will make the combination faster, go ahead and do it). */

#ifndef F_SIGN_TYPE

#   define F_SIGN_TYPE U_WORD

    /* x is evaluated once; sign <- (|x| != x). */
#   define F_SAVE_SIGN_AND_GET_ABS(x, sign, abs_x) { \
        F_TYPE save_x = (x); \
        F_ABS(save_x, (abs_x)); \
        (sign) = ((abs_x) != save_x); \
    }

#   define F_CHANGE_SIGN(sign) \
        (sign) = !(sign)

    /* Statement macros are wrapped in a block so they stay a single
       statement when used as the body of an if/else. */
#   define F_RESTORE_SIGN(sign, x) { \
        ASSERT((x) >= 0.0); \
        if (sign) F_NEGATE(x); \
    }

#   define F_NEGATE_IF_SIGN_NEG(sign, x) { \
        if (sign) F_NEGATE(x); \
    }

#endif







/* x = -x.  x must be an lvalue. */

#ifndef S_NEGATE
#define S_NEGATE(x) ((x) = -(x))
#endif

#ifndef D_NEGATE
#define D_NEGATE(x) ((x) = -(x))
#endif

#ifndef F_NEGATE
#define F_NEGATE(x) ((x) = -(x))
#endif

#ifndef B_NEGATE
#define B_NEGATE(x) ((x) = -(x))
#endif


/* Make x non-positive.  Comparison based, so zeros are left untouched. */

#ifndef S_SET_NEG_BIT
#define S_SET_NEG_BIT(x) { if ((x) > 0.0) S_NEGATE(x); }
#endif

#ifndef D_SET_NEG_BIT
#define D_SET_NEG_BIT(x) { if ((x) > 0.0) D_NEGATE(x); }
#endif

#ifndef F_SET_NEG_BIT
#define F_SET_NEG_BIT(x) { if ((x) > 0.0) F_NEGATE(x); }
#endif

#ifndef B_SET_NEG_BIT
#define B_SET_NEG_BIT(x) { if ((x) > 0.0) B_NEGATE(x); }
#endif


/* Make x non-negative.  Comparison based, so zeros are left untouched. */

#ifndef S_CLEAR_NEG_BIT
#define S_CLEAR_NEG_BIT(x) { if ((x) < 0.0) S_NEGATE(x); }
#endif

#ifndef D_CLEAR_NEG_BIT
#define D_CLEAR_NEG_BIT(x) { if ((x) < 0.0) D_NEGATE(x); }
#endif

#ifndef F_CLEAR_NEG_BIT
#define F_CLEAR_NEG_BIT(x) { if ((x) < 0.0) F_NEGATE(x); }
#endif

#ifndef B_CLEAR_NEG_BIT
#define B_CLEAR_NEG_BIT(x) { if ((x) < 0.0) B_NEGATE(x); }
#endif


/* abs_x <- |x| */

#ifndef S_ABS
#define S_ABS(x,abs_x) { \
    (abs_x) = (x); \
    S_CLEAR_NEG_BIT(abs_x); \
}
#endif

#ifndef D_ABS
#define D_ABS(x,abs_x) { \
    (abs_x) = (x); \
    D_CLEAR_NEG_BIT(abs_x); \
}
#endif

#ifndef F_ABS
#define F_ABS(x,abs_x) { \
    (abs_x) = (x); \
    F_CLEAR_NEG_BIT(abs_x); \
}
#endif

#ifndef B_ABS
#define B_ABS(x,abs_x) { \
    (abs_x) = (x); \
    B_CLEAR_NEG_BIT(abs_x); \
}
#endif


/* Note that these copy_sign macros do not work correctly with -0.0 */

/* result <- |value| carrying the sign of `sign`.  `sign` is tested before
   `result` is written, so `result` may be the same variable as `sign`. */

#ifndef S_COPY_SIGN
#undef S_COPY_SIGN_IS_FAST
#define S_COPY_SIGN(value,sign,result) { \
    if ((sign) < 0.0) \
    { \
        S_ABS((value), (result)); \
        S_NEGATE(result); \
    } \
    else \
        S_ABS((value), (result)); \
}
#endif

#ifndef D_COPY_SIGN
#undef D_COPY_SIGN_IS_FAST
#define D_COPY_SIGN(value,sign,result) { \
    if ((sign) < 0.0) \
    { \
        D_ABS((value), (result)); \
        D_NEGATE(result); \
    } \
    else \
        D_ABS((value), (result)); \
}
#endif

#ifndef F_COPY_SIGN
#undef F_COPY_SIGN_IS_FAST
#define F_COPY_SIGN(value,sign,result) { \
    if ((sign) < 0.0) \
    { \
        F_ABS((value), (result)); \
        F_NEGATE(result); \
    } \
    else \
        F_ABS((value), (result)); \
}
#endif

#ifndef B_COPY_SIGN
#undef B_COPY_SIGN_IS_FAST
#define B_COPY_SIGN(value,sign,result) { \
    if ((sign) < 0.0) \
    { \
        B_ABS((value), (result)); \
        B_NEGATE(result); \
    } \
    else \
        B_ABS((value), (result)); \
}
#endif


/* result <- value with its sign and exponent field replaced by those of
   sign_and_exp (only the high word is modified). */

#ifndef S_COPY_SIGN_AND_EXP
#undef S_COPY_SIGN_AND_EXP_IS_FAST
#define S_COPY_SIGN_AND_EXP(value,sign_and_exp,result) { \
    S_UNION u; \
    U_WORD new_sign_exp; \
    u.f = (sign_and_exp); \
    new_sign_exp = u.S_HI_WORD & S_SIGN_EXP_MASK; \
    u.f = (value); \
    u.S_HI_WORD &= ~S_SIGN_EXP_MASK; \
    u.S_HI_WORD |= new_sign_exp; \
    (result) = u.f; \
}
#endif

#ifndef D_COPY_SIGN_AND_EXP
#undef D_COPY_SIGN_AND_EXP_IS_FAST
#define D_COPY_SIGN_AND_EXP(value,sign_and_exp,result) { \
    D_UNION u; \
    U_WORD new_sign_exp; \
    u.f = (sign_and_exp); \
    new_sign_exp = u.D_HI_WORD & D_SIGN_EXP_MASK; \
    u.f = (value); \
    u.D_HI_WORD &= ~D_SIGN_EXP_MASK; \
    u.D_HI_WORD |= new_sign_exp; \
    (result) = u.f; \
}
#endif

#ifndef F_COPY_SIGN_AND_EXP
#undef F_COPY_SIGN_AND_EXP_IS_FAST
#define F_COPY_SIGN_AND_EXP(value,sign_and_exp,result) { \
    F_UNION u; \
    U_WORD new_sign_exp; \
    u.f = (sign_and_exp); \
    new_sign_exp = u.F_HI_WORD & F_SIGN_EXP_MASK; \
    u.f = (value); \
    u.F_HI_WORD &= ~F_SIGN_EXP_MASK; \
    u.F_HI_WORD |= new_sign_exp; \
    (result) = u.f; \
}
#endif

#ifndef B_COPY_SIGN_AND_EXP
#undef B_COPY_SIGN_AND_EXP_IS_FAST
#define B_COPY_SIGN_AND_EXP(value,sign_and_exp,result) { \
    B_UNION u; \
    U_WORD new_sign_exp; \
    u.f = (sign_and_exp); \
    new_sign_exp = u.B_HI_WORD & B_SIGN_EXP_MASK; \
    u.f = (value); \
    u.B_HI_WORD &= ~B_SIGN_EXP_MASK; \
    u.B_HI_WORD |= new_sign_exp; \
    (result) = u.f; \
}
#endif







#ifndef F_COPY_NEG_SIGN

/* F_COPY_NEG_SIGN assumes the input value is non-negative.  If the
   input value is negative, the sign of the result is undefined.  If the
   input value is non-negative and sign is negative, the result will be
   -(value).  If value is non-negative and sign is non-negative, the
   result will = value. */

#if F_COPY_SIGN_IS_FAST

#   define F_COPY_NEG_SIGN(sign,abs_sign,value) { \
        ASSERT((value) >= 0.0); \
        F_COPY_SIGN((value),(sign),(value)); \
    }

#else

#   define F_COPY_NEG_SIGN(sign,abs_sign,value) { \
        ASSERT((value) >= 0.0); \
        if ((abs_sign) != (sign)) \
            F_NEGATE(value); \
    }

#endif

#endif



/* When |x| >= t, jump to the caller-provided `cleanup` label.  Expands to
   nothing when F_MAX_BIN_EXP exceeds twice F_PRECISION. */

#if (F_MAX_BIN_EXP > 2 * F_PRECISION)

#   define GOTO_CLEANUP_IF_POTENTIAL_OVERFLOW(x, t)

#else

#   define GOTO_CLEANUP_IF_POTENTIAL_OVERFLOW(x, t) { \
        F_TYPE abs_x; \
        F_ABS((x), abs_x); \
        if (abs_x >= (t)) \
            goto cleanup; \
    }

#endif


#if (DPML_DEBUG)
#   define DPML_DEBUG_ABS(x) (((x) < 0.0) ? (-(x)) : (x))
#endif



/* Round to an integral value by adding and then subtracting
   2^(precision - 1), which pushes the fraction bits out of the
   significand; the result follows the current rounding mode.
   |x| must be below 2^(precision - 1). */

#ifndef F_POS_RINT
#undef F_POS_RINT_IS_FAST
#define F_POS_RINT_PRECISION_LIMIT (F_PRECISION - 1)
#define F_POS_RINT(x,y) { \
    F_TYPE t = F_POW_2(F_PRECISION - 1); \
    ASSERT((x) < t); \
    (y) = (x) + t; \
    (y) -= t; \
}
#endif

#ifndef F_NEG_RINT
#undef F_NEG_RINT_IS_FAST
#define F_NEG_RINT_PRECISION_LIMIT (F_PRECISION - 1)
#define F_NEG_RINT(x,y) { \
    F_TYPE t = F_POW_2(F_PRECISION - 1); \
    ASSERT((x) > -t); \
    (y) = (x) - t; \
    (y) += t; \
}
#endif


/* The S_ and D_ limits are stated in terms of their own precision, matching
   the constant each macro actually adds. */

#ifndef S_RINT
#define S_RINT_PRECISION_LIMIT (S_PRECISION - 1)
#define S_RINT(x,y) { \
    S_TYPE t = S_POW_2(S_PRECISION - 1); \
    ASSERT(DPML_DEBUG_ABS(x) < t); \
    S_COPY_SIGN(t, (x), t); \
    (y) = (x) + t; \
    (y) -= t; \
}
#endif

#ifndef D_RINT
#define D_RINT_PRECISION_LIMIT (D_PRECISION - 1)
#define D_RINT(x,y) { \
    D_TYPE t = D_POW_2(D_PRECISION - 1); \
    ASSERT(DPML_DEBUG_ABS(x) < t); \
    D_COPY_SIGN(t, (x), t); \
    (y) = (x) + t; \
    (y) -= t; \
}
#endif

#ifndef F_RINT
#undef F_RINT_IS_FAST
#define F_RINT_PRECISION_LIMIT (F_PRECISION - 1)
#define F_RINT(x,y) { \
    F_TYPE t = F_POW_2(F_PRECISION - 1); \
    ASSERT(DPML_DEBUG_ABS(x) < t); \
    F_COPY_SIGN(t, (x), t); \
    (y) = (x) + t; \
    (y) -= t; \
}
#endif

#ifndef B_RINT
#undef B_RINT_IS_FAST
#define B_RINT_PRECISION_LIMIT (B_PRECISION - 1)
#define B_RINT(x,y) { \
    B_TYPE t = B_POW_2(B_PRECISION - 1); \
    ASSERT(DPML_DEBUG_ABS(x) < t); \
    B_COPY_SIGN(t, (x), t); \
    (y) = (x) + t; \
    (y) -= t; \
}
#endif


/* Round x to an integral value, returned both as a floating value
   (flt_int_x) and as a WORD (int_x). */

#ifndef S_RINT_TO_FLOATING_AND_WORD
#define S_RINT_TO_FLOATING_AND_WORD_PRECISION_LIMIT (S_RINT_PRECISION_LIMIT)
#define S_RINT_TO_FLOATING_AND_WORD(x, flt_int_x, int_x) { \
    S_RINT((x), (flt_int_x)); \
    (int_x) = (WORD) (flt_int_x); \
}
#endif

#ifndef D_RINT_TO_FLOATING_AND_WORD
#define D_RINT_TO_FLOATING_AND_WORD_PRECISION_LIMIT (D_RINT_PRECISION_LIMIT)
#define D_RINT_TO_FLOATING_AND_WORD(x, flt_int_x, int_x) { \
    D_RINT((x), (flt_int_x)); \
    (int_x) = (WORD) (flt_int_x); \
}
#endif

#ifndef F_RINT_TO_FLOATING_AND_WORD
#define F_RINT_TO_FLOATING_AND_WORD_PRECISION_LIMIT (F_RINT_PRECISION_LIMIT)
#define F_RINT_TO_FLOATING_AND_WORD(x, flt_int_x, int_x) { \
    F_RINT((x), (flt_int_x)); \
    (int_x) = (WORD) (flt_int_x); \
}
#endif

#ifndef B_RINT_TO_FLOATING_AND_WORD
#define B_RINT_TO_FLOATING_AND_WORD_PRECISION_LIMIT (B_RINT_PRECISION_LIMIT)
#define B_RINT_TO_FLOATING_AND_WORD(x, flt_int_x, int_x) { \
    B_RINT((x), (flt_int_x)); \
    (int_x) = (WORD) (flt_int_x); \
}
#endif


/* Truncate toward zero: round as above, then step back by 1.0 when the
   rounding moved the value away from zero.  x is evaluated once, so y may
   be the same variable as x. */

#ifndef F_POS_TRUNC
#undef F_POS_TRUNC_IS_FAST
#define F_POS_TRUNC_PRECISION_LIMIT (F_PRECISION - 1)
#define F_POS_TRUNC(x,y) { \
    F_TYPE orig_x = (x); \
    F_TYPE t = F_POW_2(F_PRECISION - 1); \
    ASSERT(orig_x < t); \
    (y) = orig_x + t; \
    (y) -= t; \
    if ((y) > orig_x) \
        (y) -= 1.0; \
}
#endif

#ifndef F_NEG_TRUNC
#undef F_NEG_TRUNC_IS_FAST
#define F_NEG_TRUNC_PRECISION_LIMIT (F_PRECISION - 1)
#define F_NEG_TRUNC(x,y) { \
    F_TYPE orig_x = (x); \
    F_TYPE t = F_POW_2(F_PRECISION - 1); \
    ASSERT(orig_x > -t); \
    (y) = orig_x - t; \
    (y) += t; \
    if ((y) < orig_x) \
        (y) += 1.0; \
}
#endif

#ifndef F_TRUNC
#undef F_TRUNC_IS_FAST
#define F_TRUNC_PRECISION_LIMIT (F_PRECISION - 1)
#define F_TRUNC(x,y) { \
    F_TYPE orig_x = (x); \
    F_TYPE abs_x, t = F_POW_2(F_PRECISION - 1); \
    F_ABS(orig_x, abs_x); \
    ASSERT(abs_x < t); \
    (y) = abs_x + t; \
    (y) -= t; \
    if ((y) > abs_x) \
        (y) -= 1.0; \
    if (abs_x != orig_x) \
        F_NEGATE(y); \
}
#endif


/* Floating to WORD conversions. */

#ifndef F_CVT_TO_WORD_CHOPPED
#undef F_CVT_TO_WORD_CHOPPED_IS_FAST
#define F_CVT_TO_WORD_CHOPPED_PRECISION_LIMIT (BITS_PER_WORD - 1)
#define F_CVT_TO_WORD_CHOPPED(x,i) (i) = (WORD)(x)
#endif

/* status_word uses FPU_STATUS_WORD_TYPE, the type the FPU state macros
   are declared to work with. */
#ifndef F_CVT_TO_WORD_ROUNDED
#undef F_CVT_TO_WORD_ROUNDED_IS_FAST
#define F_CVT_TO_WORD_ROUNDED_PRECISION_LIMIT (F_PRECISION - 1)
#define F_CVT_TO_WORD_ROUNDED(x,i) { \
    FPU_STATUS_WORD_TYPE status_word; \
    F_TYPE y, t; \
    t = F_POW_2(F_PRECISION - 1); \
    ASSERT(DPML_DEBUG_ABS(x) < t); \
    F_COPY_SIGN(t, (x), t); \
    INIT_FPU_STATE_AND_ROUND_TO_NEAREST(status_word); \
    y = (x) + t; \
    RESTORE_FPU_STATE(status_word); \
    y -= t; \
    (i) = (WORD)y; \
}
#endif

#ifndef F_CVT_TO_WORD_ROUNDED_UP
#undef F_CVT_TO_WORD_ROUNDED_UP_IS_FAST
#define F_CVT_TO_WORD_ROUNDED_UP_PRECISION_LIMIT (F_PRECISION - 1)
#define F_CVT_TO_WORD_ROUNDED_UP(x,i) { \
    F_TYPE y, t; \
    t = F_POW_2(F_PRECISION - 1); \
    ASSERT(DPML_DEBUG_ABS(x) < t); \
    F_COPY_SIGN(t, (x), t); \
    y = (x) + t; \
    y -= t; \
    if (y < (x)) \
        y += 1.0; \
    (i) = (WORD)y; \
}
#endif

#ifndef F_CVT_TO_WORD_ROUNDED_DOWN 1228 #undef F_CVT_TO_WORD_ROUNDED_DOWN_IS_FAST 1229 #define F_CVT_TO_WORD_ROUNDED_DOWN_PRECISION_LIMIT (F_PRECISION - 1) 1230 #define F_CVT_TO_WORD_ROUNDED_DOWN(x,i) { \ 1231 F_TYPE y, t; \ 1232 t = F_POW_2(F_PRECISION - 1); \ 1233 ASSERT(DPML_DEBUG_ABS(x) < t); \ 1234 F_COPY_SIGN(t, (x), t); \ 1235 y = (x) + t; \ 1236 y -= t; \ 1237 if (y > x) \ 1238 y -= 1.0; \ 1239 (i) = (WORD)y; \ 1240 } 1241 #endif 1242 1243 1244 1245 #if 0 1246 1247 These do not yet have generic definitions: 1248 1249 #define ARITH_SHIFT_WORD_RIGHT(i,j) 1250 #define F_ADD_CHOPPED 1251 #define F_ADD_ROUNDED_UP 1252 #define F_ADD_ROUNDED_DOWN 1253 #define F_MUL_CHOPPED 1254 #define F_MUL_ROUNDED_UP 1255 #define F_MUL_ROUNDED_DOWN 1256 1257 #endif 1258 1259 1260 1261 #ifndef EXT_MUL 1262 #define EXT_MUL(i,j,lo,hi) { \ 1263 WORD I = (i); \ 1264 WORD J = (j); \ 1265 U_WORD sign, i_neg, j_neg; \ 1266 i_neg = (I < 0); \ 1267 sign = i_neg; \ 1268 if (i_neg) { I = ~((U_WORD)(I)) + 1; i_neg = (I < 0); } \ 1269 j_neg = (J < 0); \ 1270 if (j_neg) { sign ^= 1; J = ~((U_WORD)J) + 1; j_neg = (J < 0); } \ 1271 if (i_neg | j_neg) { \ 1272 if (i_neg) { \ 1273 (lo) = (U_WORD)J << (BITS_PER_WORD - 1); \ 1274 (hi) = (U_WORD)J >> 1; \ 1275 } else { \ 1276 (lo) = (U_WORD)I << (BITS_PER_WORD - 1); \ 1277 (hi) = (U_WORD)I >> 1; \ 1278 } \ 1279 } else { \ 1280 EXT_UMUL(I,J,(lo),(hi)); \ 1281 } \ 1282 if (sign) { \ 1283 (lo) = ~((U_WORD)(lo)) + 1; \ 1284 (hi) = ~((U_WORD)(hi)); \ 1285 if (!lo) (hi) += 1; \ 1286 } \ 1287 } 1288 #endif 1289 1290 1291 1292 #ifndef EXT_MULH 1293 #define EXT_MULH(i,j,hi) { \ 1294 WORD lo; \ 1295 EXT_MUL((i),(j),(lo),(hi)); \ 1296 } 1297 #endif 1298 1299 1300 1301 #ifndef EXT_MUL1 1302 #define EXT_MUL1(i,u1,u2) EXT_MUL((i),(u1),(u1),(u2)) 1303 #endif 1304 1305 1306 1307 #ifndef EXT_UMUL 1308 #define EXT_UMUL(i,j,lo,hi) { \ 1309 U_WORD i1, i2, j1, j2, p1, p2; \ 1310 i2 = (U_WORD)(i) >> (BITS_PER_WORD / 2); \ 1311 j2 = (U_WORD)(j) >> (BITS_PER_WORD / 2); \ 
1312 p2 = i2 * j2; \ 1313 i1 = (U_WORD)((i) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1314 p1 = i1 * j2; \ 1315 j1 = (U_WORD)((j) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1316 (lo) = i1 * j1; \ 1317 (hi) = p2; \ 1318 (hi) += (U_WORD)(p1 >> (BITS_PER_WORD / 2)); \ 1319 ADD_AND_CARRY((p1 << (BITS_PER_WORD / 2)), (lo), (hi)); \ 1320 p2 = i2 * j1; \ 1321 (hi) += (U_WORD)(p2 >> (BITS_PER_WORD / 2)); \ 1322 ADD_AND_CARRY((p2 << (BITS_PER_WORD / 2)), (lo), (hi)); \ 1323 } 1324 #endif 1325 1326 1327 1328 #ifndef EXT_UMULH 1329 #define EXT_UMULH(i,j,hi) { \ 1330 U_WORD lo; \ 1331 EXT_UMUL((i),(j),(lo),(hi)); \ 1332 } 1333 #endif 1334 1335 1336 1337 #ifndef EXT_UMUL1 1338 #define EXT_UMUL1(i,u1,u2) EXT_UMUL((i),(u1),(u1),(u2)) 1339 #endif 1340 1341 1342 1343 #ifndef EXT_UMUL2 1344 #define EXT_UMUL2(i,u1,u2,u3) { \ 1345 U_WORD c1, c2, i1, i2, j1, j2, j3, j4, p1, p2, p3; \ 1346 i2 = (U_WORD)(i) >> (BITS_PER_WORD / 2); \ 1347 j2 = (U_WORD)(u1) >> (BITS_PER_WORD / 2); \ 1348 p2 = i2 * j2; \ 1349 i1 = (U_WORD)((i) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1350 j1 = (U_WORD)((u1) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1351 j3 = (U_WORD)((u2) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1352 j4 = (U_WORD)(u2) >> (BITS_PER_WORD / 2); \ 1353 u2 = i1 * j3; \ 1354 u3 = i2 * j4; \ 1355 u1 = i1 * j1; \ 1356 ADD_AND_CARRY(p2, u2, u3); \ 1357 p1 = i1 * j2; \ 1358 p2 = i2 * j1; \ 1359 p1 += p2; \ 1360 c1 = (p1 < p2); \ 1361 p2 = i1 * j4; \ 1362 p3 = i2 * j3; \ 1363 p2 += p3; \ 1364 c2 = (p2 < p3); \ 1365 p2 += c1; \ 1366 c1 = (p2 < c1); \ 1367 c1 += c2; \ 1368 u3 += (c1 << (BITS_PER_WORD / 2)); \ 1369 ADD_AND_CARRY_2( (p1 << (BITS_PER_WORD / 2)), u1, u2, u3); \ 1370 ADD_AND_CARRY( (p1 >> (BITS_PER_WORD / 2)), u2, u3); \ 1371 ADD_AND_CARRY( (p2 << (BITS_PER_WORD / 2)), u2, u3); \ 1372 u3 += (p2 >> (BITS_PER_WORD / 2)); \ 1373 } 1374 #endif 1375 1376 1377 1378 #ifndef EXT_UMUL3 1379 #define EXT_UMUL3(i,u1,u2,u3,u4) { \ 1380 U_WORD c1, c2, c3, i1, i2, 
j1, j2, j3, j4, j5, j6, p1, p2, p3, p4; \ 1381 i2 = (U_WORD)(i) >> (BITS_PER_WORD / 2); \ 1382 j2 = (U_WORD)(u1) >> (BITS_PER_WORD / 2); \ 1383 p2 = i2 * j2; \ 1384 i1 = (U_WORD)((i) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1385 j1 = (U_WORD)((u1) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1386 j3 = (U_WORD)((u2) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1387 p1 = i1 * j3; \ 1388 j4 = (U_WORD)(u2) >> (BITS_PER_WORD / 2); \ 1389 j5 = (U_WORD)((u3) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1390 j6 = (U_WORD)(u3) >> (BITS_PER_WORD / 2); \ 1391 u1 = i1 * j1; \ 1392 u2 = p1; \ 1393 u3 = i2 * j4; \ 1394 p1 = i1 * j5; \ 1395 u4 = i2 * j6; \ 1396 ADD_AND_CARRY_2(p2, u2, u3, u4); \ 1397 ADD_AND_CARRY(p1, u3, u4); \ 1398 p1 = i1 * j2; \ 1399 p2 = i2 * j1; \ 1400 p1 += p2; \ 1401 c1 = (p1 < p2); \ 1402 p2 = i1 * j4; \ 1403 p3 = i2 * j3; \ 1404 p2 += p3; \ 1405 c2 = (p2 < p3); \ 1406 p3 = i1 * j6; \ 1407 p4 = i2 * j5; \ 1408 p3 += p4; \ 1409 c3 = (p3 < p4); \ 1410 p2 += c1; \ 1411 c1 = (p2 < c1); \ 1412 c2 += c1; \ 1413 p3 += c2; \ 1414 c2 = (p3 < c2); \ 1415 c3 += c2; \ 1416 u4 += (c3 << (BITS_PER_WORD / 2)); \ 1417 ADD_AND_CARRY_3( (p1 << (BITS_PER_WORD / 2)), u1, u2, u3, u4); \ 1418 ADD_AND_CARRY_2( (p1 >> (BITS_PER_WORD / 2)), u2, u3, u4); \ 1419 ADD_AND_CARRY_2( (p2 << (BITS_PER_WORD / 2)), u2, u3, u4); \ 1420 ADD_AND_CARRY( (p2 >> (BITS_PER_WORD / 2)), u3, u4); \ 1421 ADD_AND_CARRY( (p3 << (BITS_PER_WORD / 2)), u3, u4); \ 1422 u4 += (p3 >> (BITS_PER_WORD / 2)); \ 1423 } 1424 #endif 1425 1426 1427 1428 #if (BITS_PER_WORD == 32) && !defined(UMUL32_64_BY_64_GIVING_96) 1429 #define UMUL32_64_BY_64_GIVING_96(x0,x1,y0,y1,z1,z2,z3) { \ 1430 U_WORD z0, c1, c2, c3, i1, i2, i3, i4, j1, j2, j3, j4, p1, p2, p3, p4; \ 1431 i2 = (U_WORD)(x0) >> (BITS_PER_WORD / 2); \ 1432 j2 = (U_WORD)(y0) >> (BITS_PER_WORD / 2); \ 1433 p2 = i2 * j2; \ 1434 i4 = (U_WORD)(x1) >> (BITS_PER_WORD / 2); \ 1435 j4 = (U_WORD)(y1) >> (BITS_PER_WORD / 2); \ 1436 i1 = 
(U_WORD)((x0) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1437 j1 = (U_WORD)((y0) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1438 p4 = i4 * j4; \ 1439 i3 = (U_WORD)((x1) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1440 j3 = (U_WORD)((y1) << (BITS_PER_WORD / 2)) >> (BITS_PER_WORD / 2); \ 1441 z0 = i1 * j1; \ 1442 z0 >> (BITS_PER_WORD / 2); \ 1443 p1 = i1 * j2; \ 1444 p1 += z0; \ 1445 p1 >> (BITS_PER_WORD / 2); \ 1446 z1 = i1 * j3; \ 1447 z1 += p1; \ 1448 p1 = i2 * j1; \ 1449 p1 >> (BITS_PER_WORD / 2); \ 1450 p2 += p1; \ 1451 z1 += p2; \ 1452 c1 = (z1 < p2); \ 1453 p1 = i3 * j1; \ 1454 z1 += p1; \ 1455 c1 += (z1 < p1); \ 1456 z2 = i2 * j4; \ 1457 p1 = i3 * j3; \ 1458 z2 += p1; \ 1459 c2 = (z2 < p1); \ 1460 p1 = i4 * j2; \ 1461 z2 += p1; \ 1462 c2 += (z2 < p1); \ 1463 z2 += c1; \ 1464 c2 += (z2 < c1); \ 1465 z3 = p4 + c2; \ 1466 p2 = i1 * j4; \ 1467 p1 = i2 * j3; \ 1468 p2 += p1; \ 1469 c2 = (p2 < p1); \ 1470 p1 = i3 * j2; \ 1471 p2 += p1; \ 1472 c2 += (p2 < p1); \ 1473 p1 = i4 * j1; \ 1474 p2 += p1; \ 1475 c2 += (p2 < p1); \ 1476 p3 = i3 * j4; \ 1477 p1 = i4 * j3; \ 1478 p3 += p1; \ 1479 c3 = (p3 < p1); \ 1480 p3 += c2; \ 1481 c3 += (p3 < c2); \ 1482 z3 += (c3 << (BITS_PER_WORD / 2)); \ 1483 z3 += (p3 >> (BITS_PER_WORD / 2)); \ 1484 ADD_AND_CARRY( (p3 << (BITS_PER_WORD / 2)), z2, z3); \ 1485 ADD_AND_CARRY( (p2 >> (BITS_PER_WORD / 2)), z2, z3); \ 1486 ADD_AND_CARRY_2( (p2 << (BITS_PER_WORD / 2)), z1, z2, z3); \ 1487 } 1488 #endif 1489 1490 1491 1492 #ifndef ADD_AND_CARRY 1493 #define ADD_AND_CARRY(i,u1,u2) { \ 1494 U_WORD carry; \ 1495 (u1) += (i); \ 1496 carry = ((u1) < (i)); \ 1497 (u2) += carry; \ 1498 } 1499 #endif 1500 1501 1502 #ifndef ADD_AND_CARRY_2 1503 #define ADD_AND_CARRY_2(i,u1,u2,u3) { \ 1504 U_WORD carry; \ 1505 (u1) += (i); \ 1506 carry = ((u1) < (i)); \ 1507 (u2) += carry; \ 1508 carry = ((u2) < carry); \ 1509 (u3) += carry; \ 1510 } 1511 #endif 1512 1513 1514 #ifndef ADD_AND_CARRY_3 1515 #define ADD_AND_CARRY_3(i,u1,u2,u3,u4) { \ 
1516 U_WORD carry; \ 1517 (u1) += (i); \ 1518 carry = ((u1) < (i)); \ 1519 (u2) += carry; \ 1520 carry = ((u2) < carry); \ 1521 (u3) += carry; \ 1522 carry = ((u3) < carry); \ 1523 (u4) += carry; \ 1524 } 1525 #endif 1526 1527 1528 1529 #ifndef U_MUL_BY_10 1530 #define U_MUL_BY_10(i) { \ 1531 (i) = (U_WORD)(i) + ((U_WORD)(i) << 2); \ 1532 (i) = (U_WORD)(i) << 1; \ 1533 } 1534 #endif 1535 1536 #ifndef LEFT_NORMALIZE_WORD 1537 #define LEFT_NORMALIZE_WORD(i,j) { \ 1538 (j) = 0; \ 1539 while ((WORD)(i) > 0) { \ 1540 (i) <<= 1; \ 1541 (j) += 1; \ 1542 } \ 1543 } 1544 #endif 1545 1546 #ifndef SHIFT_WORD_LEFT 1547 #define SHIFT_WORD_LEFT(shift, u) { \ 1548 (u) <<= (shift); \ 1549 } 1550 #endif 1551 1552 #ifndef SHIFT_2_WORDS_LEFT 1553 #define SHIFT_2_WORDS_LEFT(shift, u1, u2) { \ 1554 ASSERT((shift) != 0); \ 1555 (u1) <<= (shift); \ 1556 (u1) |= ((u2) >> (BITS_PER_WORD - (shift))); \ 1557 (u2) <<= (shift); \ 1558 } 1559 #endif 1560 1561 #ifndef SHIFT_3_WORDS_LEFT 1562 #define SHIFT_3_WORDS_LEFT(shift, u1, u2, u3) { \ 1563 ASSERT((shift) != 0); \ 1564 (u1) <<= (shift); \ 1565 (u1) |= ((u2) >> (BITS_PER_WORD - (shift))); \ 1566 (u2) <<= (shift); \ 1567 (u2) |= ((u3) >> (BITS_PER_WORD - (shift))); \ 1568 (u3) <<= (shift); \ 1569 } 1570 #endif 1571 1572 #ifndef SHIFT_4_WORDS_LEFT 1573 #define SHIFT_4_WORDS_LEFT(shift, u1, u2, u3, u4) { \ 1574 ASSERT((shift) != 0); \ 1575 (u1) <<= (shift); \ 1576 (u1) |= ((u2) >> (BITS_PER_WORD - (shift))); \ 1577 (u2) <<= (shift); \ 1578 (u2) |= ((u3) >> (BITS_PER_WORD - (shift))); \ 1579 (u3) <<= (shift); \ 1580 (u3) |= ((u4) >> (BITS_PER_WORD - (shift))); \ 1581 (u4) <<= (shift); \ 1582 } 1583 #endif 1584 1585 #ifndef SHIFT_WORD_RIGHT 1586 #define SHIFT_WORD_RIGHT(shift, u) { \ 1587 (u) >>= (shift); \ 1588 } 1589 #endif 1590 1591 #ifndef SHIFT_2_WORDS_RIGHT 1592 #define SHIFT_2_WORDS_RIGHT(shift, u1, u2) { \ 1593 ASSERT((shift) != 0); \ 1594 (u1) >>= (shift); \ 1595 (u1) |= ((u2) << (BITS_PER_WORD - (shift))); \ 1596 (u2) >>= (shift); \ 
1597 } 1598 #endif 1599 1600 #ifndef SHIFT_3_WORDS_RIGHT 1601 #define SHIFT_3_WORDS_RIGHT(shift, u1, u2, u3) { \ 1602 ASSERT((shift) != 0); \ 1603 (u1) >>= (shift); \ 1604 (u1) |= ((u2) << (BITS_PER_WORD - (shift))); \ 1605 (u2) >>= (shift); \ 1606 (u2) |= ((u3) << (BITS_PER_WORD - (shift))); \ 1607 (u3) >>= (shift); \ 1608 } 1609 #endif 1610 1611 #ifndef SHIFT_4_WORDS_RIGHT 1612 #define SHIFT_4_WORDS_RIGHT(shift, u1, u2, u3, u4) { \ 1613 ASSERT((shift) != 0); \ 1614 (u1) >>= (shift); \ 1615 (u1) |= ((u2) << (BITS_PER_WORD - (shift))); \ 1616 (u2) >>= (shift); \ 1617 (u2) |= ((u3) << (BITS_PER_WORD - (shift))); \ 1618 (u3) >>= (shift); \ 1619 (u3) |= ((u4) << (BITS_PER_WORD - (shift))); \ 1620 (u4) >>= (shift); \ 1621 } 1622 #endif 1623 1624 1625 1626 1627 #ifndef D_GET_EXP_WORD 1628 #define D_GET_EXP_WORD(x,exp_word) { \ 1629 D_UNION u; \ 1630 u.f = (x); \ 1631 (exp_word) = u.D_HI_WORD; \ 1632 } 1633 #endif 1634 1635 #ifndef GET_EXP_WORD 1636 #define GET_EXP_WORD(x,exp_word) { \ 1637 F_UNION u; \ 1638 u.f = (x); \ 1639 (exp_word) = u.F_HI_WORD; \ 1640 } 1641 #endif 1642 1643 1644 #ifndef D_PUT_EXP_WORD 1645 #define D_PUT_EXP_WORD(x,exp_word) { \ 1646 D_UNION u; \ 1647 u.f = (x); \ 1648 u.D_HI_WORD = (exp_word); \ 1649 (x) = u.f; \ 1650 } 1651 #endif 1652 1653 #ifndef PUT_EXP_WORD 1654 #define PUT_EXP_WORD(x,exp_word) { \ 1655 F_UNION u; \ 1656 u.f = (x); \ 1657 u.F_HI_WORD = (exp_word); \ 1658 (x) = u.f; \ 1659 } 1660 #endif 1661 1662 1663 #ifndef GET_SIGN_WORD 1664 #define GET_SIGN_WORD(x,sign_word) { \ 1665 F_UNION u; \ 1666 u.f = (x); \ 1667 (sign_word) = u.F_HI_WORD; \ 1668 } 1669 #endif 1670 1671 #ifndef PUT_SIGN_WORD 1672 #define PUT_SIGN_WORD(x,sign_word) { \ 1673 F_UNION u; \ 1674 u.f = (x); \ 1675 u.F_HI_WORD = (sign_word); \ 1676 (x) = u.f; \ 1677 } 1678 #endif 1679 1680 #ifndef GET_HI_FRAC_WORD 1681 #define GET_HI_FRAC_WORD(x,hi_frac_word) { \ 1682 F_UNION u; \ 1683 u.f = (x); \ 1684 (hi_frac_word) = u.F_HI_WORD; \ 1685 } 1686 #endif 1687 1688 #ifndef 
PUT_HI_FRAC_WORD 1689 #define PUT_HI_FRAC_WORD(x,hi_frac_word) { \ 1690 F_UNION u; \ 1691 u.f = (x); \ 1692 u.F_HI_WORD = (hi_frac_word); \ 1693 (x) = u.f; \ 1694 } 1695 #endif 1696 1697 #ifndef GET_LO_FRAC_WORD 1698 #define GET_LO_FRAC_WORD(x,lo_frac_word) { \ 1699 F_UNION u; \ 1700 u.f = (x); \ 1701 (lo_frac_word) = u.F_LO_WORD; \ 1702 } 1703 #endif 1704 1705 #ifndef PUT_LO_FRAC_WORD 1706 #define PUT_LO_FRAC_WORD(x,lo_frac_word) { \ 1707 F_UNION u; \ 1708 u.f = (x); \ 1709 u.F_LO_WORD = (lo_frac_word); \ 1710 (x) = u.f; \ 1711 } 1712 #endif 1713 1714 1715 #ifndef GET_EXP_BITS 1716 #define GET_EXP_BITS(x,mask,exp_bits) { \ 1717 GET_EXP_WORD((x),(exp_bits)); \ 1718 (exp_bits) &= (mask); \ 1719 } 1720 #endif 1721 1722 #ifndef PUT_EXP_BITS 1723 #define PUT_EXP_BITS(x,mask,exp_bits) { \ 1724 F_UNION u; \ 1725 u.f = (x); \ 1726 u.F_HI_WORD &= ~(mask); \ 1727 u.F_HI_WORD |= (exp_bits); \ 1728 (x) = u.f; \ 1729 } 1730 #endif 1731 1732 #ifndef D_PUT_EXP_BITS 1733 #define D_PUT_EXP_BITS(x,mask,exp_bits) { \ 1734 D_UNION u; \ 1735 u.f = (x); \ 1736 u.D_HI_WORD &= ~(mask); \ 1737 u.D_HI_WORD |= (exp_bits); \ 1738 (x) = u.f; \ 1739 } 1740 #endif 1741 1742 #ifndef GET_EXP_FIELD 1743 #define GET_EXP_FIELD(x,exp_field) { \ 1744 GET_EXP_BITS((x),F_EXP_MASK,(exp_field)); \ 1745 } 1746 #endif 1747 1748 #ifndef F_GET_EXP_FIELD 1749 #define F_GET_EXP_FIELD(x, exp_word) { \ 1750 F_UNION u; \ 1751 u.f = (x); \ 1752 (exp_word) = u.F_HI_WORD; \ 1753 (exp_word) &= F_EXP_MASK; \ 1754 } 1755 #endif 1756 1757 #ifndef B_GET_EXP_FIELD 1758 #define B_GET_EXP_FIELD(x, exp_word) { \ 1759 B_UNION u; \ 1760 u.f = (x); \ 1761 (exp_word) = u.B_HI_WORD; \ 1762 (exp_word) &= B_EXP_MASK; \ 1763 } 1764 #endif 1765 1766 #ifndef S_GET_EXP_FIELD 1767 #define S_GET_EXP_FIELD(x, exp_word) { \ 1768 S_UNION u; \ 1769 u.f = (x); \ 1770 (exp_word) = u.S_HI_WORD; \ 1771 (exp_word) &= S_EXP_MASK; \ 1772 } 1773 #endif 1774 1775 #ifndef D_GET_EXP_FIELD 1776 #define D_GET_EXP_FIELD(x, exp_word) { \ 1777 D_UNION u; \ 
1778 u.f = (x); \ 1779 (exp_word) = u.D_HI_WORD; \ 1780 (exp_word) &= D_EXP_MASK; \ 1781 } 1782 #endif 1783 1784 1785 #ifndef PUT_EXP_FIELD 1786 #define PUT_EXP_FIELD(x,exp_field) { \ 1787 PUT_EXP_BITS((x),F_EXP_MASK,(exp_field)); \ 1788 } 1789 #endif 1790 1791 1792 #ifndef ALIGN_W_EXP_FIELD 1793 #define ALIGN_W_EXP_FIELD(w) ((U_WORD)(w) << F_EXP_POS) 1794 #endif 1795 1796 #ifndef D_ALIGN_W_EXP_FIELD 1797 #define D_ALIGN_W_EXP_FIELD(w) ((U_WORD)(w) << D_EXP_POS) 1798 #endif 1799 1800 #ifndef B_ALIGN_W_EXP_FIELD 1801 #define B_ALIGN_W_EXP_FIELD(w) ((U_WORD)(w) << B_EXP_POS) 1802 #endif 1803 1804 1805 #ifndef ALIGN_EXP_FIELD_W_WORD 1806 #define ALIGN_EXP_FIELD_W_WORD(w) (((U_WORD)(w)) >> F_EXP_POS) 1807 #endif 1808 1809 #ifndef D_ALIGN_EXP_FIELD_W_WORD 1810 #define D_ALIGN_EXP_FIELD_W_WORD(w) (((U_WORD)(w)) >> D_EXP_POS) 1811 #endif 1812 1813 #ifndef B_ALIGN_EXP_FIELD_W_WORD 1814 #define B_ALIGN_EXP_FIELD_W_WORD(w) (((U_WORD)(w)) >> B_EXP_POS) 1815 #endif 1816 1817 1818 #ifndef GET_SIGN_EXP_FIELD 1819 #define GET_SIGN_EXP_FIELD(x,sign_exp_field) { \ 1820 GET_EXP_BITS((x),F_SIGN_EXP_MASK,(sign_exp_field)); \ 1821 } 1822 #endif 1823 1824 #ifndef PUT_SIGN_EXP_FIELD 1825 #define PUT_SIGN_EXP_FIELD(x,sign_exp_field) { \ 1826 PUT_EXP_BITS((x),F_SIGN_EXP_MASK,(sign_exp_field)); \ 1827 } 1828 #endif 1829 1830 #ifndef D_PUT_SIGN_EXP_FIELD 1831 #define D_PUT_SIGN_EXP_FIELD(x,sign_exp_field) { \ 1832 D_PUT_EXP_BITS((x),D_SIGN_EXP_MASK,(sign_exp_field)); \ 1833 } 1834 #endif 1835 1836 #ifndef ADD_TO_EXP_WORD 1837 #define ADD_TO_EXP_WORD(x,increment) { \ 1838 F_UNION u; \ 1839 u.f = (x); \ 1840 u.F_HI_WORD += (increment); \ 1841 (x) = u.f; \ 1842 } 1843 #endif 1844 #ifndef B_ADD_TO_EXP_WORD 1845 #define B_ADD_TO_EXP_WORD(x,increment) { \ 1846 B_UNION u; \ 1847 u.f = (x); \ 1848 u.B_HI_WORD += (increment); \ 1849 (x) = u.f; \ 1850 } 1851 #endif 1852 1853 #ifndef ADD_TO_EXP_FIELD 1854 #define ADD_TO_EXP_FIELD(x,increment) { \ 1855 ADD_TO_EXP_WORD((x),((U_WORD)(increment) << 
F_EXP_POS)); \ 1856 } 1857 #endif 1858 #ifndef B_ADD_TO_EXP_FIELD 1859 #define B_ADD_TO_EXP_FIELD(x,increment) { \ 1860 B_ADD_TO_EXP_WORD((x),((U_WORD)(increment) << B_EXP_POS)); \ 1861 } 1862 #endif 1863 1864 #ifndef SUB_FROM_EXP_WORD 1865 #define SUB_FROM_EXP_WORD(x,decrement) { \ 1866 F_UNION u; \ 1867 u.f = (x); \ 1868 u.F_HI_WORD -= (decrement); \ 1869 (x) = u.f; \ 1870 } 1871 #endif 1872 1873 #ifndef SUB_FROM_EXP_FIELD 1874 #define SUB_FROM_EXP_FIELD(x,decrement) { \ 1875 SUB_FROM_EXP_WORD((x),((U_WORD)(decrement) << F_EXP_POS)); \ 1876 } 1877 #endif 1878 1879 #ifndef SCALE_EXPONENT_BY_INT 1880 #define SCALE_EXPONENT_BY_INT(x,increment) { \ 1881 ADD_TO_EXP_FIELD((x),(increment)); \ 1882 } 1883 #endif 1884 #ifndef B_SCALE_EXPONENT_BY_INT 1885 #define B_SCALE_EXPONENT_BY_INT(x,increment) { \ 1886 B_ADD_TO_EXP_FIELD((x),(increment)); \ 1887 } 1888 #endif 1889 1890 #ifndef SCALE_EXPONENT_BY_FLT 1891 #define SCALE_EXPONENT_BY_FLT(x,increment) { \ 1892 (x) *= F_POW_2(increment); \ 1893 } 1894 #endif 1895 #ifndef B_SCALE_EXPONENT_BY_FLT 1896 #define B_SCALE_EXPONENT_BY_FLT(x,increment) { \ 1897 (x) *= B_POW_2(increment); \ 1898 } 1899 #endif 1900 1901 1902 #if (SCALE_METHOD == by_int) 1903 1904 #ifndef SCALE_EXPONENT 1905 #define SCALE_EXPONENT(x,increment) SCALE_EXPONENT_BY_INT((x),(increment)) 1906 #endif 1907 #ifndef B_SCALE_EXPONENT 1908 # define B_SCALE_EXPONENT(x,increment) B_SCALE_EXPONENT_BY_INT((x),(increment)) 1909 #endif 1910 1911 #else /* scale by float */ 1912 1913 #ifndef SCALE_EXPONENT 1914 #define SCALE_EXPONENT(x,increment) SCALE_EXPONENT_BY_FLT((x),(increment)) 1915 #endif 1916 #ifndef B_SCALE_EXPONENT 1917 # define B_SCALE_EXPONENT(x,increment) B_SCALE_EXPONENT_BY_FLT((x),(increment)) 1918 #endif 1919 1920 #endif /* SCALE_METHOD */ 1921 1922 1923 #ifndef CVT_TO_HI_LO_BY_FLT 1924 #define CVT_TO_HI_LO_BY_FLT(x,big,y) { \ 1925 F_TYPE t = (big); \ 1926 F_COPY_SIGN(t, (x), t); \ 1927 HI(y) = (x) + t; \ 1928 HI(y) -= t; \ 1929 LO(y) = (x) - HI(y); \ 
1930 } 1931 #endif 1932 1933 1934 #ifndef CVT_TO_HI_LO_BY_FLT_SIGNED 1935 #define CVT_TO_HI_LO_BY_FLT_SIGNED(x,big,y) { \ 1936 HI(y) = (x) + (big); \ 1937 HI(y) -= (big); \ 1938 LO(y) = (x) - HI(y); \ 1939 } 1940 #endif 1941 1942 1943 #ifndef CVT_TO_HI_LO_BY_INT 1944 #define CVT_TO_HI_LO_BY_INT(x,n,y) { \ 1945 F_UNION u; \ 1946 u.f = (x); \ 1947 u.F_LO_WORD &= ~(PDP_SHUFFLE(MAKE_MASK((n), 0))); \ 1948 HI(y) = u.f; \ 1949 LO(y) = (x) - HI(y); \ 1950 } 1951 #endif 1952 1953 1954 #ifndef SPLIT_TO_HI_LO_BY_INT 1955 #if ((F_PRECISION / 2) <= BITS_PER_WORD) 1956 #define SPLIT_TO_HI_LO_BY_INT(x,y) { \ 1957 F_UNION u; \ 1958 u.f = (x); \ 1959 u.F_LO_WORD &= ~(PDP_SHUFFLE(MAKE_MASK((F_PRECISION / 2), 0))); \ 1960 HI(y) = u.f; \ 1961 LO(y) = (x) - HI(y); \ 1962 } 1963 #else 1964 #define SPLIT_TO_HI_LO_BY_INT(x,y) { \ 1965 F_UNION u; \ 1966 u.f = (x); \ 1967 u.F_LO3_WORD = 0; \ 1968 u.F_LO2_WORD &= ~(PDP_SHUFFLE(MAKE_MASK(((F_PRECISION / 2) - BITS_PER_WORD), 0))); \ 1969 HI(y) = u.f; \ 1970 LO(y) = (x) - HI(y); \ 1971 } 1972 #endif 1973 #endif 1974 1975 1976 #if PRECISION_BACKUP_AVAILABLE 1977 1978 #ifndef EXTENDED_MUL_SUB 1979 #define EXTENDED_MUL_SUB(a,b,c,y) { \ 1980 y = (B_TYPE)(a) - ( (B_TYPE)(b) * (B_TYPE)(c) ); \ 1981 } 1982 #endif 1983 1984 #ifndef QUICK_EXTENDED_MUL_SUB 1985 #define QUICK_EXTENDED_MUL_SUB(a,b,c,y) { \ 1986 y = (B_TYPE)(a) - ( (B_TYPE)(b) * (B_TYPE)(c) ); \ 1987 } 1988 #endif 1989 1990 #else /* no PRECISION_BACKUP_AVAILABLE */ 1991 1992 #ifndef EXTENDED_MUL_SUB 1993 #define EXTENDED_MUL_SUB(a,b,c,y) { \ 1994 y = ((((a \ 1995 - HI(b) * HI(c)) \ 1996 - HI(b) * LO(c)) \ 1997 - LO(b) * HI(c)) \ 1998 - LO(b) * LO(c)); \ 1999 } 2000 #endif 2001 2002 #ifndef QUICK_EXTENDED_MUL_SUB 2003 #define QUICK_EXTENDED_MUL_SUB(a,b,c,y) { \ 2004 y = ((a \ 2005 - b * HI(c)) \ 2006 - b * LO(c)); \ 2007 } 2008 #endif 2009 2010 #endif /* PRECISION_BACKUP_AVAILABLE */ 2011 2012 2013 #if (QUAD_PRECISION) && !(defined(merced) && !defined(VMS)) 2014 # define C_C_PROTO(n) 
C_p_PROTO(n) 2015 # define COMPLEX_QUAD_DECL(n) F_COMPLEX n 2016 # define COMPLEX_ARGS_INIT(x) F_TYPE PASTE(r,x)=x->r, PASTE(i,x)=x->i 2017 # define COMPLEX_ARGS(x) F_COMPLEX *x 2018 # define PASS_CMPLX(a,b,p) ( p.r = a, p.i = b, (&p)) 2019 # define COMPLEX_PROTOTYPE F_COMPLEX * 2020 # define COMPLEX_B_PROTOTYPE B_COMPLEX * 2021 #elif defined(merced) && !defined(VMS) 2022 # define C_C_PROTO(n) C_s_PROTO(n) 2023 # define COMPLEX_QUAD_DECL(n) F_COMPLEX n 2024 # define COMPLEX_ARGS_INIT(x) F_TYPE PASTE(r,x)=x.r, PASTE(i,x)=x.i 2025 # define COMPLEX_ARGS(x) F_COMPLEX x 2026 # define PASS_CMPLX(a,b,p) (p.r = a, p.i = b, p) 2027 # define COMPLEX_PROTOTYPE F_COMPLEX 2028 # define COMPLEX_B_PROTOTYPE B_COMPLEX 2029 #else 2030 # define C_C_PROTO(n) C_FF_PROTO(n) 2031 # define COMPLEX_QUAD_DECL(n) 2032 # define COMPLEX_ARGS_INIT(x) 2033 # define COMPLEX_ARGS(x) F_TYPE PASTE(r,x), F_TYPE PASTE(i,x) 2034 # define PASS_CMPLX(a,b,p) (F_TYPE) a, (F_TYPE) b 2035 # define COMPLEX_PROTOTYPE F_TYPE, F_TYPE 2036 # define COMPLEX_B_PROTOTYPE B_TYPE, B_TYPE 2037 #endif 2038 2039 2040 #ifndef S_RECEIVE_COMPLEX_RESULT 2041 # define S_RECEIVE_COMPLEX_RESULT(a,b,f) \ 2042 { S_COMPLEX _t = f; a = _t.r; b = _t.i; } 2043 #endif 2044 #ifndef S_RETURN_COMPLEX_RESULT 2045 # define S_RETURN_COMPLEX_RESULT(a,b) \ 2046 { S_COMPLEX _t; _t.r = a; _t.i = b; return _t; } 2047 #endif 2048 #ifndef D_RECEIVE_COMPLEX_RESULT 2049 # define D_RECEIVE_COMPLEX_RESULT(a,b,f) \ 2050 { D_COMPLEX _t = f; a = _t.r; b = _t.i; } 2051 #endif 2052 #ifndef D_RETURN_COMPLEX_RESULT 2053 # define D_RETURN_COMPLEX_RESULT(a,b) \ 2054 { D_COMPLEX _t; _t.r = a; _t.i = b; return _t; } 2055 #endif 2056 #ifndef Q_RECEIVE_COMPLEX_RESULT 2057 # define Q_RECEIVE_COMPLEX_RESULT(a,b,f) \ 2058 { Q_COMPLEX _t = f; a = _t.r; b = _t.i; } 2059 #endif 2060 #ifndef Q_RETURN_COMPLEX_RESULT 2061 # define Q_RETURN_COMPLEX_RESULT(a,b) \ 2062 { Q_COMPLEX _t; _t.r = a; _t.i = b; return _t; } 2063 #endif 2064 2065 2066 2067 #ifndef 
RECEIVE_COMPLEX_RESULT 2068 # if defined(SINGLE_PRECISION) 2069 # define RECEIVE_COMPLEX_RESULT(a,b,f) S_RECEIVE_COMPLEX_RESULT(a,b,f) 2070 # elif defined(DOUBLE_PRECISION) 2071 # define RECEIVE_COMPLEX_RESULT(a,b,f) D_RECEIVE_COMPLEX_RESULT(a,b,f) 2072 # else 2073 # define RECEIVE_COMPLEX_RESULT(a,b,f) Q_RECEIVE_COMPLEX_RESULT(a,b,f) 2074 # endif 2075 #endif 2076 2077 #ifndef RETURN_COMPLEX_RESULT 2078 # if defined(SINGLE_PRECISION) 2079 # define RETURN_COMPLEX_RESULT(a,b) S_RETURN_COMPLEX_RESULT(a,b) 2080 # elif defined(DOUBLE_PRECISION) 2081 # define RETURN_COMPLEX_RESULT(a,b) D_RETURN_COMPLEX_RESULT(a,b) 2082 # else 2083 # define RETURN_COMPLEX_RESULT(a,b) Q_RETURN_COMPLEX_RESULT(a,b) 2084 # endif 2085 #endif 2086 2087 2088 #ifndef ADD_SUB_BIG 2089 # define ADD_SUB_BIG(x,big) \ 2090 (x) += (big); (x) -= (big) 2091 #endif 2092 2093 #ifndef SHORTEN_VIA_CASTS 2094 # define SHORTEN_VIA_CASTS(in,out) \ 2095 (out) = (F_TYPE)((R_TYPE)(in)) 2096 #endif 2097 2098 #ifndef ASSIGN_WITH_F_TYPE_PRECISION 2099 # define ASSIGN_WITH_F_TYPE_PRECISION(x,y) \ 2100 (y) = (F_TYPE)(x) 2101 #endif 2102 2103 /* 2104 * The following macros are use to scale denormalized values to normalized 2105 * results. All scaling is done by an implicit multiplication by a power 2106 * of two. The power of two used to scale the denormalized values is 2107 * defined by the macro __LOG2_DENORM_SCALE, which defaults to F_PRECISION. 2108 * Based on __LOG2_DENORM_SCALE, three other constants are specified for 2109 * convienence: 2110 * 2111 * __DENORM_SCALE 2^__LOG2_DENORM_SCALE 2112 * __DENORM_SCALE_BIASED_EXP the aligned, biased and unbiased 2113 * __DENORM_SCALE_UNBIASED_EXP exponent field of __DENORM_SCALE 2114 * __LOG2_DENORM_SCALE_ALIGNED_W_EXP __LOG2_DENORM_SCALE aligned with 2115 * exponent field 2116 * 2117 * The technique used for scaling involves minipulataing the exponent field 2118 * of the value to be scaled. 
Specifically, if x is denormalized value with 2119 * bit pattern: 2120 * 2121 * +-+-----------+------------------------+ 2122 * x: |s|000 ... 000| F | 2123 * +-+-----------+------------------------+ 2124 * 2125 * Then x = (-1)^s*2^F_MIN_BIN_EXP*2^F_NORM*[F/2^(P_PRECISION - 1)]. Define u 2126 * and v, to be a floating point numbers with the following bits patterns: 2127 * 2128 * +-+-----------+------------------------+ 2129 * u: |s| E | F | 2130 * +-+-----------+------------------------+ 2131 * 2132 * +-+-----------+------------------------+ 2133 * v: |s| E | 0 | 2134 * +-+-----------+------------------------+ 2135 * 2136 * I.e. u has the bit pattern of x, with the exponent field set to E and v 2137 * is u with the fraction field cleared. It follows that u and v have values: 2138 * 2139 * u = (-1)^s*2^(E-F_EXP_BIAS)*2^F_NORM*[1 + F/2^(P_PRECISION - 1)] 2140 * v = (-1)^s*2^(E-F_EXP_BIAS)*2^F_NORM 2141 * 2142 * If z is defined as u - v, then 2143 * 2144 * z = (-1)^s*2^(E-F_EXP_BIAS)*2^F_NORM*[F/2^(P_PRECISION - 1)] 2145 * = 2^*(E-F_EXP_BIAS-F_MIN_BIN_EXP)* 2146 * (-1)^s*2^F_MIN_BIN_EXP*2^F_NORM*[F/2^(P_PRECISION - 1)] 2147 * = 2^*(E-F_EXP_BIAS-F_MIN_BIN_EXP)*x 2148 * 2149 * I.e. z is x scaled up by 2^e, where e = E - F_EXP_BIAS - F_MIN_BIN_EXP. In 2150 * the macros below, specifying __LOG2_DENORM_SCALE is equivalent to specifying 2151 * e in the above discussion. 
2152 */ 2153 2154 #if !defined(__LOG2_DENORM_SCALE) 2155 # if F_COPY_SIGN_AND_EXP_IS_FAST 2156 # define __LOG2_DENORM_SCALE (F_PRECISION - F_MIN_BIN_EXP) 2157 # else 2158 # define __LOG2_DENORM_SCALE F_PRECISION 2159 # endif 2160 #endif 2161 2162 #undef __DENORM_SCALE_UNBIASED_EXP 2163 #define __DENORM_SCALE_UNBIASED_EXP ALIGN_W_EXP_FIELD(__LOG2_DENORM_SCALE \ 2164 - F_NORM) 2165 #undef __DENORM_SCALE_BIASED_EXP 2166 #define __DENORM_SCALE_BIASED_EXP ALIGN_W_EXP_FIELD(__LOG2_DENORM_SCALE \ 2167 - F_NORM + F_EXP_BIAS) 2168 2169 #undef __LOG2_DENORM_SCALE_ALIGNED_W_EXP 2170 #define __LOG2_DENORM_SCALE_ALIGNED_W_EXP \ 2171 ALIGN_W_EXP_FIELD(__LOG2_DENORM_SCALE) 2172 2173 #define __LOG2_DENORM_CONST (__LOG2_DENORM_SCALE + F_NORM + \ 2174 F_MIN_BIN_EXP) 2175 #define __DENORM_CONST_BIASED_EXP ALIGN_W_EXP_FIELD(__LOG2_DENORM_CONST \ 2176 - F_NORM + F_EXP_BIAS) 2177 2178 #if F_COPY_SIGN_AND_EXP_IS_FAST && \ 2179 (__LOG2_DENORM_CONST >= 0) && (__LOG2_DENORM_CONST <= __MAX_F_POW_2_EXP) 2180 2181 # undef __DENORM_CONST 2182 # define __DENORM_CONST (F_TYPE) F_POW_2(__LOG2_DENORM_CONST) 2183 2184 # if defined(__NEED_SIGNED_DENORM_TO_NORM) 2185 # define DENORM_TO_NORM(p,q) \ 2186 { \ 2187 F_TYPE __denorm_const; \ 2188 F_COPY_SIGN(__DENORM_CONST,p,__denorm_const); \ 2189 F_COPY_SIGN_AND_EXP(p, __denorm_const, q); \ 2190 q -= __denorm_const; \ 2191 } 2192 # else 2193 # define DENORM_TO_NORM(p,q) \ 2194 { \ 2195 F_COPY_SIGN_AND_EXP(p, __DENORM_CONST, q); \ 2196 q -= __DENORM_CONST; \ 2197 } 2198 # endif 2199 2200 # define DENORM_TO_NORM_AND_EXP(p,e,q) \ 2201 { DENORM_TO_NORM(p,q); GET_EXP_FIELD(q,e) } 2202 2203 #else 2204 2205 # define __DENORM_TO_NORM_EXP ALIGN_W_EXP_FIELD(__LOG2_DENORM_SCALE + \ 2206 F_NORM + F_EXP_BIAS + F_MIN_BIN_EXP) 2207 # define __DENORM_TO_NORM(p,q) \ 2208 F_UNION u; \ 2209 u.f = p; \ 2210 u.F_HI_WORD = (u.F_HI_WORD & ~F_EXP_MASK) | \ 2211 __DENORM_TO_NORM_EXP; \ 2212 q = u.f; \ 2213 u.F_HI_WORD &= F_SIGN_EXP_MASK; \ 2214 CLEAR_LOW_BITS(u); \ 2215 q -= u.f 
2216 2217 # define DENORM_TO_NORM(p,q) { __DENORM_TO_NORM(p,q); } 2218 2219 # define DENORM_TO_NORM_AND_EXP(p,e,q) \ 2220 { \ 2221 __DENORM_TO_NORM(p,q); \ 2222 u.f = q; \ 2223 e = u.F_HI_WORD & F_EXP_MASK; \ 2224 } 2225 2226 #endif 2227 2228 /* 2229 * The following macros support extended precision multiplication of a sequence 2230 * of unsigned HALF_WORDs. The basic operation is an extended integer multiply 2231 * and add. It has four inputs and three results. The inputs are an addend 2232 * in hi and lo parts (w_hi, w_lo), the carry in from the previous operation, 2233 * c_in, and the multiplier and multiplicand F and g. The three outputs are 2234 * the carry out, c_out, and the hi and lo digits of the sum, z_hi and z_lo. 2235 * Letting B = 2^BITS_PER_WORD, the basic operation is 2236 * 2237 * c_out*B^2 + z_hi*B + z_lo <== (w_hi*B + w_lo) + c_in*B + F*g 2238 * 2239 * The are 6 different macros, one for the basic operation and 5 special 2240 * cases. E.g. ignore the carry out or carry is zero. 2241 * 2242 * They macros are defined as a group in order to be consistent. If 2243 * BITS_PER_DIGIT is defined, it is assumed that the arithmetic macros have 2244 * been in one of the architecture specific include files. 
2245 */ 2246 2247 #if !defined(BITS_PER_DIGIT) 2248 2249 # define BITS_PER_DIGIT BITS_PER_HALF_WORD 2250 # define DIGIT_TYPE PASTE_2(U_INT_, BITS_PER_DIGIT) 2251 # define SIGNED_DIGIT_TYPE PASTE_2(INT_, BITS_PER_DIGIT) 2252 2253 2254 # define XMUL_XADDC_W_C_IN(F, g, w_hi, w_lo, c_in, c_out, z_hi, z_lo) \ 2255 { \ 2256 U_WORD prod, addend, t; \ 2257 \ 2258 prod = ((U_WORD) F)*((U_WORD) g); \ 2259 addend = ((U_WORD)w_hi << BITS_PER_DIGIT) + (U_WORD) w_lo; \ 2260 t = (U_WORD) c_in << BITS_PER_DIGIT; \ 2261 prod += t; /* no carry out possible */ \ 2262 prod += addend; \ 2263 c_out = (prod < addend); \ 2264 z_hi = prod >> BITS_PER_DIGIT; \ 2265 z_lo = prod & MAKE_MASK(BITS_PER_DIGIT, 0); \ 2266 } 2267 2268 # define XMUL_XADD_W_C_IN(F, g, w_hi, w_lo, c_in, z_hi, z_lo) \ 2269 { \ 2270 U_WORD prod, addend, t; \ 2271 \ 2272 prod = ((U_WORD) F)*((U_WORD) g); \ 2273 addend = ((U_WORD) w_hi << BITS_PER_DIGIT) + (U_WORD) w_lo; \ 2274 t = (U_WORD) c_in << BITS_PER_DIGIT; \ 2275 prod += t; /* no carry out possible */ \ 2276 prod += addend; \ 2277 z_hi = prod >> BITS_PER_DIGIT; \ 2278 z_lo = prod & MAKE_MASK(BITS_PER_DIGIT, 0); \ 2279 } 2280 2281 # define XMUL_XADDC(F, g, w_hi, w_lo, c_out, z_hi, z_lo) \ 2282 { \ 2283 U_WORD prod, addend; \ 2284 \ 2285 prod = ((U_WORD) F)*((U_WORD) g); \ 2286 addend = ((U_WORD) w_hi << BITS_PER_DIGIT) + (U_WORD) w_lo; \ 2287 prod += addend; \ 2288 c_out = (prod < addend); \ 2289 z_hi = prod >> BITS_PER_DIGIT; \ 2290 z_lo = prod & MAKE_MASK(BITS_PER_DIGIT, 0); \ 2291 } 2292 2293 # define XMUL_XADD(F, g, w_hi, w_lo, z_hi, z_lo) \ 2294 { \ 2295 U_WORD prod, addend; \ 2296 \ 2297 prod = ((U_WORD) F)*((U_WORD) g); \ 2298 addend = ((U_WORD) w_hi << BITS_PER_DIGIT) + (U_WORD) w_lo; \ 2299 prod += addend; \ 2300 z_hi = prod >> BITS_PER_DIGIT; \ 2301 z_lo = prod & MAKE_MASK(BITS_PER_DIGIT, 0); \ 2302 } 2303 2304 # define XMUL_ADD(F, g, w_lo, z_hi, z_lo) \ 2305 { \ 2306 U_WORD prod; \ 2307 \ 2308 prod = ((U_WORD) F)*((U_WORD) g); \ 2309 prod += (U_WORD) 
w_lo; \ 2310 z_hi = prod >> BITS_PER_DIGIT; \ 2311 z_lo = prod & MAKE_MASK(BITS_PER_DIGIT, 0); \ 2312 } 2313 2314 # define MUL_ADD(F, g, w_lo, z_lo) z_lo = F*g + w_lo 2315 2316 # define XMUL(F, g, z_hi, z_lo) \ 2317 { \ 2318 U_WORD prod; \ 2319 \ 2320 prod = ((U_WORD) F)*((U_WORD) g); \ 2321 z_hi = prod >> BITS_PER_DIGIT; \ 2322 z_lo = prod & MAKE_MASK(BITS_PER_DIGIT, 0); \ 2323 } 2324 2325 #endif /* !defined(BITS_PER_DIGIT) */ 2326 2327 /* 2328 ** It is occasionally useful to access the high or low 32 bits of a double 2329 ** precison as a 32 bit integer. Unfortunately, for some architectures, 2330 ** (notably, alpha ev6) this can result in a memory access trap cause by 2331 ** writing 32 bits and then trying to read 64 bits from the same location. 2332 ** To work around this problem, we define the "load/store" integer type and 2333 ** appropriate macros. 2334 */ 2335 2336 #if defined(HAS_LOAD_WRONG_STORE_SIZE_PENALTY) 2337 # define BITS_PER_LS_INT_TYPE BITS_PER_WORD 2338 # define LS_INT_TYPE WORD 2339 # define U_LS_INT_TYPE U_WORD 2340 # define B_HI_LS_INT_TYPE B_SIGNED_HI_WORD 2341 #else 2342 # define BITS_PER_LS_INT_TYPE BITS_PER_INT 2343 # define LS_INT_TYPE INT_32 2344 # define U_LS_INT_TYPE U_INT_32 2345 # define B_HI_LS_INT_TYPE B_SIGNED_HI_32 2346 #endif 2347 2348 /* 2349 ** For platforms that have hardware SQRT instructions available (e.g., EV6), 2350 ** the performance of some DPML functions may be improved by replacing a call 2351 ** to (or the inlining of) the SQRT function with the equivalent hardware 2352 ** instruction. 
2353 */ 2354 2355 #if IEEE_FLOATING 2356 # define S_HW_SQRT_NAME(x) __SQRTS(x) 2357 # define D_HW_SQRT_NAME(x) __SQRTT(x) 2358 #elif VAX_FLOATING 2359 # define S_HW_SQRT_NAME(x) __SQRTF(x) 2360 # define D_HW_SQRT_NAME(x) __SQRTG(x) 2361 #endif 2362 2363 #define S_HW_SQRT(x,y) (y = S_HW_SQRT_NAME(x)) 2364 #define D_HW_SQRT(x,y) (y = D_HW_SQRT_NAME(x)) 2365 2366 #if SINGLE_PRECISION 2367 # define F_HW_SQRT_NAME S_HW_SQRT_NAME 2368 # define B_HW_SQRT_NAME D_HW_SQRT_NAME 2369 # define F_HW_SQRT S_HW_SQRT 2370 # define B_HW_SQRT D_HW_SQRT 2371 #elif DOUBLE_PRECISION 2372 # define F_HW_SQRT_NAME D_HW_SQRT_NAME 2373 # define B_HW_SQRT_NAME D_HW_SQRT_NAME 2374 # define F_HW_SQRT D_HW_SQRT 2375 # define B_HW_SQRT D_HW_SQRT 2376 #else 2377 # define F_HW_SQRT_NAME F_SQRT_NAME 2378 # define B_HW_SQRT_NAME B_SQRT_NAME 2379 # define F_HW_SQRT F_SQRT 2380 # define B_HW_SQRT B_SQRT 2381 #endif 2382 2383 #if defined(HAS_SQRT_INSTRUCTION) 2384 # define F_HW_OR_SW_SQRT_NAME F_HW_SQRT_NAME 2385 # define B_HW_OR_SW_SQRT_NAME B_HW_SQRT_NAME 2386 # define F_HW_OR_SW_SQRT F_HW_SQRT 2387 # define B_HW_OR_SW_SQRT B_HW_SQRT 2388 #else 2389 # define F_HW_OR_SW_SQRT_NAME F_SQRT_NAME 2390 # define B_HW_OR_SW_SQRT_NAME B_SQRT_NAME 2391 # define F_HW_OR_SW_SQRT F_SQRT 2392 # define B_HW_OR_SW_SQRT B_SQRT 2393 #endif 2394 2395 /* F_HW_OR_SW_PRECISE_SQRT is defined for hypot to use 2396 ** F_PRECISE_SQRT which is defined in sqrt_macros.h. 2397 ** Both F_PRECISE_SQRT and F_HW_OR_SW_PRECISE_SQRT are 2398 ** used only in dpml_hypot.c 2399 */ 2400 2401 #if defined(HAS_SQRT_INSTRUCTION) 2402 # define F_HW_OR_SW_PRECISE_SQRT F_HW_SQRT 2403 #else 2404 # define F_HW_OR_SW_PRECISE_SQRT F_PRECISE_SQRT 2405 # endif 2406 2407 #if defined GROUP 2408 # define D_GROUP(x) GROUP(x) 2409 #else 2410 # define D_GROUP_NAME PASTE_2(__INTERNAL_NAME(group),_d) 2411 extern double D_GROUP_NAME( double ); 2412 # define D_GROUP(x) D_GROUP_NAME(x) 2413 #endif 2414 2415 #endif /* DPML_PRIVATE_H */ 2416 2417