1// Copyright 2014 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// This file implements multi-precision floating-point numbers. 6// Like in the GNU MPFR library (http://www.mpfr.org/), operands 7// can be of mixed precision. Unlike MPFR, the rounding mode is 8// not specified with each operation, but with each operand. The 9// rounding mode of the result operand determines the rounding 10// mode of an operation. This is a from-scratch implementation. 11 12package big 13 14import ( 15 "fmt" 16 "math" 17 "math/bits" 18) 19 20const debugFloat = false // enable for debugging 21 22// A nonzero finite Float represents a multi-precision floating point number 23// 24// sign × mantissa × 2**exponent 25// 26// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 27// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 28// All Floats are ordered, and the ordering of two Floats x and y 29// is defined by x.Cmp(y). 30// 31// Each Float value also has a precision, rounding mode, and accuracy. 32// The precision is the maximum number of mantissa bits available to 33// represent the value. The rounding mode specifies how a result should 34// be rounded to fit into the mantissa bits, and accuracy describes the 35// rounding error with respect to the exact result. 36// 37// Unless specified otherwise, all operations (including setters) that 38// specify a *Float variable for the result (usually via the receiver 39// with the exception of MantExp), round the numeric result according 40// to the precision and rounding mode of the result variable. 41// 42// If the provided result precision is 0 (see below), it is set to the 43// precision of the argument with the largest precision value before any 44// rounding takes place, and the rounding mode remains unchanged. Thus, 45// uninitialized Floats provided as result arguments will have their 46// precision set to a reasonable value determined by the operands and 47// their mode is the zero value for RoundingMode (ToNearestEven). 48// 49// By setting the desired precision to 24 or 53 and using matching rounding 50// mode (typically ToNearestEven), Float operations produce the same results 51// as the corresponding float32 or float64 IEEE-754 arithmetic for operands 52// that correspond to normal (i.e., not denormal) float32 or float64 numbers. 53// Exponent underflow and overflow lead to a 0 or an Infinity for different 54// values than IEEE-754 because Float exponents have a much larger range. 55// 56// The zero (uninitialized) value for a Float is ready to use and represents 57// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 58// 59type Float struct { 60 prec uint32 61 mode RoundingMode 62 acc Accuracy 63 form form 64 neg bool 65 mant nat 66 exp int32 67} 68 69// An ErrNaN panic is raised by a Float operation that would lead to 70// a NaN under IEEE-754 rules. An ErrNaN implements the error interface. 71type ErrNaN struct { 72 msg string 73} 74 75func (err ErrNaN) Error() string { 76 return err.msg 77} 78 79// NewFloat allocates and returns a new Float set to x, 80// with precision 53 and rounding mode ToNearestEven. 81// NewFloat panics with ErrNaN if x is a NaN. 82func NewFloat(x float64) *Float { 83 if math.IsNaN(x) { 84 panic(ErrNaN{"NewFloat(NaN)"}) 85 } 86 return new(Float).SetFloat64(x) 87} 88 89// Exponent and precision limits. 90const ( 91 MaxExp = math.MaxInt32 // largest supported exponent 92 MinExp = math.MinInt32 // smallest supported exponent 93 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 94) 95 96// Internal representation: The mantissa bits x.mant of a nonzero finite 97// Float x are stored in a nat slice long enough to hold up to x.prec bits; 98// the slice may (but doesn't have to) be shorter if the mantissa contains 99// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 100// the msb is shifted all the way "to the left"). Thus, if the mantissa has 101// trailing 0 bits or x.prec is not a multiple of the Word size _W, 102// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 103// to the value 0.5; the exponent x.exp shifts the binary point as needed. 104// 105// A zero or non-finite Float x ignores x.mant and x.exp. 106// 107// x form neg mant exp 108// ---------------------------------------------------------- 109// ±0 zero sign - - 110// 0 < |x| < +Inf finite sign mantissa exponent 111// ±Inf inf sign - - 112 113// A form value describes the internal representation. 114type form byte 115 116// The form value order is relevant - do not change! 117const ( 118 zero form = iota 119 finite 120 inf 121) 122 123// RoundingMode determines how a Float value is rounded to the 124// desired precision. Rounding may change the Float value; the 125// rounding error is described by the Float's Accuracy. 126type RoundingMode byte 127 128// These constants define supported rounding modes. 129const ( 130 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 131 ToNearestAway // == IEEE 754-2008 roundTiesToAway 132 ToZero // == IEEE 754-2008 roundTowardZero 133 AwayFromZero // no IEEE 754-2008 equivalent 134 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 135 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 136) 137 138//go:generate stringer -type=RoundingMode 139 140// Accuracy describes the rounding error produced by the most recent 141// operation that generated a Float value, relative to the exact value. 142type Accuracy int8 143 144// Constants describing the Accuracy of a Float. 145const ( 146 Below Accuracy = -1 147 Exact Accuracy = 0 148 Above Accuracy = +1 149) 150 151//go:generate stringer -type=Accuracy 152 153// SetPrec sets z's precision to prec and returns the (possibly) rounded 154// value of z. Rounding occurs according to z's rounding mode if the mantissa 155// cannot be represented in prec bits without loss of precision. 156// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 157// If prec > MaxPrec, it is set to MaxPrec. 158func (z *Float) SetPrec(prec uint) *Float { 159 z.acc = Exact // optimistically assume no rounding is needed 160 161 // special case 162 if prec == 0 { 163 z.prec = 0 164 if z.form == finite { 165 // truncate z to 0 166 z.acc = makeAcc(z.neg) 167 z.form = zero 168 } 169 return z 170 } 171 172 // general case 173 if prec > MaxPrec { 174 prec = MaxPrec 175 } 176 old := z.prec 177 z.prec = uint32(prec) 178 if z.prec < old { 179 z.round(0) 180 } 181 return z 182} 183 184func makeAcc(above bool) Accuracy { 185 if above { 186 return Above 187 } 188 return Below 189} 190 191// SetMode sets z's rounding mode to mode and returns an exact z. 192// z remains unchanged otherwise. 193// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 194func (z *Float) SetMode(mode RoundingMode) *Float { 195 z.mode = mode 196 z.acc = Exact 197 return z 198} 199 200// Prec returns the mantissa precision of x in bits. 201// The result may be 0 for |x| == 0 and |x| == Inf. 202func (x *Float) Prec() uint { 203 return uint(x.prec) 204} 205 206// MinPrec returns the minimum precision required to represent x exactly 207// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 208// The result is 0 for |x| == 0 and |x| == Inf. 209func (x *Float) MinPrec() uint { 210 if x.form != finite { 211 return 0 212 } 213 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 214} 215 216// Mode returns the rounding mode of x. 217func (x *Float) Mode() RoundingMode { 218 return x.mode 219} 220 221// Acc returns the accuracy of x produced by the most recent operation. 222func (x *Float) Acc() Accuracy { 223 return x.acc 224} 225 226// Sign returns: 227// 228// -1 if x < 0 229// 0 if x is ±0 230// +1 if x > 0 231// 232func (x *Float) Sign() int { 233 if debugFloat { 234 x.validate() 235 } 236 if x.form == zero { 237 return 0 238 } 239 if x.neg { 240 return -1 241 } 242 return 1 243} 244 245// MantExp breaks x into its mantissa and exponent components 246// and returns the exponent. If a non-nil mant argument is 247// provided its value is set to the mantissa of x, with the 248// same precision and rounding mode as x. The components 249// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 250// Calling MantExp with a nil argument is an efficient way to 251// get the exponent of the receiver. 252// 253// Special cases are: 254// 255// ( ±0).MantExp(mant) = 0, with mant set to ±0 256// (±Inf).MantExp(mant) = 0, with mant set to ±Inf 257// 258// x and mant may be the same in which case x is set to its 259// mantissa value. 260func (x *Float) MantExp(mant *Float) (exp int) { 261 if debugFloat { 262 x.validate() 263 } 264 if x.form == finite { 265 exp = int(x.exp) 266 } 267 if mant != nil { 268 mant.Copy(x) 269 if mant.form == finite { 270 mant.exp = 0 271 } 272 } 273 return 274} 275 276func (z *Float) setExpAndRound(exp int64, sbit uint) { 277 if exp < MinExp { 278 // underflow 279 z.acc = makeAcc(z.neg) 280 z.form = zero 281 return 282 } 283 284 if exp > MaxExp { 285 // overflow 286 z.acc = makeAcc(!z.neg) 287 z.form = inf 288 return 289 } 290 291 z.form = finite 292 z.exp = int32(exp) 293 z.round(sbit) 294} 295 296// SetMantExp sets z to mant × 2**exp and and returns z. 297// The result z has the same precision and rounding mode 298// as mant. SetMantExp is an inverse of MantExp but does 299// not require 0.5 <= |mant| < 1.0. Specifically: 300// 301// mant := new(Float) 302// new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 303// 304// Special cases are: 305// 306// z.SetMantExp( ±0, exp) = ±0 307// z.SetMantExp(±Inf, exp) = ±Inf 308// 309// z and mant may be the same in which case z's exponent 310// is set to exp. 311func (z *Float) SetMantExp(mant *Float, exp int) *Float { 312 if debugFloat { 313 z.validate() 314 mant.validate() 315 } 316 z.Copy(mant) 317 if z.form != finite { 318 return z 319 } 320 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 321 return z 322} 323 324// Signbit returns true if x is negative or negative zero. 325func (x *Float) Signbit() bool { 326 return x.neg 327} 328 329// IsInf reports whether x is +Inf or -Inf. 330func (x *Float) IsInf() bool { 331 return x.form == inf 332} 333 334// IsInt reports whether x is an integer. 335// ±Inf values are not integers. 336func (x *Float) IsInt() bool { 337 if debugFloat { 338 x.validate() 339 } 340 // special cases 341 if x.form != finite { 342 return x.form == zero 343 } 344 // x.form == finite 345 if x.exp <= 0 { 346 return false 347 } 348 // x.exp > 0 349 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 350} 351 352// debugging support 353func (x *Float) validate() { 354 if !debugFloat { 355 // avoid performance bugs 356 panic("validate called but debugFloat is not set") 357 } 358 if x.form != finite { 359 return 360 } 361 m := len(x.mant) 362 if m == 0 { 363 panic("nonzero finite number with empty mantissa") 364 } 365 const msb = 1 << (_W - 1) 366 if x.mant[m-1]&msb == 0 { 367 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) 368 } 369 if x.prec == 0 { 370 panic("zero precision finite number") 371 } 372} 373 374// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 375// sbit must be 0 or 1 and summarizes any "sticky bit" information one might 376// have before calling round. z's mantissa must be normalized (with the msb set) 377// or empty. 378// 379// CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 380// sign of z. For correct rounding, the sign of z must be set correctly before 381// calling round. 382func (z *Float) round(sbit uint) { 383 if debugFloat { 384 z.validate() 385 } 386 387 z.acc = Exact 388 if z.form != finite { 389 // ±0 or ±Inf => nothing left to do 390 return 391 } 392 // z.form == finite && len(z.mant) > 0 393 // m > 0 implies z.prec > 0 (checked by validate) 394 395 m := uint32(len(z.mant)) // present mantissa length in words 396 bits := m * _W // present mantissa bits; bits > 0 397 if bits <= z.prec { 398 // mantissa fits => nothing to do 399 return 400 } 401 // bits > z.prec 402 403 // Rounding is based on two bits: the rounding bit (rbit) and the 404 // sticky bit (sbit). The rbit is the bit immediately before the 405 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 406 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 407 // 408 // rbit sbit => "fractional part" 409 // 410 // 0 0 == 0 411 // 0 1 > 0 , < 0.5 412 // 1 0 == 0.5 413 // 1 1 > 0.5, < 1.0 414 415 // bits > z.prec: mantissa too large => round 416 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 417 rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit 418 // The sticky bit is only needed for rounding ToNearestEven 419 // or when the rounding bit is zero. Avoid computation otherwise. 420 if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) { 421 sbit = z.mant.sticky(r) 422 } 423 sbit &= 1 // be safe and ensure it's a single bit 424 425 // cut off extra words 426 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 427 if m > n { 428 copy(z.mant, z.mant[m-n:]) // move n last words to front 429 z.mant = z.mant[:n] 430 } 431 432 // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word 433 ntz := n*_W - z.prec // 0 <= ntz < _W 434 lsb := Word(1) << ntz 435 436 // round if result is inexact 437 if rbit|sbit != 0 { 438 // Make rounding decision: The result mantissa is truncated ("rounded down") 439 // by default. Decide if we need to increment, or "round up", the (unsigned) 440 // mantissa. 441 inc := false 442 switch z.mode { 443 case ToNegativeInf: 444 inc = z.neg 445 case ToZero: 446 // nothing to do 447 case ToNearestEven: 448 inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) 449 case ToNearestAway: 450 inc = rbit != 0 451 case AwayFromZero: 452 inc = true 453 case ToPositiveInf: 454 inc = !z.neg 455 default: 456 panic("unreachable") 457 } 458 459 // A positive result (!z.neg) is Above the exact result if we increment, 460 // and it's Below if we truncate (Exact results require no rounding). 461 // For a negative result (z.neg) it is exactly the opposite. 462 z.acc = makeAcc(inc != z.neg) 463 464 if inc { 465 // add 1 to mantissa 466 if addVW(z.mant, z.mant, lsb) != 0 { 467 // mantissa overflow => adjust exponent 468 if z.exp >= MaxExp { 469 // exponent overflow 470 z.form = inf 471 return 472 } 473 z.exp++ 474 // adjust mantissa: divide by 2 to compensate for exponent adjustment 475 shrVU(z.mant, z.mant, 1) 476 // set msb == carry == 1 from the mantissa overflow above 477 const msb = 1 << (_W - 1) 478 z.mant[n-1] |= msb 479 } 480 } 481 } 482 483 // zero out trailing bits in least-significant word 484 z.mant[0] &^= lsb - 1 485 486 if debugFloat { 487 z.validate() 488 } 489} 490 491func (z *Float) setBits64(neg bool, x uint64) *Float { 492 if z.prec == 0 { 493 z.prec = 64 494 } 495 z.acc = Exact 496 z.neg = neg 497 if x == 0 { 498 z.form = zero 499 return z 500 } 501 // x != 0 502 z.form = finite 503 s := bits.LeadingZeros64(x) 504 z.mant = z.mant.setUint64(x << uint(s)) 505 z.exp = int32(64 - s) // always fits 506 if z.prec < 64 { 507 z.round(0) 508 } 509 return z 510} 511 512// SetUint64 sets z to the (possibly rounded) value of x and returns z. 513// If z's precision is 0, it is changed to 64 (and rounding will have 514// no effect). 515func (z *Float) SetUint64(x uint64) *Float { 516 return z.setBits64(false, x) 517} 518 519// SetInt64 sets z to the (possibly rounded) value of x and returns z. 520// If z's precision is 0, it is changed to 64 (and rounding will have 521// no effect). 522func (z *Float) SetInt64(x int64) *Float { 523 u := x 524 if u < 0 { 525 u = -u 526 } 527 // We cannot simply call z.SetUint64(uint64(u)) and change 528 // the sign afterwards because the sign affects rounding. 529 return z.setBits64(x < 0, uint64(u)) 530} 531 532// SetFloat64 sets z to the (possibly rounded) value of x and returns z. 533// If z's precision is 0, it is changed to 53 (and rounding will have 534// no effect). SetFloat64 panics with ErrNaN if x is a NaN. 535func (z *Float) SetFloat64(x float64) *Float { 536 if z.prec == 0 { 537 z.prec = 53 538 } 539 if math.IsNaN(x) { 540 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 541 } 542 z.acc = Exact 543 z.neg = math.Signbit(x) // handle -0, -Inf correctly 544 if x == 0 { 545 z.form = zero 546 return z 547 } 548 if math.IsInf(x, 0) { 549 z.form = inf 550 return z 551 } 552 // normalized x != 0 553 z.form = finite 554 fmant, exp := math.Frexp(x) // get normalized mantissa 555 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 556 z.exp = int32(exp) // always fits 557 if z.prec < 53 { 558 z.round(0) 559 } 560 return z 561} 562 563// fnorm normalizes mantissa m by shifting it to the left 564// such that the msb of the most-significant word (msw) is 1. 565// It returns the shift amount. It assumes that len(m) != 0. 566func fnorm(m nat) int64 { 567 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 568 panic("msw of mantissa is 0") 569 } 570 s := nlz(m[len(m)-1]) 571 if s > 0 { 572 c := shlVU(m, m, s) 573 if debugFloat && c != 0 { 574 panic("nlz or shlVU incorrect") 575 } 576 } 577 return int64(s) 578} 579 580// SetInt sets z to the (possibly rounded) value of x and returns z. 581// If z's precision is 0, it is changed to the larger of x.BitLen() 582// or 64 (and rounding will have no effect). 583func (z *Float) SetInt(x *Int) *Float { 584 // TODO(gri) can be more efficient if z.prec > 0 585 // but small compared to the size of x, or if there 586 // are many trailing 0's. 587 bits := uint32(x.BitLen()) 588 if z.prec == 0 { 589 z.prec = umax32(bits, 64) 590 } 591 z.acc = Exact 592 z.neg = x.neg 593 if len(x.abs) == 0 { 594 z.form = zero 595 return z 596 } 597 // x != 0 598 z.mant = z.mant.set(x.abs) 599 fnorm(z.mant) 600 z.setExpAndRound(int64(bits), 0) 601 return z 602} 603 604// SetRat sets z to the (possibly rounded) value of x and returns z. 605// If z's precision is 0, it is changed to the largest of a.BitLen(), 606// b.BitLen(), or 64; with x = a/b. 607func (z *Float) SetRat(x *Rat) *Float { 608 if x.IsInt() { 609 return z.SetInt(x.Num()) 610 } 611 var a, b Float 612 a.SetInt(x.Num()) 613 b.SetInt(x.Denom()) 614 if z.prec == 0 { 615 z.prec = umax32(a.prec, b.prec) 616 } 617 return z.Quo(&a, &b) 618} 619 620// SetInf sets z to the infinite Float -Inf if signbit is 621// set, or +Inf if signbit is not set, and returns z. The 622// precision of z is unchanged and the result is always 623// Exact. 624func (z *Float) SetInf(signbit bool) *Float { 625 z.acc = Exact 626 z.form = inf 627 z.neg = signbit 628 return z 629} 630 631// Set sets z to the (possibly rounded) value of x and returns z. 632// If z's precision is 0, it is changed to the precision of x 633// before setting z (and rounding will have no effect). 634// Rounding is performed according to z's precision and rounding 635// mode; and z's accuracy reports the result error relative to the 636// exact (not rounded) result. 637func (z *Float) Set(x *Float) *Float { 638 if debugFloat { 639 x.validate() 640 } 641 z.acc = Exact 642 if z != x { 643 z.form = x.form 644 z.neg = x.neg 645 if x.form == finite { 646 z.exp = x.exp 647 z.mant = z.mant.set(x.mant) 648 } 649 if z.prec == 0 { 650 z.prec = x.prec 651 } else if z.prec < x.prec { 652 z.round(0) 653 } 654 } 655 return z 656} 657 658// Copy sets z to x, with the same precision, rounding mode, and 659// accuracy as x, and returns z. x is not changed even if z and 660// x are the same. 661func (z *Float) Copy(x *Float) *Float { 662 if debugFloat { 663 x.validate() 664 } 665 if z != x { 666 z.prec = x.prec 667 z.mode = x.mode 668 z.acc = x.acc 669 z.form = x.form 670 z.neg = x.neg 671 if z.form == finite { 672 z.mant = z.mant.set(x.mant) 673 z.exp = x.exp 674 } 675 } 676 return z 677} 678 679// msb32 returns the 32 most significant bits of x. 680func msb32(x nat) uint32 { 681 i := len(x) - 1 682 if i < 0 { 683 return 0 684 } 685 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 686 panic("x not normalized") 687 } 688 switch _W { 689 case 32: 690 return uint32(x[i]) 691 case 64: 692 return uint32(x[i] >> 32) 693 } 694 panic("unreachable") 695} 696 697// msb64 returns the 64 most significant bits of x. 698func msb64(x nat) uint64 { 699 i := len(x) - 1 700 if i < 0 { 701 return 0 702 } 703 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 704 panic("x not normalized") 705 } 706 switch _W { 707 case 32: 708 v := uint64(x[i]) << 32 709 if i > 0 { 710 v |= uint64(x[i-1]) 711 } 712 return v 713 case 64: 714 return uint64(x[i]) 715 } 716 panic("unreachable") 717} 718 719// Uint64 returns the unsigned integer resulting from truncating x 720// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 721// if x is an integer and Below otherwise. 722// The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 723// for x > math.MaxUint64. 724func (x *Float) Uint64() (uint64, Accuracy) { 725 if debugFloat { 726 x.validate() 727 } 728 729 switch x.form { 730 case finite: 731 if x.neg { 732 return 0, Above 733 } 734 // 0 < x < +Inf 735 if x.exp <= 0 { 736 // 0 < x < 1 737 return 0, Below 738 } 739 // 1 <= x < Inf 740 if x.exp <= 64 { 741 // u = trunc(x) fits into a uint64 742 u := msb64(x.mant) >> (64 - uint32(x.exp)) 743 if x.MinPrec() <= 64 { 744 return u, Exact 745 } 746 return u, Below // x truncated 747 } 748 // x too large 749 return math.MaxUint64, Below 750 751 case zero: 752 return 0, Exact 753 754 case inf: 755 if x.neg { 756 return 0, Above 757 } 758 return math.MaxUint64, Below 759 } 760 761 panic("unreachable") 762} 763 764// Int64 returns the integer resulting from truncating x towards zero. 765// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 766// an integer, and Above (x < 0) or Below (x > 0) otherwise. 767// The result is (math.MinInt64, Above) for x < math.MinInt64, 768// and (math.MaxInt64, Below) for x > math.MaxInt64. 769func (x *Float) Int64() (int64, Accuracy) { 770 if debugFloat { 771 x.validate() 772 } 773 774 switch x.form { 775 case finite: 776 // 0 < |x| < +Inf 777 acc := makeAcc(x.neg) 778 if x.exp <= 0 { 779 // 0 < |x| < 1 780 return 0, acc 781 } 782 // x.exp > 0 783 784 // 1 <= |x| < +Inf 785 if x.exp <= 63 { 786 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 787 i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) 788 if x.neg { 789 i = -i 790 } 791 if x.MinPrec() <= uint(x.exp) { 792 return i, Exact 793 } 794 return i, acc // x truncated 795 } 796 if x.neg { 797 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 798 if x.exp == 64 && x.MinPrec() == 1 { 799 acc = Exact 800 } 801 return math.MinInt64, acc 802 } 803 // x too large 804 return math.MaxInt64, Below 805 806 case zero: 807 return 0, Exact 808 809 case inf: 810 if x.neg { 811 return math.MinInt64, Above 812 } 813 return math.MaxInt64, Below 814 } 815 816 panic("unreachable") 817} 818 819// Float32 returns the float32 value nearest to x. If x is too small to be 820// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 821// is (0, Below) or (-0, Above), respectively, depending on the sign of x. 822// If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 823// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 824func (x *Float) Float32() (float32, Accuracy) { 825 if debugFloat { 826 x.validate() 827 } 828 829 switch x.form { 830 case finite: 831 // 0 < |x| < +Inf 832 833 const ( 834 fbits = 32 // float size 835 mbits = 23 // mantissa size (excluding implicit msb) 836 ebits = fbits - mbits - 1 // 8 exponent size 837 bias = 1<<(ebits-1) - 1 // 127 exponent bias 838 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 839 emin = 1 - bias // -126 smallest unbiased exponent (normal) 840 emax = bias // 127 largest unbiased exponent (normal) 841 ) 842 843 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. 844 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 845 846 // Compute precision p for float32 mantissa. 847 // If the exponent is too small, we have a denormal number before 848 // rounding and fewer than p mantissa bits of precision available 849 // (the exponent remains fixed but the mantissa gets shifted right). 850 p := mbits + 1 // precision of normal float 851 if e < emin { 852 // recompute precision 853 p = mbits + 1 - emin + int(e) 854 // If p == 0, the mantissa of x is shifted so much to the right 855 // that its msb falls immediately to the right of the float32 856 // mantissa space. In other words, if the smallest denormal is 857 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 858 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 859 // If m == 0.5, it is rounded down to even, i.e., 0.0. 860 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 861 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 862 // underflow to ±0 863 if x.neg { 864 var z float32 865 return -z, Above 866 } 867 return 0.0, Below 868 } 869 // otherwise, round up 870 // We handle p == 0 explicitly because it's easy and because 871 // Float.round doesn't support rounding to 0 bits of precision. 872 if p == 0 { 873 if x.neg { 874 return -math.SmallestNonzeroFloat32, Below 875 } 876 return math.SmallestNonzeroFloat32, Above 877 } 878 } 879 // p > 0 880 881 // round 882 var r Float 883 r.prec = uint32(p) 884 r.Set(x) 885 e = r.exp - 1 886 887 // Rounding may have caused r to overflow to ±Inf 888 // (rounding never causes underflows to 0). 889 // If the exponent is too large, also overflow to ±Inf. 890 if r.form == inf || e > emax { 891 // overflow 892 if x.neg { 893 return float32(math.Inf(-1)), Below 894 } 895 return float32(math.Inf(+1)), Above 896 } 897 // e <= emax 898 899 // Determine sign, biased exponent, and mantissa. 900 var sign, bexp, mant uint32 901 if x.neg { 902 sign = 1 << (fbits - 1) 903 } 904 905 // Rounding may have caused a denormal number to 906 // become normal. Check again. 907 if e < emin { 908 // denormal number: recompute precision 909 // Since rounding may have at best increased precision 910 // and we have eliminated p <= 0 early, we know p > 0. 911 // bexp == 0 for denormals 912 p = mbits + 1 - emin + int(e) 913 mant = msb32(r.mant) >> uint(fbits-p) 914 } else { 915 // normal number: emin <= e <= emax 916 bexp = uint32(e+bias) << mbits 917 mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 918 } 919 920 return math.Float32frombits(sign | bexp | mant), r.acc 921 922 case zero: 923 if x.neg { 924 var z float32 925 return -z, Exact 926 } 927 return 0.0, Exact 928 929 case inf: 930 if x.neg { 931 return float32(math.Inf(-1)), Exact 932 } 933 return float32(math.Inf(+1)), Exact 934 } 935 936 panic("unreachable") 937} 938 939// Float64 returns the float64 value nearest to x. If x is too small to be 940// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 941// is (0, Below) or (-0, Above), respectively, depending on the sign of x. 942// If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 943// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 944func (x *Float) Float64() (float64, Accuracy) { 945 if debugFloat { 946 x.validate() 947 } 948 949 switch x.form { 950 case finite: 951 // 0 < |x| < +Inf 952 953 const ( 954 fbits = 64 // float size 955 mbits = 52 // mantissa size (excluding implicit msb) 956 ebits = fbits - mbits - 1 // 11 exponent size 957 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 958 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 959 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 960 emax = bias // 1023 largest unbiased exponent (normal) 961 ) 962 963 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. 964 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 965 966 // Compute precision p for float64 mantissa. 967 // If the exponent is too small, we have a denormal number before 968 // rounding and fewer than p mantissa bits of precision available 969 // (the exponent remains fixed but the mantissa gets shifted right). 970 p := mbits + 1 // precision of normal float 971 if e < emin { 972 // recompute precision 973 p = mbits + 1 - emin + int(e) 974 // If p == 0, the mantissa of x is shifted so much to the right 975 // that its msb falls immediately to the right of the float64 976 // mantissa space. In other words, if the smallest denormal is 977 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 978 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 979 // If m == 0.5, it is rounded down to even, i.e., 0.0. 980 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 981 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 982 // underflow to ±0 983 if x.neg { 984 var z float64 985 return -z, Above 986 } 987 return 0.0, Below 988 } 989 // otherwise, round up 990 // We handle p == 0 explicitly because it's easy and because 991 // Float.round doesn't support rounding to 0 bits of precision. 992 if p == 0 { 993 if x.neg { 994 return -math.SmallestNonzeroFloat64, Below 995 } 996 return math.SmallestNonzeroFloat64, Above 997 } 998 } 999 // p > 0 1000 1001 // round 1002 var r Float 1003 r.prec = uint32(p) 1004 r.Set(x) 1005 e = r.exp - 1 1006 1007 // Rounding may have caused r to overflow to ±Inf 1008 // (rounding never causes underflows to 0). 1009 // If the exponent is too large, also overflow to ±Inf. 1010 if r.form == inf || e > emax { 1011 // overflow 1012 if x.neg { 1013 return math.Inf(-1), Below 1014 } 1015 return math.Inf(+1), Above 1016 } 1017 // e <= emax 1018 1019 // Determine sign, biased exponent, and mantissa. 1020 var sign, bexp, mant uint64 1021 if x.neg { 1022 sign = 1 << (fbits - 1) 1023 } 1024 1025 // Rounding may have caused a denormal number to 1026 // become normal. Check again. 1027 if e < emin { 1028 // denormal number: recompute precision 1029 // Since rounding may have at best increased precision 1030 // and we have eliminated p <= 0 early, we know p > 0. 1031 // bexp == 0 for denormals 1032 p = mbits + 1 - emin + int(e) 1033 mant = msb64(r.mant) >> uint(fbits-p) 1034 } else { 1035 // normal number: emin <= e <= emax 1036 bexp = uint64(e+bias) << mbits 1037 mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1038 } 1039 1040 return math.Float64frombits(sign | bexp | mant), r.acc 1041 1042 case zero: 1043 if x.neg { 1044 var z float64 1045 return -z, Exact 1046 } 1047 return 0.0, Exact 1048 1049 case inf: 1050 if x.neg { 1051 return math.Inf(-1), Exact 1052 } 1053 return math.Inf(+1), Exact 1054 } 1055 1056 panic("unreachable") 1057} 1058 1059// Int returns the result of truncating x towards zero; 1060// or nil if x is an infinity. 1061// The result is Exact if x.IsInt(); otherwise it is Below 1062// for x > 0, and Above for x < 0. 1063// If a non-nil *Int argument z is provided, Int stores 1064// the result in z instead of allocating a new Int. 1065func (x *Float) Int(z *Int) (*Int, Accuracy) { 1066 if debugFloat { 1067 x.validate() 1068 } 1069 1070 if z == nil && x.form <= finite { 1071 z = new(Int) 1072 } 1073 1074 switch x.form { 1075 case finite: 1076 // 0 < |x| < +Inf 1077 acc := makeAcc(x.neg) 1078 if x.exp <= 0 { 1079 // 0 < |x| < 1 1080 return z.SetInt64(0), acc 1081 } 1082 // x.exp > 0 1083 1084 // 1 <= |x| < +Inf 1085 // determine minimum required precision for x 1086 allBits := uint(len(x.mant)) * _W 1087 exp := uint(x.exp) 1088 if x.MinPrec() <= exp { 1089 acc = Exact 1090 } 1091 // shift mantissa as needed 1092 if z == nil { 1093 z = new(Int) 1094 } 1095 z.neg = x.neg 1096 switch { 1097 case exp > allBits: 1098 z.abs = z.abs.shl(x.mant, exp-allBits) 1099 default: 1100 z.abs = z.abs.set(x.mant) 1101 case exp < allBits: 1102 z.abs = z.abs.shr(x.mant, allBits-exp) 1103 } 1104 return z, acc 1105 1106 case zero: 1107 return z.SetInt64(0), Exact 1108 1109 case inf: 1110 return nil, makeAcc(x.neg) 1111 } 1112 1113 panic("unreachable") 1114} 1115 1116// Rat returns the rational number corresponding to x; 1117// or nil if x is an infinity. 1118// The result is Exact if x is not an Inf. 1119// If a non-nil *Rat argument z is provided, Rat stores 1120// the result in z instead of allocating a new Rat. 1121func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1122 if debugFloat { 1123 x.validate() 1124 } 1125 1126 if z == nil && x.form <= finite { 1127 z = new(Rat) 1128 } 1129 1130 switch x.form { 1131 case finite: 1132 // 0 < |x| < +Inf 1133 allBits := int32(len(x.mant)) * _W 1134 // build up numerator and denominator 1135 z.a.neg = x.neg 1136 switch { 1137 case x.exp > allBits: 1138 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1139 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1140 // z already in normal form 1141 default: 1142 z.a.abs = z.a.abs.set(x.mant) 1143 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1144 // z already in normal form 1145 case x.exp < allBits: 1146 z.a.abs = z.a.abs.set(x.mant) 1147 t := z.b.abs.setUint64(1) 1148 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1149 z.norm() 1150 } 1151 return z, Exact 1152 1153 case zero: 1154 return z.SetInt64(0), Exact 1155 1156 case inf: 1157 return nil, makeAcc(x.neg) 1158 } 1159 1160 panic("unreachable") 1161} 1162 1163// Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1164// and returns z. 1165func (z *Float) Abs(x *Float) *Float { 1166 z.Set(x) 1167 z.neg = false 1168 return z 1169} 1170 1171// Neg sets z to the (possibly rounded) value of x with its sign negated, 1172// and returns z. 1173func (z *Float) Neg(x *Float) *Float { 1174 z.Set(x) 1175 z.neg = !z.neg 1176 return z 1177} 1178 1179func validateBinaryOperands(x, y *Float) { 1180 if !debugFloat { 1181 // avoid performance bugs 1182 panic("validateBinaryOperands called but debugFloat is not set") 1183 } 1184 if len(x.mant) == 0 { 1185 panic("empty mantissa for x") 1186 } 1187 if len(y.mant) == 0 { 1188 panic("empty mantissa for y") 1189 } 1190} 1191 1192// z = x + y, ignoring signs of x and y for the addition 1193// but using the sign of z for rounding the result. 1194// x and y must have a non-empty mantissa and valid exponent. 1195func (z *Float) uadd(x, y *Float) { 1196 // Note: This implementation requires 2 shifts most of the 1197 // time. It is also inefficient if exponents or precisions 1198 // differ by wide margins. The following article describes 1199 // an efficient (but much more complicated) implementation 1200 // compatible with the internal representation used here: 1201 // 1202 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1203 // Point Addition With Exact Rounding (as in the MPFR Library)" 1204 // http://www.vinc17.net/research/papers/rnc6.pdf 1205 1206 if debugFloat { 1207 validateBinaryOperands(x, y) 1208 } 1209 1210 // compute exponents ex, ey for mantissa with "binary point" 1211 // on the right (mantissa.0) - use int64 to avoid overflow 1212 ex := int64(x.exp) - int64(len(x.mant))*_W 1213 ey := int64(y.exp) - int64(len(y.mant))*_W 1214 1215 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1216 1217 // TODO(gri) having a combined add-and-shift primitive 1218 // could make this code significantly faster 1219 switch { 1220 case ex < ey: 1221 if al { 1222 t := nat(nil).shl(y.mant, uint(ey-ex)) 1223 z.mant = z.mant.add(x.mant, t) 1224 } else { 1225 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1226 z.mant = z.mant.add(x.mant, z.mant) 1227 } 1228 default: 1229 // ex == ey, no shift needed 1230 z.mant = z.mant.add(x.mant, y.mant) 1231 case ex > ey: 1232 if al { 1233 t := nat(nil).shl(x.mant, uint(ex-ey)) 1234 z.mant = z.mant.add(t, y.mant) 1235 } else { 1236 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1237 z.mant = z.mant.add(z.mant, y.mant) 1238 } 1239 ex = ey 1240 } 1241 // len(z.mant) > 0 1242 1243 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1244} 1245 1246// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1247// but using the sign of z for rounding the result. 1248// x and y must have a non-empty mantissa and valid exponent. 1249func (z *Float) usub(x, y *Float) { 1250 // This code is symmetric to uadd. 1251 // We have not factored the common code out because 1252 // eventually uadd (and usub) should be optimized 1253 // by special-casing, and the code will diverge. 1254 1255 if debugFloat { 1256 validateBinaryOperands(x, y) 1257 } 1258 1259 ex := int64(x.exp) - int64(len(x.mant))*_W 1260 ey := int64(y.exp) - int64(len(y.mant))*_W 1261 1262 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1263 1264 switch { 1265 case ex < ey: 1266 if al { 1267 t := nat(nil).shl(y.mant, uint(ey-ex)) 1268 z.mant = t.sub(x.mant, t) 1269 } else { 1270 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1271 z.mant = z.mant.sub(x.mant, z.mant) 1272 } 1273 default: 1274 // ex == ey, no shift needed 1275 z.mant = z.mant.sub(x.mant, y.mant) 1276 case ex > ey: 1277 if al { 1278 t := nat(nil).shl(x.mant, uint(ex-ey)) 1279 z.mant = t.sub(t, y.mant) 1280 } else { 1281 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1282 z.mant = z.mant.sub(z.mant, y.mant) 1283 } 1284 ex = ey 1285 } 1286 1287 // operands may have canceled each other out 1288 if len(z.mant) == 0 { 1289 z.acc = Exact 1290 z.form = zero 1291 z.neg = false 1292 return 1293 } 1294 // len(z.mant) > 0 1295 1296 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1297} 1298 1299// z = x * y, ignoring signs of x and y for the multiplication 1300// but using the sign of z for rounding the result. 1301// x and y must have a non-empty mantissa and valid exponent. 1302func (z *Float) umul(x, y *Float) { 1303 if debugFloat { 1304 validateBinaryOperands(x, y) 1305 } 1306 1307 // Note: This is doing too much work if the precision 1308 // of z is less than the sum of the precisions of x 1309 // and y which is often the case (e.g., if all floats 1310 // have the same precision). 1311 // TODO(gri) Optimize this for the common case. 1312 1313 e := int64(x.exp) + int64(y.exp) 1314 if x == y { 1315 z.mant = z.mant.sqr(x.mant) 1316 } else { 1317 z.mant = z.mant.mul(x.mant, y.mant) 1318 } 1319 z.setExpAndRound(e-fnorm(z.mant), 0) 1320} 1321 1322// z = x / y, ignoring signs of x and y for the division 1323// but using the sign of z for rounding the result. 1324// x and y must have a non-empty mantissa and valid exponent. 1325func (z *Float) uquo(x, y *Float) { 1326 if debugFloat { 1327 validateBinaryOperands(x, y) 1328 } 1329 1330 // mantissa length in words for desired result precision + 1 1331 // (at least one extra bit so we get the rounding bit after 1332 // the division) 1333 n := int(z.prec/_W) + 1 1334 1335 // compute adjusted x.mant such that we get enough result precision 1336 xadj := x.mant 1337 if d := n - len(x.mant) + len(y.mant); d > 0 { 1338 // d extra words needed => add d "0 digits" to x 1339 xadj = make(nat, len(x.mant)+d) 1340 copy(xadj[d:], x.mant) 1341 } 1342 // TODO(gri): If we have too many digits (d < 0), we should be able 1343 // to shorten x for faster division. But we must be extra careful 1344 // with rounding in that case. 1345 1346 // Compute d before division since there may be aliasing of x.mant 1347 // (via xadj) or y.mant with z.mant. 1348 d := len(xadj) - len(y.mant) 1349 1350 // divide 1351 var r nat 1352 z.mant, r = z.mant.div(nil, xadj, y.mant) 1353 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1354 1355 // The result is long enough to include (at least) the rounding bit. 1356 // If there's a non-zero remainder, the corresponding fractional part 1357 // (if it were computed), would have a non-zero sticky bit (if it were 1358 // zero, it couldn't have a non-zero remainder). 1359 var sbit uint 1360 if len(r) > 0 { 1361 sbit = 1 1362 } 1363 1364 z.setExpAndRound(e-fnorm(z.mant), sbit) 1365} 1366 1367// ucmp returns -1, 0, or +1, depending on whether 1368// |x| < |y|, |x| == |y|, or |x| > |y|. 1369// x and y must have a non-empty mantissa and valid exponent. 1370func (x *Float) ucmp(y *Float) int { 1371 if debugFloat { 1372 validateBinaryOperands(x, y) 1373 } 1374 1375 switch { 1376 case x.exp < y.exp: 1377 return -1 1378 case x.exp > y.exp: 1379 return +1 1380 } 1381 // x.exp == y.exp 1382 1383 // compare mantissas 1384 i := len(x.mant) 1385 j := len(y.mant) 1386 for i > 0 || j > 0 { 1387 var xm, ym Word 1388 if i > 0 { 1389 i-- 1390 xm = x.mant[i] 1391 } 1392 if j > 0 { 1393 j-- 1394 ym = y.mant[j] 1395 } 1396 switch { 1397 case xm < ym: 1398 return -1 1399 case xm > ym: 1400 return +1 1401 } 1402 } 1403 1404 return 0 1405} 1406 1407// Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1408// 1409// When neither the inputs nor result are NaN, the sign of a product or 1410// quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1411// or of a difference x−y regarded as a sum x+(−y), differs from at most 1412// one of the addends’ signs; and the sign of the result of conversions, 1413// the quantize operation, the roundToIntegral operations, and the 1414// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1415// These rules shall apply even when operands or results are zero or infinite. 1416// 1417// When the sum of two operands with opposite signs (or the difference of 1418// two operands with like signs) is exactly zero, the sign of that sum (or 1419// difference) shall be +0 in all rounding-direction attributes except 1420// roundTowardNegative; under that attribute, the sign of an exact zero 1421// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1422// sign as x even when x is zero. 1423// 1424// See also: https://play.golang.org/p/RtH3UCt5IH 1425 1426// Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1427// it is changed to the larger of x's or y's precision before the operation. 1428// Rounding is performed according to z's precision and rounding mode; and 1429// z's accuracy reports the result error relative to the exact (not rounded) 1430// result. Add panics with ErrNaN if x and y are infinities with opposite 1431// signs. The value of z is undefined in that case. 1432// 1433// BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect. 1434func (z *Float) Add(x, y *Float) *Float { 1435 if debugFloat { 1436 x.validate() 1437 y.validate() 1438 } 1439 1440 if z.prec == 0 { 1441 z.prec = umax32(x.prec, y.prec) 1442 } 1443 1444 if x.form == finite && y.form == finite { 1445 // x + y (common case) 1446 1447 // Below we set z.neg = x.neg, and when z aliases y this will 1448 // change the y operand's sign. This is fine, because if an 1449 // operand aliases the receiver it'll be overwritten, but we still 1450 // want the original x.neg and y.neg values when we evaluate 1451 // x.neg != y.neg, so we need to save y.neg before setting z.neg. 1452 yneg := y.neg 1453 1454 z.neg = x.neg 1455 if x.neg == yneg { 1456 // x + y == x + y 1457 // (-x) + (-y) == -(x + y) 1458 z.uadd(x, y) 1459 } else { 1460 // x + (-y) == x - y == -(y - x) 1461 // (-x) + y == y - x == -(x - y) 1462 if x.ucmp(y) > 0 { 1463 z.usub(x, y) 1464 } else { 1465 z.neg = !z.neg 1466 z.usub(y, x) 1467 } 1468 } 1469 return z 1470 } 1471 1472 if x.form == inf && y.form == inf && x.neg != y.neg { 1473 // +Inf + -Inf 1474 // -Inf + +Inf 1475 // value of z is undefined but make sure it's valid 1476 z.acc = Exact 1477 z.form = zero 1478 z.neg = false 1479 panic(ErrNaN{"addition of infinities with opposite signs"}) 1480 } 1481 1482 if x.form == zero && y.form == zero { 1483 // ±0 + ±0 1484 z.acc = Exact 1485 z.form = zero 1486 z.neg = x.neg && y.neg // -0 + -0 == -0 1487 return z 1488 } 1489 1490 if x.form == inf || y.form == zero { 1491 // ±Inf + y 1492 // x + ±0 1493 return z.Set(x) 1494 } 1495 1496 // ±0 + y 1497 // x + ±Inf 1498 return z.Set(y) 1499} 1500 1501// Sub sets z to the rounded difference x-y and returns z. 1502// Precision, rounding, and accuracy reporting are as for Add. 1503// Sub panics with ErrNaN if x and y are infinities with equal 1504// signs. The value of z is undefined in that case. 1505func (z *Float) Sub(x, y *Float) *Float { 1506 if debugFloat { 1507 x.validate() 1508 y.validate() 1509 } 1510 1511 if z.prec == 0 { 1512 z.prec = umax32(x.prec, y.prec) 1513 } 1514 1515 if x.form == finite && y.form == finite { 1516 // x - y (common case) 1517 yneg := y.neg 1518 z.neg = x.neg 1519 if x.neg != yneg { 1520 // x - (-y) == x + y 1521 // (-x) - y == -(x + y) 1522 z.uadd(x, y) 1523 } else { 1524 // x - y == x - y == -(y - x) 1525 // (-x) - (-y) == y - x == -(x - y) 1526 if x.ucmp(y) > 0 { 1527 z.usub(x, y) 1528 } else { 1529 z.neg = !z.neg 1530 z.usub(y, x) 1531 } 1532 } 1533 return z 1534 } 1535 1536 if x.form == inf && y.form == inf && x.neg == y.neg { 1537 // +Inf - +Inf 1538 // -Inf - -Inf 1539 // value of z is undefined but make sure it's valid 1540 z.acc = Exact 1541 z.form = zero 1542 z.neg = false 1543 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1544 } 1545 1546 if x.form == zero && y.form == zero { 1547 // ±0 - ±0 1548 z.acc = Exact 1549 z.form = zero 1550 z.neg = x.neg && !y.neg // -0 - +0 == -0 1551 return z 1552 } 1553 1554 if x.form == inf || y.form == zero { 1555 // ±Inf - y 1556 // x - ±0 1557 return z.Set(x) 1558 } 1559 1560 // ±0 - y 1561 // x - ±Inf 1562 return z.Neg(y) 1563} 1564 1565// Mul sets z to the rounded product x*y and returns z. 1566// Precision, rounding, and accuracy reporting are as for Add. 1567// Mul panics with ErrNaN if one operand is zero and the other 1568// operand an infinity. The value of z is undefined in that case. 1569func (z *Float) Mul(x, y *Float) *Float { 1570 if debugFloat { 1571 x.validate() 1572 y.validate() 1573 } 1574 1575 if z.prec == 0 { 1576 z.prec = umax32(x.prec, y.prec) 1577 } 1578 1579 z.neg = x.neg != y.neg 1580 1581 if x.form == finite && y.form == finite { 1582 // x * y (common case) 1583 z.umul(x, y) 1584 return z 1585 } 1586 1587 z.acc = Exact 1588 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1589 // ±0 * ±Inf 1590 // ±Inf * ±0 1591 // value of z is undefined but make sure it's valid 1592 z.form = zero 1593 z.neg = false 1594 panic(ErrNaN{"multiplication of zero with infinity"}) 1595 } 1596 1597 if x.form == inf || y.form == inf { 1598 // ±Inf * y 1599 // x * ±Inf 1600 z.form = inf 1601 return z 1602 } 1603 1604 // ±0 * y 1605 // x * ±0 1606 z.form = zero 1607 return z 1608} 1609 1610// Quo sets z to the rounded quotient x/y and returns z. 1611// Precision, rounding, and accuracy reporting are as for Add. 1612// Quo panics with ErrNaN if both operands are zero or infinities. 1613// The value of z is undefined in that case. 1614func (z *Float) Quo(x, y *Float) *Float { 1615 if debugFloat { 1616 x.validate() 1617 y.validate() 1618 } 1619 1620 if z.prec == 0 { 1621 z.prec = umax32(x.prec, y.prec) 1622 } 1623 1624 z.neg = x.neg != y.neg 1625 1626 if x.form == finite && y.form == finite { 1627 // x / y (common case) 1628 z.uquo(x, y) 1629 return z 1630 } 1631 1632 z.acc = Exact 1633 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1634 // ±0 / ±0 1635 // ±Inf / ±Inf 1636 // value of z is undefined but make sure it's valid 1637 z.form = zero 1638 z.neg = false 1639 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1640 } 1641 1642 if x.form == zero || y.form == inf { 1643 // ±0 / y 1644 // x / ±Inf 1645 z.form = zero 1646 return z 1647 } 1648 1649 // x / ±0 1650 // ±Inf / y 1651 z.form = inf 1652 return z 1653} 1654 1655// Cmp compares x and y and returns: 1656// 1657// -1 if x < y 1658// 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1659// +1 if x > y 1660// 1661func (x *Float) Cmp(y *Float) int { 1662 if debugFloat { 1663 x.validate() 1664 y.validate() 1665 } 1666 1667 mx := x.ord() 1668 my := y.ord() 1669 switch { 1670 case mx < my: 1671 return -1 1672 case mx > my: 1673 return +1 1674 } 1675 // mx == my 1676 1677 // only if |mx| == 1 we have to compare the mantissae 1678 switch mx { 1679 case -1: 1680 return y.ucmp(x) 1681 case +1: 1682 return x.ucmp(y) 1683 } 1684 1685 return 0 1686} 1687 1688// ord classifies x and returns: 1689// 1690// -2 if -Inf == x 1691// -1 if -Inf < x < 0 1692// 0 if x == 0 (signed or unsigned) 1693// +1 if 0 < x < +Inf 1694// +2 if x == +Inf 1695// 1696func (x *Float) ord() int { 1697 var m int 1698 switch x.form { 1699 case finite: 1700 m = 1 1701 case zero: 1702 return 0 1703 case inf: 1704 m = 2 1705 } 1706 if x.neg { 1707 m = -m 1708 } 1709 return m 1710} 1711 1712func umax32(x, y uint32) uint32 { 1713 if x > y { 1714 return x 1715 } 1716 return y 1717} 1718