1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file implements multi-precision floating-point numbers.
6// Like in the GNU MPFR library (https://www.mpfr.org/), operands
7// can be of mixed precision. Unlike MPFR, the rounding mode is
8// not specified with each operation, but with each operand. The
9// rounding mode of the result operand determines the rounding
10// mode of an operation. This is a from-scratch implementation.
11
12package big
13
14import (
15	"fmt"
16	"math"
17	"math/bits"
18)
19
20const debugFloat = false // enable for debugging
21
22// A nonzero finite Float represents a multi-precision floating point number
23//
24//   sign × mantissa × 2**exponent
25//
26// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
27// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
28// All Floats are ordered, and the ordering of two Floats x and y
29// is defined by x.Cmp(y).
30//
31// Each Float value also has a precision, rounding mode, and accuracy.
32// The precision is the maximum number of mantissa bits available to
33// represent the value. The rounding mode specifies how a result should
34// be rounded to fit into the mantissa bits, and accuracy describes the
35// rounding error with respect to the exact result.
36//
37// Unless specified otherwise, all operations (including setters) that
38// specify a *Float variable for the result (usually via the receiver
39// with the exception of MantExp), round the numeric result according
40// to the precision and rounding mode of the result variable.
41//
42// If the provided result precision is 0 (see below), it is set to the
43// precision of the argument with the largest precision value before any
44// rounding takes place, and the rounding mode remains unchanged. Thus,
45// uninitialized Floats provided as result arguments will have their
46// precision set to a reasonable value determined by the operands, and
47// their mode is the zero value for RoundingMode (ToNearestEven).
48//
49// By setting the desired precision to 24 or 53 and using matching rounding
50// mode (typically ToNearestEven), Float operations produce the same results
51// as the corresponding float32 or float64 IEEE-754 arithmetic for operands
52// that correspond to normal (i.e., not denormal) float32 or float64 numbers.
53// Exponent underflow and overflow lead to a 0 or an Infinity for different
54// values than IEEE-754 because Float exponents have a much larger range.
55//
56// The zero (uninitialized) value for a Float is ready to use and represents
57// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
58//
59// Operations always take pointer arguments (*Float) rather
60// than Float values, and each unique Float value requires
61// its own unique *Float pointer. To "copy" a Float value,
62// an existing (or newly allocated) Float must be set to
63// a new value using the Float.Set method; shallow copies
64// of Floats are not supported and may lead to errors.
65type Float struct {
66	prec uint32
67	mode RoundingMode
68	acc  Accuracy
69	form form
70	neg  bool
71	mant nat
72	exp  int32
73}
74
75// An ErrNaN panic is raised by a Float operation that would lead to
76// a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
77type ErrNaN struct {
78	msg string
79}
80
81func (err ErrNaN) Error() string {
82	return err.msg
83}
84
85// NewFloat allocates and returns a new Float set to x,
86// with precision 53 and rounding mode ToNearestEven.
87// NewFloat panics with ErrNaN if x is a NaN.
88func NewFloat(x float64) *Float {
89	if math.IsNaN(x) {
90		panic(ErrNaN{"NewFloat(NaN)"})
91	}
92	return new(Float).SetFloat64(x)
93}
94
95// Exponent and precision limits.
96const (
97	MaxExp  = math.MaxInt32  // largest supported exponent
98	MinExp  = math.MinInt32  // smallest supported exponent
99	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
100)
101
102// Internal representation: The mantissa bits x.mant of a nonzero finite
103// Float x are stored in a nat slice long enough to hold up to x.prec bits;
104// the slice may (but doesn't have to) be shorter if the mantissa contains
105// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
106// the msb is shifted all the way "to the left"). Thus, if the mantissa has
107// trailing 0 bits or x.prec is not a multiple of the Word size _W,
108// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
109// to the value 0.5; the exponent x.exp shifts the binary point as needed.
110//
111// A zero or non-finite Float x ignores x.mant and x.exp.
112//
113// x                 form      neg      mant         exp
114// ----------------------------------------------------------
115// ±0                zero      sign     -            -
116// 0 < |x| < +Inf    finite    sign     mantissa     exponent
117// ±Inf              inf       sign     -            -
118
119// A form value describes the internal representation.
120type form byte
121
122// The form value order is relevant - do not change!
123const (
124	zero form = iota
125	finite
126	inf
127)
128
129// RoundingMode determines how a Float value is rounded to the
130// desired precision. Rounding may change the Float value; the
131// rounding error is described by the Float's Accuracy.
132type RoundingMode byte
133
134// These constants define supported rounding modes.
135const (
136	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
137	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
138	ToZero                            // == IEEE 754-2008 roundTowardZero
139	AwayFromZero                      // no IEEE 754-2008 equivalent
140	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
141	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
142)
143
144//go:generate stringer -type=RoundingMode
145
146// Accuracy describes the rounding error produced by the most recent
147// operation that generated a Float value, relative to the exact value.
148type Accuracy int8
149
150// Constants describing the Accuracy of a Float.
151const (
152	Below Accuracy = -1
153	Exact Accuracy = 0
154	Above Accuracy = +1
155)
156
157//go:generate stringer -type=Accuracy
158
159// SetPrec sets z's precision to prec and returns the (possibly) rounded
160// value of z. Rounding occurs according to z's rounding mode if the mantissa
161// cannot be represented in prec bits without loss of precision.
162// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
163// If prec > MaxPrec, it is set to MaxPrec.
164func (z *Float) SetPrec(prec uint) *Float {
165	z.acc = Exact // optimistically assume no rounding is needed
166
167	// special case
168	if prec == 0 {
169		z.prec = 0
170		if z.form == finite {
171			// truncate z to 0
172			z.acc = makeAcc(z.neg)
173			z.form = zero
174		}
175		return z
176	}
177
178	// general case
179	if prec > MaxPrec {
180		prec = MaxPrec
181	}
182	old := z.prec
183	z.prec = uint32(prec)
184	if z.prec < old {
185		z.round(0)
186	}
187	return z
188}
189
190func makeAcc(above bool) Accuracy {
191	if above {
192		return Above
193	}
194	return Below
195}
196
197// SetMode sets z's rounding mode to mode and returns an exact z.
198// z remains unchanged otherwise.
199// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
200func (z *Float) SetMode(mode RoundingMode) *Float {
201	z.mode = mode
202	z.acc = Exact
203	return z
204}
205
206// Prec returns the mantissa precision of x in bits.
207// The result may be 0 for |x| == 0 and |x| == Inf.
208func (x *Float) Prec() uint {
209	return uint(x.prec)
210}
211
212// MinPrec returns the minimum precision required to represent x exactly
213// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
214// The result is 0 for |x| == 0 and |x| == Inf.
215func (x *Float) MinPrec() uint {
216	if x.form != finite {
217		return 0
218	}
219	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
220}
221
222// Mode returns the rounding mode of x.
223func (x *Float) Mode() RoundingMode {
224	return x.mode
225}
226
227// Acc returns the accuracy of x produced by the most recent operation.
228func (x *Float) Acc() Accuracy {
229	return x.acc
230}
231
232// Sign returns:
233//
234//	-1 if x <   0
235//	 0 if x is ±0
236//	+1 if x >   0
237//
238func (x *Float) Sign() int {
239	if debugFloat {
240		x.validate()
241	}
242	if x.form == zero {
243		return 0
244	}
245	if x.neg {
246		return -1
247	}
248	return 1
249}
250
251// MantExp breaks x into its mantissa and exponent components
252// and returns the exponent. If a non-nil mant argument is
253// provided its value is set to the mantissa of x, with the
254// same precision and rounding mode as x. The components
255// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
256// Calling MantExp with a nil argument is an efficient way to
257// get the exponent of the receiver.
258//
259// Special cases are:
260//
261//	(  ±0).MantExp(mant) = 0, with mant set to   ±0
262//	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
263//
264// x and mant may be the same in which case x is set to its
265// mantissa value.
266func (x *Float) MantExp(mant *Float) (exp int) {
267	if debugFloat {
268		x.validate()
269	}
270	if x.form == finite {
271		exp = int(x.exp)
272	}
273	if mant != nil {
274		mant.Copy(x)
275		if mant.form == finite {
276			mant.exp = 0
277		}
278	}
279	return
280}
281
282func (z *Float) setExpAndRound(exp int64, sbit uint) {
283	if exp < MinExp {
284		// underflow
285		z.acc = makeAcc(z.neg)
286		z.form = zero
287		return
288	}
289
290	if exp > MaxExp {
291		// overflow
292		z.acc = makeAcc(!z.neg)
293		z.form = inf
294		return
295	}
296
297	z.form = finite
298	z.exp = int32(exp)
299	z.round(sbit)
300}
301
302// SetMantExp sets z to mant × 2**exp and returns z.
303// The result z has the same precision and rounding mode
304// as mant. SetMantExp is an inverse of MantExp but does
305// not require 0.5 <= |mant| < 1.0. Specifically:
306//
307//	mant := new(Float)
308//	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
309//
310// Special cases are:
311//
312//	z.SetMantExp(  ±0, exp) =   ±0
313//	z.SetMantExp(±Inf, exp) = ±Inf
314//
315// z and mant may be the same in which case z's exponent
316// is set to exp.
317func (z *Float) SetMantExp(mant *Float, exp int) *Float {
318	if debugFloat {
319		z.validate()
320		mant.validate()
321	}
322	z.Copy(mant)
323	if z.form != finite {
324		return z
325	}
326	z.setExpAndRound(int64(z.exp)+int64(exp), 0)
327	return z
328}
329
330// Signbit reports whether x is negative or negative zero.
331func (x *Float) Signbit() bool {
332	return x.neg
333}
334
335// IsInf reports whether x is +Inf or -Inf.
336func (x *Float) IsInf() bool {
337	return x.form == inf
338}
339
340// IsInt reports whether x is an integer.
341// ±Inf values are not integers.
342func (x *Float) IsInt() bool {
343	if debugFloat {
344		x.validate()
345	}
346	// special cases
347	if x.form != finite {
348		return x.form == zero
349	}
350	// x.form == finite
351	if x.exp <= 0 {
352		return false
353	}
354	// x.exp > 0
355	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
356}
357
358// debugging support
359func (x *Float) validate() {
360	if !debugFloat {
361		// avoid performance bugs
362		panic("validate called but debugFloat is not set")
363	}
364	if x.form != finite {
365		return
366	}
367	m := len(x.mant)
368	if m == 0 {
369		panic("nonzero finite number with empty mantissa")
370	}
371	const msb = 1 << (_W - 1)
372	if x.mant[m-1]&msb == 0 {
373		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
374	}
375	if x.prec == 0 {
376		panic("zero precision finite number")
377	}
378}
379
380// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
381// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
382// have before calling round. z's mantissa must be normalized (with the msb set)
383// or empty.
384//
385// CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
386// sign of z. For correct rounding, the sign of z must be set correctly before
387// calling round.
388func (z *Float) round(sbit uint) {
389	if debugFloat {
390		z.validate()
391	}
392
393	z.acc = Exact
394	if z.form != finite {
395		// ±0 or ±Inf => nothing left to do
396		return
397	}
398	// z.form == finite && len(z.mant) > 0
399	// m > 0 implies z.prec > 0 (checked by validate)
400
401	m := uint32(len(z.mant)) // present mantissa length in words
402	bits := m * _W           // present mantissa bits; bits > 0
403	if bits <= z.prec {
404		// mantissa fits => nothing to do
405		return
406	}
407	// bits > z.prec
408
409	// Rounding is based on two bits: the rounding bit (rbit) and the
410	// sticky bit (sbit). The rbit is the bit immediately before the
411	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
412	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
413	//
414	//   rbit  sbit  => "fractional part"
415	//
416	//   0     0        == 0
417	//   0     1        >  0  , < 0.5
418	//   1     0        == 0.5
419	//   1     1        >  0.5, < 1.0
420
421	// bits > z.prec: mantissa too large => round
422	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
423	rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
424	// The sticky bit is only needed for rounding ToNearestEven
425	// or when the rounding bit is zero. Avoid computation otherwise.
426	if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) {
427		sbit = z.mant.sticky(r)
428	}
429	sbit &= 1 // be safe and ensure it's a single bit
430
431	// cut off extra words
432	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
433	if m > n {
434		copy(z.mant, z.mant[m-n:]) // move n last words to front
435		z.mant = z.mant[:n]
436	}
437
438	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
439	ntz := n*_W - z.prec // 0 <= ntz < _W
440	lsb := Word(1) << ntz
441
442	// round if result is inexact
443	if rbit|sbit != 0 {
444		// Make rounding decision: The result mantissa is truncated ("rounded down")
445		// by default. Decide if we need to increment, or "round up", the (unsigned)
446		// mantissa.
447		inc := false
448		switch z.mode {
449		case ToNegativeInf:
450			inc = z.neg
451		case ToZero:
452			// nothing to do
453		case ToNearestEven:
454			inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
455		case ToNearestAway:
456			inc = rbit != 0
457		case AwayFromZero:
458			inc = true
459		case ToPositiveInf:
460			inc = !z.neg
461		default:
462			panic("unreachable")
463		}
464
465		// A positive result (!z.neg) is Above the exact result if we increment,
466		// and it's Below if we truncate (Exact results require no rounding).
467		// For a negative result (z.neg) it is exactly the opposite.
468		z.acc = makeAcc(inc != z.neg)
469
470		if inc {
471			// add 1 to mantissa
472			if addVW(z.mant, z.mant, lsb) != 0 {
473				// mantissa overflow => adjust exponent
474				if z.exp >= MaxExp {
475					// exponent overflow
476					z.form = inf
477					return
478				}
479				z.exp++
480				// adjust mantissa: divide by 2 to compensate for exponent adjustment
481				shrVU(z.mant, z.mant, 1)
482				// set msb == carry == 1 from the mantissa overflow above
483				const msb = 1 << (_W - 1)
484				z.mant[n-1] |= msb
485			}
486		}
487	}
488
489	// zero out trailing bits in least-significant word
490	z.mant[0] &^= lsb - 1
491
492	if debugFloat {
493		z.validate()
494	}
495}
496
497func (z *Float) setBits64(neg bool, x uint64) *Float {
498	if z.prec == 0 {
499		z.prec = 64
500	}
501	z.acc = Exact
502	z.neg = neg
503	if x == 0 {
504		z.form = zero
505		return z
506	}
507	// x != 0
508	z.form = finite
509	s := bits.LeadingZeros64(x)
510	z.mant = z.mant.setUint64(x << uint(s))
511	z.exp = int32(64 - s) // always fits
512	if z.prec < 64 {
513		z.round(0)
514	}
515	return z
516}
517
518// SetUint64 sets z to the (possibly rounded) value of x and returns z.
519// If z's precision is 0, it is changed to 64 (and rounding will have
520// no effect).
521func (z *Float) SetUint64(x uint64) *Float {
522	return z.setBits64(false, x)
523}
524
525// SetInt64 sets z to the (possibly rounded) value of x and returns z.
526// If z's precision is 0, it is changed to 64 (and rounding will have
527// no effect).
528func (z *Float) SetInt64(x int64) *Float {
529	u := x
530	if u < 0 {
531		u = -u
532	}
533	// We cannot simply call z.SetUint64(uint64(u)) and change
534	// the sign afterwards because the sign affects rounding.
535	return z.setBits64(x < 0, uint64(u))
536}
537
538// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
539// If z's precision is 0, it is changed to 53 (and rounding will have
540// no effect). SetFloat64 panics with ErrNaN if x is a NaN.
541func (z *Float) SetFloat64(x float64) *Float {
542	if z.prec == 0 {
543		z.prec = 53
544	}
545	if math.IsNaN(x) {
546		panic(ErrNaN{"Float.SetFloat64(NaN)"})
547	}
548	z.acc = Exact
549	z.neg = math.Signbit(x) // handle -0, -Inf correctly
550	if x == 0 {
551		z.form = zero
552		return z
553	}
554	if math.IsInf(x, 0) {
555		z.form = inf
556		return z
557	}
558	// normalized x != 0
559	z.form = finite
560	fmant, exp := math.Frexp(x) // get normalized mantissa
561	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
562	z.exp = int32(exp) // always fits
563	if z.prec < 53 {
564		z.round(0)
565	}
566	return z
567}
568
569// fnorm normalizes mantissa m by shifting it to the left
570// such that the msb of the most-significant word (msw) is 1.
571// It returns the shift amount. It assumes that len(m) != 0.
572func fnorm(m nat) int64 {
573	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
574		panic("msw of mantissa is 0")
575	}
576	s := nlz(m[len(m)-1])
577	if s > 0 {
578		c := shlVU(m, m, s)
579		if debugFloat && c != 0 {
580			panic("nlz or shlVU incorrect")
581		}
582	}
583	return int64(s)
584}
585
586// SetInt sets z to the (possibly rounded) value of x and returns z.
587// If z's precision is 0, it is changed to the larger of x.BitLen()
588// or 64 (and rounding will have no effect).
589func (z *Float) SetInt(x *Int) *Float {
590	// TODO(gri) can be more efficient if z.prec > 0
591	// but small compared to the size of x, or if there
592	// are many trailing 0's.
593	bits := uint32(x.BitLen())
594	if z.prec == 0 {
595		z.prec = umax32(bits, 64)
596	}
597	z.acc = Exact
598	z.neg = x.neg
599	if len(x.abs) == 0 {
600		z.form = zero
601		return z
602	}
603	// x != 0
604	z.mant = z.mant.set(x.abs)
605	fnorm(z.mant)
606	z.setExpAndRound(int64(bits), 0)
607	return z
608}
609
610// SetRat sets z to the (possibly rounded) value of x and returns z.
611// If z's precision is 0, it is changed to the largest of a.BitLen(),
612// b.BitLen(), or 64; with x = a/b.
613func (z *Float) SetRat(x *Rat) *Float {
614	if x.IsInt() {
615		return z.SetInt(x.Num())
616	}
617	var a, b Float
618	a.SetInt(x.Num())
619	b.SetInt(x.Denom())
620	if z.prec == 0 {
621		z.prec = umax32(a.prec, b.prec)
622	}
623	return z.Quo(&a, &b)
624}
625
626// SetInf sets z to the infinite Float -Inf if signbit is
627// set, or +Inf if signbit is not set, and returns z. The
628// precision of z is unchanged and the result is always
629// Exact.
630func (z *Float) SetInf(signbit bool) *Float {
631	z.acc = Exact
632	z.form = inf
633	z.neg = signbit
634	return z
635}
636
637// Set sets z to the (possibly rounded) value of x and returns z.
638// If z's precision is 0, it is changed to the precision of x
639// before setting z (and rounding will have no effect).
640// Rounding is performed according to z's precision and rounding
641// mode; and z's accuracy reports the result error relative to the
642// exact (not rounded) result.
643func (z *Float) Set(x *Float) *Float {
644	if debugFloat {
645		x.validate()
646	}
647	z.acc = Exact
648	if z != x {
649		z.form = x.form
650		z.neg = x.neg
651		if x.form == finite {
652			z.exp = x.exp
653			z.mant = z.mant.set(x.mant)
654		}
655		if z.prec == 0 {
656			z.prec = x.prec
657		} else if z.prec < x.prec {
658			z.round(0)
659		}
660	}
661	return z
662}
663
664// Copy sets z to x, with the same precision, rounding mode, and
665// accuracy as x, and returns z. x is not changed even if z and
666// x are the same.
667func (z *Float) Copy(x *Float) *Float {
668	if debugFloat {
669		x.validate()
670	}
671	if z != x {
672		z.prec = x.prec
673		z.mode = x.mode
674		z.acc = x.acc
675		z.form = x.form
676		z.neg = x.neg
677		if z.form == finite {
678			z.mant = z.mant.set(x.mant)
679			z.exp = x.exp
680		}
681	}
682	return z
683}
684
685// msb32 returns the 32 most significant bits of x.
686func msb32(x nat) uint32 {
687	i := len(x) - 1
688	if i < 0 {
689		return 0
690	}
691	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
692		panic("x not normalized")
693	}
694	switch _W {
695	case 32:
696		return uint32(x[i])
697	case 64:
698		return uint32(x[i] >> 32)
699	}
700	panic("unreachable")
701}
702
703// msb64 returns the 64 most significant bits of x.
704func msb64(x nat) uint64 {
705	i := len(x) - 1
706	if i < 0 {
707		return 0
708	}
709	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
710		panic("x not normalized")
711	}
712	switch _W {
713	case 32:
714		v := uint64(x[i]) << 32
715		if i > 0 {
716			v |= uint64(x[i-1])
717		}
718		return v
719	case 64:
720		return uint64(x[i])
721	}
722	panic("unreachable")
723}
724
725// Uint64 returns the unsigned integer resulting from truncating x
726// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
727// if x is an integer and Below otherwise.
728// The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
729// for x > math.MaxUint64.
730func (x *Float) Uint64() (uint64, Accuracy) {
731	if debugFloat {
732		x.validate()
733	}
734
735	switch x.form {
736	case finite:
737		if x.neg {
738			return 0, Above
739		}
740		// 0 < x < +Inf
741		if x.exp <= 0 {
742			// 0 < x < 1
743			return 0, Below
744		}
745		// 1 <= x < Inf
746		if x.exp <= 64 {
747			// u = trunc(x) fits into a uint64
748			u := msb64(x.mant) >> (64 - uint32(x.exp))
749			if x.MinPrec() <= 64 {
750				return u, Exact
751			}
752			return u, Below // x truncated
753		}
754		// x too large
755		return math.MaxUint64, Below
756
757	case zero:
758		return 0, Exact
759
760	case inf:
761		if x.neg {
762			return 0, Above
763		}
764		return math.MaxUint64, Below
765	}
766
767	panic("unreachable")
768}
769
770// Int64 returns the integer resulting from truncating x towards zero.
771// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
772// an integer, and Above (x < 0) or Below (x > 0) otherwise.
773// The result is (math.MinInt64, Above) for x < math.MinInt64,
774// and (math.MaxInt64, Below) for x > math.MaxInt64.
775func (x *Float) Int64() (int64, Accuracy) {
776	if debugFloat {
777		x.validate()
778	}
779
780	switch x.form {
781	case finite:
782		// 0 < |x| < +Inf
783		acc := makeAcc(x.neg)
784		if x.exp <= 0 {
785			// 0 < |x| < 1
786			return 0, acc
787		}
788		// x.exp > 0
789
790		// 1 <= |x| < +Inf
791		if x.exp <= 63 {
792			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
793			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
794			if x.neg {
795				i = -i
796			}
797			if x.MinPrec() <= uint(x.exp) {
798				return i, Exact
799			}
800			return i, acc // x truncated
801		}
802		if x.neg {
803			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
804			if x.exp == 64 && x.MinPrec() == 1 {
805				acc = Exact
806			}
807			return math.MinInt64, acc
808		}
809		// x too large
810		return math.MaxInt64, Below
811
812	case zero:
813		return 0, Exact
814
815	case inf:
816		if x.neg {
817			return math.MinInt64, Above
818		}
819		return math.MaxInt64, Below
820	}
821
822	panic("unreachable")
823}
824
825// Float32 returns the float32 value nearest to x. If x is too small to be
826// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
827// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
828// If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
829// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
830func (x *Float) Float32() (float32, Accuracy) {
831	if debugFloat {
832		x.validate()
833	}
834
835	switch x.form {
836	case finite:
837		// 0 < |x| < +Inf
838
839		const (
840			fbits = 32                //        float size
841			mbits = 23                //        mantissa size (excluding implicit msb)
842			ebits = fbits - mbits - 1 //     8  exponent size
843			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
844			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
845			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
846			emax  = bias              //   127  largest unbiased exponent (normal)
847		)
848
849		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
850		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
851
852		// Compute precision p for float32 mantissa.
853		// If the exponent is too small, we have a denormal number before
854		// rounding and fewer than p mantissa bits of precision available
855		// (the exponent remains fixed but the mantissa gets shifted right).
856		p := mbits + 1 // precision of normal float
857		if e < emin {
858			// recompute precision
859			p = mbits + 1 - emin + int(e)
860			// If p == 0, the mantissa of x is shifted so much to the right
861			// that its msb falls immediately to the right of the float32
862			// mantissa space. In other words, if the smallest denormal is
863			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
864			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
865			// If m == 0.5, it is rounded down to even, i.e., 0.0.
866			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
867			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
868				// underflow to ±0
869				if x.neg {
870					var z float32
871					return -z, Above
872				}
873				return 0.0, Below
874			}
875			// otherwise, round up
876			// We handle p == 0 explicitly because it's easy and because
877			// Float.round doesn't support rounding to 0 bits of precision.
878			if p == 0 {
879				if x.neg {
880					return -math.SmallestNonzeroFloat32, Below
881				}
882				return math.SmallestNonzeroFloat32, Above
883			}
884		}
885		// p > 0
886
887		// round
888		var r Float
889		r.prec = uint32(p)
890		r.Set(x)
891		e = r.exp - 1
892
893		// Rounding may have caused r to overflow to ±Inf
894		// (rounding never causes underflows to 0).
895		// If the exponent is too large, also overflow to ±Inf.
896		if r.form == inf || e > emax {
897			// overflow
898			if x.neg {
899				return float32(math.Inf(-1)), Below
900			}
901			return float32(math.Inf(+1)), Above
902		}
903		// e <= emax
904
905		// Determine sign, biased exponent, and mantissa.
906		var sign, bexp, mant uint32
907		if x.neg {
908			sign = 1 << (fbits - 1)
909		}
910
911		// Rounding may have caused a denormal number to
912		// become normal. Check again.
913		if e < emin {
914			// denormal number: recompute precision
915			// Since rounding may have at best increased precision
916			// and we have eliminated p <= 0 early, we know p > 0.
917			// bexp == 0 for denormals
918			p = mbits + 1 - emin + int(e)
919			mant = msb32(r.mant) >> uint(fbits-p)
920		} else {
921			// normal number: emin <= e <= emax
922			bexp = uint32(e+bias) << mbits
923			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
924		}
925
926		return math.Float32frombits(sign | bexp | mant), r.acc
927
928	case zero:
929		if x.neg {
930			var z float32
931			return -z, Exact
932		}
933		return 0.0, Exact
934
935	case inf:
936		if x.neg {
937			return float32(math.Inf(-1)), Exact
938		}
939		return float32(math.Inf(+1)), Exact
940	}
941
942	panic("unreachable")
943}
944
945// Float64 returns the float64 value nearest to x. If x is too small to be
946// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
947// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
948// If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
949// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
950func (x *Float) Float64() (float64, Accuracy) {
951	if debugFloat {
952		x.validate()
953	}
954
955	switch x.form {
956	case finite:
957		// 0 < |x| < +Inf
958
959		const (
960			fbits = 64                //        float size
961			mbits = 52                //        mantissa size (excluding implicit msb)
962			ebits = fbits - mbits - 1 //    11  exponent size
963			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
964			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
965			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
966			emax  = bias              //  1023  largest unbiased exponent (normal)
967		)
968
969		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
970		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
971
972		// Compute precision p for float64 mantissa.
973		// If the exponent is too small, we have a denormal number before
974		// rounding and fewer than p mantissa bits of precision available
975		// (the exponent remains fixed but the mantissa gets shifted right).
976		p := mbits + 1 // precision of normal float
977		if e < emin {
978			// recompute precision
979			p = mbits + 1 - emin + int(e)
980			// If p == 0, the mantissa of x is shifted so much to the right
981			// that its msb falls immediately to the right of the float64
982			// mantissa space. In other words, if the smallest denormal is
983			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
984			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
985			// If m == 0.5, it is rounded down to even, i.e., 0.0.
986			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
987			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
988				// underflow to ±0
989				if x.neg {
990					var z float64
991					return -z, Above
992				}
993				return 0.0, Below
994			}
995			// otherwise, round up
996			// We handle p == 0 explicitly because it's easy and because
997			// Float.round doesn't support rounding to 0 bits of precision.
998			if p == 0 {
999				if x.neg {
1000					return -math.SmallestNonzeroFloat64, Below
1001				}
1002				return math.SmallestNonzeroFloat64, Above
1003			}
1004		}
1005		// p > 0
1006
1007		// round
1008		var r Float
1009		r.prec = uint32(p)
1010		r.Set(x)
1011		e = r.exp - 1
1012
1013		// Rounding may have caused r to overflow to ±Inf
1014		// (rounding never causes underflows to 0).
1015		// If the exponent is too large, also overflow to ±Inf.
1016		if r.form == inf || e > emax {
1017			// overflow
1018			if x.neg {
1019				return math.Inf(-1), Below
1020			}
1021			return math.Inf(+1), Above
1022		}
1023		// e <= emax
1024
1025		// Determine sign, biased exponent, and mantissa.
1026		var sign, bexp, mant uint64
1027		if x.neg {
1028			sign = 1 << (fbits - 1)
1029		}
1030
1031		// Rounding may have caused a denormal number to
1032		// become normal. Check again.
1033		if e < emin {
1034			// denormal number: recompute precision
1035			// Since rounding may have at best increased precision
1036			// and we have eliminated p <= 0 early, we know p > 0.
1037			// bexp == 0 for denormals
1038			p = mbits + 1 - emin + int(e)
1039			mant = msb64(r.mant) >> uint(fbits-p)
1040		} else {
1041			// normal number: emin <= e <= emax
1042			bexp = uint64(e+bias) << mbits
1043			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
1044		}
1045
1046		return math.Float64frombits(sign | bexp | mant), r.acc
1047
1048	case zero:
1049		if x.neg {
1050			var z float64
1051			return -z, Exact
1052		}
1053		return 0.0, Exact
1054
1055	case inf:
1056		if x.neg {
1057			return math.Inf(-1), Exact
1058		}
1059		return math.Inf(+1), Exact
1060	}
1061
1062	panic("unreachable")
1063}
1064
1065// Int returns the result of truncating x towards zero;
1066// or nil if x is an infinity.
1067// The result is Exact if x.IsInt(); otherwise it is Below
1068// for x > 0, and Above for x < 0.
1069// If a non-nil *Int argument z is provided, Int stores
1070// the result in z instead of allocating a new Int.
1071func (x *Float) Int(z *Int) (*Int, Accuracy) {
1072	if debugFloat {
1073		x.validate()
1074	}
1075
1076	if z == nil && x.form <= finite {
1077		z = new(Int)
1078	}
1079
1080	switch x.form {
1081	case finite:
1082		// 0 < |x| < +Inf
1083		acc := makeAcc(x.neg)
1084		if x.exp <= 0 {
1085			// 0 < |x| < 1
1086			return z.SetInt64(0), acc
1087		}
1088		// x.exp > 0
1089
1090		// 1 <= |x| < +Inf
1091		// determine minimum required precision for x
1092		allBits := uint(len(x.mant)) * _W
1093		exp := uint(x.exp)
1094		if x.MinPrec() <= exp {
1095			acc = Exact
1096		}
1097		// shift mantissa as needed
1098		if z == nil {
1099			z = new(Int)
1100		}
1101		z.neg = x.neg
1102		switch {
1103		case exp > allBits:
1104			z.abs = z.abs.shl(x.mant, exp-allBits)
1105		default:
1106			z.abs = z.abs.set(x.mant)
1107		case exp < allBits:
1108			z.abs = z.abs.shr(x.mant, allBits-exp)
1109		}
1110		return z, acc
1111
1112	case zero:
1113		return z.SetInt64(0), Exact
1114
1115	case inf:
1116		return nil, makeAcc(x.neg)
1117	}
1118
1119	panic("unreachable")
1120}
1121
1122// Rat returns the rational number corresponding to x;
1123// or nil if x is an infinity.
1124// The result is Exact if x is not an Inf.
1125// If a non-nil *Rat argument z is provided, Rat stores
1126// the result in z instead of allocating a new Rat.
1127func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
1128	if debugFloat {
1129		x.validate()
1130	}
1131
1132	if z == nil && x.form <= finite {
1133		z = new(Rat)
1134	}
1135
1136	switch x.form {
1137	case finite:
1138		// 0 < |x| < +Inf
1139		allBits := int32(len(x.mant)) * _W
1140		// build up numerator and denominator
1141		z.a.neg = x.neg
1142		switch {
1143		case x.exp > allBits:
1144			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
1145			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
1146			// z already in normal form
1147		default:
1148			z.a.abs = z.a.abs.set(x.mant)
1149			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
1150			// z already in normal form
1151		case x.exp < allBits:
1152			z.a.abs = z.a.abs.set(x.mant)
1153			t := z.b.abs.setUint64(1)
1154			z.b.abs = t.shl(t, uint(allBits-x.exp))
1155			z.norm()
1156		}
1157		return z, Exact
1158
1159	case zero:
1160		return z.SetInt64(0), Exact
1161
1162	case inf:
1163		return nil, makeAcc(x.neg)
1164	}
1165
1166	panic("unreachable")
1167}
1168
1169// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
1170// and returns z.
1171func (z *Float) Abs(x *Float) *Float {
1172	z.Set(x)
1173	z.neg = false
1174	return z
1175}
1176
1177// Neg sets z to the (possibly rounded) value of x with its sign negated,
1178// and returns z.
1179func (z *Float) Neg(x *Float) *Float {
1180	z.Set(x)
1181	z.neg = !z.neg
1182	return z
1183}
1184
1185func validateBinaryOperands(x, y *Float) {
1186	if !debugFloat {
1187		// avoid performance bugs
1188		panic("validateBinaryOperands called but debugFloat is not set")
1189	}
1190	if len(x.mant) == 0 {
1191		panic("empty mantissa for x")
1192	}
1193	if len(y.mant) == 0 {
1194		panic("empty mantissa for y")
1195	}
1196}
1197
1198// z = x + y, ignoring signs of x and y for the addition
1199// but using the sign of z for rounding the result.
1200// x and y must have a non-empty mantissa and valid exponent.
1201func (z *Float) uadd(x, y *Float) {
1202	// Note: This implementation requires 2 shifts most of the
1203	// time. It is also inefficient if exponents or precisions
1204	// differ by wide margins. The following article describes
1205	// an efficient (but much more complicated) implementation
1206	// compatible with the internal representation used here:
1207	//
1208	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
1209	// Point Addition With Exact Rounding (as in the MPFR Library)"
1210	// http://www.vinc17.net/research/papers/rnc6.pdf
1211
1212	if debugFloat {
1213		validateBinaryOperands(x, y)
1214	}
1215
1216	// compute exponents ex, ey for mantissa with "binary point"
1217	// on the right (mantissa.0) - use int64 to avoid overflow
1218	ex := int64(x.exp) - int64(len(x.mant))*_W
1219	ey := int64(y.exp) - int64(len(y.mant))*_W
1220
1221	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
1222
1223	// TODO(gri) having a combined add-and-shift primitive
1224	//           could make this code significantly faster
1225	switch {
1226	case ex < ey:
1227		if al {
1228			t := nat(nil).shl(y.mant, uint(ey-ex))
1229			z.mant = z.mant.add(x.mant, t)
1230		} else {
1231			z.mant = z.mant.shl(y.mant, uint(ey-ex))
1232			z.mant = z.mant.add(x.mant, z.mant)
1233		}
1234	default:
1235		// ex == ey, no shift needed
1236		z.mant = z.mant.add(x.mant, y.mant)
1237	case ex > ey:
1238		if al {
1239			t := nat(nil).shl(x.mant, uint(ex-ey))
1240			z.mant = z.mant.add(t, y.mant)
1241		} else {
1242			z.mant = z.mant.shl(x.mant, uint(ex-ey))
1243			z.mant = z.mant.add(z.mant, y.mant)
1244		}
1245		ex = ey
1246	}
1247	// len(z.mant) > 0
1248
1249	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
1250}
1251
1252// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
1253// but using the sign of z for rounding the result.
1254// x and y must have a non-empty mantissa and valid exponent.
1255func (z *Float) usub(x, y *Float) {
1256	// This code is symmetric to uadd.
1257	// We have not factored the common code out because
1258	// eventually uadd (and usub) should be optimized
1259	// by special-casing, and the code will diverge.
1260
1261	if debugFloat {
1262		validateBinaryOperands(x, y)
1263	}
1264
1265	ex := int64(x.exp) - int64(len(x.mant))*_W
1266	ey := int64(y.exp) - int64(len(y.mant))*_W
1267
1268	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
1269
1270	switch {
1271	case ex < ey:
1272		if al {
1273			t := nat(nil).shl(y.mant, uint(ey-ex))
1274			z.mant = t.sub(x.mant, t)
1275		} else {
1276			z.mant = z.mant.shl(y.mant, uint(ey-ex))
1277			z.mant = z.mant.sub(x.mant, z.mant)
1278		}
1279	default:
1280		// ex == ey, no shift needed
1281		z.mant = z.mant.sub(x.mant, y.mant)
1282	case ex > ey:
1283		if al {
1284			t := nat(nil).shl(x.mant, uint(ex-ey))
1285			z.mant = t.sub(t, y.mant)
1286		} else {
1287			z.mant = z.mant.shl(x.mant, uint(ex-ey))
1288			z.mant = z.mant.sub(z.mant, y.mant)
1289		}
1290		ex = ey
1291	}
1292
1293	// operands may have canceled each other out
1294	if len(z.mant) == 0 {
1295		z.acc = Exact
1296		z.form = zero
1297		z.neg = false
1298		return
1299	}
1300	// len(z.mant) > 0
1301
1302	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
1303}
1304
1305// z = x * y, ignoring signs of x and y for the multiplication
1306// but using the sign of z for rounding the result.
1307// x and y must have a non-empty mantissa and valid exponent.
1308func (z *Float) umul(x, y *Float) {
1309	if debugFloat {
1310		validateBinaryOperands(x, y)
1311	}
1312
1313	// Note: This is doing too much work if the precision
1314	// of z is less than the sum of the precisions of x
1315	// and y which is often the case (e.g., if all floats
1316	// have the same precision).
1317	// TODO(gri) Optimize this for the common case.
1318
1319	e := int64(x.exp) + int64(y.exp)
1320	if x == y {
1321		z.mant = z.mant.sqr(x.mant)
1322	} else {
1323		z.mant = z.mant.mul(x.mant, y.mant)
1324	}
1325	z.setExpAndRound(e-fnorm(z.mant), 0)
1326}
1327
1328// z = x / y, ignoring signs of x and y for the division
1329// but using the sign of z for rounding the result.
1330// x and y must have a non-empty mantissa and valid exponent.
1331func (z *Float) uquo(x, y *Float) {
1332	if debugFloat {
1333		validateBinaryOperands(x, y)
1334	}
1335
1336	// mantissa length in words for desired result precision + 1
1337	// (at least one extra bit so we get the rounding bit after
1338	// the division)
1339	n := int(z.prec/_W) + 1
1340
1341	// compute adjusted x.mant such that we get enough result precision
1342	xadj := x.mant
1343	if d := n - len(x.mant) + len(y.mant); d > 0 {
1344		// d extra words needed => add d "0 digits" to x
1345		xadj = make(nat, len(x.mant)+d)
1346		copy(xadj[d:], x.mant)
1347	}
1348	// TODO(gri): If we have too many digits (d < 0), we should be able
1349	// to shorten x for faster division. But we must be extra careful
1350	// with rounding in that case.
1351
1352	// Compute d before division since there may be aliasing of x.mant
1353	// (via xadj) or y.mant with z.mant.
1354	d := len(xadj) - len(y.mant)
1355
1356	// divide
1357	var r nat
1358	z.mant, r = z.mant.div(nil, xadj, y.mant)
1359	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
1360
1361	// The result is long enough to include (at least) the rounding bit.
1362	// If there's a non-zero remainder, the corresponding fractional part
1363	// (if it were computed), would have a non-zero sticky bit (if it were
1364	// zero, it couldn't have a non-zero remainder).
1365	var sbit uint
1366	if len(r) > 0 {
1367		sbit = 1
1368	}
1369
1370	z.setExpAndRound(e-fnorm(z.mant), sbit)
1371}
1372
1373// ucmp returns -1, 0, or +1, depending on whether
1374// |x| < |y|, |x| == |y|, or |x| > |y|.
1375// x and y must have a non-empty mantissa and valid exponent.
1376func (x *Float) ucmp(y *Float) int {
1377	if debugFloat {
1378		validateBinaryOperands(x, y)
1379	}
1380
1381	switch {
1382	case x.exp < y.exp:
1383		return -1
1384	case x.exp > y.exp:
1385		return +1
1386	}
1387	// x.exp == y.exp
1388
1389	// compare mantissas
1390	i := len(x.mant)
1391	j := len(y.mant)
1392	for i > 0 || j > 0 {
1393		var xm, ym Word
1394		if i > 0 {
1395			i--
1396			xm = x.mant[i]
1397		}
1398		if j > 0 {
1399			j--
1400			ym = y.mant[j]
1401		}
1402		switch {
1403		case xm < ym:
1404			return -1
1405		case xm > ym:
1406			return +1
1407		}
1408	}
1409
1410	return 0
1411}
1412
1413// Handling of sign bit as defined by IEEE 754-2008, section 6.3:
1414//
1415// When neither the inputs nor result are NaN, the sign of a product or
1416// quotient is the exclusive OR of the operands’ signs; the sign of a sum,
1417// or of a difference x−y regarded as a sum x+(−y), differs from at most
1418// one of the addends’ signs; and the sign of the result of conversions,
1419// the quantize operation, the roundToIntegral operations, and the
1420// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
1421// These rules shall apply even when operands or results are zero or infinite.
1422//
1423// When the sum of two operands with opposite signs (or the difference of
1424// two operands with like signs) is exactly zero, the sign of that sum (or
1425// difference) shall be +0 in all rounding-direction attributes except
1426// roundTowardNegative; under that attribute, the sign of an exact zero
1427// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
1428// sign as x even when x is zero.
1429//
1430// See also: https://play.golang.org/p/RtH3UCt5IH
1431
1432// Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
1433// it is changed to the larger of x's or y's precision before the operation.
1434// Rounding is performed according to z's precision and rounding mode; and
1435// z's accuracy reports the result error relative to the exact (not rounded)
1436// result. Add panics with ErrNaN if x and y are infinities with opposite
1437// signs. The value of z is undefined in that case.
1438func (z *Float) Add(x, y *Float) *Float {
1439	if debugFloat {
1440		x.validate()
1441		y.validate()
1442	}
1443
1444	if z.prec == 0 {
1445		z.prec = umax32(x.prec, y.prec)
1446	}
1447
1448	if x.form == finite && y.form == finite {
1449		// x + y (common case)
1450
1451		// Below we set z.neg = x.neg, and when z aliases y this will
1452		// change the y operand's sign. This is fine, because if an
1453		// operand aliases the receiver it'll be overwritten, but we still
1454		// want the original x.neg and y.neg values when we evaluate
1455		// x.neg != y.neg, so we need to save y.neg before setting z.neg.
1456		yneg := y.neg
1457
1458		z.neg = x.neg
1459		if x.neg == yneg {
1460			// x + y == x + y
1461			// (-x) + (-y) == -(x + y)
1462			z.uadd(x, y)
1463		} else {
1464			// x + (-y) == x - y == -(y - x)
1465			// (-x) + y == y - x == -(x - y)
1466			if x.ucmp(y) > 0 {
1467				z.usub(x, y)
1468			} else {
1469				z.neg = !z.neg
1470				z.usub(y, x)
1471			}
1472		}
1473		if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
1474			z.neg = true
1475		}
1476		return z
1477	}
1478
1479	if x.form == inf && y.form == inf && x.neg != y.neg {
1480		// +Inf + -Inf
1481		// -Inf + +Inf
1482		// value of z is undefined but make sure it's valid
1483		z.acc = Exact
1484		z.form = zero
1485		z.neg = false
1486		panic(ErrNaN{"addition of infinities with opposite signs"})
1487	}
1488
1489	if x.form == zero && y.form == zero {
1490		// ±0 + ±0
1491		z.acc = Exact
1492		z.form = zero
1493		z.neg = x.neg && y.neg // -0 + -0 == -0
1494		return z
1495	}
1496
1497	if x.form == inf || y.form == zero {
1498		// ±Inf + y
1499		// x + ±0
1500		return z.Set(x)
1501	}
1502
1503	// ±0 + y
1504	// x + ±Inf
1505	return z.Set(y)
1506}
1507
1508// Sub sets z to the rounded difference x-y and returns z.
1509// Precision, rounding, and accuracy reporting are as for Add.
1510// Sub panics with ErrNaN if x and y are infinities with equal
1511// signs. The value of z is undefined in that case.
1512func (z *Float) Sub(x, y *Float) *Float {
1513	if debugFloat {
1514		x.validate()
1515		y.validate()
1516	}
1517
1518	if z.prec == 0 {
1519		z.prec = umax32(x.prec, y.prec)
1520	}
1521
1522	if x.form == finite && y.form == finite {
1523		// x - y (common case)
1524		yneg := y.neg
1525		z.neg = x.neg
1526		if x.neg != yneg {
1527			// x - (-y) == x + y
1528			// (-x) - y == -(x + y)
1529			z.uadd(x, y)
1530		} else {
1531			// x - y == x - y == -(y - x)
1532			// (-x) - (-y) == y - x == -(x - y)
1533			if x.ucmp(y) > 0 {
1534				z.usub(x, y)
1535			} else {
1536				z.neg = !z.neg
1537				z.usub(y, x)
1538			}
1539		}
1540		if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
1541			z.neg = true
1542		}
1543		return z
1544	}
1545
1546	if x.form == inf && y.form == inf && x.neg == y.neg {
1547		// +Inf - +Inf
1548		// -Inf - -Inf
1549		// value of z is undefined but make sure it's valid
1550		z.acc = Exact
1551		z.form = zero
1552		z.neg = false
1553		panic(ErrNaN{"subtraction of infinities with equal signs"})
1554	}
1555
1556	if x.form == zero && y.form == zero {
1557		// ±0 - ±0
1558		z.acc = Exact
1559		z.form = zero
1560		z.neg = x.neg && !y.neg // -0 - +0 == -0
1561		return z
1562	}
1563
1564	if x.form == inf || y.form == zero {
1565		// ±Inf - y
1566		// x - ±0
1567		return z.Set(x)
1568	}
1569
1570	// ±0 - y
1571	// x - ±Inf
1572	return z.Neg(y)
1573}
1574
1575// Mul sets z to the rounded product x*y and returns z.
1576// Precision, rounding, and accuracy reporting are as for Add.
1577// Mul panics with ErrNaN if one operand is zero and the other
1578// operand an infinity. The value of z is undefined in that case.
1579func (z *Float) Mul(x, y *Float) *Float {
1580	if debugFloat {
1581		x.validate()
1582		y.validate()
1583	}
1584
1585	if z.prec == 0 {
1586		z.prec = umax32(x.prec, y.prec)
1587	}
1588
1589	z.neg = x.neg != y.neg
1590
1591	if x.form == finite && y.form == finite {
1592		// x * y (common case)
1593		z.umul(x, y)
1594		return z
1595	}
1596
1597	z.acc = Exact
1598	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
1599		// ±0 * ±Inf
1600		// ±Inf * ±0
1601		// value of z is undefined but make sure it's valid
1602		z.form = zero
1603		z.neg = false
1604		panic(ErrNaN{"multiplication of zero with infinity"})
1605	}
1606
1607	if x.form == inf || y.form == inf {
1608		// ±Inf * y
1609		// x * ±Inf
1610		z.form = inf
1611		return z
1612	}
1613
1614	// ±0 * y
1615	// x * ±0
1616	z.form = zero
1617	return z
1618}
1619
1620// Quo sets z to the rounded quotient x/y and returns z.
1621// Precision, rounding, and accuracy reporting are as for Add.
1622// Quo panics with ErrNaN if both operands are zero or infinities.
1623// The value of z is undefined in that case.
1624func (z *Float) Quo(x, y *Float) *Float {
1625	if debugFloat {
1626		x.validate()
1627		y.validate()
1628	}
1629
1630	if z.prec == 0 {
1631		z.prec = umax32(x.prec, y.prec)
1632	}
1633
1634	z.neg = x.neg != y.neg
1635
1636	if x.form == finite && y.form == finite {
1637		// x / y (common case)
1638		z.uquo(x, y)
1639		return z
1640	}
1641
1642	z.acc = Exact
1643	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
1644		// ±0 / ±0
1645		// ±Inf / ±Inf
1646		// value of z is undefined but make sure it's valid
1647		z.form = zero
1648		z.neg = false
1649		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
1650	}
1651
1652	if x.form == zero || y.form == inf {
1653		// ±0 / y
1654		// x / ±Inf
1655		z.form = zero
1656		return z
1657	}
1658
1659	// x / ±0
1660	// ±Inf / y
1661	z.form = inf
1662	return z
1663}
1664
1665// Cmp compares x and y and returns:
1666//
1667//   -1 if x <  y
1668//    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
1669//   +1 if x >  y
1670//
1671func (x *Float) Cmp(y *Float) int {
1672	if debugFloat {
1673		x.validate()
1674		y.validate()
1675	}
1676
1677	mx := x.ord()
1678	my := y.ord()
1679	switch {
1680	case mx < my:
1681		return -1
1682	case mx > my:
1683		return +1
1684	}
1685	// mx == my
1686
1687	// only if |mx| == 1 we have to compare the mantissae
1688	switch mx {
1689	case -1:
1690		return y.ucmp(x)
1691	case +1:
1692		return x.ucmp(y)
1693	}
1694
1695	return 0
1696}
1697
1698// ord classifies x and returns:
1699//
1700//	-2 if -Inf == x
1701//	-1 if -Inf < x < 0
1702//	 0 if x == 0 (signed or unsigned)
1703//	+1 if 0 < x < +Inf
1704//	+2 if x == +Inf
1705//
1706func (x *Float) ord() int {
1707	var m int
1708	switch x.form {
1709	case finite:
1710		m = 1
1711	case zero:
1712		return 0
1713	case inf:
1714		m = 2
1715	}
1716	if x.neg {
1717		m = -m
1718	}
1719	return m
1720}
1721
1722func umax32(x, y uint32) uint32 {
1723	if x > y {
1724		return x
1725	}
1726	return y
1727}
1728