1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file implements multi-precision floating-point numbers.
6// Like in the GNU MPFR library (http://www.mpfr.org/), operands
7// can be of mixed precision. Unlike MPFR, the rounding mode is
8// not specified with each operation, but with each operand. The
9// rounding mode of the result operand determines the rounding
10// mode of an operation. This is a from-scratch implementation.
11
12package big
13
14import (
15	"fmt"
16	"math"
17)
18
19const debugFloat = false // enable for debugging
20
21// A nonzero finite Float represents a multi-precision floating point number
22//
23//   sign × mantissa × 2**exponent
24//
25// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
26// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
27// All Floats are ordered, and the ordering of two Floats x and y
28// is defined by x.Cmp(y).
29//
30// Each Float value also has a precision, rounding mode, and accuracy.
31// The precision is the maximum number of mantissa bits available to
32// represent the value. The rounding mode specifies how a result should
33// be rounded to fit into the mantissa bits, and accuracy describes the
34// rounding error with respect to the exact result.
35//
36// Unless specified otherwise, all operations (including setters) that
37// specify a *Float variable for the result (usually via the receiver
38// with the exception of MantExp), round the numeric result according
39// to the precision and rounding mode of the result variable.
40//
41// If the provided result precision is 0 (see below), it is set to the
42// precision of the argument with the largest precision value before any
43// rounding takes place, and the rounding mode remains unchanged. Thus,
44// uninitialized Floats provided as result arguments will have their
45// precision set to a reasonable value determined by the operands and
46// their mode is the zero value for RoundingMode (ToNearestEven).
47//
48// By setting the desired precision to 24 or 53 and using matching rounding
49// mode (typically ToNearestEven), Float operations produce the same results
50// as the corresponding float32 or float64 IEEE-754 arithmetic for operands
51// that correspond to normal (i.e., not denormal) float32 or float64 numbers.
52// Exponent underflow and overflow lead to a 0 or an Infinity for different
53// values than IEEE-754 because Float exponents have a much larger range.
54//
55// The zero (uninitialized) value for a Float is ready to use and represents
56// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
57//
58type Float struct {
59	prec uint32
60	mode RoundingMode
61	acc  Accuracy
62	form form
63	neg  bool
64	mant nat
65	exp  int32
66}
67
68// An ErrNaN panic is raised by a Float operation that would lead to
69// a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
70type ErrNaN struct {
71	msg string
72}
73
74func (err ErrNaN) Error() string {
75	return err.msg
76}
77
78// NewFloat allocates and returns a new Float set to x,
79// with precision 53 and rounding mode ToNearestEven.
80// NewFloat panics with ErrNaN if x is a NaN.
81func NewFloat(x float64) *Float {
82	if math.IsNaN(x) {
83		panic(ErrNaN{"NewFloat(NaN)"})
84	}
85	return new(Float).SetFloat64(x)
86}
87
88// Exponent and precision limits.
89const (
90	MaxExp  = math.MaxInt32  // largest supported exponent
91	MinExp  = math.MinInt32  // smallest supported exponent
92	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
93)
94
95// Internal representation: The mantissa bits x.mant of a nonzero finite
96// Float x are stored in a nat slice long enough to hold up to x.prec bits;
97// the slice may (but doesn't have to) be shorter if the mantissa contains
98// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
99// the msb is shifted all the way "to the left"). Thus, if the mantissa has
100// trailing 0 bits or x.prec is not a multiple of the the Word size _W,
101// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
102// to the value 0.5; the exponent x.exp shifts the binary point as needed.
103//
104// A zero or non-finite Float x ignores x.mant and x.exp.
105//
106// x                 form      neg      mant         exp
107// ----------------------------------------------------------
108// ±0                zero      sign     -            -
109// 0 < |x| < +Inf    finite    sign     mantissa     exponent
110// ±Inf              inf       sign     -            -
111
112// A form value describes the internal representation.
113type form byte
114
115// The form value order is relevant - do not change!
116const (
117	zero form = iota
118	finite
119	inf
120)
121
122// RoundingMode determines how a Float value is rounded to the
123// desired precision. Rounding may change the Float value; the
124// rounding error is described by the Float's Accuracy.
125type RoundingMode byte
126
127// These constants define supported rounding modes.
128const (
129	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
130	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
131	ToZero                            // == IEEE 754-2008 roundTowardZero
132	AwayFromZero                      // no IEEE 754-2008 equivalent
133	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
134	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
135)
136
137//go:generate stringer -type=RoundingMode
138
139// Accuracy describes the rounding error produced by the most recent
140// operation that generated a Float value, relative to the exact value.
141type Accuracy int8
142
143// Constants describing the Accuracy of a Float.
144const (
145	Below Accuracy = -1
146	Exact Accuracy = 0
147	Above Accuracy = +1
148)
149
150//go:generate stringer -type=Accuracy
151
152// SetPrec sets z's precision to prec and returns the (possibly) rounded
153// value of z. Rounding occurs according to z's rounding mode if the mantissa
154// cannot be represented in prec bits without loss of precision.
155// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
156// If prec > MaxPrec, it is set to MaxPrec.
157func (z *Float) SetPrec(prec uint) *Float {
158	z.acc = Exact // optimistically assume no rounding is needed
159
160	// special case
161	if prec == 0 {
162		z.prec = 0
163		if z.form == finite {
164			// truncate z to 0
165			z.acc = makeAcc(z.neg)
166			z.form = zero
167		}
168		return z
169	}
170
171	// general case
172	if prec > MaxPrec {
173		prec = MaxPrec
174	}
175	old := z.prec
176	z.prec = uint32(prec)
177	if z.prec < old {
178		z.round(0)
179	}
180	return z
181}
182
183func makeAcc(above bool) Accuracy {
184	if above {
185		return Above
186	}
187	return Below
188}
189
190// SetMode sets z's rounding mode to mode and returns an exact z.
191// z remains unchanged otherwise.
192// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
193func (z *Float) SetMode(mode RoundingMode) *Float {
194	z.mode = mode
195	z.acc = Exact
196	return z
197}
198
199// Prec returns the mantissa precision of x in bits.
200// The result may be 0 for |x| == 0 and |x| == Inf.
201func (x *Float) Prec() uint {
202	return uint(x.prec)
203}
204
205// MinPrec returns the minimum precision required to represent x exactly
206// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
207// The result is 0 for |x| == 0 and |x| == Inf.
208func (x *Float) MinPrec() uint {
209	if x.form != finite {
210		return 0
211	}
212	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
213}
214
215// Mode returns the rounding mode of x.
216func (x *Float) Mode() RoundingMode {
217	return x.mode
218}
219
220// Acc returns the accuracy of x produced by the most recent operation.
221func (x *Float) Acc() Accuracy {
222	return x.acc
223}
224
225// Sign returns:
226//
227//	-1 if x <   0
228//	 0 if x is ±0
229//	+1 if x >   0
230//
231func (x *Float) Sign() int {
232	if debugFloat {
233		x.validate()
234	}
235	if x.form == zero {
236		return 0
237	}
238	if x.neg {
239		return -1
240	}
241	return 1
242}
243
244// MantExp breaks x into its mantissa and exponent components
245// and returns the exponent. If a non-nil mant argument is
246// provided its value is set to the mantissa of x, with the
247// same precision and rounding mode as x. The components
248// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
249// Calling MantExp with a nil argument is an efficient way to
250// get the exponent of the receiver.
251//
252// Special cases are:
253//
254//	(  ±0).MantExp(mant) = 0, with mant set to   ±0
255//	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
256//
257// x and mant may be the same in which case x is set to its
258// mantissa value.
259func (x *Float) MantExp(mant *Float) (exp int) {
260	if debugFloat {
261		x.validate()
262	}
263	if x.form == finite {
264		exp = int(x.exp)
265	}
266	if mant != nil {
267		mant.Copy(x)
268		if mant.form == finite {
269			mant.exp = 0
270		}
271	}
272	return
273}
274
275func (z *Float) setExpAndRound(exp int64, sbit uint) {
276	if exp < MinExp {
277		// underflow
278		z.acc = makeAcc(z.neg)
279		z.form = zero
280		return
281	}
282
283	if exp > MaxExp {
284		// overflow
285		z.acc = makeAcc(!z.neg)
286		z.form = inf
287		return
288	}
289
290	z.form = finite
291	z.exp = int32(exp)
292	z.round(sbit)
293}
294
295// SetMantExp sets z to mant × 2**exp and and returns z.
296// The result z has the same precision and rounding mode
297// as mant. SetMantExp is an inverse of MantExp but does
298// not require 0.5 <= |mant| < 1.0. Specifically:
299//
300//	mant := new(Float)
301//	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
302//
303// Special cases are:
304//
305//	z.SetMantExp(  ±0, exp) =   ±0
306//	z.SetMantExp(±Inf, exp) = ±Inf
307//
308// z and mant may be the same in which case z's exponent
309// is set to exp.
310func (z *Float) SetMantExp(mant *Float, exp int) *Float {
311	if debugFloat {
312		z.validate()
313		mant.validate()
314	}
315	z.Copy(mant)
316	if z.form != finite {
317		return z
318	}
319	z.setExpAndRound(int64(z.exp)+int64(exp), 0)
320	return z
321}
322
323// Signbit returns true if x is negative or negative zero.
324func (x *Float) Signbit() bool {
325	return x.neg
326}
327
328// IsInf reports whether x is +Inf or -Inf.
329func (x *Float) IsInf() bool {
330	return x.form == inf
331}
332
333// IsInt reports whether x is an integer.
334// ±Inf values are not integers.
335func (x *Float) IsInt() bool {
336	if debugFloat {
337		x.validate()
338	}
339	// special cases
340	if x.form != finite {
341		return x.form == zero
342	}
343	// x.form == finite
344	if x.exp <= 0 {
345		return false
346	}
347	// x.exp > 0
348	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
349}
350
351// debugging support
352func (x *Float) validate() {
353	if !debugFloat {
354		// avoid performance bugs
355		panic("validate called but debugFloat is not set")
356	}
357	if x.form != finite {
358		return
359	}
360	m := len(x.mant)
361	if m == 0 {
362		panic("nonzero finite number with empty mantissa")
363	}
364	const msb = 1 << (_W - 1)
365	if x.mant[m-1]&msb == 0 {
366		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
367	}
368	if x.prec == 0 {
369		panic("zero precision finite number")
370	}
371}
372
373// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
374// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
375// have before calling round. z's mantissa must be normalized (with the msb set)
376// or empty.
377//
378// CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
379// sign of z. For correct rounding, the sign of z must be set correctly before
380// calling round.
381func (z *Float) round(sbit uint) {
382	if debugFloat {
383		z.validate()
384	}
385
386	z.acc = Exact
387	if z.form != finite {
388		// ±0 or ±Inf => nothing left to do
389		return
390	}
391	// z.form == finite && len(z.mant) > 0
392	// m > 0 implies z.prec > 0 (checked by validate)
393
394	m := uint32(len(z.mant)) // present mantissa length in words
395	bits := m * _W           // present mantissa bits
396	if bits <= z.prec {
397		// mantissa fits => nothing to do
398		return
399	}
400	// bits > z.prec
401
402	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
403
404	// Rounding is based on two bits: the rounding bit (rbit) and the
405	// sticky bit (sbit). The rbit is the bit immediately before the
406	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
407	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
408	//
409	//   rbit  sbit  => "fractional part"
410	//
411	//   0     0        == 0
412	//   0     1        >  0  , < 0.5
413	//   1     0        == 0.5
414	//   1     1        >  0.5, < 1.0
415
416	// bits > z.prec: mantissa too large => round
417	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
418	rbit := z.mant.bit(r)        // rounding bit
419	if sbit == 0 {
420		sbit = z.mant.sticky(r)
421	}
422	if debugFloat && sbit&^1 != 0 {
423		panic(fmt.Sprintf("invalid sbit %#x", sbit))
424	}
425
426	// convert ToXInf rounding modes
427	mode := z.mode
428	switch mode {
429	case ToNegativeInf:
430		mode = ToZero
431		if z.neg {
432			mode = AwayFromZero
433		}
434	case ToPositiveInf:
435		mode = AwayFromZero
436		if z.neg {
437			mode = ToZero
438		}
439	}
440
441	// cut off extra words
442	if m > n {
443		copy(z.mant, z.mant[m-n:]) // move n last words to front
444		z.mant = z.mant[:n]
445	}
446
447	// determine number of trailing zero bits t
448	t := n*_W - z.prec // 0 <= t < _W
449	lsb := Word(1) << t
450
451	// make rounding decision
452	// TODO(gri) This can be simplified (see Bits.round in bits_test.go).
453	switch mode {
454	case ToZero:
455		// nothing to do
456	case ToNearestEven, ToNearestAway:
457		if rbit == 0 {
458			// rounding bits == 0b0x
459			mode = ToZero
460		} else if sbit == 1 {
461			// rounding bits == 0b11
462			mode = AwayFromZero
463		}
464	case AwayFromZero:
465		if rbit|sbit == 0 {
466			mode = ToZero
467		}
468	default:
469		// ToXInf modes have been converted to ToZero or AwayFromZero
470		panic("unreachable")
471	}
472
473	// round and determine accuracy
474	switch mode {
475	case ToZero:
476		if rbit|sbit != 0 {
477			z.acc = Below
478		}
479
480	case ToNearestEven, ToNearestAway:
481		if debugFloat && rbit != 1 {
482			panic("internal error in rounding")
483		}
484		if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 {
485			z.acc = Below
486			break
487		}
488		// mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0
489		fallthrough
490
491	case AwayFromZero:
492		// add 1 to mantissa
493		if addVW(z.mant, z.mant, lsb) != 0 {
494			// overflow => shift mantissa right by 1 and add msb
495			shrVU(z.mant, z.mant, 1)
496			z.mant[n-1] |= 1 << (_W - 1)
497			// adjust exponent
498			if z.exp < MaxExp {
499				z.exp++
500			} else {
501				// exponent overflow
502				z.acc = makeAcc(!z.neg)
503				z.form = inf
504				return
505			}
506		}
507		z.acc = Above
508	}
509
510	// zero out trailing bits in least-significant word
511	z.mant[0] &^= lsb - 1
512
513	// update accuracy
514	if z.acc != Exact && z.neg {
515		z.acc = -z.acc
516	}
517
518	if debugFloat {
519		z.validate()
520	}
521
522	return
523}
524
525func (z *Float) setBits64(neg bool, x uint64) *Float {
526	if z.prec == 0 {
527		z.prec = 64
528	}
529	z.acc = Exact
530	z.neg = neg
531	if x == 0 {
532		z.form = zero
533		return z
534	}
535	// x != 0
536	z.form = finite
537	s := nlz64(x)
538	z.mant = z.mant.setUint64(x << s)
539	z.exp = int32(64 - s) // always fits
540	if z.prec < 64 {
541		z.round(0)
542	}
543	return z
544}
545
546// SetUint64 sets z to the (possibly rounded) value of x and returns z.
547// If z's precision is 0, it is changed to 64 (and rounding will have
548// no effect).
549func (z *Float) SetUint64(x uint64) *Float {
550	return z.setBits64(false, x)
551}
552
553// SetInt64 sets z to the (possibly rounded) value of x and returns z.
554// If z's precision is 0, it is changed to 64 (and rounding will have
555// no effect).
556func (z *Float) SetInt64(x int64) *Float {
557	u := x
558	if u < 0 {
559		u = -u
560	}
561	// We cannot simply call z.SetUint64(uint64(u)) and change
562	// the sign afterwards because the sign affects rounding.
563	return z.setBits64(x < 0, uint64(u))
564}
565
566// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
567// If z's precision is 0, it is changed to 53 (and rounding will have
568// no effect). SetFloat64 panics with ErrNaN if x is a NaN.
569func (z *Float) SetFloat64(x float64) *Float {
570	if z.prec == 0 {
571		z.prec = 53
572	}
573	if math.IsNaN(x) {
574		panic(ErrNaN{"Float.SetFloat64(NaN)"})
575	}
576	z.acc = Exact
577	z.neg = math.Signbit(x) // handle -0, -Inf correctly
578	if x == 0 {
579		z.form = zero
580		return z
581	}
582	if math.IsInf(x, 0) {
583		z.form = inf
584		return z
585	}
586	// normalized x != 0
587	z.form = finite
588	fmant, exp := math.Frexp(x) // get normalized mantissa
589	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
590	z.exp = int32(exp) // always fits
591	if z.prec < 53 {
592		z.round(0)
593	}
594	return z
595}
596
597// fnorm normalizes mantissa m by shifting it to the left
598// such that the msb of the most-significant word (msw) is 1.
599// It returns the shift amount. It assumes that len(m) != 0.
600func fnorm(m nat) int64 {
601	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
602		panic("msw of mantissa is 0")
603	}
604	s := nlz(m[len(m)-1])
605	if s > 0 {
606		c := shlVU(m, m, s)
607		if debugFloat && c != 0 {
608			panic("nlz or shlVU incorrect")
609		}
610	}
611	return int64(s)
612}
613
614// SetInt sets z to the (possibly rounded) value of x and returns z.
615// If z's precision is 0, it is changed to the larger of x.BitLen()
616// or 64 (and rounding will have no effect).
617func (z *Float) SetInt(x *Int) *Float {
618	// TODO(gri) can be more efficient if z.prec > 0
619	// but small compared to the size of x, or if there
620	// are many trailing 0's.
621	bits := uint32(x.BitLen())
622	if z.prec == 0 {
623		z.prec = umax32(bits, 64)
624	}
625	z.acc = Exact
626	z.neg = x.neg
627	if len(x.abs) == 0 {
628		z.form = zero
629		return z
630	}
631	// x != 0
632	z.mant = z.mant.set(x.abs)
633	fnorm(z.mant)
634	z.setExpAndRound(int64(bits), 0)
635	return z
636}
637
638// SetRat sets z to the (possibly rounded) value of x and returns z.
639// If z's precision is 0, it is changed to the largest of a.BitLen(),
640// b.BitLen(), or 64; with x = a/b.
641func (z *Float) SetRat(x *Rat) *Float {
642	if x.IsInt() {
643		return z.SetInt(x.Num())
644	}
645	var a, b Float
646	a.SetInt(x.Num())
647	b.SetInt(x.Denom())
648	if z.prec == 0 {
649		z.prec = umax32(a.prec, b.prec)
650	}
651	return z.Quo(&a, &b)
652}
653
654// SetInf sets z to the infinite Float -Inf if signbit is
655// set, or +Inf if signbit is not set, and returns z. The
656// precision of z is unchanged and the result is always
657// Exact.
658func (z *Float) SetInf(signbit bool) *Float {
659	z.acc = Exact
660	z.form = inf
661	z.neg = signbit
662	return z
663}
664
665// Set sets z to the (possibly rounded) value of x and returns z.
666// If z's precision is 0, it is changed to the precision of x
667// before setting z (and rounding will have no effect).
668// Rounding is performed according to z's precision and rounding
669// mode; and z's accuracy reports the result error relative to the
670// exact (not rounded) result.
671func (z *Float) Set(x *Float) *Float {
672	if debugFloat {
673		x.validate()
674	}
675	z.acc = Exact
676	if z != x {
677		z.form = x.form
678		z.neg = x.neg
679		if x.form == finite {
680			z.exp = x.exp
681			z.mant = z.mant.set(x.mant)
682		}
683		if z.prec == 0 {
684			z.prec = x.prec
685		} else if z.prec < x.prec {
686			z.round(0)
687		}
688	}
689	return z
690}
691
692// Copy sets z to x, with the same precision, rounding mode, and
693// accuracy as x, and returns z. x is not changed even if z and
694// x are the same.
695func (z *Float) Copy(x *Float) *Float {
696	if debugFloat {
697		x.validate()
698	}
699	if z != x {
700		z.prec = x.prec
701		z.mode = x.mode
702		z.acc = x.acc
703		z.form = x.form
704		z.neg = x.neg
705		if z.form == finite {
706			z.mant = z.mant.set(x.mant)
707			z.exp = x.exp
708		}
709	}
710	return z
711}
712
713// msb32 returns the 32 most significant bits of x.
714func msb32(x nat) uint32 {
715	i := len(x) - 1
716	if i < 0 {
717		return 0
718	}
719	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
720		panic("x not normalized")
721	}
722	switch _W {
723	case 32:
724		return uint32(x[i])
725	case 64:
726		return uint32(x[i] >> 32)
727	}
728	panic("unreachable")
729}
730
731// msb64 returns the 64 most significant bits of x.
732func msb64(x nat) uint64 {
733	i := len(x) - 1
734	if i < 0 {
735		return 0
736	}
737	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
738		panic("x not normalized")
739	}
740	switch _W {
741	case 32:
742		v := uint64(x[i]) << 32
743		if i > 0 {
744			v |= uint64(x[i-1])
745		}
746		return v
747	case 64:
748		return uint64(x[i])
749	}
750	panic("unreachable")
751}
752
753// Uint64 returns the unsigned integer resulting from truncating x
754// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
755// if x is an integer and Below otherwise.
756// The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
757// for x > math.MaxUint64.
758func (x *Float) Uint64() (uint64, Accuracy) {
759	if debugFloat {
760		x.validate()
761	}
762
763	switch x.form {
764	case finite:
765		if x.neg {
766			return 0, Above
767		}
768		// 0 < x < +Inf
769		if x.exp <= 0 {
770			// 0 < x < 1
771			return 0, Below
772		}
773		// 1 <= x < Inf
774		if x.exp <= 64 {
775			// u = trunc(x) fits into a uint64
776			u := msb64(x.mant) >> (64 - uint32(x.exp))
777			if x.MinPrec() <= 64 {
778				return u, Exact
779			}
780			return u, Below // x truncated
781		}
782		// x too large
783		return math.MaxUint64, Below
784
785	case zero:
786		return 0, Exact
787
788	case inf:
789		if x.neg {
790			return 0, Above
791		}
792		return math.MaxUint64, Below
793	}
794
795	panic("unreachable")
796}
797
798// Int64 returns the integer resulting from truncating x towards zero.
799// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
800// an integer, and Above (x < 0) or Below (x > 0) otherwise.
801// The result is (math.MinInt64, Above) for x < math.MinInt64,
802// and (math.MaxInt64, Below) for x > math.MaxInt64.
803func (x *Float) Int64() (int64, Accuracy) {
804	if debugFloat {
805		x.validate()
806	}
807
808	switch x.form {
809	case finite:
810		// 0 < |x| < +Inf
811		acc := makeAcc(x.neg)
812		if x.exp <= 0 {
813			// 0 < |x| < 1
814			return 0, acc
815		}
816		// x.exp > 0
817
818		// 1 <= |x| < +Inf
819		if x.exp <= 63 {
820			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
821			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
822			if x.neg {
823				i = -i
824			}
825			if x.MinPrec() <= uint(x.exp) {
826				return i, Exact
827			}
828			return i, acc // x truncated
829		}
830		if x.neg {
831			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
832			if x.exp == 64 && x.MinPrec() == 1 {
833				acc = Exact
834			}
835			return math.MinInt64, acc
836		}
837		// x too large
838		return math.MaxInt64, Below
839
840	case zero:
841		return 0, Exact
842
843	case inf:
844		if x.neg {
845			return math.MinInt64, Above
846		}
847		return math.MaxInt64, Below
848	}
849
850	panic("unreachable")
851}
852
853// Float32 returns the float32 value nearest to x. If x is too small to be
854// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
855// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
856// If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
857// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
858func (x *Float) Float32() (float32, Accuracy) {
859	if debugFloat {
860		x.validate()
861	}
862
863	switch x.form {
864	case finite:
865		// 0 < |x| < +Inf
866
867		const (
868			fbits = 32                //        float size
869			mbits = 23                //        mantissa size (excluding implicit msb)
870			ebits = fbits - mbits - 1 //     8  exponent size
871			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
872			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
873			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
874			emax  = bias              //   127  largest unbiased exponent (normal)
875		)
876
877		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
878		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
879		p := mbits + 1 // precision of normal float
880
881		// If the exponent is too small, we may have a denormal number
882		// in which case we have fewer mantissa bits available: reduce
883		// precision accordingly.
884		if e < emin {
885			p -= emin - int(e)
886			// Make sure we have at least 1 bit so that we don't
887			// lose numbers rounded up to the smallest denormal.
888			if p < 1 {
889				p = 1
890			}
891		}
892
893		// round
894		var r Float
895		r.prec = uint32(p)
896		r.Set(x)
897		e = r.exp - 1
898
899		// Rounding may have caused r to overflow to ±Inf
900		// (rounding never causes underflows to 0).
901		if r.form == inf {
902			e = emax + 1 // cause overflow below
903		}
904
905		// If the exponent is too large, overflow to ±Inf.
906		if e > emax {
907			// overflow
908			if x.neg {
909				return float32(math.Inf(-1)), Below
910			}
911			return float32(math.Inf(+1)), Above
912		}
913		// e <= emax
914
915		// Determine sign, biased exponent, and mantissa.
916		var sign, bexp, mant uint32
917		if x.neg {
918			sign = 1 << (fbits - 1)
919		}
920
921		// Rounding may have caused a denormal number to
922		// become normal. Check again.
923		if e < emin {
924			// denormal number
925			if e < dmin {
926				// underflow to ±0
927				if x.neg {
928					var z float32
929					return -z, Above
930				}
931				return 0.0, Below
932			}
933			// bexp = 0
934			mant = msb32(r.mant) >> (fbits - r.prec)
935		} else {
936			// normal number: emin <= e <= emax
937			bexp = uint32(e+bias) << mbits
938			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
939		}
940
941		return math.Float32frombits(sign | bexp | mant), r.acc
942
943	case zero:
944		if x.neg {
945			var z float32
946			return -z, Exact
947		}
948		return 0.0, Exact
949
950	case inf:
951		if x.neg {
952			return float32(math.Inf(-1)), Exact
953		}
954		return float32(math.Inf(+1)), Exact
955	}
956
957	panic("unreachable")
958}
959
960// Float64 returns the float64 value nearest to x. If x is too small to be
961// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
962// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
963// If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
964// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
965func (x *Float) Float64() (float64, Accuracy) {
966	if debugFloat {
967		x.validate()
968	}
969
970	switch x.form {
971	case finite:
972		// 0 < |x| < +Inf
973
974		const (
975			fbits = 64                //        float size
976			mbits = 52                //        mantissa size (excluding implicit msb)
977			ebits = fbits - mbits - 1 //    11  exponent size
978			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
979			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
980			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
981			emax  = bias              //  1023  largest unbiased exponent (normal)
982		)
983
984		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
985		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
986		p := mbits + 1 // precision of normal float
987
988		// If the exponent is too small, we may have a denormal number
989		// in which case we have fewer mantissa bits available: reduce
990		// precision accordingly.
991		if e < emin {
992			p -= emin - int(e)
993			// Make sure we have at least 1 bit so that we don't
994			// lose numbers rounded up to the smallest denormal.
995			if p < 1 {
996				p = 1
997			}
998		}
999
1000		// round
1001		var r Float
1002		r.prec = uint32(p)
1003		r.Set(x)
1004		e = r.exp - 1
1005
1006		// Rounding may have caused r to overflow to ±Inf
1007		// (rounding never causes underflows to 0).
1008		if r.form == inf {
1009			e = emax + 1 // cause overflow below
1010		}
1011
1012		// If the exponent is too large, overflow to ±Inf.
1013		if e > emax {
1014			// overflow
1015			if x.neg {
1016				return math.Inf(-1), Below
1017			}
1018			return math.Inf(+1), Above
1019		}
1020		// e <= emax
1021
1022		// Determine sign, biased exponent, and mantissa.
1023		var sign, bexp, mant uint64
1024		if x.neg {
1025			sign = 1 << (fbits - 1)
1026		}
1027
1028		// Rounding may have caused a denormal number to
1029		// become normal. Check again.
1030		if e < emin {
1031			// denormal number
1032			if e < dmin {
1033				// underflow to ±0
1034				if x.neg {
1035					var z float64
1036					return -z, Above
1037				}
1038				return 0.0, Below
1039			}
1040			// bexp = 0
1041			mant = msb64(r.mant) >> (fbits - r.prec)
1042		} else {
1043			// normal number: emin <= e <= emax
1044			bexp = uint64(e+bias) << mbits
1045			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
1046		}
1047
1048		return math.Float64frombits(sign | bexp | mant), r.acc
1049
1050	case zero:
1051		if x.neg {
1052			var z float64
1053			return -z, Exact
1054		}
1055		return 0.0, Exact
1056
1057	case inf:
1058		if x.neg {
1059			return math.Inf(-1), Exact
1060		}
1061		return math.Inf(+1), Exact
1062	}
1063
1064	panic("unreachable")
1065}
1066
1067// Int returns the result of truncating x towards zero;
1068// or nil if x is an infinity.
1069// The result is Exact if x.IsInt(); otherwise it is Below
1070// for x > 0, and Above for x < 0.
1071// If a non-nil *Int argument z is provided, Int stores
1072// the result in z instead of allocating a new Int.
1073func (x *Float) Int(z *Int) (*Int, Accuracy) {
1074	if debugFloat {
1075		x.validate()
1076	}
1077
1078	if z == nil && x.form <= finite {
1079		z = new(Int)
1080	}
1081
1082	switch x.form {
1083	case finite:
1084		// 0 < |x| < +Inf
1085		acc := makeAcc(x.neg)
1086		if x.exp <= 0 {
1087			// 0 < |x| < 1
1088			return z.SetInt64(0), acc
1089		}
1090		// x.exp > 0
1091
1092		// 1 <= |x| < +Inf
1093		// determine minimum required precision for x
1094		allBits := uint(len(x.mant)) * _W
1095		exp := uint(x.exp)
1096		if x.MinPrec() <= exp {
1097			acc = Exact
1098		}
1099		// shift mantissa as needed
1100		if z == nil {
1101			z = new(Int)
1102		}
1103		z.neg = x.neg
1104		switch {
1105		case exp > allBits:
1106			z.abs = z.abs.shl(x.mant, exp-allBits)
1107		default:
1108			z.abs = z.abs.set(x.mant)
1109		case exp < allBits:
1110			z.abs = z.abs.shr(x.mant, allBits-exp)
1111		}
1112		return z, acc
1113
1114	case zero:
1115		return z.SetInt64(0), Exact
1116
1117	case inf:
1118		return nil, makeAcc(x.neg)
1119	}
1120
1121	panic("unreachable")
1122}
1123
1124// Rat returns the rational number corresponding to x;
1125// or nil if x is an infinity.
1126// The result is Exact if x is not an Inf.
1127// If a non-nil *Rat argument z is provided, Rat stores
1128// the result in z instead of allocating a new Rat.
1129func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
1130	if debugFloat {
1131		x.validate()
1132	}
1133
1134	if z == nil && x.form <= finite {
1135		z = new(Rat)
1136	}
1137
1138	switch x.form {
1139	case finite:
1140		// 0 < |x| < +Inf
1141		allBits := int32(len(x.mant)) * _W
1142		// build up numerator and denominator
1143		z.a.neg = x.neg
1144		switch {
1145		case x.exp > allBits:
1146			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
1147			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
1148			// z already in normal form
1149		default:
1150			z.a.abs = z.a.abs.set(x.mant)
1151			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
1152			// z already in normal form
1153		case x.exp < allBits:
1154			z.a.abs = z.a.abs.set(x.mant)
1155			t := z.b.abs.setUint64(1)
1156			z.b.abs = t.shl(t, uint(allBits-x.exp))
1157			z.norm()
1158		}
1159		return z, Exact
1160
1161	case zero:
1162		return z.SetInt64(0), Exact
1163
1164	case inf:
1165		return nil, makeAcc(x.neg)
1166	}
1167
1168	panic("unreachable")
1169}
1170
1171// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
1172// and returns z.
1173func (z *Float) Abs(x *Float) *Float {
1174	z.Set(x)
1175	z.neg = false
1176	return z
1177}
1178
1179// Neg sets z to the (possibly rounded) value of x with its sign negated,
1180// and returns z.
1181func (z *Float) Neg(x *Float) *Float {
1182	z.Set(x)
1183	z.neg = !z.neg
1184	return z
1185}
1186
1187func validateBinaryOperands(x, y *Float) {
1188	if !debugFloat {
1189		// avoid performance bugs
1190		panic("validateBinaryOperands called but debugFloat is not set")
1191	}
1192	if len(x.mant) == 0 {
1193		panic("empty mantissa for x")
1194	}
1195	if len(y.mant) == 0 {
1196		panic("empty mantissa for y")
1197	}
1198}
1199
1200// z = x + y, ignoring signs of x and y for the addition
1201// but using the sign of z for rounding the result.
1202// x and y must have a non-empty mantissa and valid exponent.
1203func (z *Float) uadd(x, y *Float) {
1204	// Note: This implementation requires 2 shifts most of the
1205	// time. It is also inefficient if exponents or precisions
1206	// differ by wide margins. The following article describes
1207	// an efficient (but much more complicated) implementation
1208	// compatible with the internal representation used here:
1209	//
1210	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
1211	// Point Addition With Exact Rounding (as in the MPFR Library)"
1212	// http://www.vinc17.net/research/papers/rnc6.pdf
1213
1214	if debugFloat {
1215		validateBinaryOperands(x, y)
1216	}
1217
1218	// compute exponents ex, ey for mantissa with "binary point"
1219	// on the right (mantissa.0) - use int64 to avoid overflow
1220	ex := int64(x.exp) - int64(len(x.mant))*_W
1221	ey := int64(y.exp) - int64(len(y.mant))*_W
1222
1223	// TODO(gri) having a combined add-and-shift primitive
1224	//           could make this code significantly faster
1225	switch {
1226	case ex < ey:
1227		// cannot re-use z.mant w/o testing for aliasing
1228		t := nat(nil).shl(y.mant, uint(ey-ex))
1229		z.mant = z.mant.add(x.mant, t)
1230	default:
1231		// ex == ey, no shift needed
1232		z.mant = z.mant.add(x.mant, y.mant)
1233	case ex > ey:
1234		// cannot re-use z.mant w/o testing for aliasing
1235		t := nat(nil).shl(x.mant, uint(ex-ey))
1236		z.mant = z.mant.add(t, y.mant)
1237		ex = ey
1238	}
1239	// len(z.mant) > 0
1240
1241	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
1242}
1243
1244// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
1245// but using the sign of z for rounding the result.
1246// x and y must have a non-empty mantissa and valid exponent.
1247func (z *Float) usub(x, y *Float) {
1248	// This code is symmetric to uadd.
1249	// We have not factored the common code out because
1250	// eventually uadd (and usub) should be optimized
1251	// by special-casing, and the code will diverge.
1252
1253	if debugFloat {
1254		validateBinaryOperands(x, y)
1255	}
1256
1257	ex := int64(x.exp) - int64(len(x.mant))*_W
1258	ey := int64(y.exp) - int64(len(y.mant))*_W
1259
1260	switch {
1261	case ex < ey:
1262		// cannot re-use z.mant w/o testing for aliasing
1263		t := nat(nil).shl(y.mant, uint(ey-ex))
1264		z.mant = t.sub(x.mant, t)
1265	default:
1266		// ex == ey, no shift needed
1267		z.mant = z.mant.sub(x.mant, y.mant)
1268	case ex > ey:
1269		// cannot re-use z.mant w/o testing for aliasing
1270		t := nat(nil).shl(x.mant, uint(ex-ey))
1271		z.mant = t.sub(t, y.mant)
1272		ex = ey
1273	}
1274
1275	// operands may have canceled each other out
1276	if len(z.mant) == 0 {
1277		z.acc = Exact
1278		z.form = zero
1279		z.neg = false
1280		return
1281	}
1282	// len(z.mant) > 0
1283
1284	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
1285}
1286
1287// z = x * y, ignoring signs of x and y for the multiplication
1288// but using the sign of z for rounding the result.
1289// x and y must have a non-empty mantissa and valid exponent.
1290func (z *Float) umul(x, y *Float) {
1291	if debugFloat {
1292		validateBinaryOperands(x, y)
1293	}
1294
1295	// Note: This is doing too much work if the precision
1296	// of z is less than the sum of the precisions of x
1297	// and y which is often the case (e.g., if all floats
1298	// have the same precision).
1299	// TODO(gri) Optimize this for the common case.
1300
1301	e := int64(x.exp) + int64(y.exp)
1302	z.mant = z.mant.mul(x.mant, y.mant)
1303
1304	z.setExpAndRound(e-fnorm(z.mant), 0)
1305}
1306
1307// z = x / y, ignoring signs of x and y for the division
1308// but using the sign of z for rounding the result.
1309// x and y must have a non-empty mantissa and valid exponent.
1310func (z *Float) uquo(x, y *Float) {
1311	if debugFloat {
1312		validateBinaryOperands(x, y)
1313	}
1314
1315	// mantissa length in words for desired result precision + 1
1316	// (at least one extra bit so we get the rounding bit after
1317	// the division)
1318	n := int(z.prec/_W) + 1
1319
1320	// compute adjusted x.mant such that we get enough result precision
1321	xadj := x.mant
1322	if d := n - len(x.mant) + len(y.mant); d > 0 {
1323		// d extra words needed => add d "0 digits" to x
1324		xadj = make(nat, len(x.mant)+d)
1325		copy(xadj[d:], x.mant)
1326	}
1327	// TODO(gri): If we have too many digits (d < 0), we should be able
1328	// to shorten x for faster division. But we must be extra careful
1329	// with rounding in that case.
1330
1331	// Compute d before division since there may be aliasing of x.mant
1332	// (via xadj) or y.mant with z.mant.
1333	d := len(xadj) - len(y.mant)
1334
1335	// divide
1336	var r nat
1337	z.mant, r = z.mant.div(nil, xadj, y.mant)
1338	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
1339
1340	// The result is long enough to include (at least) the rounding bit.
1341	// If there's a non-zero remainder, the corresponding fractional part
1342	// (if it were computed), would have a non-zero sticky bit (if it were
1343	// zero, it couldn't have a non-zero remainder).
1344	var sbit uint
1345	if len(r) > 0 {
1346		sbit = 1
1347	}
1348
1349	z.setExpAndRound(e-fnorm(z.mant), sbit)
1350}
1351
1352// ucmp returns -1, 0, or +1, depending on whether
1353// |x| < |y|, |x| == |y|, or |x| > |y|.
1354// x and y must have a non-empty mantissa and valid exponent.
1355func (x *Float) ucmp(y *Float) int {
1356	if debugFloat {
1357		validateBinaryOperands(x, y)
1358	}
1359
1360	switch {
1361	case x.exp < y.exp:
1362		return -1
1363	case x.exp > y.exp:
1364		return +1
1365	}
1366	// x.exp == y.exp
1367
1368	// compare mantissas
1369	i := len(x.mant)
1370	j := len(y.mant)
1371	for i > 0 || j > 0 {
1372		var xm, ym Word
1373		if i > 0 {
1374			i--
1375			xm = x.mant[i]
1376		}
1377		if j > 0 {
1378			j--
1379			ym = y.mant[j]
1380		}
1381		switch {
1382		case xm < ym:
1383			return -1
1384		case xm > ym:
1385			return +1
1386		}
1387	}
1388
1389	return 0
1390}
1391
1392// Handling of sign bit as defined by IEEE 754-2008, section 6.3:
1393//
1394// When neither the inputs nor result are NaN, the sign of a product or
1395// quotient is the exclusive OR of the operands’ signs; the sign of a sum,
1396// or of a difference x−y regarded as a sum x+(−y), differs from at most
1397// one of the addends’ signs; and the sign of the result of conversions,
1398// the quantize operation, the roundToIntegral operations, and the
1399// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
1400// These rules shall apply even when operands or results are zero or infinite.
1401//
1402// When the sum of two operands with opposite signs (or the difference of
1403// two operands with like signs) is exactly zero, the sign of that sum (or
1404// difference) shall be +0 in all rounding-direction attributes except
1405// roundTowardNegative; under that attribute, the sign of an exact zero
1406// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
1407// sign as x even when x is zero.
1408//
1409// See also: https://play.golang.org/p/RtH3UCt5IH
1410
1411// Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
1412// it is changed to the larger of x's or y's precision before the operation.
1413// Rounding is performed according to z's precision and rounding mode; and
1414// z's accuracy reports the result error relative to the exact (not rounded)
1415// result. Add panics with ErrNaN if x and y are infinities with opposite
1416// signs. The value of z is undefined in that case.
1417//
1418// BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect.
1419func (z *Float) Add(x, y *Float) *Float {
1420	if debugFloat {
1421		x.validate()
1422		y.validate()
1423	}
1424
1425	if z.prec == 0 {
1426		z.prec = umax32(x.prec, y.prec)
1427	}
1428
1429	if x.form == finite && y.form == finite {
1430		// x + y (commom case)
1431		z.neg = x.neg
1432		if x.neg == y.neg {
1433			// x + y == x + y
1434			// (-x) + (-y) == -(x + y)
1435			z.uadd(x, y)
1436		} else {
1437			// x + (-y) == x - y == -(y - x)
1438			// (-x) + y == y - x == -(x - y)
1439			if x.ucmp(y) > 0 {
1440				z.usub(x, y)
1441			} else {
1442				z.neg = !z.neg
1443				z.usub(y, x)
1444			}
1445		}
1446		return z
1447	}
1448
1449	if x.form == inf && y.form == inf && x.neg != y.neg {
1450		// +Inf + -Inf
1451		// -Inf + +Inf
1452		// value of z is undefined but make sure it's valid
1453		z.acc = Exact
1454		z.form = zero
1455		z.neg = false
1456		panic(ErrNaN{"addition of infinities with opposite signs"})
1457	}
1458
1459	if x.form == zero && y.form == zero {
1460		// ±0 + ±0
1461		z.acc = Exact
1462		z.form = zero
1463		z.neg = x.neg && y.neg // -0 + -0 == -0
1464		return z
1465	}
1466
1467	if x.form == inf || y.form == zero {
1468		// ±Inf + y
1469		// x + ±0
1470		return z.Set(x)
1471	}
1472
1473	// ±0 + y
1474	// x + ±Inf
1475	return z.Set(y)
1476}
1477
1478// Sub sets z to the rounded difference x-y and returns z.
1479// Precision, rounding, and accuracy reporting are as for Add.
1480// Sub panics with ErrNaN if x and y are infinities with equal
1481// signs. The value of z is undefined in that case.
1482func (z *Float) Sub(x, y *Float) *Float {
1483	if debugFloat {
1484		x.validate()
1485		y.validate()
1486	}
1487
1488	if z.prec == 0 {
1489		z.prec = umax32(x.prec, y.prec)
1490	}
1491
1492	if x.form == finite && y.form == finite {
1493		// x - y (common case)
1494		z.neg = x.neg
1495		if x.neg != y.neg {
1496			// x - (-y) == x + y
1497			// (-x) - y == -(x + y)
1498			z.uadd(x, y)
1499		} else {
1500			// x - y == x - y == -(y - x)
1501			// (-x) - (-y) == y - x == -(x - y)
1502			if x.ucmp(y) > 0 {
1503				z.usub(x, y)
1504			} else {
1505				z.neg = !z.neg
1506				z.usub(y, x)
1507			}
1508		}
1509		return z
1510	}
1511
1512	if x.form == inf && y.form == inf && x.neg == y.neg {
1513		// +Inf - +Inf
1514		// -Inf - -Inf
1515		// value of z is undefined but make sure it's valid
1516		z.acc = Exact
1517		z.form = zero
1518		z.neg = false
1519		panic(ErrNaN{"subtraction of infinities with equal signs"})
1520	}
1521
1522	if x.form == zero && y.form == zero {
1523		// ±0 - ±0
1524		z.acc = Exact
1525		z.form = zero
1526		z.neg = x.neg && !y.neg // -0 - +0 == -0
1527		return z
1528	}
1529
1530	if x.form == inf || y.form == zero {
1531		// ±Inf - y
1532		// x - ±0
1533		return z.Set(x)
1534	}
1535
1536	// ±0 - y
1537	// x - ±Inf
1538	return z.Neg(y)
1539}
1540
1541// Mul sets z to the rounded product x*y and returns z.
1542// Precision, rounding, and accuracy reporting are as for Add.
1543// Mul panics with ErrNaN if one operand is zero and the other
1544// operand an infinity. The value of z is undefined in that case.
1545func (z *Float) Mul(x, y *Float) *Float {
1546	if debugFloat {
1547		x.validate()
1548		y.validate()
1549	}
1550
1551	if z.prec == 0 {
1552		z.prec = umax32(x.prec, y.prec)
1553	}
1554
1555	z.neg = x.neg != y.neg
1556
1557	if x.form == finite && y.form == finite {
1558		// x * y (common case)
1559		z.umul(x, y)
1560		return z
1561	}
1562
1563	z.acc = Exact
1564	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
1565		// ±0 * ±Inf
1566		// ±Inf * ±0
1567		// value of z is undefined but make sure it's valid
1568		z.form = zero
1569		z.neg = false
1570		panic(ErrNaN{"multiplication of zero with infinity"})
1571	}
1572
1573	if x.form == inf || y.form == inf {
1574		// ±Inf * y
1575		// x * ±Inf
1576		z.form = inf
1577		return z
1578	}
1579
1580	// ±0 * y
1581	// x * ±0
1582	z.form = zero
1583	return z
1584}
1585
1586// Quo sets z to the rounded quotient x/y and returns z.
1587// Precision, rounding, and accuracy reporting are as for Add.
1588// Quo panics with ErrNaN if both operands are zero or infinities.
1589// The value of z is undefined in that case.
1590func (z *Float) Quo(x, y *Float) *Float {
1591	if debugFloat {
1592		x.validate()
1593		y.validate()
1594	}
1595
1596	if z.prec == 0 {
1597		z.prec = umax32(x.prec, y.prec)
1598	}
1599
1600	z.neg = x.neg != y.neg
1601
1602	if x.form == finite && y.form == finite {
1603		// x / y (common case)
1604		z.uquo(x, y)
1605		return z
1606	}
1607
1608	z.acc = Exact
1609	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
1610		// ±0 / ±0
1611		// ±Inf / ±Inf
1612		// value of z is undefined but make sure it's valid
1613		z.form = zero
1614		z.neg = false
1615		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
1616	}
1617
1618	if x.form == zero || y.form == inf {
1619		// ±0 / y
1620		// x / ±Inf
1621		z.form = zero
1622		return z
1623	}
1624
1625	// x / ±0
1626	// ±Inf / y
1627	z.form = inf
1628	return z
1629}
1630
1631// Cmp compares x and y and returns:
1632//
1633//   -1 if x <  y
1634//    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
1635//   +1 if x >  y
1636//
1637func (x *Float) Cmp(y *Float) int {
1638	if debugFloat {
1639		x.validate()
1640		y.validate()
1641	}
1642
1643	mx := x.ord()
1644	my := y.ord()
1645	switch {
1646	case mx < my:
1647		return -1
1648	case mx > my:
1649		return +1
1650	}
1651	// mx == my
1652
1653	// only if |mx| == 1 we have to compare the mantissae
1654	switch mx {
1655	case -1:
1656		return y.ucmp(x)
1657	case +1:
1658		return x.ucmp(y)
1659	}
1660
1661	return 0
1662}
1663
1664// ord classifies x and returns:
1665//
1666//	-2 if -Inf == x
1667//	-1 if -Inf < x < 0
1668//	 0 if x == 0 (signed or unsigned)
1669//	+1 if 0 < x < +Inf
1670//	+2 if x == +Inf
1671//
1672func (x *Float) ord() int {
1673	var m int
1674	switch x.form {
1675	case finite:
1676		m = 1
1677	case zero:
1678		return 0
1679	case inf:
1680		m = 2
1681	}
1682	if x.neg {
1683		m = -m
1684	}
1685	return m
1686}
1687
1688func umax32(x, y uint32) uint32 {
1689	if x > y {
1690		return x
1691	}
1692	return y
1693}
1694