// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package strconv

// decimal to binary floating point conversion.
// Algorithm:
//   1) Store input in multiprecision decimal.
//   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
//   3) Multiply by 2^precision and round to get mantissa.

import "math"

// optimize enables the fast conversion paths tried by atof32 and atof64
// before they fall back to the multiprecision decimal conversion.
// Set it to false to force slow-path conversions for testing.
var optimize = true

// equalIgnoreCase reports whether s1 and s2 are equal when the ASCII
// letters 'A'-'Z' are folded to 'a'-'z'. The comparison is byte-wise:
// no Unicode case folding is performed, and bytes outside 'A'-'Z' must
// match exactly.
func equalIgnoreCase(s1, s2 string) bool {
	if len(s1) != len(s2) {
		return false
	}
	for i := 0; i < len(s1); i++ {
		c1, c2 := s1[i], s2[i]
		if 'A' <= c1 && c1 <= 'Z' {
			c1 += 'a' - 'A'
		}
		if 'A' <= c2 && c2 <= 'Z' {
			c2 += 'a' - 'A'
		}
		if c1 != c2 {
			return false
		}
	}
	return true
}

37func special(s string) (f float64, ok bool) {
38	if len(s) == 0 {
39		return
40	}
41	switch s[0] {
42	default:
43		return
44	case '+':
45		if equalIgnoreCase(s, "+inf") || equalIgnoreCase(s, "+infinity") {
46			return math.Inf(1), true
47		}
48	case '-':
49		if equalIgnoreCase(s, "-inf") || equalIgnoreCase(s, "-infinity") {
50			return math.Inf(-1), true
51		}
52	case 'n', 'N':
53		if equalIgnoreCase(s, "nan") {
54			return math.NaN(), true
55		}
56	case 'i', 'I':
57		if equalIgnoreCase(s, "inf") || equalIgnoreCase(s, "infinity") {
58			return math.Inf(1), true
59		}
60	}
61	return
62}
63
64func (b *decimal) set(s string) (ok bool) {
65	i := 0
66	b.neg = false
67	b.trunc = false
68
69	// optional sign
70	if i >= len(s) {
71		return
72	}
73	switch {
74	case s[i] == '+':
75		i++
76	case s[i] == '-':
77		b.neg = true
78		i++
79	}
80
81	// digits
82	sawdot := false
83	sawdigits := false
84	for ; i < len(s); i++ {
85		switch {
86		case s[i] == '_':
87			// readFloat already checked underscores
88			continue
89		case s[i] == '.':
90			if sawdot {
91				return
92			}
93			sawdot = true
94			b.dp = b.nd
95			continue
96
97		case '0' <= s[i] && s[i] <= '9':
98			sawdigits = true
99			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
100				b.dp--
101				continue
102			}
103			if b.nd < len(b.d) {
104				b.d[b.nd] = s[i]
105				b.nd++
106			} else if s[i] != '0' {
107				b.trunc = true
108			}
109			continue
110		}
111		break
112	}
113	if !sawdigits {
114		return
115	}
116	if !sawdot {
117		b.dp = b.nd
118	}
119
120	// optional exponent moves decimal point.
121	// if we read a very large, very long number,
122	// just be sure to move the decimal point by
123	// a lot (say, 100000).  it doesn't matter if it's
124	// not the exact number.
125	if i < len(s) && lower(s[i]) == 'e' {
126		i++
127		if i >= len(s) {
128			return
129		}
130		esign := 1
131		if s[i] == '+' {
132			i++
133		} else if s[i] == '-' {
134			i++
135			esign = -1
136		}
137		if i >= len(s) || s[i] < '0' || s[i] > '9' {
138			return
139		}
140		e := 0
141		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
142			if s[i] == '_' {
143				// readFloat already checked underscores
144				continue
145			}
146			if e < 10000 {
147				e = e*10 + int(s[i]) - '0'
148			}
149		}
150		b.dp += e * esign
151	}
152
153	if i != len(s) {
154		return
155	}
156
157	ok = true
158	return
159}
160
161// readFloat reads a decimal mantissa and exponent from a float
162// string representation. It returns ok==false if the number
163// is invalid.
164func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) {
165	i := 0
166	underscores := false
167
168	// optional sign
169	if i >= len(s) {
170		return
171	}
172	switch {
173	case s[i] == '+':
174		i++
175	case s[i] == '-':
176		neg = true
177		i++
178	}
179
180	// digits
181	base := uint64(10)
182	maxMantDigits := 19 // 10^19 fits in uint64
183	expChar := byte('e')
184	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
185		base = 16
186		maxMantDigits = 16 // 16^16 fits in uint64
187		i += 2
188		expChar = 'p'
189		hex = true
190	}
191	sawdot := false
192	sawdigits := false
193	nd := 0
194	ndMant := 0
195	dp := 0
196	for ; i < len(s); i++ {
197		switch c := s[i]; true {
198		case c == '_':
199			underscores = true
200			continue
201
202		case c == '.':
203			if sawdot {
204				return
205			}
206			sawdot = true
207			dp = nd
208			continue
209
210		case '0' <= c && c <= '9':
211			sawdigits = true
212			if c == '0' && nd == 0 { // ignore leading zeros
213				dp--
214				continue
215			}
216			nd++
217			if ndMant < maxMantDigits {
218				mantissa *= base
219				mantissa += uint64(c - '0')
220				ndMant++
221			} else if c != '0' {
222				trunc = true
223			}
224			continue
225
226		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
227			sawdigits = true
228			nd++
229			if ndMant < maxMantDigits {
230				mantissa *= 16
231				mantissa += uint64(lower(c) - 'a' + 10)
232				ndMant++
233			} else {
234				trunc = true
235			}
236			continue
237		}
238		break
239	}
240	if !sawdigits {
241		return
242	}
243	if !sawdot {
244		dp = nd
245	}
246
247	if base == 16 {
248		dp *= 4
249		ndMant *= 4
250	}
251
252	// optional exponent moves decimal point.
253	// if we read a very large, very long number,
254	// just be sure to move the decimal point by
255	// a lot (say, 100000).  it doesn't matter if it's
256	// not the exact number.
257	if i < len(s) && lower(s[i]) == expChar {
258		i++
259		if i >= len(s) {
260			return
261		}
262		esign := 1
263		if s[i] == '+' {
264			i++
265		} else if s[i] == '-' {
266			i++
267			esign = -1
268		}
269		if i >= len(s) || s[i] < '0' || s[i] > '9' {
270			return
271		}
272		e := 0
273		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
274			if s[i] == '_' {
275				underscores = true
276				continue
277			}
278			if e < 10000 {
279				e = e*10 + int(s[i]) - '0'
280			}
281		}
282		dp += e * esign
283	} else if base == 16 {
284		// Must have exponent.
285		return
286	}
287
288	if i != len(s) {
289		return
290	}
291
292	if mantissa != 0 {
293		exp = dp - ndMant
294	}
295
296	if underscores && !underscoreOK(s) {
297		return
298	}
299
300	ok = true
301	return
302}
303
// decimal power of ten to binary power of two.
// floatBits indexes powtab by the distance of the decimal point from zero
// to choose how many binary places to shift by; distances beyond the end
// of the table use a shift of 27. For n >= 1, powtab[n] is the largest k
// with 2^k <= 10^n. powtab[0] is 1 rather than 0 so that the scaling loops
// in floatBits always make progress.
var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}

307func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
308	var exp int
309	var mant uint64
310
311	// Zero is always a special case.
312	if d.nd == 0 {
313		mant = 0
314		exp = flt.bias
315		goto out
316	}
317
318	// Obvious overflow/underflow.
319	// These bounds are for 64-bit floats.
320	// Will have to change if we want to support 80-bit floats in the future.
321	if d.dp > 310 {
322		goto overflow
323	}
324	if d.dp < -330 {
325		// zero
326		mant = 0
327		exp = flt.bias
328		goto out
329	}
330
331	// Scale by powers of two until in range [0.5, 1.0)
332	exp = 0
333	for d.dp > 0 {
334		var n int
335		if d.dp >= len(powtab) {
336			n = 27
337		} else {
338			n = powtab[d.dp]
339		}
340		d.Shift(-n)
341		exp += n
342	}
343	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
344		var n int
345		if -d.dp >= len(powtab) {
346			n = 27
347		} else {
348			n = powtab[-d.dp]
349		}
350		d.Shift(n)
351		exp -= n
352	}
353
354	// Our range is [0.5,1) but floating point range is [1,2).
355	exp--
356
357	// Minimum representable exponent is flt.bias+1.
358	// If the exponent is smaller, move it up and
359	// adjust d accordingly.
360	if exp < flt.bias+1 {
361		n := flt.bias + 1 - exp
362		d.Shift(-n)
363		exp += n
364	}
365
366	if exp-flt.bias >= 1<<flt.expbits-1 {
367		goto overflow
368	}
369
370	// Extract 1+flt.mantbits bits.
371	d.Shift(int(1 + flt.mantbits))
372	mant = d.RoundedInteger()
373
374	// Rounding might have added a bit; shift down.
375	if mant == 2<<flt.mantbits {
376		mant >>= 1
377		exp++
378		if exp-flt.bias >= 1<<flt.expbits-1 {
379			goto overflow
380		}
381	}
382
383	// Denormalized?
384	if mant&(1<<flt.mantbits) == 0 {
385		exp = flt.bias
386	}
387	goto out
388
389overflow:
390	// ±Inf
391	mant = 0
392	exp = 1<<flt.expbits - 1 + flt.bias
393	overflow = true
394
395out:
396	// Assemble bits.
397	bits := mant & (uint64(1)<<flt.mantbits - 1)
398	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
399	if d.neg {
400		bits |= 1 << flt.mantbits << flt.expbits
401	}
402	return bits, overflow
403}
404
// Exact powers of 10.
// float64pow10[i] == 10^i for 0 <= i <= 22; atof64exact relies on these
// being exact float64 values.
var float64pow10 = []float64{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
	1e20, 1e21, 1e22,
}

// float32pow10[i] == 10^i for 0 <= i <= 10; it is the float32 counterpart
// of float64pow10 and is used by atof32exact.
var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}

413// If possible to convert decimal representation to 64-bit float f exactly,
414// entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
415// Three common cases:
416//	value is exact integer
417//	value is exact integer * exact power of ten
418//	value is exact integer / exact power of ten
419// These all produce potentially inexact but correctly rounded answers.
420func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
421	if mantissa>>float64info.mantbits != 0 {
422		return
423	}
424	f = float64(mantissa)
425	if neg {
426		f = -f
427	}
428	switch {
429	case exp == 0:
430		// an integer.
431		return f, true
432	// Exact integers are <= 10^15.
433	// Exact powers of ten are <= 10^22.
434	case exp > 0 && exp <= 15+22: // int * 10^k
435		// If exponent is big but number of digits is not,
436		// can move a few zeros into the integer part.
437		if exp > 22 {
438			f *= float64pow10[exp-22]
439			exp = 22
440		}
441		if f > 1e15 || f < -1e15 {
442			// the exponent was really too large.
443			return
444		}
445		return f * float64pow10[exp], true
446	case exp < 0 && exp >= -22: // int / 10^k
447		return f / float64pow10[-exp], true
448	}
449	return
450}
451
452// If possible to compute mantissa*10^exp to 32-bit float f exactly,
453// entirely in floating-point math, do so, avoiding the machinery above.
454func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
455	if mantissa>>float32info.mantbits != 0 {
456		return
457	}
458	f = float32(mantissa)
459	if neg {
460		f = -f
461	}
462	switch {
463	case exp == 0:
464		return f, true
465	// Exact integers are <= 10^7.
466	// Exact powers of ten are <= 10^10.
467	case exp > 0 && exp <= 7+10: // int * 10^k
468		// If exponent is big but number of digits is not,
469		// can move a few zeros into the integer part.
470		if exp > 10 {
471			f *= float32pow10[exp-10]
472			exp = 10
473		}
474		if f > 1e7 || f < -1e7 {
475			// the exponent was really too large.
476			return
477		}
478		return f * float32pow10[exp], true
479	case exp < 0 && exp >= -10: // int / 10^k
480		return f / float32pow10[-exp], true
481	}
482	return
483}
484
485// atofHex converts the hex floating-point string s
486// to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
487// and returns it as a float64.
488// The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
489// If trunc is true, trailing non-zero bits have been omitted from the mantissa.
490func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
491	maxExp := 1<<flt.expbits + flt.bias - 2
492	minExp := flt.bias + 1
493	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.
494
495	// Shift mantissa and exponent to bring representation into float range.
496	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
497	// For rounding, we need two more, where the bottom bit represents
498	// whether that bit or any later bit was non-zero.
499	// (If the mantissa has already lost non-zero bits, trunc is true,
500	// and we OR in a 1 below after shifting left appropriately.)
501	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
502		mantissa <<= 1
503		exp--
504	}
505	if trunc {
506		mantissa |= 1
507	}
508	for mantissa>>(1+flt.mantbits+2) != 0 {
509		mantissa = mantissa>>1 | mantissa&1
510		exp++
511	}
512
513	// If exponent is too negative,
514	// denormalize in hopes of making it representable.
515	// (The -2 is for the rounding bits.)
516	for mantissa > 1 && exp < minExp-2 {
517		mantissa = mantissa>>1 | mantissa&1
518		exp++
519	}
520
521	// Round using two bottom bits.
522	round := mantissa & 3
523	mantissa >>= 2
524	round |= mantissa & 1 // round to even (round up if mantissa is odd)
525	exp += 2
526	if round == 3 {
527		mantissa++
528		if mantissa == 1<<(1+flt.mantbits) {
529			mantissa >>= 1
530			exp++
531		}
532	}
533
534	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
535		exp = flt.bias
536	}
537	var err error
538	if exp > maxExp { // infinity and range error
539		mantissa = 1 << flt.mantbits
540		exp = maxExp + 1
541		err = rangeError(fnParseFloat, s)
542	}
543
544	bits := mantissa & (1<<flt.mantbits - 1)
545	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
546	if neg {
547		bits |= 1 << flt.mantbits << flt.expbits
548	}
549	if flt == &float32info {
550		return float64(math.Float32frombits(uint32(bits))), err
551	}
552	return math.Float64frombits(bits), err
553}
554
// fnParseFloat is the function name passed to syntaxError and rangeError
// for errors produced while parsing floats.
const fnParseFloat = "ParseFloat"

557func atof32(s string) (f float32, err error) {
558	if val, ok := special(s); ok {
559		return float32(val), nil
560	}
561
562	mantissa, exp, neg, trunc, hex, ok := readFloat(s)
563	if !ok {
564		return 0, syntaxError(fnParseFloat, s)
565	}
566
567	if hex {
568		f, err := atofHex(s, &float32info, mantissa, exp, neg, trunc)
569		return float32(f), err
570	}
571
572	if optimize {
573		// Try pure floating-point arithmetic conversion.
574		if !trunc {
575			if f, ok := atof32exact(mantissa, exp, neg); ok {
576				return f, nil
577			}
578		}
579		// Try another fast path.
580		ext := new(extFloat)
581		if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float32info); ok {
582			b, ovf := ext.floatBits(&float32info)
583			f = math.Float32frombits(uint32(b))
584			if ovf {
585				err = rangeError(fnParseFloat, s)
586			}
587			return f, err
588		}
589	}
590
591	// Slow fallback.
592	var d decimal
593	if !d.set(s) {
594		return 0, syntaxError(fnParseFloat, s)
595	}
596	b, ovf := d.floatBits(&float32info)
597	f = math.Float32frombits(uint32(b))
598	if ovf {
599		err = rangeError(fnParseFloat, s)
600	}
601	return f, err
602}
603
604func atof64(s string) (f float64, err error) {
605	if val, ok := special(s); ok {
606		return val, nil
607	}
608
609	mantissa, exp, neg, trunc, hex, ok := readFloat(s)
610	if !ok {
611		return 0, syntaxError(fnParseFloat, s)
612	}
613
614	if hex {
615		return atofHex(s, &float64info, mantissa, exp, neg, trunc)
616	}
617
618	if optimize {
619		// Try pure floating-point arithmetic conversion.
620		if !trunc {
621			if f, ok := atof64exact(mantissa, exp, neg); ok {
622				return f, nil
623			}
624		}
625		// Try another fast path.
626		ext := new(extFloat)
627		if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float64info); ok {
628			b, ovf := ext.floatBits(&float64info)
629			f = math.Float64frombits(b)
630			if ovf {
631				err = rangeError(fnParseFloat, s)
632			}
633			return f, err
634		}
635	}
636
637	// Slow fallback.
638	var d decimal
639	if !d.set(s) {
640		return 0, syntaxError(fnParseFloat, s)
641	}
642	b, ovf := d.floatBits(&float64info)
643	f = math.Float64frombits(b)
644	if ovf {
645		err = rangeError(fnParseFloat, s)
646	}
647	return f, err
648}
649
650// ParseFloat converts the string s to a floating-point number
651// with the precision specified by bitSize: 32 for float32, or 64 for float64.
652// When bitSize=32, the result still has type float64, but it will be
653// convertible to float32 without changing its value.
654//
655// ParseFloat accepts decimal and hexadecimal floating-point number syntax.
656// If s is well-formed and near a valid floating-point number,
657// ParseFloat returns the nearest floating-point number rounded
658// using IEEE754 unbiased rounding.
659// (Parsing a hexadecimal floating-point value only rounds when
660// there are more bits in the hexadecimal representation than
661// will fit in the mantissa.)
662//
663// The errors that ParseFloat returns have concrete type *NumError
664// and include err.Num = s.
665//
666// If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
667//
668// If s is syntactically well-formed but is more than 1/2 ULP
669// away from the largest floating point number of the given size,
670// ParseFloat returns f = ±Inf, err.Err = ErrRange.
671//
672// ParseFloat recognizes the strings "NaN", "+Inf", and "-Inf" as their
673// respective special floating point values. It ignores case when matching.
674func ParseFloat(s string, bitSize int) (float64, error) {
675	if bitSize == 32 {
676		f, err := atof32(s)
677		return float64(f), err
678	}
679	return atof64(s)
680}
681