1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package strconv
6
7// decimal to binary floating point conversion.
8// Algorithm:
9//   1) Store input in multiprecision decimal.
10//   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
11//   3) Multiply by 2^precision and round to get mantissa.
12
13import "math"
14import "runtime"
15
16var optimize = true // set to false to force slow-path conversions for testing
17
18// commonPrefixLenIgnoreCase returns the length of the common
19// prefix of s and prefix, with the character case of s ignored.
20// The prefix argument must be all lower-case.
21func commonPrefixLenIgnoreCase(s, prefix string) int {
22	n := len(prefix)
23	if n > len(s) {
24		n = len(s)
25	}
26	for i := 0; i < n; i++ {
27		c := s[i]
28		if 'A' <= c && c <= 'Z' {
29			c += 'a' - 'A'
30		}
31		if c != prefix[i] {
32			return i
33		}
34	}
35	return n
36}
37
38// special returns the floating-point value for the special,
39// possibly signed floating-point representations inf, infinity,
40// and NaN. The result is ok if a prefix of s contains one
41// of these representations and n is the length of that prefix.
42// The character case is ignored.
43func special(s string) (f float64, n int, ok bool) {
44	if len(s) == 0 {
45		return 0, 0, false
46	}
47	sign := 1
48	nsign := 0
49	switch s[0] {
50	case '+', '-':
51		if s[0] == '-' {
52			sign = -1
53		}
54		nsign = 1
55		s = s[1:]
56		fallthrough
57	case 'i', 'I':
58		n := commonPrefixLenIgnoreCase(s, "infinity")
59		// Anything longer than "inf" is ok, but if we
60		// don't have "infinity", only consume "inf".
61		if 3 < n && n < 8 {
62			n = 3
63		}
64		if n == 3 || n == 8 {
65			return math.Inf(sign), nsign + n, true
66		}
67	case 'n', 'N':
68		if commonPrefixLenIgnoreCase(s, "nan") == 3 {
69			return math.NaN(), 3, true
70		}
71	}
72	return 0, 0, false
73}
74
75func (b *decimal) set(s string) (ok bool) {
76	i := 0
77	b.neg = false
78	b.trunc = false
79
80	// optional sign
81	if i >= len(s) {
82		return
83	}
84	switch {
85	case s[i] == '+':
86		i++
87	case s[i] == '-':
88		b.neg = true
89		i++
90	}
91
92	// digits
93	sawdot := false
94	sawdigits := false
95	for ; i < len(s); i++ {
96		switch {
97		case s[i] == '_':
98			// readFloat already checked underscores
99			continue
100		case s[i] == '.':
101			if sawdot {
102				return
103			}
104			sawdot = true
105			b.dp = b.nd
106			continue
107
108		case '0' <= s[i] && s[i] <= '9':
109			sawdigits = true
110			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
111				b.dp--
112				continue
113			}
114			if b.nd < len(b.d) {
115				b.d[b.nd] = s[i]
116				b.nd++
117			} else if s[i] != '0' {
118				b.trunc = true
119			}
120			continue
121		}
122		break
123	}
124	if !sawdigits {
125		return
126	}
127	if !sawdot {
128		b.dp = b.nd
129	}
130
131	// optional exponent moves decimal point.
132	// if we read a very large, very long number,
133	// just be sure to move the decimal point by
134	// a lot (say, 100000).  it doesn't matter if it's
135	// not the exact number.
136	if i < len(s) && lower(s[i]) == 'e' {
137		i++
138		if i >= len(s) {
139			return
140		}
141		esign := 1
142		if s[i] == '+' {
143			i++
144		} else if s[i] == '-' {
145			i++
146			esign = -1
147		}
148		if i >= len(s) || s[i] < '0' || s[i] > '9' {
149			return
150		}
151		e := 0
152		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
153			if s[i] == '_' {
154				// readFloat already checked underscores
155				continue
156			}
157			if e < 10000 {
158				e = e*10 + int(s[i]) - '0'
159			}
160		}
161		b.dp += e * esign
162	}
163
164	if i != len(s) {
165		return
166	}
167
168	ok = true
169	return
170}
171
172// readFloat reads a decimal or hexadecimal mantissa and exponent from a float
173// string representation in s; the number may be followed by other characters.
174// readFloat reports the number of bytes consumed (i), and whether the number
175// is valid (ok).
176func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) {
177	underscores := false
178
179	// optional sign
180	if i >= len(s) {
181		return
182	}
183	switch {
184	case s[i] == '+':
185		i++
186	case s[i] == '-':
187		neg = true
188		i++
189	}
190
191	// digits
192	base := uint64(10)
193	maxMantDigits := 19 // 10^19 fits in uint64
194	expChar := byte('e')
195	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
196		base = 16
197		maxMantDigits = 16 // 16^16 fits in uint64
198		i += 2
199		expChar = 'p'
200		hex = true
201	}
202	sawdot := false
203	sawdigits := false
204	nd := 0
205	ndMant := 0
206	dp := 0
207loop:
208	for ; i < len(s); i++ {
209		switch c := s[i]; true {
210		case c == '_':
211			underscores = true
212			continue
213
214		case c == '.':
215			if sawdot {
216				break loop
217			}
218			sawdot = true
219			dp = nd
220			continue
221
222		case '0' <= c && c <= '9':
223			sawdigits = true
224			if c == '0' && nd == 0 { // ignore leading zeros
225				dp--
226				continue
227			}
228			nd++
229			if ndMant < maxMantDigits {
230				mantissa *= base
231				mantissa += uint64(c - '0')
232				ndMant++
233			} else if c != '0' {
234				trunc = true
235			}
236			continue
237
238		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
239			sawdigits = true
240			nd++
241			if ndMant < maxMantDigits {
242				mantissa *= 16
243				mantissa += uint64(lower(c) - 'a' + 10)
244				ndMant++
245			} else {
246				trunc = true
247			}
248			continue
249		}
250		break
251	}
252	if !sawdigits {
253		return
254	}
255	if !sawdot {
256		dp = nd
257	}
258
259	if base == 16 {
260		dp *= 4
261		ndMant *= 4
262	}
263
264	// optional exponent moves decimal point.
265	// if we read a very large, very long number,
266	// just be sure to move the decimal point by
267	// a lot (say, 100000).  it doesn't matter if it's
268	// not the exact number.
269	if i < len(s) && lower(s[i]) == expChar {
270		i++
271		if i >= len(s) {
272			return
273		}
274		esign := 1
275		if s[i] == '+' {
276			i++
277		} else if s[i] == '-' {
278			i++
279			esign = -1
280		}
281		if i >= len(s) || s[i] < '0' || s[i] > '9' {
282			return
283		}
284		e := 0
285		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
286			if s[i] == '_' {
287				underscores = true
288				continue
289			}
290			if e < 10000 {
291				e = e*10 + int(s[i]) - '0'
292			}
293		}
294		dp += e * esign
295	} else if base == 16 {
296		// Must have exponent.
297		return
298	}
299
300	if mantissa != 0 {
301		exp = dp - ndMant
302	}
303
304	if underscores && !underscoreOK(s[:i]) {
305		return
306	}
307
308	ok = true
309	return
310}
311
312// decimal power of ten to binary power of two.
313var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
314
315func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
316	var exp int
317	var mant uint64
318
319	// Zero is always a special case.
320	if d.nd == 0 {
321		mant = 0
322		exp = flt.bias
323		goto out
324	}
325
326	// Obvious overflow/underflow.
327	// These bounds are for 64-bit floats.
328	// Will have to change if we want to support 80-bit floats in the future.
329	if d.dp > 310 {
330		goto overflow
331	}
332	if d.dp < -330 {
333		// zero
334		mant = 0
335		exp = flt.bias
336		goto out
337	}
338
339	// Scale by powers of two until in range [0.5, 1.0)
340	exp = 0
341	for d.dp > 0 {
342		var n int
343		if d.dp >= len(powtab) {
344			n = 27
345		} else {
346			n = powtab[d.dp]
347		}
348		d.Shift(-n)
349		exp += n
350	}
351	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
352		var n int
353		if -d.dp >= len(powtab) {
354			n = 27
355		} else {
356			n = powtab[-d.dp]
357		}
358		d.Shift(n)
359		exp -= n
360	}
361
362	// Our range is [0.5,1) but floating point range is [1,2).
363	exp--
364
365	// Minimum representable exponent is flt.bias+1.
366	// If the exponent is smaller, move it up and
367	// adjust d accordingly.
368	if exp < flt.bias+1 {
369		n := flt.bias + 1 - exp
370		d.Shift(-n)
371		exp += n
372	}
373
374	if exp-flt.bias >= 1<<flt.expbits-1 {
375		goto overflow
376	}
377
378	// Extract 1+flt.mantbits bits.
379	d.Shift(int(1 + flt.mantbits))
380	mant = d.RoundedInteger()
381
382	// Rounding might have added a bit; shift down.
383	if mant == 2<<flt.mantbits {
384		mant >>= 1
385		exp++
386		if exp-flt.bias >= 1<<flt.expbits-1 {
387			goto overflow
388		}
389	}
390
391	// Denormalized?
392	if mant&(1<<flt.mantbits) == 0 {
393		exp = flt.bias
394	}
395	goto out
396
397overflow:
398	// ±Inf
399	mant = 0
400	exp = 1<<flt.expbits - 1 + flt.bias
401	overflow = true
402
403out:
404	// Assemble bits.
405	bits := mant & (uint64(1)<<flt.mantbits - 1)
406	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
407	if d.neg {
408		bits |= 1 << flt.mantbits << flt.expbits
409	}
410	return bits, overflow
411}
412
413// Exact powers of 10.
414var float64pow10 = []float64{
415	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
416	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
417	1e20, 1e21, 1e22,
418}
419var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
420
421// If possible to convert decimal representation to 64-bit float f exactly,
422// entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
423// Three common cases:
424//	value is exact integer
425//	value is exact integer * exact power of ten
426//	value is exact integer / exact power of ten
427// These all produce potentially inexact but correctly rounded answers.
428func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
429	if mantissa>>float64info.mantbits != 0 {
430		return
431	}
432	// gccgo gets this wrong on 32-bit i386 when not using -msse.
433	// See TestRoundTrip in atof_test.go for a test case.
434	if runtime.GOARCH == "386" {
435		return
436	}
437	f = float64(mantissa)
438	if neg {
439		f = -f
440	}
441	switch {
442	case exp == 0:
443		// an integer.
444		return f, true
445	// Exact integers are <= 10^15.
446	// Exact powers of ten are <= 10^22.
447	case exp > 0 && exp <= 15+22: // int * 10^k
448		// If exponent is big but number of digits is not,
449		// can move a few zeros into the integer part.
450		if exp > 22 {
451			f *= float64pow10[exp-22]
452			exp = 22
453		}
454		if f > 1e15 || f < -1e15 {
455			// the exponent was really too large.
456			return
457		}
458		return f * float64pow10[exp], true
459	case exp < 0 && exp >= -22: // int / 10^k
460		return f / float64pow10[-exp], true
461	}
462	return
463}
464
465// If possible to compute mantissa*10^exp to 32-bit float f exactly,
466// entirely in floating-point math, do so, avoiding the machinery above.
467func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
468	if mantissa>>float32info.mantbits != 0 {
469		return
470	}
471	f = float32(mantissa)
472	if neg {
473		f = -f
474	}
475	switch {
476	case exp == 0:
477		return f, true
478	// Exact integers are <= 10^7.
479	// Exact powers of ten are <= 10^10.
480	case exp > 0 && exp <= 7+10: // int * 10^k
481		// If exponent is big but number of digits is not,
482		// can move a few zeros into the integer part.
483		if exp > 10 {
484			f *= float32pow10[exp-10]
485			exp = 10
486		}
487		if f > 1e7 || f < -1e7 {
488			// the exponent was really too large.
489			return
490		}
491		return f * float32pow10[exp], true
492	case exp < 0 && exp >= -10: // int / 10^k
493		return f / float32pow10[-exp], true
494	}
495	return
496}
497
498// atofHex converts the hex floating-point string s
499// to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
500// and returns it as a float64.
501// The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
502// If trunc is true, trailing non-zero bits have been omitted from the mantissa.
503func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
504	maxExp := 1<<flt.expbits + flt.bias - 2
505	minExp := flt.bias + 1
506	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.
507
508	// Shift mantissa and exponent to bring representation into float range.
509	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
510	// For rounding, we need two more, where the bottom bit represents
511	// whether that bit or any later bit was non-zero.
512	// (If the mantissa has already lost non-zero bits, trunc is true,
513	// and we OR in a 1 below after shifting left appropriately.)
514	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
515		mantissa <<= 1
516		exp--
517	}
518	if trunc {
519		mantissa |= 1
520	}
521	for mantissa>>(1+flt.mantbits+2) != 0 {
522		mantissa = mantissa>>1 | mantissa&1
523		exp++
524	}
525
526	// If exponent is too negative,
527	// denormalize in hopes of making it representable.
528	// (The -2 is for the rounding bits.)
529	for mantissa > 1 && exp < minExp-2 {
530		mantissa = mantissa>>1 | mantissa&1
531		exp++
532	}
533
534	// Round using two bottom bits.
535	round := mantissa & 3
536	mantissa >>= 2
537	round |= mantissa & 1 // round to even (round up if mantissa is odd)
538	exp += 2
539	if round == 3 {
540		mantissa++
541		if mantissa == 1<<(1+flt.mantbits) {
542			mantissa >>= 1
543			exp++
544		}
545	}
546
547	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
548		exp = flt.bias
549	}
550	var err error
551	if exp > maxExp { // infinity and range error
552		mantissa = 1 << flt.mantbits
553		exp = maxExp + 1
554		err = rangeError(fnParseFloat, s)
555	}
556
557	bits := mantissa & (1<<flt.mantbits - 1)
558	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
559	if neg {
560		bits |= 1 << flt.mantbits << flt.expbits
561	}
562	if flt == &float32info {
563		return float64(math.Float32frombits(uint32(bits))), err
564	}
565	return math.Float64frombits(bits), err
566}
567
568const fnParseFloat = "ParseFloat"
569
570func atof32(s string) (f float32, n int, err error) {
571	if val, n, ok := special(s); ok {
572		return float32(val), n, nil
573	}
574
575	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
576	if !ok {
577		return 0, n, syntaxError(fnParseFloat, s)
578	}
579
580	if hex {
581		f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
582		return float32(f), n, err
583	}
584
585	if optimize {
586		// Try pure floating-point arithmetic conversion, and if that fails,
587		// the Eisel-Lemire algorithm.
588		if !trunc {
589			if f, ok := atof32exact(mantissa, exp, neg); ok {
590				return f, n, nil
591			}
592		}
593		f, ok := eiselLemire32(mantissa, exp, neg)
594		if ok {
595			if !trunc {
596				return f, n, nil
597			}
598			// Even if the mantissa was truncated, we may
599			// have found the correct result. Confirm by
600			// converting the upper mantissa bound.
601			fUp, ok := eiselLemire32(mantissa+1, exp, neg)
602			if ok && f == fUp {
603				return f, n, nil
604			}
605		}
606	}
607
608	// Slow fallback.
609	var d decimal
610	if !d.set(s[:n]) {
611		return 0, n, syntaxError(fnParseFloat, s)
612	}
613	b, ovf := d.floatBits(&float32info)
614	f = math.Float32frombits(uint32(b))
615	if ovf {
616		err = rangeError(fnParseFloat, s)
617	}
618	return f, n, err
619}
620
621func atof64(s string) (f float64, n int, err error) {
622	if val, n, ok := special(s); ok {
623		return val, n, nil
624	}
625
626	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
627	if !ok {
628		return 0, n, syntaxError(fnParseFloat, s)
629	}
630
631	if hex {
632		f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
633		return f, n, err
634	}
635
636	if optimize {
637		// Try pure floating-point arithmetic conversion, and if that fails,
638		// the Eisel-Lemire algorithm.
639		if !trunc {
640			if f, ok := atof64exact(mantissa, exp, neg); ok {
641				return f, n, nil
642			}
643		}
644		f, ok := eiselLemire64(mantissa, exp, neg)
645		if ok {
646			if !trunc {
647				return f, n, nil
648			}
649			// Even if the mantissa was truncated, we may
650			// have found the correct result. Confirm by
651			// converting the upper mantissa bound.
652			fUp, ok := eiselLemire64(mantissa+1, exp, neg)
653			if ok && f == fUp {
654				return f, n, nil
655			}
656		}
657	}
658
659	// Slow fallback.
660	var d decimal
661	if !d.set(s[:n]) {
662		return 0, n, syntaxError(fnParseFloat, s)
663	}
664	b, ovf := d.floatBits(&float64info)
665	f = math.Float64frombits(b)
666	if ovf {
667		err = rangeError(fnParseFloat, s)
668	}
669	return f, n, err
670}
671
672// ParseFloat converts the string s to a floating-point number
673// with the precision specified by bitSize: 32 for float32, or 64 for float64.
674// When bitSize=32, the result still has type float64, but it will be
675// convertible to float32 without changing its value.
676//
677// ParseFloat accepts decimal and hexadecimal floating-point number syntax.
678// If s is well-formed and near a valid floating-point number,
679// ParseFloat returns the nearest floating-point number rounded
680// using IEEE754 unbiased rounding.
681// (Parsing a hexadecimal floating-point value only rounds when
682// there are more bits in the hexadecimal representation than
683// will fit in the mantissa.)
684//
685// The errors that ParseFloat returns have concrete type *NumError
686// and include err.Num = s.
687//
688// If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
689//
690// If s is syntactically well-formed but is more than 1/2 ULP
691// away from the largest floating point number of the given size,
692// ParseFloat returns f = ±Inf, err.Err = ErrRange.
693//
694// ParseFloat recognizes the strings "NaN", and the (possibly signed) strings "Inf" and "Infinity"
695// as their respective special floating point values. It ignores case when matching.
696func ParseFloat(s string, bitSize int) (float64, error) {
697	f, n, err := parseFloatPrefix(s, bitSize)
698	if err == nil && n != len(s) {
699		return 0, syntaxError(fnParseFloat, s)
700	}
701	return f, err
702}
703
704func parseFloatPrefix(s string, bitSize int) (float64, int, error) {
705	if bitSize == 32 {
706		f, n, err := atof32(s)
707		return float64(f), n, err
708	}
709	return atof64(s)
710}
711