1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package format
6
7import (
8	"reflect"
9	"unicode/utf8"
10)
11
12// A Parser parses a format string. The result from the parse are set in the
13// struct fields.
14type Parser struct {
15	Verb rune
16
17	WidthPresent bool
18	PrecPresent  bool
19	Minus        bool
20	Plus         bool
21	Sharp        bool
22	Space        bool
23	Zero         bool
24
25	// For the formats %+v %#v, we set the plusV/sharpV flags
26	// and clear the plus/sharp flags since %+v and %#v are in effect
27	// different, flagless formats set at the top level.
28	PlusV  bool
29	SharpV bool
30
31	HasIndex bool
32
33	Width int
34	Prec  int // precision
35
36	// retain arguments across calls.
37	Args []interface{}
38	// retain current argument number across calls
39	ArgNum int
40
41	// reordered records whether the format string used argument reordering.
42	Reordered bool
43	// goodArgNum records whether the most recent reordering directive was valid.
44	goodArgNum bool
45
46	// position info
47	format   string
48	startPos int
49	endPos   int
50	Status   Status
51}
52
53// Reset initializes a parser to scan format strings for the given args.
54func (p *Parser) Reset(args []interface{}) {
55	p.Args = args
56	p.ArgNum = 0
57	p.startPos = 0
58	p.Reordered = false
59}
60
61// Text returns the part of the format string that was parsed by the last call
62// to Scan. It returns the original substitution clause if the current scan
63// parsed a substitution.
64func (p *Parser) Text() string { return p.format[p.startPos:p.endPos] }
65
66// SetFormat sets a new format string to parse. It does not reset the argument
67// count.
68func (p *Parser) SetFormat(format string) {
69	p.format = format
70	p.startPos = 0
71	p.endPos = 0
72}
73
// Status indicates the result type of a call to Scan.
type Status int

// The values a Parser's Status field can take after a call to Scan.
const (
	// StatusText means a run of literal text was scanned. It is also the
	// status reported for the "%" verb.
	StatusText = Status(iota)
	// StatusSubstitution means a substitution clause was scanned.
	StatusSubstitution
	// StatusBadWidthSubstitution means no usable integer argument was
	// available for a '*' width.
	StatusBadWidthSubstitution
	// StatusBadPrecSubstitution means no usable, non-negative integer
	// argument was available for a '*' precision.
	StatusBadPrecSubstitution
	// StatusNoVerb means the format string ended before a verb was found.
	StatusNoVerb
	// StatusBadArgNum means the substitution used an invalid argument index.
	StatusBadArgNum
	// StatusMissingArg means no argument was left over for the verb.
	StatusMissingArg
)
86
87// ClearFlags reset the parser to default behavior.
88func (p *Parser) ClearFlags() {
89	p.WidthPresent = false
90	p.PrecPresent = false
91	p.Minus = false
92	p.Plus = false
93	p.Sharp = false
94	p.Space = false
95	p.Zero = false
96
97	p.PlusV = false
98	p.SharpV = false
99
100	p.HasIndex = false
101}
102
103// Scan scans the next part of the format string and sets the status to
104// indicate whether it scanned a string literal, substitution or error.
105func (p *Parser) Scan() bool {
106	p.Status = StatusText
107	format := p.format
108	end := len(format)
109	if p.endPos >= end {
110		return false
111	}
112	afterIndex := false // previous item in format was an index like [3].
113
114	p.startPos = p.endPos
115	p.goodArgNum = true
116	i := p.startPos
117	for i < end && format[i] != '%' {
118		i++
119	}
120	if i > p.startPos {
121		p.endPos = i
122		return true
123	}
124	// Process one verb
125	i++
126
127	p.Status = StatusSubstitution
128
129	// Do we have flags?
130	p.ClearFlags()
131
132simpleFormat:
133	for ; i < end; i++ {
134		c := p.format[i]
135		switch c {
136		case '#':
137			p.Sharp = true
138		case '0':
139			p.Zero = !p.Minus // Only allow zero padding to the left.
140		case '+':
141			p.Plus = true
142		case '-':
143			p.Minus = true
144			p.Zero = false // Do not pad with zeros to the right.
145		case ' ':
146			p.Space = true
147		default:
148			// Fast path for common case of ascii lower case simple verbs
149			// without precision or width or argument indices.
150			if 'a' <= c && c <= 'z' && p.ArgNum < len(p.Args) {
151				if c == 'v' {
152					// Go syntax
153					p.SharpV = p.Sharp
154					p.Sharp = false
155					// Struct-field syntax
156					p.PlusV = p.Plus
157					p.Plus = false
158				}
159				p.Verb = rune(c)
160				p.ArgNum++
161				p.endPos = i + 1
162				return true
163			}
164			// Format is more complex than simple flags and a verb or is malformed.
165			break simpleFormat
166		}
167	}
168
169	// Do we have an explicit argument index?
170	i, afterIndex = p.updateArgNumber(format, i)
171
172	// Do we have width?
173	if i < end && format[i] == '*' {
174		i++
175		p.Width, p.WidthPresent = p.intFromArg()
176
177		if !p.WidthPresent {
178			p.Status = StatusBadWidthSubstitution
179		}
180
181		// We have a negative width, so take its value and ensure
182		// that the minus flag is set
183		if p.Width < 0 {
184			p.Width = -p.Width
185			p.Minus = true
186			p.Zero = false // Do not pad with zeros to the right.
187		}
188		afterIndex = false
189	} else {
190		p.Width, p.WidthPresent, i = parsenum(format, i, end)
191		if afterIndex && p.WidthPresent { // "%[3]2d"
192			p.goodArgNum = false
193		}
194	}
195
196	// Do we have precision?
197	if i+1 < end && format[i] == '.' {
198		i++
199		if afterIndex { // "%[3].2d"
200			p.goodArgNum = false
201		}
202		i, afterIndex = p.updateArgNumber(format, i)
203		if i < end && format[i] == '*' {
204			i++
205			p.Prec, p.PrecPresent = p.intFromArg()
206			// Negative precision arguments don't make sense
207			if p.Prec < 0 {
208				p.Prec = 0
209				p.PrecPresent = false
210			}
211			if !p.PrecPresent {
212				p.Status = StatusBadPrecSubstitution
213			}
214			afterIndex = false
215		} else {
216			p.Prec, p.PrecPresent, i = parsenum(format, i, end)
217			if !p.PrecPresent {
218				p.Prec = 0
219				p.PrecPresent = true
220			}
221		}
222	}
223
224	if !afterIndex {
225		i, afterIndex = p.updateArgNumber(format, i)
226	}
227	p.HasIndex = afterIndex
228
229	if i >= end {
230		p.endPos = i
231		p.Status = StatusNoVerb
232		return true
233	}
234
235	verb, w := utf8.DecodeRuneInString(format[i:])
236	p.endPos = i + w
237	p.Verb = verb
238
239	switch {
240	case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
241		p.startPos = p.endPos - 1
242		p.Status = StatusText
243	case !p.goodArgNum:
244		p.Status = StatusBadArgNum
245	case p.ArgNum >= len(p.Args): // No argument left over to print for the current verb.
246		p.Status = StatusMissingArg
247		p.ArgNum++
248	case verb == 'v':
249		// Go syntax
250		p.SharpV = p.Sharp
251		p.Sharp = false
252		// Struct-field syntax
253		p.PlusV = p.Plus
254		p.Plus = false
255		fallthrough
256	default:
257		p.ArgNum++
258	}
259	return true
260}
261
262// intFromArg gets the ArgNumth element of Args. On return, isInt reports
263// whether the argument has integer type.
264func (p *Parser) intFromArg() (num int, isInt bool) {
265	if p.ArgNum < len(p.Args) {
266		arg := p.Args[p.ArgNum]
267		num, isInt = arg.(int) // Almost always OK.
268		if !isInt {
269			// Work harder.
270			switch v := reflect.ValueOf(arg); v.Kind() {
271			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
272				n := v.Int()
273				if int64(int(n)) == n {
274					num = int(n)
275					isInt = true
276				}
277			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
278				n := v.Uint()
279				if int64(n) >= 0 && uint64(int(n)) == n {
280					num = int(n)
281					isInt = true
282				}
283			default:
284				// Already 0, false.
285			}
286		}
287		p.ArgNum++
288		if tooLarge(num) {
289			num = 0
290			isInt = false
291		}
292	}
293	return
294}
295
296// parseArgNumber returns the value of the bracketed number, minus 1
297// (explicit argument numbers are one-indexed but we want zero-indexed).
298// The opening bracket is known to be present at format[0].
299// The returned values are the index, the number of bytes to consume
300// up to the closing paren, if present, and whether the number parsed
301// ok. The bytes to consume will be 1 if no closing paren is present.
302func parseArgNumber(format string) (index int, wid int, ok bool) {
303	// There must be at least 3 bytes: [n].
304	if len(format) < 3 {
305		return 0, 1, false
306	}
307
308	// Find closing bracket.
309	for i := 1; i < len(format); i++ {
310		if format[i] == ']' {
311			width, ok, newi := parsenum(format, 1, i)
312			if !ok || newi != i {
313				return 0, i + 1, false
314			}
315			return width - 1, i + 1, true // arg numbers are one-indexed and skip paren.
316		}
317	}
318	return 0, 1, false
319}
320
321// updateArgNumber returns the next argument to evaluate, which is either the value of the passed-in
322// argNum or the value of the bracketed integer that begins format[i:]. It also returns
323// the new value of i, that is, the index of the next byte of the format to process.
324func (p *Parser) updateArgNumber(format string, i int) (newi int, found bool) {
325	if len(format) <= i || format[i] != '[' {
326		return i, false
327	}
328	p.Reordered = true
329	index, wid, ok := parseArgNumber(format[i:])
330	if ok && 0 <= index && index < len(p.Args) {
331		p.ArgNum = index
332		return i + wid, true
333	}
334	p.goodArgNum = false
335	return i + wid, ok
336}
337
// tooLarge reports whether x lies outside the range [-1e6, 1e6], that is,
// whether its magnitude is too large for a formatting width or precision.
func tooLarge(x int) bool {
	const limit int = 1e6
	return !(-limit <= x && x <= limit)
}
344
345// parsenum converts ASCII to integer.  num is 0 (and isnum is false) if no number present.
346func parsenum(s string, start, end int) (num int, isnum bool, newi int) {
347	if start >= end {
348		return 0, false, end
349	}
350	for newi = start; newi < end && '0' <= s[newi] && s[newi] <= '9'; newi++ {
351		if tooLarge(num) {
352			return 0, false, end // Overflow; crazy long number most likely.
353		}
354		num = num*10 + int(s[newi]-'0')
355		isnum = true
356	}
357	return
358}
359