1// Copyright 2018 Frank Schroeder. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4//
// Parts of the lexer are derived from the text/template/parse package.
// For these parts the following applies:
7//
8// Copyright 2011 The Go Authors. All rights reserved.
9// Use of this source code is governed by a BSD-style
10// license that can be found in the LICENSE file of the go 1.2
11// distribution.
12
13package properties
14
15import (
16	"fmt"
17	"strconv"
18	"strings"
19	"unicode/utf8"
20)
21
// item represents a token or text string returned from the scanner.
// Items are created by the lexer's emit and errorf methods and handed to
// the client through the lexer's items channel.
type item struct {
	typ itemType // The type of this item.
	pos int      // The starting position, in bytes, of this item in the input string.
	val string   // The value of this item; for itemError it is the error text.
}
28
29func (i item) String() string {
30	switch {
31	case i.typ == itemEOF:
32		return "EOF"
33	case i.typ == itemError:
34		return i.val
35	case len(i.val) > 10:
36		return fmt.Sprintf("%.10q...", i.val)
37	}
38	return fmt.Sprintf("%q", i.val)
39}
40
// itemType identifies the type of lex items.
type itemType int

// The kinds of items the lexer emits. itemError is the zero value.
const (
	itemError itemType = iota // error occurred; value is text of error
	itemEOF
	itemKey     // a key
	itemValue   // a value
	itemComment // a comment
)
51
// eof is the sentinel value that next returns once the whole input
// has been consumed.
const eof = -1
54
// whitespace lists the permitted whitespace characters: space, form
// feed (FF) and horizontal tab (TAB). Line terminators are not part of it.
const whitespace = " \f\t"
57
// stateFn represents the state of the scanner as a function that returns the next state.
// Returning nil ends the loop in run and thereby the scan.
type stateFn func(*lexer) stateFn
60
// lexer holds the state of the scanner. lex starts the state machine in
// its own goroutine; finished items reach the client through the items
// channel and are read with nextItem.
type lexer struct {
	input   string    // the string being scanned
	state   stateFn   // the next lexing function to enter
	pos     int       // current position in the input, in bytes
	start   int       // start position of this item, in bytes
	width   int       // width in bytes of last rune read from input; 0 after eof was read
	lastPos int       // position of most recent item returned by nextItem
	runes   []rune    // scanned runes for this item
	items   chan item // channel of scanned items
}
72
73// next returns the next rune in the input.
74func (l *lexer) next() rune {
75	if l.pos >= len(l.input) {
76		l.width = 0
77		return eof
78	}
79	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
80	l.width = w
81	l.pos += l.width
82	return r
83}
84
85// peek returns but does not consume the next rune in the input.
86func (l *lexer) peek() rune {
87	r := l.next()
88	l.backup()
89	return r
90}
91
// backup steps back one rune by rewinding pos by the width of the rune
// read last. Can only be called once per call of next, since only that
// one width is remembered.
func (l *lexer) backup() {
	l.pos -= l.width
}
96
97// emit passes an item back to the client.
98func (l *lexer) emit(t itemType) {
99	i := item{t, l.start, string(l.runes)}
100	l.items <- i
101	l.start = l.pos
102	l.runes = l.runes[:0]
103}
104
// ignore skips over the pending input before this point by moving the
// start of the current item up to the current position. Runes already
// collected with appendRune are left untouched.
func (l *lexer) ignore() {
	l.start = l.pos
}
109
// appendRune appends the rune to the value of the item currently being
// scanned. The collected runes become the item's value on the next emit.
func (l *lexer) appendRune(r rune) {
	l.runes = append(l.runes, r)
}
114
115// accept consumes the next rune if it's from the valid set.
116func (l *lexer) accept(valid string) bool {
117	if strings.ContainsRune(valid, l.next()) {
118		return true
119	}
120	l.backup()
121	return false
122}
123
124// acceptRun consumes a run of runes from the valid set.
125func (l *lexer) acceptRun(valid string) {
126	for strings.ContainsRune(valid, l.next()) {
127	}
128	l.backup()
129}
130
131// acceptRunUntil consumes a run of runes up to a terminator.
132func (l *lexer) acceptRunUntil(term rune) {
133	for term != l.next() {
134	}
135	l.backup()
136}
137
// isNotEmpty returns true if the current parsed text is not empty, i.e.
// if input has been consumed since the start of the current item.
func (l *lexer) isNotEmpty() bool {
	return l.pos > l.start
}
142
143// lineNumber reports which line we're on, based on the position of
144// the previous item returned by nextItem. Doing it this way
145// means we don't have to worry about peek double counting.
146func (l *lexer) lineNumber() int {
147	return 1 + strings.Count(l.input[:l.lastPos], "\n")
148}
149
150// errorf returns an error token and terminates the scan by passing
151// back a nil pointer that will be the next state, terminating l.nextItem.
152func (l *lexer) errorf(format string, args ...interface{}) stateFn {
153	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
154	return nil
155}
156
157// nextItem returns the next item from the input.
158func (l *lexer) nextItem() item {
159	i := <-l.items
160	l.lastPos = i.pos
161	return i
162}
163
164// lex creates a new scanner for the input string.
165func lex(input string) *lexer {
166	l := &lexer{
167		input: input,
168		items: make(chan item),
169		runes: make([]rune, 0, 32),
170	}
171	go l.run()
172	return l
173}
174
175// run runs the state machine for the lexer.
176func (l *lexer) run() {
177	for l.state = lexBeforeKey(l); l.state != nil; {
178		l.state = l.state(l)
179	}
180}
181
// State functions. Each one scans a piece of the input and returns the
// state to enter next, or nil to stop the scan.
183
184// lexBeforeKey scans until a key begins.
185func lexBeforeKey(l *lexer) stateFn {
186	switch r := l.next(); {
187	case isEOF(r):
188		l.emit(itemEOF)
189		return nil
190
191	case isEOL(r):
192		l.ignore()
193		return lexBeforeKey
194
195	case isComment(r):
196		return lexComment
197
198	case isWhitespace(r):
199		l.ignore()
200		return lexBeforeKey
201
202	default:
203		l.backup()
204		return lexKey
205	}
206}
207
208// lexComment scans a comment line. The comment character has already been scanned.
209func lexComment(l *lexer) stateFn {
210	l.acceptRun(whitespace)
211	l.ignore()
212	for {
213		switch r := l.next(); {
214		case isEOF(r):
215			l.ignore()
216			l.emit(itemEOF)
217			return nil
218		case isEOL(r):
219			l.emit(itemComment)
220			return lexBeforeKey
221		default:
222			l.appendRune(r)
223		}
224	}
225}
226
227// lexKey scans the key up to a delimiter
228func lexKey(l *lexer) stateFn {
229	var r rune
230
231Loop:
232	for {
233		switch r = l.next(); {
234
235		case isEscape(r):
236			err := l.scanEscapeSequence()
237			if err != nil {
238				return l.errorf(err.Error())
239			}
240
241		case isEndOfKey(r):
242			l.backup()
243			break Loop
244
245		case isEOF(r):
246			break Loop
247
248		default:
249			l.appendRune(r)
250		}
251	}
252
253	if len(l.runes) > 0 {
254		l.emit(itemKey)
255	}
256
257	if isEOF(r) {
258		l.emit(itemEOF)
259		return nil
260	}
261
262	return lexBeforeValue
263}
264
// lexBeforeValue scans the delimiter between key and value.
// Leading and trailing whitespace is ignored.
// We expect to be just after the key.
func lexBeforeValue(l *lexer) stateFn {
	// whitespace between the key and the delimiter
	l.acceptRun(whitespace)
	// at most one ':' or '='; the result is not checked, so the
	// delimiter character is optional
	l.accept(":=")
	// whitespace between the delimiter and the value
	l.acceptRun(whitespace)
	// nothing consumed here belongs to the value
	l.ignore()
	return lexValue
}
275
276// lexValue scans text until the end of the line. We expect to be just after the delimiter.
277func lexValue(l *lexer) stateFn {
278	for {
279		switch r := l.next(); {
280		case isEscape(r):
281			if isEOL(l.peek()) {
282				l.next()
283				l.acceptRun(whitespace)
284			} else {
285				err := l.scanEscapeSequence()
286				if err != nil {
287					return l.errorf(err.Error())
288				}
289			}
290
291		case isEOL(r):
292			l.emit(itemValue)
293			l.ignore()
294			return lexBeforeKey
295
296		case isEOF(r):
297			l.emit(itemValue)
298			l.emit(itemEOF)
299			return nil
300
301		default:
302			l.appendRune(r)
303		}
304	}
305}
306
307// scanEscapeSequence scans either one of the escaped characters
308// or a unicode literal. We expect to be after the escape character.
309func (l *lexer) scanEscapeSequence() error {
310	switch r := l.next(); {
311
312	case isEscapedCharacter(r):
313		l.appendRune(decodeEscapedCharacter(r))
314		return nil
315
316	case atUnicodeLiteral(r):
317		return l.scanUnicodeLiteral()
318
319	case isEOF(r):
320		return fmt.Errorf("premature EOF")
321
322	// silently drop the escape character and append the rune as is
323	default:
324		l.appendRune(r)
325		return nil
326	}
327}
328
329// scans a unicode literal in the form \uXXXX. We expect to be after the \u.
330func (l *lexer) scanUnicodeLiteral() error {
331	// scan the digits
332	d := make([]rune, 4)
333	for i := 0; i < 4; i++ {
334		d[i] = l.next()
335		if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) {
336			return fmt.Errorf("invalid unicode literal")
337		}
338	}
339
340	// decode the digits into a rune
341	r, err := strconv.ParseInt(string(d), 16, 0)
342	if err != nil {
343		return err
344	}
345
346	l.appendRune(rune(r))
347	return nil
348}
349
// decodeEscapedCharacter maps the rune following an escape character to
// the character it denotes: f, n, r and t become form feed, newline,
// carriage return and tab. Every other rune stands for itself.
func decodeEscapedCharacter(r rune) rune {
	// letters and the control characters they stand for, index by index
	const letters, controls = "fnrt", "\f\n\r\t"
	for i := 0; i < len(letters); i++ {
		if rune(letters[i]) == r {
			return rune(controls[i])
		}
	}
	return r
}
365
// atUnicodeLiteral reports whether we are at a unicode literal, i.e.
// whether the rune following the escape character is a lower-case 'u'.
// The escape character has already been consumed.
func atUnicodeLiteral(r rune) bool {
	return r == 'u'
}
371
// isComment reports whether the rune is one of the characters that
// start a comment: '#' or '!'.
func isComment(r rune) bool {
	switch r {
	case '#', '!':
		return true
	}
	return false
}
376
// isEndOfKey reports whether the rune terminates the current key:
// whitespace, a line terminator or one of the delimiters ':' and '='.
func isEndOfKey(r rune) bool {
	const terminators = " \f\t\r\n:="
	return strings.IndexRune(terminators, r) >= 0
}
381
// isEOF reports whether we are at EOF, i.e. whether r is the eof
// sentinel returned by next at the end of the input.
func isEOF(r rune) bool {
	return r == eof
}
386
// isEOL reports whether the rune is a line terminator character,
// either line feed or carriage return.
func isEOL(r rune) bool {
	switch r {
	case '\n', '\r':
		return true
	}
	return false
}
391
// isEscape reports whether the rune is the escape character (a
// backslash) which prefixes unicode literals and other escaped characters.
func isEscape(r rune) bool {
	return r == '\\'
}
397
// isEscapedCharacter reports whether the rune following an escape
// character is one of the characters with a dedicated escape: space,
// ':', '=', or one of the letters f, n, r and t.
// The escape character has already been consumed.
func isEscapedCharacter(r rune) bool {
	const escapable = " :=fnrt"
	return strings.IndexRune(escapable, r) >= 0
}
403
404// isWhitespace reports whether the rune is a whitespace character.
405func isWhitespace(r rune) bool {
406	return strings.ContainsRune(whitespace, r)
407}
408