1package parse
2
3import (
4	"unicode"
5	"unicode/utf8"
6)
7
// eof is the sentinel rune returned by read when the end of the
// buffer is reached. Because it is rune(0), a NUL character in the
// input compares equal to eof.
var eof = rune(0)
10
// token is a lexical token. It identifies the kind of lexeme
// returned by scanner.scan.
type token uint
13
// list of lexical tokens. Values are assigned sequentially with
// iota, starting at zero with tokenIllegal.
const (
	// special tokens

	// tokenIllegal is returned by scan when no other token matches.
	tokenIllegal token = iota
	// tokenEOF is returned by scan when read yields eof.
	tokenEOF

	// identifiers and literals

	// tokenIdent is a run of runes accepted by the scanner's accept
	// function.
	tokenIdent

	// operators and delimiters

	// tokenLbrack is the opening sequence "${".
	tokenLbrack
	// tokenRbrack is the closing "}".
	tokenRbrack
	// tokenQuote is declared here but is not returned by scan in
	// this file.
	tokenQuote
)
28
// predefined mode bits to control recognition of tokens. Each bit
// is tested against scanner.mode by the scanner method that handles
// the corresponding construct; when the bit is clear that method
// returns false without consuming input.
const (
	scanIdent  byte = 1 << iota // enables scanner.scanIdent
	scanLbrack                  // enables recognition of "${"
	scanRbrack                  // enables recognition of "}"
	scanEscape                  // enables scanner.scanEscaped
)
36
// acceptFunc returns true if rune is accepted. r is the rune that
// was just read; i is the value scanIdent passes as s.pos-s.start,
// i.e. the byte offset from the start of the current token to the
// position just past r.
type acceptFunc func(r rune, i int) bool
39
// scanner implements a lexical scanner that reads unicode
// characters and tokens from a string buffer.
type scanner struct {
	buf   string // input being scanned; skip rewrites it to drop escape bytes
	pos   int    // byte offset of the next rune to read
	start int    // byte offset where the current token begins (set by scan)
	width int    // byte width of the last rune read; 0 after reading eof
	mode  byte   // bitmask of scan* mode bits selecting which tokens are recognised

	// accept decides which runes belong to an identifier; it is
	// called by scanIdent and is reset to nil by init.
	accept acceptFunc
}
51
52// init initializes a scanner with a new buffer.
53func (s *scanner) init(buf string) {
54	s.buf = buf
55	s.pos = 0
56	s.start = 0
57	s.width = 0
58	s.accept = nil
59}
60
61// read returns the next unicode character. It returns eof at
62// the end of the string buffer.
63func (s *scanner) read() rune {
64	if s.pos >= len(s.buf) {
65		s.width = 0
66		return eof
67	}
68	r, w := utf8.DecodeRuneInString(s.buf[s.pos:])
69	s.width = w
70	s.pos += s.width
71	return r
72}
73
74func (s *scanner) unread() {
75	s.pos -= s.width
76}
77
78// skip skips over the curring unicode character in the buffer
79// by slicing and removing from the buffer.
80func (s *scanner) skip() {
81	l := s.buf[:s.pos-1]
82	r := s.buf[s.pos:]
83	s.buf = l + r
84}
85
86// peek returns the next unicode character in the buffer without
87// advancing the scanner. It returns eof if the scanner's position
88// is at the last character of the source.
89func (s *scanner) peek() rune {
90	r := s.read()
91	s.unread()
92	return r
93}
94
95// string returns the string corresponding to the most recently
96// scanned token. Valid after calling scan().
97func (s *scanner) string() string {
98	return s.buf[s.start:s.pos]
99}
100
101// scan reads the next token or Unicode character from source and
102// returns it. It returns EOF at the end of the source.
103func (s *scanner) scan() token {
104	s.start = s.pos
105	r := s.read()
106	switch {
107	case r == eof:
108		return tokenEOF
109	case s.scanLbrack(r):
110		return tokenLbrack
111	case s.scanRbrack(r):
112		return tokenRbrack
113	case s.scanIdent(r):
114		return tokenIdent
115	}
116	return tokenIllegal
117}
118
119// scanIdent reads the next token or Unicode character from source
120// and returns true if the Ident character is accepted.
121func (s *scanner) scanIdent(r rune) bool {
122	if s.mode&scanIdent == 0 {
123		return false
124	}
125	if s.scanEscaped(r) {
126		s.skip()
127	} else if !s.accept(r, s.pos-s.start) {
128		return false
129	}
130loop:
131	for {
132		r := s.read()
133		switch {
134		case r == eof:
135			s.unread()
136			break loop
137		case s.scanLbrack(r):
138			s.unread()
139			s.unread()
140			break loop
141		}
142		if s.scanEscaped(r) {
143			s.skip()
144			continue
145		}
146		if !s.accept(r, s.pos-s.start) {
147			s.unread()
148			break loop
149		}
150	}
151	return true
152}
153
154// scanLbrack reads the next token or Unicode character from source
155// and returns true if the open bracket is encountered.
156func (s *scanner) scanLbrack(r rune) bool {
157	if s.mode&scanLbrack == 0 {
158		return false
159	}
160	if r == '$' {
161		if s.read() == '{' {
162			return true
163		}
164		s.unread()
165	}
166	return false
167}
168
169// scanRbrack reads the next token or Unicode character from source
170// and returns true if the closing bracket is encountered.
171func (s *scanner) scanRbrack(r rune) bool {
172	if s.mode&scanRbrack == 0 {
173		return false
174	}
175	return r == '}'
176}
177
178// scanEscaped reads the next token or Unicode character from source
179// and returns true if it being escaped and should be sipped.
180func (s *scanner) scanEscaped(r rune) bool {
181	if s.mode&scanEscape == 0 {
182		return false
183	}
184	if r == '$' {
185		if s.peek() == '$' {
186			return true
187		}
188	}
189	if r != '\\' {
190		return false
191	}
192	switch s.peek() {
193	case '/', '\\':
194		return true
195	default:
196		return false
197	}
198}
199
200//
201// scanner functions accept or reject runes.
202//
203
// acceptRune accepts every rune; both r and i are ignored.
func acceptRune(r rune, i int) bool {
	return true
}
207
// acceptIdent accepts runes that may appear in an identifier:
// the underscore, unicode letters and unicode digits. The index i
// is ignored.
func acceptIdent(r rune, i int) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
211
// acceptColon accepts only the ':' rune. The index i is ignored.
func acceptColon(r rune, i int) bool {
	if r != ':' {
		return false
	}
	return true
}
215
// acceptOneHash accepts the '#' rune, and only when the index i
// equals 1.
func acceptOneHash(r rune, i int) bool {
	if i != 1 {
		return false
	}
	return r == '#'
}
219
// acceptNone rejects every rune; both r and i are ignored.
func acceptNone(r rune, i int) bool {
	return false
}
223
// acceptNotClosing accepts every rune except the closing bracket
// '}'. The index i is ignored.
func acceptNotClosing(r rune, i int) bool {
	if r == '}' {
		return false
	}
	return true
}
227
// acceptHashFunc accepts the '#' rune while the index i is below 3.
func acceptHashFunc(r rune, i int) bool {
	if i >= 3 {
		return false
	}
	return r == '#'
}
231
// acceptPercentFunc accepts the '%' rune while the index i is
// below 3.
func acceptPercentFunc(r rune, i int) bool {
	if i >= 3 {
		return false
	}
	return r == '%'
}
235
// acceptDefaultFunc accepts ':' at index 1 and one of '=', '-',
// '?' or '+' at index 2. Everything else is rejected.
func acceptDefaultFunc(r rune, i int) bool {
	switch i {
	case 1:
		return r == ':'
	case 2:
		switch r {
		case '=', '-', '?', '+':
			return true
		}
	}
	return false
}
246
// acceptReplaceFunc accepts '/' at index 1 and one of '/', '#' or
// '%' at index 2. Everything else is rejected.
func acceptReplaceFunc(r rune, i int) bool {
	switch i {
	case 1:
		return r == '/'
	case 2:
		switch r {
		case '/', '#', '%':
			return true
		}
	}
	return false
}
257
// acceptOneEqual accepts the '=' rune, and only when the index i
// equals 1.
func acceptOneEqual(r rune, i int) bool {
	if i != 1 {
		return false
	}
	return r == '='
}
261
// acceptOneColon accepts the ':' rune, and only when the index i
// equals 1.
func acceptOneColon(r rune, i int) bool {
	if i != 1 {
		return false
	}
	return r == ':'
}
265
// rejectColonClose accepts every rune except ':' and '}'. The
// index i is ignored.
func rejectColonClose(r rune, i int) bool {
	switch r {
	case ':', '}':
		return false
	}
	return true
}
269
// acceptSlash accepts only the '/' rune. The index i is ignored.
func acceptSlash(r rune, i int) bool {
	if r != '/' {
		return false
	}
	return true
}
273
// acceptNotSlash accepts every rune except '/'. The index i is
// ignored.
func acceptNotSlash(r rune, i int) bool {
	if r == '/' {
		return false
	}
	return true
}
277
// acceptCasingFunc accepts the ',' and '^' runes while the index
// i is below 3.
func acceptCasingFunc(r rune, i int) bool {
	if i >= 3 {
		return false
	}
	switch r {
	case ',', '^':
		return true
	}
	return false
}
281