1package lexer
2
3import (
4	"bytes"
5	"fmt"
6	"github.com/gobwas/glob/util/runes"
7	"unicode/utf8"
8)
9
// Characters with special meaning in glob patterns.
const (
	char_any           = '*'  // wildcard; "**" lexes as a single Super token
	char_comma         = ','  // alternative separator inside "{...}"
	char_single        = '?'  // single-character wildcard
	char_escape        = '\\' // escapes the next character in text
	char_range_open    = '['  // opens a character range
	char_range_close   = ']'  // closes a character range
	char_terms_open    = '{'  // opens an alternatives group
	char_terms_close   = '}'  // closes an alternatives group
	char_range_not     = '!'  // negates a range when it is the first range character
	char_range_between = '-'  // low-high bound separator within a range
)
22
// specials holds every byte that has special meaning in glob syntax and
// therefore must be escaped to be matched literally (see Special).
var specials = []byte{
	char_any,
	char_single,
	char_escape,
	char_range_open,
	char_range_close,
	char_terms_open,
	char_terms_close,
}
32
33func Special(c byte) bool {
34	return bytes.IndexByte(specials, c) != -1
35}
36
37type tokens []Token
38
39func (i *tokens) shift() (ret Token) {
40	ret = (*i)[0]
41	copy(*i, (*i)[1:])
42	*i = (*i)[:len(*i)-1]
43	return
44}
45
46func (i *tokens) push(v Token) {
47	*i = append(*i, v)
48}
49
50func (i *tokens) empty() bool {
51	return len(*i) == 0
52}
53
// eof is the sentinel rune returned by peek/read once the input is exhausted.
var eof rune = 0
55
// lexer splits a glob pattern into tokens. Items are lexed lazily by
// fetchItem and buffered in tokens until consumed through Next.
type lexer struct {
	data string // the glob pattern being lexed
	pos  int    // byte offset of the next rune in data
	err  error  // first error encountered; sticky once set

	tokens     tokens // tokens already lexed but not yet returned
	termsLevel int    // current nesting depth of "{...}" groups

	// Single-rune pushback buffer used by read/unread.
	lastRune     rune
	lastRuneSize int
	hasRune      bool
}
68
69func NewLexer(source string) *lexer {
70	l := &lexer{
71		data:   source,
72		tokens: tokens(make([]Token, 0, 4)),
73	}
74	return l
75}
76
77func (l *lexer) Next() Token {
78	if l.err != nil {
79		return Token{Error, l.err.Error()}
80	}
81	if !l.tokens.empty() {
82		return l.tokens.shift()
83	}
84
85	l.fetchItem()
86	return l.Next()
87}
88
89func (l *lexer) peek() (r rune, w int) {
90	if l.pos == len(l.data) {
91		return eof, 0
92	}
93
94	r, w = utf8.DecodeRuneInString(l.data[l.pos:])
95	if r == utf8.RuneError {
96		l.errorf("could not read rune")
97		r = eof
98		w = 0
99	}
100
101	return
102}
103
104func (l *lexer) read() rune {
105	if l.hasRune {
106		l.hasRune = false
107		l.seek(l.lastRuneSize)
108		return l.lastRune
109	}
110
111	r, s := l.peek()
112	l.seek(s)
113
114	l.lastRune = r
115	l.lastRuneSize = s
116
117	return r
118}
119
120func (l *lexer) seek(w int) {
121	l.pos += w
122}
123
124func (l *lexer) unread() {
125	if l.hasRune {
126		l.errorf("could not unread rune")
127		return
128	}
129	l.seek(-l.lastRuneSize)
130	l.hasRune = true
131}
132
133func (l *lexer) errorf(f string, v ...interface{}) {
134	l.err = fmt.Errorf(f, v...)
135}
136
// inTerms reports whether the lexer is currently inside one or more
// "{...}" alternative groups.
func (l *lexer) inTerms() bool {
	return l.termsLevel > 0
}

// termsEnter records entering a "{...}" group.
func (l *lexer) termsEnter() {
	l.termsLevel++
}

// termsLeave records leaving the innermost "{...}" group.
func (l *lexer) termsLeave() {
	l.termsLevel--
}
148
149var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}
150var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)
151
// fetchItem lexes the next item from the input and pushes one or more
// tokens onto l.tokens. It is called by Next whenever the queue is empty.
func (l *lexer) fetchItem() {
	r := l.read()
	switch {
	case r == eof:
		l.tokens.push(Token{EOF, ""})

	case r == char_terms_open:
		// '{' opens an alternatives group; track nesting depth so that
		// ',' and '}' are only special inside a group.
		l.termsEnter()
		l.tokens.push(Token{TermsOpen, string(r)})

	case r == char_comma && l.inTerms():
		l.tokens.push(Token{Separator, string(r)})

	case r == char_terms_close && l.inTerms():
		l.tokens.push(Token{TermsClose, string(r)})
		l.termsLeave()

	case r == char_range_open:
		// '[' starts a character range; its body is lexed eagerly here.
		l.tokens.push(Token{RangeOpen, string(r)})
		l.fetchRange()

	case r == char_single:
		l.tokens.push(Token{Single, string(r)})

	case r == char_any:
		// "**" lexes as one Super token, a lone '*' as Any.
		if l.read() == char_any {
			l.tokens.push(Token{Super, string(r) + string(r)})
		} else {
			l.unread()
			l.tokens.push(Token{Any, string(r)})
		}

	default:
		// Anything else starts literal text. Which runes terminate the
		// text depends on whether we are inside a "{...}" group.
		l.unread()

		var breakers []rune
		if l.inTerms() {
			breakers = inTermsBreakers
		} else {
			breakers = inTextBreakers
		}
		l.fetchText(breakers)
	}
}
196
// fetchRange lexes the body of a character range ("[...]") after the
// opening '[' has been consumed, pushing an optional Not token, then
// either RangeLo/RangeBetween/RangeHi or a Text token, followed by
// RangeClose. Malformed input records an error via errorf.
func (l *lexer) fetchRange() {
	var wantHi bool    // next rune is the high bound of a "lo-hi" pair
	var wantClose bool // next rune must be the closing ']'
	var seenNot bool   // a leading '!' has already been emitted
	for {
		r := l.read()
		if r == eof {
			l.errorf("unexpected end of input")
			return
		}

		if wantClose {
			if r != char_range_close {
				l.errorf("expected close range character")
			} else {
				l.tokens.push(Token{RangeClose, string(r)})
			}
			return
		}

		if wantHi {
			l.tokens.push(Token{RangeHi, string(r)})
			wantClose = true
			continue
		}

		// '!' negates the range, but only once and only before any
		// range content.
		if !seenNot && r == char_range_not {
			l.tokens.push(Token{Not, string(r)})
			seenNot = true
			continue
		}

		// "lo-hi" form: if the rune after r is '-', r is the low bound.
		if n, w := l.peek(); n == char_range_between {
			l.seek(w)
			l.tokens.push(Token{RangeLo, string(r)})
			l.tokens.push(Token{RangeBetween, string(n)})
			wantHi = true
			continue
		}

		// Not a lo-hi pair: push r back and lex it (plus any following
		// runes) as a plain character list terminated by ']'.
		l.unread()
		l.fetchText([]rune{char_range_close})
		wantClose = true
	}
}
242
243func (l *lexer) fetchText(breakers []rune) {
244	var data []rune
245	var escaped bool
246
247reading:
248	for {
249		r := l.read()
250		if r == eof {
251			break
252		}
253
254		if !escaped {
255			if r == char_escape {
256				escaped = true
257				continue
258			}
259
260			if runes.IndexRune(breakers, r) != -1 {
261				l.unread()
262				break reading
263			}
264		}
265
266		escaped = false
267		data = append(data, r)
268	}
269
270	if len(data) > 0 {
271		l.tokens.push(Token{Text, string(data)})
272	}
273}
274