package lexer

import (
	"bytes"
	"fmt"
	"unicode/utf8"

	"github.com/gobwas/glob/util/runes"
)

// Glob metacharacters recognized by the lexer.
const (
	char_any           = '*'
	char_comma         = ','
	char_single        = '?'
	char_escape        = '\\'
	char_range_open    = '['
	char_range_close   = ']'
	char_terms_open    = '{'
	char_terms_close   = '}'
	char_range_not     = '!'
	char_range_between = '-'
)

// specials lists the characters that must be escaped to be matched literally.
var specials = []byte{
	char_any,
	char_single,
	char_escape,
	char_range_open,
	char_range_close,
	char_terms_open,
	char_terms_close,
}

// Special reports whether c is a glob metacharacter.
func Special(c byte) bool {
	return bytes.IndexByte(specials, c) != -1
}

// tokens is a FIFO queue of lexed tokens.
type tokens []Token

// shift removes and returns the token at the front of the queue.
func (i *tokens) shift() (ret Token) {
	ret = (*i)[0]
	copy(*i, (*i)[1:])
	*i = (*i)[:len(*i)-1]
	return
}

func (i *tokens) push(v Token) {
	*i = append(*i, v)
}

func (i *tokens) empty() bool {
	return len(*i) == 0
}

var eof rune = 0

type lexer struct {
	data string
	pos  int
	err  error

	tokens     tokens
	termsLevel int

	lastRune     rune
	lastRuneSize int
	hasRune      bool
}

func NewLexer(source string) *lexer {
	l := &lexer{
		data:   source,
		tokens: tokens(make([]Token, 0, 4)),
	}
	return l
}

// Next returns the next token. Once an error has occurred, every subsequent
// call returns an Error token carrying the error message.
func (l *lexer) Next() Token {
	if l.err != nil {
		return Token{Error, l.err.Error()}
	}
	if !l.tokens.empty() {
		return l.tokens.shift()
	}

	l.fetchItem()
	return l.Next()
}

// peek decodes the rune at the current position without consuming it,
// returning the rune and its width in bytes.
func (l *lexer) peek() (r rune, w int) {
	if l.pos == len(l.data) {
		return eof, 0
	}

	r, w = utf8.DecodeRuneInString(l.data[l.pos:])
	if r == utf8.RuneError {
		l.errorf("could not read rune")
		r = eof
		w = 0
	}

	return
}

// read consumes and returns the next rune, remembering it so that a single
// unread can step back.
func (l *lexer) read() rune {
	if l.hasRune {
		l.hasRune = false
		l.seek(l.lastRuneSize)
		return l.lastRune
	}

	r, s := l.peek()
	l.seek(s)

	l.lastRune = r
	l.lastRuneSize = s

	return r
}

func (l *lexer) seek(w int) {
	l.pos += w
}

// unread steps back over the most recently read rune. Only one level of
// pushback is supported; a second unread without an intervening read fails.
func (l *lexer) unread() {
	if l.hasRune {
		l.errorf("could not unread rune")
		return
	}
	l.seek(-l.lastRuneSize)
	l.hasRune = true
}

func (l *lexer) errorf(f string, v ...interface{}) {
	l.err = fmt.Errorf(f, v...)
}
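
// Pushback sketch (illustrative only, not part of the original source): read
// and unread form a one-rune pushback buffer. After an unread, the next read
// is served from lastRune without re-decoding the input.
//
//	l := NewLexer("ab")
//	r := l.read() // 'a'
//	l.unread()    // step back; hasRune is now true
//	r = l.read()  // 'a' again, replayed from lastRune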

func (l *lexer) inTerms() bool {
	return l.termsLevel > 0
}

func (l *lexer) termsEnter() {
	l.termsLevel++
}

func (l *lexer) termsLeave() {
	l.termsLevel--
}

// Runes that terminate a run of literal text, depending on whether the lexer
// is inside a {...} terms group.
var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}
var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)

// fetchItem lexes the next item of input and queues one or more tokens.
func (l *lexer) fetchItem() {
	r := l.read()
	switch {
	case r == eof:
		l.tokens.push(Token{EOF, ""})

	case r == char_terms_open:
		l.termsEnter()
		l.tokens.push(Token{TermsOpen, string(r)})

	case r == char_comma && l.inTerms():
		l.tokens.push(Token{Separator, string(r)})

	case r == char_terms_close && l.inTerms():
		l.tokens.push(Token{TermsClose, string(r)})
		l.termsLeave()

	case r == char_range_open:
		l.tokens.push(Token{RangeOpen, string(r)})
		l.fetchRange()

	case r == char_single:
		l.tokens.push(Token{Single, string(r)})

	case r == char_any:
		// "**" lexes as Super, a lone "*" as Any.
		if l.read() == char_any {
			l.tokens.push(Token{Super, string(r) + string(r)})
		} else {
			l.unread()
			l.tokens.push(Token{Any, string(r)})
		}

	default:
		l.unread()

		var breakers []rune
		if l.inTerms() {
			breakers = inTermsBreakers
		} else {
			breakers = inTextBreakers
		}
		l.fetchText(breakers)
	}
}

// fetchRange lexes the body of a range expression after the opening '[' has
// been consumed: an optional '!', then either a lo-hi pair or literal text,
// then the closing ']'.
func (l *lexer) fetchRange() {
	var wantHi bool
	var wantClose bool
	var seenNot bool
	for {
		r := l.read()
		if r == eof {
			l.errorf("unexpected end of input")
			return
		}

		if wantClose {
			if r != char_range_close {
				l.errorf("expected close range character")
			} else {
				l.tokens.push(Token{RangeClose, string(r)})
			}
			return
		}

		if wantHi {
			l.tokens.push(Token{RangeHi, string(r)})
			wantClose = true
			continue
		}

		if !seenNot && r == char_range_not {
			l.tokens.push(Token{Not, string(r)})
			seenNot = true
			continue
		}

		if n, w := l.peek(); n == char_range_between {
			l.seek(w)
			l.tokens.push(Token{RangeLo, string(r)})
			l.tokens.push(Token{RangeBetween, string(n)})
			wantHi = true
			continue
		}

		l.unread() // unread first peek and fetch as text
		l.fetchText([]rune{char_range_close})
		wantClose = true
	}
}

// fetchText accumulates literal text until EOF or an unescaped breaker rune,
// honoring backslash escapes, and queues it as a single Text token.
func (l *lexer) fetchText(breakers []rune) {
	var data []rune
	var escaped bool

reading:
	for {
		r := l.read()
		if r == eof {
			break
		}

		if !escaped {
			if r == char_escape {
				escaped = true
				continue
			}

			if runes.IndexRune(breakers, r) != -1 {
				l.unread()
				break reading
			}
		}

		escaped = false
		data = append(data, r)
	}

	if len(data) > 0 {
		l.tokens.push(Token{Text, string(data)})
	}
}
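
// Usage sketch (illustrative only): drain the lexer until EOF or Error.
// Token and the TokenType constants (EOF, Error, Text, ...) are assumed to
// be declared elsewhere in this package, with Token fields {Type, Raw}.
//
//	lex := NewLexer("src/*.{go,md}")
//	for {
//		tok := lex.Next()
//		fmt.Printf("%v %q\n", tok.Type, tok.Raw)
//		if tok.Type == EOF || tok.Type == Error {
//			break
//		}
//	}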