1// Copyright 2018 Frank Schroeder. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4// 5// Parts of the lexer are from the template/text/parser package 6// For these parts the following applies: 7// 8// Copyright 2011 The Go Authors. All rights reserved. 9// Use of this source code is governed by a BSD-style 10// license that can be found in the LICENSE file of the go 1.2 11// distribution. 12 13package properties 14 15import ( 16 "fmt" 17 "strconv" 18 "strings" 19 "unicode/utf8" 20) 21 22// item represents a token or text string returned from the scanner. 23type item struct { 24 typ itemType // The type of this item. 25 pos int // The starting position, in bytes, of this item in the input string. 26 val string // The value of this item. 27} 28 29func (i item) String() string { 30 switch { 31 case i.typ == itemEOF: 32 return "EOF" 33 case i.typ == itemError: 34 return i.val 35 case len(i.val) > 10: 36 return fmt.Sprintf("%.10q...", i.val) 37 } 38 return fmt.Sprintf("%q", i.val) 39} 40 41// itemType identifies the type of lex items. 42type itemType int 43 44const ( 45 itemError itemType = iota // error occurred; value is text of error 46 itemEOF 47 itemKey // a key 48 itemValue // a value 49 itemComment // a comment 50) 51 52// defines a constant for EOF 53const eof = -1 54 55// permitted whitespace characters space, FF and TAB 56const whitespace = " \f\t" 57 58// stateFn represents the state of the scanner as a function that returns the next state. 59type stateFn func(*lexer) stateFn 60 61// lexer holds the state of the scanner. 62type lexer struct { 63 input string // the string being scanned 64 state stateFn // the next lexing function to enter 65 pos int // current position in the input 66 start int // start position of this item 67 width int // width of last rune read from input 68 lastPos int // position of most recent item returned by nextItem 69 runes []rune // scanned runes for this item 70 items chan item // channel of scanned items 71} 72 73// next returns the next rune in the input. 74func (l *lexer) next() rune { 75 if l.pos >= len(l.input) { 76 l.width = 0 77 return eof 78 } 79 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 80 l.width = w 81 l.pos += l.width 82 return r 83} 84 85// peek returns but does not consume the next rune in the input. 86func (l *lexer) peek() rune { 87 r := l.next() 88 l.backup() 89 return r 90} 91 92// backup steps back one rune. Can only be called once per call of next. 93func (l *lexer) backup() { 94 l.pos -= l.width 95} 96 97// emit passes an item back to the client. 98func (l *lexer) emit(t itemType) { 99 i := item{t, l.start, string(l.runes)} 100 l.items <- i 101 l.start = l.pos 102 l.runes = l.runes[:0] 103} 104 105// ignore skips over the pending input before this point. 106func (l *lexer) ignore() { 107 l.start = l.pos 108} 109 110// appends the rune to the current value 111func (l *lexer) appendRune(r rune) { 112 l.runes = append(l.runes, r) 113} 114 115// accept consumes the next rune if it's from the valid set. 116func (l *lexer) accept(valid string) bool { 117 if strings.ContainsRune(valid, l.next()) { 118 return true 119 } 120 l.backup() 121 return false 122} 123 124// acceptRun consumes a run of runes from the valid set. 125func (l *lexer) acceptRun(valid string) { 126 for strings.ContainsRune(valid, l.next()) { 127 } 128 l.backup() 129} 130 131// acceptRunUntil consumes a run of runes up to a terminator. 132func (l *lexer) acceptRunUntil(term rune) { 133 for term != l.next() { 134 } 135 l.backup() 136} 137 138// hasText returns true if the current parsed text is not empty. 139func (l *lexer) isNotEmpty() bool { 140 return l.pos > l.start 141} 142 143// lineNumber reports which line we're on, based on the position of 144// the previous item returned by nextItem. Doing it this way 145// means we don't have to worry about peek double counting. 146func (l *lexer) lineNumber() int { 147 return 1 + strings.Count(l.input[:l.lastPos], "\n") 148} 149 150// errorf returns an error token and terminates the scan by passing 151// back a nil pointer that will be the next state, terminating l.nextItem. 152func (l *lexer) errorf(format string, args ...interface{}) stateFn { 153 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 154 return nil 155} 156 157// nextItem returns the next item from the input. 158func (l *lexer) nextItem() item { 159 i := <-l.items 160 l.lastPos = i.pos 161 return i 162} 163 164// lex creates a new scanner for the input string. 165func lex(input string) *lexer { 166 l := &lexer{ 167 input: input, 168 items: make(chan item), 169 runes: make([]rune, 0, 32), 170 } 171 go l.run() 172 return l 173} 174 175// run runs the state machine for the lexer. 176func (l *lexer) run() { 177 for l.state = lexBeforeKey(l); l.state != nil; { 178 l.state = l.state(l) 179 } 180} 181 182// state functions 183 184// lexBeforeKey scans until a key begins. 185func lexBeforeKey(l *lexer) stateFn { 186 switch r := l.next(); { 187 case isEOF(r): 188 l.emit(itemEOF) 189 return nil 190 191 case isEOL(r): 192 l.ignore() 193 return lexBeforeKey 194 195 case isComment(r): 196 return lexComment 197 198 case isWhitespace(r): 199 l.ignore() 200 return lexBeforeKey 201 202 default: 203 l.backup() 204 return lexKey 205 } 206} 207 208// lexComment scans a comment line. The comment character has already been scanned. 209func lexComment(l *lexer) stateFn { 210 l.acceptRun(whitespace) 211 l.ignore() 212 for { 213 switch r := l.next(); { 214 case isEOF(r): 215 l.ignore() 216 l.emit(itemEOF) 217 return nil 218 case isEOL(r): 219 l.emit(itemComment) 220 return lexBeforeKey 221 default: 222 l.appendRune(r) 223 } 224 } 225} 226 227// lexKey scans the key up to a delimiter 228func lexKey(l *lexer) stateFn { 229 var r rune 230 231Loop: 232 for { 233 switch r = l.next(); { 234 235 case isEscape(r): 236 err := l.scanEscapeSequence() 237 if err != nil { 238 return l.errorf(err.Error()) 239 } 240 241 case isEndOfKey(r): 242 l.backup() 243 break Loop 244 245 case isEOF(r): 246 break Loop 247 248 default: 249 l.appendRune(r) 250 } 251 } 252 253 if len(l.runes) > 0 { 254 l.emit(itemKey) 255 } 256 257 if isEOF(r) { 258 l.emit(itemEOF) 259 return nil 260 } 261 262 return lexBeforeValue 263} 264 265// lexBeforeValue scans the delimiter between key and value. 266// Leading and trailing whitespace is ignored. 267// We expect to be just after the key. 268func lexBeforeValue(l *lexer) stateFn { 269 l.acceptRun(whitespace) 270 l.accept(":=") 271 l.acceptRun(whitespace) 272 l.ignore() 273 return lexValue 274} 275 276// lexValue scans text until the end of the line. We expect to be just after the delimiter. 277func lexValue(l *lexer) stateFn { 278 for { 279 switch r := l.next(); { 280 case isEscape(r): 281 if isEOL(l.peek()) { 282 l.next() 283 l.acceptRun(whitespace) 284 } else { 285 err := l.scanEscapeSequence() 286 if err != nil { 287 return l.errorf(err.Error()) 288 } 289 } 290 291 case isEOL(r): 292 l.emit(itemValue) 293 l.ignore() 294 return lexBeforeKey 295 296 case isEOF(r): 297 l.emit(itemValue) 298 l.emit(itemEOF) 299 return nil 300 301 default: 302 l.appendRune(r) 303 } 304 } 305} 306 307// scanEscapeSequence scans either one of the escaped characters 308// or a unicode literal. We expect to be after the escape character. 309func (l *lexer) scanEscapeSequence() error { 310 switch r := l.next(); { 311 312 case isEscapedCharacter(r): 313 l.appendRune(decodeEscapedCharacter(r)) 314 return nil 315 316 case atUnicodeLiteral(r): 317 return l.scanUnicodeLiteral() 318 319 case isEOF(r): 320 return fmt.Errorf("premature EOF") 321 322 // silently drop the escape character and append the rune as is 323 default: 324 l.appendRune(r) 325 return nil 326 } 327} 328 329// scans a unicode literal in the form \uXXXX. We expect to be after the \u. 330func (l *lexer) scanUnicodeLiteral() error { 331 // scan the digits 332 d := make([]rune, 4) 333 for i := 0; i < 4; i++ { 334 d[i] = l.next() 335 if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) { 336 return fmt.Errorf("invalid unicode literal") 337 } 338 } 339 340 // decode the digits into a rune 341 r, err := strconv.ParseInt(string(d), 16, 0) 342 if err != nil { 343 return err 344 } 345 346 l.appendRune(rune(r)) 347 return nil 348} 349 350// decodeEscapedCharacter returns the unescaped rune. We expect to be after the escape character. 351func decodeEscapedCharacter(r rune) rune { 352 switch r { 353 case 'f': 354 return '\f' 355 case 'n': 356 return '\n' 357 case 'r': 358 return '\r' 359 case 't': 360 return '\t' 361 default: 362 return r 363 } 364} 365 366// atUnicodeLiteral reports whether we are at a unicode literal. 367// The escape character has already been consumed. 368func atUnicodeLiteral(r rune) bool { 369 return r == 'u' 370} 371 372// isComment reports whether we are at the start of a comment. 373func isComment(r rune) bool { 374 return r == '#' || r == '!' 375} 376 377// isEndOfKey reports whether the rune terminates the current key. 378func isEndOfKey(r rune) bool { 379 return strings.ContainsRune(" \f\t\r\n:=", r) 380} 381 382// isEOF reports whether we are at EOF. 383func isEOF(r rune) bool { 384 return r == eof 385} 386 387// isEOL reports whether we are at a new line character. 388func isEOL(r rune) bool { 389 return r == '\n' || r == '\r' 390} 391 392// isEscape reports whether the rune is the escape character which 393// prefixes unicode literals and other escaped characters. 394func isEscape(r rune) bool { 395 return r == '\\' 396} 397 398// isEscapedCharacter reports whether we are at one of the characters that need escaping. 399// The escape character has already been consumed. 400func isEscapedCharacter(r rune) bool { 401 return strings.ContainsRune(" :=fnrt", r) 402} 403 404// isWhitespace reports whether the rune is a whitespace character. 405func isWhitespace(r rune) bool { 406 return strings.ContainsRune(whitespace, r) 407} 408