1package parse 2 3import ( 4 "unicode" 5 "unicode/utf8" 6) 7 8// eof rune sent when end of file is reached 9var eof = rune(0) 10 11// token is a lexical token. 12type token uint 13 14// list of lexical tokens. 15const ( 16 // special tokens 17 tokenIllegal token = iota 18 tokenEOF 19 20 // identifiers and literals 21 tokenIdent 22 23 // operators and delimiters 24 tokenLbrack 25 tokenRbrack 26 tokenQuote 27) 28 29// predefined mode bits to control recognition of tokens. 30const ( 31 scanIdent byte = 1 << iota 32 scanLbrack 33 scanRbrack 34 scanEscape 35) 36 37// returns true if rune is accepted. 38type acceptFunc func(r rune, i int) bool 39 40// scanner implements a lexical scanner that reads unicode 41// characters and tokens from a string buffer. 42type scanner struct { 43 buf string 44 pos int 45 start int 46 width int 47 mode byte 48 49 accept acceptFunc 50} 51 52// init initializes a scanner with a new buffer. 53func (s *scanner) init(buf string) { 54 s.buf = buf 55 s.pos = 0 56 s.start = 0 57 s.width = 0 58 s.accept = nil 59} 60 61// read returns the next unicode character. It returns eof at 62// the end of the string buffer. 63func (s *scanner) read() rune { 64 if s.pos >= len(s.buf) { 65 s.width = 0 66 return eof 67 } 68 r, w := utf8.DecodeRuneInString(s.buf[s.pos:]) 69 s.width = w 70 s.pos += s.width 71 return r 72} 73 74func (s *scanner) unread() { 75 s.pos -= s.width 76} 77 78// skip skips over the curring unicode character in the buffer 79// by slicing and removing from the buffer. 80func (s *scanner) skip() { 81 l := s.buf[:s.pos-1] 82 r := s.buf[s.pos:] 83 s.buf = l + r 84} 85 86// peek returns the next unicode character in the buffer without 87// advancing the scanner. It returns eof if the scanner's position 88// is at the last character of the source. 89func (s *scanner) peek() rune { 90 r := s.read() 91 s.unread() 92 return r 93} 94 95// string returns the string corresponding to the most recently 96// scanned token. Valid after calling scan(). 97func (s *scanner) string() string { 98 return s.buf[s.start:s.pos] 99} 100 101// scan reads the next token or Unicode character from source and 102// returns it. It returns EOF at the end of the source. 103func (s *scanner) scan() token { 104 s.start = s.pos 105 r := s.read() 106 switch { 107 case r == eof: 108 return tokenEOF 109 case s.scanLbrack(r): 110 return tokenLbrack 111 case s.scanRbrack(r): 112 return tokenRbrack 113 case s.scanIdent(r): 114 return tokenIdent 115 } 116 return tokenIllegal 117} 118 119// scanIdent reads the next token or Unicode character from source 120// and returns true if the Ident character is accepted. 121func (s *scanner) scanIdent(r rune) bool { 122 if s.mode&scanIdent == 0 { 123 return false 124 } 125 if s.scanEscaped(r) { 126 s.skip() 127 } else if !s.accept(r, s.pos-s.start) { 128 return false 129 } 130loop: 131 for { 132 r := s.read() 133 switch { 134 case r == eof: 135 s.unread() 136 break loop 137 case s.scanLbrack(r): 138 s.unread() 139 s.unread() 140 break loop 141 } 142 if s.scanEscaped(r) { 143 s.skip() 144 continue 145 } 146 if !s.accept(r, s.pos-s.start) { 147 s.unread() 148 break loop 149 } 150 } 151 return true 152} 153 154// scanLbrack reads the next token or Unicode character from source 155// and returns true if the open bracket is encountered. 156func (s *scanner) scanLbrack(r rune) bool { 157 if s.mode&scanLbrack == 0 { 158 return false 159 } 160 if r == '$' { 161 if s.read() == '{' { 162 return true 163 } 164 s.unread() 165 } 166 return false 167} 168 169// scanRbrack reads the next token or Unicode character from source 170// and returns true if the closing bracket is encountered. 171func (s *scanner) scanRbrack(r rune) bool { 172 if s.mode&scanRbrack == 0 { 173 return false 174 } 175 return r == '}' 176} 177 178// scanEscaped reads the next token or Unicode character from source 179// and returns true if it being escaped and should be sipped. 180func (s *scanner) scanEscaped(r rune) bool { 181 if s.mode&scanEscape == 0 { 182 return false 183 } 184 if r == '$' { 185 if s.peek() == '$' { 186 return true 187 } 188 } 189 if r != '\\' { 190 return false 191 } 192 switch s.peek() { 193 case '/', '\\': 194 return true 195 default: 196 return false 197 } 198} 199 200// 201// scanner functions accept or reject runes. 202// 203 204func acceptRune(r rune, i int) bool { 205 return true 206} 207 208func acceptIdent(r rune, i int) bool { 209 return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' 210} 211 212func acceptColon(r rune, i int) bool { 213 return r == ':' 214} 215 216func acceptOneHash(r rune, i int) bool { 217 return r == '#' && i == 1 218} 219 220func acceptNone(r rune, i int) bool { 221 return false 222} 223 224func acceptNotClosing(r rune, i int) bool { 225 return r != '}' 226} 227 228func acceptHashFunc(r rune, i int) bool { 229 return r == '#' && i < 3 230} 231 232func acceptPercentFunc(r rune, i int) bool { 233 return r == '%' && i < 3 234} 235 236func acceptDefaultFunc(r rune, i int) bool { 237 switch { 238 case i == 1 && r == ':': 239 return true 240 case i == 2 && (r == '=' || r == '-' || r == '?' || r == '+'): 241 return true 242 default: 243 return false 244 } 245} 246 247func acceptReplaceFunc(r rune, i int) bool { 248 switch { 249 case i == 1 && r == '/': 250 return true 251 case i == 2 && (r == '/' || r == '#' || r == '%'): 252 return true 253 default: 254 return false 255 } 256} 257 258func acceptOneEqual(r rune, i int) bool { 259 return i == 1 && r == '=' 260} 261 262func acceptOneColon(r rune, i int) bool { 263 return i == 1 && r == ':' 264} 265 266func rejectColonClose(r rune, i int) bool { 267 return r != ':' && r != '}' 268} 269 270func acceptSlash(r rune, i int) bool { 271 return r == '/' 272} 273 274func acceptNotSlash(r rune, i int) bool { 275 return r != '/' 276} 277 278func acceptCasingFunc(r rune, i int) bool { 279 return (r == ',' || r == '^') && i < 3 280} 281