1// TOML Parser. 2 3package toml 4 5import ( 6 "errors" 7 "fmt" 8 "math" 9 "reflect" 10 "regexp" 11 "strconv" 12 "strings" 13 "time" 14) 15 16type tomlParser struct { 17 flowIdx int 18 flow []token 19 tree *Tree 20 currentTable []string 21 seenTableKeys []string 22} 23 24type tomlParserStateFn func() tomlParserStateFn 25 26// Formats and panics an error message based on a token 27func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) { 28 panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...)) 29} 30 31func (p *tomlParser) run() { 32 for state := p.parseStart; state != nil; { 33 state = state() 34 } 35} 36 37func (p *tomlParser) peek() *token { 38 if p.flowIdx >= len(p.flow) { 39 return nil 40 } 41 return &p.flow[p.flowIdx] 42} 43 44func (p *tomlParser) assume(typ tokenType) { 45 tok := p.getToken() 46 if tok == nil { 47 p.raiseError(tok, "was expecting token %s, but token stream is empty", tok) 48 } 49 if tok.typ != typ { 50 p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok) 51 } 52} 53 54func (p *tomlParser) getToken() *token { 55 tok := p.peek() 56 if tok == nil { 57 return nil 58 } 59 p.flowIdx++ 60 return tok 61} 62 63func (p *tomlParser) parseStart() tomlParserStateFn { 64 tok := p.peek() 65 66 // end of stream, parsing is finished 67 if tok == nil { 68 return nil 69 } 70 71 switch tok.typ { 72 case tokenDoubleLeftBracket: 73 return p.parseGroupArray 74 case tokenLeftBracket: 75 return p.parseGroup 76 case tokenKey: 77 return p.parseAssign 78 case tokenEOF: 79 return nil 80 default: 81 p.raiseError(tok, "unexpected token") 82 } 83 return nil 84} 85 86func (p *tomlParser) parseGroupArray() tomlParserStateFn { 87 startToken := p.getToken() // discard the [[ 88 key := p.getToken() 89 if key.typ != tokenKeyGroupArray { 90 p.raiseError(key, "unexpected token %s, was expecting a table array key", key) 91 } 92 93 // get or create table array element at the indicated part in the path 94 keys, err := parseKey(key.val) 95 if err != nil { 96 p.raiseError(key, "invalid table array key: %s", err) 97 } 98 p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries 99 destTree := p.tree.GetPath(keys) 100 var array []*Tree 101 if destTree == nil { 102 array = make([]*Tree, 0) 103 } else if target, ok := destTree.([]*Tree); ok && target != nil { 104 array = destTree.([]*Tree) 105 } else { 106 p.raiseError(key, "key %s is already assigned and not of type table array", key) 107 } 108 p.currentTable = keys 109 110 // add a new tree to the end of the table array 111 newTree := newTree() 112 newTree.position = startToken.Position 113 array = append(array, newTree) 114 p.tree.SetPath(p.currentTable, array) 115 116 // remove all keys that were children of this table array 117 prefix := key.val + "." 118 found := false 119 for ii := 0; ii < len(p.seenTableKeys); { 120 tableKey := p.seenTableKeys[ii] 121 if strings.HasPrefix(tableKey, prefix) { 122 p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...) 123 } else { 124 found = (tableKey == key.val) 125 ii++ 126 } 127 } 128 129 // keep this key name from use by other kinds of assignments 130 if !found { 131 p.seenTableKeys = append(p.seenTableKeys, key.val) 132 } 133 134 // move to next parser state 135 p.assume(tokenDoubleRightBracket) 136 return p.parseStart 137} 138 139func (p *tomlParser) parseGroup() tomlParserStateFn { 140 startToken := p.getToken() // discard the [ 141 key := p.getToken() 142 if key.typ != tokenKeyGroup { 143 p.raiseError(key, "unexpected token %s, was expecting a table key", key) 144 } 145 for _, item := range p.seenTableKeys { 146 if item == key.val { 147 p.raiseError(key, "duplicated tables") 148 } 149 } 150 151 p.seenTableKeys = append(p.seenTableKeys, key.val) 152 keys, err := parseKey(key.val) 153 if err != nil { 154 p.raiseError(key, "invalid table array key: %s", err) 155 } 156 if err := p.tree.createSubTree(keys, startToken.Position); err != nil { 157 p.raiseError(key, "%s", err) 158 } 159 p.assume(tokenRightBracket) 160 p.currentTable = keys 161 return p.parseStart 162} 163 164func (p *tomlParser) parseAssign() tomlParserStateFn { 165 key := p.getToken() 166 p.assume(tokenEqual) 167 168 value := p.parseRvalue() 169 var tableKey []string 170 if len(p.currentTable) > 0 { 171 tableKey = p.currentTable 172 } else { 173 tableKey = []string{} 174 } 175 176 // find the table to assign, looking out for arrays of tables 177 var targetNode *Tree 178 switch node := p.tree.GetPath(tableKey).(type) { 179 case []*Tree: 180 targetNode = node[len(node)-1] 181 case *Tree: 182 targetNode = node 183 default: 184 p.raiseError(key, "Unknown table type for path: %s", 185 strings.Join(tableKey, ".")) 186 } 187 188 // assign value to the found table 189 keyVals := []string{key.val} 190 if len(keyVals) != 1 { 191 p.raiseError(key, "Invalid key") 192 } 193 keyVal := keyVals[0] 194 localKey := []string{keyVal} 195 finalKey := append(tableKey, keyVal) 196 if targetNode.GetPath(localKey) != nil { 197 p.raiseError(key, "The following key was defined twice: %s", 198 strings.Join(finalKey, ".")) 199 } 200 var toInsert interface{} 201 202 switch value.(type) { 203 case *Tree, []*Tree: 204 toInsert = value 205 default: 206 toInsert = &tomlValue{value: value, position: key.Position} 207 } 208 targetNode.values[keyVal] = toInsert 209 return p.parseStart 210} 211 212var numberUnderscoreInvalidRegexp *regexp.Regexp 213var hexNumberUnderscoreInvalidRegexp *regexp.Regexp 214 215func numberContainsInvalidUnderscore(value string) error { 216 if numberUnderscoreInvalidRegexp.MatchString(value) { 217 return errors.New("invalid use of _ in number") 218 } 219 return nil 220} 221 222func hexNumberContainsInvalidUnderscore(value string) error { 223 if hexNumberUnderscoreInvalidRegexp.MatchString(value) { 224 return errors.New("invalid use of _ in hex number") 225 } 226 return nil 227} 228 229func cleanupNumberToken(value string) string { 230 cleanedVal := strings.Replace(value, "_", "", -1) 231 return cleanedVal 232} 233 234func (p *tomlParser) parseRvalue() interface{} { 235 tok := p.getToken() 236 if tok == nil || tok.typ == tokenEOF { 237 p.raiseError(tok, "expecting a value") 238 } 239 240 switch tok.typ { 241 case tokenString: 242 return tok.val 243 case tokenTrue: 244 return true 245 case tokenFalse: 246 return false 247 case tokenInf: 248 if tok.val[0] == '-' { 249 return math.Inf(-1) 250 } 251 return math.Inf(1) 252 case tokenNan: 253 return math.NaN() 254 case tokenInteger: 255 cleanedVal := cleanupNumberToken(tok.val) 256 var err error 257 var val int64 258 if len(cleanedVal) >= 3 && cleanedVal[0] == '0' { 259 switch cleanedVal[1] { 260 case 'x': 261 err = hexNumberContainsInvalidUnderscore(tok.val) 262 if err != nil { 263 p.raiseError(tok, "%s", err) 264 } 265 val, err = strconv.ParseInt(cleanedVal[2:], 16, 64) 266 case 'o': 267 err = numberContainsInvalidUnderscore(tok.val) 268 if err != nil { 269 p.raiseError(tok, "%s", err) 270 } 271 val, err = strconv.ParseInt(cleanedVal[2:], 8, 64) 272 case 'b': 273 err = numberContainsInvalidUnderscore(tok.val) 274 if err != nil { 275 p.raiseError(tok, "%s", err) 276 } 277 val, err = strconv.ParseInt(cleanedVal[2:], 2, 64) 278 default: 279 panic("invalid base") // the lexer should catch this first 280 } 281 } else { 282 err = numberContainsInvalidUnderscore(tok.val) 283 if err != nil { 284 p.raiseError(tok, "%s", err) 285 } 286 val, err = strconv.ParseInt(cleanedVal, 10, 64) 287 } 288 if err != nil { 289 p.raiseError(tok, "%s", err) 290 } 291 return val 292 case tokenFloat: 293 err := numberContainsInvalidUnderscore(tok.val) 294 if err != nil { 295 p.raiseError(tok, "%s", err) 296 } 297 cleanedVal := cleanupNumberToken(tok.val) 298 val, err := strconv.ParseFloat(cleanedVal, 64) 299 if err != nil { 300 p.raiseError(tok, "%s", err) 301 } 302 return val 303 case tokenDate: 304 val, err := time.ParseInLocation(time.RFC3339Nano, tok.val, time.UTC) 305 if err != nil { 306 p.raiseError(tok, "%s", err) 307 } 308 return val 309 case tokenLeftBracket: 310 return p.parseArray() 311 case tokenLeftCurlyBrace: 312 return p.parseInlineTable() 313 case tokenEqual: 314 p.raiseError(tok, "cannot have multiple equals for the same key") 315 case tokenError: 316 p.raiseError(tok, "%s", tok) 317 } 318 319 p.raiseError(tok, "never reached") 320 321 return nil 322} 323 324func tokenIsComma(t *token) bool { 325 return t != nil && t.typ == tokenComma 326} 327 328func (p *tomlParser) parseInlineTable() *Tree { 329 tree := newTree() 330 var previous *token 331Loop: 332 for { 333 follow := p.peek() 334 if follow == nil || follow.typ == tokenEOF { 335 p.raiseError(follow, "unterminated inline table") 336 } 337 switch follow.typ { 338 case tokenRightCurlyBrace: 339 p.getToken() 340 break Loop 341 case tokenKey: 342 if !tokenIsComma(previous) && previous != nil { 343 p.raiseError(follow, "comma expected between fields in inline table") 344 } 345 key := p.getToken() 346 p.assume(tokenEqual) 347 value := p.parseRvalue() 348 tree.Set(key.val, value) 349 case tokenComma: 350 if previous == nil { 351 p.raiseError(follow, "inline table cannot start with a comma") 352 } 353 if tokenIsComma(previous) { 354 p.raiseError(follow, "need field between two commas in inline table") 355 } 356 p.getToken() 357 default: 358 p.raiseError(follow, "unexpected token type in inline table: %s", follow.String()) 359 } 360 previous = follow 361 } 362 if tokenIsComma(previous) { 363 p.raiseError(previous, "trailing comma at the end of inline table") 364 } 365 return tree 366} 367 368func (p *tomlParser) parseArray() interface{} { 369 var array []interface{} 370 arrayType := reflect.TypeOf(nil) 371 for { 372 follow := p.peek() 373 if follow == nil || follow.typ == tokenEOF { 374 p.raiseError(follow, "unterminated array") 375 } 376 if follow.typ == tokenRightBracket { 377 p.getToken() 378 break 379 } 380 val := p.parseRvalue() 381 if arrayType == nil { 382 arrayType = reflect.TypeOf(val) 383 } 384 if reflect.TypeOf(val) != arrayType { 385 p.raiseError(follow, "mixed types in array") 386 } 387 array = append(array, val) 388 follow = p.peek() 389 if follow == nil || follow.typ == tokenEOF { 390 p.raiseError(follow, "unterminated array") 391 } 392 if follow.typ != tokenRightBracket && follow.typ != tokenComma { 393 p.raiseError(follow, "missing comma") 394 } 395 if follow.typ == tokenComma { 396 p.getToken() 397 } 398 } 399 // An array of Trees is actually an array of inline 400 // tables, which is a shorthand for a table array. If the 401 // array was not converted from []interface{} to []*Tree, 402 // the two notations would not be equivalent. 403 if arrayType == reflect.TypeOf(newTree()) { 404 tomlArray := make([]*Tree, len(array)) 405 for i, v := range array { 406 tomlArray[i] = v.(*Tree) 407 } 408 return tomlArray 409 } 410 return array 411} 412 413func parseToml(flow []token) *Tree { 414 result := newTree() 415 result.position = Position{1, 1} 416 parser := &tomlParser{ 417 flowIdx: 0, 418 flow: flow, 419 tree: result, 420 currentTable: make([]string, 0), 421 seenTableKeys: make([]string, 0), 422 } 423 parser.run() 424 return result 425} 426 427func init() { 428 numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`) 429 hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`) 430} 431