1// TOML Parser. 2 3package toml 4 5import ( 6 "errors" 7 "fmt" 8 "math" 9 "reflect" 10 "regexp" 11 "strconv" 12 "strings" 13 "time" 14) 15 16type tomlParser struct { 17 flowIdx int 18 flow []token 19 tree *Tree 20 currentTable []string 21 seenTableKeys []string 22} 23 24type tomlParserStateFn func() tomlParserStateFn 25 26// Formats and panics an error message based on a token 27func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) { 28 panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...)) 29} 30 31func (p *tomlParser) run() { 32 for state := p.parseStart; state != nil; { 33 state = state() 34 } 35} 36 37func (p *tomlParser) peek() *token { 38 if p.flowIdx >= len(p.flow) { 39 return nil 40 } 41 return &p.flow[p.flowIdx] 42} 43 44func (p *tomlParser) assume(typ tokenType) { 45 tok := p.getToken() 46 if tok == nil { 47 p.raiseError(tok, "was expecting token %s, but token stream is empty", tok) 48 } 49 if tok.typ != typ { 50 p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok) 51 } 52} 53 54func (p *tomlParser) getToken() *token { 55 tok := p.peek() 56 if tok == nil { 57 return nil 58 } 59 p.flowIdx++ 60 return tok 61} 62 63func (p *tomlParser) parseStart() tomlParserStateFn { 64 tok := p.peek() 65 66 // end of stream, parsing is finished 67 if tok == nil { 68 return nil 69 } 70 71 switch tok.typ { 72 case tokenDoubleLeftBracket: 73 return p.parseGroupArray 74 case tokenLeftBracket: 75 return p.parseGroup 76 case tokenKey: 77 return p.parseAssign 78 case tokenEOF: 79 return nil 80 case tokenError: 81 p.raiseError(tok, "parsing error: %s", tok.String()) 82 default: 83 p.raiseError(tok, "unexpected token %s", tok.typ) 84 } 85 return nil 86} 87 88func (p *tomlParser) parseGroupArray() tomlParserStateFn { 89 startToken := p.getToken() // discard the [[ 90 key := p.getToken() 91 if key.typ != tokenKeyGroupArray { 92 p.raiseError(key, "unexpected token %s, was expecting a table array key", key) 93 } 94 95 // get or create table array element at the indicated part in the path 96 keys, err := parseKey(key.val) 97 if err != nil { 98 p.raiseError(key, "invalid table array key: %s", err) 99 } 100 p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries 101 destTree := p.tree.GetPath(keys) 102 var array []*Tree 103 if destTree == nil { 104 array = make([]*Tree, 0) 105 } else if target, ok := destTree.([]*Tree); ok && target != nil { 106 array = destTree.([]*Tree) 107 } else { 108 p.raiseError(key, "key %s is already assigned and not of type table array", key) 109 } 110 p.currentTable = keys 111 112 // add a new tree to the end of the table array 113 newTree := newTree() 114 newTree.position = startToken.Position 115 array = append(array, newTree) 116 p.tree.SetPath(p.currentTable, array) 117 118 // remove all keys that were children of this table array 119 prefix := key.val + "." 120 found := false 121 for ii := 0; ii < len(p.seenTableKeys); { 122 tableKey := p.seenTableKeys[ii] 123 if strings.HasPrefix(tableKey, prefix) { 124 p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...) 125 } else { 126 found = (tableKey == key.val) 127 ii++ 128 } 129 } 130 131 // keep this key name from use by other kinds of assignments 132 if !found { 133 p.seenTableKeys = append(p.seenTableKeys, key.val) 134 } 135 136 // move to next parser state 137 p.assume(tokenDoubleRightBracket) 138 return p.parseStart 139} 140 141func (p *tomlParser) parseGroup() tomlParserStateFn { 142 startToken := p.getToken() // discard the [ 143 key := p.getToken() 144 if key.typ != tokenKeyGroup { 145 p.raiseError(key, "unexpected token %s, was expecting a table key", key) 146 } 147 for _, item := range p.seenTableKeys { 148 if item == key.val { 149 p.raiseError(key, "duplicated tables") 150 } 151 } 152 153 p.seenTableKeys = append(p.seenTableKeys, key.val) 154 keys, err := parseKey(key.val) 155 if err != nil { 156 p.raiseError(key, "invalid table array key: %s", err) 157 } 158 if err := p.tree.createSubTree(keys, startToken.Position); err != nil { 159 p.raiseError(key, "%s", err) 160 } 161 p.assume(tokenRightBracket) 162 p.currentTable = keys 163 return p.parseStart 164} 165 166func (p *tomlParser) parseAssign() tomlParserStateFn { 167 key := p.getToken() 168 p.assume(tokenEqual) 169 170 parsedKey, err := parseKey(key.val) 171 if err != nil { 172 p.raiseError(key, "invalid key: %s", err.Error()) 173 } 174 175 value := p.parseRvalue() 176 var tableKey []string 177 if len(p.currentTable) > 0 { 178 tableKey = p.currentTable 179 } else { 180 tableKey = []string{} 181 } 182 183 prefixKey := parsedKey[0 : len(parsedKey)-1] 184 tableKey = append(tableKey, prefixKey...) 185 186 // find the table to assign, looking out for arrays of tables 187 var targetNode *Tree 188 switch node := p.tree.GetPath(tableKey).(type) { 189 case []*Tree: 190 targetNode = node[len(node)-1] 191 case *Tree: 192 targetNode = node 193 case nil: 194 // create intermediate 195 if err := p.tree.createSubTree(tableKey, key.Position); err != nil { 196 p.raiseError(key, "could not create intermediate group: %s", err) 197 } 198 targetNode = p.tree.GetPath(tableKey).(*Tree) 199 default: 200 p.raiseError(key, "Unknown table type for path: %s", 201 strings.Join(tableKey, ".")) 202 } 203 204 // assign value to the found table 205 keyVal := parsedKey[len(parsedKey)-1] 206 localKey := []string{keyVal} 207 finalKey := append(tableKey, keyVal) 208 if targetNode.GetPath(localKey) != nil { 209 p.raiseError(key, "The following key was defined twice: %s", 210 strings.Join(finalKey, ".")) 211 } 212 var toInsert interface{} 213 214 switch value.(type) { 215 case *Tree, []*Tree: 216 toInsert = value 217 default: 218 toInsert = &tomlValue{value: value, position: key.Position} 219 } 220 targetNode.values[keyVal] = toInsert 221 return p.parseStart 222} 223 224var numberUnderscoreInvalidRegexp *regexp.Regexp 225var hexNumberUnderscoreInvalidRegexp *regexp.Regexp 226 227func numberContainsInvalidUnderscore(value string) error { 228 if numberUnderscoreInvalidRegexp.MatchString(value) { 229 return errors.New("invalid use of _ in number") 230 } 231 return nil 232} 233 234func hexNumberContainsInvalidUnderscore(value string) error { 235 if hexNumberUnderscoreInvalidRegexp.MatchString(value) { 236 return errors.New("invalid use of _ in hex number") 237 } 238 return nil 239} 240 241func cleanupNumberToken(value string) string { 242 cleanedVal := strings.Replace(value, "_", "", -1) 243 return cleanedVal 244} 245 246func (p *tomlParser) parseRvalue() interface{} { 247 tok := p.getToken() 248 if tok == nil || tok.typ == tokenEOF { 249 p.raiseError(tok, "expecting a value") 250 } 251 252 switch tok.typ { 253 case tokenString: 254 return tok.val 255 case tokenTrue: 256 return true 257 case tokenFalse: 258 return false 259 case tokenInf: 260 if tok.val[0] == '-' { 261 return math.Inf(-1) 262 } 263 return math.Inf(1) 264 case tokenNan: 265 return math.NaN() 266 case tokenInteger: 267 cleanedVal := cleanupNumberToken(tok.val) 268 var err error 269 var val int64 270 if len(cleanedVal) >= 3 && cleanedVal[0] == '0' { 271 switch cleanedVal[1] { 272 case 'x': 273 err = hexNumberContainsInvalidUnderscore(tok.val) 274 if err != nil { 275 p.raiseError(tok, "%s", err) 276 } 277 val, err = strconv.ParseInt(cleanedVal[2:], 16, 64) 278 case 'o': 279 err = numberContainsInvalidUnderscore(tok.val) 280 if err != nil { 281 p.raiseError(tok, "%s", err) 282 } 283 val, err = strconv.ParseInt(cleanedVal[2:], 8, 64) 284 case 'b': 285 err = numberContainsInvalidUnderscore(tok.val) 286 if err != nil { 287 p.raiseError(tok, "%s", err) 288 } 289 val, err = strconv.ParseInt(cleanedVal[2:], 2, 64) 290 default: 291 panic("invalid base") // the lexer should catch this first 292 } 293 } else { 294 err = numberContainsInvalidUnderscore(tok.val) 295 if err != nil { 296 p.raiseError(tok, "%s", err) 297 } 298 val, err = strconv.ParseInt(cleanedVal, 10, 64) 299 } 300 if err != nil { 301 p.raiseError(tok, "%s", err) 302 } 303 return val 304 case tokenFloat: 305 err := numberContainsInvalidUnderscore(tok.val) 306 if err != nil { 307 p.raiseError(tok, "%s", err) 308 } 309 cleanedVal := cleanupNumberToken(tok.val) 310 val, err := strconv.ParseFloat(cleanedVal, 64) 311 if err != nil { 312 p.raiseError(tok, "%s", err) 313 } 314 return val 315 case tokenDate: 316 layout := time.RFC3339Nano 317 if !strings.Contains(tok.val, "T") { 318 layout = strings.Replace(layout, "T", " ", 1) 319 } 320 val, err := time.ParseInLocation(layout, tok.val, time.UTC) 321 if err != nil { 322 p.raiseError(tok, "%s", err) 323 } 324 return val 325 case tokenLocalDate: 326 v := strings.Replace(tok.val, " ", "T", -1) 327 isDateTime := false 328 isTime := false 329 for _, c := range v { 330 if c == 'T' || c == 't' { 331 isDateTime = true 332 break 333 } 334 if c == ':' { 335 isTime = true 336 break 337 } 338 } 339 340 var val interface{} 341 var err error 342 343 if isDateTime { 344 val, err = ParseLocalDateTime(v) 345 } else if isTime { 346 val, err = ParseLocalTime(v) 347 } else { 348 val, err = ParseLocalDate(v) 349 } 350 351 if err != nil { 352 p.raiseError(tok, "%s", err) 353 } 354 return val 355 case tokenLeftBracket: 356 return p.parseArray() 357 case tokenLeftCurlyBrace: 358 return p.parseInlineTable() 359 case tokenEqual: 360 p.raiseError(tok, "cannot have multiple equals for the same key") 361 case tokenError: 362 p.raiseError(tok, "%s", tok) 363 } 364 365 p.raiseError(tok, "never reached") 366 367 return nil 368} 369 370func tokenIsComma(t *token) bool { 371 return t != nil && t.typ == tokenComma 372} 373 374func (p *tomlParser) parseInlineTable() *Tree { 375 tree := newTree() 376 var previous *token 377Loop: 378 for { 379 follow := p.peek() 380 if follow == nil || follow.typ == tokenEOF { 381 p.raiseError(follow, "unterminated inline table") 382 } 383 switch follow.typ { 384 case tokenRightCurlyBrace: 385 p.getToken() 386 break Loop 387 case tokenKey, tokenInteger, tokenString: 388 if !tokenIsComma(previous) && previous != nil { 389 p.raiseError(follow, "comma expected between fields in inline table") 390 } 391 key := p.getToken() 392 p.assume(tokenEqual) 393 394 parsedKey, err := parseKey(key.val) 395 if err != nil { 396 p.raiseError(key, "invalid key: %s", err) 397 } 398 399 value := p.parseRvalue() 400 tree.SetPath(parsedKey, value) 401 case tokenComma: 402 if tokenIsComma(previous) { 403 p.raiseError(follow, "need field between two commas in inline table") 404 } 405 p.getToken() 406 default: 407 p.raiseError(follow, "unexpected token type in inline table: %s", follow.String()) 408 } 409 previous = follow 410 } 411 if tokenIsComma(previous) { 412 p.raiseError(previous, "trailing comma at the end of inline table") 413 } 414 return tree 415} 416 417func (p *tomlParser) parseArray() interface{} { 418 var array []interface{} 419 arrayType := reflect.TypeOf(nil) 420 for { 421 follow := p.peek() 422 if follow == nil || follow.typ == tokenEOF { 423 p.raiseError(follow, "unterminated array") 424 } 425 if follow.typ == tokenRightBracket { 426 p.getToken() 427 break 428 } 429 val := p.parseRvalue() 430 if arrayType == nil { 431 arrayType = reflect.TypeOf(val) 432 } 433 if reflect.TypeOf(val) != arrayType { 434 p.raiseError(follow, "mixed types in array") 435 } 436 array = append(array, val) 437 follow = p.peek() 438 if follow == nil || follow.typ == tokenEOF { 439 p.raiseError(follow, "unterminated array") 440 } 441 if follow.typ != tokenRightBracket && follow.typ != tokenComma { 442 p.raiseError(follow, "missing comma") 443 } 444 if follow.typ == tokenComma { 445 p.getToken() 446 } 447 } 448 // An array of Trees is actually an array of inline 449 // tables, which is a shorthand for a table array. If the 450 // array was not converted from []interface{} to []*Tree, 451 // the two notations would not be equivalent. 452 if arrayType == reflect.TypeOf(newTree()) { 453 tomlArray := make([]*Tree, len(array)) 454 for i, v := range array { 455 tomlArray[i] = v.(*Tree) 456 } 457 return tomlArray 458 } 459 return array 460} 461 462func parseToml(flow []token) *Tree { 463 result := newTree() 464 result.position = Position{1, 1} 465 parser := &tomlParser{ 466 flowIdx: 0, 467 flow: flow, 468 tree: result, 469 currentTable: make([]string, 0), 470 seenTableKeys: make([]string, 0), 471 } 472 parser.run() 473 return result 474} 475 476func init() { 477 numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`) 478 hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`) 479} 480