1// TOML Parser. 2 3package toml 4 5import ( 6 "errors" 7 "fmt" 8 "math" 9 "reflect" 10 "regexp" 11 "strconv" 12 "strings" 13 "time" 14) 15 16type tomlParser struct { 17 flowIdx int 18 flow []token 19 tree *Tree 20 currentTable []string 21 seenTableKeys []string 22} 23 24type tomlParserStateFn func() tomlParserStateFn 25 26// Formats and panics an error message based on a token 27func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) { 28 panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...)) 29} 30 31func (p *tomlParser) run() { 32 for state := p.parseStart; state != nil; { 33 state = state() 34 } 35} 36 37func (p *tomlParser) peek() *token { 38 if p.flowIdx >= len(p.flow) { 39 return nil 40 } 41 return &p.flow[p.flowIdx] 42} 43 44func (p *tomlParser) assume(typ tokenType) { 45 tok := p.getToken() 46 if tok == nil { 47 p.raiseError(tok, "was expecting token %s, but token stream is empty", tok) 48 } 49 if tok.typ != typ { 50 p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok) 51 } 52} 53 54func (p *tomlParser) getToken() *token { 55 tok := p.peek() 56 if tok == nil { 57 return nil 58 } 59 p.flowIdx++ 60 return tok 61} 62 63func (p *tomlParser) parseStart() tomlParserStateFn { 64 tok := p.peek() 65 66 // end of stream, parsing is finished 67 if tok == nil { 68 return nil 69 } 70 71 switch tok.typ { 72 case tokenDoubleLeftBracket: 73 return p.parseGroupArray 74 case tokenLeftBracket: 75 return p.parseGroup 76 case tokenKey: 77 return p.parseAssign 78 case tokenEOF: 79 return nil 80 case tokenError: 81 p.raiseError(tok, "parsing error: %s", tok.String()) 82 default: 83 p.raiseError(tok, "unexpected token %s", tok.typ) 84 } 85 return nil 86} 87 88func (p *tomlParser) parseGroupArray() tomlParserStateFn { 89 startToken := p.getToken() // discard the [[ 90 key := p.getToken() 91 if key.typ != tokenKeyGroupArray { 92 p.raiseError(key, "unexpected token %s, was expecting a table array key", key) 93 } 94 95 // get or create table array element at the indicated part in the path 96 keys, err := parseKey(key.val) 97 if err != nil { 98 p.raiseError(key, "invalid table array key: %s", err) 99 } 100 p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries 101 destTree := p.tree.GetPath(keys) 102 var array []*Tree 103 if destTree == nil { 104 array = make([]*Tree, 0) 105 } else if target, ok := destTree.([]*Tree); ok && target != nil { 106 array = destTree.([]*Tree) 107 } else { 108 p.raiseError(key, "key %s is already assigned and not of type table array", key) 109 } 110 p.currentTable = keys 111 112 // add a new tree to the end of the table array 113 newTree := newTree() 114 newTree.position = startToken.Position 115 array = append(array, newTree) 116 p.tree.SetPath(p.currentTable, array) 117 118 // remove all keys that were children of this table array 119 prefix := key.val + "." 120 found := false 121 for ii := 0; ii < len(p.seenTableKeys); { 122 tableKey := p.seenTableKeys[ii] 123 if strings.HasPrefix(tableKey, prefix) { 124 p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...) 125 } else { 126 found = (tableKey == key.val) 127 ii++ 128 } 129 } 130 131 // keep this key name from use by other kinds of assignments 132 if !found { 133 p.seenTableKeys = append(p.seenTableKeys, key.val) 134 } 135 136 // move to next parser state 137 p.assume(tokenDoubleRightBracket) 138 return p.parseStart 139} 140 141func (p *tomlParser) parseGroup() tomlParserStateFn { 142 startToken := p.getToken() // discard the [ 143 key := p.getToken() 144 if key.typ != tokenKeyGroup { 145 p.raiseError(key, "unexpected token %s, was expecting a table key", key) 146 } 147 for _, item := range p.seenTableKeys { 148 if item == key.val { 149 p.raiseError(key, "duplicated tables") 150 } 151 } 152 153 p.seenTableKeys = append(p.seenTableKeys, key.val) 154 keys, err := parseKey(key.val) 155 if err != nil { 156 p.raiseError(key, "invalid table array key: %s", err) 157 } 158 if err := p.tree.createSubTree(keys, startToken.Position); err != nil { 159 p.raiseError(key, "%s", err) 160 } 161 destTree := p.tree.GetPath(keys) 162 if target, ok := destTree.(*Tree); ok && target != nil && target.inline { 163 p.raiseError(key, "could not re-define exist inline table or its sub-table : %s", 164 strings.Join(keys, ".")) 165 } 166 p.assume(tokenRightBracket) 167 p.currentTable = keys 168 return p.parseStart 169} 170 171func (p *tomlParser) parseAssign() tomlParserStateFn { 172 key := p.getToken() 173 p.assume(tokenEqual) 174 175 parsedKey, err := parseKey(key.val) 176 if err != nil { 177 p.raiseError(key, "invalid key: %s", err.Error()) 178 } 179 180 value := p.parseRvalue() 181 var tableKey []string 182 if len(p.currentTable) > 0 { 183 tableKey = p.currentTable 184 } else { 185 tableKey = []string{} 186 } 187 188 prefixKey := parsedKey[0 : len(parsedKey)-1] 189 tableKey = append(tableKey, prefixKey...) 190 191 // find the table to assign, looking out for arrays of tables 192 var targetNode *Tree 193 switch node := p.tree.GetPath(tableKey).(type) { 194 case []*Tree: 195 targetNode = node[len(node)-1] 196 case *Tree: 197 targetNode = node 198 case nil: 199 // create intermediate 200 if err := p.tree.createSubTree(tableKey, key.Position); err != nil { 201 p.raiseError(key, "could not create intermediate group: %s", err) 202 } 203 targetNode = p.tree.GetPath(tableKey).(*Tree) 204 default: 205 p.raiseError(key, "Unknown table type for path: %s", 206 strings.Join(tableKey, ".")) 207 } 208 209 if targetNode.inline { 210 p.raiseError(key, "could not add key or sub-table to exist inline table or its sub-table : %s", 211 strings.Join(tableKey, ".")) 212 } 213 214 // assign value to the found table 215 keyVal := parsedKey[len(parsedKey)-1] 216 localKey := []string{keyVal} 217 finalKey := append(tableKey, keyVal) 218 if targetNode.GetPath(localKey) != nil { 219 p.raiseError(key, "The following key was defined twice: %s", 220 strings.Join(finalKey, ".")) 221 } 222 var toInsert interface{} 223 224 switch value.(type) { 225 case *Tree, []*Tree: 226 toInsert = value 227 default: 228 toInsert = &tomlValue{value: value, position: key.Position} 229 } 230 targetNode.values[keyVal] = toInsert 231 return p.parseStart 232} 233 234var numberUnderscoreInvalidRegexp *regexp.Regexp 235var hexNumberUnderscoreInvalidRegexp *regexp.Regexp 236 237func numberContainsInvalidUnderscore(value string) error { 238 if numberUnderscoreInvalidRegexp.MatchString(value) { 239 return errors.New("invalid use of _ in number") 240 } 241 return nil 242} 243 244func hexNumberContainsInvalidUnderscore(value string) error { 245 if hexNumberUnderscoreInvalidRegexp.MatchString(value) { 246 return errors.New("invalid use of _ in hex number") 247 } 248 return nil 249} 250 251func cleanupNumberToken(value string) string { 252 cleanedVal := strings.Replace(value, "_", "", -1) 253 return cleanedVal 254} 255 256func (p *tomlParser) parseRvalue() interface{} { 257 tok := p.getToken() 258 if tok == nil || tok.typ == tokenEOF { 259 p.raiseError(tok, "expecting a value") 260 } 261 262 switch tok.typ { 263 case tokenString: 264 return tok.val 265 case tokenTrue: 266 return true 267 case tokenFalse: 268 return false 269 case tokenInf: 270 if tok.val[0] == '-' { 271 return math.Inf(-1) 272 } 273 return math.Inf(1) 274 case tokenNan: 275 return math.NaN() 276 case tokenInteger: 277 cleanedVal := cleanupNumberToken(tok.val) 278 var err error 279 var val int64 280 if len(cleanedVal) >= 3 && cleanedVal[0] == '0' { 281 switch cleanedVal[1] { 282 case 'x': 283 err = hexNumberContainsInvalidUnderscore(tok.val) 284 if err != nil { 285 p.raiseError(tok, "%s", err) 286 } 287 val, err = strconv.ParseInt(cleanedVal[2:], 16, 64) 288 case 'o': 289 err = numberContainsInvalidUnderscore(tok.val) 290 if err != nil { 291 p.raiseError(tok, "%s", err) 292 } 293 val, err = strconv.ParseInt(cleanedVal[2:], 8, 64) 294 case 'b': 295 err = numberContainsInvalidUnderscore(tok.val) 296 if err != nil { 297 p.raiseError(tok, "%s", err) 298 } 299 val, err = strconv.ParseInt(cleanedVal[2:], 2, 64) 300 default: 301 panic("invalid base") // the lexer should catch this first 302 } 303 } else { 304 err = numberContainsInvalidUnderscore(tok.val) 305 if err != nil { 306 p.raiseError(tok, "%s", err) 307 } 308 val, err = strconv.ParseInt(cleanedVal, 10, 64) 309 } 310 if err != nil { 311 p.raiseError(tok, "%s", err) 312 } 313 return val 314 case tokenFloat: 315 err := numberContainsInvalidUnderscore(tok.val) 316 if err != nil { 317 p.raiseError(tok, "%s", err) 318 } 319 cleanedVal := cleanupNumberToken(tok.val) 320 val, err := strconv.ParseFloat(cleanedVal, 64) 321 if err != nil { 322 p.raiseError(tok, "%s", err) 323 } 324 return val 325 case tokenDate: 326 layout := time.RFC3339Nano 327 if !strings.Contains(tok.val, "T") { 328 layout = strings.Replace(layout, "T", " ", 1) 329 } 330 val, err := time.ParseInLocation(layout, tok.val, time.UTC) 331 if err != nil { 332 p.raiseError(tok, "%s", err) 333 } 334 return val 335 case tokenLocalDate: 336 v := strings.Replace(tok.val, " ", "T", -1) 337 isDateTime := false 338 isTime := false 339 for _, c := range v { 340 if c == 'T' || c == 't' { 341 isDateTime = true 342 break 343 } 344 if c == ':' { 345 isTime = true 346 break 347 } 348 } 349 350 var val interface{} 351 var err error 352 353 if isDateTime { 354 val, err = ParseLocalDateTime(v) 355 } else if isTime { 356 val, err = ParseLocalTime(v) 357 } else { 358 val, err = ParseLocalDate(v) 359 } 360 361 if err != nil { 362 p.raiseError(tok, "%s", err) 363 } 364 return val 365 case tokenLeftBracket: 366 return p.parseArray() 367 case tokenLeftCurlyBrace: 368 return p.parseInlineTable() 369 case tokenEqual: 370 p.raiseError(tok, "cannot have multiple equals for the same key") 371 case tokenError: 372 p.raiseError(tok, "%s", tok) 373 } 374 375 p.raiseError(tok, "never reached") 376 377 return nil 378} 379 380func tokenIsComma(t *token) bool { 381 return t != nil && t.typ == tokenComma 382} 383 384func (p *tomlParser) parseInlineTable() *Tree { 385 tree := newTree() 386 var previous *token 387Loop: 388 for { 389 follow := p.peek() 390 if follow == nil || follow.typ == tokenEOF { 391 p.raiseError(follow, "unterminated inline table") 392 } 393 switch follow.typ { 394 case tokenRightCurlyBrace: 395 p.getToken() 396 break Loop 397 case tokenKey, tokenInteger, tokenString: 398 if !tokenIsComma(previous) && previous != nil { 399 p.raiseError(follow, "comma expected between fields in inline table") 400 } 401 key := p.getToken() 402 p.assume(tokenEqual) 403 404 parsedKey, err := parseKey(key.val) 405 if err != nil { 406 p.raiseError(key, "invalid key: %s", err) 407 } 408 409 value := p.parseRvalue() 410 tree.SetPath(parsedKey, value) 411 case tokenComma: 412 if tokenIsComma(previous) { 413 p.raiseError(follow, "need field between two commas in inline table") 414 } 415 p.getToken() 416 default: 417 p.raiseError(follow, "unexpected token type in inline table: %s", follow.String()) 418 } 419 previous = follow 420 } 421 if tokenIsComma(previous) { 422 p.raiseError(previous, "trailing comma at the end of inline table") 423 } 424 tree.inline = true 425 return tree 426} 427 428func (p *tomlParser) parseArray() interface{} { 429 var array []interface{} 430 arrayType := reflect.TypeOf(newTree()) 431 for { 432 follow := p.peek() 433 if follow == nil || follow.typ == tokenEOF { 434 p.raiseError(follow, "unterminated array") 435 } 436 if follow.typ == tokenRightBracket { 437 p.getToken() 438 break 439 } 440 val := p.parseRvalue() 441 if reflect.TypeOf(val) != arrayType { 442 arrayType = nil 443 } 444 array = append(array, val) 445 follow = p.peek() 446 if follow == nil || follow.typ == tokenEOF { 447 p.raiseError(follow, "unterminated array") 448 } 449 if follow.typ != tokenRightBracket && follow.typ != tokenComma { 450 p.raiseError(follow, "missing comma") 451 } 452 if follow.typ == tokenComma { 453 p.getToken() 454 } 455 } 456 457 // if the array is a mixed-type array or its length is 0, 458 // don't convert it to a table array 459 if len(array) <= 0 { 460 arrayType = nil 461 } 462 // An array of Trees is actually an array of inline 463 // tables, which is a shorthand for a table array. If the 464 // array was not converted from []interface{} to []*Tree, 465 // the two notations would not be equivalent. 466 if arrayType == reflect.TypeOf(newTree()) { 467 tomlArray := make([]*Tree, len(array)) 468 for i, v := range array { 469 tomlArray[i] = v.(*Tree) 470 } 471 return tomlArray 472 } 473 return array 474} 475 476func parseToml(flow []token) *Tree { 477 result := newTree() 478 result.position = Position{1, 1} 479 parser := &tomlParser{ 480 flowIdx: 0, 481 flow: flow, 482 tree: result, 483 currentTable: make([]string, 0), 484 seenTableKeys: make([]string, 0), 485 } 486 parser.run() 487 return result 488} 489 490func init() { 491 numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`) 492 hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`) 493} 494