1// TOML Parser. 2 3package toml 4 5import ( 6 "errors" 7 "fmt" 8 "math" 9 "reflect" 10 "strconv" 11 "strings" 12 "time" 13) 14 15type tomlParser struct { 16 flowIdx int 17 flow []token 18 tree *Tree 19 currentTable []string 20 seenTableKeys []string 21} 22 23type tomlParserStateFn func() tomlParserStateFn 24 25// Formats and panics an error message based on a token 26func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) { 27 panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...)) 28} 29 30func (p *tomlParser) run() { 31 for state := p.parseStart; state != nil; { 32 state = state() 33 } 34} 35 36func (p *tomlParser) peek() *token { 37 if p.flowIdx >= len(p.flow) { 38 return nil 39 } 40 return &p.flow[p.flowIdx] 41} 42 43func (p *tomlParser) assume(typ tokenType) { 44 tok := p.getToken() 45 if tok == nil { 46 p.raiseError(tok, "was expecting token %s, but token stream is empty", tok) 47 } 48 if tok.typ != typ { 49 p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok) 50 } 51} 52 53func (p *tomlParser) getToken() *token { 54 tok := p.peek() 55 if tok == nil { 56 return nil 57 } 58 p.flowIdx++ 59 return tok 60} 61 62func (p *tomlParser) parseStart() tomlParserStateFn { 63 tok := p.peek() 64 65 // end of stream, parsing is finished 66 if tok == nil { 67 return nil 68 } 69 70 switch tok.typ { 71 case tokenDoubleLeftBracket: 72 return p.parseGroupArray 73 case tokenLeftBracket: 74 return p.parseGroup 75 case tokenKey: 76 return p.parseAssign 77 case tokenEOF: 78 return nil 79 case tokenError: 80 p.raiseError(tok, "parsing error: %s", tok.String()) 81 default: 82 p.raiseError(tok, "unexpected token %s", tok.typ) 83 } 84 return nil 85} 86 87func (p *tomlParser) parseGroupArray() tomlParserStateFn { 88 startToken := p.getToken() // discard the [[ 89 key := p.getToken() 90 if key.typ != tokenKeyGroupArray { 91 p.raiseError(key, "unexpected token %s, was expecting a table array key", key) 92 } 93 94 // get or create table array element at the indicated part in the path 95 keys, err := parseKey(key.val) 96 if err != nil { 97 p.raiseError(key, "invalid table array key: %s", err) 98 } 99 p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries 100 destTree := p.tree.GetPath(keys) 101 var array []*Tree 102 if destTree == nil { 103 array = make([]*Tree, 0) 104 } else if target, ok := destTree.([]*Tree); ok && target != nil { 105 array = destTree.([]*Tree) 106 } else { 107 p.raiseError(key, "key %s is already assigned and not of type table array", key) 108 } 109 p.currentTable = keys 110 111 // add a new tree to the end of the table array 112 newTree := newTree() 113 newTree.position = startToken.Position 114 array = append(array, newTree) 115 p.tree.SetPath(p.currentTable, array) 116 117 // remove all keys that were children of this table array 118 prefix := key.val + "." 119 found := false 120 for ii := 0; ii < len(p.seenTableKeys); { 121 tableKey := p.seenTableKeys[ii] 122 if strings.HasPrefix(tableKey, prefix) { 123 p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...) 124 } else { 125 found = (tableKey == key.val) 126 ii++ 127 } 128 } 129 130 // keep this key name from use by other kinds of assignments 131 if !found { 132 p.seenTableKeys = append(p.seenTableKeys, key.val) 133 } 134 135 // move to next parser state 136 p.assume(tokenDoubleRightBracket) 137 return p.parseStart 138} 139 140func (p *tomlParser) parseGroup() tomlParserStateFn { 141 startToken := p.getToken() // discard the [ 142 key := p.getToken() 143 if key.typ != tokenKeyGroup { 144 p.raiseError(key, "unexpected token %s, was expecting a table key", key) 145 } 146 for _, item := range p.seenTableKeys { 147 if item == key.val { 148 p.raiseError(key, "duplicated tables") 149 } 150 } 151 152 p.seenTableKeys = append(p.seenTableKeys, key.val) 153 keys, err := parseKey(key.val) 154 if err != nil { 155 p.raiseError(key, "invalid table array key: %s", err) 156 } 157 if err := p.tree.createSubTree(keys, startToken.Position); err != nil { 158 p.raiseError(key, "%s", err) 159 } 160 destTree := p.tree.GetPath(keys) 161 if target, ok := destTree.(*Tree); ok && target != nil && target.inline { 162 p.raiseError(key, "could not re-define exist inline table or its sub-table : %s", 163 strings.Join(keys, ".")) 164 } 165 p.assume(tokenRightBracket) 166 p.currentTable = keys 167 return p.parseStart 168} 169 170func (p *tomlParser) parseAssign() tomlParserStateFn { 171 key := p.getToken() 172 p.assume(tokenEqual) 173 174 parsedKey, err := parseKey(key.val) 175 if err != nil { 176 p.raiseError(key, "invalid key: %s", err.Error()) 177 } 178 179 value := p.parseRvalue() 180 var tableKey []string 181 if len(p.currentTable) > 0 { 182 tableKey = p.currentTable 183 } else { 184 tableKey = []string{} 185 } 186 187 prefixKey := parsedKey[0 : len(parsedKey)-1] 188 tableKey = append(tableKey, prefixKey...) 189 190 // find the table to assign, looking out for arrays of tables 191 var targetNode *Tree 192 switch node := p.tree.GetPath(tableKey).(type) { 193 case []*Tree: 194 targetNode = node[len(node)-1] 195 case *Tree: 196 targetNode = node 197 case nil: 198 // create intermediate 199 if err := p.tree.createSubTree(tableKey, key.Position); err != nil { 200 p.raiseError(key, "could not create intermediate group: %s", err) 201 } 202 targetNode = p.tree.GetPath(tableKey).(*Tree) 203 default: 204 p.raiseError(key, "Unknown table type for path: %s", 205 strings.Join(tableKey, ".")) 206 } 207 208 if targetNode.inline { 209 p.raiseError(key, "could not add key or sub-table to exist inline table or its sub-table : %s", 210 strings.Join(tableKey, ".")) 211 } 212 213 // assign value to the found table 214 keyVal := parsedKey[len(parsedKey)-1] 215 localKey := []string{keyVal} 216 finalKey := append(tableKey, keyVal) 217 if targetNode.GetPath(localKey) != nil { 218 p.raiseError(key, "The following key was defined twice: %s", 219 strings.Join(finalKey, ".")) 220 } 221 var toInsert interface{} 222 223 switch value.(type) { 224 case *Tree, []*Tree: 225 toInsert = value 226 default: 227 toInsert = &tomlValue{value: value, position: key.Position} 228 } 229 targetNode.values[keyVal] = toInsert 230 return p.parseStart 231} 232 233var errInvalidUnderscore = errors.New("invalid use of _ in number") 234 235func numberContainsInvalidUnderscore(value string) error { 236 // For large numbers, you may use underscores between digits to enhance 237 // readability. Each underscore must be surrounded by at least one digit on 238 // each side. 239 240 hasBefore := false 241 for idx, r := range value { 242 if r == '_' { 243 if !hasBefore || idx+1 >= len(value) { 244 // can't end with an underscore 245 return errInvalidUnderscore 246 } 247 } 248 hasBefore = isDigit(r) 249 } 250 return nil 251} 252 253var errInvalidUnderscoreHex = errors.New("invalid use of _ in hex number") 254 255func hexNumberContainsInvalidUnderscore(value string) error { 256 hasBefore := false 257 for idx, r := range value { 258 if r == '_' { 259 if !hasBefore || idx+1 >= len(value) { 260 // can't end with an underscore 261 return errInvalidUnderscoreHex 262 } 263 } 264 hasBefore = isHexDigit(r) 265 } 266 return nil 267} 268 269func cleanupNumberToken(value string) string { 270 cleanedVal := strings.Replace(value, "_", "", -1) 271 return cleanedVal 272} 273 274func (p *tomlParser) parseRvalue() interface{} { 275 tok := p.getToken() 276 if tok == nil || tok.typ == tokenEOF { 277 p.raiseError(tok, "expecting a value") 278 } 279 280 switch tok.typ { 281 case tokenString: 282 return tok.val 283 case tokenTrue: 284 return true 285 case tokenFalse: 286 return false 287 case tokenInf: 288 if tok.val[0] == '-' { 289 return math.Inf(-1) 290 } 291 return math.Inf(1) 292 case tokenNan: 293 return math.NaN() 294 case tokenInteger: 295 cleanedVal := cleanupNumberToken(tok.val) 296 var err error 297 var val int64 298 if len(cleanedVal) >= 3 && cleanedVal[0] == '0' { 299 switch cleanedVal[1] { 300 case 'x': 301 err = hexNumberContainsInvalidUnderscore(tok.val) 302 if err != nil { 303 p.raiseError(tok, "%s", err) 304 } 305 val, err = strconv.ParseInt(cleanedVal[2:], 16, 64) 306 case 'o': 307 err = numberContainsInvalidUnderscore(tok.val) 308 if err != nil { 309 p.raiseError(tok, "%s", err) 310 } 311 val, err = strconv.ParseInt(cleanedVal[2:], 8, 64) 312 case 'b': 313 err = numberContainsInvalidUnderscore(tok.val) 314 if err != nil { 315 p.raiseError(tok, "%s", err) 316 } 317 val, err = strconv.ParseInt(cleanedVal[2:], 2, 64) 318 default: 319 panic("invalid base") // the lexer should catch this first 320 } 321 } else { 322 err = numberContainsInvalidUnderscore(tok.val) 323 if err != nil { 324 p.raiseError(tok, "%s", err) 325 } 326 val, err = strconv.ParseInt(cleanedVal, 10, 64) 327 } 328 if err != nil { 329 p.raiseError(tok, "%s", err) 330 } 331 return val 332 case tokenFloat: 333 err := numberContainsInvalidUnderscore(tok.val) 334 if err != nil { 335 p.raiseError(tok, "%s", err) 336 } 337 cleanedVal := cleanupNumberToken(tok.val) 338 val, err := strconv.ParseFloat(cleanedVal, 64) 339 if err != nil { 340 p.raiseError(tok, "%s", err) 341 } 342 return val 343 case tokenLocalTime: 344 val, err := ParseLocalTime(tok.val) 345 if err != nil { 346 p.raiseError(tok, "%s", err) 347 } 348 return val 349 case tokenLocalDate: 350 // a local date may be followed by: 351 // * nothing: this is a local date 352 // * a local time: this is a local date-time 353 354 next := p.peek() 355 if next == nil || next.typ != tokenLocalTime { 356 val, err := ParseLocalDate(tok.val) 357 if err != nil { 358 p.raiseError(tok, "%s", err) 359 } 360 return val 361 } 362 363 localDate := tok 364 localTime := p.getToken() 365 366 next = p.peek() 367 if next == nil || next.typ != tokenTimeOffset { 368 v := localDate.val + "T" + localTime.val 369 val, err := ParseLocalDateTime(v) 370 if err != nil { 371 p.raiseError(tok, "%s", err) 372 } 373 return val 374 } 375 376 offset := p.getToken() 377 378 layout := time.RFC3339Nano 379 v := localDate.val + "T" + localTime.val + offset.val 380 val, err := time.ParseInLocation(layout, v, time.UTC) 381 if err != nil { 382 p.raiseError(tok, "%s", err) 383 } 384 return val 385 case tokenLeftBracket: 386 return p.parseArray() 387 case tokenLeftCurlyBrace: 388 return p.parseInlineTable() 389 case tokenEqual: 390 p.raiseError(tok, "cannot have multiple equals for the same key") 391 case tokenError: 392 p.raiseError(tok, "%s", tok) 393 default: 394 panic(fmt.Errorf("unhandled token: %v", tok)) 395 } 396 397 return nil 398} 399 400func tokenIsComma(t *token) bool { 401 return t != nil && t.typ == tokenComma 402} 403 404func (p *tomlParser) parseInlineTable() *Tree { 405 tree := newTree() 406 var previous *token 407Loop: 408 for { 409 follow := p.peek() 410 if follow == nil || follow.typ == tokenEOF { 411 p.raiseError(follow, "unterminated inline table") 412 } 413 switch follow.typ { 414 case tokenRightCurlyBrace: 415 p.getToken() 416 break Loop 417 case tokenKey, tokenInteger, tokenString: 418 if !tokenIsComma(previous) && previous != nil { 419 p.raiseError(follow, "comma expected between fields in inline table") 420 } 421 key := p.getToken() 422 p.assume(tokenEqual) 423 424 parsedKey, err := parseKey(key.val) 425 if err != nil { 426 p.raiseError(key, "invalid key: %s", err) 427 } 428 429 value := p.parseRvalue() 430 tree.SetPath(parsedKey, value) 431 case tokenComma: 432 if tokenIsComma(previous) { 433 p.raiseError(follow, "need field between two commas in inline table") 434 } 435 p.getToken() 436 default: 437 p.raiseError(follow, "unexpected token type in inline table: %s", follow.String()) 438 } 439 previous = follow 440 } 441 if tokenIsComma(previous) { 442 p.raiseError(previous, "trailing comma at the end of inline table") 443 } 444 tree.inline = true 445 return tree 446} 447 448func (p *tomlParser) parseArray() interface{} { 449 var array []interface{} 450 arrayType := reflect.TypeOf(newTree()) 451 for { 452 follow := p.peek() 453 if follow == nil || follow.typ == tokenEOF { 454 p.raiseError(follow, "unterminated array") 455 } 456 if follow.typ == tokenRightBracket { 457 p.getToken() 458 break 459 } 460 val := p.parseRvalue() 461 if reflect.TypeOf(val) != arrayType { 462 arrayType = nil 463 } 464 array = append(array, val) 465 follow = p.peek() 466 if follow == nil || follow.typ == tokenEOF { 467 p.raiseError(follow, "unterminated array") 468 } 469 if follow.typ != tokenRightBracket && follow.typ != tokenComma { 470 p.raiseError(follow, "missing comma") 471 } 472 if follow.typ == tokenComma { 473 p.getToken() 474 } 475 } 476 477 // if the array is a mixed-type array or its length is 0, 478 // don't convert it to a table array 479 if len(array) <= 0 { 480 arrayType = nil 481 } 482 // An array of Trees is actually an array of inline 483 // tables, which is a shorthand for a table array. If the 484 // array was not converted from []interface{} to []*Tree, 485 // the two notations would not be equivalent. 486 if arrayType == reflect.TypeOf(newTree()) { 487 tomlArray := make([]*Tree, len(array)) 488 for i, v := range array { 489 tomlArray[i] = v.(*Tree) 490 } 491 return tomlArray 492 } 493 return array 494} 495 496func parseToml(flow []token) *Tree { 497 result := newTree() 498 result.position = Position{1, 1} 499 parser := &tomlParser{ 500 flowIdx: 0, 501 flow: flow, 502 tree: result, 503 currentTable: make([]string, 0), 504 seenTableKeys: make([]string, 0), 505 } 506 parser.run() 507 return result 508} 509