1// TOML Parser. 2 3package toml 4 5import ( 6 "errors" 7 "fmt" 8 "reflect" 9 "regexp" 10 "strconv" 11 "strings" 12 "time" 13) 14 15type tomlParser struct { 16 flow chan token 17 tree *TomlTree 18 tokensBuffer []token 19 currentTable []string 20 seenTableKeys []string 21} 22 23type tomlParserStateFn func() tomlParserStateFn 24 25// Formats and panics an error message based on a token 26func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) { 27 panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...)) 28} 29 30func (p *tomlParser) run() { 31 for state := p.parseStart; state != nil; { 32 state = state() 33 } 34} 35 36func (p *tomlParser) peek() *token { 37 if len(p.tokensBuffer) != 0 { 38 return &(p.tokensBuffer[0]) 39 } 40 41 tok, ok := <-p.flow 42 if !ok { 43 return nil 44 } 45 p.tokensBuffer = append(p.tokensBuffer, tok) 46 return &tok 47} 48 49func (p *tomlParser) assume(typ tokenType) { 50 tok := p.getToken() 51 if tok == nil { 52 p.raiseError(tok, "was expecting token %s, but token stream is empty", tok) 53 } 54 if tok.typ != typ { 55 p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok) 56 } 57} 58 59func (p *tomlParser) getToken() *token { 60 if len(p.tokensBuffer) != 0 { 61 tok := p.tokensBuffer[0] 62 p.tokensBuffer = p.tokensBuffer[1:] 63 return &tok 64 } 65 tok, ok := <-p.flow 66 if !ok { 67 return nil 68 } 69 return &tok 70} 71 72func (p *tomlParser) parseStart() tomlParserStateFn { 73 tok := p.peek() 74 75 // end of stream, parsing is finished 76 if tok == nil { 77 return nil 78 } 79 80 switch tok.typ { 81 case tokenDoubleLeftBracket: 82 return p.parseGroupArray 83 case tokenLeftBracket: 84 return p.parseGroup 85 case tokenKey: 86 return p.parseAssign 87 case tokenEOF: 88 return nil 89 default: 90 p.raiseError(tok, "unexpected token") 91 } 92 return nil 93} 94 95func (p *tomlParser) parseGroupArray() tomlParserStateFn { 96 startToken := p.getToken() // discard the [[ 97 key := p.getToken() 98 if key.typ != tokenKeyGroupArray { 99 p.raiseError(key, "unexpected token %s, was expecting a table array key", key) 100 } 101 102 // get or create table array element at the indicated part in the path 103 keys, err := parseKey(key.val) 104 if err != nil { 105 p.raiseError(key, "invalid table array key: %s", err) 106 } 107 p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries 108 destTree := p.tree.GetPath(keys) 109 var array []*TomlTree 110 if destTree == nil { 111 array = make([]*TomlTree, 0) 112 } else if target, ok := destTree.([]*TomlTree); ok && target != nil { 113 array = destTree.([]*TomlTree) 114 } else { 115 p.raiseError(key, "key %s is already assigned and not of type table array", key) 116 } 117 p.currentTable = keys 118 119 // add a new tree to the end of the table array 120 newTree := newTomlTree() 121 newTree.position = startToken.Position 122 array = append(array, newTree) 123 p.tree.SetPath(p.currentTable, array) 124 125 // remove all keys that were children of this table array 126 prefix := key.val + "." 127 found := false 128 for ii := 0; ii < len(p.seenTableKeys); { 129 tableKey := p.seenTableKeys[ii] 130 if strings.HasPrefix(tableKey, prefix) { 131 p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...) 132 } else { 133 found = (tableKey == key.val) 134 ii++ 135 } 136 } 137 138 // keep this key name from use by other kinds of assignments 139 if !found { 140 p.seenTableKeys = append(p.seenTableKeys, key.val) 141 } 142 143 // move to next parser state 144 p.assume(tokenDoubleRightBracket) 145 return p.parseStart 146} 147 148func (p *tomlParser) parseGroup() tomlParserStateFn { 149 startToken := p.getToken() // discard the [ 150 key := p.getToken() 151 if key.typ != tokenKeyGroup { 152 p.raiseError(key, "unexpected token %s, was expecting a table key", key) 153 } 154 for _, item := range p.seenTableKeys { 155 if item == key.val { 156 p.raiseError(key, "duplicated tables") 157 } 158 } 159 160 p.seenTableKeys = append(p.seenTableKeys, key.val) 161 keys, err := parseKey(key.val) 162 if err != nil { 163 p.raiseError(key, "invalid table array key: %s", err) 164 } 165 if err := p.tree.createSubTree(keys, startToken.Position); err != nil { 166 p.raiseError(key, "%s", err) 167 } 168 p.assume(tokenRightBracket) 169 p.currentTable = keys 170 return p.parseStart 171} 172 173func (p *tomlParser) parseAssign() tomlParserStateFn { 174 key := p.getToken() 175 p.assume(tokenEqual) 176 177 value := p.parseRvalue() 178 var tableKey []string 179 if len(p.currentTable) > 0 { 180 tableKey = p.currentTable 181 } else { 182 tableKey = []string{} 183 } 184 185 // find the table to assign, looking out for arrays of tables 186 var targetNode *TomlTree 187 switch node := p.tree.GetPath(tableKey).(type) { 188 case []*TomlTree: 189 targetNode = node[len(node)-1] 190 case *TomlTree: 191 targetNode = node 192 default: 193 p.raiseError(key, "Unknown table type for path: %s", 194 strings.Join(tableKey, ".")) 195 } 196 197 // assign value to the found table 198 keyVals, err := parseKey(key.val) 199 if err != nil { 200 p.raiseError(key, "%s", err) 201 } 202 if len(keyVals) != 1 { 203 p.raiseError(key, "Invalid key") 204 } 205 keyVal := keyVals[0] 206 localKey := []string{keyVal} 207 finalKey := append(tableKey, keyVal) 208 if targetNode.GetPath(localKey) != nil { 209 p.raiseError(key, "The following key was defined twice: %s", 210 strings.Join(finalKey, ".")) 211 } 212 var toInsert interface{} 213 214 switch value.(type) { 215 case *TomlTree, []*TomlTree: 216 toInsert = value 217 default: 218 toInsert = &tomlValue{value, key.Position} 219 } 220 targetNode.values[keyVal] = toInsert 221 return p.parseStart 222} 223 224var numberUnderscoreInvalidRegexp *regexp.Regexp 225 226func cleanupNumberToken(value string) (string, error) { 227 if numberUnderscoreInvalidRegexp.MatchString(value) { 228 return "", errors.New("invalid use of _ in number") 229 } 230 cleanedVal := strings.Replace(value, "_", "", -1) 231 return cleanedVal, nil 232} 233 234func (p *tomlParser) parseRvalue() interface{} { 235 tok := p.getToken() 236 if tok == nil || tok.typ == tokenEOF { 237 p.raiseError(tok, "expecting a value") 238 } 239 240 switch tok.typ { 241 case tokenString: 242 return tok.val 243 case tokenTrue: 244 return true 245 case tokenFalse: 246 return false 247 case tokenInteger: 248 cleanedVal, err := cleanupNumberToken(tok.val) 249 if err != nil { 250 p.raiseError(tok, "%s", err) 251 } 252 val, err := strconv.ParseInt(cleanedVal, 10, 64) 253 if err != nil { 254 p.raiseError(tok, "%s", err) 255 } 256 return val 257 case tokenFloat: 258 cleanedVal, err := cleanupNumberToken(tok.val) 259 if err != nil { 260 p.raiseError(tok, "%s", err) 261 } 262 val, err := strconv.ParseFloat(cleanedVal, 64) 263 if err != nil { 264 p.raiseError(tok, "%s", err) 265 } 266 return val 267 case tokenDate: 268 val, err := time.ParseInLocation(time.RFC3339Nano, tok.val, time.UTC) 269 if err != nil { 270 p.raiseError(tok, "%s", err) 271 } 272 return val 273 case tokenLeftBracket: 274 return p.parseArray() 275 case tokenLeftCurlyBrace: 276 return p.parseInlineTable() 277 case tokenEqual: 278 p.raiseError(tok, "cannot have multiple equals for the same key") 279 case tokenError: 280 p.raiseError(tok, "%s", tok) 281 } 282 283 p.raiseError(tok, "never reached") 284 285 return nil 286} 287 288func tokenIsComma(t *token) bool { 289 return t != nil && t.typ == tokenComma 290} 291 292func (p *tomlParser) parseInlineTable() *TomlTree { 293 tree := newTomlTree() 294 var previous *token 295Loop: 296 for { 297 follow := p.peek() 298 if follow == nil || follow.typ == tokenEOF { 299 p.raiseError(follow, "unterminated inline table") 300 } 301 switch follow.typ { 302 case tokenRightCurlyBrace: 303 p.getToken() 304 break Loop 305 case tokenKey: 306 if !tokenIsComma(previous) && previous != nil { 307 p.raiseError(follow, "comma expected between fields in inline table") 308 } 309 key := p.getToken() 310 p.assume(tokenEqual) 311 value := p.parseRvalue() 312 tree.Set(key.val, value) 313 case tokenComma: 314 if previous == nil { 315 p.raiseError(follow, "inline table cannot start with a comma") 316 } 317 if tokenIsComma(previous) { 318 p.raiseError(follow, "need field between two commas in inline table") 319 } 320 p.getToken() 321 default: 322 p.raiseError(follow, "unexpected token type in inline table: %s", follow.typ.String()) 323 } 324 previous = follow 325 } 326 if tokenIsComma(previous) { 327 p.raiseError(previous, "trailing comma at the end of inline table") 328 } 329 return tree 330} 331 332func (p *tomlParser) parseArray() interface{} { 333 var array []interface{} 334 arrayType := reflect.TypeOf(nil) 335 for { 336 follow := p.peek() 337 if follow == nil || follow.typ == tokenEOF { 338 p.raiseError(follow, "unterminated array") 339 } 340 if follow.typ == tokenRightBracket { 341 p.getToken() 342 break 343 } 344 val := p.parseRvalue() 345 if arrayType == nil { 346 arrayType = reflect.TypeOf(val) 347 } 348 if reflect.TypeOf(val) != arrayType { 349 p.raiseError(follow, "mixed types in array") 350 } 351 array = append(array, val) 352 follow = p.peek() 353 if follow == nil || follow.typ == tokenEOF { 354 p.raiseError(follow, "unterminated array") 355 } 356 if follow.typ != tokenRightBracket && follow.typ != tokenComma { 357 p.raiseError(follow, "missing comma") 358 } 359 if follow.typ == tokenComma { 360 p.getToken() 361 } 362 } 363 // An array of TomlTrees is actually an array of inline 364 // tables, which is a shorthand for a table array. If the 365 // array was not converted from []interface{} to []*TomlTree, 366 // the two notations would not be equivalent. 367 if arrayType == reflect.TypeOf(newTomlTree()) { 368 tomlArray := make([]*TomlTree, len(array)) 369 for i, v := range array { 370 tomlArray[i] = v.(*TomlTree) 371 } 372 return tomlArray 373 } 374 return array 375} 376 377func parseToml(flow chan token) *TomlTree { 378 result := newTomlTree() 379 result.position = Position{1, 1} 380 parser := &tomlParser{ 381 flow: flow, 382 tree: result, 383 tokensBuffer: make([]token, 0), 384 currentTable: make([]string, 0), 385 seenTableKeys: make([]string, 0), 386 } 387 parser.run() 388 return result 389} 390 391func init() { 392 numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d]|_$|^_)`) 393} 394