1package toml 2 3import ( 4 "fmt" 5 "strconv" 6 "strings" 7 "time" 8 "unicode" 9 "unicode/utf8" 10) 11 12type parser struct { 13 mapping map[string]interface{} 14 types map[string]tomlType 15 lx *lexer 16 17 // A list of keys in the order that they appear in the TOML data. 18 ordered []Key 19 20 // the full key for the current hash in scope 21 context Key 22 23 // the base key name for everything except hashes 24 currentKey string 25 26 // rough approximation of line number 27 approxLine int 28 29 // A map of 'key.group.names' to whether they were created implicitly. 30 implicits map[string]bool 31} 32 33type parseError string 34 35func (pe parseError) Error() string { 36 return string(pe) 37} 38 39func parse(data string) (p *parser, err error) { 40 defer func() { 41 if r := recover(); r != nil { 42 var ok bool 43 if err, ok = r.(parseError); ok { 44 return 45 } 46 panic(r) 47 } 48 }() 49 50 p = &parser{ 51 mapping: make(map[string]interface{}), 52 types: make(map[string]tomlType), 53 lx: lex(data), 54 ordered: make([]Key, 0), 55 implicits: make(map[string]bool), 56 } 57 for { 58 item := p.next() 59 if item.typ == itemEOF { 60 break 61 } 62 p.topLevel(item) 63 } 64 65 return p, nil 66} 67 68func (p *parser) panicf(format string, v ...interface{}) { 69 msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s", 70 p.approxLine, p.current(), fmt.Sprintf(format, v...)) 71 panic(parseError(msg)) 72} 73 74func (p *parser) next() item { 75 it := p.lx.nextItem() 76 if it.typ == itemError { 77 p.panicf("%s", it.val) 78 } 79 return it 80} 81 82func (p *parser) bug(format string, v ...interface{}) { 83 panic(fmt.Sprintf("BUG: "+format+"\n\n", v...)) 84} 85 86func (p *parser) expect(typ itemType) item { 87 it := p.next() 88 p.assertEqual(typ, it.typ) 89 return it 90} 91 92func (p *parser) assertEqual(expected, got itemType) { 93 if expected != got { 94 p.bug("Expected '%s' but got '%s'.", expected, got) 95 } 96} 97 98func (p *parser) topLevel(item item) { 99 switch item.typ { 100 case itemCommentStart: 101 p.approxLine = item.line 102 p.expect(itemText) 103 case itemTableStart: 104 kg := p.next() 105 p.approxLine = kg.line 106 107 var key Key 108 for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() { 109 key = append(key, p.keyString(kg)) 110 } 111 p.assertEqual(itemTableEnd, kg.typ) 112 113 p.establishContext(key, false) 114 p.setType("", tomlHash) 115 p.ordered = append(p.ordered, key) 116 case itemArrayTableStart: 117 kg := p.next() 118 p.approxLine = kg.line 119 120 var key Key 121 for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() { 122 key = append(key, p.keyString(kg)) 123 } 124 p.assertEqual(itemArrayTableEnd, kg.typ) 125 126 p.establishContext(key, true) 127 p.setType("", tomlArrayHash) 128 p.ordered = append(p.ordered, key) 129 case itemKeyStart: 130 kname := p.next() 131 p.approxLine = kname.line 132 p.currentKey = p.keyString(kname) 133 134 val, typ := p.value(p.next()) 135 p.setValue(p.currentKey, val) 136 p.setType(p.currentKey, typ) 137 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 138 p.currentKey = "" 139 default: 140 p.bug("Unexpected type at top level: %s", item.typ) 141 } 142} 143 144// Gets a string for a key (or part of a key in a table name). 145func (p *parser) keyString(it item) string { 146 switch it.typ { 147 case itemText: 148 return it.val 149 case itemString, itemMultilineString, 150 itemRawString, itemRawMultilineString: 151 s, _ := p.value(it) 152 return s.(string) 153 default: 154 p.bug("Unexpected key type: %s", it.typ) 155 panic("unreachable") 156 } 157} 158 159// value translates an expected value from the lexer into a Go value wrapped 160// as an empty interface. 161func (p *parser) value(it item) (interface{}, tomlType) { 162 switch it.typ { 163 case itemString: 164 return p.replaceEscapes(it.val), p.typeOfPrimitive(it) 165 case itemMultilineString: 166 trimmed := stripFirstNewline(stripEscapedWhitespace(it.val)) 167 return p.replaceEscapes(trimmed), p.typeOfPrimitive(it) 168 case itemRawString: 169 return it.val, p.typeOfPrimitive(it) 170 case itemRawMultilineString: 171 return stripFirstNewline(it.val), p.typeOfPrimitive(it) 172 case itemBool: 173 switch it.val { 174 case "true": 175 return true, p.typeOfPrimitive(it) 176 case "false": 177 return false, p.typeOfPrimitive(it) 178 } 179 p.bug("Expected boolean value, but got '%s'.", it.val) 180 case itemInteger: 181 if !numUnderscoresOK(it.val) { 182 p.panicf("Invalid integer %q: underscores must be surrounded by digits", 183 it.val) 184 } 185 val := strings.Replace(it.val, "_", "", -1) 186 num, err := strconv.ParseInt(val, 10, 64) 187 if err != nil { 188 // Distinguish integer values. Normally, it'd be a bug if the lexer 189 // provides an invalid integer, but it's possible that the number is 190 // out of range of valid values (which the lexer cannot determine). 191 // So mark the former as a bug but the latter as a legitimate user 192 // error. 193 if e, ok := err.(*strconv.NumError); ok && 194 e.Err == strconv.ErrRange { 195 196 p.panicf("Integer '%s' is out of the range of 64-bit "+ 197 "signed integers.", it.val) 198 } else { 199 p.bug("Expected integer value, but got '%s'.", it.val) 200 } 201 } 202 return num, p.typeOfPrimitive(it) 203 case itemFloat: 204 parts := strings.FieldsFunc(it.val, func(r rune) bool { 205 switch r { 206 case '.', 'e', 'E': 207 return true 208 } 209 return false 210 }) 211 for _, part := range parts { 212 if !numUnderscoresOK(part) { 213 p.panicf("Invalid float %q: underscores must be "+ 214 "surrounded by digits", it.val) 215 } 216 } 217 if !numPeriodsOK(it.val) { 218 // As a special case, numbers like '123.' or '1.e2', 219 // which are valid as far as Go/strconv are concerned, 220 // must be rejected because TOML says that a fractional 221 // part consists of '.' followed by 1+ digits. 222 p.panicf("Invalid float %q: '.' must be followed "+ 223 "by one or more digits", it.val) 224 } 225 val := strings.Replace(it.val, "_", "", -1) 226 num, err := strconv.ParseFloat(val, 64) 227 if err != nil { 228 if e, ok := err.(*strconv.NumError); ok && 229 e.Err == strconv.ErrRange { 230 231 p.panicf("Float '%s' is out of the range of 64-bit "+ 232 "IEEE-754 floating-point numbers.", it.val) 233 } else { 234 p.panicf("Invalid float value: %q", it.val) 235 } 236 } 237 return num, p.typeOfPrimitive(it) 238 case itemDatetime: 239 var t time.Time 240 var ok bool 241 var err error 242 for _, format := range []string{ 243 "2006-01-02T15:04:05Z07:00", 244 "2006-01-02T15:04:05", 245 "2006-01-02", 246 } { 247 t, err = time.ParseInLocation(format, it.val, time.Local) 248 if err == nil { 249 ok = true 250 break 251 } 252 } 253 if !ok { 254 p.panicf("Invalid TOML Datetime: %q.", it.val) 255 } 256 return t, p.typeOfPrimitive(it) 257 case itemArray: 258 array := make([]interface{}, 0) 259 types := make([]tomlType, 0) 260 261 for it = p.next(); it.typ != itemArrayEnd; it = p.next() { 262 if it.typ == itemCommentStart { 263 p.expect(itemText) 264 continue 265 } 266 267 val, typ := p.value(it) 268 array = append(array, val) 269 types = append(types, typ) 270 } 271 return array, p.typeOfArray(types) 272 case itemInlineTableStart: 273 var ( 274 hash = make(map[string]interface{}) 275 outerContext = p.context 276 outerKey = p.currentKey 277 ) 278 279 p.context = append(p.context, p.currentKey) 280 p.currentKey = "" 281 for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { 282 if it.typ != itemKeyStart { 283 p.bug("Expected key start but instead found %q, around line %d", 284 it.val, p.approxLine) 285 } 286 if it.typ == itemCommentStart { 287 p.expect(itemText) 288 continue 289 } 290 291 // retrieve key 292 k := p.next() 293 p.approxLine = k.line 294 kname := p.keyString(k) 295 296 // retrieve value 297 p.currentKey = kname 298 val, typ := p.value(p.next()) 299 // make sure we keep metadata up to date 300 p.setType(kname, typ) 301 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 302 hash[kname] = val 303 } 304 p.context = outerContext 305 p.currentKey = outerKey 306 return hash, tomlHash 307 } 308 p.bug("Unexpected value type: %s", it.typ) 309 panic("unreachable") 310} 311 312// numUnderscoresOK checks whether each underscore in s is surrounded by 313// characters that are not underscores. 314func numUnderscoresOK(s string) bool { 315 accept := false 316 for _, r := range s { 317 if r == '_' { 318 if !accept { 319 return false 320 } 321 accept = false 322 continue 323 } 324 accept = true 325 } 326 return accept 327} 328 329// numPeriodsOK checks whether every period in s is followed by a digit. 330func numPeriodsOK(s string) bool { 331 period := false 332 for _, r := range s { 333 if period && !isDigit(r) { 334 return false 335 } 336 period = r == '.' 337 } 338 return !period 339} 340 341// establishContext sets the current context of the parser, 342// where the context is either a hash or an array of hashes. Which one is 343// set depends on the value of the `array` parameter. 344// 345// Establishing the context also makes sure that the key isn't a duplicate, and 346// will create implicit hashes automatically. 347func (p *parser) establishContext(key Key, array bool) { 348 var ok bool 349 350 // Always start at the top level and drill down for our context. 351 hashContext := p.mapping 352 keyContext := make(Key, 0) 353 354 // We only need implicit hashes for key[0:-1] 355 for _, k := range key[0 : len(key)-1] { 356 _, ok = hashContext[k] 357 keyContext = append(keyContext, k) 358 359 // No key? Make an implicit hash and move on. 360 if !ok { 361 p.addImplicit(keyContext) 362 hashContext[k] = make(map[string]interface{}) 363 } 364 365 // If the hash context is actually an array of tables, then set 366 // the hash context to the last element in that array. 367 // 368 // Otherwise, it better be a table, since this MUST be a key group (by 369 // virtue of it not being the last element in a key). 370 switch t := hashContext[k].(type) { 371 case []map[string]interface{}: 372 hashContext = t[len(t)-1] 373 case map[string]interface{}: 374 hashContext = t 375 default: 376 p.panicf("Key '%s' was already created as a hash.", keyContext) 377 } 378 } 379 380 p.context = keyContext 381 if array { 382 // If this is the first element for this array, then allocate a new 383 // list of tables for it. 384 k := key[len(key)-1] 385 if _, ok := hashContext[k]; !ok { 386 hashContext[k] = make([]map[string]interface{}, 0, 5) 387 } 388 389 // Add a new table. But make sure the key hasn't already been used 390 // for something else. 391 if hash, ok := hashContext[k].([]map[string]interface{}); ok { 392 hashContext[k] = append(hash, make(map[string]interface{})) 393 } else { 394 p.panicf("Key '%s' was already created and cannot be used as "+ 395 "an array.", keyContext) 396 } 397 } else { 398 p.setValue(key[len(key)-1], make(map[string]interface{})) 399 } 400 p.context = append(p.context, key[len(key)-1]) 401} 402 403// setValue sets the given key to the given value in the current context. 404// It will make sure that the key hasn't already been defined, account for 405// implicit key groups. 406func (p *parser) setValue(key string, value interface{}) { 407 var tmpHash interface{} 408 var ok bool 409 410 hash := p.mapping 411 keyContext := make(Key, 0) 412 for _, k := range p.context { 413 keyContext = append(keyContext, k) 414 if tmpHash, ok = hash[k]; !ok { 415 p.bug("Context for key '%s' has not been established.", keyContext) 416 } 417 switch t := tmpHash.(type) { 418 case []map[string]interface{}: 419 // The context is a table of hashes. Pick the most recent table 420 // defined as the current hash. 421 hash = t[len(t)-1] 422 case map[string]interface{}: 423 hash = t 424 default: 425 p.bug("Expected hash to have type 'map[string]interface{}', but "+ 426 "it has '%T' instead.", tmpHash) 427 } 428 } 429 keyContext = append(keyContext, key) 430 431 if _, ok := hash[key]; ok { 432 // Typically, if the given key has already been set, then we have 433 // to raise an error since duplicate keys are disallowed. However, 434 // it's possible that a key was previously defined implicitly. In this 435 // case, it is allowed to be redefined concretely. (See the 436 // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.) 437 // 438 // But we have to make sure to stop marking it as an implicit. (So that 439 // another redefinition provokes an error.) 440 // 441 // Note that since it has already been defined (as a hash), we don't 442 // want to overwrite it. So our business is done. 443 if p.isImplicit(keyContext) { 444 p.removeImplicit(keyContext) 445 return 446 } 447 448 // Otherwise, we have a concrete key trying to override a previous 449 // key, which is *always* wrong. 450 p.panicf("Key '%s' has already been defined.", keyContext) 451 } 452 hash[key] = value 453} 454 455// setType sets the type of a particular value at a given key. 456// It should be called immediately AFTER setValue. 457// 458// Note that if `key` is empty, then the type given will be applied to the 459// current context (which is either a table or an array of tables). 460func (p *parser) setType(key string, typ tomlType) { 461 keyContext := make(Key, 0, len(p.context)+1) 462 for _, k := range p.context { 463 keyContext = append(keyContext, k) 464 } 465 if len(key) > 0 { // allow type setting for hashes 466 keyContext = append(keyContext, key) 467 } 468 p.types[keyContext.String()] = typ 469} 470 471// addImplicit sets the given Key as having been created implicitly. 472func (p *parser) addImplicit(key Key) { 473 p.implicits[key.String()] = true 474} 475 476// removeImplicit stops tagging the given key as having been implicitly 477// created. 478func (p *parser) removeImplicit(key Key) { 479 p.implicits[key.String()] = false 480} 481 482// isImplicit returns true if the key group pointed to by the key was created 483// implicitly. 484func (p *parser) isImplicit(key Key) bool { 485 return p.implicits[key.String()] 486} 487 488// current returns the full key name of the current context. 489func (p *parser) current() string { 490 if len(p.currentKey) == 0 { 491 return p.context.String() 492 } 493 if len(p.context) == 0 { 494 return p.currentKey 495 } 496 return fmt.Sprintf("%s.%s", p.context, p.currentKey) 497} 498 499func stripFirstNewline(s string) string { 500 if len(s) == 0 || s[0] != '\n' { 501 return s 502 } 503 return s[1:] 504} 505 506func stripEscapedWhitespace(s string) string { 507 esc := strings.Split(s, "\\\n") 508 if len(esc) > 1 { 509 for i := 1; i < len(esc); i++ { 510 esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace) 511 } 512 } 513 return strings.Join(esc, "") 514} 515 516func (p *parser) replaceEscapes(str string) string { 517 var replaced []rune 518 s := []byte(str) 519 r := 0 520 for r < len(s) { 521 if s[r] != '\\' { 522 c, size := utf8.DecodeRune(s[r:]) 523 r += size 524 replaced = append(replaced, c) 525 continue 526 } 527 r += 1 528 if r >= len(s) { 529 p.bug("Escape sequence at end of string.") 530 return "" 531 } 532 switch s[r] { 533 default: 534 p.bug("Expected valid escape code after \\, but got %q.", s[r]) 535 return "" 536 case 'b': 537 replaced = append(replaced, rune(0x0008)) 538 r += 1 539 case 't': 540 replaced = append(replaced, rune(0x0009)) 541 r += 1 542 case 'n': 543 replaced = append(replaced, rune(0x000A)) 544 r += 1 545 case 'f': 546 replaced = append(replaced, rune(0x000C)) 547 r += 1 548 case 'r': 549 replaced = append(replaced, rune(0x000D)) 550 r += 1 551 case '"': 552 replaced = append(replaced, rune(0x0022)) 553 r += 1 554 case '\\': 555 replaced = append(replaced, rune(0x005C)) 556 r += 1 557 case 'u': 558 // At this point, we know we have a Unicode escape of the form 559 // `uXXXX` at [r, r+5). (Because the lexer guarantees this 560 // for us.) 561 escaped := p.asciiEscapeToUnicode(s[r+1 : r+5]) 562 replaced = append(replaced, escaped) 563 r += 5 564 case 'U': 565 // At this point, we know we have a Unicode escape of the form 566 // `uXXXX` at [r, r+9). (Because the lexer guarantees this 567 // for us.) 568 escaped := p.asciiEscapeToUnicode(s[r+1 : r+9]) 569 replaced = append(replaced, escaped) 570 r += 9 571 } 572 } 573 return string(replaced) 574} 575 576func (p *parser) asciiEscapeToUnicode(bs []byte) rune { 577 s := string(bs) 578 hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) 579 if err != nil { 580 p.bug("Could not parse '%s' as a hexadecimal number, but the "+ 581 "lexer claims it's OK: %s", s, err) 582 } 583 if !utf8.ValidRune(rune(hex)) { 584 p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s) 585 } 586 return rune(hex) 587} 588 589func isStringType(ty itemType) bool { 590 return ty == itemString || ty == itemMultilineString || 591 ty == itemRawString || ty == itemRawMultilineString 592} 593