1package toml 2 3import ( 4 "errors" 5 "fmt" 6 "strconv" 7 "strings" 8 "time" 9 "unicode/utf8" 10 11 "github.com/BurntSushi/toml/internal" 12) 13 14type parser struct { 15 mapping map[string]interface{} 16 types map[string]tomlType 17 lx *lexer 18 19 ordered []Key // List of keys in the order that they appear in the TOML data. 20 context Key // Full key for the current hash in scope. 21 currentKey string // Base key name for everything except hashes. 22 approxLine int // Rough approximation of line number 23 implicits map[string]bool // Record implied keys (e.g. 'key.group.names'). 24} 25 26// ParseError is used when a file can't be parsed: for example invalid integer 27// literals, duplicate keys, etc. 28type ParseError struct { 29 Message string 30 Line int 31 LastKey string 32} 33 34func (pe ParseError) Error() string { 35 return fmt.Sprintf("Near line %d (last key parsed '%s'): %s", 36 pe.Line, pe.LastKey, pe.Message) 37} 38 39func parse(data string) (p *parser, err error) { 40 defer func() { 41 if r := recover(); r != nil { 42 var ok bool 43 if err, ok = r.(ParseError); ok { 44 return 45 } 46 panic(r) 47 } 48 }() 49 50 // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString() 51 // which mangles stuff. 52 if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { 53 data = data[2:] 54 } 55 56 // Examine first few bytes for NULL bytes; this probably means it's a UTF-16 57 // file (second byte in surrogate pair being NULL). Again, do this here to 58 // avoid having to deal with UTF-8/16 stuff in the lexer. 59 ex := 6 60 if len(data) < 6 { 61 ex = len(data) 62 } 63 if strings.ContainsRune(data[:ex], 0) { 64 return nil, errors.New("files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8") 65 } 66 67 p = &parser{ 68 mapping: make(map[string]interface{}), 69 types: make(map[string]tomlType), 70 lx: lex(data), 71 ordered: make([]Key, 0), 72 implicits: make(map[string]bool), 73 } 74 for { 75 item := p.next() 76 if item.typ == itemEOF { 77 break 78 } 79 p.topLevel(item) 80 } 81 82 return p, nil 83} 84 85func (p *parser) panicf(format string, v ...interface{}) { 86 msg := fmt.Sprintf(format, v...) 87 panic(ParseError{ 88 Message: msg, 89 Line: p.approxLine, 90 LastKey: p.current(), 91 }) 92} 93 94func (p *parser) next() item { 95 it := p.lx.nextItem() 96 //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.line, it.val) 97 if it.typ == itemError { 98 p.panicf("%s", it.val) 99 } 100 return it 101} 102 103func (p *parser) bug(format string, v ...interface{}) { 104 panic(fmt.Sprintf("BUG: "+format+"\n\n", v...)) 105} 106 107func (p *parser) expect(typ itemType) item { 108 it := p.next() 109 p.assertEqual(typ, it.typ) 110 return it 111} 112 113func (p *parser) assertEqual(expected, got itemType) { 114 if expected != got { 115 p.bug("Expected '%s' but got '%s'.", expected, got) 116 } 117} 118 119func (p *parser) topLevel(item item) { 120 switch item.typ { 121 case itemCommentStart: // # .. 122 p.approxLine = item.line 123 p.expect(itemText) 124 case itemTableStart: // [ .. ] 125 name := p.next() 126 p.approxLine = name.line 127 128 var key Key 129 for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() { 130 key = append(key, p.keyString(name)) 131 } 132 p.assertEqual(itemTableEnd, name.typ) 133 134 p.addContext(key, false) 135 p.setType("", tomlHash) 136 p.ordered = append(p.ordered, key) 137 case itemArrayTableStart: // [[ .. ]] 138 name := p.next() 139 p.approxLine = name.line 140 141 var key Key 142 for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() { 143 key = append(key, p.keyString(name)) 144 } 145 p.assertEqual(itemArrayTableEnd, name.typ) 146 147 p.addContext(key, true) 148 p.setType("", tomlArrayHash) 149 p.ordered = append(p.ordered, key) 150 case itemKeyStart: // key = .. 151 outerContext := p.context 152 /// Read all the key parts (e.g. 'a' and 'b' in 'a.b') 153 k := p.next() 154 p.approxLine = k.line 155 var key Key 156 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { 157 key = append(key, p.keyString(k)) 158 } 159 p.assertEqual(itemKeyEnd, k.typ) 160 161 /// The current key is the last part. 162 p.currentKey = key[len(key)-1] 163 164 /// All the other parts (if any) are the context; need to set each part 165 /// as implicit. 166 context := key[:len(key)-1] 167 for i := range context { 168 p.addImplicitContext(append(p.context, context[i:i+1]...)) 169 } 170 171 /// Set value. 172 val, typ := p.value(p.next(), false) 173 p.set(p.currentKey, val, typ) 174 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 175 176 /// Remove the context we added (preserving any context from [tbl] lines). 177 p.context = outerContext 178 p.currentKey = "" 179 default: 180 p.bug("Unexpected type at top level: %s", item.typ) 181 } 182} 183 184// Gets a string for a key (or part of a key in a table name). 185func (p *parser) keyString(it item) string { 186 switch it.typ { 187 case itemText: 188 return it.val 189 case itemString, itemMultilineString, 190 itemRawString, itemRawMultilineString: 191 s, _ := p.value(it, false) 192 return s.(string) 193 default: 194 p.bug("Unexpected key type: %s", it.typ) 195 } 196 panic("unreachable") 197} 198 199var datetimeRepl = strings.NewReplacer( 200 "z", "Z", 201 "t", "T", 202 " ", "T") 203 204// value translates an expected value from the lexer into a Go value wrapped 205// as an empty interface. 206func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) { 207 switch it.typ { 208 case itemString: 209 return p.replaceEscapes(it.val), p.typeOfPrimitive(it) 210 case itemMultilineString: 211 return p.replaceEscapes(stripFirstNewline(stripEscapedNewlines(it.val))), p.typeOfPrimitive(it) 212 case itemRawString: 213 return it.val, p.typeOfPrimitive(it) 214 case itemRawMultilineString: 215 return stripFirstNewline(it.val), p.typeOfPrimitive(it) 216 case itemInteger: 217 return p.valueInteger(it) 218 case itemFloat: 219 return p.valueFloat(it) 220 case itemBool: 221 switch it.val { 222 case "true": 223 return true, p.typeOfPrimitive(it) 224 case "false": 225 return false, p.typeOfPrimitive(it) 226 default: 227 p.bug("Expected boolean value, but got '%s'.", it.val) 228 } 229 case itemDatetime: 230 return p.valueDatetime(it) 231 case itemArray: 232 return p.valueArray(it) 233 case itemInlineTableStart: 234 return p.valueInlineTable(it, parentIsArray) 235 default: 236 p.bug("Unexpected value type: %s", it.typ) 237 } 238 panic("unreachable") 239} 240 241func (p *parser) valueInteger(it item) (interface{}, tomlType) { 242 if !numUnderscoresOK(it.val) { 243 p.panicf("Invalid integer %q: underscores must be surrounded by digits", it.val) 244 } 245 if numHasLeadingZero(it.val) { 246 p.panicf("Invalid integer %q: cannot have leading zeroes", it.val) 247 } 248 249 num, err := strconv.ParseInt(it.val, 0, 64) 250 if err != nil { 251 // Distinguish integer values. Normally, it'd be a bug if the lexer 252 // provides an invalid integer, but it's possible that the number is 253 // out of range of valid values (which the lexer cannot determine). 254 // So mark the former as a bug but the latter as a legitimate user 255 // error. 256 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { 257 p.panicf("Integer '%s' is out of the range of 64-bit signed integers.", it.val) 258 } else { 259 p.bug("Expected integer value, but got '%s'.", it.val) 260 } 261 } 262 return num, p.typeOfPrimitive(it) 263} 264 265func (p *parser) valueFloat(it item) (interface{}, tomlType) { 266 parts := strings.FieldsFunc(it.val, func(r rune) bool { 267 switch r { 268 case '.', 'e', 'E': 269 return true 270 } 271 return false 272 }) 273 for _, part := range parts { 274 if !numUnderscoresOK(part) { 275 p.panicf("Invalid float %q: underscores must be surrounded by digits", it.val) 276 } 277 } 278 if len(parts) > 0 && numHasLeadingZero(parts[0]) { 279 p.panicf("Invalid float %q: cannot have leading zeroes", it.val) 280 } 281 if !numPeriodsOK(it.val) { 282 // As a special case, numbers like '123.' or '1.e2', 283 // which are valid as far as Go/strconv are concerned, 284 // must be rejected because TOML says that a fractional 285 // part consists of '.' followed by 1+ digits. 286 p.panicf("Invalid float %q: '.' must be followed by one or more digits", it.val) 287 } 288 val := strings.Replace(it.val, "_", "", -1) 289 if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does. 290 val = "nan" 291 } 292 num, err := strconv.ParseFloat(val, 64) 293 if err != nil { 294 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { 295 p.panicf("Float '%s' is out of the range of 64-bit IEEE-754 floating-point numbers.", it.val) 296 } else { 297 p.panicf("Invalid float value: %q", it.val) 298 } 299 } 300 return num, p.typeOfPrimitive(it) 301} 302 303var dtTypes = []struct { 304 fmt string 305 zone *time.Location 306}{ 307 {time.RFC3339Nano, time.Local}, 308 {"2006-01-02T15:04:05.999999999", internal.LocalDatetime}, 309 {"2006-01-02", internal.LocalDate}, 310 {"15:04:05.999999999", internal.LocalTime}, 311} 312 313func (p *parser) valueDatetime(it item) (interface{}, tomlType) { 314 it.val = datetimeRepl.Replace(it.val) 315 var ( 316 t time.Time 317 ok bool 318 err error 319 ) 320 for _, dt := range dtTypes { 321 t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone) 322 if err == nil { 323 ok = true 324 break 325 } 326 } 327 if !ok { 328 p.panicf("Invalid TOML Datetime: %q.", it.val) 329 } 330 return t, p.typeOfPrimitive(it) 331} 332 333func (p *parser) valueArray(it item) (interface{}, tomlType) { 334 p.setType(p.currentKey, tomlArray) 335 336 // p.setType(p.currentKey, typ) 337 var ( 338 array []interface{} 339 types []tomlType 340 ) 341 for it = p.next(); it.typ != itemArrayEnd; it = p.next() { 342 if it.typ == itemCommentStart { 343 p.expect(itemText) 344 continue 345 } 346 347 val, typ := p.value(it, true) 348 array = append(array, val) 349 types = append(types, typ) 350 } 351 return array, tomlArray 352} 353 354func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) { 355 var ( 356 hash = make(map[string]interface{}) 357 outerContext = p.context 358 outerKey = p.currentKey 359 ) 360 361 p.context = append(p.context, p.currentKey) 362 prevContext := p.context 363 p.currentKey = "" 364 365 p.addImplicit(p.context) 366 p.addContext(p.context, parentIsArray) 367 368 /// Loop over all table key/value pairs. 369 for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { 370 if it.typ == itemCommentStart { 371 p.expect(itemText) 372 continue 373 } 374 375 /// Read all key parts. 376 k := p.next() 377 p.approxLine = k.line 378 var key Key 379 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { 380 key = append(key, p.keyString(k)) 381 } 382 p.assertEqual(itemKeyEnd, k.typ) 383 384 /// The current key is the last part. 385 p.currentKey = key[len(key)-1] 386 387 /// All the other parts (if any) are the context; need to set each part 388 /// as implicit. 389 context := key[:len(key)-1] 390 for i := range context { 391 p.addImplicitContext(append(p.context, context[i:i+1]...)) 392 } 393 394 /// Set the value. 395 val, typ := p.value(p.next(), false) 396 p.set(p.currentKey, val, typ) 397 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 398 hash[p.currentKey] = val 399 400 /// Restore context. 401 p.context = prevContext 402 } 403 p.context = outerContext 404 p.currentKey = outerKey 405 return hash, tomlHash 406} 407 408// numHasLeadingZero checks if this number has leading zeroes, allowing for '0', 409// +/- signs, and base prefixes. 410func numHasLeadingZero(s string) bool { 411 if len(s) > 1 && s[0] == '0' && isDigit(rune(s[1])) { // >1 to allow "0" and isDigit to allow 0x 412 return true 413 } 414 if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' { 415 return true 416 } 417 return false 418} 419 420// numUnderscoresOK checks whether each underscore in s is surrounded by 421// characters that are not underscores. 422func numUnderscoresOK(s string) bool { 423 switch s { 424 case "nan", "+nan", "-nan", "inf", "-inf", "+inf": 425 return true 426 } 427 accept := false 428 for _, r := range s { 429 if r == '_' { 430 if !accept { 431 return false 432 } 433 } 434 435 // isHexadecimal is a superset of all the permissable characters 436 // surrounding an underscore. 437 accept = isHexadecimal(r) 438 } 439 return accept 440} 441 442// numPeriodsOK checks whether every period in s is followed by a digit. 443func numPeriodsOK(s string) bool { 444 period := false 445 for _, r := range s { 446 if period && !isDigit(r) { 447 return false 448 } 449 period = r == '.' 450 } 451 return !period 452} 453 454// Set the current context of the parser, where the context is either a hash or 455// an array of hashes, depending on the value of the `array` parameter. 456// 457// Establishing the context also makes sure that the key isn't a duplicate, and 458// will create implicit hashes automatically. 459func (p *parser) addContext(key Key, array bool) { 460 var ok bool 461 462 // Always start at the top level and drill down for our context. 463 hashContext := p.mapping 464 keyContext := make(Key, 0) 465 466 // We only need implicit hashes for key[0:-1] 467 for _, k := range key[0 : len(key)-1] { 468 _, ok = hashContext[k] 469 keyContext = append(keyContext, k) 470 471 // No key? Make an implicit hash and move on. 472 if !ok { 473 p.addImplicit(keyContext) 474 hashContext[k] = make(map[string]interface{}) 475 } 476 477 // If the hash context is actually an array of tables, then set 478 // the hash context to the last element in that array. 479 // 480 // Otherwise, it better be a table, since this MUST be a key group (by 481 // virtue of it not being the last element in a key). 482 switch t := hashContext[k].(type) { 483 case []map[string]interface{}: 484 hashContext = t[len(t)-1] 485 case map[string]interface{}: 486 hashContext = t 487 default: 488 p.panicf("Key '%s' was already created as a hash.", keyContext) 489 } 490 } 491 492 p.context = keyContext 493 if array { 494 // If this is the first element for this array, then allocate a new 495 // list of tables for it. 496 k := key[len(key)-1] 497 if _, ok := hashContext[k]; !ok { 498 hashContext[k] = make([]map[string]interface{}, 0, 4) 499 } 500 501 // Add a new table. But make sure the key hasn't already been used 502 // for something else. 503 if hash, ok := hashContext[k].([]map[string]interface{}); ok { 504 hashContext[k] = append(hash, make(map[string]interface{})) 505 } else { 506 p.panicf("Key '%s' was already created and cannot be used as an array.", keyContext) 507 } 508 } else { 509 p.setValue(key[len(key)-1], make(map[string]interface{})) 510 } 511 p.context = append(p.context, key[len(key)-1]) 512} 513 514// set calls setValue and setType. 515func (p *parser) set(key string, val interface{}, typ tomlType) { 516 p.setValue(p.currentKey, val) 517 p.setType(p.currentKey, typ) 518} 519 520// setValue sets the given key to the given value in the current context. 521// It will make sure that the key hasn't already been defined, account for 522// implicit key groups. 523func (p *parser) setValue(key string, value interface{}) { 524 var ( 525 tmpHash interface{} 526 ok bool 527 hash = p.mapping 528 keyContext Key 529 ) 530 for _, k := range p.context { 531 keyContext = append(keyContext, k) 532 if tmpHash, ok = hash[k]; !ok { 533 p.bug("Context for key '%s' has not been established.", keyContext) 534 } 535 switch t := tmpHash.(type) { 536 case []map[string]interface{}: 537 // The context is a table of hashes. Pick the most recent table 538 // defined as the current hash. 539 hash = t[len(t)-1] 540 case map[string]interface{}: 541 hash = t 542 default: 543 p.panicf("Key '%s' has already been defined.", keyContext) 544 } 545 } 546 keyContext = append(keyContext, key) 547 548 if _, ok := hash[key]; ok { 549 // Normally redefining keys isn't allowed, but the key could have been 550 // defined implicitly and it's allowed to be redefined concretely. (See 551 // the `valid/implicit-and-explicit-after.toml` in toml-test) 552 // 553 // But we have to make sure to stop marking it as an implicit. (So that 554 // another redefinition provokes an error.) 555 // 556 // Note that since it has already been defined (as a hash), we don't 557 // want to overwrite it. So our business is done. 558 if p.isArray(keyContext) { 559 p.removeImplicit(keyContext) 560 hash[key] = value 561 return 562 } 563 if p.isImplicit(keyContext) { 564 p.removeImplicit(keyContext) 565 return 566 } 567 568 // Otherwise, we have a concrete key trying to override a previous 569 // key, which is *always* wrong. 570 p.panicf("Key '%s' has already been defined.", keyContext) 571 } 572 573 hash[key] = value 574} 575 576// setType sets the type of a particular value at a given key. 577// It should be called immediately AFTER setValue. 578// 579// Note that if `key` is empty, then the type given will be applied to the 580// current context (which is either a table or an array of tables). 581func (p *parser) setType(key string, typ tomlType) { 582 keyContext := make(Key, 0, len(p.context)+1) 583 for _, k := range p.context { 584 keyContext = append(keyContext, k) 585 } 586 if len(key) > 0 { // allow type setting for hashes 587 keyContext = append(keyContext, key) 588 } 589 p.types[keyContext.String()] = typ 590} 591 592// Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and 593// "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly). 594func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = true } 595func (p *parser) removeImplicit(key Key) { p.implicits[key.String()] = false } 596func (p *parser) isImplicit(key Key) bool { return p.implicits[key.String()] } 597func (p *parser) isArray(key Key) bool { return p.types[key.String()] == tomlArray } 598func (p *parser) addImplicitContext(key Key) { 599 p.addImplicit(key) 600 p.addContext(key, false) 601} 602 603// current returns the full key name of the current context. 604func (p *parser) current() string { 605 if len(p.currentKey) == 0 { 606 return p.context.String() 607 } 608 if len(p.context) == 0 { 609 return p.currentKey 610 } 611 return fmt.Sprintf("%s.%s", p.context, p.currentKey) 612} 613 614func stripFirstNewline(s string) string { 615 if len(s) > 0 && s[0] == '\n' { 616 return s[1:] 617 } 618 if len(s) > 1 && s[0] == '\r' && s[1] == '\n' { 619 return s[2:] 620 } 621 return s 622} 623 624// Remove newlines inside triple-quoted strings if a line ends with "\". 625func stripEscapedNewlines(s string) string { 626 split := strings.Split(s, "\n") 627 if len(split) < 1 { 628 return s 629 } 630 631 escNL := false // Keep track of the last non-blank line was escaped. 632 for i, line := range split { 633 line = strings.TrimRight(line, " \t\r") 634 635 if len(line) == 0 || line[len(line)-1] != '\\' { 636 split[i] = strings.TrimRight(split[i], "\r") 637 if !escNL && i != len(split)-1 { 638 split[i] += "\n" 639 } 640 continue 641 } 642 643 escBS := true 644 for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- { 645 escBS = !escBS 646 } 647 if escNL { 648 line = strings.TrimLeft(line, " \t\r") 649 } 650 escNL = !escBS 651 652 if escBS { 653 split[i] += "\n" 654 continue 655 } 656 657 split[i] = line[:len(line)-1] // Remove \ 658 if len(split)-1 > i { 659 split[i+1] = strings.TrimLeft(split[i+1], " \t\r") 660 } 661 } 662 return strings.Join(split, "") 663} 664 665func (p *parser) replaceEscapes(str string) string { 666 var replaced []rune 667 s := []byte(str) 668 r := 0 669 for r < len(s) { 670 if s[r] != '\\' { 671 c, size := utf8.DecodeRune(s[r:]) 672 r += size 673 replaced = append(replaced, c) 674 continue 675 } 676 r += 1 677 if r >= len(s) { 678 p.bug("Escape sequence at end of string.") 679 return "" 680 } 681 switch s[r] { 682 default: 683 p.bug("Expected valid escape code after \\, but got %q.", s[r]) 684 return "" 685 case ' ', '\t': 686 p.panicf("invalid escape: '\\%c'", s[r]) 687 return "" 688 case 'b': 689 replaced = append(replaced, rune(0x0008)) 690 r += 1 691 case 't': 692 replaced = append(replaced, rune(0x0009)) 693 r += 1 694 case 'n': 695 replaced = append(replaced, rune(0x000A)) 696 r += 1 697 case 'f': 698 replaced = append(replaced, rune(0x000C)) 699 r += 1 700 case 'r': 701 replaced = append(replaced, rune(0x000D)) 702 r += 1 703 case '"': 704 replaced = append(replaced, rune(0x0022)) 705 r += 1 706 case '\\': 707 replaced = append(replaced, rune(0x005C)) 708 r += 1 709 case 'u': 710 // At this point, we know we have a Unicode escape of the form 711 // `uXXXX` at [r, r+5). (Because the lexer guarantees this 712 // for us.) 713 escaped := p.asciiEscapeToUnicode(s[r+1 : r+5]) 714 replaced = append(replaced, escaped) 715 r += 5 716 case 'U': 717 // At this point, we know we have a Unicode escape of the form 718 // `uXXXX` at [r, r+9). (Because the lexer guarantees this 719 // for us.) 720 escaped := p.asciiEscapeToUnicode(s[r+1 : r+9]) 721 replaced = append(replaced, escaped) 722 r += 9 723 } 724 } 725 return string(replaced) 726} 727 728func (p *parser) asciiEscapeToUnicode(bs []byte) rune { 729 s := string(bs) 730 hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) 731 if err != nil { 732 p.bug("Could not parse '%s' as a hexadecimal number, but the "+ 733 "lexer claims it's OK: %s", s, err) 734 } 735 if !utf8.ValidRune(rune(hex)) { 736 p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s) 737 } 738 return rune(hex) 739} 740