1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Code to parse a template. 6 7package template 8 9import ( 10 "fmt" 11 "io" 12 "io/ioutil" 13 "reflect" 14 "strconv" 15 "strings" 16 "unicode" 17 "unicode/utf8" 18) 19 20// Errors returned during parsing and execution. Users may extract the information and reformat 21// if they desire. 22type Error struct { 23 Line int 24 Msg string 25} 26 27func (e *Error) Error() string { return fmt.Sprintf("line %d: %s", e.Line, e.Msg) } 28 29// checkError is a deferred function to turn a panic with type *Error into a plain error return. 30// Other panics are unexpected and so are re-enabled. 31func checkError(error *error) { 32 if v := recover(); v != nil { 33 if e, ok := v.(*Error); ok { 34 *error = e 35 } else { 36 // runtime errors should crash 37 panic(v) 38 } 39 } 40} 41 42// Most of the literals are aces. 43var lbrace = []byte{'{'} 44var rbrace = []byte{'}'} 45var space = []byte{' '} 46var tab = []byte{'\t'} 47 48// The various types of "tokens", which are plain text or (usually) brace-delimited descriptors 49const ( 50 tokAlternates = iota 51 tokComment 52 tokEnd 53 tokLiteral 54 tokOr 55 tokRepeated 56 tokSection 57 tokText 58 tokVariable 59) 60 61// FormatterMap is the type describing the mapping from formatter 62// names to the functions that implement them. 63type FormatterMap map[string]func(io.Writer, string, ...interface{}) 64 65// Built-in formatters. 66var builtins = FormatterMap{ 67 "html": HTMLFormatter, 68 "str": StringFormatter, 69 "": StringFormatter, 70} 71 72// The parsed state of a template is a vector of xxxElement structs. 73// Sections have line numbers so errors can be reported better during execution. 74 75// Plain text. 76type textElement struct { 77 text []byte 78} 79 80// A literal such as .meta-left or .meta-right 81type literalElement struct { 82 text []byte 83} 84 85// A variable invocation to be evaluated 86type variableElement struct { 87 linenum int 88 args []interface{} // The fields and literals in the invocation. 89 fmts []string // Names of formatters to apply. len(fmts) > 0 90} 91 92// A variableElement arg to be evaluated as a field name 93type fieldName string 94 95// A .section block, possibly with a .or 96type sectionElement struct { 97 linenum int // of .section itself 98 field string // cursor field for this block 99 start int // first element 100 or int // first element of .or block 101 end int // one beyond last element 102} 103 104// A .repeated block, possibly with a .or and a .alternates 105type repeatedElement struct { 106 sectionElement // It has the same structure... 107 altstart int // ... except for alternates 108 altend int 109} 110 111// Template is the type that represents a template definition. 112// It is unchanged after parsing. 113type Template struct { 114 fmap FormatterMap // formatters for variables 115 // Used during parsing: 116 ldelim, rdelim []byte // delimiters; default {} 117 buf []byte // input text to process 118 p int // position in buf 119 linenum int // position in input 120 // Parsed results: 121 elems []interface{} 122} 123 124// New creates a new template with the specified formatter map (which 125// may be nil) to define auxiliary functions for formatting variables. 126func New(fmap FormatterMap) *Template { 127 t := new(Template) 128 t.fmap = fmap 129 t.ldelim = lbrace 130 t.rdelim = rbrace 131 t.elems = make([]interface{}, 0, 16) 132 return t 133} 134 135// Report error and stop executing. The line number must be provided explicitly. 136func (t *Template) execError(st *state, line int, err string, args ...interface{}) { 137 panic(&Error{line, fmt.Sprintf(err, args...)}) 138} 139 140// Report error, panic to terminate parsing. 141// The line number comes from the template state. 142func (t *Template) parseError(err string, args ...interface{}) { 143 panic(&Error{t.linenum, fmt.Sprintf(err, args...)}) 144} 145 146// Is this an exported - upper case - name? 147func isExported(name string) bool { 148 r, _ := utf8.DecodeRuneInString(name) 149 return unicode.IsUpper(r) 150} 151 152// -- Lexical analysis 153 154// Is c a space character? 155func isSpace(c uint8) bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n' } 156 157// Safely, does s[n:n+len(t)] == t? 158func equal(s []byte, n int, t []byte) bool { 159 b := s[n:] 160 if len(t) > len(b) { // not enough space left for a match. 161 return false 162 } 163 for i, c := range t { 164 if c != b[i] { 165 return false 166 } 167 } 168 return true 169} 170 171// isQuote returns true if c is a string- or character-delimiting quote character. 172func isQuote(c byte) bool { 173 return c == '"' || c == '`' || c == '\'' 174} 175 176// endQuote returns the end quote index for the quoted string that 177// starts at n, or -1 if no matching end quote is found before the end 178// of the line. 179func endQuote(s []byte, n int) int { 180 quote := s[n] 181 for n++; n < len(s); n++ { 182 switch s[n] { 183 case '\\': 184 if quote == '"' || quote == '\'' { 185 n++ 186 } 187 case '\n': 188 return -1 189 case quote: 190 return n 191 } 192 } 193 return -1 194} 195 196// nextItem returns the next item from the input buffer. If the returned 197// item is empty, we are at EOF. The item will be either a 198// delimited string or a non-empty string between delimited 199// strings. Tokens stop at (but include, if plain text) a newline. 200// Action tokens on a line by themselves drop any space on 201// either side, up to and including the newline. 202func (t *Template) nextItem() []byte { 203 startOfLine := t.p == 0 || t.buf[t.p-1] == '\n' 204 start := t.p 205 var i int 206 newline := func() { 207 t.linenum++ 208 i++ 209 } 210 // Leading space up to but not including newline 211 for i = start; i < len(t.buf); i++ { 212 if t.buf[i] == '\n' || !isSpace(t.buf[i]) { 213 break 214 } 215 } 216 leadingSpace := i > start 217 // What's left is nothing, newline, delimited string, or plain text 218 switch { 219 case i == len(t.buf): 220 // EOF; nothing to do 221 case t.buf[i] == '\n': 222 newline() 223 case equal(t.buf, i, t.ldelim): 224 left := i // Start of left delimiter. 225 right := -1 // Will be (immediately after) right delimiter. 226 haveText := false // Delimiters contain text. 227 i += len(t.ldelim) 228 // Find the end of the action. 229 for ; i < len(t.buf); i++ { 230 if t.buf[i] == '\n' { 231 break 232 } 233 if isQuote(t.buf[i]) { 234 i = endQuote(t.buf, i) 235 if i == -1 { 236 t.parseError("unmatched quote") 237 return nil 238 } 239 continue 240 } 241 if equal(t.buf, i, t.rdelim) { 242 i += len(t.rdelim) 243 right = i 244 break 245 } 246 haveText = true 247 } 248 if right < 0 { 249 t.parseError("unmatched opening delimiter") 250 return nil 251 } 252 // Is this a special action (starts with '.' or '#') and the only thing on the line? 253 if startOfLine && haveText { 254 firstChar := t.buf[left+len(t.ldelim)] 255 if firstChar == '.' || firstChar == '#' { 256 // It's special and the first thing on the line. Is it the last? 257 for j := right; j < len(t.buf) && isSpace(t.buf[j]); j++ { 258 if t.buf[j] == '\n' { 259 // Yes it is. Drop the surrounding space and return the {.foo} 260 t.linenum++ 261 t.p = j + 1 262 return t.buf[left:right] 263 } 264 } 265 } 266 } 267 // No it's not. If there's leading space, return that. 268 if leadingSpace { 269 // not trimming space: return leading space if there is some. 270 t.p = left 271 return t.buf[start:left] 272 } 273 // Return the word, leave the trailing space. 274 start = left 275 break 276 default: 277 for ; i < len(t.buf); i++ { 278 if t.buf[i] == '\n' { 279 newline() 280 break 281 } 282 if equal(t.buf, i, t.ldelim) { 283 break 284 } 285 } 286 } 287 item := t.buf[start:i] 288 t.p = i 289 return item 290} 291 292// Turn a byte array into a space-split array of strings, 293// taking into account quoted strings. 294func words(buf []byte) []string { 295 s := make([]string, 0, 5) 296 for i := 0; i < len(buf); { 297 // One word per loop 298 for i < len(buf) && isSpace(buf[i]) { 299 i++ 300 } 301 if i == len(buf) { 302 break 303 } 304 // Got a word 305 start := i 306 if isQuote(buf[i]) { 307 i = endQuote(buf, i) 308 if i < 0 { 309 i = len(buf) 310 } else { 311 i++ 312 } 313 } 314 // Even with quotes, break on space only. This handles input 315 // such as {""|} and catches quoting mistakes. 316 for i < len(buf) && !isSpace(buf[i]) { 317 i++ 318 } 319 s = append(s, string(buf[start:i])) 320 } 321 return s 322} 323 324// Analyze an item and return its token type and, if it's an action item, an array of 325// its constituent words. 326func (t *Template) analyze(item []byte) (tok int, w []string) { 327 // item is known to be non-empty 328 if !equal(item, 0, t.ldelim) { // doesn't start with left delimiter 329 tok = tokText 330 return 331 } 332 if !equal(item, len(item)-len(t.rdelim), t.rdelim) { // doesn't end with right delimiter 333 t.parseError("internal error: unmatched opening delimiter") // lexing should prevent this 334 return 335 } 336 if len(item) <= len(t.ldelim)+len(t.rdelim) { // no contents 337 t.parseError("empty directive") 338 return 339 } 340 // Comment 341 if item[len(t.ldelim)] == '#' { 342 tok = tokComment 343 return 344 } 345 // Split into words 346 w = words(item[len(t.ldelim) : len(item)-len(t.rdelim)]) // drop final delimiter 347 if len(w) == 0 { 348 t.parseError("empty directive") 349 return 350 } 351 first := w[0] 352 if first[0] != '.' { 353 tok = tokVariable 354 return 355 } 356 if len(first) > 1 && first[1] >= '0' && first[1] <= '9' { 357 // Must be a float. 358 tok = tokVariable 359 return 360 } 361 switch first { 362 case ".meta-left", ".meta-right", ".space", ".tab": 363 tok = tokLiteral 364 return 365 case ".or": 366 tok = tokOr 367 return 368 case ".end": 369 tok = tokEnd 370 return 371 case ".section": 372 if len(w) != 2 { 373 t.parseError("incorrect fields for .section: %s", item) 374 return 375 } 376 tok = tokSection 377 return 378 case ".repeated": 379 if len(w) != 3 || w[1] != "section" { 380 t.parseError("incorrect fields for .repeated: %s", item) 381 return 382 } 383 tok = tokRepeated 384 return 385 case ".alternates": 386 if len(w) != 2 || w[1] != "with" { 387 t.parseError("incorrect fields for .alternates: %s", item) 388 return 389 } 390 tok = tokAlternates 391 return 392 } 393 t.parseError("bad directive: %s", item) 394 return 395} 396 397// formatter returns the Formatter with the given name in the Template, or nil if none exists. 398func (t *Template) formatter(name string) func(io.Writer, string, ...interface{}) { 399 if t.fmap != nil { 400 if fn := t.fmap[name]; fn != nil { 401 return fn 402 } 403 } 404 return builtins[name] 405} 406 407// -- Parsing 408 409// newVariable allocates a new variable-evaluation element. 410func (t *Template) newVariable(words []string) *variableElement { 411 formatters := extractFormatters(words) 412 args := make([]interface{}, len(words)) 413 414 // Build argument list, processing any literals 415 for i, word := range words { 416 var lerr error 417 switch word[0] { 418 case '"', '`', '\'': 419 v, err := strconv.Unquote(word) 420 if err == nil && word[0] == '\'' { 421 args[i], _ = utf8.DecodeRuneInString(v) 422 } else { 423 args[i], lerr = v, err 424 } 425 426 case '.', '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 427 v, err := strconv.ParseInt(word, 0, 64) 428 if err == nil { 429 args[i] = v 430 } else { 431 v, err := strconv.ParseFloat(word, 64) 432 args[i], lerr = v, err 433 } 434 435 default: 436 args[i] = fieldName(word) 437 } 438 if lerr != nil { 439 t.parseError("invalid literal: %q: %s", word, lerr) 440 } 441 } 442 443 // We could remember the function address here and avoid the lookup later, 444 // but it's more dynamic to let the user change the map contents underfoot. 445 // We do require the name to be present, though. 446 447 // Is it in user-supplied map? 448 for _, f := range formatters { 449 if t.formatter(f) == nil { 450 t.parseError("unknown formatter: %q", f) 451 } 452 } 453 454 return &variableElement{t.linenum, args, formatters} 455} 456 457// extractFormatters extracts a list of formatters from words. 458// After the final space-separated argument in a variable, formatters may be 459// specified separated by pipe symbols. For example: {a b c|d|e} 460// The words parameter still has the formatters joined by '|' in the last word. 461// extractFormatters splits formatters, replaces the last word with the content 462// found before the first '|' within it, and returns the formatters obtained. 463// If no formatters are found in words, the default formatter is returned. 464func extractFormatters(words []string) (formatters []string) { 465 // "" is the default formatter. 466 formatters = []string{""} 467 if len(words) == 0 { 468 return 469 } 470 var bar int 471 lastWord := words[len(words)-1] 472 if isQuote(lastWord[0]) { 473 end := endQuote([]byte(lastWord), 0) 474 if end < 0 || end+1 == len(lastWord) || lastWord[end+1] != '|' { 475 return 476 } 477 bar = end + 1 478 } else { 479 bar = strings.IndexRune(lastWord, '|') 480 if bar < 0 { 481 return 482 } 483 } 484 words[len(words)-1] = lastWord[0:bar] 485 formatters = strings.Split(lastWord[bar+1:], "|") 486 return 487} 488 489// Grab the next item. If it's simple, just append it to the template. 490// Otherwise return its details. 491func (t *Template) parseSimple(item []byte) (done bool, tok int, w []string) { 492 tok, w = t.analyze(item) 493 done = true // assume for simplicity 494 switch tok { 495 case tokComment: 496 return 497 case tokText: 498 t.elems = append(t.elems, &textElement{item}) 499 return 500 case tokLiteral: 501 switch w[0] { 502 case ".meta-left": 503 t.elems = append(t.elems, &literalElement{t.ldelim}) 504 case ".meta-right": 505 t.elems = append(t.elems, &literalElement{t.rdelim}) 506 case ".space": 507 t.elems = append(t.elems, &literalElement{space}) 508 case ".tab": 509 t.elems = append(t.elems, &literalElement{tab}) 510 default: 511 t.parseError("internal error: unknown literal: %s", w[0]) 512 } 513 return 514 case tokVariable: 515 t.elems = append(t.elems, t.newVariable(w)) 516 return 517 } 518 return false, tok, w 519} 520 521// parseRepeated and parseSection are mutually recursive 522 523func (t *Template) parseRepeated(words []string) *repeatedElement { 524 r := new(repeatedElement) 525 t.elems = append(t.elems, r) 526 r.linenum = t.linenum 527 r.field = words[2] 528 // Scan section, collecting true and false (.or) blocks. 529 r.start = len(t.elems) 530 r.or = -1 531 r.altstart = -1 532 r.altend = -1 533Loop: 534 for { 535 item := t.nextItem() 536 if len(item) == 0 { 537 t.parseError("missing .end for .repeated section") 538 break 539 } 540 done, tok, w := t.parseSimple(item) 541 if done { 542 continue 543 } 544 switch tok { 545 case tokEnd: 546 break Loop 547 case tokOr: 548 if r.or >= 0 { 549 t.parseError("extra .or in .repeated section") 550 break Loop 551 } 552 r.altend = len(t.elems) 553 r.or = len(t.elems) 554 case tokSection: 555 t.parseSection(w) 556 case tokRepeated: 557 t.parseRepeated(w) 558 case tokAlternates: 559 if r.altstart >= 0 { 560 t.parseError("extra .alternates in .repeated section") 561 break Loop 562 } 563 if r.or >= 0 { 564 t.parseError(".alternates inside .or block in .repeated section") 565 break Loop 566 } 567 r.altstart = len(t.elems) 568 default: 569 t.parseError("internal error: unknown repeated section item: %s", item) 570 break Loop 571 } 572 } 573 if r.altend < 0 { 574 r.altend = len(t.elems) 575 } 576 r.end = len(t.elems) 577 return r 578} 579 580func (t *Template) parseSection(words []string) *sectionElement { 581 s := new(sectionElement) 582 t.elems = append(t.elems, s) 583 s.linenum = t.linenum 584 s.field = words[1] 585 // Scan section, collecting true and false (.or) blocks. 586 s.start = len(t.elems) 587 s.or = -1 588Loop: 589 for { 590 item := t.nextItem() 591 if len(item) == 0 { 592 t.parseError("missing .end for .section") 593 break 594 } 595 done, tok, w := t.parseSimple(item) 596 if done { 597 continue 598 } 599 switch tok { 600 case tokEnd: 601 break Loop 602 case tokOr: 603 if s.or >= 0 { 604 t.parseError("extra .or in .section") 605 break Loop 606 } 607 s.or = len(t.elems) 608 case tokSection: 609 t.parseSection(w) 610 case tokRepeated: 611 t.parseRepeated(w) 612 case tokAlternates: 613 t.parseError(".alternates not in .repeated") 614 default: 615 t.parseError("internal error: unknown section item: %s", item) 616 } 617 } 618 s.end = len(t.elems) 619 return s 620} 621 622func (t *Template) parse() { 623 for { 624 item := t.nextItem() 625 if len(item) == 0 { 626 break 627 } 628 done, tok, w := t.parseSimple(item) 629 if done { 630 continue 631 } 632 switch tok { 633 case tokOr, tokEnd, tokAlternates: 634 t.parseError("unexpected %s", w[0]) 635 case tokSection: 636 t.parseSection(w) 637 case tokRepeated: 638 t.parseRepeated(w) 639 default: 640 t.parseError("internal error: bad directive in parse: %s", item) 641 } 642 } 643} 644 645// -- Execution 646 647// -- Public interface 648 649// Parse initializes a Template by parsing its definition. The string 650// s contains the template text. If any errors occur, Parse returns 651// the error. 652func (t *Template) Parse(s string) (err error) { 653 if t.elems == nil { 654 return &Error{1, "template not allocated with New"} 655 } 656 if !validDelim(t.ldelim) || !validDelim(t.rdelim) { 657 return &Error{1, fmt.Sprintf("bad delimiter strings %q %q", t.ldelim, t.rdelim)} 658 } 659 defer checkError(&err) 660 t.buf = []byte(s) 661 t.p = 0 662 t.linenum = 1 663 t.parse() 664 return nil 665} 666 667// ParseFile is like Parse but reads the template definition from the 668// named file. 669func (t *Template) ParseFile(filename string) (err error) { 670 b, err := ioutil.ReadFile(filename) 671 if err != nil { 672 return err 673 } 674 return t.Parse(string(b)) 675} 676 677// Execute applies a parsed template to the specified data object, 678// generating output to wr. 679func (t *Template) Execute(wr io.Writer, data interface{}) (err error) { 680 // Extract the driver data. 681 val := reflect.ValueOf(data) 682 defer checkError(&err) 683 t.p = 0 684 t.execute(0, len(t.elems), &state{parent: nil, data: val, wr: wr}) 685 return nil 686} 687 688// SetDelims sets the left and right delimiters for operations in the 689// template. They are validated during parsing. They could be 690// validated here but it's better to keep the routine simple. The 691// delimiters are very rarely invalid and Parse has the necessary 692// error-handling interface already. 693func (t *Template) SetDelims(left, right string) { 694 t.ldelim = []byte(left) 695 t.rdelim = []byte(right) 696} 697 698// Parse creates a Template with default parameters (such as {} for 699// metacharacters). The string s contains the template text while 700// the formatter map fmap, which may be nil, defines auxiliary functions 701// for formatting variables. The template is returned. If any errors 702// occur, err will be non-nil. 703func Parse(s string, fmap FormatterMap) (t *Template, err error) { 704 t = New(fmap) 705 err = t.Parse(s) 706 if err != nil { 707 t = nil 708 } 709 return 710} 711 712// ParseFile is a wrapper function that creates a Template with default 713// parameters (such as {} for metacharacters). The filename identifies 714// a file containing the template text, while the formatter map fmap, which 715// may be nil, defines auxiliary functions for formatting variables. 716// The template is returned. If any errors occur, err will be non-nil. 717func ParseFile(filename string, fmap FormatterMap) (t *Template, err error) { 718 b, err := ioutil.ReadFile(filename) 719 if err != nil { 720 return nil, err 721 } 722 return Parse(string(b), fmap) 723} 724 725// MustParse is like Parse but panics if the template cannot be parsed. 726func MustParse(s string, fmap FormatterMap) *Template { 727 t, err := Parse(s, fmap) 728 if err != nil { 729 panic("template.MustParse error: " + err.Error()) 730 } 731 return t 732} 733 734// MustParseFile is like ParseFile but panics if the file cannot be read 735// or the template cannot be parsed. 736func MustParseFile(filename string, fmap FormatterMap) *Template { 737 b, err := ioutil.ReadFile(filename) 738 if err != nil { 739 panic("template.MustParseFile error: " + err.Error()) 740 } 741 return MustParse(string(b), fmap) 742} 743