1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Code to parse a template.
6
7package template
8
9import (
10	"fmt"
11	"io"
12	"io/ioutil"
13	"reflect"
14	"strconv"
15	"strings"
16	"unicode"
17	"unicode/utf8"
18)
19
// Error describes a failure reported while parsing or executing a template.
// Both fields are exported so that users may extract the information and
// reformat it if they desire.
type Error struct {
	Line int    // line number reported with the failure
	Msg  string // description of the failure
}

// Error implements the error interface, rendering the receiver as
// "line N: message".
func (e *Error) Error() string {
	const layout = "line %d: %s"
	return fmt.Sprintf(layout, e.Line, e.Msg)
}
28
29// checkError is a deferred function to turn a panic with type *Error into a plain error return.
30// Other panics are unexpected and so are re-enabled.
31func checkError(error *error) {
32	if v := recover(); v != nil {
33		if e, ok := v.(*Error); ok {
34			*error = e
35		} else {
36			// runtime errors should crash
37			panic(v)
38		}
39	}
40}
41
// Single-byte literals used by the parser.  lbrace and rbrace are the
// delimiters New installs by default; space and tab are the texts emitted
// for the .space and .tab literals.
var lbrace = []byte{'{'}
var rbrace = []byte{'}'}
var space = []byte{' '}
var tab = []byte{'\t'}
47
// The various types of "tokens", which are plain text or (usually) brace-delimited descriptors.
// analyze assigns one of these to every item returned by nextItem.
const (
	tokAlternates = iota // {.alternates with}
	tokComment           // action whose first character is '#'
	tokEnd               // {.end}
	tokLiteral           // {.meta-left}, {.meta-right}, {.space} or {.tab}
	tokOr                // {.or}
	tokRepeated          // {.repeated section field}
	tokSection           // {.section field}
	tokText              // item that does not start with the left delimiter
	tokVariable          // any other action: a variable invocation
)
60
// FormatterMap is the type describing the mapping from formatter
// names to the functions that implement them.  When a name is looked up,
// a template's own map is consulted first and the built-in formatters
// serve as the fallback.
type FormatterMap map[string]func(io.Writer, string, ...interface{})
64
// Built-in formatters, used when the template's own map has no entry for a
// name.  The empty name is the default formatter chosen when a variable
// specifies none; it maps to the same function as "str".
var builtins = FormatterMap{
	"html": HTMLFormatter,
	"str":  StringFormatter,
	"":     StringFormatter,
}
71
// The parsed state of a template is a slice of pointers to xxxElement structs,
// stored in Template.elems.  Variables, sections and repeated sections record
// line numbers so errors can be reported better during execution.
74
// Plain text.  parseSimple creates one for every item that analyze
// classifies as tokText.
type textElement struct {
	text []byte // the item exactly as returned by nextItem
}
79
// A literal such as .meta-left or .meta-right.  The literals .space and .tab
// are represented the same way.
type literalElement struct {
	text []byte // the left or right delimiter, a space, or a tab
}
84
// A variable invocation to be evaluated.  Built by newVariable from the
// words of an action.
type variableElement struct {
	linenum int           // template line number current when the variable was parsed
	args    []interface{} // The fields and literals in the invocation.
	fmts    []string      // Names of formatters to apply. len(fmts) > 0
}
91
// A variableElement arg to be evaluated as a field name.  newVariable uses
// this type for every word that is neither a quoted nor a numeric literal,
// which distinguishes it from a plain string literal argument.
type fieldName string
94
// A .section block, possibly with a .or.
// start, or and end are indexes into Template.elems.
type sectionElement struct {
	linenum int    // of .section itself
	field   string // cursor field for this block
	start   int    // first element
	or      int    // first element of .or block; -1 if there is no .or
	end     int    // one beyond last element
}
103
// A .repeated block, possibly with a .or and a .alternates.
// altstart and altend are indexes into Template.elems, like the indexes
// of the embedded sectionElement.
type repeatedElement struct {
	sectionElement     // It has the same structure...
	altstart       int // ... except for alternates; -1 if there is no .alternates
	altend         int // index of the .or block if present, else same as end
}
110
// Template is the type that represents a template definition.
// It is unchanged after parsing.
// NOTE(review): Execute resets p before running; the execution code is not
// shown here — confirm nothing else is modified.
type Template struct {
	fmap FormatterMap // formatters for variables
	// Used during parsing:
	ldelim, rdelim []byte // delimiters; default {}
	buf            []byte // input text to process
	p              int    // position in buf
	linenum        int    // position in input
	// Parsed results:
	elems []interface{} // parsed elements, in template order; non-nil once New has run
}
123
124// New creates a new template with the specified formatter map (which
125// may be nil) to define auxiliary functions for formatting variables.
126func New(fmap FormatterMap) *Template {
127	t := new(Template)
128	t.fmap = fmap
129	t.ldelim = lbrace
130	t.rdelim = rbrace
131	t.elems = make([]interface{}, 0, 16)
132	return t
133}
134
135// Report error and stop executing.  The line number must be provided explicitly.
136func (t *Template) execError(st *state, line int, err string, args ...interface{}) {
137	panic(&Error{line, fmt.Sprintf(err, args...)})
138}
139
140// Report error, panic to terminate parsing.
141// The line number comes from the template state.
142func (t *Template) parseError(err string, args ...interface{}) {
143	panic(&Error{t.linenum, fmt.Sprintf(err, args...)})
144}
145
// isExported reports whether name is an exported identifier, that is,
// whether its first rune is an upper case letter.  The empty string is
// not exported.
func isExported(name string) bool {
	if name == "" {
		return false
	}
	initial, _ := utf8.DecodeRuneInString(name)
	return unicode.IsUpper(initial)
}
151
152// -- Lexical analysis
153
// isSpace reports whether c is a space character: blank, tab, carriage
// return or newline.
func isSpace(c uint8) bool {
	switch c {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
156
// equal reports whether s[n:n+len(t)] == t, safely: it never indexes
// outside s.  It returns false when n is negative or beyond the end of s,
// and when fewer than len(t) bytes remain at n, rather than panicking on
// the slice expression.  An empty t matches at every offset in the range
// [0, len(s)].
func equal(s []byte, n int, t []byte) bool {
	if n < 0 || n > len(s) {
		// Offset outside s: nothing there can match.
		return false
	}
	rest := s[n:]
	if len(t) > len(rest) { // not enough space left for a match.
		return false
	}
	for i, c := range t {
		if c != rest[i] {
			return false
		}
	}
	return true
}
170
// isQuote reports whether c is a string- or character-delimiting quote
// character: a double quote, a back quote, or a single quote.
func isQuote(c byte) bool {
	switch c {
	case '"', '`', '\'':
		return true
	}
	return false
}
175
// endQuote returns the index of the quote that closes the quoted string
// starting at s[n], or -1 if no matching end quote is found before the end
// of the line (or of s).  Inside double- and single-quoted strings a
// backslash hides the byte that follows it; in any other kind of quoted
// string a backslash is an ordinary byte.
func endQuote(s []byte, n int) int {
	delim := s[n]
	escapes := delim == '"' || delim == '\''
	for i := n + 1; i < len(s); i++ {
		c := s[i]
		if c == '\\' {
			if escapes {
				i++ // step over the escaped byte
			}
			continue
		}
		if c == '\n' {
			return -1
		}
		if c == delim {
			return i
		}
	}
	return -1
}
195
// nextItem returns the next item from the input buffer and advances t.p
// past it, counting consumed newlines in t.linenum.  If the returned
// item is empty, we are at EOF.  The item will be either a
// delimited string or a non-empty string between delimited
// strings. Tokens stop at (but include, if plain text) a newline.
// Action tokens on a line by themselves drop any space on
// either side, up to and including the newline.
// An unterminated quote or action is reported through parseError, which
// panics.
func (t *Template) nextItem() []byte {
	// An action can only be alone on its line if scanning starts at the
	// beginning of a line.
	startOfLine := t.p == 0 || t.buf[t.p-1] == '\n'
	start := t.p
	var i int
	// newline steps over the '\n' at position i and counts the line.
	newline := func() {
		t.linenum++
		i++
	}
	// Leading space up to but not including newline
	for i = start; i < len(t.buf); i++ {
		if t.buf[i] == '\n' || !isSpace(t.buf[i]) {
			break
		}
	}
	leadingSpace := i > start
	// What's left is nothing, newline, delimited string, or plain text
	switch {
	case i == len(t.buf):
		// EOF; nothing to do
	case t.buf[i] == '\n':
		// The item is the leading space (if any) plus the newline.
		newline()
	case equal(t.buf, i, t.ldelim):
		left := i         // Start of left delimiter.
		right := -1       // Will be (immediately after) right delimiter.
		haveText := false // Delimiters contain text.
		i += len(t.ldelim)
		// Find the end of the action.
		for ; i < len(t.buf); i++ {
			if t.buf[i] == '\n' {
				// Actions may not span lines; right stays -1.
				break
			}
			if isQuote(t.buf[i]) {
				// Jump to the closing quote so that delimiters inside a
				// quoted string are not taken for the end of the action.
				i = endQuote(t.buf, i)
				if i == -1 {
					// parseError panics; the return is never reached.
					t.parseError("unmatched quote")
					return nil
				}
				continue
			}
			if equal(t.buf, i, t.rdelim) {
				i += len(t.rdelim)
				right = i
				break
			}
			// Saw a byte that is neither part of a quoted string nor the
			// start of the right delimiter.
			haveText = true
		}
		if right < 0 {
			// parseError panics; the return is never reached.
			t.parseError("unmatched opening delimiter")
			return nil
		}
		// Is this a special action (starts with '.' or '#') and the only thing on the line?
		if startOfLine && haveText {
			firstChar := t.buf[left+len(t.ldelim)]
			if firstChar == '.' || firstChar == '#' {
				// It's special and the first thing on the line. Is it the last?
				for j := right; j < len(t.buf) && isSpace(t.buf[j]); j++ {
					if t.buf[j] == '\n' {
						// Yes it is. Drop the surrounding space and return the {.foo}
						t.linenum++
						t.p = j + 1
						return t.buf[left:right]
					}
				}
			}
		}
		// No it's not. If there's leading space, return that.
		if leadingSpace {
			// not trimming space: return leading space if there is some.
			// t.p is left at the delimiter, so the action itself is
			// returned by the next call.
			t.p = left
			return t.buf[start:left]
		}
		// Return the word, leave the trailing space.
		start = left
		break
	default:
		// Plain text: runs up to and including the next newline, or up to
		// but not including the next left delimiter.
		for ; i < len(t.buf); i++ {
			if t.buf[i] == '\n' {
				newline()
				break
			}
			if equal(t.buf, i, t.ldelim) {
				break
			}
		}
	}
	item := t.buf[start:i]
	t.p = i
	return item
}
291
292// Turn a byte array into a space-split array of strings,
293// taking into account quoted strings.
294func words(buf []byte) []string {
295	s := make([]string, 0, 5)
296	for i := 0; i < len(buf); {
297		// One word per loop
298		for i < len(buf) && isSpace(buf[i]) {
299			i++
300		}
301		if i == len(buf) {
302			break
303		}
304		// Got a word
305		start := i
306		if isQuote(buf[i]) {
307			i = endQuote(buf, i)
308			if i < 0 {
309				i = len(buf)
310			} else {
311				i++
312			}
313		}
314		// Even with quotes, break on space only.  This handles input
315		// such as {""|} and catches quoting mistakes.
316		for i < len(buf) && !isSpace(buf[i]) {
317			i++
318		}
319		s = append(s, string(buf[start:i]))
320	}
321	return s
322}
323
324// Analyze an item and return its token type and, if it's an action item, an array of
325// its constituent words.
326func (t *Template) analyze(item []byte) (tok int, w []string) {
327	// item is known to be non-empty
328	if !equal(item, 0, t.ldelim) { // doesn't start with left delimiter
329		tok = tokText
330		return
331	}
332	if !equal(item, len(item)-len(t.rdelim), t.rdelim) { // doesn't end with right delimiter
333		t.parseError("internal error: unmatched opening delimiter") // lexing should prevent this
334		return
335	}
336	if len(item) <= len(t.ldelim)+len(t.rdelim) { // no contents
337		t.parseError("empty directive")
338		return
339	}
340	// Comment
341	if item[len(t.ldelim)] == '#' {
342		tok = tokComment
343		return
344	}
345	// Split into words
346	w = words(item[len(t.ldelim) : len(item)-len(t.rdelim)]) // drop final delimiter
347	if len(w) == 0 {
348		t.parseError("empty directive")
349		return
350	}
351	first := w[0]
352	if first[0] != '.' {
353		tok = tokVariable
354		return
355	}
356	if len(first) > 1 && first[1] >= '0' && first[1] <= '9' {
357		// Must be a float.
358		tok = tokVariable
359		return
360	}
361	switch first {
362	case ".meta-left", ".meta-right", ".space", ".tab":
363		tok = tokLiteral
364		return
365	case ".or":
366		tok = tokOr
367		return
368	case ".end":
369		tok = tokEnd
370		return
371	case ".section":
372		if len(w) != 2 {
373			t.parseError("incorrect fields for .section: %s", item)
374			return
375		}
376		tok = tokSection
377		return
378	case ".repeated":
379		if len(w) != 3 || w[1] != "section" {
380			t.parseError("incorrect fields for .repeated: %s", item)
381			return
382		}
383		tok = tokRepeated
384		return
385	case ".alternates":
386		if len(w) != 2 || w[1] != "with" {
387			t.parseError("incorrect fields for .alternates: %s", item)
388			return
389		}
390		tok = tokAlternates
391		return
392	}
393	t.parseError("bad directive: %s", item)
394	return
395}
396
397// formatter returns the Formatter with the given name in the Template, or nil if none exists.
398func (t *Template) formatter(name string) func(io.Writer, string, ...interface{}) {
399	if t.fmap != nil {
400		if fn := t.fmap[name]; fn != nil {
401			return fn
402		}
403	}
404	return builtins[name]
405}
406
407// -- Parsing
408
409// newVariable allocates a new variable-evaluation element.
410func (t *Template) newVariable(words []string) *variableElement {
411	formatters := extractFormatters(words)
412	args := make([]interface{}, len(words))
413
414	// Build argument list, processing any literals
415	for i, word := range words {
416		var lerr error
417		switch word[0] {
418		case '"', '`', '\'':
419			v, err := strconv.Unquote(word)
420			if err == nil && word[0] == '\'' {
421				args[i], _ = utf8.DecodeRuneInString(v)
422			} else {
423				args[i], lerr = v, err
424			}
425
426		case '.', '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
427			v, err := strconv.ParseInt(word, 0, 64)
428			if err == nil {
429				args[i] = v
430			} else {
431				v, err := strconv.ParseFloat(word, 64)
432				args[i], lerr = v, err
433			}
434
435		default:
436			args[i] = fieldName(word)
437		}
438		if lerr != nil {
439			t.parseError("invalid literal: %q: %s", word, lerr)
440		}
441	}
442
443	// We could remember the function address here and avoid the lookup later,
444	// but it's more dynamic to let the user change the map contents underfoot.
445	// We do require the name to be present, though.
446
447	// Is it in user-supplied map?
448	for _, f := range formatters {
449		if t.formatter(f) == nil {
450			t.parseError("unknown formatter: %q", f)
451		}
452	}
453
454	return &variableElement{t.linenum, args, formatters}
455}
456
457// extractFormatters extracts a list of formatters from words.
458// After the final space-separated argument in a variable, formatters may be
459// specified separated by pipe symbols. For example: {a b c|d|e}
460// The words parameter still has the formatters joined by '|' in the last word.
461// extractFormatters splits formatters, replaces the last word with the content
462// found before the first '|' within it, and returns the formatters obtained.
463// If no formatters are found in words, the default formatter is returned.
464func extractFormatters(words []string) (formatters []string) {
465	// "" is the default formatter.
466	formatters = []string{""}
467	if len(words) == 0 {
468		return
469	}
470	var bar int
471	lastWord := words[len(words)-1]
472	if isQuote(lastWord[0]) {
473		end := endQuote([]byte(lastWord), 0)
474		if end < 0 || end+1 == len(lastWord) || lastWord[end+1] != '|' {
475			return
476		}
477		bar = end + 1
478	} else {
479		bar = strings.IndexRune(lastWord, '|')
480		if bar < 0 {
481			return
482		}
483	}
484	words[len(words)-1] = lastWord[0:bar]
485	formatters = strings.Split(lastWord[bar+1:], "|")
486	return
487}
488
489// Grab the next item.  If it's simple, just append it to the template.
490// Otherwise return its details.
491func (t *Template) parseSimple(item []byte) (done bool, tok int, w []string) {
492	tok, w = t.analyze(item)
493	done = true // assume for simplicity
494	switch tok {
495	case tokComment:
496		return
497	case tokText:
498		t.elems = append(t.elems, &textElement{item})
499		return
500	case tokLiteral:
501		switch w[0] {
502		case ".meta-left":
503			t.elems = append(t.elems, &literalElement{t.ldelim})
504		case ".meta-right":
505			t.elems = append(t.elems, &literalElement{t.rdelim})
506		case ".space":
507			t.elems = append(t.elems, &literalElement{space})
508		case ".tab":
509			t.elems = append(t.elems, &literalElement{tab})
510		default:
511			t.parseError("internal error: unknown literal: %s", w[0])
512		}
513		return
514	case tokVariable:
515		t.elems = append(t.elems, t.newVariable(w))
516		return
517	}
518	return false, tok, w
519}
520
521// parseRepeated and parseSection are mutually recursive
522
523func (t *Template) parseRepeated(words []string) *repeatedElement {
524	r := new(repeatedElement)
525	t.elems = append(t.elems, r)
526	r.linenum = t.linenum
527	r.field = words[2]
528	// Scan section, collecting true and false (.or) blocks.
529	r.start = len(t.elems)
530	r.or = -1
531	r.altstart = -1
532	r.altend = -1
533Loop:
534	for {
535		item := t.nextItem()
536		if len(item) == 0 {
537			t.parseError("missing .end for .repeated section")
538			break
539		}
540		done, tok, w := t.parseSimple(item)
541		if done {
542			continue
543		}
544		switch tok {
545		case tokEnd:
546			break Loop
547		case tokOr:
548			if r.or >= 0 {
549				t.parseError("extra .or in .repeated section")
550				break Loop
551			}
552			r.altend = len(t.elems)
553			r.or = len(t.elems)
554		case tokSection:
555			t.parseSection(w)
556		case tokRepeated:
557			t.parseRepeated(w)
558		case tokAlternates:
559			if r.altstart >= 0 {
560				t.parseError("extra .alternates in .repeated section")
561				break Loop
562			}
563			if r.or >= 0 {
564				t.parseError(".alternates inside .or block in .repeated section")
565				break Loop
566			}
567			r.altstart = len(t.elems)
568		default:
569			t.parseError("internal error: unknown repeated section item: %s", item)
570			break Loop
571		}
572	}
573	if r.altend < 0 {
574		r.altend = len(t.elems)
575	}
576	r.end = len(t.elems)
577	return r
578}
579
580func (t *Template) parseSection(words []string) *sectionElement {
581	s := new(sectionElement)
582	t.elems = append(t.elems, s)
583	s.linenum = t.linenum
584	s.field = words[1]
585	// Scan section, collecting true and false (.or) blocks.
586	s.start = len(t.elems)
587	s.or = -1
588Loop:
589	for {
590		item := t.nextItem()
591		if len(item) == 0 {
592			t.parseError("missing .end for .section")
593			break
594		}
595		done, tok, w := t.parseSimple(item)
596		if done {
597			continue
598		}
599		switch tok {
600		case tokEnd:
601			break Loop
602		case tokOr:
603			if s.or >= 0 {
604				t.parseError("extra .or in .section")
605				break Loop
606			}
607			s.or = len(t.elems)
608		case tokSection:
609			t.parseSection(w)
610		case tokRepeated:
611			t.parseRepeated(w)
612		case tokAlternates:
613			t.parseError(".alternates not in .repeated")
614		default:
615			t.parseError("internal error: unknown section item: %s", item)
616		}
617	}
618	s.end = len(t.elems)
619	return s
620}
621
622func (t *Template) parse() {
623	for {
624		item := t.nextItem()
625		if len(item) == 0 {
626			break
627		}
628		done, tok, w := t.parseSimple(item)
629		if done {
630			continue
631		}
632		switch tok {
633		case tokOr, tokEnd, tokAlternates:
634			t.parseError("unexpected %s", w[0])
635		case tokSection:
636			t.parseSection(w)
637		case tokRepeated:
638			t.parseRepeated(w)
639		default:
640			t.parseError("internal error: bad directive in parse: %s", item)
641		}
642	}
643}
644
645// -- Execution
646
647// -- Public interface
648
649// Parse initializes a Template by parsing its definition.  The string
650// s contains the template text.  If any errors occur, Parse returns
651// the error.
652func (t *Template) Parse(s string) (err error) {
653	if t.elems == nil {
654		return &Error{1, "template not allocated with New"}
655	}
656	if !validDelim(t.ldelim) || !validDelim(t.rdelim) {
657		return &Error{1, fmt.Sprintf("bad delimiter strings %q %q", t.ldelim, t.rdelim)}
658	}
659	defer checkError(&err)
660	t.buf = []byte(s)
661	t.p = 0
662	t.linenum = 1
663	t.parse()
664	return nil
665}
666
667// ParseFile is like Parse but reads the template definition from the
668// named file.
669func (t *Template) ParseFile(filename string) (err error) {
670	b, err := ioutil.ReadFile(filename)
671	if err != nil {
672		return err
673	}
674	return t.Parse(string(b))
675}
676
677// Execute applies a parsed template to the specified data object,
678// generating output to wr.
679func (t *Template) Execute(wr io.Writer, data interface{}) (err error) {
680	// Extract the driver data.
681	val := reflect.ValueOf(data)
682	defer checkError(&err)
683	t.p = 0
684	t.execute(0, len(t.elems), &state{parent: nil, data: val, wr: wr})
685	return nil
686}
687
688// SetDelims sets the left and right delimiters for operations in the
689// template.  They are validated during parsing.  They could be
690// validated here but it's better to keep the routine simple.  The
691// delimiters are very rarely invalid and Parse has the necessary
692// error-handling interface already.
693func (t *Template) SetDelims(left, right string) {
694	t.ldelim = []byte(left)
695	t.rdelim = []byte(right)
696}
697
698// Parse creates a Template with default parameters (such as {} for
699// metacharacters).  The string s contains the template text while
700// the formatter map fmap, which may be nil, defines auxiliary functions
701// for formatting variables.  The template is returned. If any errors
702// occur, err will be non-nil.
703func Parse(s string, fmap FormatterMap) (t *Template, err error) {
704	t = New(fmap)
705	err = t.Parse(s)
706	if err != nil {
707		t = nil
708	}
709	return
710}
711
712// ParseFile is a wrapper function that creates a Template with default
713// parameters (such as {} for metacharacters).  The filename identifies
714// a file containing the template text, while the formatter map fmap, which
715// may be nil, defines auxiliary functions for formatting variables.
716// The template is returned. If any errors occur, err will be non-nil.
717func ParseFile(filename string, fmap FormatterMap) (t *Template, err error) {
718	b, err := ioutil.ReadFile(filename)
719	if err != nil {
720		return nil, err
721	}
722	return Parse(string(b), fmap)
723}
724
725// MustParse is like Parse but panics if the template cannot be parsed.
726func MustParse(s string, fmap FormatterMap) *Template {
727	t, err := Parse(s, fmap)
728	if err != nil {
729		panic("template.MustParse error: " + err.Error())
730	}
731	return t
732}
733
734// MustParseFile is like ParseFile but panics if the file cannot be read
735// or the template cannot be parsed.
736func MustParseFile(filename string, fmap FormatterMap) *Template {
737	b, err := ioutil.ReadFile(filename)
738	if err != nil {
739		panic("template.MustParseFile error: " + err.Error())
740	}
741	return MustParse(string(b), fmap)
742}
743