1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"fmt"
9)
10
// context describes the state an HTML parser must be in when it reaches the
// portion of HTML produced by evaluating a particular template node.
//
// The zero value of type context is the start context for a template that
// produces an HTML fragment as defined at
// https://www.w3.org/TR/html5/syntax.html#the-end
// where the context element is null.
//
// The fields are:
//   - state: the high-level HTML parser state.
//   - delim: the delimiter that will end the current HTML attribute.
//   - urlPart: the part of a URL the parser is in, which allows different
//     encoding strategies.
//   - jsCtx: whether a '/' would start a regexp literal or a division
//     operator.
//   - attr: the current HTML attribute when inside one.
//   - element: the HTML element when inside a start tag or special body.
//   - err: NOTE(review): presumably describes the problem when state is
//     stateError; confirm against the code that sets it.
//
// All fields are scalars except err, which is a pointer and is therefore
// compared by identity in eq.
type context struct {
	state   state
	delim   delim
	urlPart urlPart
	jsCtx   jsCtx
	attr    attr
	element element
	err     *Error
}
27
28func (c context) String() string {
29	return fmt.Sprintf("{%v %v %v %v %v %v %v}", c.state, c.delim, c.urlPart, c.jsCtx, c.attr, c.element, c.err)
30}
31
32// eq reports whether two contexts are equal.
33func (c context) eq(d context) bool {
34	return c.state == d.state &&
35		c.delim == d.delim &&
36		c.urlPart == d.urlPart &&
37		c.jsCtx == d.jsCtx &&
38		c.attr == d.attr &&
39		c.element == d.element &&
40		c.err == d.err
41}
42
43// mangle produces an identifier that includes a suffix that distinguishes it
44// from template names mangled with different contexts.
45func (c context) mangle(templateName string) string {
46	// The mangled name for the default context is the input templateName.
47	if c.state == stateText {
48		return templateName
49	}
50	s := templateName + "$htmltemplate_" + c.state.String()
51	if c.delim != delimNone {
52		s += "_" + c.delim.String()
53	}
54	if c.urlPart != urlPartNone {
55		s += "_" + c.urlPart.String()
56	}
57	if c.jsCtx != jsCtxRegexp {
58		s += "_" + c.jsCtx.String()
59	}
60	if c.attr != attrNone {
61		s += "_" + c.attr.String()
62	}
63	if c.element != elementNone {
64		s += "_" + c.element.String()
65	}
66	return s
67}
68
// state describes a high-level HTML parser state.
//
// It bounds the top of the element stack, and by extension the HTML insertion
// mode, but also contains state that does not correspond to anything in the
// HTML5 parsing algorithm because a single token production in the HTML
// grammar may contain embedded actions in a template. For instance, the quoted
// HTML attribute produced by
//     <div title="Hello {{.World}}">
// is a single token in HTML's grammar but in a template spans several nodes.
//
// The zero value is stateText.
type state uint8

//go:generate stringer -type state

// The values below are assigned sequentially by iota, so inserting or
// reordering a constant changes the numeric value of every constant after it.
// NOTE(review): the go:generate directive above suggests the String method is
// produced by stringer; regenerate it after editing this list.
const (
	// stateText is parsed character data. An HTML parser is in
	// this state when its parse position is outside an HTML tag,
	// directive, comment, and special element body.
	// It is the zero value of state, and mangle leaves template names
	// unchanged in this state.
	stateText state = iota
	// stateTag occurs before an HTML attribute or the end of a tag.
	stateTag
	// stateAttrName occurs inside an attribute name.
	// It occurs between the ^'s in ` ^name^ = value`.
	stateAttrName
	// stateAfterName occurs after an attr name has ended but before any
	// equals sign. It occurs between the ^'s in ` name^ ^= value`.
	stateAfterName
	// stateBeforeValue occurs after the equals sign but before the value.
	// It occurs between the ^'s in ` name =^ ^value`.
	stateBeforeValue
	// stateHTMLCmt occurs inside an <!-- HTML comment -->.
	stateHTMLCmt
	// stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
	// as described at https://www.w3.org/TR/html5/syntax.html#elements-0
	stateRCDATA
	// stateAttr occurs inside an HTML attribute whose content is text.
	stateAttr
	// stateURL occurs inside an HTML attribute whose content is a URL.
	stateURL
	// stateSrcset occurs inside an HTML srcset attribute.
	stateSrcset
	// stateJS occurs inside an event handler or script element.
	stateJS
	// stateJSDqStr occurs inside a JavaScript double quoted string.
	stateJSDqStr
	// stateJSSqStr occurs inside a JavaScript single quoted string.
	stateJSSqStr
	// stateJSRegexp occurs inside a JavaScript regexp literal.
	stateJSRegexp
	// stateJSBlockCmt occurs inside a JavaScript /* block comment */.
	stateJSBlockCmt
	// stateJSLineCmt occurs inside a JavaScript // line comment.
	stateJSLineCmt
	// stateCSS occurs inside a <style> element or style attribute.
	stateCSS
	// stateCSSDqStr occurs inside a CSS double quoted string.
	stateCSSDqStr
	// stateCSSSqStr occurs inside a CSS single quoted string.
	stateCSSSqStr
	// stateCSSDqURL occurs inside a CSS double quoted url("...").
	stateCSSDqURL
	// stateCSSSqURL occurs inside a CSS single quoted url('...').
	stateCSSSqURL
	// stateCSSURL occurs inside a CSS unquoted url(...).
	stateCSSURL
	// stateCSSBlockCmt occurs inside a CSS /* block comment */.
	stateCSSBlockCmt
	// stateCSSLineCmt occurs inside a CSS // line comment.
	stateCSSLineCmt
	// stateError is an infectious error state outside any valid
	// HTML/CSS/JS construct.
	stateError
)
141
142// isComment is true for any state that contains content meant for template
143// authors & maintainers, not for end-users or machines.
144func isComment(s state) bool {
145	switch s {
146	case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt:
147		return true
148	}
149	return false
150}
151
152// isInTag return whether s occurs solely inside an HTML tag.
153func isInTag(s state) bool {
154	switch s {
155	case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr:
156		return true
157	}
158	return false
159}
160
// delim is the delimiter that will end the current HTML attribute.
//
// The zero value is delimNone; mangle adds no delim component to a template
// name for that value.
type delim uint8

//go:generate stringer -type delim

// Values are assigned sequentially by iota; keep the order stable.
const (
	// delimNone occurs outside any attribute.
	delimNone delim = iota
	// delimDoubleQuote occurs when a double quote (") closes the attribute.
	delimDoubleQuote
	// delimSingleQuote occurs when a single quote (') closes the attribute.
	delimSingleQuote
	// delimSpaceOrTagEnd occurs when a space or right angle bracket (>)
	// closes the attribute.
	delimSpaceOrTagEnd
)
177
// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
// encoding strategies.
//
// The zero value is urlPartNone; mangle adds no urlPart component to a
// template name for that value.
type urlPart uint8

//go:generate stringer -type urlPart

// Values are assigned sequentially by iota; keep the order stable.
const (
	// urlPartNone occurs when not in a URL, or possibly at the start:
	// ^ in "^http://auth/path?k=v#frag".
	urlPartNone urlPart = iota
	// urlPartPreQuery occurs in the scheme, authority, or path; between the
	// ^s in "h^ttp://auth/path^?k=v#frag".
	urlPartPreQuery
	// urlPartQueryOrFrag occurs in the query portion between the ^s in
	// "http://auth/path?^k=v#frag^".
	urlPartQueryOrFrag
	// urlPartUnknown occurs due to joining of contexts both before and
	// after the query separator.
	urlPartUnknown
)
198
// jsCtx determines whether a '/' starts a regular expression literal or a
// division operator.
//
// The zero value is jsCtxRegexp; mangle adds no jsCtx component to a template
// name for that value.
type jsCtx uint8

//go:generate stringer -type jsCtx

// Values are assigned sequentially by iota; keep the order stable.
const (
	// jsCtxRegexp occurs where a '/' would start a regexp literal.
	jsCtxRegexp jsCtx = iota
	// jsCtxDivOp occurs where a '/' would start a division operator.
	jsCtxDivOp
	// jsCtxUnknown occurs where a '/' is ambiguous due to context joining.
	jsCtxUnknown
)
213
// element identifies the HTML element when inside a start tag or special body.
// Certain HTML elements (for example <script> and <style>) have bodies that are
// treated differently from stateText so the element type is necessary to
// transition into the correct context at the end of a tag and to identify the
// end delimiter for the body.
//
// The zero value is elementNone; mangle adds no element component to a
// template name for that value.
type element uint8

//go:generate stringer -type element

// Values are assigned sequentially by iota; keep the order stable.
const (
	// elementNone occurs outside a special tag or special element body.
	elementNone element = iota
	// elementScript corresponds to the raw text <script> element
	// with JS MIME type or no type attribute.
	elementScript
	// elementStyle corresponds to the raw text <style> element.
	elementStyle
	// elementTextarea corresponds to the RCDATA <textarea> element.
	elementTextarea
	// elementTitle corresponds to the RCDATA <title> element.
	elementTitle
)
236
//go:generate stringer -type attr

// attr identifies the current HTML attribute when inside the attribute,
// that is, starting from stateAttrName until stateTag/stateText (exclusive).
//
// The zero value is attrNone; mangle adds no attr component to a template
// name for that value.
type attr uint8

// Values are assigned sequentially by iota; keep the order stable.
const (
	// attrNone corresponds to a normal attribute or no attribute.
	attrNone attr = iota
	// attrScript corresponds to an event handler attribute.
	attrScript
	// attrScriptType corresponds to the type attribute of the script HTML
	// element.
	attrScriptType
	// attrStyle corresponds to the style attribute whose value is CSS.
	attrStyle
	// attrURL corresponds to an attribute whose value is a URL.
	attrURL
	// attrSrcset corresponds to a srcset attribute.
	attrSrcset
)
257