1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package template 6 7import ( 8 "fmt" 9) 10 11// context describes the state an HTML parser must be in when it reaches the 12// portion of HTML produced by evaluating a particular template node. 13// 14// The zero value of type context is the start context for a template that 15// produces an HTML fragment as defined at 16// http://www.w3.org/TR/html5/syntax.html#the-end 17// where the context element is null. 18type context struct { 19 state state 20 delim delim 21 urlPart urlPart 22 jsCtx jsCtx 23 attr attr 24 element element 25 err *Error 26} 27 28func (c context) String() string { 29 return fmt.Sprintf("{%v %v %v %v %v %v %v}", c.state, c.delim, c.urlPart, c.jsCtx, c.attr, c.element, c.err) 30} 31 32// eq reports whether two contexts are equal. 33func (c context) eq(d context) bool { 34 return c.state == d.state && 35 c.delim == d.delim && 36 c.urlPart == d.urlPart && 37 c.jsCtx == d.jsCtx && 38 c.attr == d.attr && 39 c.element == d.element && 40 c.err == d.err 41} 42 43// mangle produces an identifier that includes a suffix that distinguishes it 44// from template names mangled with different contexts. 45func (c context) mangle(templateName string) string { 46 // The mangled name for the default context is the input templateName. 47 if c.state == stateText { 48 return templateName 49 } 50 s := templateName + "$htmltemplate_" + c.state.String() 51 if c.delim != 0 { 52 s += "_" + c.delim.String() 53 } 54 if c.urlPart != 0 { 55 s += "_" + c.urlPart.String() 56 } 57 if c.jsCtx != 0 { 58 s += "_" + c.jsCtx.String() 59 } 60 if c.attr != 0 { 61 s += "_" + c.attr.String() 62 } 63 if c.element != 0 { 64 s += "_" + c.element.String() 65 } 66 return s 67} 68 69// state describes a high-level HTML parser state. 
//
// It bounds the top of the element stack, and by extension the HTML insertion
// mode, but also contains state that does not correspond to anything in the
// HTML5 parsing algorithm because a single token production in the HTML
// grammar may contain embedded actions in a template. For instance, the quoted
// HTML attribute produced by
//
//	<div title="Hello {{.World}}">
//
// is a single token in HTML's grammar but in a template spans several nodes.
type state uint8

const (
	// stateText is parsed character data. An HTML parser is in
	// this state when its parse position is outside an HTML tag,
	// directive, comment, and special element body.
	stateText state = iota
	// stateTag occurs before an HTML attribute or the end of a tag.
	stateTag
	// stateAttrName occurs inside an attribute name.
	// It occurs between the ^'s in ` ^name^ = value`.
	stateAttrName
	// stateAfterName occurs after an attr name has ended but before any
	// equals sign. It occurs between the ^'s in ` name^ ^= value`.
	stateAfterName
	// stateBeforeValue occurs after the equals sign but before the value.
	// It occurs between the ^'s in ` name =^ ^value`.
	stateBeforeValue
	// stateHTMLCmt occurs inside an <!-- HTML comment -->.
	stateHTMLCmt
	// stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
	// as described at http://www.w3.org/TR/html5/syntax.html#elements-0
	stateRCDATA
	// stateAttr occurs inside an HTML attribute whose content is text.
	stateAttr
	// stateURL occurs inside an HTML attribute whose content is a URL.
	stateURL
	// stateSrcset occurs inside an HTML srcset attribute.
	stateSrcset
	// stateJS occurs inside an event handler or script element.
	stateJS
	// stateJSDqStr occurs inside a JavaScript double quoted string.
	stateJSDqStr
	// stateJSSqStr occurs inside a JavaScript single quoted string.
	stateJSSqStr
	// stateJSRegexp occurs inside a JavaScript regexp literal.
	stateJSRegexp
	// stateJSBlockCmt occurs inside a JavaScript /* block comment */.
	stateJSBlockCmt
	// stateJSLineCmt occurs inside a JavaScript // line comment.
	stateJSLineCmt
	// stateCSS occurs inside a <style> element or style attribute.
	stateCSS
	// stateCSSDqStr occurs inside a CSS double quoted string.
	stateCSSDqStr
	// stateCSSSqStr occurs inside a CSS single quoted string.
	stateCSSSqStr
	// stateCSSDqURL occurs inside a CSS double quoted url("...").
	stateCSSDqURL
	// stateCSSSqURL occurs inside a CSS single quoted url('...').
	stateCSSSqURL
	// stateCSSURL occurs inside a CSS unquoted url(...).
	stateCSSURL
	// stateCSSBlockCmt occurs inside a CSS /* block comment */.
	stateCSSBlockCmt
	// stateCSSLineCmt occurs inside a CSS // line comment.
	stateCSSLineCmt
	// stateError is an infectious error state outside any valid
	// HTML/CSS/JS construct.
	stateError
)

// stateNames holds the identifier of each state constant, indexed by its
// value. It backs state.String.
var stateNames = [...]string{
	stateText:        "stateText",
	stateTag:         "stateTag",
	stateAttrName:    "stateAttrName",
	stateAfterName:   "stateAfterName",
	stateBeforeValue: "stateBeforeValue",
	stateHTMLCmt:     "stateHTMLCmt",
	stateRCDATA:      "stateRCDATA",
	stateAttr:        "stateAttr",
	stateURL:         "stateURL",
	stateSrcset:      "stateSrcset",
	stateJS:          "stateJS",
	stateJSDqStr:     "stateJSDqStr",
	stateJSSqStr:     "stateJSSqStr",
	stateJSRegexp:    "stateJSRegexp",
	stateJSBlockCmt:  "stateJSBlockCmt",
	stateJSLineCmt:   "stateJSLineCmt",
	stateCSS:         "stateCSS",
	stateCSSDqStr:    "stateCSSDqStr",
	stateCSSSqStr:    "stateCSSSqStr",
	stateCSSDqURL:    "stateCSSDqURL",
	stateCSSSqURL:    "stateCSSSqURL",
	stateCSSURL:      "stateCSSURL",
	stateCSSBlockCmt: "stateCSSBlockCmt",
	stateCSSLineCmt:  "stateCSSLineCmt",
	stateError:       "stateError",
}

// String returns the identifier of s as listed in stateNames, or
// "illegal state N" when s lies beyond the end of that table.
func (s state) String() string {
	if int(s) < len(stateNames) {
		return stateNames[s]
	}
	return fmt.Sprintf("illegal state %d", int(s))
}

// isComment is true for any state that contains content meant for template
// authors & maintainers, not for end-users or machines.
func isComment(s state) bool {
	switch s {
	case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt:
		return true
	}
	return false
}

// isInTag reports whether s occurs solely inside an HTML tag.
func isInTag(s state) bool {
	switch s {
	case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr:
		return true
	}
	return false
}

// delim is the delimiter that will end the current HTML attribute.
type delim uint8

const (
	// delimNone occurs outside any attribute.
	delimNone delim = iota
	// delimDoubleQuote occurs when a double quote (") closes the attribute.
	delimDoubleQuote
	// delimSingleQuote occurs when a single quote (') closes the attribute.
	delimSingleQuote
	// delimSpaceOrTagEnd occurs when a space or right angle bracket (>)
	// closes the attribute.
	delimSpaceOrTagEnd
)

// delimNames holds the identifier of each delim constant, indexed by its
// value. It backs delim.String.
var delimNames = [...]string{
	delimNone:          "delimNone",
	delimDoubleQuote:   "delimDoubleQuote",
	delimSingleQuote:   "delimSingleQuote",
	delimSpaceOrTagEnd: "delimSpaceOrTagEnd",
}

// String returns the identifier of d as listed in delimNames, or
// "illegal delim N" when d lies beyond the end of that table.
func (d delim) String() string {
	if int(d) < len(delimNames) {
		return delimNames[d]
	}
	return fmt.Sprintf("illegal delim %d", int(d))
}

// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
// encoding strategies.
type urlPart uint8

const (
	// urlPartNone occurs when not in a URL, or possibly at the start:
	// ^ in "^http://auth/path?k=v#frag".
	urlPartNone urlPart = iota
	// urlPartPreQuery occurs in the scheme, authority, or path; between the
	// ^s in "h^ttp://auth/path^?k=v#frag".
	urlPartPreQuery
	// urlPartQueryOrFrag occurs in the query portion between the ^s in
	// "http://auth/path?^k=v#frag^".
	urlPartQueryOrFrag
	// urlPartUnknown occurs due to joining of contexts both before and
	// after the query separator.
	urlPartUnknown
)

// urlPartNames holds the identifier of each urlPart constant, indexed by its
// value. It backs urlPart.String.
var urlPartNames = [...]string{
	urlPartNone:        "urlPartNone",
	urlPartPreQuery:    "urlPartPreQuery",
	urlPartQueryOrFrag: "urlPartQueryOrFrag",
	urlPartUnknown:     "urlPartUnknown",
}

// String returns the identifier of u as listed in urlPartNames, or
// "illegal urlPart N" when u lies beyond the end of that table.
func (u urlPart) String() string {
	if int(u) < len(urlPartNames) {
		return urlPartNames[u]
	}
	return fmt.Sprintf("illegal urlPart %d", int(u))
}

// jsCtx determines whether a '/' starts a regular expression literal or a
// division operator.
type jsCtx uint8

const (
	// jsCtxRegexp occurs where a '/' would start a regexp literal.
	jsCtxRegexp jsCtx = iota
	// jsCtxDivOp occurs where a '/' would start a division operator.
	jsCtxDivOp
	// jsCtxUnknown occurs where a '/' is ambiguous due to context joining.
	jsCtxUnknown
)

// String returns the identifier of c, or "illegal jsCtx N" when c is not one
// of the declared jsCtx constants.
func (c jsCtx) String() string {
	switch c {
	case jsCtxRegexp:
		return "jsCtxRegexp"
	case jsCtxDivOp:
		return "jsCtxDivOp"
	case jsCtxUnknown:
		return "jsCtxUnknown"
	}
	return fmt.Sprintf("illegal jsCtx %d", int(c))
}

// element identifies the HTML element when inside a start tag or special body.
// Certain HTML elements (for example <script> and <style>) have bodies that are
// treated differently from stateText so the element type is necessary to
// transition into the correct context at the end of a tag and to identify the
// end delimiter for the body.
type element uint8

const (
	// elementNone occurs outside a special tag or special element body.
	elementNone element = iota
	// elementScript corresponds to the raw text <script> element
	// with JS MIME type or no type attribute.
	elementScript
	// elementStyle corresponds to the raw text <style> element.
	elementStyle
	// elementTextarea corresponds to the RCDATA <textarea> element.
	elementTextarea
	// elementTitle corresponds to the RCDATA <title> element.
	elementTitle
)

// elementNames holds the identifier of each element constant, indexed by its
// value. It backs element.String.
var elementNames = [...]string{
	elementNone:     "elementNone",
	elementScript:   "elementScript",
	elementStyle:    "elementStyle",
	elementTextarea: "elementTextarea",
	elementTitle:    "elementTitle",
}

// String returns the identifier of e as listed in elementNames, or
// "illegal element N" when e lies beyond the end of that table.
func (e element) String() string {
	if int(e) < len(elementNames) {
		return elementNames[e]
	}
	return fmt.Sprintf("illegal element %d", int(e))
}

// attr identifies the current HTML attribute when inside the attribute,
// that is, starting from stateAttrName until stateTag/stateText (exclusive).
type attr uint8

const (
	// attrNone corresponds to a normal attribute or no attribute.
	attrNone attr = iota
	// attrScript corresponds to an event handler attribute.
	attrScript
	// attrScriptType corresponds to the type attribute in the script HTML
	// element.
	attrScriptType
	// attrStyle corresponds to the style attribute whose value is CSS.
	attrStyle
	// attrURL corresponds to an attribute whose value is a URL.
	attrURL
	// attrSrcset corresponds to a srcset attribute.
	attrSrcset
)

// attrNames holds the identifier of each attr constant, indexed by its
// value. It backs attr.String.
var attrNames = [...]string{
	attrNone:       "attrNone",
	attrScript:     "attrScript",
	attrScriptType: "attrScriptType",
	attrStyle:      "attrStyle",
	attrURL:        "attrURL",
	attrSrcset:     "attrSrcset",
}

// String returns the identifier of a as listed in attrNames, or
// "illegal attr N" when a lies beyond the end of that table.
func (a attr) String() string {
	if int(a) < len(attrNames) {
		return attrNames[a]
	}
	return fmt.Sprintf("illegal attr %d", int(a))
}