1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package template 6 7import ( 8 "fmt" 9) 10 11// context describes the state an HTML parser must be in when it reaches the 12// portion of HTML produced by evaluating a particular template node. 13// 14// The zero value of type context is the start context for a template that 15// produces an HTML fragment as defined at 16// https://www.w3.org/TR/html5/syntax.html#the-end 17// where the context element is null. 18type context struct { 19 state state 20 delim delim 21 urlPart urlPart 22 jsCtx jsCtx 23 attr attr 24 element element 25 err *Error 26} 27 28func (c context) String() string { 29 return fmt.Sprintf("{%v %v %v %v %v %v %v}", c.state, c.delim, c.urlPart, c.jsCtx, c.attr, c.element, c.err) 30} 31 32// eq reports whether two contexts are equal. 33func (c context) eq(d context) bool { 34 return c.state == d.state && 35 c.delim == d.delim && 36 c.urlPart == d.urlPart && 37 c.jsCtx == d.jsCtx && 38 c.attr == d.attr && 39 c.element == d.element && 40 c.err == d.err 41} 42 43// mangle produces an identifier that includes a suffix that distinguishes it 44// from template names mangled with different contexts. 45func (c context) mangle(templateName string) string { 46 // The mangled name for the default context is the input templateName. 47 if c.state == stateText { 48 return templateName 49 } 50 s := templateName + "$htmltemplate_" + c.state.String() 51 if c.delim != delimNone { 52 s += "_" + c.delim.String() 53 } 54 if c.urlPart != urlPartNone { 55 s += "_" + c.urlPart.String() 56 } 57 if c.jsCtx != jsCtxRegexp { 58 s += "_" + c.jsCtx.String() 59 } 60 if c.attr != attrNone { 61 s += "_" + c.attr.String() 62 } 63 if c.element != elementNone { 64 s += "_" + c.element.String() 65 } 66 return s 67} 68 69// state describes a high-level HTML parser state. 70// 71// It bounds the top of the element stack, and by extension the HTML insertion 72// mode, but also contains state that does not correspond to anything in the 73// HTML5 parsing algorithm because a single token production in the HTML 74// grammar may contain embedded actions in a template. For instance, the quoted 75// HTML attribute produced by 76// <div title="Hello {{.World}}"> 77// is a single token in HTML's grammar but in a template spans several nodes. 78type state uint8 79 80//go:generate stringer -type state 81 82const ( 83 // stateText is parsed character data. An HTML parser is in 84 // this state when its parse position is outside an HTML tag, 85 // directive, comment, and special element body. 86 stateText state = iota 87 // stateTag occurs before an HTML attribute or the end of a tag. 88 stateTag 89 // stateAttrName occurs inside an attribute name. 90 // It occurs between the ^'s in ` ^name^ = value`. 91 stateAttrName 92 // stateAfterName occurs after an attr name has ended but before any 93 // equals sign. It occurs between the ^'s in ` name^ ^= value`. 94 stateAfterName 95 // stateBeforeValue occurs after the equals sign but before the value. 96 // It occurs between the ^'s in ` name =^ ^value`. 97 stateBeforeValue 98 // stateHTMLCmt occurs inside an <!-- HTML comment -->. 99 stateHTMLCmt 100 // stateRCDATA occurs inside an RCDATA element (<textarea> or <title>) 101 // as described at https://www.w3.org/TR/html5/syntax.html#elements-0 102 stateRCDATA 103 // stateAttr occurs inside an HTML attribute whose content is text. 104 stateAttr 105 // stateURL occurs inside an HTML attribute whose content is a URL. 106 stateURL 107 // stateSrcset occurs inside an HTML srcset attribute. 108 stateSrcset 109 // stateJS occurs inside an event handler or script element. 110 stateJS 111 // stateJSDqStr occurs inside a JavaScript double quoted string. 112 stateJSDqStr 113 // stateJSSqStr occurs inside a JavaScript single quoted string. 114 stateJSSqStr 115 // stateJSRegexp occurs inside a JavaScript regexp literal. 116 stateJSRegexp 117 // stateJSBlockCmt occurs inside a JavaScript /* block comment */. 118 stateJSBlockCmt 119 // stateJSLineCmt occurs inside a JavaScript // line comment. 120 stateJSLineCmt 121 // stateCSS occurs inside a <style> element or style attribute. 122 stateCSS 123 // stateCSSDqStr occurs inside a CSS double quoted string. 124 stateCSSDqStr 125 // stateCSSSqStr occurs inside a CSS single quoted string. 126 stateCSSSqStr 127 // stateCSSDqURL occurs inside a CSS double quoted url("..."). 128 stateCSSDqURL 129 // stateCSSSqURL occurs inside a CSS single quoted url('...'). 130 stateCSSSqURL 131 // stateCSSURL occurs inside a CSS unquoted url(...). 132 stateCSSURL 133 // stateCSSBlockCmt occurs inside a CSS /* block comment */. 134 stateCSSBlockCmt 135 // stateCSSLineCmt occurs inside a CSS // line comment. 136 stateCSSLineCmt 137 // stateError is an infectious error state outside any valid 138 // HTML/CSS/JS construct. 139 stateError 140) 141 142// isComment is true for any state that contains content meant for template 143// authors & maintainers, not for end-users or machines. 144func isComment(s state) bool { 145 switch s { 146 case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt: 147 return true 148 } 149 return false 150} 151 152// isInTag return whether s occurs solely inside an HTML tag. 153func isInTag(s state) bool { 154 switch s { 155 case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr: 156 return true 157 } 158 return false 159} 160 161// delim is the delimiter that will end the current HTML attribute. 162type delim uint8 163 164//go:generate stringer -type delim 165 166const ( 167 // delimNone occurs outside any attribute. 168 delimNone delim = iota 169 // delimDoubleQuote occurs when a double quote (") closes the attribute. 170 delimDoubleQuote 171 // delimSingleQuote occurs when a single quote (') closes the attribute. 172 delimSingleQuote 173 // delimSpaceOrTagEnd occurs when a space or right angle bracket (>) 174 // closes the attribute. 175 delimSpaceOrTagEnd 176) 177 178// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different 179// encoding strategies. 180type urlPart uint8 181 182//go:generate stringer -type urlPart 183 184const ( 185 // urlPartNone occurs when not in a URL, or possibly at the start: 186 // ^ in "^http://auth/path?k=v#frag". 187 urlPartNone urlPart = iota 188 // urlPartPreQuery occurs in the scheme, authority, or path; between the 189 // ^s in "h^ttp://auth/path^?k=v#frag". 190 urlPartPreQuery 191 // urlPartQueryOrFrag occurs in the query portion between the ^s in 192 // "http://auth/path?^k=v#frag^". 193 urlPartQueryOrFrag 194 // urlPartUnknown occurs due to joining of contexts both before and 195 // after the query separator. 196 urlPartUnknown 197) 198 199// jsCtx determines whether a '/' starts a regular expression literal or a 200// division operator. 201type jsCtx uint8 202 203//go:generate stringer -type jsCtx 204 205const ( 206 // jsCtxRegexp occurs where a '/' would start a regexp literal. 207 jsCtxRegexp jsCtx = iota 208 // jsCtxDivOp occurs where a '/' would start a division operator. 209 jsCtxDivOp 210 // jsCtxUnknown occurs where a '/' is ambiguous due to context joining. 211 jsCtxUnknown 212) 213 214// element identifies the HTML element when inside a start tag or special body. 215// Certain HTML element (for example <script> and <style>) have bodies that are 216// treated differently from stateText so the element type is necessary to 217// transition into the correct context at the end of a tag and to identify the 218// end delimiter for the body. 219type element uint8 220 221//go:generate stringer -type element 222 223const ( 224 // elementNone occurs outside a special tag or special element body. 225 elementNone element = iota 226 // elementScript corresponds to the raw text <script> element 227 // with JS MIME type or no type attribute. 228 elementScript 229 // elementStyle corresponds to the raw text <style> element. 230 elementStyle 231 // elementTextarea corresponds to the RCDATA <textarea> element. 232 elementTextarea 233 // elementTitle corresponds to the RCDATA <title> element. 234 elementTitle 235) 236 237//go:generate stringer -type attr 238 239// attr identifies the current HTML attribute when inside the attribute, 240// that is, starting from stateAttrName until stateTag/stateText (exclusive). 241type attr uint8 242 243const ( 244 // attrNone corresponds to a normal attribute or no attribute. 245 attrNone attr = iota 246 // attrScript corresponds to an event handler attribute. 247 attrScript 248 // attrScriptType corresponds to the type attribute in script HTML element 249 attrScriptType 250 // attrStyle corresponds to the style attribute whose value is CSS. 251 attrStyle 252 // attrURL corresponds to an attribute whose value is a URL. 253 attrURL 254 // attrSrcset corresponds to a srcset attribute. 255 attrSrcset 256) 257