1 // Copyright (c) 2007, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 // --- 31 // Author: csilvers@google.com (Craig Silverstein) 32 // 33 // We allow template variables to have modifiers, each possibly with a 34 // value associated with it. 
// Format is
//    {{VARNAME:modname[=modifier-value]:modname[=modifier-value]:...}}
// Modname refers to a functor that takes the variable's value
// and modifier-value (empty-string if no modifier-value was
// specified), and returns a munged value.  Modifiers are applied
// left-to-right.  We define the legal modnames here, and the
// functors they refer to.
//
// Modifiers have a long-name, an optional short-name (one char;
// may be \0 if you don't want a shortname), and a functor that's
// applied to the variable.
//
// In addition to the list of modifiers hard-coded in the source code
// here, it is possible to dynamically register modifiers using a long
// name starting with "x-".  If you wish to define your own modifier
// class, in your own source code, just subclass TemplateModifier --
// see template_modifiers.cc for details of how to do that.
//
// Adding a new built-in modifier, to this file, takes several steps,
// both in this .h file and in the corresponding .cc file:
// 1) .h file: Define a struct for the modifier.  It must subclass
//     TemplateModifier.
// 2) .h file: declare a variable that's an instance of the struct.
//    This is used for people who want to modify the string themselves,
//    via TemplateDictionary::SetEscapedValue.
// 3) .cc file: define the new modifier's Modify method.
// 4) .cc file: give storage for the variable declared in the .h file (in 2).
// 5) .cc file: add the modifier to the g_modifiers array.
62 63 #ifndef TEMPLATE_TEMPLATE_MODIFIERS_H_ 64 #define TEMPLATE_TEMPLATE_MODIFIERS_H_ 65 66 #include <sys/types.h> // for size_t 67 #include <string> 68 #include <ctemplate/template_emitter.h> // so we can inline operator() 69 #include <ctemplate/per_expand_data.h> // could probably just forward-declare 70 71 // NOTE: if you are statically linking the template library into your binary 72 // (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL=' 73 // as a compiler flag in your project file to turn off the dllimports. 74 #ifndef CTEMPLATE_DLL_DECL 75 # define CTEMPLATE_DLL_DECL __declspec(dllimport) 76 #endif 77 78 namespace ctemplate { 79 80 class Template; 81 82 #define MODIFY_SIGNATURE_ \ 83 public: \ 84 virtual void Modify(const char* in, size_t inlen, \ 85 const PerExpandData*, ExpandEmitter* outbuf, \ 86 const std::string& arg) const 87 88 // If you wish to write your own modifier, it should subclass this 89 // method. Your subclass should only define Modify(); for efficiency, 90 // we do not make operator() virtual. 91 class CTEMPLATE_DLL_DECL TemplateModifier { 92 public: 93 // This function takes a string as input, a char*/size_t pair, and 94 // appends the modified version to the end of outbuf. In addition 95 // to the variable-value to modify (specified via in/inlen), each 96 // Modify passes in two pieces of user-supplied data: 97 // 1) arg: this is the modifier-value, for modifiers that take a 98 // value (e.g. "{{VAR:modifier=value}}"). This value 99 // comes from the template file. For modifiers that take 100 // no modval argument, arg will always be "". For modifiers 101 // that do take such an argument, arg will always start with "=". 102 // 2) per_expand_data: this is a set of data that the application can 103 // associate with a TemplateDictionary, and is passed in to 104 // every variable expanded using that dictionary. This value 105 // comes from the source code. 
106 virtual void Modify(const char* in, size_t inlen, 107 const PerExpandData* per_expand_data, 108 ExpandEmitter* outbuf, 109 const std::string& arg) const = 0; 110 111 // This function can be used to speed up modification. If Modify() 112 // is often a noop, you can implement MightModify() to indicate 113 // situations where it's safe to avoid the call to Modify(), because 114 // Modify() won't do any modifications in this case. Note it's 115 // always safe to return true here; you should just return false if 116 // you're certain Modify() can be ignored. This function is 117 // advisory; the template system is not required to call 118 // MightModify() before Modify(). MightModify(const PerExpandData *,const std::string &)119 virtual bool MightModify(const PerExpandData* /*per_expand_data*/, 120 const std::string& /*arg*/) const { 121 return true; 122 } 123 124 // We support both modifiers that take an argument, and those that don't. 125 // We also support passing in a string, or a char*/int pair. operator()126 std::string operator()(const char* in, size_t inlen, const std::string& arg="") const { 127 std::string out; 128 // we'll reserve some space to account for minimal escaping: say 12% 129 out.reserve(inlen + inlen/8 + 16); 130 StringEmitter outbuf(&out); 131 Modify(in, inlen, NULL, &outbuf, arg); 132 return out; 133 } operator()134 std::string operator()(const std::string& in, const std::string& arg="") const { 135 return operator()(in.data(), in.size(), arg); 136 } 137 138 virtual ~TemplateModifier(); // always need a virtual destructor! 
139 }; 140 141 142 // Returns the input verbatim (for testing) 143 class CTEMPLATE_DLL_DECL NullModifier : public TemplateModifier { 144 MODIFY_SIGNATURE_; 145 }; 146 extern CTEMPLATE_DLL_DECL NullModifier null_modifier; 147 148 // Escapes < > " ' & <non-space whitespace> to < > " 149 // ' & <space> 150 class CTEMPLATE_DLL_DECL HtmlEscape : public TemplateModifier { 151 MODIFY_SIGNATURE_; 152 }; 153 extern CTEMPLATE_DLL_DECL HtmlEscape html_escape; 154 155 // Same as HtmlEscape but leaves all whitespace alone. Eg. for <pre>..</pre> 156 class CTEMPLATE_DLL_DECL PreEscape : public TemplateModifier { 157 MODIFY_SIGNATURE_; 158 }; 159 extern CTEMPLATE_DLL_DECL PreEscape pre_escape; 160 161 // Like HtmlEscape but allows HTML entities, <br> tags, <wbr> tags, 162 // matched <b> and </b> tags, matched <i> and </i> tags, matched <em> and </em> 163 // tags, and matched <span dir=(rtl|ltr)> tags. 164 class CTEMPLATE_DLL_DECL SnippetEscape : public TemplateModifier { 165 MODIFY_SIGNATURE_; 166 }; 167 extern CTEMPLATE_DLL_DECL SnippetEscape snippet_escape; 168 169 // Replaces characters not safe for an unquoted attribute with underscore. 170 // Safe characters are alphanumeric, underscore, dash, period, and colon. 171 // The equal sign is also considered safe unless it is at the start 172 // or end of the input in which case it is replaced with underscore. 173 // 174 // We added the equal sign to the safe characters to allow this modifier 175 // to be used on attribute name/value pairs in HTML tags such as 176 // <div {{CLASS:H=attribute}}> 177 // where CLASS is expanded to "class=bla". 
178 // 179 // Note: The equal sign is replaced when found at either boundaries of the 180 // string due to the concern it may be lead to XSS under some special 181 // circumstances: Say, if this string is the value of an attribute in an 182 // HTML tag and ends with an equal sign, a browser may possibly end up 183 // interpreting the next token as the value of this string rather than 184 // a new attribute (esoteric). 185 class CTEMPLATE_DLL_DECL CleanseAttribute : public TemplateModifier { 186 MODIFY_SIGNATURE_; 187 }; 188 extern CTEMPLATE_DLL_DECL CleanseAttribute cleanse_attribute; 189 190 // Removes characters not safe for a CSS value. Safe characters are 191 // alphanumeric, space, underscore, period, coma, exclamation mark, 192 // pound, percent, and dash. 193 class CTEMPLATE_DLL_DECL CleanseCss : public TemplateModifier { 194 MODIFY_SIGNATURE_; 195 }; 196 extern CTEMPLATE_DLL_DECL CleanseCss cleanse_css; 197 198 // Checks that a url is either an absolute http(s) URL or a relative 199 // url that doesn't have a protocol hidden in it (ie [foo.html] is 200 // fine, but not [javascript:foo]) and then performs another type of 201 // escaping. Returns the url escaped with the specified modifier if 202 // good, otherwise returns a safe replacement URL. 203 // This is normally "#", but for <img> tags, it is not safe to set 204 // the src attribute to "#". This is because this causes some browsers 205 // to reload the page, which can cause a DoS. 
206 class CTEMPLATE_DLL_DECL ValidateUrl : public TemplateModifier { 207 public: ValidateUrl(const TemplateModifier & chained_modifier,const char * unsafe_url_replacement)208 explicit ValidateUrl(const TemplateModifier& chained_modifier, 209 const char* unsafe_url_replacement) 210 : chained_modifier_(chained_modifier), 211 unsafe_url_replacement_(unsafe_url_replacement), 212 unsafe_url_replacement_length_(strlen(unsafe_url_replacement)) { } 213 MODIFY_SIGNATURE_; 214 static const char* const kUnsafeUrlReplacement; 215 static const char* const kUnsafeImgSrcUrlReplacement; 216 private: 217 const TemplateModifier& chained_modifier_; 218 const char* unsafe_url_replacement_; 219 int unsafe_url_replacement_length_; 220 }; 221 extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_html_escape; 222 extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_javascript_escape; 223 extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_css_escape; 224 extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_html_escape; 225 extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_javascript_escape; 226 extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_css_escape; 227 228 // Escapes < > & " ' to < > & " ' (same as in HtmlEscape). 229 // If you use it within a CDATA section, you may be escaping more characters 230 // than strictly necessary. If this turns out to be an issue, we will need 231 // to add a variant just for CDATA. 232 class CTEMPLATE_DLL_DECL XmlEscape : public TemplateModifier { 233 MODIFY_SIGNATURE_; 234 }; 235 extern CTEMPLATE_DLL_DECL XmlEscape xml_escape; 236 237 // Escapes characters that cannot appear unescaped in a javascript string 238 // assuming UTF-8 encoded input. 239 // This does NOT escape all characters that cannot appear unescaped in a 240 // javascript regular expression literal. 
241 class CTEMPLATE_DLL_DECL JavascriptEscape : public TemplateModifier { 242 MODIFY_SIGNATURE_; 243 }; 244 extern CTEMPLATE_DLL_DECL JavascriptEscape javascript_escape; 245 246 // Checks that the input is a valid javascript non-string literal 247 // meaning a boolean (true, false) or a numeric value (decimal, hex or octal). 248 // If valid, we output the input as is, otherwise we output null instead. 249 // Input of zero length is considered valid and nothing is output. 250 // 251 // The emphasis is on safety against injection of javascript code rather 252 // than perfect validation, as such it is possible for non-valid literals to 253 // pass through. 254 // 255 // You would use this modifier for javascript variables that are not 256 // enclosed in quotes such as: 257 // <script>var a = {{VALUE}};</script> OR 258 // <a href="url" onclick="doSubmit({{ID}})"> 259 // For variables that are quoted (i.e. string literals) use javascript_escape. 260 // 261 // Limitations: 262 // . NaN, +/-Infinity and null are not recognized. 263 // . Output is not guaranteed to be a valid literal, 264 // e.g: +55+-e34 will output as is. 265 // e.g: trueeee will output nothing as it is not a valid boolean. 266 // 267 // Details: 268 // . For Hex numbers, it checks for case-insensitive 0x[0-9A-F]+ 269 // that should be a proper check. 270 // . For other numbers, it checks for case-insensitive [0-9eE+-.]* 271 // so can also accept invalid numbers such as the number 5..45--10. 272 // . "true" and "false" (without quotes) are also accepted and that's it. 273 // 274 class CTEMPLATE_DLL_DECL JavascriptNumber : public TemplateModifier { 275 MODIFY_SIGNATURE_; 276 }; 277 extern CTEMPLATE_DLL_DECL JavascriptNumber javascript_number; 278 279 // Escapes characters not in [0-9a-zA-Z.,_:*/~!()-] as %-prefixed hex. 280 // Space is encoded as a +. 
281 class CTEMPLATE_DLL_DECL UrlQueryEscape : public TemplateModifier { 282 MODIFY_SIGNATURE_; 283 }; 284 extern CTEMPLATE_DLL_DECL UrlQueryEscape url_query_escape; 285 286 // Escapes " \ / <FF> <CR> <LF> <BS> <TAB> to \" \\ \/ \f \r \n \b \t 287 // Also escapes < > & to their corresponding \uXXXX representation 288 // (\u003C, \u003E, \u0026 respectively). 289 class CTEMPLATE_DLL_DECL JsonEscape : public TemplateModifier { 290 MODIFY_SIGNATURE_; 291 }; 292 extern CTEMPLATE_DLL_DECL JsonEscape json_escape; 293 294 // Inserts the given prefix (given as the argument to this modifier) 295 // after every newline in the text. Note that it does *not* insert 296 // prefix at the very beginning of the text -- in its expected use, 297 // that prefix will already be present before this text, in the 298 // template. This is meant to be used internally, and is not exported 299 // via the g_modifiers list. 300 class CTEMPLATE_DLL_DECL PrefixLine : public TemplateModifier { 301 MODIFY_SIGNATURE_; 302 }; 303 extern CTEMPLATE_DLL_DECL PrefixLine prefix_line; 304 305 306 #undef MODIFY_SIGNATURE_ 307 308 309 // Registers a new template modifier. 310 // long_name must start with "x-". 311 // If the modifier takes a value (eg "{{VAR:x-name=value}}"), then 312 // long_name should end with "=". This is similar to getopt(3) syntax. 313 // We also allow value-specializations, with specific values specified 314 // as part of long-name. For instance: 315 // AddModifier("x-mod=", &my_modifierA); 316 // AddModifier("x-mod=bar", &my_modifierB); 317 // AddModifier("x-mod2", &my_modifierC); 318 // For the template 319 // {{VAR1:x-mod=foo}} {{VAR2:x-mod=bar}} {{VAR3:x-mod=baz}} {{VAR4:x-mod2}} 320 // VAR1 and VAR3 would get modified by my_modifierA, VAR2 by my_modifierB, 321 // and VAR4 by my_modifierC. The order of the AddModifier calls is not 322 // significant. 
323 extern CTEMPLATE_DLL_DECL 324 bool AddModifier(const char* long_name, const TemplateModifier* modifier); 325 326 // Same as AddModifier() above except that the modifier is considered 327 // to produce safe output that can be inserted in any context without 328 // the need for additional escaping. This difference only impacts 329 // the Auto-Escape mode: In that mode, when a variable (or template-include) 330 // has a modifier added via AddXssSafeModifier(), it is excluded from 331 // further escaping, effectively treated as though it had the :none modifier. 332 // Because Auto-Escape is disabled for any variable and template-include 333 // that includes such a modifier, use this function with care and ensure 334 // that it may not emit harmful output that could lead to XSS. 335 // 336 // Some valid uses of AddXssSafeModifier: 337 // . A modifier that converts a string to an integer since 338 // an integer is generally safe in any context. 339 // . A modifier that returns one of a fixed number of safe values 340 // depending on properties of the input. 341 // 342 // Some not recommended uses of AddXssSafeModifier: 343 // . A modifier that applies some extra formatting to the input 344 // before returning it since the output will still contain 345 // harmful content if the input does. 346 // . A modifier that applies one type of escaping to the input 347 // (say HTML-escape). This may be dangerous when the modifier 348 // is used in a different context (say Javascript) where this 349 // escaping may be inadequate. 350 extern CTEMPLATE_DLL_DECL 351 bool AddXssSafeModifier(const char* long_name, const TemplateModifier* modifier); 352 353 } 354 355 #endif // TEMPLATE_TEMPLATE_MODIFIERS_H_ 356