// Copyright (c) 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: csilvers@google.com (Craig Silverstein)
//
// We allow template variables to have modifiers, each possibly with a
// value associated with it.  Format is
//    {{VARNAME:modname[=modifier-value]:modname[=modifier-value]:...}}
// Modname refers to a functor that takes the variable's value
// and modifier-value (empty-string if no modifier-value was
// specified), and returns a munged value.  Modifiers are applied
// left-to-right.  We define the legal modnames here, and the
// functors they refer to.
//
// Modifiers have a long-name, an optional short-name (one char;
// may be \0 if you don't want a shortname), and a functor that's
// applied to the variable.
//
// In addition to the list of modifiers hard-coded in the source code
// here, it is possible to dynamically register modifiers using a long
// name starting with "x-".  If you wish to define your own modifier
// class, in your own source code, just subclass TemplateModifier --
// see template_modifiers.cc for details of how to do that.
//
// Adding a new built-in modifier, to this file, takes several steps,
// both in this .h file and in the corresponding .cc file:
// 1) .h file: Define a struct for the modifier.  It must subclass
//     TemplateModifier.
// 2) .h file: declare a variable that's an instance of the struct.
//    This is used for people who want to modify the string themselves,
//    via TemplateDictionary::SetEscapedValue.
// 3) .cc file: define the new modifier's Modify method.
// 4) .cc file: give storage for the variable declared in the .h file (in 2).
// 5) .cc file: add the modifier to the g_modifiers array.
62 63#ifndef TEMPLATE_TEMPLATE_MODIFIERS_H_ 64#define TEMPLATE_TEMPLATE_MODIFIERS_H_ 65 66#include <sys/types.h> // for size_t 67#include <string> 68#include <ctemplate/template_emitter.h> // so we can inline operator() 69#include <ctemplate/per_expand_data.h> // could probably just forward-declare 70 71@ac_windows_dllexport_defines@ 72 73namespace ctemplate { 74 75class Template; 76 77#define MODIFY_SIGNATURE_ \ 78 public: \ 79 virtual void Modify(const char* in, size_t inlen, \ 80 const PerExpandData*, ExpandEmitter* outbuf, \ 81 const std::string& arg) const 82 83// If you wish to write your own modifier, it should subclass this 84// method. Your subclass should only define Modify(); for efficiency, 85// we do not make operator() virtual. 86class @ac_windows_dllexport@ TemplateModifier { 87 public: 88 // This function takes a string as input, a char*/size_t pair, and 89 // appends the modified version to the end of outbuf. In addition 90 // to the variable-value to modify (specified via in/inlen), each 91 // Modify passes in two pieces of user-supplied data: 92 // 1) arg: this is the modifier-value, for modifiers that take a 93 // value (e.g. "{{VAR:modifier=value}}"). This value 94 // comes from the template file. For modifiers that take 95 // no modval argument, arg will always be "". For modifiers 96 // that do take such an argument, arg will always start with "=". 97 // 2) per_expand_data: this is a set of data that the application can 98 // associate with a TemplateDictionary, and is passed in to 99 // every variable expanded using that dictionary. This value 100 // comes from the source code. 101 virtual void Modify(const char* in, size_t inlen, 102 const PerExpandData* per_expand_data, 103 ExpandEmitter* outbuf, 104 const std::string& arg) const = 0; 105 106 // This function can be used to speed up modification. 
If Modify() 107 // is often a noop, you can implement MightModify() to indicate 108 // situations where it's safe to avoid the call to Modify(), because 109 // Modify() won't do any modifications in this case. Note it's 110 // always safe to return true here; you should just return false if 111 // you're certain Modify() can be ignored. This function is 112 // advisory; the template system is not required to call 113 // MightModify() before Modify(). 114 virtual bool MightModify(const PerExpandData* /*per_expand_data*/, 115 const std::string& /*arg*/) const { 116 return true; 117 } 118 119 // We support both modifiers that take an argument, and those that don't. 120 // We also support passing in a string, or a char*/int pair. 121 std::string operator()(const char* in, size_t inlen, const std::string& arg="") const { 122 std::string out; 123 // we'll reserve some space to account for minimal escaping: say 12% 124 out.reserve(inlen + inlen/8 + 16); 125 StringEmitter outbuf(&out); 126 Modify(in, inlen, NULL, &outbuf, arg); 127 return out; 128 } 129 std::string operator()(const std::string& in, const std::string& arg="") const { 130 return operator()(in.data(), in.size(), arg); 131 } 132 133 virtual ~TemplateModifier(); // always need a virtual destructor! 134}; 135 136 137// Returns the input verbatim (for testing) 138class @ac_windows_dllexport@ NullModifier : public TemplateModifier { 139 MODIFY_SIGNATURE_; 140}; 141extern @ac_windows_dllexport@ NullModifier null_modifier; 142 143// Escapes < > " ' & <non-space whitespace> to < > " 144// ' & <space> 145class @ac_windows_dllexport@ HtmlEscape : public TemplateModifier { 146 MODIFY_SIGNATURE_; 147}; 148extern @ac_windows_dllexport@ HtmlEscape html_escape; 149 150// Same as HtmlEscape but leaves all whitespace alone. Eg. 
// for <pre>..</pre>
class @ac_windows_dllexport@ PreEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ PreEscape pre_escape;

// Like HtmlEscape but allows HTML entities, <br> tags, <wbr> tags,
// matched <b> and </b> tags, matched <i> and </i> tags, matched <em> and </em>
// tags, and matched <span dir=(rtl|ltr)> tags.
class @ac_windows_dllexport@ SnippetEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ SnippetEscape snippet_escape;

// Replaces characters not safe for an unquoted attribute with underscore.
// Safe characters are alphanumeric, underscore, dash, period, and colon.
// The equal sign is also considered safe unless it is at the start
// or end of the input in which case it is replaced with underscore.
//
// We added the equal sign to the safe characters to allow this modifier
// to be used on attribute name/value pairs in HTML tags such as
//   <div {{CLASS:H=attribute}}>
// where CLASS is expanded to "class=bla".
//
// Note: The equal sign is replaced when found at either boundary of the
// string due to the concern that it may lead to XSS under some special
// circumstances: Say, if this string is the value of an attribute in an
// HTML tag and ends with an equal sign, a browser may possibly end up
// interpreting the next token as the value of this string rather than
// a new attribute (esoteric).
class @ac_windows_dllexport@ CleanseAttribute : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ CleanseAttribute cleanse_attribute;

// Removes characters not safe for a CSS value.  Safe characters are
// alphanumeric, space, underscore, period, comma, exclamation mark,
// pound, percent, and dash.
188class @ac_windows_dllexport@ CleanseCss : public TemplateModifier { 189 MODIFY_SIGNATURE_; 190}; 191extern @ac_windows_dllexport@ CleanseCss cleanse_css; 192 193// Checks that a url is either an absolute http(s) URL or a relative 194// url that doesn't have a protocol hidden in it (ie [foo.html] is 195// fine, but not [javascript:foo]) and then performs another type of 196// escaping. Returns the url escaped with the specified modifier if 197// good, otherwise returns a safe replacement URL. 198// This is normally "#", but for <img> tags, it is not safe to set 199// the src attribute to "#". This is because this causes some browsers 200// to reload the page, which can cause a DoS. 201class @ac_windows_dllexport@ ValidateUrl : public TemplateModifier { 202 public: 203 explicit ValidateUrl(const TemplateModifier& chained_modifier, 204 const char* unsafe_url_replacement) 205 : chained_modifier_(chained_modifier), 206 unsafe_url_replacement_(unsafe_url_replacement), 207 unsafe_url_replacement_length_(strlen(unsafe_url_replacement)) { } 208 MODIFY_SIGNATURE_; 209 static const char* const kUnsafeUrlReplacement; 210 static const char* const kUnsafeImgSrcUrlReplacement; 211 private: 212 const TemplateModifier& chained_modifier_; 213 const char* unsafe_url_replacement_; 214 int unsafe_url_replacement_length_; 215}; 216extern @ac_windows_dllexport@ ValidateUrl validate_url_and_html_escape; 217extern @ac_windows_dllexport@ ValidateUrl validate_url_and_javascript_escape; 218extern @ac_windows_dllexport@ ValidateUrl validate_url_and_css_escape; 219extern @ac_windows_dllexport@ ValidateUrl validate_img_src_url_and_html_escape; 220extern @ac_windows_dllexport@ ValidateUrl validate_img_src_url_and_javascript_escape; 221extern @ac_windows_dllexport@ ValidateUrl validate_img_src_url_and_css_escape; 222 223// Escapes < > & " ' to < > & " ' (same as in HtmlEscape). 224// If you use it within a CDATA section, you may be escaping more characters 225// than strictly necessary. 
// If this turns out to be an issue, we will need
// to add a variant just for CDATA.
class @ac_windows_dllexport@ XmlEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ XmlEscape xml_escape;

// Escapes characters that cannot appear unescaped in a javascript string
// assuming UTF-8 encoded input.
// This does NOT escape all characters that cannot appear unescaped in a
// javascript regular expression literal.
class @ac_windows_dllexport@ JavascriptEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ JavascriptEscape javascript_escape;

// Checks that the input is a valid javascript non-string literal
// meaning a boolean (true, false) or a numeric value (decimal, hex or octal).
// If valid, we output the input as is, otherwise we output null instead.
// Input of zero length is considered valid and nothing is output.
//
// The emphasis is on safety against injection of javascript code rather
// than perfect validation, as such it is possible for non-valid literals to
// pass through.
//
// You would use this modifier for javascript variables that are not
// enclosed in quotes such as:
//    <script>var a = {{VALUE}};</script> OR
//    <a href="url" onclick="doSubmit({{ID}})">
// For variables that are quoted (i.e. string literals) use javascript_escape.
//
// Limitations:
// . NaN, +/-Infinity and null are not recognized.
// . Output is not guaranteed to be a valid literal,
//   e.g: +55+-e34 will output as is.
//   e.g: trueeee will output nothing as it is not a valid boolean.
//   NOTE(review): "will output nothing" disagrees with the summary above,
//   which says invalid input is replaced by null -- confirm against the
//   implementation in the .cc file.
//
// Details:
// . For Hex numbers, it checks for case-insensitive 0x[0-9A-F]+
//   that should be a proper check.
// . For other numbers, it checks for case-insensitive [0-9eE+-.]*
//   so can also accept invalid numbers such as the number 5..45--10.
// . "true" and "false" (without quotes) are also accepted and that's it.
//
class @ac_windows_dllexport@ JavascriptNumber : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ JavascriptNumber javascript_number;

// Escapes characters not in [0-9a-zA-Z.,_:*/~!()-] as %-prefixed hex.
// Space is encoded as a +.
class @ac_windows_dllexport@ UrlQueryEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ UrlQueryEscape url_query_escape;

// Escapes " \ / <FF> <CR> <LF> <BS> <TAB> to \" \\ \/ \f \r \n \b \t
// Also escapes < > & to their corresponding \uXXXX representation
// (\u003C, \u003E, \u0026 respectively).
class @ac_windows_dllexport@ JsonEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ JsonEscape json_escape;

// Inserts the given prefix (given as the argument to this modifier)
// after every newline in the text.  Note that it does *not* insert
// prefix at the very beginning of the text -- in its expected use,
// that prefix will already be present before this text, in the
// template.  This is meant to be used internally, and is not exported
// via the g_modifiers list.
class @ac_windows_dllexport@ PrefixLine : public TemplateModifier {
  MODIFY_SIGNATURE_;
};
extern @ac_windows_dllexport@ PrefixLine prefix_line;


// The helper macro is private to this header; do not leak it to includers.
#undef MODIFY_SIGNATURE_


// Registers a new template modifier.
// long_name must start with "x-".
// If the modifier takes a value (eg "{{VAR:x-name=value}}"), then
// long_name should end with "=".  This is similar to getopt(3) syntax.
// We also allow value-specializations, with specific values specified
// as part of long-name.
// For instance:
//    AddModifier("x-mod=", &my_modifierA);
//    AddModifier("x-mod=bar", &my_modifierB);
//    AddModifier("x-mod2", &my_modifierC);
// For the template
//    {{VAR1:x-mod=foo}} {{VAR2:x-mod=bar}} {{VAR3:x-mod=baz}} {{VAR4:x-mod2}}
// VAR1 and VAR3 would get modified by my_modifierA, VAR2 by my_modifierB,
// and VAR4 by my_modifierC.  The order of the AddModifier calls is not
// significant.
// NOTE(review): the meaning of the bool result is not stated in this
// header; presumably it reports whether registration succeeded --
// confirm against the definition in the .cc file.
extern @ac_windows_dllexport@
bool AddModifier(const char* long_name, const TemplateModifier* modifier);

// Same as AddModifier() above except that the modifier is considered
// to produce safe output that can be inserted in any context without
// the need for additional escaping.  This difference only impacts
// the Auto-Escape mode: In that mode, when a variable (or template-include)
// has a modifier added via AddXssSafeModifier(), it is excluded from
// further escaping, effectively treated as though it had the :none modifier.
// Because Auto-Escape is disabled for any variable and template-include
// that includes such a modifier, use this function with care and ensure
// that it may not emit harmful output that could lead to XSS.
//
// Some valid uses of AddXssSafeModifier:
// . A modifier that converts a string to an integer since
//   an integer is generally safe in any context.
// . A modifier that returns one of a fixed number of safe values
//   depending on properties of the input.
//
// Some not recommended uses of AddXssSafeModifier:
// . A modifier that applies some extra formatting to the input
//   before returning it since the output will still contain
//   harmful content if the input does.
// . A modifier that applies one type of escaping to the input
//   (say HTML-escape).  This may be dangerous when the modifier
//   is used in a different context (say Javascript) where this
//   escaping may be inadequate.
extern @ac_windows_dllexport@
bool AddXssSafeModifier(const char* long_name, const TemplateModifier* modifier);

}  // namespace ctemplate

#endif  // TEMPLATE_TEMPLATE_MODIFIERS_H_