1 // Copyright (c) 2007, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // ---
31 // Author: csilvers@google.com (Craig Silverstein)
32 //
33 // We allow template variables to have modifiers, each possibly with a
34 // value associated with it.  Format is
35 //    {{VARNAME:modname[=modifier-value]:modname[=modifier-value]:...}}
36 // Modname refers to a functor that takes the variable's value
37 // and modifier-value (empty-string if no modifier-value was
38 // specified), and returns a munged value.  Modifiers are applied
39 // left-to-right.  We define the legal modnames here, and the
40 // functors they refer to.
41 //
42 // Modifiers have a long-name, an optional short-name (one char;
43 // may be \0 if you don't want a shortname), and a functor that's
44 // applied to the variable.
45 //
46 // In addition to the list of modifiers hard-coded in the source code
// here, it is possible to dynamically register modifiers using a long
48 // name starting with "x-".  If you wish to define your own modifier
49 // class, in your own source code, just subclass TemplateModifier --
50 // see template_modifiers.cc for details of how to do that.
51 //
52 // Adding a new built-in modifier, to this file, takes several steps,
53 // both in this .h file and in the corresponding .cc file:
54 // 1) .h file: Define a struct for the modifier.  It must subclass
55 //     TemplateModifier.
56 // 2) .h file: declare a variable that's an instance of the struct.
57 //    This is used for people who want to modify the string themselves,
58 //    via TemplateDictionary::SetEscapedValue.
// 3) .cc file: define the new modifier's Modify method.
// 4) .cc file: give storage for the variable declared in the .h file (in 2).
// 5) .cc file: add the modifier to the g_modifiers array.
62 
63 #ifndef TEMPLATE_TEMPLATE_MODIFIERS_H_
64 #define TEMPLATE_TEMPLATE_MODIFIERS_H_
65 
#include <string.h>      // for strlen
#include <sys/types.h>   // for size_t
#include <string>
#include <ctemplate/template_emitter.h>   // so we can inline operator()
#include <ctemplate/per_expand_data.h>    // could probably just forward-declare
70 
71 // NOTE: if you are statically linking the template library into your binary
72 // (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
73 // as a compiler flag in your project file to turn off the dllimports.
74 #ifndef CTEMPLATE_DLL_DECL
75 # define CTEMPLATE_DLL_DECL  __declspec(dllimport)
76 #endif
77 
78 namespace ctemplate {
79 
80 class Template;
81 
// Used inside the body of a modifier class: expands to a public
// declaration of the class's Modify() method.  The expansion carries no
// trailing semicolon; the user of the macro supplies it.
#define MODIFY_SIGNATURE_                                       \
 public:                                                        \
  virtual void Modify(const char* in,                           \
                      size_t inlen,                             \
                      const PerExpandData*,                     \
                      ExpandEmitter* outbuf,                    \
                      const std::string& arg) const
87 
88 // If you wish to write your own modifier, it should subclass this
89 // method.  Your subclass should only define Modify(); for efficiency,
90 // we do not make operator() virtual.
91 class CTEMPLATE_DLL_DECL TemplateModifier {
92  public:
93   // This function takes a string as input, a char*/size_t pair, and
94   // appends the modified version to the end of outbuf.  In addition
95   // to the variable-value to modify (specified via in/inlen), each
96   // Modify passes in two pieces of user-supplied data:
97   // 1) arg: this is the modifier-value, for modifiers that take a
98   //         value (e.g. "{{VAR:modifier=value}}").  This value
99   //         comes from the template file.  For modifiers that take
100   //         no modval argument, arg will always be "".  For modifiers
101   //         that do take such an argument, arg will always start with "=".
102   // 2) per_expand_data: this is a set of data that the application can
103   //         associate with a TemplateDictionary, and is passed in to
104   //         every variable expanded using that dictionary.  This value
105   //         comes from the source code.
106   virtual void Modify(const char* in, size_t inlen,
107                       const PerExpandData* per_expand_data,
108                       ExpandEmitter* outbuf,
109                       const std::string& arg) const = 0;
110 
111   // This function can be used to speed up modification.  If Modify()
112   // is often a noop, you can implement MightModify() to indicate
113   // situations where it's safe to avoid the call to Modify(), because
114   // Modify() won't do any modifications in this case.  Note it's
115   // always safe to return true here; you should just return false if
116   // you're certain Modify() can be ignored.  This function is
117   // advisory; the template system is not required to call
118   // MightModify() before Modify().
MightModify(const PerExpandData *,const std::string &)119   virtual bool MightModify(const PerExpandData* /*per_expand_data*/,
120                            const std::string& /*arg*/) const {
121     return true;
122   }
123 
124   // We support both modifiers that take an argument, and those that don't.
125   // We also support passing in a string, or a char*/int pair.
operator()126   std::string operator()(const char* in, size_t inlen, const std::string& arg="") const {
127     std::string out;
128     // we'll reserve some space to account for minimal escaping: say 12%
129     out.reserve(inlen + inlen/8 + 16);
130     StringEmitter outbuf(&out);
131     Modify(in, inlen, NULL, &outbuf, arg);
132     return out;
133   }
operator()134   std::string operator()(const std::string& in, const std::string& arg="") const {
135     return operator()(in.data(), in.size(), arg);
136   }
137 
138   virtual ~TemplateModifier();   // always need a virtual destructor!
139 };
140 
141 
142 // Returns the input verbatim (for testing)
143 class CTEMPLATE_DLL_DECL NullModifier : public TemplateModifier {
144   MODIFY_SIGNATURE_;
145 };
146 extern CTEMPLATE_DLL_DECL NullModifier null_modifier;
147 
148 // Escapes < > " ' & <non-space whitespace> to &lt; &gt; &quot;
149 // &#39; &amp; <space>
150 class CTEMPLATE_DLL_DECL HtmlEscape : public TemplateModifier {
151   MODIFY_SIGNATURE_;
152 };
153 extern CTEMPLATE_DLL_DECL HtmlEscape html_escape;
154 
155 // Same as HtmlEscape but leaves all whitespace alone. Eg. for <pre>..</pre>
156 class CTEMPLATE_DLL_DECL PreEscape : public TemplateModifier {
157   MODIFY_SIGNATURE_;
158 };
159 extern CTEMPLATE_DLL_DECL PreEscape pre_escape;
160 
161 // Like HtmlEscape but allows HTML entities, <br> tags, <wbr> tags,
162 // matched <b> and </b> tags, matched <i> and </i> tags, matched <em> and </em>
163 // tags, and matched <span dir=(rtl|ltr)> tags.
164 class CTEMPLATE_DLL_DECL SnippetEscape : public TemplateModifier {
165   MODIFY_SIGNATURE_;
166 };
167 extern CTEMPLATE_DLL_DECL SnippetEscape snippet_escape;
168 
169 // Replaces characters not safe for an unquoted attribute with underscore.
170 // Safe characters are alphanumeric, underscore, dash, period, and colon.
171 // The equal sign is also considered safe unless it is at the start
172 // or end of the input in which case it is replaced with underscore.
173 //
174 // We added the equal sign to the safe characters to allow this modifier
175 // to be used on attribute name/value pairs in HTML tags such as
176 //   <div {{CLASS:H=attribute}}>
177 // where CLASS is expanded to "class=bla".
178 //
179 // Note: The equal sign is replaced when found at either boundaries of the
180 // string due to the concern it may be lead to XSS under some special
181 // circumstances: Say, if this string is the value of an attribute in an
182 // HTML tag and ends with an equal sign, a browser may possibly end up
183 // interpreting the next token as the value of this string rather than
184 // a new attribute (esoteric).
185 class CTEMPLATE_DLL_DECL CleanseAttribute : public TemplateModifier {
186   MODIFY_SIGNATURE_;
187 };
188 extern CTEMPLATE_DLL_DECL CleanseAttribute cleanse_attribute;
189 
190 // Removes characters not safe for a CSS value. Safe characters are
191 // alphanumeric, space, underscore, period, coma, exclamation mark,
192 // pound, percent, and dash.
193 class CTEMPLATE_DLL_DECL CleanseCss : public TemplateModifier {
194   MODIFY_SIGNATURE_;
195 };
196 extern CTEMPLATE_DLL_DECL CleanseCss cleanse_css;
197 
198 // Checks that a url is either an absolute http(s) URL or a relative
199 // url that doesn't have a protocol hidden in it (ie [foo.html] is
200 // fine, but not [javascript:foo]) and then performs another type of
201 // escaping. Returns the url escaped with the specified modifier if
202 // good, otherwise returns a safe replacement URL.
203 // This is normally "#", but for <img> tags, it is not safe to set
204 // the src attribute to "#".  This is because this causes some browsers
205 // to reload the page, which can cause a DoS.
206 class CTEMPLATE_DLL_DECL ValidateUrl : public TemplateModifier {
207  public:
ValidateUrl(const TemplateModifier & chained_modifier,const char * unsafe_url_replacement)208   explicit ValidateUrl(const TemplateModifier& chained_modifier,
209                        const char* unsafe_url_replacement)
210       : chained_modifier_(chained_modifier),
211         unsafe_url_replacement_(unsafe_url_replacement),
212         unsafe_url_replacement_length_(strlen(unsafe_url_replacement)) { }
213   MODIFY_SIGNATURE_;
214   static const char* const kUnsafeUrlReplacement;
215   static const char* const kUnsafeImgSrcUrlReplacement;
216  private:
217   const TemplateModifier& chained_modifier_;
218   const char* unsafe_url_replacement_;
219   int unsafe_url_replacement_length_;
220 };
221 extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_html_escape;
222 extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_javascript_escape;
223 extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_css_escape;
224 extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_html_escape;
225 extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_javascript_escape;
226 extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_css_escape;
227 
228 // Escapes < > & " ' to &lt; &gt; &amp; &quot; &#39; (same as in HtmlEscape).
229 // If you use it within a CDATA section, you may be escaping more characters
230 // than strictly necessary. If this turns out to be an issue, we will need
231 // to add a variant just for CDATA.
232 class CTEMPLATE_DLL_DECL XmlEscape : public TemplateModifier {
233   MODIFY_SIGNATURE_;
234 };
235 extern CTEMPLATE_DLL_DECL XmlEscape xml_escape;
236 
237 // Escapes characters that cannot appear unescaped in a javascript string
238 // assuming UTF-8 encoded input.
239 // This does NOT escape all characters that cannot appear unescaped in a
240 // javascript regular expression literal.
241 class CTEMPLATE_DLL_DECL JavascriptEscape : public TemplateModifier {
242   MODIFY_SIGNATURE_;
243 };
244 extern CTEMPLATE_DLL_DECL JavascriptEscape javascript_escape;
245 
246 // Checks that the input is a valid javascript non-string literal
247 // meaning a boolean (true, false) or a numeric value (decimal, hex or octal).
248 // If valid, we output the input as is, otherwise we output null instead.
249 // Input of zero length is considered valid and nothing is output.
250 //
251 // The emphasis is on safety against injection of javascript code rather
252 // than perfect validation, as such it is possible for non-valid literals to
253 // pass through.
254 //
255 // You would use this modifier for javascript variables that are not
256 // enclosed in quotes such as:
257 //    <script>var a = {{VALUE}};</script> OR
258 //    <a href="url" onclick="doSubmit({{ID}})">
259 // For variables that are quoted (i.e. string literals) use javascript_escape.
260 //
261 // Limitations:
262 // . NaN, +/-Infinity and null are not recognized.
263 // . Output is not guaranteed to be a valid literal,
264 //   e.g: +55+-e34 will output as is.
265 //   e.g: trueeee will output nothing as it is not a valid boolean.
266 //
267 // Details:
268 // . For Hex numbers, it checks for case-insensitive 0x[0-9A-F]+
269 //   that should be a proper check.
270 // . For other numbers, it checks for case-insensitive [0-9eE+-.]*
271 //   so can also accept invalid numbers such as the number 5..45--10.
272 // . "true" and "false" (without quotes) are also accepted and that's it.
273 //
274 class CTEMPLATE_DLL_DECL JavascriptNumber : public TemplateModifier {
275   MODIFY_SIGNATURE_;
276 };
277 extern CTEMPLATE_DLL_DECL JavascriptNumber javascript_number;
278 
279 // Escapes characters not in [0-9a-zA-Z.,_:*/~!()-] as %-prefixed hex.
280 // Space is encoded as a +.
281 class CTEMPLATE_DLL_DECL UrlQueryEscape : public TemplateModifier {
282   MODIFY_SIGNATURE_;
283 };
284 extern CTEMPLATE_DLL_DECL UrlQueryEscape url_query_escape;
285 
286 // Escapes " \ / <FF> <CR> <LF> <BS> <TAB> to \" \\ \/ \f \r \n \b \t
287 // Also escapes < > & to their corresponding \uXXXX representation
288 // (\u003C, \u003E, \u0026 respectively).
289 class CTEMPLATE_DLL_DECL JsonEscape : public TemplateModifier {
290   MODIFY_SIGNATURE_;
291 };
292 extern CTEMPLATE_DLL_DECL JsonEscape json_escape;
293 
294 // Inserts the given prefix (given as the argument to this modifier)
295 // after every newline in the text.  Note that it does *not* insert
296 // prefix at the very beginning of the text -- in its expected use,
297 // that prefix will already be present before this text, in the
298 // template.  This is meant to be used internally, and is not exported
299 // via the g_modifiers list.
300 class CTEMPLATE_DLL_DECL PrefixLine : public TemplateModifier {
301   MODIFY_SIGNATURE_;
302 };
303 extern CTEMPLATE_DLL_DECL PrefixLine prefix_line;
304 
305 
306 #undef MODIFY_SIGNATURE_
307 
308 
309 // Registers a new template modifier.
310 // long_name must start with "x-".
311 // If the modifier takes a value (eg "{{VAR:x-name=value}}"), then
312 // long_name should end with "=".  This is similar to getopt(3) syntax.
313 // We also allow value-specializations, with specific values specified
314 // as part of long-name.  For instance:
315 //    AddModifier("x-mod=", &my_modifierA);
316 //    AddModifier("x-mod=bar", &my_modifierB);
317 //    AddModifier("x-mod2", &my_modifierC);
318 // For the template
319 //    {{VAR1:x-mod=foo}} {{VAR2:x-mod=bar}} {{VAR3:x-mod=baz}} {{VAR4:x-mod2}}
320 // VAR1 and VAR3 would get modified by my_modifierA, VAR2 by my_modifierB,
321 // and VAR4 by my_modifierC.  The order of the AddModifier calls is not
322 // significant.
323 extern CTEMPLATE_DLL_DECL
324 bool AddModifier(const char* long_name, const TemplateModifier* modifier);
325 
326 // Same as AddModifier() above except that the modifier is considered
327 // to produce safe output that can be inserted in any context without
328 // the need for additional escaping. This difference only impacts
329 // the Auto-Escape mode: In that mode, when a variable (or template-include)
330 // has a modifier added via AddXssSafeModifier(), it is excluded from
331 // further escaping, effectively treated as though it had the :none modifier.
332 // Because Auto-Escape is disabled for any variable and template-include
333 // that includes such a modifier, use this function with care and ensure
334 // that it may not emit harmful output that could lead to XSS.
335 //
336 // Some valid uses of AddXssSafeModifier:
337 // . A modifier that converts a string to an integer since
338 //   an integer is generally safe in any context.
339 // . A modifier that returns one of a fixed number of safe values
340 //   depending on properties of the input.
341 //
342 // Some not recommended uses of AddXssSafeModifier:
343 // . A modifier that applies some extra formatting to the input
344 //   before returning it since the output will still contain
345 //   harmful content if the input does.
346 // . A modifier that applies one type of escaping to the input
347 //   (say HTML-escape). This may be dangerous when the modifier
348 //   is used in a different context (say Javascript) where this
349 //   escaping may be inadequate.
350 extern CTEMPLATE_DLL_DECL
351 bool AddXssSafeModifier(const char* long_name, const TemplateModifier* modifier);
352 
353 }
354 
355 #endif  // TEMPLATE_TEMPLATE_MODIFIERS_H_
356