1 /*
2 * Copyright (C) 2011 Adam Barth. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "XSSFilter.h"
28
29 #include "DOMWindow.h"
30 #include "Document.h"
31 #include "DocumentLoader.h"
32 #include "Frame.h"
33 #include "HTMLDocumentParser.h"
34 #include "HTMLNames.h"
35 #include "HTMLParamElement.h"
36 #include "HTMLParserIdioms.h"
37 #include "SecurityOrigin.h"
38 #include "Settings.h"
39 #include "TextEncoding.h"
40 #include "TextResourceDecoder.h"
41 #include <wtf/text/CString.h>
42
43 namespace WebCore {
44
45 using namespace HTMLNames;
46
47 namespace {
48
isNonCanonicalCharacter(UChar c)49 bool isNonCanonicalCharacter(UChar c)
50 {
51 // We remove all non-ASCII characters, including non-printable ASCII characters.
52 //
53 // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
54 // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
55 // adverse effect that we remove any legitimate zeros from a string.
56 //
57 // For instance: new String("http://localhost:8000") => new String("http://localhost:8").
58 return (c == '\\' || c == '0' || c == '\0' || c >= 127);
59 }
60
// Returns a copy of |string| from which every character matched by
// isNonCanonicalCharacter() has been removed.
String canonicalize(const String& string)
{
    String canonicalForm = string.removeCharacters(isNonCanonicalCharacter);
    return canonicalForm;
}
65
isRequiredForInjection(UChar c)66 bool isRequiredForInjection(UChar c)
67 {
68 return (c == '\'' || c == '"' || c == '<' || c == '>');
69 }
70
hasName(const HTMLToken & token,const QualifiedName & name)71 bool hasName(const HTMLToken& token, const QualifiedName& name)
72 {
73 return equalIgnoringNullity(token.name(), static_cast<const String&>(name.localName()));
74 }
75
findAttributeWithName(const HTMLToken & token,const QualifiedName & name,size_t & indexOfMatchingAttribute)76 bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
77 {
78 for (size_t i = 0; i < token.attributes().size(); ++i) {
79 if (equalIgnoringNullity(token.attributes().at(i).m_name, name.localName())) {
80 indexOfMatchingAttribute = i;
81 return true;
82 }
83 }
84 return false;
85 }
86
isNameOfInlineEventHandler(const Vector<UChar,32> & name)87 bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name)
88 {
89 const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
90 if (name.size() < lengthOfShortestInlineEventHandlerName)
91 return false;
92 return name[0] == 'o' && name[1] == 'n';
93 }
94
containsJavaScriptURL(const Vector<UChar,32> & value)95 bool containsJavaScriptURL(const Vector<UChar, 32>& value)
96 {
97 static const char javaScriptScheme[] = "javascript:";
98 static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1;
99
100 size_t i;
101 for (i = 0; i < value.size(); ++i) {
102 if (!isHTMLSpace(value[i]))
103 break;
104 }
105
106 if (value.size() - i < lengthOfJavaScriptScheme)
107 return false;
108
109 return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme);
110 }
111
// Produces the canonical, decoded form of |string| that snippets are matched
// against:
//   1. every '+' is replaced by a space,
//   2. the result is passed through decodeURLEscapeSequences(),
//   3. its UTF-8 bytes are decoded using |encoding|,
//   4. the outcome is canonicalized.
// If step 3 yields an empty string, the result of step 2 is canonicalized
// instead.
String decodeURL(const String& string, const TextEncoding& encoding)
{
    String unescapedString = string;
    unescapedString.replace('+', ' ');
    unescapedString = decodeURLEscapeSequences(unescapedString);

    CString unescapedUTF8 = unescapedString.utf8();
    String reinterpretedString = encoding.decode(unescapedUTF8.data(), unescapedUTF8.length());

    // FIXME: Is this check necessary?
    return canonicalize(reinterpretedString.isEmpty() ? unescapedString : reinterpretedString);
}
124
125 }
126
XSSFilter(HTMLDocumentParser * parser)127 XSSFilter::XSSFilter(HTMLDocumentParser* parser)
128 : m_parser(parser)
129 , m_isEnabled(false)
130 , m_xssProtection(XSSProtectionEnabled)
131 , m_state(Uninitialized)
132 {
133 ASSERT(m_parser);
134 if (Frame* frame = parser->document()->frame()) {
135 if (Settings* settings = frame->settings())
136 m_isEnabled = settings->xssAuditorEnabled();
137 }
138 // Although tempting to call init() at this point, the various objects
139 // we want to reference might not all have been constructed yet.
140 }
141
init()142 void XSSFilter::init()
143 {
144 const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
145 const int suffixTreeDepth = 5;
146
147 ASSERT(m_state == Uninitialized);
148 m_state = Initial;
149
150 if (!m_isEnabled)
151 return;
152
153 // In theory, the Document could have detached from the Frame after the
154 // XSSFilter was constructed.
155 if (!m_parser->document()->frame()) {
156 m_isEnabled = false;
157 return;
158 }
159
160 const KURL& url = m_parser->document()->url();
161
162 if (url.protocolIsData()) {
163 m_isEnabled = false;
164 return;
165 }
166
167 TextResourceDecoder* decoder = m_parser->document()->decoder();
168 m_decodedURL = decoder ? decodeURL(url.string(), decoder->encoding()) : url.string();
169 if (m_decodedURL.find(isRequiredForInjection, 0) == notFound)
170 m_decodedURL = String();
171
172 if (DocumentLoader* documentLoader = m_parser->document()->frame()->loader()->documentLoader()) {
173 DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection"));
174 m_xssProtection = parseXSSProtectionHeader(documentLoader->response().httpHeaderField(XSSProtectionHeader));
175
176 FormData* httpBody = documentLoader->originalRequest().httpBody();
177 if (httpBody && !httpBody->isEmpty()) {
178 String httpBodyAsString = httpBody->flattenToString();
179 m_decodedHTTPBody = decoder ? decodeURL(httpBodyAsString, decoder->encoding()) : httpBodyAsString;
180 if (m_decodedHTTPBody.find(isRequiredForInjection, 0) == notFound)
181 m_decodedHTTPBody = String();
182 if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree)
183 m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
184 }
185 }
186
187 if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty())
188 m_isEnabled = false;
189 }
190
filterToken(HTMLToken & token)191 void XSSFilter::filterToken(HTMLToken& token)
192 {
193 if (m_state == Uninitialized) {
194 init();
195 ASSERT(m_state == Initial);
196 }
197
198 if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled)
199 return;
200
201 bool didBlockScript = false;
202
203 switch (m_state) {
204 case Uninitialized:
205 ASSERT_NOT_REACHED();
206 break;
207 case Initial:
208 didBlockScript = filterTokenInitial(token);
209 break;
210 case AfterScriptStartTag:
211 didBlockScript = filterTokenAfterScriptStartTag(token);
212 ASSERT(m_state == Initial);
213 m_cachedSnippet = String();
214 break;
215 }
216
217 if (didBlockScript) {
218 // FIXME: Consider using a more helpful console message.
219 DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
220 // FIXME: We should add the real line number to the console.
221 m_parser->document()->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
222
223 if (m_xssProtection == XSSProtectionBlockEnabled) {
224 m_parser->document()->frame()->loader()->stopAllLoaders();
225 m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String());
226 }
227 }
228 }
229
filterTokenInitial(HTMLToken & token)230 bool XSSFilter::filterTokenInitial(HTMLToken& token)
231 {
232 ASSERT(m_state == Initial);
233
234 if (token.type() != HTMLToken::StartTag)
235 return false;
236
237 bool didBlockScript = eraseDangerousAttributesIfInjected(token);
238
239 if (hasName(token, scriptTag))
240 didBlockScript |= filterScriptToken(token);
241 else if (hasName(token, objectTag))
242 didBlockScript |= filterObjectToken(token);
243 else if (hasName(token, paramTag))
244 didBlockScript |= filterParamToken(token);
245 else if (hasName(token, embedTag))
246 didBlockScript |= filterEmbedToken(token);
247 else if (hasName(token, appletTag))
248 didBlockScript |= filterAppletToken(token);
249 else if (hasName(token, iframeTag))
250 didBlockScript |= filterIframeToken(token);
251 else if (hasName(token, metaTag))
252 didBlockScript |= filterMetaToken(token);
253 else if (hasName(token, baseTag))
254 didBlockScript |= filterBaseToken(token);
255 else if (hasName(token, formTag))
256 didBlockScript |= filterFormToken(token);
257
258 return didBlockScript;
259 }
260
filterTokenAfterScriptStartTag(HTMLToken & token)261 bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token)
262 {
263 ASSERT(m_state == AfterScriptStartTag);
264 m_state = Initial;
265
266 if (token.type() != HTMLToken::Character) {
267 ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile);
268 return false;
269 }
270
271 int start = 0;
272 // FIXME: We probably want to grab only the first few characters of the
273 // contents of the script element.
274 int end = token.endIndex() - token.startIndex();
275 if (isContainedInRequest(m_cachedSnippet + snippetForRange(token, start, end))) {
276 token.eraseCharacters();
277 token.appendToCharacter(' '); // Technically, character tokens can't be empty.
278 return true;
279 }
280 return false;
281 }
282
filterScriptToken(HTMLToken & token)283 bool XSSFilter::filterScriptToken(HTMLToken& token)
284 {
285 ASSERT(m_state == Initial);
286 ASSERT(token.type() == HTMLToken::StartTag);
287 ASSERT(hasName(token, scriptTag));
288
289 if (eraseAttributeIfInjected(token, srcAttr, blankURL().string()))
290 return true;
291
292 m_state = AfterScriptStartTag;
293 m_cachedSnippet = m_parser->sourceForToken(token);
294 return false;
295 }
296
filterObjectToken(HTMLToken & token)297 bool XSSFilter::filterObjectToken(HTMLToken& token)
298 {
299 ASSERT(m_state == Initial);
300 ASSERT(token.type() == HTMLToken::StartTag);
301 ASSERT(hasName(token, objectTag));
302
303 bool didBlockScript = false;
304
305 didBlockScript |= eraseAttributeIfInjected(token, dataAttr, blankURL().string());
306 didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
307 didBlockScript |= eraseAttributeIfInjected(token, classidAttr);
308
309 return didBlockScript;
310 }
311
filterParamToken(HTMLToken & token)312 bool XSSFilter::filterParamToken(HTMLToken& token)
313 {
314 ASSERT(m_state == Initial);
315 ASSERT(token.type() == HTMLToken::StartTag);
316 ASSERT(hasName(token, paramTag));
317
318 size_t indexOfNameAttribute;
319 if (!findAttributeWithName(token, nameAttr, indexOfNameAttribute))
320 return false;
321
322 const HTMLToken::Attribute& nameAttribute = token.attributes().at(indexOfNameAttribute);
323 String name = String(nameAttribute.m_value.data(), nameAttribute.m_value.size());
324
325 if (!HTMLParamElement::isURLParameter(name))
326 return false;
327
328 return eraseAttributeIfInjected(token, valueAttr, blankURL().string());
329 }
330
filterEmbedToken(HTMLToken & token)331 bool XSSFilter::filterEmbedToken(HTMLToken& token)
332 {
333 ASSERT(m_state == Initial);
334 ASSERT(token.type() == HTMLToken::StartTag);
335 ASSERT(hasName(token, embedTag));
336
337 bool didBlockScript = false;
338
339 didBlockScript |= eraseAttributeIfInjected(token, srcAttr, blankURL().string());
340 didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
341
342 return didBlockScript;
343 }
344
filterAppletToken(HTMLToken & token)345 bool XSSFilter::filterAppletToken(HTMLToken& token)
346 {
347 ASSERT(m_state == Initial);
348 ASSERT(token.type() == HTMLToken::StartTag);
349 ASSERT(hasName(token, appletTag));
350
351 bool didBlockScript = false;
352
353 didBlockScript |= eraseAttributeIfInjected(token, codeAttr);
354 didBlockScript |= eraseAttributeIfInjected(token, objectAttr);
355
356 return didBlockScript;
357 }
358
filterIframeToken(HTMLToken & token)359 bool XSSFilter::filterIframeToken(HTMLToken& token)
360 {
361 ASSERT(m_state == Initial);
362 ASSERT(token.type() == HTMLToken::StartTag);
363 ASSERT(hasName(token, iframeTag));
364
365 return eraseAttributeIfInjected(token, srcAttr);
366 }
367
filterMetaToken(HTMLToken & token)368 bool XSSFilter::filterMetaToken(HTMLToken& token)
369 {
370 ASSERT(m_state == Initial);
371 ASSERT(token.type() == HTMLToken::StartTag);
372 ASSERT(hasName(token, metaTag));
373
374 return eraseAttributeIfInjected(token, http_equivAttr);
375 }
376
filterBaseToken(HTMLToken & token)377 bool XSSFilter::filterBaseToken(HTMLToken& token)
378 {
379 ASSERT(m_state == Initial);
380 ASSERT(token.type() == HTMLToken::StartTag);
381 ASSERT(hasName(token, baseTag));
382
383 return eraseAttributeIfInjected(token, hrefAttr);
384 }
385
filterFormToken(HTMLToken & token)386 bool XSSFilter::filterFormToken(HTMLToken& token)
387 {
388 ASSERT(m_state == Initial);
389 ASSERT(token.type() == HTMLToken::StartTag);
390 ASSERT(hasName(token, formTag));
391
392 return eraseAttributeIfInjected(token, actionAttr);
393 }
394
eraseDangerousAttributesIfInjected(HTMLToken & token)395 bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token)
396 {
397 DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
398
399 bool didBlockScript = false;
400 for (size_t i = 0; i < token.attributes().size(); ++i) {
401 const HTMLToken::Attribute& attribute = token.attributes().at(i);
402 bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
403 bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value);
404 if (!isInlineEventHandler && !valueContainsJavaScriptURL)
405 continue;
406 if (!isContainedInRequest(snippetForAttribute(token, attribute)))
407 continue;
408 token.eraseValueOfAttribute(i);
409 if (valueContainsJavaScriptURL)
410 token.appendToAttributeValue(i, safeJavaScriptURL);
411 didBlockScript = true;
412 }
413 return didBlockScript;
414 }
415
eraseAttributeIfInjected(HTMLToken & token,const QualifiedName & attributeName,const String & replacementValue)416 bool XSSFilter::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue)
417 {
418 size_t indexOfAttribute;
419 if (findAttributeWithName(token, attributeName, indexOfAttribute)) {
420 const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute);
421 if (isContainedInRequest(snippetForAttribute(token, attribute))) {
422 if (attributeName == srcAttr && isSameOriginResource(String(attribute.m_value.data(), attribute.m_value.size())))
423 return false;
424 token.eraseValueOfAttribute(indexOfAttribute);
425 if (!replacementValue.isEmpty())
426 token.appendToAttributeValue(indexOfAttribute, replacementValue);
427 return true;
428 }
429 }
430 return false;
431 }
432
snippetForRange(const HTMLToken & token,int start,int end)433 String XSSFilter::snippetForRange(const HTMLToken& token, int start, int end)
434 {
435 // FIXME: There's an extra allocation here that we could save by
436 // passing the range to the parser.
437 return m_parser->sourceForToken(token).substring(start, end - start);
438 }
439
snippetForAttribute(const HTMLToken & token,const HTMLToken::Attribute & attribute)440 String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute)
441 {
442 // FIXME: We should grab one character before the name also.
443 int start = attribute.m_nameRange.m_start - token.startIndex();
444 // FIXME: We probably want to grab only the first few characters of the attribute value.
445 int end = attribute.m_valueRange.m_end - token.startIndex();
446 return snippetForRange(token, start, end);
447 }
448
isContainedInRequest(const String & snippet)449 bool XSSFilter::isContainedInRequest(const String& snippet)
450 {
451 ASSERT(!snippet.isEmpty());
452 String canonicalizedSnippet = canonicalize(snippet);
453 ASSERT(!canonicalizedSnippet.isEmpty());
454 if (m_decodedURL.find(canonicalizedSnippet, 0, false) != notFound)
455 return true;
456 if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(canonicalizedSnippet))
457 return false;
458 return m_decodedHTTPBody.find(canonicalizedSnippet, 0, false) != notFound;
459 }
460
isSameOriginResource(const String & url)461 bool XSSFilter::isSameOriginResource(const String& url)
462 {
463 // If the resource is loaded from the same URL as the enclosing page, it's
464 // probably not an XSS attack, so we reduce false positives by allowing the
465 // request. If the resource has a query string, we're more suspicious,
466 // however, because that's pretty rare and the attacker might be able to
467 // trick a server-side script into doing something dangerous with the query
468 // string.
469 KURL resourceURL(m_parser->document()->url(), url);
470 return (m_parser->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty());
471 }
472
473 }
474