1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "services/network/cross_origin_read_blocking.h"
6
7 #include <stddef.h>
8
9 #include <algorithm>
10 #include <set>
11 #include <string>
12 #include <unordered_set>
13 #include <vector>
14
15 #include "base/command_line.h"
16 #include "base/containers/flat_set.h"
17 #include "base/feature_list.h"
18 #include "base/lazy_instance.h"
19 #include "base/logging.h"
20 #include "base/metrics/histogram_macros.h"
21 #include "base/no_destructor.h"
22 #include "base/stl_util.h"
23 #include "base/strings/string_piece.h"
24 #include "base/strings/string_util.h"
25 #include "net/base/mime_sniffer.h"
26 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
27 #include "net/http/http_response_headers.h"
28 #include "services/network/public/cpp/cross_origin_embedder_policy.h"
29 #include "services/network/public/cpp/cross_origin_resource_policy.h"
30 #include "services/network/public/cpp/features.h"
31 #include "services/network/public/cpp/initiator_lock_compatibility.h"
32 #include "services/network/public/mojom/network_context.mojom.h"
33 #include "services/network/public/mojom/network_service.mojom.h"
34 #include "services/network/public/mojom/url_response_head.mojom.h"
35
36 using base::StringPiece;
37 using MimeType = network::CrossOriginReadBlocking::MimeType;
38 using SniffingResult = network::CrossOriginReadBlocking::SniffingResult;
39
40 namespace network {
41
42 namespace {
43
// Content-Type values that GetCanonicalMimeType() compares against by exact,
// ASCII-case-insensitive match.
constexpr char kTextHtml[] = "text/html";
constexpr char kTextXml[] = "text/xml";
constexpr char kAppXml[] = "application/xml";
constexpr char kAppJson[] = "application/json";
constexpr char kImageSvg[] = "image/svg+xml";
constexpr char kDashVideo[] =
    "application/dash+xml";  // https://crbug.com/947498
constexpr char kTextJson[] = "text/json";
constexpr char kTextPlain[] = "text/plain";
53
// JavaScript MIME type suffixes for use in CORB protection logging.  See also
// https://mimesniff.spec.whatwg.org/#javascript-mime-type.
//
// Declared constexpr so that neither the array nor the pointers stored in it
// can be reassigned at runtime (the previous `const char* []` declaration left
// the element pointers mutable).
constexpr const char* kJavaScriptSuffixes[] = {
    "ecmascript",
    "javascript",
    "x-ecmascript",
    "x-javascript",
    "javascript1.0",
    "javascript1.1",
    "javascript1.2",
    "javascript1.3",
    "javascript1.4",
    "javascript1.5",
    "jscript",
    "livescript",
    "js",
    "x-js",
};
70
// JSON-like MIME type that is matched by name because it does not end with
// the "+json" suffix.
// TODO(lukasza): Remove kJsonProtobuf once this MIME type is not used in
// practice.  See also https://crbug.com/826756#c3
constexpr char kJsonProtobuf[] = "application/json+protobuf";

// MIME type suffixes; a Content-Type ending in one of these is classified as
// JSON or XML respectively.
constexpr char kJsonSuffix[] = "+json";
constexpr char kXmlSuffix[] = "+xml";
78
AdvancePastWhitespace(StringPiece * data)79 void AdvancePastWhitespace(StringPiece* data) {
80 size_t offset = data->find_first_not_of(" \t\r\n");
81 if (offset == base::StringPiece::npos) {
82 // |data| was entirely whitespace.
83 *data = StringPiece();
84 } else {
85 data->remove_prefix(offset);
86 }
87 }
88
89 // Returns kYes if |data| starts with one of the string patterns in
90 // |signatures|, kMaybe if |data| is a prefix of one of the patterns in
91 // |signatures|, and kNo otherwise.
92 //
93 // When kYes is returned, the matching prefix is erased from |data|.
MatchesSignature(StringPiece * data,const StringPiece signatures[],size_t arr_size,base::CompareCase compare_case)94 SniffingResult MatchesSignature(StringPiece* data,
95 const StringPiece signatures[],
96 size_t arr_size,
97 base::CompareCase compare_case) {
98 for (size_t i = 0; i < arr_size; ++i) {
99 if (signatures[i].length() <= data->length()) {
100 if (base::StartsWith(*data, signatures[i], compare_case)) {
101 // When |signatures[i]| is a prefix of |data|, it constitutes a match.
102 // Strip the matching characters, and return.
103 data->remove_prefix(signatures[i].length());
104 return CrossOriginReadBlocking::kYes;
105 }
106 } else {
107 if (base::StartsWith(signatures[i], *data, compare_case)) {
108 // When |data| is a prefix of |signatures[i]|, that means that
109 // subsequent bytes in the stream could cause a match to occur.
110 return CrossOriginReadBlocking::kMaybe;
111 }
112 }
113 }
114 return CrossOriginReadBlocking::kNo;
115 }
116
FindFirstJavascriptLineTerminator(const base::StringPiece & hay,size_t pos)117 size_t FindFirstJavascriptLineTerminator(const base::StringPiece& hay,
118 size_t pos) {
119 // https://www.ecma-international.org/ecma-262/8.0/index.html#prod-LineTerminator
120 // defines LineTerminator ::= <LF> | <CR> | <LS> | <PS>.
121 //
122 // https://www.ecma-international.org/ecma-262/8.0/index.html#sec-line-terminators
123 // defines <LF>, <CR>, <LS> ::= "\u2028", <PS> ::= "\u2029".
124 //
125 // In UTF8 encoding <LS> is 0xE2 0x80 0xA8 and <PS> is 0xE2 0x80 0xA9.
126 while (true) {
127 pos = hay.find_first_of("\n\r\xe2", pos);
128 if (pos == base::StringPiece::npos)
129 break;
130
131 if (hay[pos] != '\xe2') {
132 DCHECK(hay[pos] == '\r' || hay[pos] == '\n');
133 break;
134 }
135
136 // TODO(lukasza): Prevent matching 3 bytes that span/straddle 2 UTF8
137 // characters.
138 base::StringPiece substr = hay.substr(pos);
139 if (substr.starts_with("\u2028") || substr.starts_with("\u2029"))
140 break;
141
142 pos++; // Skip the \xe2 character.
143 }
144 return pos;
145 }
146
147 // Checks if |data| starts with an HTML comment (i.e. with "<!-- ... -->").
148 // - If there is a valid, terminated comment then returns kYes.
149 // - If there is a start of a comment, but the comment is not completed (e.g.
150 // |data| == "<!-" or |data| == "<!-- not terminated yet") then returns
151 // kMaybe.
152 // - Returns kNo otherwise.
153 //
154 // Mutates |data| to advance past the comment when returning kYes. Note that
155 // SingleLineHTMLCloseComment ECMAscript rule is taken into account which means
156 // that characters following an HTML comment are consumed up to the nearest line
157 // terminating character.
MaybeSkipHtmlComment(StringPiece * data)158 SniffingResult MaybeSkipHtmlComment(StringPiece* data) {
159 constexpr StringPiece kStartString = "<!--";
160 if (!data->starts_with(kStartString)) {
161 if (kStartString.starts_with(*data))
162 return CrossOriginReadBlocking::kMaybe;
163 return CrossOriginReadBlocking::kNo;
164 }
165
166 constexpr StringPiece kEndString = "-->";
167 size_t end_of_html_comment = data->find(kEndString, kStartString.length());
168 if (end_of_html_comment == StringPiece::npos)
169 return CrossOriginReadBlocking::kMaybe;
170 end_of_html_comment += kEndString.length();
171
172 // Skipping until the first line terminating character. See
173 // https://crbug.com/839945 for the motivation behind this.
174 size_t end_of_line =
175 FindFirstJavascriptLineTerminator(*data, end_of_html_comment);
176 if (end_of_line == base::StringPiece::npos)
177 return CrossOriginReadBlocking::kMaybe;
178
179 // Found real end of the combined HTML/JS comment.
180 data->remove_prefix(end_of_line);
181 return CrossOriginReadBlocking::kYes;
182 }
183
184 // Removes headers that should be blocked in cross-origin case.
185 //
186 // Note that corbSanitizedResponse in https://fetch.spec.whatwg.org/#main-fetch
187 // has an empty list of headers, but the code below doesn't remove all the
188 // headers for improved user experience - for better error messages for CORS.
189 // See also https://github.com/whatwg/fetch/pull/686#issuecomment-383711732 and
190 // the http/tests/xmlhttprequest/origin-exact-matching/07.html layout test.
191 //
192 // Note that CORB doesn't block responses allowed through CORS - this means
193 // that the list of allowed headers below doesn't have to consider header
194 // names listed in the Access-Control-Expose-Headers header.
BlockResponseHeaders(const scoped_refptr<net::HttpResponseHeaders> & headers)195 void BlockResponseHeaders(
196 const scoped_refptr<net::HttpResponseHeaders>& headers) {
197 DCHECK(headers);
198 std::unordered_set<std::string> names_of_headers_to_remove;
199
200 size_t it = 0;
201 std::string name;
202 std::string value;
203 while (headers->EnumerateHeaderLines(&it, &name, &value)) {
204 // Don't remove CORS headers - doing so would lead to incorrect error
205 // messages for CORS-blocked responses (e.g. Blink would say "[...] No
206 // 'Access-Control-Allow-Origin' header is present [...]" instead of saying
207 // something like "[...] Access-Control-Allow-Origin' header has a value
208 // 'http://www2.localhost:8000' that is not equal to the supplied origin
209 // [...]").
210 if (base::StartsWith(name, "Access-Control-",
211 base::CompareCase::INSENSITIVE_ASCII)) {
212 continue;
213 }
214
215 // Remove all other headers.
216 names_of_headers_to_remove.insert(base::ToLowerASCII(name));
217 }
218
219 headers->RemoveHeaders(names_of_headers_to_remove);
220 }
221
GetPluginProxyingProcesses()222 std::set<int>& GetPluginProxyingProcesses() {
223 static base::NoDestructor<std::set<int>> set;
224 return *set;
225 }
226
227 // The function below returns a set of MIME types below may be blocked by CORB
228 // without any confirmation sniffing (in contrast to HTML/JSON/XML which require
229 // confirmation sniffing because images, scripts, etc. are frequently
230 // mislabelled by http servers as HTML/JSON/XML).
231 //
232 // CORB cannot block images, scripts, stylesheets and other resources that the
233 // web standards allows to be fetched in `no-cors` mode. CORB cannot block
234 // these resources even if they are not explicitly labeled with their type - in
235 // practice http servers may serve images as application/octet-stream or even as
236 // text/html. OTOH, CORB *can* block all Content-Types that are very unlikely
237 // to represent images, scripts, stylesheets, etc. - such Content-Types are
238 // returned by GetNeverSniffedMimeTypes.
239 //
240 // Some of the Content-Types returned below might seem like a layering violation
241 // (e.g. why would //services/network care about application/zip or
242 // application/pdf or application/msword), but note that the decision to list a
243 // Content-Type below is not driven by whether the type is handled above or
244 // below //services/network layer. Instead the decision to list a Content-Type
245 // below is driven by whether the Content-Type is unlikely to be attached to an
246 // image, script, stylesheet or other subresource type that web standards
247 // require to be fetched in `no-cors` mode. In particular, CORB would still
248 // want to prevent cross-site disclosure of "application/msword" even if Chrome
249 // did not support this type (AFAIK today this support is only present on
250 // ChromeOS) in one of Chrome's many layers. Similarly, CORB wants to prevent
251 // disclosure of "application/zip" even though Chrome doesn't have built-in
252 // support for this resource type. And CORB also wants to protect
253 // "application/pdf" even though Chrome happens to support this resource type.
GetNeverSniffedMimeTypes()254 base::flat_set<std::string>& GetNeverSniffedMimeTypes() {
255 static base::NoDestructor<base::flat_set<std::string>> s_types{{
256 // The types below (zip, protobuf, etc.) are based on most commonly used
257 // content types according to HTTP Archive - see:
258 // https://github.com/whatwg/fetch/issues/860#issuecomment-457330454
259 "application/gzip",
260 "application/x-gzip",
261 "application/x-protobuf",
262 "application/zip",
263 "text/event-stream",
264 // The types listed below were initially taken from the list of types
265 // handled by MimeHandlerView (although we would want to protect them even
266 // if Chrome didn't support rendering these content types and/or if there
267 // was no such thing as MimeHandlerView).
268 "application/msexcel",
269 "application/mspowerpoint",
270 "application/msword",
271 "application/msword-template",
272 "application/pdf",
273 "application/vnd.ces-quickpoint",
274 "application/vnd.ces-quicksheet",
275 "application/vnd.ces-quickword",
276 "application/vnd.ms-excel",
277 "application/vnd.ms-excel.sheet.macroenabled.12",
278 "application/vnd.ms-powerpoint",
279 "application/vnd.ms-powerpoint.presentation.macroenabled.12",
280 "application/vnd.ms-word",
281 "application/vnd.ms-word.document.12",
282 "application/vnd.ms-word.document.macroenabled.12",
283 "application/vnd.msword",
284 "application/"
285 "vnd.openxmlformats-officedocument.presentationml.presentation",
286 "application/"
287 "vnd.openxmlformats-officedocument.presentationml.template",
288 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
289 "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
290 "application/"
291 "vnd.openxmlformats-officedocument.wordprocessingml.document",
292 "application/"
293 "vnd.openxmlformats-officedocument.wordprocessingml.template",
294 "application/vnd.presentation-openxml",
295 "application/vnd.presentation-openxmlm",
296 "application/vnd.spreadsheet-openxml",
297 "application/vnd.wordprocessing-openxml",
298 "text/csv",
299 // Block signed documents to protect (potentially sensitive) unencrypted
300 // body of the signed document. There should be no need to block
301 // encrypted documents (e.g. `multipart/encrypted` nor
302 // `application/pgp-encrypted`) and no need to block the signatures (e.g.
303 // `application/pgp-signature`).
304 "multipart/signed",
305 // Block multipart responses because a protected type (e.g. JSON) can
306 // become multipart if returned in a range request with multiple parts.
307 // This is compatible with the web because the renderer can only see into
308 // the result of a fetch for a multipart file when the request is made
309 // with CORS. Media tags only make single-range requests which will not
310 // have the multipart type.
311 "multipart/byteranges",
312 // TODO(lukasza): https://crbug.com/802836#c11: Add
313 // application/signed-exchange.
314 }};
315
316 // All items need to be lower-case, to support case-insensitive comparisons
317 // later.
318 DCHECK(std::all_of(
319 s_types->begin(), s_types->end(),
320 [](const std::string& s) { return s == base::ToLowerASCII(s); }));
321
322 return *s_types;
323 }
324
325 } // namespace
326
GetCanonicalMimeType(base::StringPiece mime_type)327 MimeType CrossOriginReadBlocking::GetCanonicalMimeType(
328 base::StringPiece mime_type) {
329 // Checking for image/svg+xml and application/dash+xml early ensures that they
330 // won't get classified as MimeType::kXml by the presence of the "+xml"
331 // suffix.
332 if (base::LowerCaseEqualsASCII(mime_type, kImageSvg) ||
333 base::LowerCaseEqualsASCII(mime_type, kDashVideo))
334 return MimeType::kOthers;
335
336 // See also https://mimesniff.spec.whatwg.org/#html-mime-type
337 if (base::LowerCaseEqualsASCII(mime_type, kTextHtml))
338 return MimeType::kHtml;
339
340 // See also https://mimesniff.spec.whatwg.org/#json-mime-type
341 constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII;
342 if (base::LowerCaseEqualsASCII(mime_type, kAppJson) ||
343 base::LowerCaseEqualsASCII(mime_type, kTextJson) ||
344 base::LowerCaseEqualsASCII(mime_type, kJsonProtobuf) ||
345 base::EndsWith(mime_type, kJsonSuffix, kCaseInsensitive)) {
346 return MimeType::kJson;
347 }
348
349 // See also https://mimesniff.spec.whatwg.org/#xml-mime-type
350 if (base::LowerCaseEqualsASCII(mime_type, kAppXml) ||
351 base::LowerCaseEqualsASCII(mime_type, kTextXml) ||
352 base::EndsWith(mime_type, kXmlSuffix, kCaseInsensitive)) {
353 return MimeType::kXml;
354 }
355
356 if (base::LowerCaseEqualsASCII(mime_type, kTextPlain))
357 return MimeType::kPlain;
358
359 if (base::Contains(GetNeverSniffedMimeTypes(),
360 base::ToLowerASCII(mime_type))) {
361 return MimeType::kNeverSniffed;
362 }
363
364 return MimeType::kOthers;
365 }
366
IsBlockableScheme(const GURL & url)367 bool CrossOriginReadBlocking::IsBlockableScheme(const GURL& url) {
368 // We exclude ftp:// from here. FTP doesn't provide a Content-Type
369 // header which our policy depends on, so we cannot protect any
370 // response from FTP servers.
371 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);
372 }
373
IsValidCorsHeaderSet(const url::Origin & frame_origin,const std::string & access_control_origin)374 bool CrossOriginReadBlocking::IsValidCorsHeaderSet(
375 const url::Origin& frame_origin,
376 const std::string& access_control_origin) {
377 // Many websites are sending back "\"*\"" instead of "*". This is
378 // non-standard practice, and not supported by Chrome. Refer to
379 // CrossOriginAccessControl::passesAccessControlCheck().
380
381 // Note that "null" offers no more protection than "*" because it matches any
382 // unique origin, such as data URLs. Any origin can thus access it, so don't
383 // bother trying to block this case.
384
385 // TODO(dsjang): * is not allowed for the response from a request
386 // with cookies. This allows for more than what the renderer will
387 // eventually be able to receive, so we won't see illegal cross-site
388 // documents allowed by this. We have to find a way to see if this
389 // response is from a cookie-tagged request or not in the future.
390 if (access_control_origin == "*" || access_control_origin == "null")
391 return true;
392
393 return frame_origin.IsSameOriginWith(
394 url::Origin::Create(GURL(access_control_origin)));
395 }
396
397 // This function is a slight modification of |net::SniffForHTML|.
SniffForHTML(StringPiece data)398 SniffingResult CrossOriginReadBlocking::SniffForHTML(StringPiece data) {
399 // The content sniffers used by Chrome and Firefox are using "<!--" as one of
400 // the HTML signatures, but it also appears in valid JavaScript, considered as
401 // well-formed JS by the browser. Since we do not want to block any JS, we
402 // exclude it from our HTML signatures. This can weaken our CORB policy,
403 // but we can break less websites.
404 //
405 // Note that <body> and <br> are not included below, since <b is a prefix of
406 // them.
407 //
408 // TODO(dsjang): parameterize |net::SniffForHTML| with an option that decides
409 // whether to include <!-- or not, so that we can remove this function.
410 // TODO(dsjang): Once CrossOriginReadBlocking is moved into the browser
411 // process, we should do single-thread checking here for the static
412 // initializer.
413 static constexpr StringPiece kHtmlSignatures[] = {
414 StringPiece("<!doctype html"), // HTML5 spec
415 StringPiece("<script"), // HTML5 spec, Mozilla
416 StringPiece("<html"), // HTML5 spec, Mozilla
417 StringPiece("<head"), // HTML5 spec, Mozilla
418 StringPiece("<iframe"), // Mozilla
419 StringPiece("<h1"), // Mozilla
420 StringPiece("<div"), // Mozilla
421 StringPiece("<font"), // Mozilla
422 StringPiece("<table"), // Mozilla
423 StringPiece("<a"), // Mozilla
424 StringPiece("<style"), // Mozilla
425 StringPiece("<title"), // Mozilla
426 StringPiece("<b"), // Mozilla (note: subsumes <body>, <br>)
427 StringPiece("<p") // Mozilla
428 };
429
430 while (data.length() > 0) {
431 AdvancePastWhitespace(&data);
432
433 SniffingResult signature_match =
434 MatchesSignature(&data, kHtmlSignatures, base::size(kHtmlSignatures),
435 base::CompareCase::INSENSITIVE_ASCII);
436 if (signature_match != kNo)
437 return signature_match;
438
439 SniffingResult comment_match = MaybeSkipHtmlComment(&data);
440 if (comment_match != kYes)
441 return comment_match;
442 }
443
444 // All of |data| was consumed, without a clear determination.
445 return kMaybe;
446 }
447
SniffForXML(base::StringPiece data)448 SniffingResult CrossOriginReadBlocking::SniffForXML(base::StringPiece data) {
449 // TODO(dsjang): Once CrossOriginReadBlocking is moved into the browser
450 // process, we should do single-thread checking here for the static
451 // initializer.
452 AdvancePastWhitespace(&data);
453 static constexpr StringPiece kXmlSignatures[] = {StringPiece("<?xml")};
454 return MatchesSignature(&data, kXmlSignatures, base::size(kXmlSignatures),
455 base::CompareCase::SENSITIVE);
456 }
457
SniffForJSON(base::StringPiece data)458 SniffingResult CrossOriginReadBlocking::SniffForJSON(base::StringPiece data) {
459 // Currently this function looks for an opening brace ('{'), followed by a
460 // double-quoted string literal, followed by a colon. Importantly, such a
461 // sequence is a Javascript syntax error: although the JSON object syntax is
462 // exactly Javascript's object-initializer syntax, a Javascript object-
463 // initializer expression is not valid as a standalone Javascript statement.
464 //
465 // TODO(nick): We have to come up with a better way to sniff JSON. The
466 // following are known limitations of this function:
467 // https://crbug.com/795470/ Support non-dictionary values (e.g. lists)
468 enum {
469 kStartState,
470 kLeftBraceState,
471 kLeftQuoteState,
472 kEscapeState,
473 kRightQuoteState,
474 } state = kStartState;
475
476 for (size_t i = 0; i < data.length(); ++i) {
477 const char c = data[i];
478 if (state != kLeftQuoteState && state != kEscapeState) {
479 // Whitespace is ignored (outside of string literals)
480 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
481 continue;
482 } else {
483 // Inside string literals, control characters should result in rejection.
484 if ((c >= 0 && c < 32) || c == 127)
485 return kNo;
486 }
487
488 switch (state) {
489 case kStartState:
490 if (c == '{')
491 state = kLeftBraceState;
492 else
493 return kNo;
494 break;
495 case kLeftBraceState:
496 if (c == '"')
497 state = kLeftQuoteState;
498 else
499 return kNo;
500 break;
501 case kLeftQuoteState:
502 if (c == '"')
503 state = kRightQuoteState;
504 else if (c == '\\')
505 state = kEscapeState;
506 break;
507 case kEscapeState:
508 // Simplification: don't bother rejecting hex escapes.
509 state = kLeftQuoteState;
510 break;
511 case kRightQuoteState:
512 if (c == ':')
513 return kYes;
514 else
515 return kNo;
516 break;
517 }
518 }
519 return kMaybe;
520 }
521
SniffForFetchOnlyResource(base::StringPiece data)522 SniffingResult CrossOriginReadBlocking::SniffForFetchOnlyResource(
523 base::StringPiece data) {
524 // kScriptBreakingPrefixes contains prefixes that are conventionally used to
525 // prevent a JSON response from becoming a valid Javascript program (an attack
526 // vector known as XSSI). The presence of such a prefix is a strong signal
527 // that the resource is meant to be consumed only by the fetch API or
528 // XMLHttpRequest, and is meant to be protected from use in non-CORS, cross-
529 // origin contexts like <script>, <img>, etc.
530 //
531 // These prefixes work either by inducing a syntax error, or inducing an
532 // infinite loop. In either case, the prefix must create a guarantee that no
533 // matter what bytes follow it, the entire response would be worthless to
534 // execute as a <script>.
535 static constexpr StringPiece kScriptBreakingPrefixes[] = {
536 // Parser breaker prefix.
537 //
538 // Built into angular.js (followed by a comma and a newline):
539 // https://docs.angularjs.org/api/ng/service/$http
540 //
541 // Built into the Java Spring framework (followed by a comma and a space):
542 // https://goo.gl/xP7FWn
543 //
544 // Observed on google.com (without a comma, followed by a newline).
545 StringPiece(")]}'"),
546
547 // Apache struts: https://struts.apache.org/plugins/json/#prefix
548 StringPiece("{}&&"),
549
550 // Spring framework (historically): https://goo.gl/JYPFAv
551 StringPiece("{} &&"),
552
553 // Infinite loops.
554 StringPiece("for(;;);"), // observed on facebook.com
555 StringPiece("while(1);"),
556 StringPiece("for (;;);"),
557 StringPiece("while (1);"),
558 };
559 SniffingResult has_parser_breaker = MatchesSignature(
560 &data, kScriptBreakingPrefixes, base::size(kScriptBreakingPrefixes),
561 base::CompareCase::SENSITIVE);
562 if (has_parser_breaker != kNo)
563 return has_parser_breaker;
564
565 // A non-empty JSON object also effectively introduces a JS syntax error.
566 return SniffForJSON(data);
567 }
568
569 // static
SanitizeBlockedResponse(network::mojom::URLResponseHead * response)570 void CrossOriginReadBlocking::SanitizeBlockedResponse(
571 network::mojom::URLResponseHead* response) {
572 DCHECK(response);
573 response->content_length = 0;
574 if (response->headers)
575 BlockResponseHeaders(response->headers);
576 }
577
578 // static
LogAction(Action action)579 void CrossOriginReadBlocking::LogAction(Action action) {
580 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.Browser.Action", action);
581 }
582
583 // An interface to enable incremental content sniffing. These are instantiated
584 // for each each request; thus they can be stateful.
585 class CrossOriginReadBlocking::ResponseAnalyzer::ConfirmationSniffer {
586 public:
587 virtual ~ConfirmationSniffer() = default;
588
589 // Called after data is read from the network. |sniffing_buffer| contains the
590 // entire response body delivered thus far. To support streaming,
591 // |new_data_offset| gives the offset into |sniffing_buffer| at which new data
592 // was appended since the last read.
593 virtual void OnDataAvailable(base::StringPiece sniffing_buffer,
594 size_t new_data_offset) = 0;
595
596 // Returns true if the return value of IsConfirmedContentType() might change
597 // with the addition of more data. Returns false if a final decision is
598 // available.
599 virtual bool WantsMoreData() const = 0;
600
601 // Returns true if the data has been confirmed to be of the CORB-protected
602 // content type that this sniffer is intended to detect.
603 virtual bool IsConfirmedContentType() const = 0;
604 };
605
606 // A ConfirmationSniffer that wraps one of the sniffing functions from
607 // network::CrossOriginReadBlocking.
608 class CrossOriginReadBlocking::ResponseAnalyzer::SimpleConfirmationSniffer
609 : public CrossOriginReadBlocking::ResponseAnalyzer::ConfirmationSniffer {
610 public:
611 // The function pointer type corresponding to one of the available sniffing
612 // functions from network::CrossOriginReadBlocking.
613 using SnifferFunction =
614 decltype(&network::CrossOriginReadBlocking::SniffForHTML);
615
SimpleConfirmationSniffer(SnifferFunction sniffer_function)616 explicit SimpleConfirmationSniffer(SnifferFunction sniffer_function)
617 : sniffer_function_(sniffer_function) {}
618 ~SimpleConfirmationSniffer() override = default;
619
OnDataAvailable(base::StringPiece sniffing_buffer,size_t new_data_offset)620 void OnDataAvailable(base::StringPiece sniffing_buffer,
621 size_t new_data_offset) final {
622 DCHECK_LE(new_data_offset, sniffing_buffer.length());
623 if (new_data_offset == sniffing_buffer.length()) {
624 // No new data -- do nothing. This happens at end-of-stream.
625 return;
626 }
627 // The sniffing functions don't support streaming, so with each new chunk of
628 // data, call the sniffer on the whole buffer.
629 last_sniff_result_ = (*sniffer_function_)(sniffing_buffer);
630 }
631
WantsMoreData() const632 bool WantsMoreData() const final {
633 // kNo and kYes results are final, meaning that sniffing can stop once they
634 // occur. A kMaybe result corresponds to an indeterminate state, that could
635 // change to kYes or kNo with more data.
636 return last_sniff_result_ == SniffingResult::kMaybe;
637 }
638
IsConfirmedContentType() const639 bool IsConfirmedContentType() const final {
640 // Only confirm the mime type if an affirmative pattern (e.g. an HTML tag,
641 // if using the HTML sniffer) was detected.
642 //
643 // Note that if the stream ends (or net::kMaxBytesToSniff has been reached)
644 // and |last_sniff_result_| is kMaybe, the response is allowed to go
645 // through.
646 return last_sniff_result_ == SniffingResult::kYes;
647 }
648
649 private:
650 // The function that actually knows how to sniff for a content type.
651 SnifferFunction sniffer_function_;
652
653 // Result of sniffing the data available thus far.
654 SniffingResult last_sniff_result_ = SniffingResult::kMaybe;
655
656 DISALLOW_COPY_AND_ASSIGN(SimpleConfirmationSniffer);
657 };
658
ResponseAnalyzer(const GURL & request_url,const base::Optional<url::Origin> & request_initiator,const network::mojom::URLResponseHead & response,const base::Optional<url::Origin> & request_initiator_site_lock,mojom::RequestMode request_mode,const base::Optional<url::Origin> & isolated_world_origin,mojom::NetworkServiceClient * network_service_client)659 CrossOriginReadBlocking::ResponseAnalyzer::ResponseAnalyzer(
660 const GURL& request_url,
661 const base::Optional<url::Origin>& request_initiator,
662 const network::mojom::URLResponseHead& response,
663 const base::Optional<url::Origin>& request_initiator_site_lock,
664 mojom::RequestMode request_mode,
665 const base::Optional<url::Origin>& isolated_world_origin,
666 mojom::NetworkServiceClient* network_service_client)
667 : seems_sensitive_from_cors_heuristic_(
668 SeemsSensitiveFromCORSHeuristic(response)),
669 seems_sensitive_from_cache_heuristic_(
670 SeemsSensitiveFromCacheHeuristic(response)),
671 supports_range_requests_(SupportsRangeRequests(response)),
672 has_nosniff_header_(HasNoSniff(response)),
673 content_length_(response.content_length),
674 http_response_code_(response.headers ? response.headers->response_code()
675 : 0),
676 isolated_world_origin_(isolated_world_origin),
677 network_service_client_(network_service_client) {
678 // CORB should look directly at the Content-Type header if one has been
679 // received from the network. Ignoring |response.mime_type| helps avoid
680 // breaking legitimate websites (which might happen more often when blocking
681 // would be based on the mime type sniffed by MimeSniffingResourceHandler).
682 //
683 // This value could be computed later in ShouldBlockBasedOnHeaders after
684 // has_nosniff_header, but we compute it here to keep
685 // ShouldBlockBasedOnHeaders (which is called twice) const.
686 //
687 // TODO(nick): What if the mime type is omitted? Should that be treated the
688 // same as text/plain? https://crbug.com/795971
689 std::string mime_type;
690 if (response.headers)
691 response.headers->GetMimeType(&mime_type);
692 // Canonicalize the MIME type. Note that even if it doesn't claim to be a
693 // blockable type (i.e., HTML, XML, JSON, or plain text), it may still fail
694 // the checks during the SniffForFetchOnlyResource() phase.
695 canonical_mime_type_ =
696 network::CrossOriginReadBlocking::GetCanonicalMimeType(mime_type);
697
698 should_block_based_on_headers_ = ShouldBlockBasedOnHeaders(
699 request_mode, request_url, request_initiator, response,
700 request_initiator_site_lock, canonical_mime_type_,
701 &is_cors_blocking_expected_);
702
703 // Check if the response seems sensitive and if so include in our CORB
704 // protection logging. We have not sniffed yet, so the answer might be
705 // kNeedToSniffMore.
706 if (seems_sensitive_from_cors_heuristic_ ||
707 seems_sensitive_from_cache_heuristic_) {
708 // Create a new Origin with a unique internal identifier so we can pretend
709 // the request is cross-origin.
710 url::Origin cross_origin_request_initiator = url::Origin();
711 BlockingDecision would_protect_based_on_headers = ShouldBlockBasedOnHeaders(
712 request_mode, request_url, cross_origin_request_initiator, response,
713 cross_origin_request_initiator, canonical_mime_type_,
714 nullptr /* is_cors_blocking_expected */);
715 corb_protection_logging_needs_sniffing_ =
716 (would_protect_based_on_headers ==
717 BlockingDecision::kNeedToSniffMore) &&
718 base::FeatureList::IsEnabled(
719 network::features::kCORBProtectionSniffing);
720 hypothetical_sniffing_mode_ =
721 corb_protection_logging_needs_sniffing_ &&
722 should_block_based_on_headers_ != BlockingDecision::kNeedToSniffMore;
723 mime_type_bucket_ = GetMimeTypeBucket(response);
724 UMA_HISTOGRAM_BOOLEAN("SiteIsolation.CORBProtection.SensitiveResource",
725 true);
726 if (!corb_protection_logging_needs_sniffing_) {
727 // If we are not going to sniff, then we can and must log everything now.
728 LogSensitiveResponseProtection(
729 BlockingDecisionToProtectionDecision(would_protect_based_on_headers));
730 }
731 } else {
732 UMA_HISTOGRAM_BOOLEAN("SiteIsolation.CORBProtection.SensitiveResource",
733 false);
734 }
735 if (needs_sniffing())
736 CreateSniffers();
737 }
738
// Defaulted out of line: no custom teardown logic is performed here; members
// are destroyed by the compiler-generated destructor.
CrossOriginReadBlocking::ResponseAnalyzer::~ResponseAnalyzer() = default;
740
741 // static
742 CrossOriginReadBlocking::ResponseAnalyzer::BlockingDecision
ShouldBlockBasedOnHeaders(mojom::RequestMode request_mode,const GURL & request_url,const base::Optional<url::Origin> & request_initiator,const network::mojom::URLResponseHead & response,const base::Optional<url::Origin> & request_initiator_site_lock,MimeType canonical_mime_type,bool * is_cors_blocking_expected)743 CrossOriginReadBlocking::ResponseAnalyzer::ShouldBlockBasedOnHeaders(
744 mojom::RequestMode request_mode,
745 const GURL& request_url,
746 const base::Optional<url::Origin>& request_initiator,
747 const network::mojom::URLResponseHead& response,
748 const base::Optional<url::Origin>& request_initiator_site_lock,
749 MimeType canonical_mime_type,
750 bool* is_cors_blocking_expected) {
751 if (is_cors_blocking_expected)
752 *is_cors_blocking_expected = false;
753
754 // The checks in this method are ordered to rule out blocking in most cases as
755 // quickly as possible. Checks that are likely to lead to returning false or
756 // that are inexpensive should be near the top.
757 url::Origin target_origin = url::Origin::Create(request_url);
758
759 // Compute the |initiator| of the request, falling back to a unique origin if
760 // there was no initiator or if it was incompatible with the lock. Using a
761 // unique origin makes CORB treat the response as cross-origin and thus
762 // considers it eligible for blocking (based on content-type, sniffing, etc.).
763 url::Origin initiator =
764 GetTrustworthyInitiator(request_initiator_site_lock, request_initiator);
765
766 // Don't block same-origin documents.
767 if (initiator.IsSameOriginWith(target_origin))
768 return kAllow;
769
770 // Only block documents from HTTP(S) schemes. Checking the scheme of
771 // |target_origin| ensures that we also protect content of blob: and
772 // filesystem: URLs if their nested origins have a HTTP(S) scheme.
773 if (!IsBlockableScheme(target_origin.GetURL()))
774 return kAllow;
775
776 // Allow the response through if this is a CORS request and the response has
777 // valid CORS headers.
778 switch (request_mode) {
779 case mojom::RequestMode::kNavigate:
780 case mojom::RequestMode::kNoCors:
781 case mojom::RequestMode::kSameOrigin:
782 break;
783
784 case mojom::RequestMode::kCors:
785 case mojom::RequestMode::kCorsWithForcedPreflight:
786 std::string cors_header;
787 response.headers->GetNormalizedHeader("access-control-allow-origin",
788 &cors_header);
789 if (IsValidCorsHeaderSet(initiator, cors_header))
790 return kAllow;
791
792 // At this point we know that the response is 1) cross-origin from the
793 // initiator, 2) in CORS mode, 3) without valid ACAO header.
794 if (is_cors_blocking_expected)
795 *is_cors_blocking_expected = true;
796 break;
797 }
798
799 // Requests from foo.example.com will consult foo.example.com's service worker
800 // first (if one has been registered). The service worker can handle requests
801 // initiated by foo.example.com even if they are cross-origin (e.g. requests
802 // for bar.example.com). This is okay and should not be blocked by CORB,
803 // unless the initiator opted out of CORS / opted into receiving an opaque
804 // response. See also https://crbug.com/803672.
805 if (response.was_fetched_via_service_worker) {
806 switch (response.response_type) {
807 case network::mojom::FetchResponseType::kBasic:
808 case network::mojom::FetchResponseType::kCors:
809 case network::mojom::FetchResponseType::kDefault:
810 case network::mojom::FetchResponseType::kError:
811 // Non-opaque responses shouldn't be blocked.
812 return kAllow;
813 case network::mojom::FetchResponseType::kOpaque:
814 case network::mojom::FetchResponseType::kOpaqueRedirect:
815 // Opaque responses are eligible for blocking. Continue on...
816 break;
817 }
818 }
819
820 // Some types (e.g. ZIP) are protected without any confirmation sniffing.
821 if (canonical_mime_type == MimeType::kNeverSniffed)
822 return kBlock;
823
824 // CORS is currently implemented in the renderer process, so it's useful for
825 // CORB to filter failed "cors" mode fetches to avoid leaking the responses to
826 // the renderer when possible (e.g., depending on MIME type and sniffing).
827 // This will eventually be fixed with OOR-CORS.
828 //
829 // In the mean time, we can try to filter a few additional failed CORS
830 // fetches, treating the Cross-Origin-Resource-Policy (CORP) header as an
831 // opt-in to CORB. CORP headers are enforced elsewhere and normally only
832 // apply to "no-cors" mode fetches. If such a header happens to be on the
833 // response during other fetch modes, and if the same-origin and
834 // IsValidCorsHeaderSet checks above have failed (and thus the request will
835 // fail in the renderer), then we can let CORB filter the response without
836 // caring about MIME type or sniffing.
837 //
838 // To make CrossOriginResourcePolicy::IsBlocked apply to all fetch modes in
839 // this case and not just "no-cors", we pass kNoCors as a hard-coded value.
840 // This does not affect the usual enforcement of CORP headers.
841 //
842 // TODO(lukasza): Once OOR-CORS launches (https://crbug.com/736308), this code
843 // block will no longer be necessary since all failed CORS requests will be
844 // blocked before reaching the renderer process (even without CORB's help).
845 // Of course this assumes that OOR-CORS will use trustworthy
846 // |request_initiator| (i.e. vetted against |request_initiator|site_lock|).
847 constexpr mojom::RequestMode kOverreachingRequestMode =
848 mojom::RequestMode::kNoCors;
849 // COEP is not supported when OOR-CORS is disabled.
850 if (CrossOriginResourcePolicy::IsBlocked(
851 request_url, request_url, request_initiator, response,
852 kOverreachingRequestMode, request_initiator_site_lock,
853 CrossOriginEmbedderPolicy())) {
854 // Ignore mime types and/or sniffing and have CORB block all responses with
855 // COR*P* header.
856 return kBlock;
857 }
858
859 // If this is a partial response, sniffing is not possible, so allow the
860 // response if it's not a protected mime type.
861 std::string range_header;
862 response.headers->GetNormalizedHeader("content-range", &range_header);
863 bool has_range_header = !range_header.empty();
864 if (has_range_header) {
865 switch (canonical_mime_type) {
866 case MimeType::kOthers:
867 case MimeType::kPlain: // See also https://crbug.com/801709
868 return kAllow;
869 case MimeType::kHtml:
870 case MimeType::kJson:
871 case MimeType::kXml:
872 return kBlock;
873 case MimeType::kInvalidMimeType:
874 case MimeType::kNeverSniffed: // Handled much earlier.
875 NOTREACHED();
876 return kBlock;
877 }
878 }
879
880 // We intend to block the response at this point. However, we will usually
881 // sniff the contents to confirm the MIME type, to avoid blocking incorrectly
882 // labeled JavaScript, JSONP, etc files.
883 //
884 // Note: if there is a nosniff header, it means we should honor the response
885 // mime type without trying to confirm it.
886 //
887 // Decide whether to block based on the MIME type.
888 switch (canonical_mime_type) {
889 case MimeType::kHtml:
890 case MimeType::kXml:
891 case MimeType::kJson:
892 case MimeType::kPlain:
893 if (HasNoSniff(response))
894 return kBlock;
895 else
896 return kNeedToSniffMore;
897 break;
898
899 case MimeType::kOthers:
900 // Stylesheets shouldn't be sniffed for JSON parser breakers - see
901 // https://crbug.com/809259.
902 if (base::LowerCaseEqualsASCII(response.mime_type, "text/css"))
903 return kAllow;
904 else
905 return kNeedToSniffMore;
906 break;
907
908 case MimeType::kInvalidMimeType:
909 case MimeType::kNeverSniffed: // Handled much earlier.
910 NOTREACHED();
911 return kBlock;
912 }
913 NOTREACHED();
914 return kBlock;
915 }
916
917 // static
HasNoSniff(const network::mojom::URLResponseHead & response)918 bool CrossOriginReadBlocking::ResponseAnalyzer::HasNoSniff(
919 const network::mojom::URLResponseHead& response) {
920 if (!response.headers)
921 return false;
922 std::string nosniff_header;
923 response.headers->GetNormalizedHeader("x-content-type-options",
924 &nosniff_header);
925 return base::LowerCaseEqualsASCII(nosniff_header, "nosniff");
926 }
927
928 // static
SeemsSensitiveFromCORSHeuristic(const network::mojom::URLResponseHead & response)929 bool CrossOriginReadBlocking::ResponseAnalyzer::SeemsSensitiveFromCORSHeuristic(
930 const network::mojom::URLResponseHead& response) {
931 // Check if the response has an Access-Control-Allow-Origin with a value other
932 // than "*" or "null" ("null" offers no more protection than "*" because it
933 // matches any unique origin).
934 if (!response.headers)
935 return false;
936 std::string cors_header_value;
937 response.headers->GetNormalizedHeader("access-control-allow-origin",
938 &cors_header_value);
939 if (cors_header_value != "*" && cors_header_value != "null" &&
940 cors_header_value != "") {
941 return true;
942 }
943 return false;
944 }
945
946 // static
947 bool CrossOriginReadBlocking::ResponseAnalyzer::
SeemsSensitiveFromCacheHeuristic(const network::mojom::URLResponseHead & response)948 SeemsSensitiveFromCacheHeuristic(
949 const network::mojom::URLResponseHead& response) {
950 // Check if the response has both Vary: Origin and Cache-Control: Private
951 // headers, which we take as a signal that it may be a sensitive resource. We
952 // require both to reduce the number of false positives (as both headers are
953 // sometimes used on non-sensitive resources). Cache-Control: no-store appears
954 // on non-sensitive resources that change frequently, so we ignore it here.
955 if (!response.headers)
956 return false;
957 bool has_vary_origin = response.headers->HasHeaderValue("vary", "origin");
958 bool has_cache_private =
959 response.headers->HasHeaderValue("cache-control", "private");
960 return has_vary_origin && has_cache_private;
961 }
962
963 // static
SupportsRangeRequests(const network::mojom::URLResponseHead & response)964 bool CrossOriginReadBlocking::ResponseAnalyzer::SupportsRangeRequests(
965 const network::mojom::URLResponseHead& response) {
966 if (response.headers) {
967 std::string value;
968 response.headers->GetNormalizedHeader("accept-ranges", &value);
969 if (!value.empty() && !base::LowerCaseEqualsASCII(value, "none")) {
970 return true;
971 }
972 }
973 return false;
974 }
975
976 // static
977 CrossOriginReadBlocking::ResponseAnalyzer::MimeTypeBucket
GetMimeTypeBucket(const network::mojom::URLResponseHead & response)978 CrossOriginReadBlocking::ResponseAnalyzer::GetMimeTypeBucket(
979 const network::mojom::URLResponseHead& response) {
980 std::string mime_type;
981 if (response.headers)
982 response.headers->GetMimeType(&mime_type);
983 MimeType canonical_mime_type = GetCanonicalMimeType(mime_type);
984 switch (canonical_mime_type) {
985 case MimeType::kHtml:
986 case MimeType::kXml:
987 case MimeType::kJson:
988 case MimeType::kNeverSniffed:
989 case MimeType::kPlain:
990 return kProtected;
991 break;
992 case MimeType::kOthers:
993 break;
994 case MimeType::kInvalidMimeType:
995 NOTREACHED();
996 break;
997 }
998
999 // Javascript is assumed public. See also
1000 // https://mimesniff.spec.whatwg.org/#javascript-mime-type.
1001 constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII;
1002 for (const std::string& suffix : kJavaScriptSuffixes) {
1003 if (base::EndsWith(mime_type, suffix, kCaseInsensitive)) {
1004 return kPublic;
1005 }
1006 }
1007
1008 // Images are assumed public. See also
1009 // https://mimesniff.spec.whatwg.org/#image-mime-type.
1010 if (base::StartsWith(mime_type, "image", kCaseInsensitive)) {
1011 return kPublic;
1012 }
1013
1014 // Audio and video are assumed public. See also
1015 // https://mimesniff.spec.whatwg.org/#audio-or-video-mime-type.
1016 if (base::StartsWith(mime_type, "audio", kCaseInsensitive) ||
1017 base::StartsWith(mime_type, "video", kCaseInsensitive) ||
1018 base::LowerCaseEqualsASCII(mime_type, "application/ogg") ||
1019 base::LowerCaseEqualsASCII(mime_type, "application/dash+xml")) {
1020 return kPublic;
1021 }
1022
1023 // CSS files are assumed public and must be sent with text/css.
1024 if (base::LowerCaseEqualsASCII(mime_type, "text/css")) {
1025 return kPublic;
1026 }
1027 return kOther;
1028 }
1029
CreateSniffers()1030 void CrossOriginReadBlocking::ResponseAnalyzer::CreateSniffers() {
1031 // Create one or more |sniffers_| to confirm that the body is actually the
1032 // MIME type advertised in the Content-Type header.
1033 DCHECK(needs_sniffing());
1034 DCHECK(sniffers_.empty());
1035
1036 // When the MIME type is "text/plain", create sniffers for HTML, XML and
1037 // JSON. If any of these sniffers match, the response will be blocked.
1038 const bool use_all = canonical_mime_type_ == MimeType::kPlain;
1039
1040 // HTML sniffer.
1041 if (use_all || canonical_mime_type_ == MimeType::kHtml) {
1042 sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
1043 &network::CrossOriginReadBlocking::SniffForHTML));
1044 }
1045
1046 // XML sniffer.
1047 if (use_all || canonical_mime_type_ == MimeType::kXml) {
1048 sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
1049 &network::CrossOriginReadBlocking::SniffForXML));
1050 }
1051
1052 // JSON sniffer.
1053 if (use_all || canonical_mime_type_ == MimeType::kJson) {
1054 sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
1055 &network::CrossOriginReadBlocking::SniffForJSON));
1056 }
1057
1058 // Parser-breaker sniffer.
1059 //
1060 // Because these prefixes are an XSSI-defeating mechanism, CORB considers
1061 // them distinctive enough to be worth blocking no matter the Content-Type
1062 // header. So this sniffer is created unconditionally.
1063 //
1064 // For MimeType::kOthers, this will be the only sniffer that's active.
1065 sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
1066 &network::CrossOriginReadBlocking::SniffForFetchOnlyResource));
1067 }
1068
SniffResponseBody(base::StringPiece data,size_t new_data_offset)1069 void CrossOriginReadBlocking::ResponseAnalyzer::SniffResponseBody(
1070 base::StringPiece data,
1071 size_t new_data_offset) {
1072 DCHECK(needs_sniffing());
1073 DCHECK(!sniffers_.empty());
1074 DCHECK(!found_blockable_content_);
1075
1076 DCHECK_LE(data.size(), static_cast<size_t>(net::kMaxBytesToSniff));
1077 DCHECK_LE(new_data_offset, data.size());
1078 bool has_new_data = (new_data_offset < data.size());
1079
1080 for (size_t i = 0; i < sniffers_.size();) {
1081 if (has_new_data)
1082 sniffers_[i]->OnDataAvailable(data, new_data_offset);
1083
1084 if (sniffers_[i]->WantsMoreData()) {
1085 i++;
1086 continue;
1087 }
1088
1089 if (sniffers_[i]->IsConfirmedContentType()) {
1090 found_blockable_content_ = true;
1091 sniffers_.clear();
1092 break;
1093 } else {
1094 // This response is CORB-exempt as far as this sniffer is concerned;
1095 // remove it from the list.
1096 sniffers_.erase(sniffers_.begin() + i);
1097 }
1098 }
1099 }
1100
ShouldAllow() const1101 bool CrossOriginReadBlocking::ResponseAnalyzer::ShouldAllow() const {
1102 // If we're in hypothetical mode then CORB must have decided to kAllow (see
1103 // comment in ShouldBlock). Thus we just need to wait until the sniffers are
1104 // all done (i.e. empty).
1105 if (hypothetical_sniffing_mode_) {
1106 DCHECK_EQ(should_block_based_on_headers_, kAllow);
1107 return sniffers_.empty();
1108 }
1109 switch (should_block_based_on_headers_) {
1110 case kAllow:
1111 return true;
1112 case kNeedToSniffMore:
1113 return sniffers_.empty() && !found_blockable_content_;
1114 case kBlock:
1115 return false;
1116 }
1117 }
1118
ShouldBlock() const1119 bool CrossOriginReadBlocking::ResponseAnalyzer::ShouldBlock() const {
1120 // If we're in *hypothetical* sniffing mode then the following must be true:
1121 // (1) We are only sniffing to find out if CORB would have blocked the request
1122 // were it made cross origin (CORB itself did *not* need to sniff the file).
1123 // (2) CORB must have decided to kAllow (if it was kBlock then the protection
1124 // decision would have been kBlock as well, no hypothetical mode needed).
1125 if (hypothetical_sniffing_mode_) {
1126 DCHECK_EQ(should_block_based_on_headers_, kAllow);
1127 return false;
1128 }
1129 switch (should_block_based_on_headers_) {
1130 case kAllow:
1131 return false;
1132 case kNeedToSniffMore:
1133 return sniffers_.empty() && found_blockable_content_;
1134 case kBlock:
1135 return true;
1136 }
1137 }
1138
ShouldReportBlockedResponse() const1139 bool CrossOriginReadBlocking::ResponseAnalyzer::ShouldReportBlockedResponse()
1140 const {
1141 if (!ShouldBlock())
1142 return false;
1143
1144 // Don't bother showing a warning message when blocking responses that are
1145 // already empty.
1146 if (content_length_ == 0)
1147 return false;
1148 if (http_response_code_ == 204)
1149 return false;
1150
1151 // Don't bother showing a warning message when blocking responses that are
1152 // associated with error responses (e.g. it is quite common to serve a
1153 // text/html 404 error page for an <img> tag pointing to a wrong URL).
1154 if (400 <= http_response_code_ && http_response_code_ <= 599)
1155 return false;
1156
1157 return true;
1158 }
1159
LogAllowedResponse()1160 void CrossOriginReadBlocking::ResponseAnalyzer::LogAllowedResponse() {
1161 // Only log the ContentScript UMA when the request really came from a content
1162 // script.
1163 //
1164 // Note that we will never get here in the following cases:
1165 // 1) When CORB is disabled (e.g. for allowlisted content scripts OR for
1166 // extension background pages).
1167 // 2) When CorbAllowlistAlsoAppliesToOorCors and OOR-CORS features are both
1168 // enabled, because:
1169 // 2a) The former feature forces |ignore_isolated_world_origin| for
1170 // non-allowlisted content scripts and OOR-CORS in this case resets
1171 // |isolated_world_origin| to base::nullopt.
1172 // 2b) Even if we could preserve |isolated_world_origin_| just for UMA,
1173 // CORS would block the response before LogAllowedResponse gets a
1174 // chance to run.
1175 bool is_for_non_http_isolated_world =
1176 isolated_world_origin_.has_value() &&
1177 isolated_world_origin_->scheme() != url::kHttpScheme &&
1178 isolated_world_origin_->scheme() != url::kHttpsScheme;
1179 if (is_for_non_http_isolated_world) {
1180 // We log whether CORS would block this response if it were enabled for
1181 // content scripts. Caveat: This will be true even in cases where the
1182 // server would have sent an ACAO response header if Chrome had sent an
1183 // Origin request header.
1184 UMA_HISTOGRAM_BOOLEAN(
1185 "SiteIsolation.XSD.Browser.AllowedByCorbButNotCors.ContentScript",
1186 is_cors_blocking_expected_);
1187
1188 // Ask the browser process to log Rappor and UKM metrics.
1189 if (network_service_client_ && is_cors_blocking_expected_) {
1190 network_service_client_->LogCrossOriginFetchFromContentScript3(
1191 isolated_world_origin_->host());
1192 }
1193 }
1194
1195 if (corb_protection_logging_needs_sniffing_) {
1196 LogSensitiveResponseProtection(
1197 SniffingDecisionToProtectionDecision(found_blockable_content_));
1198 }
1199 // Note that if a response is allowed because of hitting EOF or
1200 // kMaxBytesToSniff, then |sniffers_| are not emptied and consequently
1201 // ShouldAllow doesn't start returning true. This means that we can't
1202 // DCHECK(ShouldAllow()) or DCHECK(sniffers_.empty()) here - the decision to
1203 // allow the response could have been made in the
1204 // CrossSiteDocumentResourceHandler layer without CrossOriginReadBlocking
1205 // realizing that it has hit EOF or kMaxBytesToSniff.
1206
1207 // Note that the response might be allowed even if ShouldBlock() returns true
1208 // - for example to allow responses to requests initiated by content scripts.
1209 // This means that we cannot DCHECK(!ShouldBlock()) here.
1210
1211 CrossOriginReadBlocking::LogAction(
1212 needs_sniffing()
1213 ? network::CrossOriginReadBlocking::Action::kAllowedAfterSniffing
1214 : network::CrossOriginReadBlocking::Action::kAllowedWithoutSniffing);
1215 }
1216
LogBlockedResponse()1217 void CrossOriginReadBlocking::ResponseAnalyzer::LogBlockedResponse() {
1218 DCHECK(!ShouldAllow());
1219 DCHECK(ShouldBlock());
1220 DCHECK(sniffers_.empty());
1221
1222 if (corb_protection_logging_needs_sniffing_) {
1223 LogSensitiveResponseProtection(
1224 SniffingDecisionToProtectionDecision(found_blockable_content_));
1225 }
1226
1227 CrossOriginReadBlocking::LogAction(
1228 needs_sniffing()
1229 ? network::CrossOriginReadBlocking::Action::kBlockedAfterSniffing
1230 : network::CrossOriginReadBlocking::Action::kBlockedWithoutSniffing);
1231
1232 UMA_HISTOGRAM_ENUMERATION(
1233 "SiteIsolation.XSD.Browser.Blocked.CanonicalMimeType",
1234 canonical_mime_type_);
1235 }
1236
1237 // static
1238 CrossOriginReadBlocking::ResponseAnalyzer::CrossOriginProtectionDecision
BlockingDecisionToProtectionDecision(BlockingDecision blocking_decision)1239 CrossOriginReadBlocking::ResponseAnalyzer::BlockingDecisionToProtectionDecision(
1240 BlockingDecision blocking_decision) {
1241 switch (blocking_decision) {
1242 case kAllow:
1243 return CrossOriginProtectionDecision::kAllow;
1244 case kBlock:
1245 return CrossOriginProtectionDecision::kBlock;
1246 case kNeedToSniffMore:
1247 return CrossOriginProtectionDecision::kNeedToSniffMore;
1248 }
1249 }
1250
1251 // static
1252 CrossOriginReadBlocking::ResponseAnalyzer::CrossOriginProtectionDecision
SniffingDecisionToProtectionDecision(bool found_blockable_content)1253 CrossOriginReadBlocking::ResponseAnalyzer::SniffingDecisionToProtectionDecision(
1254 bool found_blockable_content) {
1255 if (found_blockable_content)
1256 return CrossOriginProtectionDecision::kBlockedAfterSniffing;
1257 return CrossOriginProtectionDecision::kAllowedAfterSniffing;
1258 }
1259
LogSensitiveResponseProtection(CrossOriginProtectionDecision protection_decision) const1260 void CrossOriginReadBlocking::ResponseAnalyzer::LogSensitiveResponseProtection(
1261 CrossOriginProtectionDecision protection_decision) const {
1262 DCHECK(seems_sensitive_from_cors_heuristic_ ||
1263 seems_sensitive_from_cache_heuristic_);
1264 if (seems_sensitive_from_cors_heuristic_) {
1265 switch (mime_type_bucket_) {
1266 case kProtected:
1267 UMA_HISTOGRAM_ENUMERATION(
1268 "SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType",
1269 protection_decision);
1270 // We report if a response with a protected MIME type supports range
1271 // requests since we want to measure how often making a multipart range
1272 // requests would have allowed bypassing CORB.
1273 if (protection_decision == CrossOriginProtectionDecision::kBlock) {
1274 UMA_HISTOGRAM_BOOLEAN(
1275 "SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
1276 "BlockedWithRangeSupport",
1277 supports_range_requests_);
1278 UMA_HISTOGRAM_BOOLEAN(
1279 "SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
1280 "BlockedWithoutSniffing.HasNoSniff",
1281 has_nosniff_header_);
1282 } else if (protection_decision ==
1283 CrossOriginProtectionDecision::kBlockedAfterSniffing) {
1284 UMA_HISTOGRAM_BOOLEAN(
1285 "SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
1286 "BlockedAfterSniffingWithRangeSupport",
1287 supports_range_requests_);
1288 }
1289 break;
1290 case kPublic:
1291 UMA_HISTOGRAM_ENUMERATION(
1292 "SiteIsolation.CORBProtection.CORSHeuristic.PublicMimeType",
1293 protection_decision);
1294 break;
1295 case kOther:
1296 UMA_HISTOGRAM_ENUMERATION(
1297 "SiteIsolation.CORBProtection.CORSHeuristic.OtherMimeType",
1298 protection_decision);
1299 }
1300 }
1301 if (seems_sensitive_from_cache_heuristic_) {
1302 switch (mime_type_bucket_) {
1303 case kProtected:
1304 UMA_HISTOGRAM_ENUMERATION(
1305 "SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType",
1306 protection_decision);
1307 if (protection_decision == CrossOriginProtectionDecision::kBlock) {
1308 UMA_HISTOGRAM_BOOLEAN(
1309 "SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
1310 "BlockedWithRangeSupport",
1311 supports_range_requests_);
1312 UMA_HISTOGRAM_BOOLEAN(
1313 "SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
1314 "BlockedWithoutSniffing.HasNoSniff",
1315 has_nosniff_header_);
1316 } else if (protection_decision ==
1317 CrossOriginProtectionDecision::kBlockedAfterSniffing) {
1318 UMA_HISTOGRAM_BOOLEAN(
1319 "SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
1320 "BlockedAfterSniffingWithRangeSupport",
1321 supports_range_requests_);
1322 }
1323 break;
1324 case kPublic:
1325 UMA_HISTOGRAM_ENUMERATION(
1326 "SiteIsolation.CORBProtection.CacheHeuristic.PublicMimeType",
1327 protection_decision);
1328 break;
1329 case kOther:
1330 UMA_HISTOGRAM_ENUMERATION(
1331 "SiteIsolation.CORBProtection.CacheHeuristic.OtherMimeType",
1332 protection_decision);
1333 }
1334 }
1335 // Also log if the server supports range requests, since these may allow
1336 // bypassing CORB.
1337 UMA_HISTOGRAM_BOOLEAN(
1338 "SiteIsolation.CORBProtection.SensitiveWithRangeSupport",
1339 supports_range_requests_);
1340 }
1341
1342 // static
AddExceptionForPlugin(int process_id)1343 void CrossOriginReadBlocking::AddExceptionForPlugin(int process_id) {
1344 std::set<int>& plugin_proxies = GetPluginProxyingProcesses();
1345 plugin_proxies.insert(process_id);
1346 }
1347
1348 // static
ShouldAllowForPlugin(int process_id)1349 bool CrossOriginReadBlocking::ShouldAllowForPlugin(int process_id) {
1350 std::set<int>& plugin_proxies = GetPluginProxyingProcesses();
1351 return base::Contains(plugin_proxies, process_id);
1352 }
1353
1354 // static
RemoveExceptionForPlugin(int process_id)1355 void CrossOriginReadBlocking::RemoveExceptionForPlugin(int process_id) {
1356 std::set<int>& plugin_proxies = GetPluginProxyingProcesses();
1357 plugin_proxies.erase(process_id);
1358 }
1359
1360 } // namespace network
1361