1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "services/network/cross_origin_read_blocking.h"
6 
7 #include <stddef.h>
8 
9 #include <algorithm>
10 #include <set>
11 #include <string>
12 #include <unordered_set>
13 #include <vector>
14 
15 #include "base/command_line.h"
16 #include "base/containers/flat_set.h"
17 #include "base/feature_list.h"
18 #include "base/lazy_instance.h"
19 #include "base/logging.h"
20 #include "base/metrics/histogram_macros.h"
21 #include "base/no_destructor.h"
22 #include "base/stl_util.h"
23 #include "base/strings/string_piece.h"
24 #include "base/strings/string_util.h"
25 #include "net/base/mime_sniffer.h"
26 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
27 #include "net/http/http_response_headers.h"
28 #include "services/network/public/cpp/cross_origin_embedder_policy.h"
29 #include "services/network/public/cpp/cross_origin_resource_policy.h"
30 #include "services/network/public/cpp/features.h"
31 #include "services/network/public/cpp/initiator_lock_compatibility.h"
32 #include "services/network/public/mojom/network_context.mojom.h"
33 #include "services/network/public/mojom/network_service.mojom.h"
34 #include "services/network/public/mojom/url_response_head.mojom.h"
35 
36 using base::StringPiece;
37 using MimeType = network::CrossOriginReadBlocking::MimeType;
38 using SniffingResult = network::CrossOriginReadBlocking::SniffingResult;
39 
40 namespace network {
41 
42 namespace {
43 
// Lower-case MIME type strings that CORB compares Content-Type values against.
constexpr char kTextHtml[] = "text/html";
constexpr char kTextXml[] = "text/xml";
constexpr char kAppXml[] = "application/xml";
constexpr char kAppJson[] = "application/json";
constexpr char kImageSvg[] = "image/svg+xml";
// See https://crbug.com/947498 for why DASH video gets its own constant.
constexpr char kDashVideo[] = "application/dash+xml";
constexpr char kTextJson[] = "text/json";
constexpr char kTextPlain[] = "text/plain";
53 
// Javascript MIME type suffixes for use in CORB protection logging. See also
// https://mimesniff.spec.whatwg.org/#javascript-mime-type.
//
// Both the characters and the pointers themselves are const, so the table
// cannot be modified at runtime.
const char* const kJavaScriptSuffixes[] = {"ecmascript",
                                           "javascript",
                                           "x-ecmascript",
                                           "x-javascript",
                                           "javascript1.0",
                                           "javascript1.1",
                                           "javascript1.2",
                                           "javascript1.3",
                                           "javascript1.4",
                                           "javascript1.5",
                                           "jscript",
                                           "livescript",
                                           "js",
                                           "x-js"};
70 
// TODO(lukasza): Remove kJsonProtobuf once this MIME type is not used in
// practice.  See also https://crbug.com/826756#c3
constexpr char kJsonProtobuf[] = "application/json+protobuf";

// Structured-syntax suffixes: a MIME type ending in one of these is treated
// as JSON or XML respectively.
constexpr char kJsonSuffix[] = "+json";
constexpr char kXmlSuffix[] = "+xml";
78 
AdvancePastWhitespace(StringPiece * data)79 void AdvancePastWhitespace(StringPiece* data) {
80   size_t offset = data->find_first_not_of(" \t\r\n");
81   if (offset == base::StringPiece::npos) {
82     // |data| was entirely whitespace.
83     *data = StringPiece();
84   } else {
85     data->remove_prefix(offset);
86   }
87 }
88 
89 // Returns kYes if |data| starts with one of the string patterns in
90 // |signatures|, kMaybe if |data| is a prefix of one of the patterns in
91 // |signatures|, and kNo otherwise.
92 //
93 // When kYes is returned, the matching prefix is erased from |data|.
MatchesSignature(StringPiece * data,const StringPiece signatures[],size_t arr_size,base::CompareCase compare_case)94 SniffingResult MatchesSignature(StringPiece* data,
95                                 const StringPiece signatures[],
96                                 size_t arr_size,
97                                 base::CompareCase compare_case) {
98   for (size_t i = 0; i < arr_size; ++i) {
99     if (signatures[i].length() <= data->length()) {
100       if (base::StartsWith(*data, signatures[i], compare_case)) {
101         // When |signatures[i]| is a prefix of |data|, it constitutes a match.
102         // Strip the matching characters, and return.
103         data->remove_prefix(signatures[i].length());
104         return CrossOriginReadBlocking::kYes;
105       }
106     } else {
107       if (base::StartsWith(signatures[i], *data, compare_case)) {
108         // When |data| is a prefix of |signatures[i]|, that means that
109         // subsequent bytes in the stream could cause a match to occur.
110         return CrossOriginReadBlocking::kMaybe;
111       }
112     }
113   }
114   return CrossOriginReadBlocking::kNo;
115 }
116 
FindFirstJavascriptLineTerminator(const base::StringPiece & hay,size_t pos)117 size_t FindFirstJavascriptLineTerminator(const base::StringPiece& hay,
118                                          size_t pos) {
119   // https://www.ecma-international.org/ecma-262/8.0/index.html#prod-LineTerminator
120   // defines LineTerminator ::= <LF> | <CR> | <LS> | <PS>.
121   //
122   // https://www.ecma-international.org/ecma-262/8.0/index.html#sec-line-terminators
123   // defines <LF>, <CR>, <LS> ::= "\u2028", <PS> ::= "\u2029".
124   //
125   // In UTF8 encoding <LS> is 0xE2 0x80 0xA8 and <PS> is 0xE2 0x80 0xA9.
126   while (true) {
127     pos = hay.find_first_of("\n\r\xe2", pos);
128     if (pos == base::StringPiece::npos)
129       break;
130 
131     if (hay[pos] != '\xe2') {
132       DCHECK(hay[pos] == '\r' || hay[pos] == '\n');
133       break;
134     }
135 
136     // TODO(lukasza): Prevent matching 3 bytes that span/straddle 2 UTF8
137     // characters.
138     base::StringPiece substr = hay.substr(pos);
139     if (substr.starts_with("\u2028") || substr.starts_with("\u2029"))
140       break;
141 
142     pos++;  // Skip the \xe2 character.
143   }
144   return pos;
145 }
146 
147 // Checks if |data| starts with an HTML comment (i.e. with "<!-- ... -->").
148 // - If there is a valid, terminated comment then returns kYes.
149 // - If there is a start of a comment, but the comment is not completed (e.g.
150 //   |data| == "<!-" or |data| == "<!-- not terminated yet") then returns
151 //   kMaybe.
152 // - Returns kNo otherwise.
153 //
154 // Mutates |data| to advance past the comment when returning kYes.  Note that
155 // SingleLineHTMLCloseComment ECMAscript rule is taken into account which means
156 // that characters following an HTML comment are consumed up to the nearest line
157 // terminating character.
MaybeSkipHtmlComment(StringPiece * data)158 SniffingResult MaybeSkipHtmlComment(StringPiece* data) {
159   constexpr StringPiece kStartString = "<!--";
160   if (!data->starts_with(kStartString)) {
161     if (kStartString.starts_with(*data))
162       return CrossOriginReadBlocking::kMaybe;
163     return CrossOriginReadBlocking::kNo;
164   }
165 
166   constexpr StringPiece kEndString = "-->";
167   size_t end_of_html_comment = data->find(kEndString, kStartString.length());
168   if (end_of_html_comment == StringPiece::npos)
169     return CrossOriginReadBlocking::kMaybe;
170   end_of_html_comment += kEndString.length();
171 
172   // Skipping until the first line terminating character.  See
173   // https://crbug.com/839945 for the motivation behind this.
174   size_t end_of_line =
175       FindFirstJavascriptLineTerminator(*data, end_of_html_comment);
176   if (end_of_line == base::StringPiece::npos)
177     return CrossOriginReadBlocking::kMaybe;
178 
179   // Found real end of the combined HTML/JS comment.
180   data->remove_prefix(end_of_line);
181   return CrossOriginReadBlocking::kYes;
182 }
183 
184 // Removes headers that should be blocked in cross-origin case.
185 //
186 // Note that corbSanitizedResponse in https://fetch.spec.whatwg.org/#main-fetch
187 // has an empty list of headers, but the code below doesn't remove all the
188 // headers for improved user experience - for better error messages for CORS.
189 // See also https://github.com/whatwg/fetch/pull/686#issuecomment-383711732 and
190 // the http/tests/xmlhttprequest/origin-exact-matching/07.html layout test.
191 //
192 // Note that CORB doesn't block responses allowed through CORS - this means
193 // that the list of allowed headers below doesn't have to consider header
194 // names listed in the Access-Control-Expose-Headers header.
BlockResponseHeaders(const scoped_refptr<net::HttpResponseHeaders> & headers)195 void BlockResponseHeaders(
196     const scoped_refptr<net::HttpResponseHeaders>& headers) {
197   DCHECK(headers);
198   std::unordered_set<std::string> names_of_headers_to_remove;
199 
200   size_t it = 0;
201   std::string name;
202   std::string value;
203   while (headers->EnumerateHeaderLines(&it, &name, &value)) {
204     // Don't remove CORS headers - doing so would lead to incorrect error
205     // messages for CORS-blocked responses (e.g. Blink would say "[...] No
206     // 'Access-Control-Allow-Origin' header is present [...]" instead of saying
207     // something like "[...] Access-Control-Allow-Origin' header has a value
208     // 'http://www2.localhost:8000' that is not equal to the supplied origin
209     // [...]").
210     if (base::StartsWith(name, "Access-Control-",
211                          base::CompareCase::INSENSITIVE_ASCII)) {
212       continue;
213     }
214 
215     // Remove all other headers.
216     names_of_headers_to_remove.insert(base::ToLowerASCII(name));
217   }
218 
219   headers->RemoveHeaders(names_of_headers_to_remove);
220 }
221 
GetPluginProxyingProcesses()222 std::set<int>& GetPluginProxyingProcesses() {
223   static base::NoDestructor<std::set<int>> set;
224   return *set;
225 }
226 
227 // The function below returns a set of MIME types below may be blocked by CORB
228 // without any confirmation sniffing (in contrast to HTML/JSON/XML which require
229 // confirmation sniffing because images, scripts, etc. are frequently
230 // mislabelled by http servers as HTML/JSON/XML).
231 //
232 // CORB cannot block images, scripts, stylesheets and other resources that the
233 // web standards allows to be fetched in `no-cors` mode.  CORB cannot block
234 // these resources even if they are not explicitly labeled with their type - in
235 // practice http servers may serve images as application/octet-stream or even as
236 // text/html.  OTOH, CORB *can* block all Content-Types that are very unlikely
237 // to represent images, scripts, stylesheets, etc. - such Content-Types are
238 // returned by GetNeverSniffedMimeTypes.
239 //
240 // Some of the Content-Types returned below might seem like a layering violation
241 // (e.g. why would //services/network care about application/zip or
242 // application/pdf or application/msword), but note that the decision to list a
243 // Content-Type below is not driven by whether the type is handled above or
244 // below //services/network layer.  Instead the decision to list a Content-Type
245 // below is driven by whether the Content-Type is unlikely to be attached to an
246 // image, script, stylesheet or other subresource type that web standards
247 // require to be fetched in `no-cors` mode.  In particular, CORB would still
248 // want to prevent cross-site disclosure of "application/msword" even if Chrome
249 // did not support this type (AFAIK today this support is only present on
250 // ChromeOS) in one of Chrome's many layers.  Similarly, CORB wants to prevent
251 // disclosure of "application/zip" even though Chrome doesn't have built-in
252 // support for this resource type.  And CORB also wants to protect
253 // "application/pdf" even though Chrome happens to support this resource type.
GetNeverSniffedMimeTypes()254 base::flat_set<std::string>& GetNeverSniffedMimeTypes() {
255   static base::NoDestructor<base::flat_set<std::string>> s_types{{
256       // The types below (zip, protobuf, etc.) are based on most commonly used
257       // content types according to HTTP Archive - see:
258       // https://github.com/whatwg/fetch/issues/860#issuecomment-457330454
259       "application/gzip",
260       "application/x-gzip",
261       "application/x-protobuf",
262       "application/zip",
263       "text/event-stream",
264       // The types listed below were initially taken from the list of types
265       // handled by MimeHandlerView (although we would want to protect them even
266       // if Chrome didn't support rendering these content types and/or if there
267       // was no such thing as MimeHandlerView).
268       "application/msexcel",
269       "application/mspowerpoint",
270       "application/msword",
271       "application/msword-template",
272       "application/pdf",
273       "application/vnd.ces-quickpoint",
274       "application/vnd.ces-quicksheet",
275       "application/vnd.ces-quickword",
276       "application/vnd.ms-excel",
277       "application/vnd.ms-excel.sheet.macroenabled.12",
278       "application/vnd.ms-powerpoint",
279       "application/vnd.ms-powerpoint.presentation.macroenabled.12",
280       "application/vnd.ms-word",
281       "application/vnd.ms-word.document.12",
282       "application/vnd.ms-word.document.macroenabled.12",
283       "application/vnd.msword",
284       "application/"
285           "vnd.openxmlformats-officedocument.presentationml.presentation",
286       "application/"
287           "vnd.openxmlformats-officedocument.presentationml.template",
288       "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
289       "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
290       "application/"
291           "vnd.openxmlformats-officedocument.wordprocessingml.document",
292       "application/"
293           "vnd.openxmlformats-officedocument.wordprocessingml.template",
294       "application/vnd.presentation-openxml",
295       "application/vnd.presentation-openxmlm",
296       "application/vnd.spreadsheet-openxml",
297       "application/vnd.wordprocessing-openxml",
298       "text/csv",
299       // Block signed documents to protect (potentially sensitive) unencrypted
300       // body of the signed document.  There should be no need to block
301       // encrypted documents (e.g. `multipart/encrypted` nor
302       // `application/pgp-encrypted`) and no need to block the signatures (e.g.
303       // `application/pgp-signature`).
304       "multipart/signed",
305       // Block multipart responses because a protected type (e.g. JSON) can
306       // become multipart if returned in a range request with multiple parts.
307       // This is compatible with the web because the renderer can only see into
308       // the result of a fetch for a multipart file when the request is made
309       // with CORS. Media tags only make single-range requests which will not
310       // have the multipart type.
311       "multipart/byteranges",
312       // TODO(lukasza): https://crbug.com/802836#c11: Add
313       // application/signed-exchange.
314   }};
315 
316   // All items need to be lower-case, to support case-insensitive comparisons
317   // later.
318   DCHECK(std::all_of(
319       s_types->begin(), s_types->end(),
320       [](const std::string& s) { return s == base::ToLowerASCII(s); }));
321 
322   return *s_types;
323 }
324 
325 }  // namespace
326 
GetCanonicalMimeType(base::StringPiece mime_type)327 MimeType CrossOriginReadBlocking::GetCanonicalMimeType(
328     base::StringPiece mime_type) {
329   // Checking for image/svg+xml and application/dash+xml early ensures that they
330   // won't get classified as MimeType::kXml by the presence of the "+xml"
331   // suffix.
332   if (base::LowerCaseEqualsASCII(mime_type, kImageSvg) ||
333       base::LowerCaseEqualsASCII(mime_type, kDashVideo))
334     return MimeType::kOthers;
335 
336   // See also https://mimesniff.spec.whatwg.org/#html-mime-type
337   if (base::LowerCaseEqualsASCII(mime_type, kTextHtml))
338     return MimeType::kHtml;
339 
340   // See also https://mimesniff.spec.whatwg.org/#json-mime-type
341   constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII;
342   if (base::LowerCaseEqualsASCII(mime_type, kAppJson) ||
343       base::LowerCaseEqualsASCII(mime_type, kTextJson) ||
344       base::LowerCaseEqualsASCII(mime_type, kJsonProtobuf) ||
345       base::EndsWith(mime_type, kJsonSuffix, kCaseInsensitive)) {
346     return MimeType::kJson;
347   }
348 
349   // See also https://mimesniff.spec.whatwg.org/#xml-mime-type
350   if (base::LowerCaseEqualsASCII(mime_type, kAppXml) ||
351       base::LowerCaseEqualsASCII(mime_type, kTextXml) ||
352       base::EndsWith(mime_type, kXmlSuffix, kCaseInsensitive)) {
353     return MimeType::kXml;
354   }
355 
356   if (base::LowerCaseEqualsASCII(mime_type, kTextPlain))
357     return MimeType::kPlain;
358 
359   if (base::Contains(GetNeverSniffedMimeTypes(),
360                      base::ToLowerASCII(mime_type))) {
361     return MimeType::kNeverSniffed;
362   }
363 
364   return MimeType::kOthers;
365 }
366 
IsBlockableScheme(const GURL & url)367 bool CrossOriginReadBlocking::IsBlockableScheme(const GURL& url) {
368   // We exclude ftp:// from here. FTP doesn't provide a Content-Type
369   // header which our policy depends on, so we cannot protect any
370   // response from FTP servers.
371   return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);
372 }
373 
IsValidCorsHeaderSet(const url::Origin & frame_origin,const std::string & access_control_origin)374 bool CrossOriginReadBlocking::IsValidCorsHeaderSet(
375     const url::Origin& frame_origin,
376     const std::string& access_control_origin) {
377   // Many websites are sending back "\"*\"" instead of "*". This is
378   // non-standard practice, and not supported by Chrome. Refer to
379   // CrossOriginAccessControl::passesAccessControlCheck().
380 
381   // Note that "null" offers no more protection than "*" because it matches any
382   // unique origin, such as data URLs. Any origin can thus access it, so don't
383   // bother trying to block this case.
384 
385   // TODO(dsjang): * is not allowed for the response from a request
386   // with cookies. This allows for more than what the renderer will
387   // eventually be able to receive, so we won't see illegal cross-site
388   // documents allowed by this. We have to find a way to see if this
389   // response is from a cookie-tagged request or not in the future.
390   if (access_control_origin == "*" || access_control_origin == "null")
391     return true;
392 
393   return frame_origin.IsSameOriginWith(
394       url::Origin::Create(GURL(access_control_origin)));
395 }
396 
397 // This function is a slight modification of |net::SniffForHTML|.
SniffForHTML(StringPiece data)398 SniffingResult CrossOriginReadBlocking::SniffForHTML(StringPiece data) {
399   // The content sniffers used by Chrome and Firefox are using "<!--" as one of
400   // the HTML signatures, but it also appears in valid JavaScript, considered as
401   // well-formed JS by the browser.  Since we do not want to block any JS, we
402   // exclude it from our HTML signatures. This can weaken our CORB policy,
403   // but we can break less websites.
404   //
405   // Note that <body> and <br> are not included below, since <b is a prefix of
406   // them.
407   //
408   // TODO(dsjang): parameterize |net::SniffForHTML| with an option that decides
409   // whether to include <!-- or not, so that we can remove this function.
410   // TODO(dsjang): Once CrossOriginReadBlocking is moved into the browser
411   // process, we should do single-thread checking here for the static
412   // initializer.
413   static constexpr StringPiece kHtmlSignatures[] = {
414       StringPiece("<!doctype html"),  // HTML5 spec
415       StringPiece("<script"),         // HTML5 spec, Mozilla
416       StringPiece("<html"),           // HTML5 spec, Mozilla
417       StringPiece("<head"),           // HTML5 spec, Mozilla
418       StringPiece("<iframe"),         // Mozilla
419       StringPiece("<h1"),             // Mozilla
420       StringPiece("<div"),            // Mozilla
421       StringPiece("<font"),           // Mozilla
422       StringPiece("<table"),          // Mozilla
423       StringPiece("<a"),              // Mozilla
424       StringPiece("<style"),          // Mozilla
425       StringPiece("<title"),          // Mozilla
426       StringPiece("<b"),              // Mozilla (note: subsumes <body>, <br>)
427       StringPiece("<p")               // Mozilla
428   };
429 
430   while (data.length() > 0) {
431     AdvancePastWhitespace(&data);
432 
433     SniffingResult signature_match =
434         MatchesSignature(&data, kHtmlSignatures, base::size(kHtmlSignatures),
435                          base::CompareCase::INSENSITIVE_ASCII);
436     if (signature_match != kNo)
437       return signature_match;
438 
439     SniffingResult comment_match = MaybeSkipHtmlComment(&data);
440     if (comment_match != kYes)
441       return comment_match;
442   }
443 
444   // All of |data| was consumed, without a clear determination.
445   return kMaybe;
446 }
447 
SniffForXML(base::StringPiece data)448 SniffingResult CrossOriginReadBlocking::SniffForXML(base::StringPiece data) {
449   // TODO(dsjang): Once CrossOriginReadBlocking is moved into the browser
450   // process, we should do single-thread checking here for the static
451   // initializer.
452   AdvancePastWhitespace(&data);
453   static constexpr StringPiece kXmlSignatures[] = {StringPiece("<?xml")};
454   return MatchesSignature(&data, kXmlSignatures, base::size(kXmlSignatures),
455                           base::CompareCase::SENSITIVE);
456 }
457 
SniffForJSON(base::StringPiece data)458 SniffingResult CrossOriginReadBlocking::SniffForJSON(base::StringPiece data) {
459   // Currently this function looks for an opening brace ('{'), followed by a
460   // double-quoted string literal, followed by a colon. Importantly, such a
461   // sequence is a Javascript syntax error: although the JSON object syntax is
462   // exactly Javascript's object-initializer syntax, a Javascript object-
463   // initializer expression is not valid as a standalone Javascript statement.
464   //
465   // TODO(nick): We have to come up with a better way to sniff JSON. The
466   // following are known limitations of this function:
467   // https://crbug.com/795470/ Support non-dictionary values (e.g. lists)
468   enum {
469     kStartState,
470     kLeftBraceState,
471     kLeftQuoteState,
472     kEscapeState,
473     kRightQuoteState,
474   } state = kStartState;
475 
476   for (size_t i = 0; i < data.length(); ++i) {
477     const char c = data[i];
478     if (state != kLeftQuoteState && state != kEscapeState) {
479       // Whitespace is ignored (outside of string literals)
480       if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
481         continue;
482     } else {
483       // Inside string literals, control characters should result in rejection.
484       if ((c >= 0 && c < 32) || c == 127)
485         return kNo;
486     }
487 
488     switch (state) {
489       case kStartState:
490         if (c == '{')
491           state = kLeftBraceState;
492         else
493           return kNo;
494         break;
495       case kLeftBraceState:
496         if (c == '"')
497           state = kLeftQuoteState;
498         else
499           return kNo;
500         break;
501       case kLeftQuoteState:
502         if (c == '"')
503           state = kRightQuoteState;
504         else if (c == '\\')
505           state = kEscapeState;
506         break;
507       case kEscapeState:
508         // Simplification: don't bother rejecting hex escapes.
509         state = kLeftQuoteState;
510         break;
511       case kRightQuoteState:
512         if (c == ':')
513           return kYes;
514         else
515           return kNo;
516         break;
517     }
518   }
519   return kMaybe;
520 }
521 
SniffForFetchOnlyResource(base::StringPiece data)522 SniffingResult CrossOriginReadBlocking::SniffForFetchOnlyResource(
523     base::StringPiece data) {
524   // kScriptBreakingPrefixes contains prefixes that are conventionally used to
525   // prevent a JSON response from becoming a valid Javascript program (an attack
526   // vector known as XSSI). The presence of such a prefix is a strong signal
527   // that the resource is meant to be consumed only by the fetch API or
528   // XMLHttpRequest, and is meant to be protected from use in non-CORS, cross-
529   // origin contexts like <script>, <img>, etc.
530   //
531   // These prefixes work either by inducing a syntax error, or inducing an
532   // infinite loop. In either case, the prefix must create a guarantee that no
533   // matter what bytes follow it, the entire response would be worthless to
534   // execute as a <script>.
535   static constexpr StringPiece kScriptBreakingPrefixes[] = {
536       // Parser breaker prefix.
537       //
538       // Built into angular.js (followed by a comma and a newline):
539       //   https://docs.angularjs.org/api/ng/service/$http
540       //
541       // Built into the Java Spring framework (followed by a comma and a space):
542       //   https://goo.gl/xP7FWn
543       //
544       // Observed on google.com (without a comma, followed by a newline).
545       StringPiece(")]}'"),
546 
547       // Apache struts: https://struts.apache.org/plugins/json/#prefix
548       StringPiece("{}&&"),
549 
550       // Spring framework (historically): https://goo.gl/JYPFAv
551       StringPiece("{} &&"),
552 
553       // Infinite loops.
554       StringPiece("for(;;);"),  // observed on facebook.com
555       StringPiece("while(1);"),
556       StringPiece("for (;;);"),
557       StringPiece("while (1);"),
558   };
559   SniffingResult has_parser_breaker = MatchesSignature(
560       &data, kScriptBreakingPrefixes, base::size(kScriptBreakingPrefixes),
561       base::CompareCase::SENSITIVE);
562   if (has_parser_breaker != kNo)
563     return has_parser_breaker;
564 
565   // A non-empty JSON object also effectively introduces a JS syntax error.
566   return SniffForJSON(data);
567 }
568 
569 // static
SanitizeBlockedResponse(network::mojom::URLResponseHead * response)570 void CrossOriginReadBlocking::SanitizeBlockedResponse(
571     network::mojom::URLResponseHead* response) {
572   DCHECK(response);
573   response->content_length = 0;
574   if (response->headers)
575     BlockResponseHeaders(response->headers);
576 }
577 
578 // static
LogAction(Action action)579 void CrossOriginReadBlocking::LogAction(Action action) {
580   UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.Browser.Action", action);
581 }
582 
583 // An interface to enable incremental content sniffing. These are instantiated
584 // for each each request; thus they can be stateful.
585 class CrossOriginReadBlocking::ResponseAnalyzer::ConfirmationSniffer {
586  public:
587   virtual ~ConfirmationSniffer() = default;
588 
589   // Called after data is read from the network. |sniffing_buffer| contains the
590   // entire response body delivered thus far. To support streaming,
591   // |new_data_offset| gives the offset into |sniffing_buffer| at which new data
592   // was appended since the last read.
593   virtual void OnDataAvailable(base::StringPiece sniffing_buffer,
594                                size_t new_data_offset) = 0;
595 
596   // Returns true if the return value of IsConfirmedContentType() might change
597   // with the addition of more data. Returns false if a final decision is
598   // available.
599   virtual bool WantsMoreData() const = 0;
600 
601   // Returns true if the data has been confirmed to be of the CORB-protected
602   // content type that this sniffer is intended to detect.
603   virtual bool IsConfirmedContentType() const = 0;
604 };
605 
606 // A ConfirmationSniffer that wraps one of the sniffing functions from
607 // network::CrossOriginReadBlocking.
608 class CrossOriginReadBlocking::ResponseAnalyzer::SimpleConfirmationSniffer
609     : public CrossOriginReadBlocking::ResponseAnalyzer::ConfirmationSniffer {
610  public:
611   // The function pointer type corresponding to one of the available sniffing
612   // functions from network::CrossOriginReadBlocking.
613   using SnifferFunction =
614       decltype(&network::CrossOriginReadBlocking::SniffForHTML);
615 
SimpleConfirmationSniffer(SnifferFunction sniffer_function)616   explicit SimpleConfirmationSniffer(SnifferFunction sniffer_function)
617       : sniffer_function_(sniffer_function) {}
618   ~SimpleConfirmationSniffer() override = default;
619 
OnDataAvailable(base::StringPiece sniffing_buffer,size_t new_data_offset)620   void OnDataAvailable(base::StringPiece sniffing_buffer,
621                        size_t new_data_offset) final {
622     DCHECK_LE(new_data_offset, sniffing_buffer.length());
623     if (new_data_offset == sniffing_buffer.length()) {
624       // No new data -- do nothing. This happens at end-of-stream.
625       return;
626     }
627     // The sniffing functions don't support streaming, so with each new chunk of
628     // data, call the sniffer on the whole buffer.
629     last_sniff_result_ = (*sniffer_function_)(sniffing_buffer);
630   }
631 
WantsMoreData() const632   bool WantsMoreData() const final {
633     // kNo and kYes results are final, meaning that sniffing can stop once they
634     // occur. A kMaybe result corresponds to an indeterminate state, that could
635     // change to kYes or kNo with more data.
636     return last_sniff_result_ == SniffingResult::kMaybe;
637   }
638 
IsConfirmedContentType() const639   bool IsConfirmedContentType() const final {
640     // Only confirm the mime type if an affirmative pattern (e.g. an HTML tag,
641     // if using the HTML sniffer) was detected.
642     //
643     // Note that if the stream ends (or net::kMaxBytesToSniff has been reached)
644     // and |last_sniff_result_| is kMaybe, the response is allowed to go
645     // through.
646     return last_sniff_result_ == SniffingResult::kYes;
647   }
648 
649  private:
650   // The function that actually knows how to sniff for a content type.
651   SnifferFunction sniffer_function_;
652 
653   // Result of sniffing the data available thus far.
654   SniffingResult last_sniff_result_ = SniffingResult::kMaybe;
655 
656   DISALLOW_COPY_AND_ASSIGN(SimpleConfirmationSniffer);
657 };
658 
ResponseAnalyzer(const GURL & request_url,const base::Optional<url::Origin> & request_initiator,const network::mojom::URLResponseHead & response,const base::Optional<url::Origin> & request_initiator_site_lock,mojom::RequestMode request_mode,const base::Optional<url::Origin> & isolated_world_origin,mojom::NetworkServiceClient * network_service_client)659 CrossOriginReadBlocking::ResponseAnalyzer::ResponseAnalyzer(
660     const GURL& request_url,
661     const base::Optional<url::Origin>& request_initiator,
662     const network::mojom::URLResponseHead& response,
663     const base::Optional<url::Origin>& request_initiator_site_lock,
664     mojom::RequestMode request_mode,
665     const base::Optional<url::Origin>& isolated_world_origin,
666     mojom::NetworkServiceClient* network_service_client)
667     : seems_sensitive_from_cors_heuristic_(
668           SeemsSensitiveFromCORSHeuristic(response)),
669       seems_sensitive_from_cache_heuristic_(
670           SeemsSensitiveFromCacheHeuristic(response)),
671       supports_range_requests_(SupportsRangeRequests(response)),
672       has_nosniff_header_(HasNoSniff(response)),
673       content_length_(response.content_length),
674       http_response_code_(response.headers ? response.headers->response_code()
675                                            : 0),
676       isolated_world_origin_(isolated_world_origin),
677       network_service_client_(network_service_client) {
678   // CORB should look directly at the Content-Type header if one has been
679   // received from the network. Ignoring |response.mime_type| helps avoid
680   // breaking legitimate websites (which might happen more often when blocking
681   // would be based on the mime type sniffed by MimeSniffingResourceHandler).
682   //
683   // This value could be computed later in ShouldBlockBasedOnHeaders after
684   // has_nosniff_header, but we compute it here to keep
685   // ShouldBlockBasedOnHeaders (which is called twice) const.
686   //
687   // TODO(nick): What if the mime type is omitted? Should that be treated the
688   // same as text/plain? https://crbug.com/795971
689   std::string mime_type;
690   if (response.headers)
691     response.headers->GetMimeType(&mime_type);
692   // Canonicalize the MIME type.  Note that even if it doesn't claim to be a
693   // blockable type (i.e., HTML, XML, JSON, or plain text), it may still fail
694   // the checks during the SniffForFetchOnlyResource() phase.
695   canonical_mime_type_ =
696       network::CrossOriginReadBlocking::GetCanonicalMimeType(mime_type);
697 
698   should_block_based_on_headers_ = ShouldBlockBasedOnHeaders(
699       request_mode, request_url, request_initiator, response,
700       request_initiator_site_lock, canonical_mime_type_,
701       &is_cors_blocking_expected_);
702 
703   // Check if the response seems sensitive and if so include in our CORB
704   // protection logging. We have not sniffed yet, so the answer might be
705   // kNeedToSniffMore.
706   if (seems_sensitive_from_cors_heuristic_ ||
707       seems_sensitive_from_cache_heuristic_) {
708     // Create a new Origin with a unique internal identifier so we can pretend
709     // the request is cross-origin.
710     url::Origin cross_origin_request_initiator = url::Origin();
711     BlockingDecision would_protect_based_on_headers = ShouldBlockBasedOnHeaders(
712         request_mode, request_url, cross_origin_request_initiator, response,
713         cross_origin_request_initiator, canonical_mime_type_,
714         nullptr /* is_cors_blocking_expected */);
715     corb_protection_logging_needs_sniffing_ =
716         (would_protect_based_on_headers ==
717          BlockingDecision::kNeedToSniffMore) &&
718         base::FeatureList::IsEnabled(
719             network::features::kCORBProtectionSniffing);
720     hypothetical_sniffing_mode_ =
721         corb_protection_logging_needs_sniffing_ &&
722         should_block_based_on_headers_ != BlockingDecision::kNeedToSniffMore;
723     mime_type_bucket_ = GetMimeTypeBucket(response);
724     UMA_HISTOGRAM_BOOLEAN("SiteIsolation.CORBProtection.SensitiveResource",
725                           true);
726     if (!corb_protection_logging_needs_sniffing_) {
727       // If we are not going to sniff, then we can and must log everything now.
728       LogSensitiveResponseProtection(
729           BlockingDecisionToProtectionDecision(would_protect_based_on_headers));
730     }
731   } else {
732     UMA_HISTOGRAM_BOOLEAN("SiteIsolation.CORBProtection.SensitiveResource",
733                           false);
734   }
735   if (needs_sniffing())
736     CreateSniffers();
737 }
738 
739 CrossOriginReadBlocking::ResponseAnalyzer::~ResponseAnalyzer() = default;
740 
741 // static
742 CrossOriginReadBlocking::ResponseAnalyzer::BlockingDecision
ShouldBlockBasedOnHeaders(mojom::RequestMode request_mode,const GURL & request_url,const base::Optional<url::Origin> & request_initiator,const network::mojom::URLResponseHead & response,const base::Optional<url::Origin> & request_initiator_site_lock,MimeType canonical_mime_type,bool * is_cors_blocking_expected)743 CrossOriginReadBlocking::ResponseAnalyzer::ShouldBlockBasedOnHeaders(
744     mojom::RequestMode request_mode,
745     const GURL& request_url,
746     const base::Optional<url::Origin>& request_initiator,
747     const network::mojom::URLResponseHead& response,
748     const base::Optional<url::Origin>& request_initiator_site_lock,
749     MimeType canonical_mime_type,
750     bool* is_cors_blocking_expected) {
751   if (is_cors_blocking_expected)
752     *is_cors_blocking_expected = false;
753 
754   // The checks in this method are ordered to rule out blocking in most cases as
755   // quickly as possible.  Checks that are likely to lead to returning false or
756   // that are inexpensive should be near the top.
757   url::Origin target_origin = url::Origin::Create(request_url);
758 
759   // Compute the |initiator| of the request, falling back to a unique origin if
760   // there was no initiator or if it was incompatible with the lock. Using a
761   // unique origin makes CORB treat the response as cross-origin and thus
762   // considers it eligible for blocking (based on content-type, sniffing, etc.).
763   url::Origin initiator =
764       GetTrustworthyInitiator(request_initiator_site_lock, request_initiator);
765 
766   // Don't block same-origin documents.
767   if (initiator.IsSameOriginWith(target_origin))
768     return kAllow;
769 
770   // Only block documents from HTTP(S) schemes.  Checking the scheme of
771   // |target_origin| ensures that we also protect content of blob: and
772   // filesystem: URLs if their nested origins have a HTTP(S) scheme.
773   if (!IsBlockableScheme(target_origin.GetURL()))
774     return kAllow;
775 
776   // Allow the response through if this is a CORS request and the response has
777   // valid CORS headers.
778   switch (request_mode) {
779     case mojom::RequestMode::kNavigate:
780     case mojom::RequestMode::kNoCors:
781     case mojom::RequestMode::kSameOrigin:
782       break;
783 
784     case mojom::RequestMode::kCors:
785     case mojom::RequestMode::kCorsWithForcedPreflight:
786       std::string cors_header;
787       response.headers->GetNormalizedHeader("access-control-allow-origin",
788                                             &cors_header);
789       if (IsValidCorsHeaderSet(initiator, cors_header))
790         return kAllow;
791 
792       // At this point we know that the response is 1) cross-origin from the
793       // initiator, 2) in CORS mode, 3) without valid ACAO header.
794       if (is_cors_blocking_expected)
795         *is_cors_blocking_expected = true;
796       break;
797   }
798 
799   // Requests from foo.example.com will consult foo.example.com's service worker
800   // first (if one has been registered).  The service worker can handle requests
801   // initiated by foo.example.com even if they are cross-origin (e.g. requests
802   // for bar.example.com).  This is okay and should not be blocked by CORB,
803   // unless the initiator opted out of CORS / opted into receiving an opaque
804   // response.  See also https://crbug.com/803672.
805   if (response.was_fetched_via_service_worker) {
806     switch (response.response_type) {
807       case network::mojom::FetchResponseType::kBasic:
808       case network::mojom::FetchResponseType::kCors:
809       case network::mojom::FetchResponseType::kDefault:
810       case network::mojom::FetchResponseType::kError:
811         // Non-opaque responses shouldn't be blocked.
812         return kAllow;
813       case network::mojom::FetchResponseType::kOpaque:
814       case network::mojom::FetchResponseType::kOpaqueRedirect:
815         // Opaque responses are eligible for blocking. Continue on...
816         break;
817     }
818   }
819 
820   // Some types (e.g. ZIP) are protected without any confirmation sniffing.
821   if (canonical_mime_type == MimeType::kNeverSniffed)
822     return kBlock;
823 
824   // CORS is currently implemented in the renderer process, so it's useful for
825   // CORB to filter failed "cors" mode fetches to avoid leaking the responses to
826   // the renderer when possible (e.g., depending on MIME type and sniffing).
827   // This will eventually be fixed with OOR-CORS.
828   //
829   // In the mean time, we can try to filter a few additional failed CORS
830   // fetches, treating the Cross-Origin-Resource-Policy (CORP) header as an
831   // opt-in to CORB.  CORP headers are enforced elsewhere and normally only
832   // apply to "no-cors" mode fetches.  If such a header happens to be on the
833   // response during other fetch modes, and if the same-origin and
834   // IsValidCorsHeaderSet checks above have failed (and thus the request will
835   // fail in the renderer), then we can let CORB filter the response without
836   // caring about MIME type or sniffing.
837   //
838   // To make CrossOriginResourcePolicy::IsBlocked apply to all fetch modes in
839   // this case and not just "no-cors", we pass kNoCors as a hard-coded value.
840   // This does not affect the usual enforcement of CORP headers.
841   //
842   // TODO(lukasza): Once OOR-CORS launches (https://crbug.com/736308), this code
843   // block will no longer be necessary since all failed CORS requests will be
844   // blocked before reaching the renderer process (even without CORB's help).
845   // Of course this assumes that OOR-CORS will use trustworthy
846   // |request_initiator| (i.e. vetted against |request_initiator|site_lock|).
847   constexpr mojom::RequestMode kOverreachingRequestMode =
848       mojom::RequestMode::kNoCors;
849   // COEP is not supported when OOR-CORS is disabled.
850   if (CrossOriginResourcePolicy::IsBlocked(
851           request_url, request_url, request_initiator, response,
852           kOverreachingRequestMode, request_initiator_site_lock,
853           CrossOriginEmbedderPolicy())) {
854     // Ignore mime types and/or sniffing and have CORB block all responses with
855     // COR*P* header.
856     return kBlock;
857   }
858 
859   // If this is a partial response, sniffing is not possible, so allow the
860   // response if it's not a protected mime type.
861   std::string range_header;
862   response.headers->GetNormalizedHeader("content-range", &range_header);
863   bool has_range_header = !range_header.empty();
864   if (has_range_header) {
865     switch (canonical_mime_type) {
866       case MimeType::kOthers:
867       case MimeType::kPlain:  // See also https://crbug.com/801709
868         return kAllow;
869       case MimeType::kHtml:
870       case MimeType::kJson:
871       case MimeType::kXml:
872         return kBlock;
873       case MimeType::kInvalidMimeType:
874       case MimeType::kNeverSniffed:  // Handled much earlier.
875         NOTREACHED();
876         return kBlock;
877     }
878   }
879 
880   // We intend to block the response at this point.  However, we will usually
881   // sniff the contents to confirm the MIME type, to avoid blocking incorrectly
882   // labeled JavaScript, JSONP, etc files.
883   //
884   // Note: if there is a nosniff header, it means we should honor the response
885   // mime type without trying to confirm it.
886   //
887   // Decide whether to block based on the MIME type.
888   switch (canonical_mime_type) {
889     case MimeType::kHtml:
890     case MimeType::kXml:
891     case MimeType::kJson:
892     case MimeType::kPlain:
893       if (HasNoSniff(response))
894         return kBlock;
895       else
896         return kNeedToSniffMore;
897       break;
898 
899     case MimeType::kOthers:
900       // Stylesheets shouldn't be sniffed for JSON parser breakers - see
901       // https://crbug.com/809259.
902       if (base::LowerCaseEqualsASCII(response.mime_type, "text/css"))
903         return kAllow;
904       else
905         return kNeedToSniffMore;
906       break;
907 
908     case MimeType::kInvalidMimeType:
909     case MimeType::kNeverSniffed:  // Handled much earlier.
910       NOTREACHED();
911       return kBlock;
912   }
913   NOTREACHED();
914   return kBlock;
915 }
916 
917 // static
HasNoSniff(const network::mojom::URLResponseHead & response)918 bool CrossOriginReadBlocking::ResponseAnalyzer::HasNoSniff(
919     const network::mojom::URLResponseHead& response) {
920   if (!response.headers)
921     return false;
922   std::string nosniff_header;
923   response.headers->GetNormalizedHeader("x-content-type-options",
924                                         &nosniff_header);
925   return base::LowerCaseEqualsASCII(nosniff_header, "nosniff");
926 }
927 
928 // static
SeemsSensitiveFromCORSHeuristic(const network::mojom::URLResponseHead & response)929 bool CrossOriginReadBlocking::ResponseAnalyzer::SeemsSensitiveFromCORSHeuristic(
930     const network::mojom::URLResponseHead& response) {
931   // Check if the response has an Access-Control-Allow-Origin with a value other
932   // than "*" or "null" ("null" offers no more protection than "*" because it
933   // matches any unique origin).
934   if (!response.headers)
935     return false;
936   std::string cors_header_value;
937   response.headers->GetNormalizedHeader("access-control-allow-origin",
938                                         &cors_header_value);
939   if (cors_header_value != "*" && cors_header_value != "null" &&
940       cors_header_value != "") {
941     return true;
942   }
943   return false;
944 }
945 
946 // static
947 bool CrossOriginReadBlocking::ResponseAnalyzer::
SeemsSensitiveFromCacheHeuristic(const network::mojom::URLResponseHead & response)948     SeemsSensitiveFromCacheHeuristic(
949         const network::mojom::URLResponseHead& response) {
950   // Check if the response has both Vary: Origin and Cache-Control: Private
951   // headers, which we take as a signal that it may be a sensitive resource. We
952   // require both to reduce the number of false positives (as both headers are
953   // sometimes used on non-sensitive resources). Cache-Control: no-store appears
954   // on non-sensitive resources that change frequently, so we ignore it here.
955   if (!response.headers)
956     return false;
957   bool has_vary_origin = response.headers->HasHeaderValue("vary", "origin");
958   bool has_cache_private =
959       response.headers->HasHeaderValue("cache-control", "private");
960   return has_vary_origin && has_cache_private;
961 }
962 
963 // static
SupportsRangeRequests(const network::mojom::URLResponseHead & response)964 bool CrossOriginReadBlocking::ResponseAnalyzer::SupportsRangeRequests(
965     const network::mojom::URLResponseHead& response) {
966   if (response.headers) {
967     std::string value;
968     response.headers->GetNormalizedHeader("accept-ranges", &value);
969     if (!value.empty() && !base::LowerCaseEqualsASCII(value, "none")) {
970       return true;
971     }
972   }
973   return false;
974 }
975 
976 // static
977 CrossOriginReadBlocking::ResponseAnalyzer::MimeTypeBucket
GetMimeTypeBucket(const network::mojom::URLResponseHead & response)978 CrossOriginReadBlocking::ResponseAnalyzer::GetMimeTypeBucket(
979     const network::mojom::URLResponseHead& response) {
980   std::string mime_type;
981   if (response.headers)
982     response.headers->GetMimeType(&mime_type);
983   MimeType canonical_mime_type = GetCanonicalMimeType(mime_type);
984   switch (canonical_mime_type) {
985     case MimeType::kHtml:
986     case MimeType::kXml:
987     case MimeType::kJson:
988     case MimeType::kNeverSniffed:
989     case MimeType::kPlain:
990       return kProtected;
991       break;
992     case MimeType::kOthers:
993       break;
994     case MimeType::kInvalidMimeType:
995       NOTREACHED();
996       break;
997   }
998 
999   // Javascript is assumed public. See also
1000   // https://mimesniff.spec.whatwg.org/#javascript-mime-type.
1001   constexpr auto kCaseInsensitive = base::CompareCase::INSENSITIVE_ASCII;
1002   for (const std::string& suffix : kJavaScriptSuffixes) {
1003     if (base::EndsWith(mime_type, suffix, kCaseInsensitive)) {
1004       return kPublic;
1005     }
1006   }
1007 
1008   // Images are assumed public. See also
1009   // https://mimesniff.spec.whatwg.org/#image-mime-type.
1010   if (base::StartsWith(mime_type, "image", kCaseInsensitive)) {
1011     return kPublic;
1012   }
1013 
1014   // Audio and video are assumed public. See also
1015   // https://mimesniff.spec.whatwg.org/#audio-or-video-mime-type.
1016   if (base::StartsWith(mime_type, "audio", kCaseInsensitive) ||
1017       base::StartsWith(mime_type, "video", kCaseInsensitive) ||
1018       base::LowerCaseEqualsASCII(mime_type, "application/ogg") ||
1019       base::LowerCaseEqualsASCII(mime_type, "application/dash+xml")) {
1020     return kPublic;
1021   }
1022 
1023   // CSS files are assumed public and must be sent with text/css.
1024   if (base::LowerCaseEqualsASCII(mime_type, "text/css")) {
1025     return kPublic;
1026   }
1027   return kOther;
1028 }
1029 
CreateSniffers()1030 void CrossOriginReadBlocking::ResponseAnalyzer::CreateSniffers() {
1031   // Create one or more |sniffers_| to confirm that the body is actually the
1032   // MIME type advertised in the Content-Type header.
1033   DCHECK(needs_sniffing());
1034   DCHECK(sniffers_.empty());
1035 
1036   // When the MIME type is "text/plain", create sniffers for HTML, XML and
1037   // JSON. If any of these sniffers match, the response will be blocked.
1038   const bool use_all = canonical_mime_type_ == MimeType::kPlain;
1039 
1040   // HTML sniffer.
1041   if (use_all || canonical_mime_type_ == MimeType::kHtml) {
1042     sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
1043         &network::CrossOriginReadBlocking::SniffForHTML));
1044   }
1045 
1046   // XML sniffer.
1047   if (use_all || canonical_mime_type_ == MimeType::kXml) {
1048     sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
1049         &network::CrossOriginReadBlocking::SniffForXML));
1050   }
1051 
1052   // JSON sniffer.
1053   if (use_all || canonical_mime_type_ == MimeType::kJson) {
1054     sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
1055         &network::CrossOriginReadBlocking::SniffForJSON));
1056   }
1057 
1058   // Parser-breaker sniffer.
1059   //
1060   // Because these prefixes are an XSSI-defeating mechanism, CORB considers
1061   // them distinctive enough to be worth blocking no matter the Content-Type
1062   // header. So this sniffer is created unconditionally.
1063   //
1064   // For MimeType::kOthers, this will be the only sniffer that's active.
1065   sniffers_.push_back(std::make_unique<SimpleConfirmationSniffer>(
1066       &network::CrossOriginReadBlocking::SniffForFetchOnlyResource));
1067 }
1068 
SniffResponseBody(base::StringPiece data,size_t new_data_offset)1069 void CrossOriginReadBlocking::ResponseAnalyzer::SniffResponseBody(
1070     base::StringPiece data,
1071     size_t new_data_offset) {
1072   DCHECK(needs_sniffing());
1073   DCHECK(!sniffers_.empty());
1074   DCHECK(!found_blockable_content_);
1075 
1076   DCHECK_LE(data.size(), static_cast<size_t>(net::kMaxBytesToSniff));
1077   DCHECK_LE(new_data_offset, data.size());
1078   bool has_new_data = (new_data_offset < data.size());
1079 
1080   for (size_t i = 0; i < sniffers_.size();) {
1081     if (has_new_data)
1082       sniffers_[i]->OnDataAvailable(data, new_data_offset);
1083 
1084     if (sniffers_[i]->WantsMoreData()) {
1085       i++;
1086       continue;
1087     }
1088 
1089     if (sniffers_[i]->IsConfirmedContentType()) {
1090       found_blockable_content_ = true;
1091       sniffers_.clear();
1092       break;
1093     } else {
1094       // This response is CORB-exempt as far as this sniffer is concerned;
1095       // remove it from the list.
1096       sniffers_.erase(sniffers_.begin() + i);
1097     }
1098   }
1099 }
1100 
ShouldAllow() const1101 bool CrossOriginReadBlocking::ResponseAnalyzer::ShouldAllow() const {
1102   // If we're in hypothetical mode then CORB must have decided to kAllow (see
1103   // comment in ShouldBlock). Thus we just need to wait until the sniffers are
1104   // all done (i.e. empty).
1105   if (hypothetical_sniffing_mode_) {
1106     DCHECK_EQ(should_block_based_on_headers_, kAllow);
1107     return sniffers_.empty();
1108   }
1109   switch (should_block_based_on_headers_) {
1110     case kAllow:
1111       return true;
1112     case kNeedToSniffMore:
1113       return sniffers_.empty() && !found_blockable_content_;
1114     case kBlock:
1115       return false;
1116   }
1117 }
1118 
ShouldBlock() const1119 bool CrossOriginReadBlocking::ResponseAnalyzer::ShouldBlock() const {
1120   // If we're in *hypothetical* sniffing mode then the following must be true:
1121   // (1) We are only sniffing to find out if CORB would have blocked the request
1122   // were it made cross origin (CORB itself did *not* need to sniff the file).
1123   // (2) CORB must have decided to kAllow (if it was kBlock then the protection
1124   // decision would have been kBlock as well, no hypothetical mode needed).
1125   if (hypothetical_sniffing_mode_) {
1126     DCHECK_EQ(should_block_based_on_headers_, kAllow);
1127     return false;
1128   }
1129   switch (should_block_based_on_headers_) {
1130     case kAllow:
1131       return false;
1132     case kNeedToSniffMore:
1133       return sniffers_.empty() && found_blockable_content_;
1134     case kBlock:
1135       return true;
1136   }
1137 }
1138 
ShouldReportBlockedResponse() const1139 bool CrossOriginReadBlocking::ResponseAnalyzer::ShouldReportBlockedResponse()
1140     const {
1141   if (!ShouldBlock())
1142     return false;
1143 
1144   // Don't bother showing a warning message when blocking responses that are
1145   // already empty.
1146   if (content_length_ == 0)
1147     return false;
1148   if (http_response_code_ == 204)
1149     return false;
1150 
1151   // Don't bother showing a warning message when blocking responses that are
1152   // associated with error responses (e.g. it is quite common to serve a
1153   // text/html 404 error page for an <img> tag pointing to a wrong URL).
1154   if (400 <= http_response_code_ && http_response_code_ <= 599)
1155     return false;
1156 
1157   return true;
1158 }
1159 
LogAllowedResponse()1160 void CrossOriginReadBlocking::ResponseAnalyzer::LogAllowedResponse() {
1161   // Only log the ContentScript UMA when the request really came from a content
1162   // script.
1163   //
1164   // Note that we will never get here in the following cases:
1165   // 1) When CORB is disabled (e.g. for allowlisted content scripts OR for
1166   //    extension background pages).
1167   // 2) When CorbAllowlistAlsoAppliesToOorCors and OOR-CORS features are both
1168   //    enabled, because:
1169   //    2a) The former feature forces |ignore_isolated_world_origin| for
1170   //        non-allowlisted content scripts and OOR-CORS in this case resets
1171   //        |isolated_world_origin| to base::nullopt.
1172   //    2b) Even if we could preserve |isolated_world_origin_| just for UMA,
1173   //        CORS would block the response before LogAllowedResponse gets a
1174   //        chance to run.
1175   bool is_for_non_http_isolated_world =
1176       isolated_world_origin_.has_value() &&
1177       isolated_world_origin_->scheme() != url::kHttpScheme &&
1178       isolated_world_origin_->scheme() != url::kHttpsScheme;
1179   if (is_for_non_http_isolated_world) {
1180     // We log whether CORS would block this response if it were enabled for
1181     // content scripts.  Caveat: This will be true even in cases where the
1182     // server would have sent an ACAO response header if Chrome had sent an
1183     // Origin request header.
1184     UMA_HISTOGRAM_BOOLEAN(
1185         "SiteIsolation.XSD.Browser.AllowedByCorbButNotCors.ContentScript",
1186         is_cors_blocking_expected_);
1187 
1188     // Ask the browser process to log Rappor and UKM metrics.
1189     if (network_service_client_ && is_cors_blocking_expected_) {
1190       network_service_client_->LogCrossOriginFetchFromContentScript3(
1191           isolated_world_origin_->host());
1192     }
1193   }
1194 
1195   if (corb_protection_logging_needs_sniffing_) {
1196     LogSensitiveResponseProtection(
1197         SniffingDecisionToProtectionDecision(found_blockable_content_));
1198   }
1199   // Note that if a response is allowed because of hitting EOF or
1200   // kMaxBytesToSniff, then |sniffers_| are not emptied and consequently
1201   // ShouldAllow doesn't start returning true.  This means that we can't
1202   // DCHECK(ShouldAllow()) or DCHECK(sniffers_.empty()) here - the decision to
1203   // allow the response could have been made in the
1204   // CrossSiteDocumentResourceHandler layer without CrossOriginReadBlocking
1205   // realizing that it has hit EOF or kMaxBytesToSniff.
1206 
1207   // Note that the response might be allowed even if ShouldBlock() returns true
1208   // - for example to allow responses to requests initiated by content scripts.
1209   // This means that we cannot DCHECK(!ShouldBlock()) here.
1210 
1211   CrossOriginReadBlocking::LogAction(
1212       needs_sniffing()
1213           ? network::CrossOriginReadBlocking::Action::kAllowedAfterSniffing
1214           : network::CrossOriginReadBlocking::Action::kAllowedWithoutSniffing);
1215 }
1216 
LogBlockedResponse()1217 void CrossOriginReadBlocking::ResponseAnalyzer::LogBlockedResponse() {
1218   DCHECK(!ShouldAllow());
1219   DCHECK(ShouldBlock());
1220   DCHECK(sniffers_.empty());
1221 
1222   if (corb_protection_logging_needs_sniffing_) {
1223     LogSensitiveResponseProtection(
1224         SniffingDecisionToProtectionDecision(found_blockable_content_));
1225   }
1226 
1227   CrossOriginReadBlocking::LogAction(
1228       needs_sniffing()
1229           ? network::CrossOriginReadBlocking::Action::kBlockedAfterSniffing
1230           : network::CrossOriginReadBlocking::Action::kBlockedWithoutSniffing);
1231 
1232   UMA_HISTOGRAM_ENUMERATION(
1233       "SiteIsolation.XSD.Browser.Blocked.CanonicalMimeType",
1234       canonical_mime_type_);
1235 }
1236 
1237 // static
1238 CrossOriginReadBlocking::ResponseAnalyzer::CrossOriginProtectionDecision
BlockingDecisionToProtectionDecision(BlockingDecision blocking_decision)1239 CrossOriginReadBlocking::ResponseAnalyzer::BlockingDecisionToProtectionDecision(
1240     BlockingDecision blocking_decision) {
1241   switch (blocking_decision) {
1242     case kAllow:
1243       return CrossOriginProtectionDecision::kAllow;
1244     case kBlock:
1245       return CrossOriginProtectionDecision::kBlock;
1246     case kNeedToSniffMore:
1247       return CrossOriginProtectionDecision::kNeedToSniffMore;
1248   }
1249 }
1250 
1251 // static
1252 CrossOriginReadBlocking::ResponseAnalyzer::CrossOriginProtectionDecision
SniffingDecisionToProtectionDecision(bool found_blockable_content)1253 CrossOriginReadBlocking::ResponseAnalyzer::SniffingDecisionToProtectionDecision(
1254     bool found_blockable_content) {
1255   if (found_blockable_content)
1256     return CrossOriginProtectionDecision::kBlockedAfterSniffing;
1257   return CrossOriginProtectionDecision::kAllowedAfterSniffing;
1258 }
1259 
LogSensitiveResponseProtection(CrossOriginProtectionDecision protection_decision) const1260 void CrossOriginReadBlocking::ResponseAnalyzer::LogSensitiveResponseProtection(
1261     CrossOriginProtectionDecision protection_decision) const {
1262   DCHECK(seems_sensitive_from_cors_heuristic_ ||
1263          seems_sensitive_from_cache_heuristic_);
1264   if (seems_sensitive_from_cors_heuristic_) {
1265     switch (mime_type_bucket_) {
1266       case kProtected:
1267         UMA_HISTOGRAM_ENUMERATION(
1268             "SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType",
1269             protection_decision);
1270         // We report if a response with a protected MIME type supports range
1271         // requests since we want to measure how often making a multipart range
1272         // requests would have allowed bypassing CORB.
1273         if (protection_decision == CrossOriginProtectionDecision::kBlock) {
1274           UMA_HISTOGRAM_BOOLEAN(
1275               "SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
1276               "BlockedWithRangeSupport",
1277               supports_range_requests_);
1278           UMA_HISTOGRAM_BOOLEAN(
1279               "SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
1280               "BlockedWithoutSniffing.HasNoSniff",
1281               has_nosniff_header_);
1282         } else if (protection_decision ==
1283                    CrossOriginProtectionDecision::kBlockedAfterSniffing) {
1284           UMA_HISTOGRAM_BOOLEAN(
1285               "SiteIsolation.CORBProtection.CORSHeuristic.ProtectedMimeType."
1286               "BlockedAfterSniffingWithRangeSupport",
1287               supports_range_requests_);
1288         }
1289         break;
1290       case kPublic:
1291         UMA_HISTOGRAM_ENUMERATION(
1292             "SiteIsolation.CORBProtection.CORSHeuristic.PublicMimeType",
1293             protection_decision);
1294         break;
1295       case kOther:
1296         UMA_HISTOGRAM_ENUMERATION(
1297             "SiteIsolation.CORBProtection.CORSHeuristic.OtherMimeType",
1298             protection_decision);
1299     }
1300   }
1301   if (seems_sensitive_from_cache_heuristic_) {
1302     switch (mime_type_bucket_) {
1303       case kProtected:
1304         UMA_HISTOGRAM_ENUMERATION(
1305             "SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType",
1306             protection_decision);
1307         if (protection_decision == CrossOriginProtectionDecision::kBlock) {
1308           UMA_HISTOGRAM_BOOLEAN(
1309               "SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
1310               "BlockedWithRangeSupport",
1311               supports_range_requests_);
1312           UMA_HISTOGRAM_BOOLEAN(
1313               "SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
1314               "BlockedWithoutSniffing.HasNoSniff",
1315               has_nosniff_header_);
1316         } else if (protection_decision ==
1317                    CrossOriginProtectionDecision::kBlockedAfterSniffing) {
1318           UMA_HISTOGRAM_BOOLEAN(
1319               "SiteIsolation.CORBProtection.CacheHeuristic.ProtectedMimeType."
1320               "BlockedAfterSniffingWithRangeSupport",
1321               supports_range_requests_);
1322         }
1323         break;
1324       case kPublic:
1325         UMA_HISTOGRAM_ENUMERATION(
1326             "SiteIsolation.CORBProtection.CacheHeuristic.PublicMimeType",
1327             protection_decision);
1328         break;
1329       case kOther:
1330         UMA_HISTOGRAM_ENUMERATION(
1331             "SiteIsolation.CORBProtection.CacheHeuristic.OtherMimeType",
1332             protection_decision);
1333     }
1334   }
1335   // Also log if the server supports range requests, since these may allow
1336   // bypassing CORB.
1337   UMA_HISTOGRAM_BOOLEAN(
1338       "SiteIsolation.CORBProtection.SensitiveWithRangeSupport",
1339       supports_range_requests_);
1340 }
1341 
1342 // static
AddExceptionForPlugin(int process_id)1343 void CrossOriginReadBlocking::AddExceptionForPlugin(int process_id) {
1344   std::set<int>& plugin_proxies = GetPluginProxyingProcesses();
1345   plugin_proxies.insert(process_id);
1346 }
1347 
1348 // static
ShouldAllowForPlugin(int process_id)1349 bool CrossOriginReadBlocking::ShouldAllowForPlugin(int process_id) {
1350   std::set<int>& plugin_proxies = GetPluginProxyingProcesses();
1351   return base::Contains(plugin_proxies, process_id);
1352 }
1353 
1354 // static
RemoveExceptionForPlugin(int process_id)1355 void CrossOriginReadBlocking::RemoveExceptionForPlugin(int process_id) {
1356   std::set<int>& plugin_proxies = GetPluginProxyingProcesses();
1357   plugin_proxies.erase(process_id);
1358 }
1359 
1360 }  // namespace network
1361