1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "extensions/common/url_pattern.h"
6 
7 #include <stddef.h>
8 
9 #include <ostream>
10 
11 #include "base/stl_util.h"
12 #include "base/strings/pattern.h"
13 #include "base/strings/strcat.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h"
18 #include "content/public/common/url_constants.h"
19 #include "extensions/common/constants.h"
20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
21 #include "net/base/url_util.h"
22 #include "url/gurl.h"
23 #include "url/url_util.h"
24 
// Special pattern string that Parse() accepts as "match every URL" and that
// GetAsString() emits when |match_all_urls_| is set.
const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
26 
27 namespace {
28 
// Scheme names that a URLPattern can be restricted to. Entry i of this array
// is paired with entry i of kValidSchemeMasks (the lookups in this file index
// both arrays with the same position).
// TODO(aa): What about more obscure schemes like javascript: ?
// Note: keep this array in sync with kValidSchemeMasks.
const char* const kValidSchemes[] = {
    url::kHttpScheme,         url::kHttpsScheme,
    url::kFileScheme,         url::kFtpScheme,
    content::kChromeUIScheme, extensions::kExtensionScheme,
    url::kFileSystemScheme,   url::kWsScheme,
    url::kWssScheme,          url::kDataScheme,
    url::kQrcScheme,
};
39 
// Bitmask value for each scheme in kValidSchemes, in the same order. These are
// the bits tested against |valid_schemes_| in IsValidScheme().
const int kValidSchemeMasks[] = {
    URLPattern::SCHEME_HTTP,       URLPattern::SCHEME_HTTPS,
    URLPattern::SCHEME_FILE,       URLPattern::SCHEME_FTP,
    URLPattern::SCHEME_CHROMEUI,   URLPattern::SCHEME_EXTENSION,
    URLPattern::SCHEME_FILESYSTEM, URLPattern::SCHEME_WS,
    URLPattern::SCHEME_WSS,        URLPattern::SCHEME_DATA,
    URLPattern::SCHEME_QRC,
};

// Compile-time guard: the two parallel arrays must have the same length.
static_assert(base::size(kValidSchemes) == base::size(kValidSchemeMasks),
              "must keep these arrays in sync");
51 
// Human-readable text for each parse outcome. These strings are returned by
// URLPattern::GetParseResultString() through kParseResultMessages below.
const char kParseSuccess[] = "Success.";
const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator.";
const char kParseErrorInvalidScheme[] = "Invalid scheme.";
const char kParseErrorWrongSchemeType[] = "Wrong scheme type.";
const char kParseErrorEmptyHost[] = "Host can not be empty.";
const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
const char kParseErrorEmptyPath[] = "Empty path.";
const char kParseErrorInvalidPort[] = "Invalid port.";
const char kParseErrorInvalidHost[] = "Invalid host.";

// Message explaining each URLPattern::ParseResult. The table is indexed by the
// integer value of the ParseResult (see GetParseResultString()), so the order
// of entries here has to follow the order of the enumerators.
const char* const kParseResultMessages[] = {
  kParseSuccess,
  kParseErrorMissingSchemeSeparator,
  kParseErrorInvalidScheme,
  kParseErrorWrongSchemeType,
  kParseErrorEmptyHost,
  kParseErrorInvalidHostWildcard,
  kParseErrorEmptyPath,
  kParseErrorInvalidPort,
  kParseErrorInvalidHost,
};

// Compile-time guard: there must be exactly one message per ParseResult value.
static_assert(static_cast<int>(URLPattern::ParseResult::kNumParseResults) ==
                  base::size(kParseResultMessages),
              "must add message for each parse result");
78 
// Separator that Parse() searches for to find where the host ends and the path
// begins.
const char kPathSeparator[] = "/";
80 
IsStandardScheme(base::StringPiece scheme)81 bool IsStandardScheme(base::StringPiece scheme) {
82   // "*" gets the same treatment as a standard scheme.
83   if (scheme == "*")
84     return true;
85 
86   return url::IsStandard(scheme.data(),
87                          url::Component(0, static_cast<int>(scheme.length())));
88 }
89 
IsValidPortForScheme(base::StringPiece scheme,base::StringPiece port)90 bool IsValidPortForScheme(base::StringPiece scheme, base::StringPiece port) {
91   if (port == "*")
92     return true;
93 
94   // Only accept non-wildcard ports if the scheme uses ports.
95   if (url::DefaultPortForScheme(scheme.data(), scheme.length()) ==
96       url::PORT_UNSPECIFIED) {
97     return false;
98   }
99 
100   int parsed_port = url::PORT_UNSPECIFIED;
101   if (!base::StringToInt(port, &parsed_port))
102     return false;
103   return (parsed_port >= 0) && (parsed_port < 65536);
104 }
105 
106 // Returns |path| with the trailing wildcard stripped if one existed.
107 //
108 // The functions that rely on this (OverlapsWith and Contains) are only
109 // called for the patterns inside URLPatternSet. In those cases, we know that
110 // the path will have only a single wildcard at the end. This makes figuring
111 // out overlap much easier. It seems like there is probably a computer-sciency
112 // way to solve the general case, but we don't need that yet.
StripTrailingWildcard(base::StringPiece path)113 base::StringPiece StripTrailingWildcard(base::StringPiece path) {
114   if (path.ends_with("*"))
115     path.remove_suffix(1);
116   return path;
117 }
118 
119 // Removes trailing dot from |host_piece| if any.
CanonicalizeHostForMatching(base::StringPiece host_piece)120 base::StringPiece CanonicalizeHostForMatching(base::StringPiece host_piece) {
121   if (host_piece.ends_with("."))
122     host_piece.remove_suffix(1);
123   return host_piece;
124 }
125 
126 }  // namespace
127 
128 // static
IsValidSchemeForExtensions(base::StringPiece scheme)129 bool URLPattern::IsValidSchemeForExtensions(base::StringPiece scheme) {
130   for (size_t i = 0; i < base::size(kValidSchemes); ++i) {
131     if (scheme == kValidSchemes[i])
132       return true;
133   }
134   return false;
135 }
136 
137 // static
GetValidSchemeMaskForExtensions()138 int URLPattern::GetValidSchemeMaskForExtensions() {
139   int result = 0;
140   for (size_t i = 0; i < base::size(kValidSchemeMasks); ++i)
141     result |= kValidSchemeMasks[i];
142   return result;
143 }
144 
URLPattern()145 URLPattern::URLPattern()
146     : valid_schemes_(SCHEME_NONE),
147       match_all_urls_(false),
148       match_subdomains_(false),
149       port_("*") {}
150 
// Creates an unparsed pattern restricted to the scheme bits in
// |valid_schemes|. The pattern starts out not matching all URLs, not matching
// subdomains, and with the wildcard port "*".
URLPattern::URLPattern(int valid_schemes)
    : valid_schemes_(valid_schemes),
      match_all_urls_(false),
      match_subdomains_(false),
      port_("*") {}
156 
URLPattern(int valid_schemes,base::StringPiece pattern)157 URLPattern::URLPattern(int valid_schemes, base::StringPiece pattern)
158     // Strict error checking is used, because this constructor is only
159     // appropriate when we know |pattern| is valid.
160     : valid_schemes_(valid_schemes),
161       match_all_urls_(false),
162       match_subdomains_(false),
163       port_("*") {
164   ParseResult result = Parse(pattern);
165   if (result != ParseResult::kSuccess) {
166     const char* error_string = GetParseResultString(result);
167     // Temporarily add more logging to investigate why this code path is
168     // reached. For http://crbug.com/856948
169     LOG(ERROR) << "Invalid pattern was given " << pattern << " result "
170                << error_string;
171     NOTREACHED() << "URLPattern invalid: '" << pattern
172                  << "'; error: " << error_string;
173   }
174 }
175 
// Copy constructor: compiler-generated member-wise copy.
URLPattern::URLPattern(const URLPattern& other) = default;

// Move constructor: compiler-generated member-wise move.
URLPattern::URLPattern(URLPattern&& other) = default;
179 
~URLPattern()180 URLPattern::~URLPattern() {
181 }
182 
// Copy assignment: compiler-generated member-wise copy.
URLPattern& URLPattern::operator=(const URLPattern& other) = default;

// Move assignment: compiler-generated member-wise move.
URLPattern& URLPattern::operator=(URLPattern&& other) = default;
186 
operator <(const URLPattern & other) const187 bool URLPattern::operator<(const URLPattern& other) const {
188   return GetAsString() < other.GetAsString();
189 }
190 
operator >(const URLPattern & other) const191 bool URLPattern::operator>(const URLPattern& other) const {
192   return GetAsString() > other.GetAsString();
193 }
194 
operator ==(const URLPattern & other) const195 bool URLPattern::operator==(const URLPattern& other) const {
196   return GetAsString() == other.GetAsString();
197 }
198 
operator <<(std::ostream & out,const URLPattern & url_pattern)199 std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) {
200   return out << '"' << url_pattern.GetAsString() << '"';
201 }
202 
Parse(base::StringPiece pattern)203 URLPattern::ParseResult URLPattern::Parse(base::StringPiece pattern) {
204   spec_.clear();
205   SetMatchAllURLs(false);
206   SetMatchSubdomains(false);
207   SetPort("*");
208 
209   // Special case pattern to match every valid URL.
210   if (pattern == kAllUrlsPattern) {
211     SetMatchAllURLs(true);
212     return ParseResult::kSuccess;
213   }
214 
215   // Parse out the scheme.
216   size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator);
217   bool has_standard_scheme_separator = true;
218 
219   // Some urls also use ':' alone as the scheme separator.
220   if (scheme_end_pos == base::StringPiece::npos) {
221     scheme_end_pos = pattern.find(':');
222     has_standard_scheme_separator = false;
223   }
224 
225   if (scheme_end_pos == base::StringPiece::npos)
226     return ParseResult::kMissingSchemeSeparator;
227 
228   if (!SetScheme(pattern.substr(0, scheme_end_pos)))
229     return ParseResult::kInvalidScheme;
230 
231   bool standard_scheme = IsStandardScheme(scheme_);
232   if (standard_scheme != has_standard_scheme_separator)
233     return ParseResult::kWrongSchemeSeparator;
234 
235   // Advance past the scheme separator.
236   scheme_end_pos +=
237       (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1);
238   if (scheme_end_pos >= pattern.size())
239     return ParseResult::kEmptyHost;
240 
241   // Parse out the host and path.
242   size_t host_start_pos = scheme_end_pos;
243   size_t path_start_pos = 0;
244 
245   if (!standard_scheme) {
246     path_start_pos = host_start_pos;
247   } else if (scheme_ == url::kFileScheme) {
248     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
249     if (host_end_pos == base::StringPiece::npos) {
250       // Allow hostname omission.
251       // e.g. file://* is interpreted as file:///*,
252       // file://foo* is interpreted as file:///foo*.
253       path_start_pos = host_start_pos - 1;
254     } else {
255       // Ignore hostname if scheme is file://.
256       // e.g. file://localhost/foo is equal to file:///foo.
257       path_start_pos = host_end_pos;
258     }
259   } else {
260     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
261 
262     // Host is required.
263     if (host_start_pos == host_end_pos)
264       return ParseResult::kEmptyHost;
265 
266     if (host_end_pos == base::StringPiece::npos)
267       return ParseResult::kEmptyPath;
268 
269     base::StringPiece host_and_port =
270         pattern.substr(host_start_pos, host_end_pos - host_start_pos);
271 
272     size_t port_separator_pos = base::StringPiece::npos;
273     if (host_and_port[0] != '[') {
274       // Not IPv6 (either IPv4 or just a normal address).
275       port_separator_pos = host_and_port.find(':');
276     } else {  // IPv6.
277       size_t host_end_pos = host_and_port.find(']');
278       if (host_end_pos == base::StringPiece::npos)
279         return ParseResult::kInvalidHost;
280       if (host_end_pos == 1)
281         return ParseResult::kEmptyHost;
282 
283       if (host_end_pos < host_and_port.length() - 1) {
284         // The host isn't the only component. Check for a port. This would
285         // require a ':' to follow the closing ']' from the host.
286         if (host_and_port[host_end_pos + 1] != ':')
287           return ParseResult::kInvalidHost;
288 
289         port_separator_pos = host_end_pos + 1;
290       }
291     }
292 
293     if (port_separator_pos != base::StringPiece::npos &&
294         !SetPort(host_and_port.substr(port_separator_pos + 1))) {
295       return ParseResult::kInvalidPort;
296     }
297 
298     // Note: this substr() will be the entire string if the port position
299     // wasn't found.
300     base::StringPiece host_piece = host_and_port.substr(0, port_separator_pos);
301 
302     if (host_piece.empty())
303       return ParseResult::kEmptyHost;
304 
305     if (host_piece == "*") {
306       match_subdomains_ = true;
307       host_piece = base::StringPiece();
308     } else if (host_piece.starts_with("*.")) {
309       if (host_piece.length() == 2) {
310         // We don't allow just '*.' as a host.
311         return ParseResult::kEmptyHost;
312       }
313       match_subdomains_ = true;
314       host_piece = host_piece.substr(2);
315     }
316 
317     host_ = host_piece.as_string();
318 
319     path_start_pos = host_end_pos;
320   }
321 
322   SetPath(pattern.substr(path_start_pos));
323 
324   // No other '*' can occur in the host, though. This isn't necessary, but is
325   // done as a convenience to developers who might otherwise be confused and
326   // think '*' works as a glob in the host.
327   if (host_.find('*') != std::string::npos)
328     return ParseResult::kInvalidHostWildcard;
329 
330   if (!host_.empty()) {
331     // If |host_| is present (i.e., isn't a wildcard), we need to canonicalize
332     // it.
333     url::CanonHostInfo host_info;
334     host_ = net::CanonicalizeHost(host_, &host_info);
335     // net::CanonicalizeHost() returns an empty string on failure.
336     if (host_.empty())
337       return ParseResult::kInvalidHost;
338   }
339 
340   // Null characters are not allowed in hosts.
341   if (host_.find('\0') != std::string::npos)
342     return ParseResult::kInvalidHost;
343 
344   return ParseResult::kSuccess;
345 }
346 
SetValidSchemes(int valid_schemes)347 void URLPattern::SetValidSchemes(int valid_schemes) {
348   // TODO(devlin): Should we check that valid_schemes agrees with |scheme_|
349   // here? Otherwise, valid_schemes_ and schemes_ may stop agreeing with each
350   // other (e.g., in the case of `*://*/*`, where the scheme should only be
351   // http or https).
352   spec_.clear();
353   valid_schemes_ = valid_schemes;
354 }
355 
SetHost(base::StringPiece host)356 void URLPattern::SetHost(base::StringPiece host) {
357   spec_.clear();
358   host_.assign(host.data(), host.size());
359 }
360 
SetMatchAllURLs(bool val)361 void URLPattern::SetMatchAllURLs(bool val) {
362   spec_.clear();
363   match_all_urls_ = val;
364 
365   if (val) {
366     match_subdomains_ = true;
367     scheme_ = "*";
368     host_.clear();
369     SetPath("/*");
370   }
371 }
372 
SetMatchSubdomains(bool val)373 void URLPattern::SetMatchSubdomains(bool val) {
374   spec_.clear();
375   match_subdomains_ = val;
376 }
377 
SetScheme(base::StringPiece scheme)378 bool URLPattern::SetScheme(base::StringPiece scheme) {
379   spec_.clear();
380   scheme_.assign(scheme.data(), scheme.size());
381   if (scheme_ == "*") {
382     valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
383   } else if (!IsValidScheme(scheme_)) {
384     return false;
385   }
386   return true;
387 }
388 
IsValidScheme(base::StringPiece scheme) const389 bool URLPattern::IsValidScheme(base::StringPiece scheme) const {
390   if (valid_schemes_ == SCHEME_ALL)
391     return true;
392 
393   for (size_t i = 0; i < base::size(kValidSchemes); ++i) {
394     if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
395       return true;
396   }
397 
398   return false;
399 }
400 
SetPath(base::StringPiece path)401 void URLPattern::SetPath(base::StringPiece path) {
402   spec_.clear();
403   path_.assign(path.data(), path.size());
404   path_escaped_ = path_;
405   base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
406   base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
407 }
408 
SetPort(base::StringPiece port)409 bool URLPattern::SetPort(base::StringPiece port) {
410   spec_.clear();
411   if (IsValidPortForScheme(scheme_, port)) {
412     port_.assign(port.data(), port.size());
413     return true;
414   }
415   return false;
416 }
417 
MatchesURL(const GURL & test) const418 bool URLPattern::MatchesURL(const GURL& test) const {
419   // Invalid URLs can never match.
420   if (!test.is_valid())
421     return false;
422 
423   const GURL* test_url = &test;
424   bool has_inner_url = test.inner_url() != nullptr;
425 
426   if (has_inner_url) {
427     if (!test.SchemeIsFileSystem())
428       return false;  // The only nested URLs we handle are filesystem URLs.
429     test_url = test.inner_url();
430   }
431 
432   // Ensure the scheme matches first, since <all_urls> may not match this URL if
433   // the scheme is excluded.
434   if (!MatchesScheme(test_url->scheme_piece()))
435     return false;
436 
437   if (match_all_urls_)
438     return true;
439 
440   // Unless |match_all_urls_| is true, the grammar only permits matching
441   // URLs with nonempty paths.
442   if (!test.has_path())
443     return false;
444 
445   std::string path_for_request = test.PathForRequest();
446   if (has_inner_url) {
447     path_for_request = base::StringPrintf("%s%s", test_url->path_piece().data(),
448                                           path_for_request.c_str());
449   }
450 
451   return MatchesSecurityOriginHelper(*test_url) &&
452          MatchesPath(path_for_request);
453 }
454 
MatchesSecurityOrigin(const GURL & test) const455 bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
456   const GURL* test_url = &test;
457   bool has_inner_url = test.inner_url() != NULL;
458 
459   if (has_inner_url) {
460     if (!test.SchemeIsFileSystem())
461       return false;  // The only nested URLs we handle are filesystem URLs.
462     test_url = test.inner_url();
463   }
464 
465   if (!MatchesScheme(test_url->scheme()))
466     return false;
467 
468   if (match_all_urls_)
469     return true;
470 
471   return MatchesSecurityOriginHelper(*test_url);
472 }
473 
MatchesScheme(base::StringPiece test) const474 bool URLPattern::MatchesScheme(base::StringPiece test) const {
475   if (!IsValidScheme(test))
476     return false;
477 
478   return scheme_ == "*" || test == scheme_;
479 }
480 
MatchesHost(base::StringPiece host) const481 bool URLPattern::MatchesHost(base::StringPiece host) const {
482   // TODO(devlin): This is a bit sad. Parsing urls is expensive. However, it's
483   // important that we do this conversion to a GURL in order to canonicalize the
484   // host (the pattern's host_ already is canonicalized from Parse()). We can't
485   // just do string comparison.
486   return MatchesHost(
487       GURL(base::StringPrintf("%s%s%s/", url::kHttpScheme,
488                               url::kStandardSchemeSeparator, host.data())));
489 }
490 
MatchesHost(const GURL & test) const491 bool URLPattern::MatchesHost(const GURL& test) const {
492   base::StringPiece test_host(CanonicalizeHostForMatching(test.host_piece()));
493   const base::StringPiece pattern_host(CanonicalizeHostForMatching(host_));
494 
495   // If the hosts are exactly equal, we have a match.
496   if (test_host == pattern_host)
497     return true;
498 
499   // If we're matching subdomains, and we have no host in the match pattern,
500   // that means that we're matching all hosts, which means we have a match no
501   // matter what the test host is.
502   if (match_subdomains_ && pattern_host.empty())
503     return true;
504 
505   // Otherwise, we can only match if our match pattern matches subdomains.
506   if (!match_subdomains_)
507     return false;
508 
509   // We don't do subdomain matching against IP addresses, so we can give up now
510   // if the test host is an IP address.
511   if (test.HostIsIPAddress())
512     return false;
513 
514   // Check if the test host is a subdomain of our host.
515   if (test_host.length() <= (pattern_host.length() + 1))
516     return false;
517 
518   if (!test_host.ends_with(pattern_host))
519     return false;
520 
521   return test_host[test_host.length() - pattern_host.length() - 1] == '.';
522 }
523 
MatchesEffectiveTld(net::registry_controlled_domains::PrivateRegistryFilter private_filter,net::registry_controlled_domains::UnknownRegistryFilter unknown_filter) const524 bool URLPattern::MatchesEffectiveTld(
525     net::registry_controlled_domains::PrivateRegistryFilter private_filter,
526     net::registry_controlled_domains::UnknownRegistryFilter unknown_filter)
527     const {
528   // Check if it matches all urls or is a pattern like http://*/*.
529   if (match_all_urls_ || (match_subdomains_ && host_.empty()))
530     return true;
531 
532   // If this doesn't even match subdomains, it can't possibly be a TLD wildcard.
533   if (!match_subdomains_)
534     return false;
535 
536   // If there was more than just a TLD in the host (e.g., *.foobar.com), it
537   // doesn't match all hosts in an effective TLD.
538   if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
539           host_, unknown_filter, private_filter)) {
540     return false;
541   }
542 
543   // At this point the host could either be just a TLD ("com") or some unknown
544   // TLD-like string ("notatld"). To disambiguate between them construct a
545   // fake URL, and check the registry.
546   //
547   // If we recognized this TLD, then this is a pattern like *.com, and it
548   // matches an effective TLD.
549   return net::registry_controlled_domains::HostHasRegistryControlledDomain(
550       "notatld." + host_, unknown_filter, private_filter);
551 }
552 
MatchesSingleOrigin() const553 bool URLPattern::MatchesSingleOrigin() const {
554   // Strictly speaking, the port is part of the origin, but in URLPattern it
555   // defaults to *. It's not very interesting anyway, so leave it out.
556   return !MatchesEffectiveTld() && scheme_ != "*" && !match_subdomains_;
557 }
558 
MatchesPath(base::StringPiece test) const559 bool URLPattern::MatchesPath(base::StringPiece test) const {
560   // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
561   // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
562   // The below if is a no-copy way of doing (test + "/*" == path_escaped_).
563   if (path_escaped_.length() == test.length() + 2 &&
564       base::StartsWith(path_escaped_.c_str(), test,
565                        base::CompareCase::SENSITIVE) &&
566       base::EndsWith(path_escaped_, "/*", base::CompareCase::SENSITIVE)) {
567     return true;
568   }
569 
570   return base::MatchPattern(test, path_escaped_);
571 }
572 
GetAsString() const573 const std::string& URLPattern::GetAsString() const {
574   if (!spec_.empty())
575     return spec_;
576 
577   if (match_all_urls_) {
578     spec_ = kAllUrlsPattern;
579     return spec_;
580   }
581 
582   bool standard_scheme = IsStandardScheme(scheme_);
583 
584   std::string spec = scheme_ +
585       (standard_scheme ? url::kStandardSchemeSeparator : ":");
586 
587   if (scheme_ != url::kFileScheme && standard_scheme) {
588     if (match_subdomains_) {
589       spec += "*";
590       if (!host_.empty())
591         spec += ".";
592     }
593 
594     if (!host_.empty())
595       spec += host_;
596 
597     if (port_ != "*") {
598       spec += ":";
599       spec += port_;
600     }
601   }
602 
603   if (!path_.empty())
604     spec += path_;
605 
606   spec_ = std::move(spec);
607   return spec_;
608 }
609 
OverlapsWith(const URLPattern & other) const610 bool URLPattern::OverlapsWith(const URLPattern& other) const {
611   if (match_all_urls() || other.match_all_urls())
612     return true;
613   return (MatchesAnyScheme(other.GetExplicitSchemes()) ||
614           other.MatchesAnyScheme(GetExplicitSchemes()))
615       && (MatchesHost(other.host()) || other.MatchesHost(host()))
616       && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port()))
617       && (MatchesPath(StripTrailingWildcard(other.path())) ||
618           other.MatchesPath(StripTrailingWildcard(path())));
619 }
620 
Contains(const URLPattern & other) const621 bool URLPattern::Contains(const URLPattern& other) const {
622   // Important: it's not enough to just check match_all_urls(); we also need to
623   // make sure that the schemes in this pattern are a superset of those in
624   // |other|.
625   if (match_all_urls() &&
626       (valid_schemes_ & other.valid_schemes_) == other.valid_schemes_) {
627     return true;
628   }
629 
630   return MatchesAllSchemes(other.GetExplicitSchemes()) &&
631          MatchesHost(other.host()) &&
632          (!other.match_subdomains_ || match_subdomains_) &&
633          MatchesPortPattern(other.port()) &&
634          MatchesPath(StripTrailingWildcard(other.path()));
635 }
636 
CreateIntersection(const URLPattern & other) const637 base::Optional<URLPattern> URLPattern::CreateIntersection(
638     const URLPattern& other) const {
639   // Easy case: Schemes don't overlap. Return nullopt.
640   int intersection_schemes = URLPattern::SCHEME_NONE;
641   if (valid_schemes_ == URLPattern::SCHEME_ALL)
642     intersection_schemes = other.valid_schemes_;
643   else if (other.valid_schemes_ == URLPattern::SCHEME_ALL)
644     intersection_schemes = valid_schemes_;
645   else
646     intersection_schemes = valid_schemes_ & other.valid_schemes_;
647 
648   if (intersection_schemes == URLPattern::SCHEME_NONE)
649     return base::nullopt;
650 
651   {
652     // In a few cases, we can (mostly) return a copy of one of the patterns.
653     // This can happen when either:
654     // - The URLPattern's are identical (possibly excluding valid_schemes_)
655     // - One of the patterns has match_all_urls() equal to true.
656     // NOTE(devlin): Theoretically, we could use Contains() instead of
657     // match_all_urls() here. However, Contains() strips the trailing wildcard
658     // from the path, which could yield the incorrect result.
659     const URLPattern* copy_source = nullptr;
660     if (*this == other || other.match_all_urls())
661       copy_source = this;
662     else if (match_all_urls())
663       copy_source = &other;
664 
665     if (copy_source) {
666       // NOTE: equality checks don't take into account valid_schemes_, and
667       // schemes can be different in the case of match_all_urls() as well, so
668       // we can't always just return *copy_source.
669       if (intersection_schemes == copy_source->valid_schemes_)
670         return *copy_source;
671       URLPattern result(intersection_schemes);
672       ParseResult parse_result = result.Parse(copy_source->GetAsString());
673       CHECK_EQ(ParseResult::kSuccess, parse_result);
674       return result;
675     }
676   }
677 
678   // No more easy cases. Go through component by component to find the patterns
679   // that intersect.
680 
681   // Note: Alias the function type (rather than using auto) because
682   // MatchesHost() is overloaded.
683   using match_function_type = bool (URLPattern::*)(base::StringPiece) const;
684 
685   auto get_intersection = [this, &other](base::StringPiece own_str,
686                                          base::StringPiece other_str,
687                                          match_function_type match_function,
688                                          base::StringPiece* out) {
689     if ((this->*match_function)(other_str)) {
690       *out = other_str;
691       return true;
692     }
693     if ((other.*match_function)(own_str)) {
694       *out = own_str;
695       return true;
696     }
697     return false;
698   };
699 
700   base::StringPiece scheme;
701   base::StringPiece host;
702   base::StringPiece port;
703   base::StringPiece path;
704   // If any pieces fail to overlap, then there is no intersection.
705   if (!get_intersection(scheme_, other.scheme_, &URLPattern::MatchesScheme,
706                         &scheme) ||
707       !get_intersection(host_, other.host_, &URLPattern::MatchesHost, &host) ||
708       !get_intersection(port_, other.port_, &URLPattern::MatchesPortPattern,
709                         &port) ||
710       !get_intersection(path_, other.path_, &URLPattern::MatchesPath, &path)) {
711     return base::nullopt;
712   }
713 
714   // Only match subdomains if both patterns match subdomains.
715   base::StringPiece subdomains;
716   if (match_subdomains_ && other.match_subdomains_) {
717     // The host may be empty (e.g., in the case of *://*/* - in that case, only
718     // append '*' instead of '*.'.
719     subdomains = host.empty() ? "*" : "*.";
720   }
721 
722   base::StringPiece scheme_separator =
723       IsStandardScheme(scheme) ? url::kStandardSchemeSeparator : ":";
724 
725   std::string pattern_str = base::StrCat(
726       {scheme, scheme_separator, subdomains, host, ":", port, path});
727 
728   URLPattern pattern(intersection_schemes);
729   ParseResult result = pattern.Parse(pattern_str);
730   // TODO(devlin): I don't think there's any way this should ever fail, but
731   // use a CHECK() to flush any cases out. If nothing crops up, downgrade this
732   // to a DCHECK in M72.
733   CHECK_EQ(ParseResult::kSuccess, result);
734 
735   return pattern;
736 }
737 
MatchesAnyScheme(const std::vector<std::string> & schemes) const738 bool URLPattern::MatchesAnyScheme(
739     const std::vector<std::string>& schemes) const {
740   for (auto i = schemes.cbegin(); i != schemes.cend(); ++i) {
741     if (MatchesScheme(*i))
742       return true;
743   }
744 
745   return false;
746 }
747 
MatchesAllSchemes(const std::vector<std::string> & schemes) const748 bool URLPattern::MatchesAllSchemes(
749     const std::vector<std::string>& schemes) const {
750   for (auto i = schemes.cbegin(); i != schemes.cend(); ++i) {
751     if (!MatchesScheme(*i))
752       return false;
753   }
754 
755   return true;
756 }
757 
MatchesSecurityOriginHelper(const GURL & test) const758 bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
759   // Ignore hostname if scheme is file://.
760   if (scheme_ != url::kFileScheme && !MatchesHost(test))
761     return false;
762 
763   if (!MatchesPortPattern(base::NumberToString(test.EffectiveIntPort())))
764     return false;
765 
766   return true;
767 }
768 
MatchesPortPattern(base::StringPiece port) const769 bool URLPattern::MatchesPortPattern(base::StringPiece port) const {
770   return port_ == "*" || port_ == port;
771 }
772 
GetExplicitSchemes() const773 std::vector<std::string> URLPattern::GetExplicitSchemes() const {
774   std::vector<std::string> result;
775 
776   if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
777     result.push_back(scheme_);
778     return result;
779   }
780 
781   for (size_t i = 0; i < base::size(kValidSchemes); ++i) {
782     if (MatchesScheme(kValidSchemes[i])) {
783       result.push_back(kValidSchemes[i]);
784     }
785   }
786 
787   return result;
788 }
789 
ConvertToExplicitSchemes() const790 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
791   std::vector<std::string> explicit_schemes = GetExplicitSchemes();
792   std::vector<URLPattern> result;
793 
794   for (std::vector<std::string>::const_iterator i = explicit_schemes.begin();
795        i != explicit_schemes.end(); ++i) {
796     URLPattern temp = *this;
797     temp.SetScheme(*i);
798     temp.SetMatchAllURLs(false);
799     result.push_back(temp);
800   }
801 
802   return result;
803 }
804 
805 // static
GetParseResultString(URLPattern::ParseResult parse_result)806 const char* URLPattern::GetParseResultString(
807     URLPattern::ParseResult parse_result) {
808   return kParseResultMessages[static_cast<int>(parse_result)];
809 }
810