1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "extensions/common/url_pattern.h"
6
7 #include <stddef.h>
8
9 #include <ostream>
10
11 #include "base/stl_util.h"
12 #include "base/strings/pattern.h"
13 #include "base/strings/strcat.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h"
18 #include "content/public/common/url_constants.h"
19 #include "extensions/common/constants.h"
20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
21 #include "net/base/url_util.h"
22 #include "url/gurl.h"
23 #include "url/url_util.h"
24
// Pattern string that Parse() special-cases by switching the pattern into
// match-all-URLs mode; GetAsString() returns it for such patterns.
const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
26
27 namespace {
28
// TODO(aa): What about more obscure schemes like javascript: ?
// Schemes that a pattern may name explicitly. Entry i corresponds to entry i
// of kValidSchemeMasks, so both arrays must list their entries in the same
// order (the static_assert below can only verify that the lengths agree).
const char* const kValidSchemes[] = {
    url::kHttpScheme,         url::kHttpsScheme,
    url::kFileScheme,         url::kFtpScheme,
    content::kChromeUIScheme, extensions::kExtensionScheme,
    url::kFileSystemScheme,   url::kWsScheme,
    url::kWssScheme,          url::kDataScheme,
    url::kQrcScheme,
};

// Scheme mask bit for each entry of kValidSchemes, in the same order.
const int kValidSchemeMasks[] = {
    URLPattern::SCHEME_HTTP,       URLPattern::SCHEME_HTTPS,
    URLPattern::SCHEME_FILE,       URLPattern::SCHEME_FTP,
    URLPattern::SCHEME_CHROMEUI,   URLPattern::SCHEME_EXTENSION,
    URLPattern::SCHEME_FILESYSTEM, URLPattern::SCHEME_WS,
    URLPattern::SCHEME_WSS,        URLPattern::SCHEME_DATA,
    URLPattern::SCHEME_QRC,
};

static_assert(base::size(kValidSchemes) == base::size(kValidSchemeMasks),
              "must keep these arrays in sync");
51
// Human-readable text for each parse outcome.
const char kParseSuccess[] = "Success.";
const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator.";
const char kParseErrorInvalidScheme[] = "Invalid scheme.";
const char kParseErrorWrongSchemeType[] = "Wrong scheme type.";
const char kParseErrorEmptyHost[] = "Host can not be empty.";
const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
const char kParseErrorEmptyPath[] = "Empty path.";
const char kParseErrorInvalidPort[] = "Invalid port.";
const char kParseErrorInvalidHost[] = "Invalid host.";

// Message explaining each URLPattern::ParseResult. GetParseResultString()
// indexes this table with the integer value of the ParseResult, so the entries
// have to stay in the same order as the enumerators.
const char* const kParseResultMessages[] = {
    kParseSuccess,
    kParseErrorMissingSchemeSeparator,
    kParseErrorInvalidScheme,
    kParseErrorWrongSchemeType,
    kParseErrorEmptyHost,
    kParseErrorInvalidHostWildcard,
    kParseErrorEmptyPath,
    kParseErrorInvalidPort,
    kParseErrorInvalidHost,
};

// Guards against adding a ParseResult without adding its message.
static_assert(static_cast<int>(URLPattern::ParseResult::kNumParseResults) ==
                  base::size(kParseResultMessages),
              "must add message for each parse result");
78
// Parse() searches for this string to find where the host ends and the path
// begins.
const char kPathSeparator[] = "/";
80
IsStandardScheme(base::StringPiece scheme)81 bool IsStandardScheme(base::StringPiece scheme) {
82 // "*" gets the same treatment as a standard scheme.
83 if (scheme == "*")
84 return true;
85
86 return url::IsStandard(scheme.data(),
87 url::Component(0, static_cast<int>(scheme.length())));
88 }
89
IsValidPortForScheme(base::StringPiece scheme,base::StringPiece port)90 bool IsValidPortForScheme(base::StringPiece scheme, base::StringPiece port) {
91 if (port == "*")
92 return true;
93
94 // Only accept non-wildcard ports if the scheme uses ports.
95 if (url::DefaultPortForScheme(scheme.data(), scheme.length()) ==
96 url::PORT_UNSPECIFIED) {
97 return false;
98 }
99
100 int parsed_port = url::PORT_UNSPECIFIED;
101 if (!base::StringToInt(port, &parsed_port))
102 return false;
103 return (parsed_port >= 0) && (parsed_port < 65536);
104 }
105
106 // Returns |path| with the trailing wildcard stripped if one existed.
107 //
108 // The functions that rely on this (OverlapsWith and Contains) are only
109 // called for the patterns inside URLPatternSet. In those cases, we know that
110 // the path will have only a single wildcard at the end. This makes figuring
111 // out overlap much easier. It seems like there is probably a computer-sciency
112 // way to solve the general case, but we don't need that yet.
StripTrailingWildcard(base::StringPiece path)113 base::StringPiece StripTrailingWildcard(base::StringPiece path) {
114 if (path.ends_with("*"))
115 path.remove_suffix(1);
116 return path;
117 }
118
119 // Removes trailing dot from |host_piece| if any.
CanonicalizeHostForMatching(base::StringPiece host_piece)120 base::StringPiece CanonicalizeHostForMatching(base::StringPiece host_piece) {
121 if (host_piece.ends_with("."))
122 host_piece.remove_suffix(1);
123 return host_piece;
124 }
125
126 } // namespace
127
128 // static
IsValidSchemeForExtensions(base::StringPiece scheme)129 bool URLPattern::IsValidSchemeForExtensions(base::StringPiece scheme) {
130 for (size_t i = 0; i < base::size(kValidSchemes); ++i) {
131 if (scheme == kValidSchemes[i])
132 return true;
133 }
134 return false;
135 }
136
137 // static
GetValidSchemeMaskForExtensions()138 int URLPattern::GetValidSchemeMaskForExtensions() {
139 int result = 0;
140 for (size_t i = 0; i < base::size(kValidSchemeMasks); ++i)
141 result |= kValidSchemeMasks[i];
142 return result;
143 }
144
URLPattern()145 URLPattern::URLPattern()
146 : valid_schemes_(SCHEME_NONE),
147 match_all_urls_(false),
148 match_subdomains_(false),
149 port_("*") {}
150
URLPattern(int valid_schemes)151 URLPattern::URLPattern(int valid_schemes)
152 : valid_schemes_(valid_schemes),
153 match_all_urls_(false),
154 match_subdomains_(false),
155 port_("*") {}
156
URLPattern(int valid_schemes,base::StringPiece pattern)157 URLPattern::URLPattern(int valid_schemes, base::StringPiece pattern)
158 // Strict error checking is used, because this constructor is only
159 // appropriate when we know |pattern| is valid.
160 : valid_schemes_(valid_schemes),
161 match_all_urls_(false),
162 match_subdomains_(false),
163 port_("*") {
164 ParseResult result = Parse(pattern);
165 if (result != ParseResult::kSuccess) {
166 const char* error_string = GetParseResultString(result);
167 // Temporarily add more logging to investigate why this code path is
168 // reached. For http://crbug.com/856948
169 LOG(ERROR) << "Invalid pattern was given " << pattern << " result "
170 << error_string;
171 NOTREACHED() << "URLPattern invalid: '" << pattern
172 << "'; error: " << error_string;
173 }
174 }
175
176 URLPattern::URLPattern(const URLPattern& other) = default;
177
178 URLPattern::URLPattern(URLPattern&& other) = default;
179
~URLPattern()180 URLPattern::~URLPattern() {
181 }
182
183 URLPattern& URLPattern::operator=(const URLPattern& other) = default;
184
185 URLPattern& URLPattern::operator=(URLPattern&& other) = default;
186
operator <(const URLPattern & other) const187 bool URLPattern::operator<(const URLPattern& other) const {
188 return GetAsString() < other.GetAsString();
189 }
190
operator >(const URLPattern & other) const191 bool URLPattern::operator>(const URLPattern& other) const {
192 return GetAsString() > other.GetAsString();
193 }
194
operator ==(const URLPattern & other) const195 bool URLPattern::operator==(const URLPattern& other) const {
196 return GetAsString() == other.GetAsString();
197 }
198
operator <<(std::ostream & out,const URLPattern & url_pattern)199 std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) {
200 return out << '"' << url_pattern.GetAsString() << '"';
201 }
202
Parse(base::StringPiece pattern)203 URLPattern::ParseResult URLPattern::Parse(base::StringPiece pattern) {
204 spec_.clear();
205 SetMatchAllURLs(false);
206 SetMatchSubdomains(false);
207 SetPort("*");
208
209 // Special case pattern to match every valid URL.
210 if (pattern == kAllUrlsPattern) {
211 SetMatchAllURLs(true);
212 return ParseResult::kSuccess;
213 }
214
215 // Parse out the scheme.
216 size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator);
217 bool has_standard_scheme_separator = true;
218
219 // Some urls also use ':' alone as the scheme separator.
220 if (scheme_end_pos == base::StringPiece::npos) {
221 scheme_end_pos = pattern.find(':');
222 has_standard_scheme_separator = false;
223 }
224
225 if (scheme_end_pos == base::StringPiece::npos)
226 return ParseResult::kMissingSchemeSeparator;
227
228 if (!SetScheme(pattern.substr(0, scheme_end_pos)))
229 return ParseResult::kInvalidScheme;
230
231 bool standard_scheme = IsStandardScheme(scheme_);
232 if (standard_scheme != has_standard_scheme_separator)
233 return ParseResult::kWrongSchemeSeparator;
234
235 // Advance past the scheme separator.
236 scheme_end_pos +=
237 (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1);
238 if (scheme_end_pos >= pattern.size())
239 return ParseResult::kEmptyHost;
240
241 // Parse out the host and path.
242 size_t host_start_pos = scheme_end_pos;
243 size_t path_start_pos = 0;
244
245 if (!standard_scheme) {
246 path_start_pos = host_start_pos;
247 } else if (scheme_ == url::kFileScheme) {
248 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
249 if (host_end_pos == base::StringPiece::npos) {
250 // Allow hostname omission.
251 // e.g. file://* is interpreted as file:///*,
252 // file://foo* is interpreted as file:///foo*.
253 path_start_pos = host_start_pos - 1;
254 } else {
255 // Ignore hostname if scheme is file://.
256 // e.g. file://localhost/foo is equal to file:///foo.
257 path_start_pos = host_end_pos;
258 }
259 } else {
260 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
261
262 // Host is required.
263 if (host_start_pos == host_end_pos)
264 return ParseResult::kEmptyHost;
265
266 if (host_end_pos == base::StringPiece::npos)
267 return ParseResult::kEmptyPath;
268
269 base::StringPiece host_and_port =
270 pattern.substr(host_start_pos, host_end_pos - host_start_pos);
271
272 size_t port_separator_pos = base::StringPiece::npos;
273 if (host_and_port[0] != '[') {
274 // Not IPv6 (either IPv4 or just a normal address).
275 port_separator_pos = host_and_port.find(':');
276 } else { // IPv6.
277 size_t host_end_pos = host_and_port.find(']');
278 if (host_end_pos == base::StringPiece::npos)
279 return ParseResult::kInvalidHost;
280 if (host_end_pos == 1)
281 return ParseResult::kEmptyHost;
282
283 if (host_end_pos < host_and_port.length() - 1) {
284 // The host isn't the only component. Check for a port. This would
285 // require a ':' to follow the closing ']' from the host.
286 if (host_and_port[host_end_pos + 1] != ':')
287 return ParseResult::kInvalidHost;
288
289 port_separator_pos = host_end_pos + 1;
290 }
291 }
292
293 if (port_separator_pos != base::StringPiece::npos &&
294 !SetPort(host_and_port.substr(port_separator_pos + 1))) {
295 return ParseResult::kInvalidPort;
296 }
297
298 // Note: this substr() will be the entire string if the port position
299 // wasn't found.
300 base::StringPiece host_piece = host_and_port.substr(0, port_separator_pos);
301
302 if (host_piece.empty())
303 return ParseResult::kEmptyHost;
304
305 if (host_piece == "*") {
306 match_subdomains_ = true;
307 host_piece = base::StringPiece();
308 } else if (host_piece.starts_with("*.")) {
309 if (host_piece.length() == 2) {
310 // We don't allow just '*.' as a host.
311 return ParseResult::kEmptyHost;
312 }
313 match_subdomains_ = true;
314 host_piece = host_piece.substr(2);
315 }
316
317 host_ = host_piece.as_string();
318
319 path_start_pos = host_end_pos;
320 }
321
322 SetPath(pattern.substr(path_start_pos));
323
324 // No other '*' can occur in the host, though. This isn't necessary, but is
325 // done as a convenience to developers who might otherwise be confused and
326 // think '*' works as a glob in the host.
327 if (host_.find('*') != std::string::npos)
328 return ParseResult::kInvalidHostWildcard;
329
330 if (!host_.empty()) {
331 // If |host_| is present (i.e., isn't a wildcard), we need to canonicalize
332 // it.
333 url::CanonHostInfo host_info;
334 host_ = net::CanonicalizeHost(host_, &host_info);
335 // net::CanonicalizeHost() returns an empty string on failure.
336 if (host_.empty())
337 return ParseResult::kInvalidHost;
338 }
339
340 // Null characters are not allowed in hosts.
341 if (host_.find('\0') != std::string::npos)
342 return ParseResult::kInvalidHost;
343
344 return ParseResult::kSuccess;
345 }
346
SetValidSchemes(int valid_schemes)347 void URLPattern::SetValidSchemes(int valid_schemes) {
348 // TODO(devlin): Should we check that valid_schemes agrees with |scheme_|
349 // here? Otherwise, valid_schemes_ and schemes_ may stop agreeing with each
350 // other (e.g., in the case of `*://*/*`, where the scheme should only be
351 // http or https).
352 spec_.clear();
353 valid_schemes_ = valid_schemes;
354 }
355
SetHost(base::StringPiece host)356 void URLPattern::SetHost(base::StringPiece host) {
357 spec_.clear();
358 host_.assign(host.data(), host.size());
359 }
360
SetMatchAllURLs(bool val)361 void URLPattern::SetMatchAllURLs(bool val) {
362 spec_.clear();
363 match_all_urls_ = val;
364
365 if (val) {
366 match_subdomains_ = true;
367 scheme_ = "*";
368 host_.clear();
369 SetPath("/*");
370 }
371 }
372
SetMatchSubdomains(bool val)373 void URLPattern::SetMatchSubdomains(bool val) {
374 spec_.clear();
375 match_subdomains_ = val;
376 }
377
SetScheme(base::StringPiece scheme)378 bool URLPattern::SetScheme(base::StringPiece scheme) {
379 spec_.clear();
380 scheme_.assign(scheme.data(), scheme.size());
381 if (scheme_ == "*") {
382 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
383 } else if (!IsValidScheme(scheme_)) {
384 return false;
385 }
386 return true;
387 }
388
IsValidScheme(base::StringPiece scheme) const389 bool URLPattern::IsValidScheme(base::StringPiece scheme) const {
390 if (valid_schemes_ == SCHEME_ALL)
391 return true;
392
393 for (size_t i = 0; i < base::size(kValidSchemes); ++i) {
394 if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
395 return true;
396 }
397
398 return false;
399 }
400
SetPath(base::StringPiece path)401 void URLPattern::SetPath(base::StringPiece path) {
402 spec_.clear();
403 path_.assign(path.data(), path.size());
404 path_escaped_ = path_;
405 base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
406 base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
407 }
408
SetPort(base::StringPiece port)409 bool URLPattern::SetPort(base::StringPiece port) {
410 spec_.clear();
411 if (IsValidPortForScheme(scheme_, port)) {
412 port_.assign(port.data(), port.size());
413 return true;
414 }
415 return false;
416 }
417
MatchesURL(const GURL & test) const418 bool URLPattern::MatchesURL(const GURL& test) const {
419 // Invalid URLs can never match.
420 if (!test.is_valid())
421 return false;
422
423 const GURL* test_url = &test;
424 bool has_inner_url = test.inner_url() != nullptr;
425
426 if (has_inner_url) {
427 if (!test.SchemeIsFileSystem())
428 return false; // The only nested URLs we handle are filesystem URLs.
429 test_url = test.inner_url();
430 }
431
432 // Ensure the scheme matches first, since <all_urls> may not match this URL if
433 // the scheme is excluded.
434 if (!MatchesScheme(test_url->scheme_piece()))
435 return false;
436
437 if (match_all_urls_)
438 return true;
439
440 // Unless |match_all_urls_| is true, the grammar only permits matching
441 // URLs with nonempty paths.
442 if (!test.has_path())
443 return false;
444
445 std::string path_for_request = test.PathForRequest();
446 if (has_inner_url) {
447 path_for_request = base::StringPrintf("%s%s", test_url->path_piece().data(),
448 path_for_request.c_str());
449 }
450
451 return MatchesSecurityOriginHelper(*test_url) &&
452 MatchesPath(path_for_request);
453 }
454
MatchesSecurityOrigin(const GURL & test) const455 bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
456 const GURL* test_url = &test;
457 bool has_inner_url = test.inner_url() != NULL;
458
459 if (has_inner_url) {
460 if (!test.SchemeIsFileSystem())
461 return false; // The only nested URLs we handle are filesystem URLs.
462 test_url = test.inner_url();
463 }
464
465 if (!MatchesScheme(test_url->scheme()))
466 return false;
467
468 if (match_all_urls_)
469 return true;
470
471 return MatchesSecurityOriginHelper(*test_url);
472 }
473
MatchesScheme(base::StringPiece test) const474 bool URLPattern::MatchesScheme(base::StringPiece test) const {
475 if (!IsValidScheme(test))
476 return false;
477
478 return scheme_ == "*" || test == scheme_;
479 }
480
MatchesHost(base::StringPiece host) const481 bool URLPattern::MatchesHost(base::StringPiece host) const {
482 // TODO(devlin): This is a bit sad. Parsing urls is expensive. However, it's
483 // important that we do this conversion to a GURL in order to canonicalize the
484 // host (the pattern's host_ already is canonicalized from Parse()). We can't
485 // just do string comparison.
486 return MatchesHost(
487 GURL(base::StringPrintf("%s%s%s/", url::kHttpScheme,
488 url::kStandardSchemeSeparator, host.data())));
489 }
490
MatchesHost(const GURL & test) const491 bool URLPattern::MatchesHost(const GURL& test) const {
492 base::StringPiece test_host(CanonicalizeHostForMatching(test.host_piece()));
493 const base::StringPiece pattern_host(CanonicalizeHostForMatching(host_));
494
495 // If the hosts are exactly equal, we have a match.
496 if (test_host == pattern_host)
497 return true;
498
499 // If we're matching subdomains, and we have no host in the match pattern,
500 // that means that we're matching all hosts, which means we have a match no
501 // matter what the test host is.
502 if (match_subdomains_ && pattern_host.empty())
503 return true;
504
505 // Otherwise, we can only match if our match pattern matches subdomains.
506 if (!match_subdomains_)
507 return false;
508
509 // We don't do subdomain matching against IP addresses, so we can give up now
510 // if the test host is an IP address.
511 if (test.HostIsIPAddress())
512 return false;
513
514 // Check if the test host is a subdomain of our host.
515 if (test_host.length() <= (pattern_host.length() + 1))
516 return false;
517
518 if (!test_host.ends_with(pattern_host))
519 return false;
520
521 return test_host[test_host.length() - pattern_host.length() - 1] == '.';
522 }
523
MatchesEffectiveTld(net::registry_controlled_domains::PrivateRegistryFilter private_filter,net::registry_controlled_domains::UnknownRegistryFilter unknown_filter) const524 bool URLPattern::MatchesEffectiveTld(
525 net::registry_controlled_domains::PrivateRegistryFilter private_filter,
526 net::registry_controlled_domains::UnknownRegistryFilter unknown_filter)
527 const {
528 // Check if it matches all urls or is a pattern like http://*/*.
529 if (match_all_urls_ || (match_subdomains_ && host_.empty()))
530 return true;
531
532 // If this doesn't even match subdomains, it can't possibly be a TLD wildcard.
533 if (!match_subdomains_)
534 return false;
535
536 // If there was more than just a TLD in the host (e.g., *.foobar.com), it
537 // doesn't match all hosts in an effective TLD.
538 if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
539 host_, unknown_filter, private_filter)) {
540 return false;
541 }
542
543 // At this point the host could either be just a TLD ("com") or some unknown
544 // TLD-like string ("notatld"). To disambiguate between them construct a
545 // fake URL, and check the registry.
546 //
547 // If we recognized this TLD, then this is a pattern like *.com, and it
548 // matches an effective TLD.
549 return net::registry_controlled_domains::HostHasRegistryControlledDomain(
550 "notatld." + host_, unknown_filter, private_filter);
551 }
552
MatchesSingleOrigin() const553 bool URLPattern::MatchesSingleOrigin() const {
554 // Strictly speaking, the port is part of the origin, but in URLPattern it
555 // defaults to *. It's not very interesting anyway, so leave it out.
556 return !MatchesEffectiveTld() && scheme_ != "*" && !match_subdomains_;
557 }
558
MatchesPath(base::StringPiece test) const559 bool URLPattern::MatchesPath(base::StringPiece test) const {
560 // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
561 // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
562 // The below if is a no-copy way of doing (test + "/*" == path_escaped_).
563 if (path_escaped_.length() == test.length() + 2 &&
564 base::StartsWith(path_escaped_.c_str(), test,
565 base::CompareCase::SENSITIVE) &&
566 base::EndsWith(path_escaped_, "/*", base::CompareCase::SENSITIVE)) {
567 return true;
568 }
569
570 return base::MatchPattern(test, path_escaped_);
571 }
572
GetAsString() const573 const std::string& URLPattern::GetAsString() const {
574 if (!spec_.empty())
575 return spec_;
576
577 if (match_all_urls_) {
578 spec_ = kAllUrlsPattern;
579 return spec_;
580 }
581
582 bool standard_scheme = IsStandardScheme(scheme_);
583
584 std::string spec = scheme_ +
585 (standard_scheme ? url::kStandardSchemeSeparator : ":");
586
587 if (scheme_ != url::kFileScheme && standard_scheme) {
588 if (match_subdomains_) {
589 spec += "*";
590 if (!host_.empty())
591 spec += ".";
592 }
593
594 if (!host_.empty())
595 spec += host_;
596
597 if (port_ != "*") {
598 spec += ":";
599 spec += port_;
600 }
601 }
602
603 if (!path_.empty())
604 spec += path_;
605
606 spec_ = std::move(spec);
607 return spec_;
608 }
609
OverlapsWith(const URLPattern & other) const610 bool URLPattern::OverlapsWith(const URLPattern& other) const {
611 if (match_all_urls() || other.match_all_urls())
612 return true;
613 return (MatchesAnyScheme(other.GetExplicitSchemes()) ||
614 other.MatchesAnyScheme(GetExplicitSchemes()))
615 && (MatchesHost(other.host()) || other.MatchesHost(host()))
616 && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port()))
617 && (MatchesPath(StripTrailingWildcard(other.path())) ||
618 other.MatchesPath(StripTrailingWildcard(path())));
619 }
620
Contains(const URLPattern & other) const621 bool URLPattern::Contains(const URLPattern& other) const {
622 // Important: it's not enough to just check match_all_urls(); we also need to
623 // make sure that the schemes in this pattern are a superset of those in
624 // |other|.
625 if (match_all_urls() &&
626 (valid_schemes_ & other.valid_schemes_) == other.valid_schemes_) {
627 return true;
628 }
629
630 return MatchesAllSchemes(other.GetExplicitSchemes()) &&
631 MatchesHost(other.host()) &&
632 (!other.match_subdomains_ || match_subdomains_) &&
633 MatchesPortPattern(other.port()) &&
634 MatchesPath(StripTrailingWildcard(other.path()));
635 }
636
CreateIntersection(const URLPattern & other) const637 base::Optional<URLPattern> URLPattern::CreateIntersection(
638 const URLPattern& other) const {
639 // Easy case: Schemes don't overlap. Return nullopt.
640 int intersection_schemes = URLPattern::SCHEME_NONE;
641 if (valid_schemes_ == URLPattern::SCHEME_ALL)
642 intersection_schemes = other.valid_schemes_;
643 else if (other.valid_schemes_ == URLPattern::SCHEME_ALL)
644 intersection_schemes = valid_schemes_;
645 else
646 intersection_schemes = valid_schemes_ & other.valid_schemes_;
647
648 if (intersection_schemes == URLPattern::SCHEME_NONE)
649 return base::nullopt;
650
651 {
652 // In a few cases, we can (mostly) return a copy of one of the patterns.
653 // This can happen when either:
654 // - The URLPattern's are identical (possibly excluding valid_schemes_)
655 // - One of the patterns has match_all_urls() equal to true.
656 // NOTE(devlin): Theoretically, we could use Contains() instead of
657 // match_all_urls() here. However, Contains() strips the trailing wildcard
658 // from the path, which could yield the incorrect result.
659 const URLPattern* copy_source = nullptr;
660 if (*this == other || other.match_all_urls())
661 copy_source = this;
662 else if (match_all_urls())
663 copy_source = &other;
664
665 if (copy_source) {
666 // NOTE: equality checks don't take into account valid_schemes_, and
667 // schemes can be different in the case of match_all_urls() as well, so
668 // we can't always just return *copy_source.
669 if (intersection_schemes == copy_source->valid_schemes_)
670 return *copy_source;
671 URLPattern result(intersection_schemes);
672 ParseResult parse_result = result.Parse(copy_source->GetAsString());
673 CHECK_EQ(ParseResult::kSuccess, parse_result);
674 return result;
675 }
676 }
677
678 // No more easy cases. Go through component by component to find the patterns
679 // that intersect.
680
681 // Note: Alias the function type (rather than using auto) because
682 // MatchesHost() is overloaded.
683 using match_function_type = bool (URLPattern::*)(base::StringPiece) const;
684
685 auto get_intersection = [this, &other](base::StringPiece own_str,
686 base::StringPiece other_str,
687 match_function_type match_function,
688 base::StringPiece* out) {
689 if ((this->*match_function)(other_str)) {
690 *out = other_str;
691 return true;
692 }
693 if ((other.*match_function)(own_str)) {
694 *out = own_str;
695 return true;
696 }
697 return false;
698 };
699
700 base::StringPiece scheme;
701 base::StringPiece host;
702 base::StringPiece port;
703 base::StringPiece path;
704 // If any pieces fail to overlap, then there is no intersection.
705 if (!get_intersection(scheme_, other.scheme_, &URLPattern::MatchesScheme,
706 &scheme) ||
707 !get_intersection(host_, other.host_, &URLPattern::MatchesHost, &host) ||
708 !get_intersection(port_, other.port_, &URLPattern::MatchesPortPattern,
709 &port) ||
710 !get_intersection(path_, other.path_, &URLPattern::MatchesPath, &path)) {
711 return base::nullopt;
712 }
713
714 // Only match subdomains if both patterns match subdomains.
715 base::StringPiece subdomains;
716 if (match_subdomains_ && other.match_subdomains_) {
717 // The host may be empty (e.g., in the case of *://*/* - in that case, only
718 // append '*' instead of '*.'.
719 subdomains = host.empty() ? "*" : "*.";
720 }
721
722 base::StringPiece scheme_separator =
723 IsStandardScheme(scheme) ? url::kStandardSchemeSeparator : ":";
724
725 std::string pattern_str = base::StrCat(
726 {scheme, scheme_separator, subdomains, host, ":", port, path});
727
728 URLPattern pattern(intersection_schemes);
729 ParseResult result = pattern.Parse(pattern_str);
730 // TODO(devlin): I don't think there's any way this should ever fail, but
731 // use a CHECK() to flush any cases out. If nothing crops up, downgrade this
732 // to a DCHECK in M72.
733 CHECK_EQ(ParseResult::kSuccess, result);
734
735 return pattern;
736 }
737
MatchesAnyScheme(const std::vector<std::string> & schemes) const738 bool URLPattern::MatchesAnyScheme(
739 const std::vector<std::string>& schemes) const {
740 for (auto i = schemes.cbegin(); i != schemes.cend(); ++i) {
741 if (MatchesScheme(*i))
742 return true;
743 }
744
745 return false;
746 }
747
MatchesAllSchemes(const std::vector<std::string> & schemes) const748 bool URLPattern::MatchesAllSchemes(
749 const std::vector<std::string>& schemes) const {
750 for (auto i = schemes.cbegin(); i != schemes.cend(); ++i) {
751 if (!MatchesScheme(*i))
752 return false;
753 }
754
755 return true;
756 }
757
MatchesSecurityOriginHelper(const GURL & test) const758 bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
759 // Ignore hostname if scheme is file://.
760 if (scheme_ != url::kFileScheme && !MatchesHost(test))
761 return false;
762
763 if (!MatchesPortPattern(base::NumberToString(test.EffectiveIntPort())))
764 return false;
765
766 return true;
767 }
768
MatchesPortPattern(base::StringPiece port) const769 bool URLPattern::MatchesPortPattern(base::StringPiece port) const {
770 return port_ == "*" || port_ == port;
771 }
772
GetExplicitSchemes() const773 std::vector<std::string> URLPattern::GetExplicitSchemes() const {
774 std::vector<std::string> result;
775
776 if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
777 result.push_back(scheme_);
778 return result;
779 }
780
781 for (size_t i = 0; i < base::size(kValidSchemes); ++i) {
782 if (MatchesScheme(kValidSchemes[i])) {
783 result.push_back(kValidSchemes[i]);
784 }
785 }
786
787 return result;
788 }
789
ConvertToExplicitSchemes() const790 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
791 std::vector<std::string> explicit_schemes = GetExplicitSchemes();
792 std::vector<URLPattern> result;
793
794 for (std::vector<std::string>::const_iterator i = explicit_schemes.begin();
795 i != explicit_schemes.end(); ++i) {
796 URLPattern temp = *this;
797 temp.SetScheme(*i);
798 temp.SetMatchAllURLs(false);
799 result.push_back(temp);
800 }
801
802 return result;
803 }
804
805 // static
GetParseResultString(URLPattern::ParseResult parse_result)806 const char* URLPattern::GetParseResultString(
807 URLPattern::ParseResult parse_result) {
808 return kParseResultMessages[static_cast<int>(parse_result)];
809 }
810