1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "extensions/browser/api/declarative_net_request/indexed_rule.h"
6
7 #include <algorithm>
8 #include <utility>
9
10 #include "base/check_op.h"
11 #include "base/metrics/histogram_macros.h"
12 #include "base/notreached.h"
13 #include "base/numerics/safe_conversions.h"
14 #include "base/stl_util.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/strings/string_util.h"
17 #include "components/url_pattern_index/url_pattern_index.h"
18 #include "extensions/browser/api/declarative_net_request/constants.h"
19 #include "extensions/browser/api/declarative_net_request/utils.h"
20 #include "extensions/common/api/declarative_net_request.h"
21 #include "extensions/common/api/declarative_net_request/utils.h"
22 #include "net/http/http_util.h"
23 #include "third_party/re2/src/re2/re2.h"
24 #include "url/gurl.h"
25 #include "url/url_constants.h"
26
27 namespace extensions {
28 namespace declarative_net_request {
29
30 namespace {
31
32 namespace flat_rule = url_pattern_index::flat;
33 namespace dnr_api = extensions::api::declarative_net_request;
34
// Special characters recognized in the url filter of a rule.
// '|' is an anchor; it may appear at the start (once or twice) or at the very
// end of a url filter.
constexpr char kAnchorCharacter = '|';
// '^' is the separator character; a pattern containing it is treated as a
// wildcarded pattern.
constexpr char kSeparatorCharacter = '^';
// '*' is the wildcard character; a pattern containing it is treated as a
// wildcarded pattern.
constexpr char kWildcardCharacter = '*';
38
// Checks whether every bit set in the bitmask |sub| is also set in the bitmask
// |super|, i.e. whether |sub| is a subset of |super|.
constexpr bool IsSubset(unsigned sub, unsigned super) {
  // Any bit of |sub| that survives masking out |super| is missing in |super|.
  return (sub & ~super) == 0u;
}
43
44 // Helper class to parse the url filter of a Declarative Net Request API rule.
45 class UrlFilterParser {
46 public:
47 // This sets the |url_pattern_type|, |anchor_left|, |anchor_right| and
48 // |url_pattern| fields on the |indexed_rule_|.
Parse(std::unique_ptr<std::string> url_filter,IndexedRule * indexed_rule)49 static void Parse(std::unique_ptr<std::string> url_filter,
50 IndexedRule* indexed_rule) {
51 DCHECK(indexed_rule);
52 UrlFilterParser(url_filter ? std::move(*url_filter) : std::string(),
53 indexed_rule)
54 .ParseImpl();
55 }
56
57 private:
UrlFilterParser(std::string url_filter,IndexedRule * indexed_rule)58 UrlFilterParser(std::string url_filter, IndexedRule* indexed_rule)
59 : url_filter_(std::move(url_filter)),
60 url_filter_len_(url_filter_.length()),
61 index_(0),
62 indexed_rule_(indexed_rule) {}
63
ParseImpl()64 void ParseImpl() {
65 ParseLeftAnchor();
66 DCHECK_LE(index_, 2u);
67
68 ParseFilterString();
69 DCHECK(index_ == url_filter_len_ || index_ + 1 == url_filter_len_);
70
71 ParseRightAnchor();
72 DCHECK_EQ(url_filter_len_, index_);
73 }
74
ParseLeftAnchor()75 void ParseLeftAnchor() {
76 indexed_rule_->anchor_left = flat_rule::AnchorType_NONE;
77
78 if (IsAtAnchor()) {
79 ++index_;
80 indexed_rule_->anchor_left = flat_rule::AnchorType_BOUNDARY;
81 if (IsAtAnchor()) {
82 ++index_;
83 indexed_rule_->anchor_left = flat_rule::AnchorType_SUBDOMAIN;
84 }
85 }
86 }
87
ParseFilterString()88 void ParseFilterString() {
89 indexed_rule_->url_pattern_type = flat_rule::UrlPatternType_SUBSTRING;
90 size_t left_index = index_;
91 while (index_ < url_filter_len_ && !IsAtRightAnchor()) {
92 if (IsAtSeparatorOrWildcard())
93 indexed_rule_->url_pattern_type = flat_rule::UrlPatternType_WILDCARDED;
94 ++index_;
95 }
96 // Note: Empty url patterns are supported.
97 indexed_rule_->url_pattern =
98 url_filter_.substr(left_index, index_ - left_index);
99 }
100
ParseRightAnchor()101 void ParseRightAnchor() {
102 indexed_rule_->anchor_right = flat_rule::AnchorType_NONE;
103 if (IsAtRightAnchor()) {
104 ++index_;
105 indexed_rule_->anchor_right = flat_rule::AnchorType_BOUNDARY;
106 }
107 }
108
IsAtSeparatorOrWildcard() const109 bool IsAtSeparatorOrWildcard() const {
110 return IsAtValidIndex() && (url_filter_[index_] == kSeparatorCharacter ||
111 url_filter_[index_] == kWildcardCharacter);
112 }
113
IsAtRightAnchor() const114 bool IsAtRightAnchor() const {
115 return IsAtAnchor() && index_ > 0 && index_ + 1 == url_filter_len_;
116 }
117
IsAtValidIndex() const118 bool IsAtValidIndex() const { return index_ < url_filter_len_; }
119
IsAtAnchor() const120 bool IsAtAnchor() const {
121 return IsAtValidIndex() && url_filter_[index_] == kAnchorCharacter;
122 }
123
124 const std::string url_filter_;
125 const size_t url_filter_len_;
126 size_t index_;
127 IndexedRule* indexed_rule_; // Must outlive this instance.
128
129 DISALLOW_COPY_AND_ASSIGN(UrlFilterParser);
130 };
131
IsCaseSensitive(const dnr_api::Rule & parsed_rule)132 bool IsCaseSensitive(const dnr_api::Rule& parsed_rule) {
133 // If case sensitivity is not explicitly specified, rules are considered case
134 // sensitive by default.
135 if (!parsed_rule.condition.is_url_filter_case_sensitive)
136 return true;
137
138 return *parsed_rule.condition.is_url_filter_case_sensitive;
139 }
140
141 // Returns a bitmask of flat_rule::OptionFlag corresponding to |parsed_rule|.
GetOptionsMask(const dnr_api::Rule & parsed_rule)142 uint8_t GetOptionsMask(const dnr_api::Rule& parsed_rule) {
143 uint8_t mask = flat_rule::OptionFlag_NONE;
144
145 if (parsed_rule.action.type == dnr_api::RULE_ACTION_TYPE_ALLOW)
146 mask |= flat_rule::OptionFlag_IS_WHITELIST;
147
148 if (!IsCaseSensitive(parsed_rule))
149 mask |= flat_rule::OptionFlag_IS_CASE_INSENSITIVE;
150
151 switch (parsed_rule.condition.domain_type) {
152 case dnr_api::DOMAIN_TYPE_FIRSTPARTY:
153 mask |= flat_rule::OptionFlag_APPLIES_TO_FIRST_PARTY;
154 break;
155 case dnr_api::DOMAIN_TYPE_THIRDPARTY:
156 mask |= flat_rule::OptionFlag_APPLIES_TO_THIRD_PARTY;
157 break;
158 case dnr_api::DOMAIN_TYPE_NONE:
159 mask |= (flat_rule::OptionFlag_APPLIES_TO_FIRST_PARTY |
160 flat_rule::OptionFlag_APPLIES_TO_THIRD_PARTY);
161 break;
162 }
163 return mask;
164 }
165
GetActivationTypes(const dnr_api::Rule & parsed_rule)166 uint8_t GetActivationTypes(const dnr_api::Rule& parsed_rule) {
167 // Extensions don't use any activation types currently.
168 return flat_rule::ActivationType_NONE;
169 }
170
GetElementType(dnr_api::ResourceType resource_type)171 flat_rule::ElementType GetElementType(dnr_api::ResourceType resource_type) {
172 switch (resource_type) {
173 case dnr_api::RESOURCE_TYPE_NONE:
174 return flat_rule::ElementType_NONE;
175 case dnr_api::RESOURCE_TYPE_MAIN_FRAME:
176 return flat_rule::ElementType_MAIN_FRAME;
177 case dnr_api::RESOURCE_TYPE_SUB_FRAME:
178 return flat_rule::ElementType_SUBDOCUMENT;
179 case dnr_api::RESOURCE_TYPE_STYLESHEET:
180 return flat_rule::ElementType_STYLESHEET;
181 case dnr_api::RESOURCE_TYPE_SCRIPT:
182 return flat_rule::ElementType_SCRIPT;
183 case dnr_api::RESOURCE_TYPE_IMAGE:
184 return flat_rule::ElementType_IMAGE;
185 case dnr_api::RESOURCE_TYPE_FONT:
186 return flat_rule::ElementType_FONT;
187 case dnr_api::RESOURCE_TYPE_OBJECT:
188 return flat_rule::ElementType_OBJECT;
189 case dnr_api::RESOURCE_TYPE_XMLHTTPREQUEST:
190 return flat_rule::ElementType_XMLHTTPREQUEST;
191 case dnr_api::RESOURCE_TYPE_PING:
192 return flat_rule::ElementType_PING;
193 case dnr_api::RESOURCE_TYPE_CSP_REPORT:
194 return flat_rule::ElementType_CSP_REPORT;
195 case dnr_api::RESOURCE_TYPE_MEDIA:
196 return flat_rule::ElementType_MEDIA;
197 case dnr_api::RESOURCE_TYPE_WEBSOCKET:
198 return flat_rule::ElementType_WEBSOCKET;
199 case dnr_api::RESOURCE_TYPE_OTHER:
200 return flat_rule::ElementType_OTHER;
201 }
202 NOTREACHED();
203 return flat_rule::ElementType_NONE;
204 }
205
206 // Returns a bitmask of flat_rule::ElementType corresponding to passed
207 // |resource_types|.
GetResourceTypesMask(const std::vector<dnr_api::ResourceType> * resource_types)208 uint16_t GetResourceTypesMask(
209 const std::vector<dnr_api::ResourceType>* resource_types) {
210 uint16_t mask = flat_rule::ElementType_NONE;
211 if (!resource_types)
212 return mask;
213
214 for (const auto resource_type : *resource_types)
215 mask |= GetElementType(resource_type);
216 return mask;
217 }
218
219 // Computes the bitmask of flat_rule::ElementType taking into consideration the
220 // included and excluded resource types for |rule| and its associated action
221 // type.
ComputeElementTypes(const dnr_api::Rule & rule,uint16_t * element_types)222 ParseResult ComputeElementTypes(const dnr_api::Rule& rule,
223 uint16_t* element_types) {
224 uint16_t include_element_type_mask =
225 GetResourceTypesMask(rule.condition.resource_types.get());
226 uint16_t exclude_element_type_mask =
227 GetResourceTypesMask(rule.condition.excluded_resource_types.get());
228
229 // OBJECT_SUBREQUEST is not used by Extensions.
230 if (exclude_element_type_mask ==
231 (flat_rule::ElementType_ANY &
232 ~flat_rule::ElementType_OBJECT_SUBREQUEST)) {
233 return ParseResult::ERROR_NO_APPLICABLE_RESOURCE_TYPES;
234 }
235
236 if (include_element_type_mask & exclude_element_type_mask)
237 return ParseResult::ERROR_RESOURCE_TYPE_DUPLICATED;
238
239 if (rule.action.type == dnr_api::RULE_ACTION_TYPE_ALLOWALLREQUESTS) {
240 // For allowAllRequests rule, the resourceTypes key must always be specified
241 // and may only include main_frame and sub_frame types.
242 const uint16_t frame_element_type_mask =
243 flat_rule::ElementType_MAIN_FRAME | flat_rule::ElementType_SUBDOCUMENT;
244 if (include_element_type_mask == flat_rule::ElementType_NONE ||
245 !IsSubset(include_element_type_mask, frame_element_type_mask)) {
246 return ParseResult::ERROR_INVALID_ALLOW_ALL_REQUESTS_RESOURCE_TYPE;
247 }
248 }
249
250 if (include_element_type_mask != flat_rule::ElementType_NONE)
251 *element_types = include_element_type_mask;
252 else if (exclude_element_type_mask != flat_rule::ElementType_NONE)
253 *element_types = flat_rule::ElementType_ANY & ~exclude_element_type_mask;
254 else
255 *element_types = url_pattern_index::kDefaultFlatElementTypesMask;
256
257 return ParseResult::SUCCESS;
258 }
259
260 // Lower-cases and sorts |domains|, as required by the url_pattern_index
261 // component and stores the result in |output|. Returns false in case of
262 // failure, when one of the input strings contains non-ascii characters.
CanonicalizeDomains(std::unique_ptr<std::vector<std::string>> domains,std::vector<std::string> * output)263 bool CanonicalizeDomains(std::unique_ptr<std::vector<std::string>> domains,
264 std::vector<std::string>* output) {
265 DCHECK(output);
266 DCHECK(output->empty());
267
268 if (!domains)
269 return true;
270
271 // Convert to lower case as required by the url_pattern_index component.
272 for (const std::string& domain : *domains) {
273 if (!base::IsStringASCII(domain))
274 return false;
275
276 output->push_back(base::ToLowerASCII(domain));
277 }
278
279 std::sort(output->begin(), output->end(),
280 [](const std::string& left, const std::string& right) {
281 return url_pattern_index::CompareDomains(left, right) < 0;
282 });
283
284 return true;
285 }
286
// Returns whether |redirect_url| will be used as a relative URL, i.e. whether
// it is non-empty and begins with '/'.
bool IsRedirectUrlRelative(const std::string& redirect_url) {
  if (redirect_url.empty())
    return false;
  return redirect_url.front() == '/';
}
291
IsValidTransformScheme(const std::unique_ptr<std::string> & scheme)292 bool IsValidTransformScheme(const std::unique_ptr<std::string>& scheme) {
293 if (!scheme)
294 return true;
295
296 for (size_t i = 0; i < base::size(kAllowedTransformSchemes); ++i) {
297 if (*scheme == kAllowedTransformSchemes[i])
298 return true;
299 }
300 return false;
301 }
302
IsValidPort(const std::unique_ptr<std::string> & port)303 bool IsValidPort(const std::unique_ptr<std::string>& port) {
304 if (!port || port->empty())
305 return true;
306
307 unsigned port_num = 0;
308 return base::StringToUint(*port, &port_num) && port_num <= 65535;
309 }
310
// Returns true if |str| is null, is empty, or has |starts_with| as its first
// character.
bool IsEmptyOrStartsWith(const std::unique_ptr<std::string>& str,
                         char starts_with) {
  if (!str || str->empty())
    return true;

  return str->front() == starts_with;
}
315
316 // Validates the given url |transform|.
ValidateTransform(const dnr_api::URLTransform & transform)317 ParseResult ValidateTransform(const dnr_api::URLTransform& transform) {
318 if (!IsValidTransformScheme(transform.scheme))
319 return ParseResult::ERROR_INVALID_TRANSFORM_SCHEME;
320
321 if (!IsValidPort(transform.port))
322 return ParseResult::ERROR_INVALID_TRANSFORM_PORT;
323
324 if (!IsEmptyOrStartsWith(transform.query, '?'))
325 return ParseResult::ERROR_INVALID_TRANSFORM_QUERY;
326
327 if (!IsEmptyOrStartsWith(transform.fragment, '#'))
328 return ParseResult::ERROR_INVALID_TRANSFORM_FRAGMENT;
329
330 // Only one of |query| or |query_transform| should be specified.
331 if (transform.query && transform.query_transform)
332 return ParseResult::ERROR_QUERY_AND_TRANSFORM_BOTH_SPECIFIED;
333
334 return ParseResult::SUCCESS;
335 }
336
337 // Parses the "action.redirect" dictionary of a dnr_api::Rule.
ParseRedirect(dnr_api::Redirect redirect,const GURL & base_url,IndexedRule * indexed_rule)338 ParseResult ParseRedirect(dnr_api::Redirect redirect,
339 const GURL& base_url,
340 IndexedRule* indexed_rule) {
341 DCHECK(indexed_rule);
342
343 if (redirect.url) {
344 GURL redirect_url = GURL(*redirect.url);
345 if (!redirect_url.is_valid())
346 return ParseResult::ERROR_INVALID_REDIRECT_URL;
347
348 if (redirect_url.SchemeIs(url::kJavaScriptScheme))
349 return ParseResult::ERROR_JAVASCRIPT_REDIRECT;
350
351 indexed_rule->redirect_url = std::move(*redirect.url);
352 return ParseResult::SUCCESS;
353 }
354
355 if (redirect.extension_path) {
356 if (!IsRedirectUrlRelative(*redirect.extension_path))
357 return ParseResult::ERROR_INVALID_EXTENSION_PATH;
358
359 GURL redirect_url = base_url.Resolve(*redirect.extension_path);
360
361 // Sanity check that Resolve works as expected.
362 DCHECK_EQ(base_url.GetOrigin(), redirect_url.GetOrigin());
363
364 if (!redirect_url.is_valid())
365 return ParseResult::ERROR_INVALID_EXTENSION_PATH;
366
367 indexed_rule->redirect_url = redirect_url.spec();
368 return ParseResult::SUCCESS;
369 }
370
371 if (redirect.transform) {
372 indexed_rule->url_transform = std::move(redirect.transform);
373 return ValidateTransform(*indexed_rule->url_transform);
374 }
375
376 if (redirect.regex_substitution) {
377 if (redirect.regex_substitution->empty())
378 return ParseResult::ERROR_INVALID_REGEX_SUBSTITUTION;
379
380 indexed_rule->regex_substitution = std::move(*redirect.regex_substitution);
381 return ParseResult::SUCCESS;
382 }
383
384 return ParseResult::ERROR_INVALID_REDIRECT;
385 }
386
DoesActionSupportPriority(dnr_api::RuleActionType type)387 bool DoesActionSupportPriority(dnr_api::RuleActionType type) {
388 switch (type) {
389 case dnr_api::RULE_ACTION_TYPE_BLOCK:
390 case dnr_api::RULE_ACTION_TYPE_REDIRECT:
391 case dnr_api::RULE_ACTION_TYPE_ALLOW:
392 case dnr_api::RULE_ACTION_TYPE_UPGRADESCHEME:
393 case dnr_api::RULE_ACTION_TYPE_ALLOWALLREQUESTS:
394 case dnr_api::RULE_ACTION_TYPE_MODIFYHEADERS:
395 return true;
396 case dnr_api::RULE_ACTION_TYPE_NONE:
397 break;
398 }
399 NOTREACHED();
400 return false;
401 }
402
GetActionTypePriority(dnr_api::RuleActionType action_type)403 uint8_t GetActionTypePriority(dnr_api::RuleActionType action_type) {
404 switch (action_type) {
405 case dnr_api::RULE_ACTION_TYPE_ALLOW:
406 return 5;
407 case dnr_api::RULE_ACTION_TYPE_ALLOWALLREQUESTS:
408 return 4;
409 case dnr_api::RULE_ACTION_TYPE_BLOCK:
410 return 3;
411 case dnr_api::RULE_ACTION_TYPE_UPGRADESCHEME:
412 return 2;
413 case dnr_api::RULE_ACTION_TYPE_REDIRECT:
414 return 1;
415 case dnr_api::RULE_ACTION_TYPE_MODIFYHEADERS:
416 return 0;
417 case dnr_api::RULE_ACTION_TYPE_NONE:
418 break;
419 }
420 NOTREACHED();
421 return 0;
422 }
423
// Records to the kIsLargeRegexHistogram boolean histogram whether a regex
// filter was rejected for being too large (|is_large_regex| is true) or
// compiled within limits (false).
// NOTE(review): presumably wrapped in a function so that the histogram macro
// has a single call site - confirm against the histogram macro documentation.
void RecordLargeRegexUMA(bool is_large_regex) {
  UMA_HISTOGRAM_BOOLEAN(kIsLargeRegexHistogram, is_large_regex);
}
427
ValidateHeaders(const std::vector<dnr_api::ModifyHeaderInfo> & headers,bool are_request_headers)428 ParseResult ValidateHeaders(
429 const std::vector<dnr_api::ModifyHeaderInfo>& headers,
430 bool are_request_headers) {
431 if (headers.empty()) {
432 return are_request_headers ? ParseResult::ERROR_EMPTY_REQUEST_HEADERS_LIST
433 : ParseResult::ERROR_EMPTY_RESPONSE_HEADERS_LIST;
434 }
435
436 for (const auto& header_info : headers) {
437 if (!net::HttpUtil::IsValidHeaderName(header_info.header))
438 return ParseResult::ERROR_INVALID_HEADER_NAME;
439
440 // Ensure that request headers cannot be appended.
441 if (are_request_headers &&
442 header_info.operation == dnr_api::HEADER_OPERATION_APPEND) {
443 return ParseResult::ERROR_APPEND_REQUEST_HEADER_UNSUPPORTED;
444 }
445
446 if (header_info.value) {
447 if (!net::HttpUtil::IsValidHeaderValue(*header_info.value))
448 return ParseResult::ERROR_INVALID_HEADER_VALUE;
449
450 // Check that a remove operation must not specify a value.
451 if (header_info.operation == dnr_api::HEADER_OPERATION_REMOVE)
452 return ParseResult::ERROR_HEADER_VALUE_PRESENT;
453 } else if (header_info.operation == dnr_api::HEADER_OPERATION_APPEND ||
454 header_info.operation == dnr_api::HEADER_OPERATION_SET) {
455 // Check that an append or set operation must specify a value.
456 return ParseResult::ERROR_HEADER_VALUE_NOT_SPECIFIED;
457 }
458 }
459
460 return ParseResult::SUCCESS;
461 }
462
463 } // namespace
464
// Out-of-line defaulted constructor, destructor, move constructor and move
// assignment operator of IndexedRule.
IndexedRule::IndexedRule() = default;
IndexedRule::~IndexedRule() = default;
IndexedRule::IndexedRule(IndexedRule&& other) = default;
IndexedRule& IndexedRule::operator=(IndexedRule&& other) = default;
469
// Validates |parsed_rule| and converts it into |indexed_rule|. |base_url| is
// used to resolve a redirect's extension path. Returns ParseResult::SUCCESS on
// success; otherwise returns the error for the first failing check, in which
// case |indexed_rule| may already have been partially populated.
// static
ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule,
                                           const GURL& base_url,
                                           IndexedRule* indexed_rule) {
  DCHECK(indexed_rule);

  if (parsed_rule.id < kMinValidID)
    return ParseResult::ERROR_INVALID_RULE_ID;

  // Rules whose action supports priorities must specify a valid priority.
  const bool is_priority_supported =
      DoesActionSupportPriority(parsed_rule.action.type);
  if (is_priority_supported) {
    if (!parsed_rule.priority)
      return ParseResult::ERROR_EMPTY_RULE_PRIORITY;
    if (*parsed_rule.priority < kMinValidPriority)
      return ParseResult::ERROR_INVALID_RULE_PRIORITY;
  }

  const bool is_redirect_rule =
      parsed_rule.action.type == dnr_api::RULE_ACTION_TYPE_REDIRECT;

  // Redirect rules must carry a valid "redirect" dictionary. Parsing it first
  // populates |indexed_rule->regex_substitution|, which the regex checks below
  // rely on.
  if (is_redirect_rule) {
    if (!parsed_rule.action.redirect)
      return ParseResult::ERROR_INVALID_REDIRECT;

    ParseResult result = ParseRedirect(std::move(*parsed_rule.action.redirect),
                                       base_url, indexed_rule);
    if (result != ParseResult::SUCCESS)
      return result;
  }

  // Lists that are specified must not be empty.
  if (parsed_rule.condition.domains && parsed_rule.condition.domains->empty())
    return ParseResult::ERROR_EMPTY_DOMAINS_LIST;

  if (parsed_rule.condition.resource_types &&
      parsed_rule.condition.resource_types->empty()) {
    return ParseResult::ERROR_EMPTY_RESOURCE_TYPES_LIST;
  }

  // A rule may specify a url filter or a regex filter, but not both.
  if (parsed_rule.condition.url_filter && parsed_rule.condition.regex_filter)
    return ParseResult::ERROR_MULTIPLE_FILTERS_SPECIFIED;

  const bool is_regex_rule = !!parsed_rule.condition.regex_filter;

  // A regex substitution only makes sense together with a regex filter.
  if (!is_regex_rule && indexed_rule->regex_substitution)
    return ParseResult::ERROR_REGEX_SUBSTITUTION_WITHOUT_FILTER;

  if (is_regex_rule) {
    if (parsed_rule.condition.regex_filter->empty())
      return ParseResult::ERROR_EMPTY_REGEX_FILTER;

    if (!base::IsStringASCII(*parsed_rule.condition.regex_filter))
      return ParseResult::ERROR_NON_ASCII_REGEX_FILTER;

    // Capturing groups are only requested when a substitution is present.
    bool require_capturing = indexed_rule->regex_substitution.has_value();

    // TODO(karandeepb): Regex compilation can be expensive. Also, these need to
    // be compiled again once the ruleset is loaded, which means duplicate work.
    // We should maintain a global cache of compiled regexes.
    re2::RE2 regex(
        *parsed_rule.condition.regex_filter,
        CreateRE2Options(IsCaseSensitive(parsed_rule), require_capturing));

    // Too-large regexes get a dedicated error and are recorded in UMA.
    if (regex.error_code() == re2::RE2::ErrorPatternTooLarge) {
      RecordLargeRegexUMA(true);
      return ParseResult::ERROR_REGEX_TOO_LARGE;
    }

    if (!regex.ok())
      return ParseResult::ERROR_INVALID_REGEX_FILTER;

    // The substitution must be a valid rewrite string for this regex.
    std::string error;
    if (indexed_rule->regex_substitution &&
        !regex.CheckRewriteString(*indexed_rule->regex_substitution, &error)) {
      return ParseResult::ERROR_INVALID_REGEX_SUBSTITUTION;
    }

    RecordLargeRegexUMA(false);
  }

  // A url filter that is specified must be non-empty and ASCII-only.
  if (parsed_rule.condition.url_filter) {
    if (parsed_rule.condition.url_filter->empty())
      return ParseResult::ERROR_EMPTY_URL_FILTER;

    if (!base::IsStringASCII(*parsed_rule.condition.url_filter))
      return ParseResult::ERROR_NON_ASCII_URL_FILTER;
  }

  indexed_rule->action_type = parsed_rule.action.type;
  indexed_rule->id = base::checked_cast<uint32_t>(parsed_rule.id);
  indexed_rule->priority = parsed_rule.priority ? ComputeIndexedRulePriority(
                                                      *parsed_rule.priority,
                                                      indexed_rule->action_type)
                                                : kDefaultPriority;
  indexed_rule->options = GetOptionsMask(parsed_rule);
  indexed_rule->activation_types = GetActivationTypes(parsed_rule);

  {
    ParseResult result =
        ComputeElementTypes(parsed_rule, &indexed_rule->element_types);
    if (result != ParseResult::SUCCESS)
      return result;
  }

  if (!CanonicalizeDomains(std::move(parsed_rule.condition.domains),
                           &indexed_rule->domains)) {
    return ParseResult::ERROR_NON_ASCII_DOMAIN;
  }

  if (!CanonicalizeDomains(std::move(parsed_rule.condition.excluded_domains),
                           &indexed_rule->excluded_domains)) {
    return ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN;
  }

  if (is_regex_rule) {
    // Regex rules store the regex verbatim; the anchor fields are not set here.
    indexed_rule->url_pattern_type =
        url_pattern_index::flat::UrlPatternType_REGEXP;
    indexed_rule->url_pattern = std::move(*parsed_rule.condition.regex_filter);
  } else {
    // Parse the |anchor_left|, |anchor_right|, |url_pattern_type| and
    // |url_pattern| fields.
    UrlFilterParser::Parse(std::move(parsed_rule.condition.url_filter),
                           indexed_rule);
  }

  // url_pattern_index doesn't support patterns starting with a domain anchor
  // followed by a wildcard, e.g. ||*xyz.
  if (indexed_rule->anchor_left == flat_rule::AnchorType_SUBDOMAIN &&
      !indexed_rule->url_pattern.empty() &&
      indexed_rule->url_pattern.front() == kWildcardCharacter) {
    return ParseResult::ERROR_INVALID_URL_FILTER;
  }

  // Lower-case case-insensitive patterns as required by url pattern index.
  // NOTE(review): this is also applied to regex patterns, where lower-casing
  // changes the meaning of upper-case escapes (a backslash followed by D, S or
  // W would become its lower-case counterpart) after the regex was validated
  // above in its original form - confirm this is intended.
  if (indexed_rule->options & flat_rule::OptionFlag_IS_CASE_INSENSITIVE)
    indexed_rule->url_pattern = base::ToLowerASCII(indexed_rule->url_pattern);

  if (parsed_rule.action.type == dnr_api::RULE_ACTION_TYPE_MODIFYHEADERS) {
    // A modifyHeaders rule must modify request headers, response headers or
    // both.
    if (!parsed_rule.action.request_headers &&
        !parsed_rule.action.response_headers)
      return ParseResult::ERROR_NO_HEADERS_SPECIFIED;

    if (parsed_rule.action.request_headers) {
      indexed_rule->request_headers =
          std::move(*parsed_rule.action.request_headers);

      ParseResult result = ValidateHeaders(indexed_rule->request_headers,
                                           true /* are_request_headers */);
      if (result != ParseResult::SUCCESS)
        return result;
    }

    if (parsed_rule.action.response_headers) {
      indexed_rule->response_headers =
          std::move(*parsed_rule.action.response_headers);

      ParseResult result = ValidateHeaders(indexed_rule->response_headers,
                                           false /* are_request_headers */);
      if (result != ParseResult::SUCCESS)
        return result;
    }
  }

  // Some sanity checks to ensure we return a valid IndexedRule.
  DCHECK_GE(indexed_rule->id, static_cast<uint32_t>(kMinValidID));
  DCHECK_GE(indexed_rule->priority, static_cast<uint32_t>(kMinValidPriority));
  DCHECK(IsSubset(indexed_rule->options, flat_rule::OptionFlag_ANY));
  DCHECK(IsSubset(indexed_rule->element_types, flat_rule::ElementType_ANY));
  DCHECK_EQ(flat_rule::ActivationType_NONE, indexed_rule->activation_types);
  DCHECK_NE(flat_rule::AnchorType_SUBDOMAIN, indexed_rule->anchor_right);

  return ParseResult::SUCCESS;
}
643
ComputeIndexedRulePriority(int parsed_rule_priority,dnr_api::RuleActionType action_type)644 uint64_t ComputeIndexedRulePriority(int parsed_rule_priority,
645 dnr_api::RuleActionType action_type) {
646 if (!DoesActionSupportPriority(action_type))
647 return kDefaultPriority;
648 // Incorporate the action's priority into the rule priority, so e.g. allow
649 // rules will be given a higher priority than block rules with the same
650 // priority specified in the rule JSON.
651 return (base::checked_cast<uint32_t>(parsed_rule_priority) << 8) |
652 GetActionTypePriority(action_type);
653 }
654
655 } // namespace declarative_net_request
656 } // namespace extensions
657