1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "extensions/browser/api/declarative_net_request/indexed_rule.h"
6 
7 #include <algorithm>
8 #include <utility>
9 
10 #include "base/check_op.h"
11 #include "base/metrics/histogram_macros.h"
12 #include "base/notreached.h"
13 #include "base/numerics/safe_conversions.h"
14 #include "base/stl_util.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/strings/string_util.h"
17 #include "components/url_pattern_index/url_pattern_index.h"
18 #include "extensions/browser/api/declarative_net_request/constants.h"
19 #include "extensions/browser/api/declarative_net_request/utils.h"
20 #include "extensions/common/api/declarative_net_request.h"
21 #include "extensions/common/api/declarative_net_request/utils.h"
22 #include "net/http/http_util.h"
23 #include "third_party/re2/src/re2/re2.h"
24 #include "url/gurl.h"
25 #include "url/url_constants.h"
26 
27 namespace extensions {
28 namespace declarative_net_request {
29 
30 namespace {
31 
32 namespace flat_rule = url_pattern_index::flat;
33 namespace dnr_api = extensions::api::declarative_net_request;
34 
35 constexpr char kAnchorCharacter = '|';
36 constexpr char kSeparatorCharacter = '^';
37 constexpr char kWildcardCharacter = '*';
38 
// Returns true when every bit set in the bitmask |sub| is also set in the
// bitmask |super|, i.e. |sub| has no bit outside of |super|.
constexpr bool IsSubset(unsigned sub, unsigned super) {
  return (sub & ~super) == 0u;
}
43 
44 // Helper class to parse the url filter of a Declarative Net Request API rule.
45 class UrlFilterParser {
46  public:
47   // This sets the |url_pattern_type|, |anchor_left|, |anchor_right| and
48   // |url_pattern| fields on the |indexed_rule_|.
Parse(std::unique_ptr<std::string> url_filter,IndexedRule * indexed_rule)49   static void Parse(std::unique_ptr<std::string> url_filter,
50                     IndexedRule* indexed_rule) {
51     DCHECK(indexed_rule);
52     UrlFilterParser(url_filter ? std::move(*url_filter) : std::string(),
53                     indexed_rule)
54         .ParseImpl();
55   }
56 
57  private:
UrlFilterParser(std::string url_filter,IndexedRule * indexed_rule)58   UrlFilterParser(std::string url_filter, IndexedRule* indexed_rule)
59       : url_filter_(std::move(url_filter)),
60         url_filter_len_(url_filter_.length()),
61         index_(0),
62         indexed_rule_(indexed_rule) {}
63 
ParseImpl()64   void ParseImpl() {
65     ParseLeftAnchor();
66     DCHECK_LE(index_, 2u);
67 
68     ParseFilterString();
69     DCHECK(index_ == url_filter_len_ || index_ + 1 == url_filter_len_);
70 
71     ParseRightAnchor();
72     DCHECK_EQ(url_filter_len_, index_);
73   }
74 
ParseLeftAnchor()75   void ParseLeftAnchor() {
76     indexed_rule_->anchor_left = flat_rule::AnchorType_NONE;
77 
78     if (IsAtAnchor()) {
79       ++index_;
80       indexed_rule_->anchor_left = flat_rule::AnchorType_BOUNDARY;
81       if (IsAtAnchor()) {
82         ++index_;
83         indexed_rule_->anchor_left = flat_rule::AnchorType_SUBDOMAIN;
84       }
85     }
86   }
87 
ParseFilterString()88   void ParseFilterString() {
89     indexed_rule_->url_pattern_type = flat_rule::UrlPatternType_SUBSTRING;
90     size_t left_index = index_;
91     while (index_ < url_filter_len_ && !IsAtRightAnchor()) {
92       if (IsAtSeparatorOrWildcard())
93         indexed_rule_->url_pattern_type = flat_rule::UrlPatternType_WILDCARDED;
94       ++index_;
95     }
96     // Note: Empty url patterns are supported.
97     indexed_rule_->url_pattern =
98         url_filter_.substr(left_index, index_ - left_index);
99   }
100 
ParseRightAnchor()101   void ParseRightAnchor() {
102     indexed_rule_->anchor_right = flat_rule::AnchorType_NONE;
103     if (IsAtRightAnchor()) {
104       ++index_;
105       indexed_rule_->anchor_right = flat_rule::AnchorType_BOUNDARY;
106     }
107   }
108 
IsAtSeparatorOrWildcard() const109   bool IsAtSeparatorOrWildcard() const {
110     return IsAtValidIndex() && (url_filter_[index_] == kSeparatorCharacter ||
111                                 url_filter_[index_] == kWildcardCharacter);
112   }
113 
IsAtRightAnchor() const114   bool IsAtRightAnchor() const {
115     return IsAtAnchor() && index_ > 0 && index_ + 1 == url_filter_len_;
116   }
117 
IsAtValidIndex() const118   bool IsAtValidIndex() const { return index_ < url_filter_len_; }
119 
IsAtAnchor() const120   bool IsAtAnchor() const {
121     return IsAtValidIndex() && url_filter_[index_] == kAnchorCharacter;
122   }
123 
124   const std::string url_filter_;
125   const size_t url_filter_len_;
126   size_t index_;
127   IndexedRule* indexed_rule_;  // Must outlive this instance.
128 
129   DISALLOW_COPY_AND_ASSIGN(UrlFilterParser);
130 };
131 
IsCaseSensitive(const dnr_api::Rule & parsed_rule)132 bool IsCaseSensitive(const dnr_api::Rule& parsed_rule) {
133   // If case sensitivity is not explicitly specified, rules are considered case
134   // sensitive by default.
135   if (!parsed_rule.condition.is_url_filter_case_sensitive)
136     return true;
137 
138   return *parsed_rule.condition.is_url_filter_case_sensitive;
139 }
140 
141 // Returns a bitmask of flat_rule::OptionFlag corresponding to |parsed_rule|.
GetOptionsMask(const dnr_api::Rule & parsed_rule)142 uint8_t GetOptionsMask(const dnr_api::Rule& parsed_rule) {
143   uint8_t mask = flat_rule::OptionFlag_NONE;
144 
145   if (parsed_rule.action.type == dnr_api::RULE_ACTION_TYPE_ALLOW)
146     mask |= flat_rule::OptionFlag_IS_WHITELIST;
147 
148   if (!IsCaseSensitive(parsed_rule))
149     mask |= flat_rule::OptionFlag_IS_CASE_INSENSITIVE;
150 
151   switch (parsed_rule.condition.domain_type) {
152     case dnr_api::DOMAIN_TYPE_FIRSTPARTY:
153       mask |= flat_rule::OptionFlag_APPLIES_TO_FIRST_PARTY;
154       break;
155     case dnr_api::DOMAIN_TYPE_THIRDPARTY:
156       mask |= flat_rule::OptionFlag_APPLIES_TO_THIRD_PARTY;
157       break;
158     case dnr_api::DOMAIN_TYPE_NONE:
159       mask |= (flat_rule::OptionFlag_APPLIES_TO_FIRST_PARTY |
160                flat_rule::OptionFlag_APPLIES_TO_THIRD_PARTY);
161       break;
162   }
163   return mask;
164 }
165 
GetActivationTypes(const dnr_api::Rule & parsed_rule)166 uint8_t GetActivationTypes(const dnr_api::Rule& parsed_rule) {
167   // Extensions don't use any activation types currently.
168   return flat_rule::ActivationType_NONE;
169 }
170 
GetElementType(dnr_api::ResourceType resource_type)171 flat_rule::ElementType GetElementType(dnr_api::ResourceType resource_type) {
172   switch (resource_type) {
173     case dnr_api::RESOURCE_TYPE_NONE:
174       return flat_rule::ElementType_NONE;
175     case dnr_api::RESOURCE_TYPE_MAIN_FRAME:
176       return flat_rule::ElementType_MAIN_FRAME;
177     case dnr_api::RESOURCE_TYPE_SUB_FRAME:
178       return flat_rule::ElementType_SUBDOCUMENT;
179     case dnr_api::RESOURCE_TYPE_STYLESHEET:
180       return flat_rule::ElementType_STYLESHEET;
181     case dnr_api::RESOURCE_TYPE_SCRIPT:
182       return flat_rule::ElementType_SCRIPT;
183     case dnr_api::RESOURCE_TYPE_IMAGE:
184       return flat_rule::ElementType_IMAGE;
185     case dnr_api::RESOURCE_TYPE_FONT:
186       return flat_rule::ElementType_FONT;
187     case dnr_api::RESOURCE_TYPE_OBJECT:
188       return flat_rule::ElementType_OBJECT;
189     case dnr_api::RESOURCE_TYPE_XMLHTTPREQUEST:
190       return flat_rule::ElementType_XMLHTTPREQUEST;
191     case dnr_api::RESOURCE_TYPE_PING:
192       return flat_rule::ElementType_PING;
193     case dnr_api::RESOURCE_TYPE_CSP_REPORT:
194       return flat_rule::ElementType_CSP_REPORT;
195     case dnr_api::RESOURCE_TYPE_MEDIA:
196       return flat_rule::ElementType_MEDIA;
197     case dnr_api::RESOURCE_TYPE_WEBSOCKET:
198       return flat_rule::ElementType_WEBSOCKET;
199     case dnr_api::RESOURCE_TYPE_OTHER:
200       return flat_rule::ElementType_OTHER;
201   }
202   NOTREACHED();
203   return flat_rule::ElementType_NONE;
204 }
205 
206 // Returns a bitmask of flat_rule::ElementType corresponding to passed
207 // |resource_types|.
GetResourceTypesMask(const std::vector<dnr_api::ResourceType> * resource_types)208 uint16_t GetResourceTypesMask(
209     const std::vector<dnr_api::ResourceType>* resource_types) {
210   uint16_t mask = flat_rule::ElementType_NONE;
211   if (!resource_types)
212     return mask;
213 
214   for (const auto resource_type : *resource_types)
215     mask |= GetElementType(resource_type);
216   return mask;
217 }
218 
219 // Computes the bitmask of flat_rule::ElementType taking into consideration the
220 // included and excluded resource types for |rule| and its associated action
221 // type.
ComputeElementTypes(const dnr_api::Rule & rule,uint16_t * element_types)222 ParseResult ComputeElementTypes(const dnr_api::Rule& rule,
223                                 uint16_t* element_types) {
224   uint16_t include_element_type_mask =
225       GetResourceTypesMask(rule.condition.resource_types.get());
226   uint16_t exclude_element_type_mask =
227       GetResourceTypesMask(rule.condition.excluded_resource_types.get());
228 
229   // OBJECT_SUBREQUEST is not used by Extensions.
230   if (exclude_element_type_mask ==
231       (flat_rule::ElementType_ANY &
232        ~flat_rule::ElementType_OBJECT_SUBREQUEST)) {
233     return ParseResult::ERROR_NO_APPLICABLE_RESOURCE_TYPES;
234   }
235 
236   if (include_element_type_mask & exclude_element_type_mask)
237     return ParseResult::ERROR_RESOURCE_TYPE_DUPLICATED;
238 
239   if (rule.action.type == dnr_api::RULE_ACTION_TYPE_ALLOWALLREQUESTS) {
240     // For allowAllRequests rule, the resourceTypes key must always be specified
241     // and may only include main_frame and sub_frame types.
242     const uint16_t frame_element_type_mask =
243         flat_rule::ElementType_MAIN_FRAME | flat_rule::ElementType_SUBDOCUMENT;
244     if (include_element_type_mask == flat_rule::ElementType_NONE ||
245         !IsSubset(include_element_type_mask, frame_element_type_mask)) {
246       return ParseResult::ERROR_INVALID_ALLOW_ALL_REQUESTS_RESOURCE_TYPE;
247     }
248   }
249 
250   if (include_element_type_mask != flat_rule::ElementType_NONE)
251     *element_types = include_element_type_mask;
252   else if (exclude_element_type_mask != flat_rule::ElementType_NONE)
253     *element_types = flat_rule::ElementType_ANY & ~exclude_element_type_mask;
254   else
255     *element_types = url_pattern_index::kDefaultFlatElementTypesMask;
256 
257   return ParseResult::SUCCESS;
258 }
259 
260 // Lower-cases and sorts |domains|, as required by the url_pattern_index
261 // component and stores the result in |output|. Returns false in case of
262 // failure, when one of the input strings contains non-ascii characters.
CanonicalizeDomains(std::unique_ptr<std::vector<std::string>> domains,std::vector<std::string> * output)263 bool CanonicalizeDomains(std::unique_ptr<std::vector<std::string>> domains,
264                          std::vector<std::string>* output) {
265   DCHECK(output);
266   DCHECK(output->empty());
267 
268   if (!domains)
269     return true;
270 
271   // Convert to lower case as required by the url_pattern_index component.
272   for (const std::string& domain : *domains) {
273     if (!base::IsStringASCII(domain))
274       return false;
275 
276     output->push_back(base::ToLowerASCII(domain));
277   }
278 
279   std::sort(output->begin(), output->end(),
280             [](const std::string& left, const std::string& right) {
281               return url_pattern_index::CompareDomains(left, right) < 0;
282             });
283 
284   return true;
285 }
286 
// Returns true if |redirect_url| will be used as a relative URL, i.e. if it is
// non-empty and begins with a '/'.
bool IsRedirectUrlRelative(const std::string& redirect_url) {
  if (redirect_url.empty())
    return false;

  return redirect_url.front() == '/';
}
291 
IsValidTransformScheme(const std::unique_ptr<std::string> & scheme)292 bool IsValidTransformScheme(const std::unique_ptr<std::string>& scheme) {
293   if (!scheme)
294     return true;
295 
296   for (size_t i = 0; i < base::size(kAllowedTransformSchemes); ++i) {
297     if (*scheme == kAllowedTransformSchemes[i])
298       return true;
299   }
300   return false;
301 }
302 
IsValidPort(const std::unique_ptr<std::string> & port)303 bool IsValidPort(const std::unique_ptr<std::string>& port) {
304   if (!port || port->empty())
305     return true;
306 
307   unsigned port_num = 0;
308   return base::StringToUint(*port, &port_num) && port_num <= 65535;
309 }
310 
// Returns true if |str| is null, empty, or has |starts_with| as its first
// character.
bool IsEmptyOrStartsWith(const std::unique_ptr<std::string>& str,
                         char starts_with) {
  if (!str || str->empty())
    return true;

  return str->front() == starts_with;
}
315 
316 // Validates the given url |transform|.
ValidateTransform(const dnr_api::URLTransform & transform)317 ParseResult ValidateTransform(const dnr_api::URLTransform& transform) {
318   if (!IsValidTransformScheme(transform.scheme))
319     return ParseResult::ERROR_INVALID_TRANSFORM_SCHEME;
320 
321   if (!IsValidPort(transform.port))
322     return ParseResult::ERROR_INVALID_TRANSFORM_PORT;
323 
324   if (!IsEmptyOrStartsWith(transform.query, '?'))
325     return ParseResult::ERROR_INVALID_TRANSFORM_QUERY;
326 
327   if (!IsEmptyOrStartsWith(transform.fragment, '#'))
328     return ParseResult::ERROR_INVALID_TRANSFORM_FRAGMENT;
329 
330   // Only one of |query| or |query_transform| should be specified.
331   if (transform.query && transform.query_transform)
332     return ParseResult::ERROR_QUERY_AND_TRANSFORM_BOTH_SPECIFIED;
333 
334   return ParseResult::SUCCESS;
335 }
336 
337 // Parses the "action.redirect" dictionary of a dnr_api::Rule.
ParseRedirect(dnr_api::Redirect redirect,const GURL & base_url,IndexedRule * indexed_rule)338 ParseResult ParseRedirect(dnr_api::Redirect redirect,
339                           const GURL& base_url,
340                           IndexedRule* indexed_rule) {
341   DCHECK(indexed_rule);
342 
343   if (redirect.url) {
344     GURL redirect_url = GURL(*redirect.url);
345     if (!redirect_url.is_valid())
346       return ParseResult::ERROR_INVALID_REDIRECT_URL;
347 
348     if (redirect_url.SchemeIs(url::kJavaScriptScheme))
349       return ParseResult::ERROR_JAVASCRIPT_REDIRECT;
350 
351     indexed_rule->redirect_url = std::move(*redirect.url);
352     return ParseResult::SUCCESS;
353   }
354 
355   if (redirect.extension_path) {
356     if (!IsRedirectUrlRelative(*redirect.extension_path))
357       return ParseResult::ERROR_INVALID_EXTENSION_PATH;
358 
359     GURL redirect_url = base_url.Resolve(*redirect.extension_path);
360 
361     // Sanity check that Resolve works as expected.
362     DCHECK_EQ(base_url.GetOrigin(), redirect_url.GetOrigin());
363 
364     if (!redirect_url.is_valid())
365       return ParseResult::ERROR_INVALID_EXTENSION_PATH;
366 
367     indexed_rule->redirect_url = redirect_url.spec();
368     return ParseResult::SUCCESS;
369   }
370 
371   if (redirect.transform) {
372     indexed_rule->url_transform = std::move(redirect.transform);
373     return ValidateTransform(*indexed_rule->url_transform);
374   }
375 
376   if (redirect.regex_substitution) {
377     if (redirect.regex_substitution->empty())
378       return ParseResult::ERROR_INVALID_REGEX_SUBSTITUTION;
379 
380     indexed_rule->regex_substitution = std::move(*redirect.regex_substitution);
381     return ParseResult::SUCCESS;
382   }
383 
384   return ParseResult::ERROR_INVALID_REDIRECT;
385 }
386 
DoesActionSupportPriority(dnr_api::RuleActionType type)387 bool DoesActionSupportPriority(dnr_api::RuleActionType type) {
388   switch (type) {
389     case dnr_api::RULE_ACTION_TYPE_BLOCK:
390     case dnr_api::RULE_ACTION_TYPE_REDIRECT:
391     case dnr_api::RULE_ACTION_TYPE_ALLOW:
392     case dnr_api::RULE_ACTION_TYPE_UPGRADESCHEME:
393     case dnr_api::RULE_ACTION_TYPE_ALLOWALLREQUESTS:
394     case dnr_api::RULE_ACTION_TYPE_MODIFYHEADERS:
395       return true;
396     case dnr_api::RULE_ACTION_TYPE_NONE:
397       break;
398   }
399   NOTREACHED();
400   return false;
401 }
402 
GetActionTypePriority(dnr_api::RuleActionType action_type)403 uint8_t GetActionTypePriority(dnr_api::RuleActionType action_type) {
404   switch (action_type) {
405     case dnr_api::RULE_ACTION_TYPE_ALLOW:
406       return 5;
407     case dnr_api::RULE_ACTION_TYPE_ALLOWALLREQUESTS:
408       return 4;
409     case dnr_api::RULE_ACTION_TYPE_BLOCK:
410       return 3;
411     case dnr_api::RULE_ACTION_TYPE_UPGRADESCHEME:
412       return 2;
413     case dnr_api::RULE_ACTION_TYPE_REDIRECT:
414       return 1;
415     case dnr_api::RULE_ACTION_TYPE_MODIFYHEADERS:
416       return 0;
417     case dnr_api::RULE_ACTION_TYPE_NONE:
418       break;
419   }
420   NOTREACHED();
421   return 0;
422 }
423 
RecordLargeRegexUMA(bool is_large_regex)424 void RecordLargeRegexUMA(bool is_large_regex) {
425   UMA_HISTOGRAM_BOOLEAN(kIsLargeRegexHistogram, is_large_regex);
426 }
427 
ValidateHeaders(const std::vector<dnr_api::ModifyHeaderInfo> & headers,bool are_request_headers)428 ParseResult ValidateHeaders(
429     const std::vector<dnr_api::ModifyHeaderInfo>& headers,
430     bool are_request_headers) {
431   if (headers.empty()) {
432     return are_request_headers ? ParseResult::ERROR_EMPTY_REQUEST_HEADERS_LIST
433                                : ParseResult::ERROR_EMPTY_RESPONSE_HEADERS_LIST;
434   }
435 
436   for (const auto& header_info : headers) {
437     if (!net::HttpUtil::IsValidHeaderName(header_info.header))
438       return ParseResult::ERROR_INVALID_HEADER_NAME;
439 
440     // Ensure that request headers cannot be appended.
441     if (are_request_headers &&
442         header_info.operation == dnr_api::HEADER_OPERATION_APPEND) {
443       return ParseResult::ERROR_APPEND_REQUEST_HEADER_UNSUPPORTED;
444     }
445 
446     if (header_info.value) {
447       if (!net::HttpUtil::IsValidHeaderValue(*header_info.value))
448         return ParseResult::ERROR_INVALID_HEADER_VALUE;
449 
450       // Check that a remove operation must not specify a value.
451       if (header_info.operation == dnr_api::HEADER_OPERATION_REMOVE)
452         return ParseResult::ERROR_HEADER_VALUE_PRESENT;
453     } else if (header_info.operation == dnr_api::HEADER_OPERATION_APPEND ||
454                header_info.operation == dnr_api::HEADER_OPERATION_SET) {
455       // Check that an append or set operation must specify a value.
456       return ParseResult::ERROR_HEADER_VALUE_NOT_SPECIFIED;
457     }
458   }
459 
460   return ParseResult::SUCCESS;
461 }
462 
463 }  // namespace
464 
465 IndexedRule::IndexedRule() = default;
466 IndexedRule::~IndexedRule() = default;
467 IndexedRule::IndexedRule(IndexedRule&& other) = default;
468 IndexedRule& IndexedRule::operator=(IndexedRule&& other) = default;
469 
470 // static
CreateIndexedRule(dnr_api::Rule parsed_rule,const GURL & base_url,IndexedRule * indexed_rule)471 ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule,
472                                            const GURL& base_url,
473                                            IndexedRule* indexed_rule) {
474   DCHECK(indexed_rule);
475 
476   if (parsed_rule.id < kMinValidID)
477     return ParseResult::ERROR_INVALID_RULE_ID;
478 
479   const bool is_priority_supported =
480       DoesActionSupportPriority(parsed_rule.action.type);
481   if (is_priority_supported) {
482     if (!parsed_rule.priority)
483       return ParseResult::ERROR_EMPTY_RULE_PRIORITY;
484     if (*parsed_rule.priority < kMinValidPriority)
485       return ParseResult::ERROR_INVALID_RULE_PRIORITY;
486   }
487 
488   const bool is_redirect_rule =
489       parsed_rule.action.type == dnr_api::RULE_ACTION_TYPE_REDIRECT;
490 
491   if (is_redirect_rule) {
492     if (!parsed_rule.action.redirect)
493       return ParseResult::ERROR_INVALID_REDIRECT;
494 
495     ParseResult result = ParseRedirect(std::move(*parsed_rule.action.redirect),
496                                        base_url, indexed_rule);
497     if (result != ParseResult::SUCCESS)
498       return result;
499   }
500 
501   if (parsed_rule.condition.domains && parsed_rule.condition.domains->empty())
502     return ParseResult::ERROR_EMPTY_DOMAINS_LIST;
503 
504   if (parsed_rule.condition.resource_types &&
505       parsed_rule.condition.resource_types->empty()) {
506     return ParseResult::ERROR_EMPTY_RESOURCE_TYPES_LIST;
507   }
508 
509   if (parsed_rule.condition.url_filter && parsed_rule.condition.regex_filter)
510     return ParseResult::ERROR_MULTIPLE_FILTERS_SPECIFIED;
511 
512   const bool is_regex_rule = !!parsed_rule.condition.regex_filter;
513 
514   if (!is_regex_rule && indexed_rule->regex_substitution)
515     return ParseResult::ERROR_REGEX_SUBSTITUTION_WITHOUT_FILTER;
516 
517   if (is_regex_rule) {
518     if (parsed_rule.condition.regex_filter->empty())
519       return ParseResult::ERROR_EMPTY_REGEX_FILTER;
520 
521     if (!base::IsStringASCII(*parsed_rule.condition.regex_filter))
522       return ParseResult::ERROR_NON_ASCII_REGEX_FILTER;
523 
524     bool require_capturing = indexed_rule->regex_substitution.has_value();
525 
526     // TODO(karandeepb): Regex compilation can be expensive. Also, these need to
527     // be compiled again once the ruleset is loaded, which means duplicate work.
528     // We should maintain a global cache of compiled regexes.
529     re2::RE2 regex(
530         *parsed_rule.condition.regex_filter,
531         CreateRE2Options(IsCaseSensitive(parsed_rule), require_capturing));
532 
533     if (regex.error_code() == re2::RE2::ErrorPatternTooLarge) {
534       RecordLargeRegexUMA(true);
535       return ParseResult::ERROR_REGEX_TOO_LARGE;
536     }
537 
538     if (!regex.ok())
539       return ParseResult::ERROR_INVALID_REGEX_FILTER;
540 
541     std::string error;
542     if (indexed_rule->regex_substitution &&
543         !regex.CheckRewriteString(*indexed_rule->regex_substitution, &error)) {
544       return ParseResult::ERROR_INVALID_REGEX_SUBSTITUTION;
545     }
546 
547     RecordLargeRegexUMA(false);
548   }
549 
550   if (parsed_rule.condition.url_filter) {
551     if (parsed_rule.condition.url_filter->empty())
552       return ParseResult::ERROR_EMPTY_URL_FILTER;
553 
554     if (!base::IsStringASCII(*parsed_rule.condition.url_filter))
555       return ParseResult::ERROR_NON_ASCII_URL_FILTER;
556   }
557 
558   indexed_rule->action_type = parsed_rule.action.type;
559   indexed_rule->id = base::checked_cast<uint32_t>(parsed_rule.id);
560   indexed_rule->priority = parsed_rule.priority ? ComputeIndexedRulePriority(
561                                                       *parsed_rule.priority,
562                                                       indexed_rule->action_type)
563                                                 : kDefaultPriority;
564   indexed_rule->options = GetOptionsMask(parsed_rule);
565   indexed_rule->activation_types = GetActivationTypes(parsed_rule);
566 
567   {
568     ParseResult result =
569         ComputeElementTypes(parsed_rule, &indexed_rule->element_types);
570     if (result != ParseResult::SUCCESS)
571       return result;
572   }
573 
574   if (!CanonicalizeDomains(std::move(parsed_rule.condition.domains),
575                            &indexed_rule->domains)) {
576     return ParseResult::ERROR_NON_ASCII_DOMAIN;
577   }
578 
579   if (!CanonicalizeDomains(std::move(parsed_rule.condition.excluded_domains),
580                            &indexed_rule->excluded_domains)) {
581     return ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN;
582   }
583 
584   if (is_regex_rule) {
585     indexed_rule->url_pattern_type =
586         url_pattern_index::flat::UrlPatternType_REGEXP;
587     indexed_rule->url_pattern = std::move(*parsed_rule.condition.regex_filter);
588   } else {
589     // Parse the |anchor_left|, |anchor_right|, |url_pattern_type| and
590     // |url_pattern| fields.
591     UrlFilterParser::Parse(std::move(parsed_rule.condition.url_filter),
592                            indexed_rule);
593   }
594 
595   // url_pattern_index doesn't support patterns starting with a domain anchor
596   // followed by a wildcard, e.g. ||*xyz.
597   if (indexed_rule->anchor_left == flat_rule::AnchorType_SUBDOMAIN &&
598       !indexed_rule->url_pattern.empty() &&
599       indexed_rule->url_pattern.front() == kWildcardCharacter) {
600     return ParseResult::ERROR_INVALID_URL_FILTER;
601   }
602 
603   // Lower-case case-insensitive patterns as required by url pattern index.
604   if (indexed_rule->options & flat_rule::OptionFlag_IS_CASE_INSENSITIVE)
605     indexed_rule->url_pattern = base::ToLowerASCII(indexed_rule->url_pattern);
606 
607   if (parsed_rule.action.type == dnr_api::RULE_ACTION_TYPE_MODIFYHEADERS) {
608     if (!parsed_rule.action.request_headers &&
609         !parsed_rule.action.response_headers)
610       return ParseResult::ERROR_NO_HEADERS_SPECIFIED;
611 
612     if (parsed_rule.action.request_headers) {
613       indexed_rule->request_headers =
614           std::move(*parsed_rule.action.request_headers);
615 
616       ParseResult result = ValidateHeaders(indexed_rule->request_headers,
617                                            true /* are_request_headers */);
618       if (result != ParseResult::SUCCESS)
619         return result;
620     }
621 
622     if (parsed_rule.action.response_headers) {
623       indexed_rule->response_headers =
624           std::move(*parsed_rule.action.response_headers);
625 
626       ParseResult result = ValidateHeaders(indexed_rule->response_headers,
627                                            false /* are_request_headers */);
628       if (result != ParseResult::SUCCESS)
629         return result;
630     }
631   }
632 
633   // Some sanity checks to ensure we return a valid IndexedRule.
634   DCHECK_GE(indexed_rule->id, static_cast<uint32_t>(kMinValidID));
635   DCHECK_GE(indexed_rule->priority, static_cast<uint32_t>(kMinValidPriority));
636   DCHECK(IsSubset(indexed_rule->options, flat_rule::OptionFlag_ANY));
637   DCHECK(IsSubset(indexed_rule->element_types, flat_rule::ElementType_ANY));
638   DCHECK_EQ(flat_rule::ActivationType_NONE, indexed_rule->activation_types);
639   DCHECK_NE(flat_rule::AnchorType_SUBDOMAIN, indexed_rule->anchor_right);
640 
641   return ParseResult::SUCCESS;
642 }
643 
ComputeIndexedRulePriority(int parsed_rule_priority,dnr_api::RuleActionType action_type)644 uint64_t ComputeIndexedRulePriority(int parsed_rule_priority,
645                                     dnr_api::RuleActionType action_type) {
646   if (!DoesActionSupportPriority(action_type))
647     return kDefaultPriority;
648   // Incorporate the action's priority into the rule priority, so e.g. allow
649   // rules will be given a higher priority than block rules with the same
650   // priority specified in the rule JSON.
651   return (base::checked_cast<uint32_t>(parsed_rule_priority) << 8) |
652          GetActionTypePriority(action_type);
653 }
654 
655 }  // namespace declarative_net_request
656 }  // namespace extensions
657