1 // Copyright 2018 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_ 6 #define COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_ 7 8 #include <stddef.h> 9 #include <ostream> 10 #include <string> 11 12 #include "base/macros.h" 13 #include "base/strings/string_piece.h" 14 #include "components/subresource_filter/tools/rule_parser/rule.h" 15 16 namespace subresource_filter { 17 18 // A parser of EasyList rules. It is intended to be (re-)used for parsing 19 // multiple rules. 20 // TODO(pkalinnikov): Support 'sitekey', 'collapse', and 'donottrack' options. 21 class RuleParser { 22 public: 23 // Detailed information about a parse error (if any). 24 struct ParseError { 25 // Indicates the type of an error occured during a Parse(...) call. 26 enum ErrorCode { 27 NONE, // Parsing was successful. 28 29 EMPTY_RULE, // The parsed line does not contain any rule. 30 BAD_ALLOWLIST_SYNTAX, // Used wrong syntax for an allowlist rule. 31 UNKNOWN_OPTION, // Using of unknown option in a URL rule. 32 NOT_A_TRISTATE_OPTION, // Used negation for a non-tristate option. 33 DEPRECATED_OPTION, // Used a deprecated option. 34 ALLOWLIST_ONLY_OPTION, // The option applies to allowlist rules only. 35 NO_VALUE_PROVIDED, // A valued option is used without a value. 36 37 WRONG_CSS_RULE_DELIM, // Using of a wrong delimiter in a CSS rule. 38 EMPTY_CSS_SELECTOR, // No CSS selector specified in a CSS rule. 39 40 UNSUPPORTED_FEATURE, // Using not currently supported EasyList feature. 41 }; 42 43 // TODO(pkalinnikov): Introduce warnings for, e.g., using an inverted 44 // "document" activation type, using unsupported option, etc. This would let 45 // a client have a best-effort version of the rule. Leave it up to clients 46 // to decide what warnings/errors are critical for them. 47 48 // Constructs a ParseError in a default (no error) state. 49 ParseError(); 50 ~ParseError(); 51 52 ErrorCode error_code = NONE; 53 54 // A copy of the parsed line. If no error occurred, it is empty. 55 std::string line; 56 57 // Position of the character in the |line| that introduced the error. If 58 // |error_code| != NONE, then 0 <= |error_index| <= line.size(), otherwise 59 // |error_index| == std::string::npos. 60 size_t error_index = std::string::npos; 61 }; 62 63 RuleParser(); 64 ~RuleParser(); 65 66 // Returns a human-readable detailed explanation of a parsing error. 67 static const char* GetParseErrorCodeDescription(ParseError::ErrorCode code); 68 69 // Parses a rule from the |line|. Returns the type of the rule parsed, or 70 // RULE_TYPE_UNSPECIFIED on error. Notes: 71 // - When parsing a URL rule, URL syntax is not verified. 72 // - When parsing a CSS rule, the CSS selector syntax is not verified. 73 RuleType Parse(base::StringPiece line); 74 75 // Returns error diagnostics on the latest parsed line. parse_error()76 const ParseError& parse_error() const { return parse_error_; } 77 78 // Gets the last parsed rule type. It is guaranteed to return the same value 79 // as the last Parse(...) invocation, or RULE_TYPE_UNSPECIFIED if no calls 80 // were done. rule_type()81 RuleType rule_type() const { return rule_type_; } 82 83 // Gets the last parsed URL filtering rule. The result is undefined if 84 // rule_type() != RULE_TYPE_URL, url_rule()85 const UrlRule& url_rule() const { return url_rule_; } 86 87 // Gets the last parsed CSS element hiding rule. The result is undefined if 88 // rule_type() != RULE_TYPE_CSS. css_rule()89 const CssRule& css_rule() const { return css_rule_; } 90 91 private: 92 // Parses the |part| and saves parsed URL filtering rule to the |url_rule_| 93 // member. |origin| is used for a proper error reporting. Returns 94 // RULE_TYPE_URL ff the |part| is a well-formed URL rule. Otherwise returns 95 // RULE_TYPE_UNSPECIFIED and sets |parse_error_|. 96 RuleType ParseUrlRule(base::StringPiece origin, base::StringPiece part); 97 98 // Parses the |options| segment of a URL filtering rule and saves the parsed 99 // options to the |url_rule_| member. Returns true if the options were parsed 100 // correctly. Otherwise sets an error in |parse_error_| and returns false. 101 bool ParseUrlRuleOptions(base::StringPiece origin, base::StringPiece options); 102 103 // Parses the |part| and saves parsed CSS rule to the |css_rule_| member. 104 // |css_section_start| denotes a position of '#' in the |part|, used to 105 // separate a CSS selector. Returns true iff the line is a well-formed CSS 106 // rule. Sets |parse_error_| on error. 107 RuleType ParseCssRule(base::StringPiece origin, 108 base::StringPiece part, 109 size_t css_section_start); 110 111 // Sets |parse_error_| to contain specific error, starting at |error_begin|. 112 void SetParseError(ParseError::ErrorCode code, 113 base::StringPiece origin, 114 const char* error_begin); 115 116 ParseError parse_error_; 117 RuleType rule_type_; 118 UrlRule url_rule_; 119 CssRule css_rule_; 120 121 DISALLOW_COPY_AND_ASSIGN(RuleParser); 122 }; 123 124 // Pretty-prints the parsing |error| to |out|, e.g. like this: 125 // (error:22) Unknown URL rule option: 126 // @@example.org$script,unknown_option 127 // ^ 128 std::ostream& operator<<(std::ostream& out, 129 const RuleParser::ParseError& error); 130 131 } // namespace subresource_filter 132 133 #endif // COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_ 134