1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_
6 #define COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_
7 
8 #include <stddef.h>
9 #include <ostream>
10 #include <string>
11 
12 #include "base/macros.h"
13 #include "base/strings/string_piece.h"
14 #include "components/subresource_filter/tools/rule_parser/rule.h"
15 
16 namespace subresource_filter {
17 
18 // A parser of EasyList rules. It is intended to be (re-)used for parsing
19 // multiple rules.
20 // TODO(pkalinnikov): Support 'sitekey', 'collapse', and 'donottrack' options.
21 class RuleParser {
22  public:
23   // Detailed information about a parse error (if any).
24   struct ParseError {
25     // Indicates the type of an error occured during a Parse(...) call.
26     enum ErrorCode {
27       NONE,  // Parsing was successful.
28 
29       EMPTY_RULE,             // The parsed line does not contain any rule.
30       BAD_ALLOWLIST_SYNTAX,   // Used wrong syntax for an allowlist rule.
31       UNKNOWN_OPTION,         // Using of unknown option in a URL rule.
32       NOT_A_TRISTATE_OPTION,  // Used negation for a non-tristate option.
33       DEPRECATED_OPTION,      // Used a deprecated option.
34       ALLOWLIST_ONLY_OPTION,  // The option applies to allowlist rules only.
35       NO_VALUE_PROVIDED,      // A valued option is used without a value.
36 
37       WRONG_CSS_RULE_DELIM,  // Using of a wrong delimiter in a CSS rule.
38       EMPTY_CSS_SELECTOR,    // No CSS selector specified in a CSS rule.
39 
40       UNSUPPORTED_FEATURE,  // Using not currently supported EasyList feature.
41     };
42 
43     // TODO(pkalinnikov): Introduce warnings for, e.g., using an inverted
44     // "document" activation type, using unsupported option, etc. This would let
45     // a client have a best-effort version of the rule. Leave it up to clients
46     // to decide what warnings/errors are critical for them.
47 
48     // Constructs a ParseError in a default (no error) state.
49     ParseError();
50     ~ParseError();
51 
52     ErrorCode error_code = NONE;
53 
54     // A copy of the parsed line. If no error occurred, it is empty.
55     std::string line;
56 
57     // Position of the character in the |line| that introduced the error. If
58     // |error_code| != NONE, then 0 <= |error_index| <= line.size(), otherwise
59     // |error_index| == std::string::npos.
60     size_t error_index = std::string::npos;
61   };
62 
63   RuleParser();
64   ~RuleParser();
65 
66   // Returns a human-readable detailed explanation of a parsing error.
67   static const char* GetParseErrorCodeDescription(ParseError::ErrorCode code);
68 
69   // Parses a rule from the |line|. Returns the type of the rule parsed, or
70   // RULE_TYPE_UNSPECIFIED on error. Notes:
71   //  - When parsing a URL rule, URL syntax is not verified.
72   //  - When parsing a CSS rule, the CSS selector syntax is not verified.
73   RuleType Parse(base::StringPiece line);
74 
75   // Returns error diagnostics on the latest parsed line.
parse_error()76   const ParseError& parse_error() const { return parse_error_; }
77 
78   // Gets the last parsed rule type. It is guaranteed to return the same value
79   // as the last Parse(...) invocation, or RULE_TYPE_UNSPECIFIED if no calls
80   // were done.
rule_type()81   RuleType rule_type() const { return rule_type_; }
82 
83   // Gets the last parsed URL filtering rule. The result is undefined if
84   // rule_type() != RULE_TYPE_URL,
url_rule()85   const UrlRule& url_rule() const { return url_rule_; }
86 
87   // Gets the last parsed CSS element hiding rule. The result is undefined if
88   // rule_type() != RULE_TYPE_CSS.
css_rule()89   const CssRule& css_rule() const { return css_rule_; }
90 
91  private:
92   // Parses the |part| and saves parsed URL filtering rule to the |url_rule_|
93   // member. |origin| is used for a proper error reporting. Returns
94   // RULE_TYPE_URL ff the |part| is a well-formed URL rule. Otherwise returns
95   // RULE_TYPE_UNSPECIFIED and sets |parse_error_|.
96   RuleType ParseUrlRule(base::StringPiece origin, base::StringPiece part);
97 
98   // Parses the |options| segment of a URL filtering rule and saves the parsed
99   // options to the |url_rule_| member. Returns true if the options were parsed
100   // correctly. Otherwise sets an error in |parse_error_| and returns false.
101   bool ParseUrlRuleOptions(base::StringPiece origin, base::StringPiece options);
102 
103   // Parses the |part| and saves parsed CSS rule to the |css_rule_| member.
104   // |css_section_start| denotes a position of '#' in the |part|, used to
105   // separate a CSS selector. Returns true iff the line is a well-formed CSS
106   // rule. Sets |parse_error_| on error.
107   RuleType ParseCssRule(base::StringPiece origin,
108                         base::StringPiece part,
109                         size_t css_section_start);
110 
111   // Sets |parse_error_| to contain specific error, starting at |error_begin|.
112   void SetParseError(ParseError::ErrorCode code,
113                      base::StringPiece origin,
114                      const char* error_begin);
115 
116   ParseError parse_error_;
117   RuleType rule_type_;
118   UrlRule url_rule_;
119   CssRule css_rule_;
120 
121   DISALLOW_COPY_AND_ASSIGN(RuleParser);
122 };
123 
// Pretty-prints the parsing |error| to |out|. As the example below shows, the
// output contains a description of the error followed by the offending line
// with a caret marking the error position, e.g. like this:
//   (error:22) Unknown URL rule option:
//   @@example.org$script,unknown_option
//                        ^
// NOTE(review): presumably returns |out| to allow chaining, as is conventional
// for stream insertion operators - confirm against the definition.
std::ostream& operator<<(std::ostream& out,
                         const RuleParser::ParseError& error);
130 
131 }  // namespace subresource_filter
132 
133 #endif  // COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_
134