1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/subresource_filter/tools/ruleset_converter/rule_stream.h"
6 
7 #include <algorithm>
8 #include <memory>
9 #include <string>
10 #include <vector>
11 
12 #include "base/files/file_util.h"
13 #include "base/files/scoped_temp_dir.h"
14 #include "base/logging.h"
15 #include "base/stl_util.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_util.h"
18 #include "components/subresource_filter/tools/rule_parser/rule_parser.h"
19 #include "components/subresource_filter/tools/ruleset_converter/ruleset_test_util.h"
20 #include "components/url_pattern_index/proto/rules.pb.h"
21 #include "testing/gmock/include/gmock/gmock.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23 
24 namespace subresource_filter {
25 
26 namespace {
27 
// Provides a short, fixed list of hand-written filter rules in text format.
// The list mixes URL rules with CSS rules (the entries containing "##" or
// "#@#").
std::vector<std::string> GetSomeRules() {
  const char* const kRules[] = {
      "example.com",
      "||ex.com$image",
      "|http://example.com/?key=value$~third-party,domain=ex.com",
      "&key1=value1&key2=value2|$script,image,font",
      "domain1.com,domain1.com###id",
      "@@whitelisted.com$document,domain=example.com|~sub.example.com",
      "###absolute_evil_id",
      "@@whitelisted.com$match-case,document,domain=another.example.com",
      "domain.com,~sub.domain.com,sub.sub.domain.com#@#id",
      "#@#absolute_good_id",
      "host$websocket",
  };
  return std::vector<std::string>(std::begin(kRules), std::end(kRules));
}
44 
// Returns some rules which Chrome doesn't support fully or partially, mixed
// with a couple of supported rules, or otherwise weird ones.
//
// The entries are the same rules, in the same order, as the |rule| column of
// |kTestCases| in the DeleteUrlRuleOrAmend test below. Every entry must be
// followed by a comma: adjacent string literals are silently concatenated by
// the compiler, which would merge two rules into a single bogus one.
std::vector<std::string> GetSomeChromeUnfriendlyRules() {
  return std::vector<std::string>{
      "/a[0-9].com/$image",
      "a.com$image,popup",
      "a.com$popup",
      "a.com$~image",
      "a.com$~popup",
      "a.com$~image,~popup",
      "@@a.com$subdocument,document",
      "@@a.com$document,generichide",
      "@@a.com$document",
      "@@a.com$genericblock",
      "@@a.com$elemhide",
      "@@a.com$generichide",
      "@@a.com$elemhide,generichide",
      "@@a.com$image,elemhide,generichide",
      "a.com$image,~image",
  };
}
66 
67 // Generates and returns many rules in text format.
GetManyRules()68 std::vector<std::string> GetManyRules() {
69   constexpr size_t kNumberOfUrlRules = 10123;
70   constexpr size_t kNumberOfCssRules = 5321;
71 
72   std::vector<std::string> text_rules;
73 
74   for (size_t i = 0; i != kNumberOfUrlRules; ++i) {
75     std::string text_rule;
76     if (!(i & 3))
77       text_rule += "@@";
78     if (i & 1)
79       text_rule += "sub.";
80     text_rule += "example" + base::NumberToString(i) + ".com";
81     text_rule += '$';
82     text_rule += (i & 7) ? "script" : "image";
83     if (i & 1)
84       text_rule += ",domain=example.com|~but_not.example.com";
85     text_rules.push_back(text_rule);
86   }
87 
88   for (size_t i = 0; i != kNumberOfCssRules; ++i) {
89     std::string text_rule = "domain.com";
90     if (i & 1)
91       text_rule += ",~but_not.domain.com";
92     text_rule += (i & 3) ? "##" : "#@#";
93     text_rule += "#id" + base::NumberToString(i);
94     text_rules.push_back(text_rule);
95   }
96 
97   return text_rules;
98 }
99 
100 // Reads the provided |ruleset_file| skipping every second rule (independently
101 // for URL and CSS rules), and EXPECTs that it contains exactly all the rules
102 // from |expected_contents| in the same order.
ReadHalfRulesOfTestRulesetAndExpectContents(const ScopedTempRulesetFile & ruleset_file,const TestRulesetContents & expected_contents)103 void ReadHalfRulesOfTestRulesetAndExpectContents(
104     const ScopedTempRulesetFile& ruleset_file,
105     const TestRulesetContents& expected_contents) {
106   std::unique_ptr<RuleInputStream> input = ruleset_file.OpenForInput();
107 
108   TestRulesetContents contents;
109 
110   bool take_url_rule = true;
111   bool take_css_rule = true;
112   url_pattern_index::proto::RuleType rule_type =
113       url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
114   while ((rule_type = input->FetchNextRule()) !=
115          url_pattern_index::proto::RULE_TYPE_UNSPECIFIED) {
116     if (rule_type == url_pattern_index::proto::RULE_TYPE_URL) {
117       if (take_url_rule)
118         contents.url_rules.push_back(input->GetUrlRule());
119       take_url_rule = !take_url_rule;
120     } else {
121       ASSERT_EQ(url_pattern_index::proto::RULE_TYPE_CSS, rule_type);
122       if (take_css_rule)
123         contents.css_rules.push_back(input->GetCssRule());
124       take_css_rule = !take_css_rule;
125     }
126   }
127 
128   EXPECT_EQ(contents, expected_contents);
129 }
130 
131 }  // namespace
132 
// Writes a ruleset to a temporary file in each of the three formats and checks
// that reading the file back yields the rules that were written. Runs first
// with the small hand-written rule list, then with the large generated one.
TEST(RuleStreamTest, WriteAndReadRuleset) {
  for (const bool use_many_rules : {false, true}) {
    TestRulesetContents contents;
    contents.AppendRules(use_many_rules ? GetManyRules() : GetSomeRules());

    // What is expected back from a format that does not keep CSS rules.
    TestRulesetContents only_url_rules;
    only_url_rules.url_rules = contents.url_rules;

    for (const auto format :
         {RulesetFormat::kFilterList, RulesetFormat::kProto,
          RulesetFormat::kUnindexedRuleset}) {
      ScopedTempRulesetFile ruleset_file(format);
      ruleset_file.WriteRuleset(contents);

      // kUnindexedRuleset discards CSS rules, so only the URL rules are
      // expected to survive the round trip in that format.
      EXPECT_EQ(ruleset_file.ReadContents(),
                format == RulesetFormat::kUnindexedRuleset ? only_url_rules
                                                           : contents);
    }
  }
}
155 
// Writes the large generated ruleset in each format, then reads it back while
// skipping every second rule, and checks that exactly the rules at even
// positions (counted separately for URL and CSS rules) were obtained.
TEST(RuleStreamTest, WriteAndReadHalfRuleset) {
  TestRulesetContents contents;
  contents.AppendRules(GetManyRules());

  // Build the expectation: rules at positions 0, 2, 4, ... of each list.
  TestRulesetContents half_contents;
  bool keep_rule = true;
  for (const auto& url_rule : contents.url_rules) {
    if (keep_rule)
      half_contents.url_rules.push_back(url_rule);
    keep_rule = !keep_rule;
  }
  keep_rule = true;
  for (const auto& css_rule : contents.css_rules) {
    if (keep_rule)
      half_contents.css_rules.push_back(css_rule);
    keep_rule = !keep_rule;
  }

  // Expectation for a format that does not keep CSS rules.
  TestRulesetContents half_url_rules;
  half_url_rules.url_rules = half_contents.url_rules;

  for (const auto format :
       {RulesetFormat::kFilterList, RulesetFormat::kProto,
        RulesetFormat::kUnindexedRuleset}) {
    ScopedTempRulesetFile ruleset_file(format);
    ruleset_file.WriteRuleset(contents);

    // kUnindexedRuleset discards CSS rules, so compare against the URL-only
    // half in that case.
    ReadHalfRulesOfTestRulesetAndExpectContents(
        ruleset_file, format == RulesetFormat::kUnindexedRuleset
                          ? half_url_rules
                          : half_contents);
  }
}
180 
// Transfers rules between two filter-list files, using one output stream as
// the destination for both URL and CSS rules, and checks that the target file
// ends up with the full ruleset.
TEST(RuleStreamTest, TransferAllRulesToSameStream) {
  TestRulesetContents contents;
  contents.AppendRules(GetManyRules());

  // Prepare the source file.
  ScopedTempRulesetFile source_ruleset(RulesetFormat::kFilterList);
  source_ruleset.WriteRuleset(contents);

  ScopedTempRulesetFile target_ruleset(RulesetFormat::kFilterList);

  auto input = source_ruleset.OpenForInput();
  auto output = target_ruleset.OpenForOutput();
  // The same stream is passed as both the URL and the CSS rule destination.
  TransferRules(input.get(), output.get(), output.get());
  EXPECT_TRUE(output->Finish());

  // Release both streams before the target file is read back.
  input.reset();
  output.reset();

  EXPECT_EQ(target_ruleset.ReadContents(), contents);
}
198 
// Transfers rules with only a URL rule destination (the CSS destination is
// null) and checks that the target file contains just the URL rules.
TEST(RuleStreamTest, TransferUrlRulesToOneStream) {
  TestRulesetContents contents;
  contents.AppendRules(GetManyRules());

  // Prepare the source file.
  ScopedTempRulesetFile source_ruleset(RulesetFormat::kFilterList);
  source_ruleset.WriteRuleset(contents);

  ScopedTempRulesetFile target_ruleset(RulesetFormat::kFilterList);

  auto input = source_ruleset.OpenForInput();
  auto output = target_ruleset.OpenForOutput();
  // No destination is given for CSS rules.
  TransferRules(input.get(), output.get(), nullptr);
  EXPECT_TRUE(output->Finish());

  // Release both streams before the target file is read back.
  input.reset();
  output.reset();

  // Only the URL rules are expected in the target.
  contents.css_rules.clear();
  EXPECT_EQ(target_ruleset.ReadContents(), contents);
}
217 
// Transfers rules with only a CSS rule destination (the URL destination is
// null) and checks that the target file contains just the CSS rules.
TEST(RuleStreamTest, TransferCssRulesToOneStream) {
  TestRulesetContents contents;
  contents.AppendRules(GetManyRules());

  // Prepare the source file.
  ScopedTempRulesetFile source_ruleset(RulesetFormat::kFilterList);
  source_ruleset.WriteRuleset(contents);

  ScopedTempRulesetFile target_ruleset(RulesetFormat::kFilterList);

  auto input = source_ruleset.OpenForInput();
  auto output = target_ruleset.OpenForOutput();
  // No destination is given for URL rules.
  TransferRules(input.get(), nullptr, output.get());
  EXPECT_TRUE(output->Finish());

  // Release both streams before the target file is read back.
  input.reset();
  output.reset();

  // Only the CSS rules are expected in the target.
  contents.url_rules.clear();
  EXPECT_EQ(target_ruleset.ReadContents(), contents);
}
236 
// Transfers rules from one filter-list file into two separate target files,
// one receiving URL rules and the other CSS rules, and checks that each target
// contains only the rules of its own type.
TEST(RuleStreamTest, TransferAllRulesToDifferentStreams) {
  TestRulesetContents contents;
  contents.AppendRules(GetManyRules());

  // Expected contents of the URL-only target.
  TestRulesetContents only_url_rules;
  only_url_rules.url_rules = contents.url_rules;

  // Prepare the source file.
  ScopedTempRulesetFile source_ruleset(RulesetFormat::kFilterList);
  source_ruleset.WriteRuleset(contents);

  ScopedTempRulesetFile target_ruleset_url(RulesetFormat::kFilterList);
  ScopedTempRulesetFile target_ruleset_css(RulesetFormat::kFilterList);

  auto input = source_ruleset.OpenForInput();
  auto output_url = target_ruleset_url.OpenForOutput();
  auto output_css = target_ruleset_css.OpenForOutput();
  TransferRules(input.get(), output_url.get(), output_css.get());
  EXPECT_TRUE(output_url->Finish());
  EXPECT_TRUE(output_css->Finish());

  // Release all streams before the target files are read back.
  input.reset();
  output_url.reset();
  output_css.reset();

  EXPECT_EQ(target_ruleset_url.ReadContents(), only_url_rules);

  // After dropping the URL rules, |contents| holds the CSS-only expectation.
  contents.url_rules.clear();
  EXPECT_EQ(target_ruleset_css.ReadContents(), contents);
}
265 
// Transfers URL rules for Chrome version 54 and checks that the target file
// holds the source URL rules minus those whose pattern type is REGEXP.
//
// NOTE(review): GetManyRules() does not appear to generate any regexp rules,
// so the EraseIf() below is presumably a no-op and the discarding itself may
// not actually be exercised here - confirm.
TEST(RuleStreamTest, TransferRulesAndDiscardRegexpRules) {
  TestRulesetContents contents;
  contents.AppendRules(GetManyRules());

  // Prepare the source file.
  ScopedTempRulesetFile source_ruleset(RulesetFormat::kFilterList);
  source_ruleset.WriteRuleset(contents);

  ScopedTempRulesetFile target_ruleset(RulesetFormat::kFilterList);

  auto input = source_ruleset.OpenForInput();
  auto output = target_ruleset.OpenForOutput();
  // No destination is given for CSS rules.
  TransferRules(input.get(), output.get(), nullptr, 54 /* chrome_version */);
  EXPECT_TRUE(output->Finish());

  // Release both streams before the target file is read back.
  input.reset();
  output.reset();

  // Build the expectation: no regexp URL rules and no CSS rules.
  const auto is_regexp_rule =
      [](const url_pattern_index::proto::UrlRule& rule) {
        return rule.url_pattern_type() ==
               url_pattern_index::proto::URL_PATTERN_TYPE_REGEXP;
      };
  base::EraseIf(contents.url_rules, is_regexp_rule);
  contents.css_rules.clear();
  EXPECT_EQ(target_ruleset.ReadContents(), contents);
}
289 
// For each of several Chrome versions, transfers the URL rules of a mixed
// ruleset and checks that the result equals what is obtained by applying
// DeleteUrlRuleOrAmend() to every source URL rule individually.
TEST(RuleStreamTest, TransferRulesChromeVersion) {
  TestRulesetContents contents;
  contents.AppendRules(GetSomeChromeUnfriendlyRules());
  contents.AppendRules(GetManyRules());

  // The source file is written once and re-read for every version.
  ScopedTempRulesetFile source_ruleset(RulesetFormat::kFilterList);
  source_ruleset.WriteRuleset(contents);

  for (const int chrome_version : {0, 54, 59}) {
    // Expected URL rules: every source rule that is not deleted, in its
    // possibly amended form.
    TestRulesetContents expected_contents;
    for (const auto& source_rule : contents.url_rules) {
      // Work on a copy, because DeleteUrlRuleOrAmend() takes a mutable rule.
      url_pattern_index::proto::UrlRule url_rule = source_rule;
      if (!DeleteUrlRuleOrAmend(&url_rule, chrome_version))
        expected_contents.url_rules.push_back(url_rule);
    }

    ScopedTempRulesetFile target_ruleset(RulesetFormat::kFilterList);
    auto output = target_ruleset.OpenForOutput();
    auto input = source_ruleset.OpenForInput();
    // No destination is given for CSS rules.
    TransferRules(input.get(), output.get(), nullptr, chrome_version);
    EXPECT_TRUE(output->Finish());

    // Release both streams before the target file is read back.
    input.reset();
    output.reset();

    EXPECT_EQ(target_ruleset.ReadContents(), expected_contents);
  }
}
317 
// Checks that transferring a filter list which contains a few lines with
// non-critical parse errors carries over all the well-formed rules, and only
// those.
TEST(RuleStreamTest, TransferRulesFromFilterListWithUnsupportedOptions) {
  std::vector<std::string> text_rules = GetSomeRules();
  const size_t number_of_correct_rules = text_rules.size();

  // Insert several rules with non-critical parse errors: one at the very
  // beginning, one at the very end, and one in the middle of the list.
  text_rules.insert(text_rules.begin(), "host1$donottrack");
  text_rules.push_back("");
  text_rules.insert(text_rules.begin() + text_rules.size() / 2,
                    "host3$collapse");

  ScopedTempRulesetFile source_ruleset(RulesetFormat::kFilterList);
  ScopedTempRulesetFile target_ruleset(RulesetFormat::kFilterList);

  // Output all the rules to the |source_ruleset| file. Abort the test right
  // away if the fixture could not be written completely; otherwise the failure
  // would only show up later as a misleading content mismatch.
  const std::string joined_rules = base::JoinString(text_rules, "\n");
  const int joined_size = static_cast<int>(joined_rules.size());
  ASSERT_EQ(joined_size,
            base::WriteFile(source_ruleset.ruleset_path(), joined_rules.data(),
                            joined_size));

  // Filter out the rules with parse errors, and save the rest to |contents|.
  TestRulesetContents contents;
  contents.AppendRules(text_rules, true /* allow_errors */);

  // Make sure all the rules with no errors were transferred. The streams are
  // scoped so that they are destroyed before the target file is read back.
  {
    std::unique_ptr<RuleInputStream> input = source_ruleset.OpenForInput();
    std::unique_ptr<RuleOutputStream> output = target_ruleset.OpenForOutput();
    TransferRules(input.get(), output.get(), output.get());
    EXPECT_TRUE(output->Finish());
  }

  // Exactly the rules returned by GetSomeRules() must have been parsed.
  EXPECT_EQ(number_of_correct_rules,
            contents.url_rules.size() + contents.css_rules.size());
  EXPECT_EQ(target_ruleset.ReadContents(), contents);
}
352 
TEST(RuleStreamTest,DeleteUrlRuleOrAmend)353 TEST(RuleStreamTest, DeleteUrlRuleOrAmend) {
354   const struct TestCase {
355     const char* rule;
356     const char* chrome_54_rule;
357     const char* chrome_59_rule;
358   } kTestCases[] = {
359       {"/a[0-9].com/$image", nullptr, nullptr},
360       {"a.com$image,popup", "a.com$image,~popup", "#54"},
361       {"a.com$popup", nullptr, nullptr},
362       {"a.com$~image", "a.com$~image,~popup,~websocket", "#0"},
363       {"a.com$~popup", "a.com$~popup,~websocket", "a.com"},
364       {"a.com$~image,~popup", "a.com$~image,~popup,~websocket", "#0"},
365       {"@@a.com$subdocument,document", "#0", "#0"},
366       {"@@a.com$document,generichide", "@@a.com$document", "#54"},
367       {"@@a.com$document", "#0", "#0"},
368       {"@@a.com$genericblock", "#0", "#0"},
369       {"@@a.com$elemhide", nullptr, nullptr},
370       {"@@a.com$generichide", nullptr, nullptr},
371       {"@@a.com$elemhide,generichide", nullptr, nullptr},
372       {"@@a.com$image,elemhide,generichide", "@@a.com$image", "#54"},
373       {"a.com$image,~image", nullptr, nullptr},
374   };
375 
376   auto get_target_rule = [](const TestCase& test, std::string target_rule) {
377     RuleParser parser;
378     if (target_rule == "#0")
379       target_rule = test.rule;
380     else if (target_rule == "#54")
381       target_rule = test.chrome_54_rule;
382     EXPECT_EQ(url_pattern_index::proto::RULE_TYPE_URL,
383               parser.Parse(target_rule));
384     return parser.url_rule().ToProtobuf();
385   };
386 
387   RuleParser parser;
388   for (const auto& test : kTestCases) {
389     SCOPED_TRACE(test.rule);
390     ASSERT_EQ(url_pattern_index::proto::RULE_TYPE_URL, parser.Parse(test.rule));
391     const url_pattern_index::proto::UrlRule current_rule =
392         parser.url_rule().ToProtobuf();
393 
394     url_pattern_index::proto::UrlRule modified_rule = current_rule;
395     EXPECT_FALSE(DeleteUrlRuleOrAmend(&modified_rule, 0));
396     EXPECT_TRUE(AreUrlRulesEqual(modified_rule, current_rule));
397 
398     modified_rule = current_rule;
399     EXPECT_EQ(!test.chrome_54_rule, DeleteUrlRuleOrAmend(&modified_rule, 54));
400     if (test.chrome_54_rule) {
401       EXPECT_TRUE(AreUrlRulesEqual(modified_rule,
402                                    get_target_rule(test, test.chrome_54_rule)));
403     }
404 
405     modified_rule = current_rule;
406     EXPECT_EQ(!test.chrome_59_rule, DeleteUrlRuleOrAmend(&modified_rule, 59));
407     if (test.chrome_59_rule) {
408       EXPECT_TRUE(AreUrlRulesEqual(modified_rule,
409                                    get_target_rule(test, test.chrome_59_rule)));
410     }
411   }
412 }
413 
414 }  // namespace subresource_filter
415