1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/subresource_filter/tools/ruleset_converter/rule_stream.h"
6
7 #include <algorithm>
8 #include <memory>
9 #include <string>
10 #include <vector>
11
12 #include "base/files/file_util.h"
13 #include "base/files/scoped_temp_dir.h"
14 #include "base/logging.h"
15 #include "base/stl_util.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_util.h"
18 #include "components/subresource_filter/tools/rule_parser/rule_parser.h"
19 #include "components/subresource_filter/tools/ruleset_converter/ruleset_test_util.h"
20 #include "components/url_pattern_index/proto/rules.pb.h"
21 #include "testing/gmock/include/gmock/gmock.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23
24 namespace subresource_filter {
25
26 namespace {
27
// Provides a short, fixed list of filter rules in their textual form. The
// order of the returned rules is always the same.
std::vector<std::string> GetSomeRules() {
  static const char* const kRuleTexts[] = {
      "example.com",
      "||ex.com$image",
      "|http://example.com/?key=value$~third-party,domain=ex.com",
      "&key1=value1&key2=value2|$script,image,font",
      "domain1.com,domain1.com###id",
      "@@whitelisted.com$document,domain=example.com|~sub.example.com",
      "###absolute_evil_id",
      "@@whitelisted.com$match-case,document,domain=another.example.com",
      "domain.com,~sub.domain.com,sub.sub.domain.com#@#id",
      "#@#absolute_good_id",
      "host$websocket",
  };
  return std::vector<std::string>(std::begin(kRuleTexts),
                                  std::end(kRuleTexts));
}
44
// Returns some rules which Chrome doesn't support fully or partially, mixed
// with a couple of supported rules, or otherwise weird ones.
//
// Every entry must be followed by a comma: adjacent string literals are
// silently concatenated by the compiler, which would merge two rules into a
// single bogus one.
std::vector<std::string> GetSomeChromeUnfriendlyRules() {
  return std::vector<std::string>{
      "/a[0-9].com/$image",
      "a.com$image,popup",
      "a.com$popup",
      "a.com$~image",
      "a.com$~popup",
      "a.com$~image,~popup",
      "@@a.com$subdocument,document",
      "@@a.com$document,generichide",
      "@@a.com$document",
      "@@a.com$genericblock",
      "@@a.com$elemhide",
      "@@a.com$generichide",
      "@@a.com$elemhide,generichide",
      "@@a.com$image,elemhide,generichide",
      "a.com$image,~image",
  };
}
66
67 // Generates and returns many rules in text format.
GetManyRules()68 std::vector<std::string> GetManyRules() {
69 constexpr size_t kNumberOfUrlRules = 10123;
70 constexpr size_t kNumberOfCssRules = 5321;
71
72 std::vector<std::string> text_rules;
73
74 for (size_t i = 0; i != kNumberOfUrlRules; ++i) {
75 std::string text_rule;
76 if (!(i & 3))
77 text_rule += "@@";
78 if (i & 1)
79 text_rule += "sub.";
80 text_rule += "example" + base::NumberToString(i) + ".com";
81 text_rule += '$';
82 text_rule += (i & 7) ? "script" : "image";
83 if (i & 1)
84 text_rule += ",domain=example.com|~but_not.example.com";
85 text_rules.push_back(text_rule);
86 }
87
88 for (size_t i = 0; i != kNumberOfCssRules; ++i) {
89 std::string text_rule = "domain.com";
90 if (i & 1)
91 text_rule += ",~but_not.domain.com";
92 text_rule += (i & 3) ? "##" : "#@#";
93 text_rule += "#id" + base::NumberToString(i);
94 text_rules.push_back(text_rule);
95 }
96
97 return text_rules;
98 }
99
100 // Reads the provided |ruleset_file| skipping every second rule (independently
101 // for URL and CSS rules), and EXPECTs that it contains exactly all the rules
102 // from |expected_contents| in the same order.
ReadHalfRulesOfTestRulesetAndExpectContents(const ScopedTempRulesetFile & ruleset_file,const TestRulesetContents & expected_contents)103 void ReadHalfRulesOfTestRulesetAndExpectContents(
104 const ScopedTempRulesetFile& ruleset_file,
105 const TestRulesetContents& expected_contents) {
106 std::unique_ptr<RuleInputStream> input = ruleset_file.OpenForInput();
107
108 TestRulesetContents contents;
109
110 bool take_url_rule = true;
111 bool take_css_rule = true;
112 url_pattern_index::proto::RuleType rule_type =
113 url_pattern_index::proto::RULE_TYPE_UNSPECIFIED;
114 while ((rule_type = input->FetchNextRule()) !=
115 url_pattern_index::proto::RULE_TYPE_UNSPECIFIED) {
116 if (rule_type == url_pattern_index::proto::RULE_TYPE_URL) {
117 if (take_url_rule)
118 contents.url_rules.push_back(input->GetUrlRule());
119 take_url_rule = !take_url_rule;
120 } else {
121 ASSERT_EQ(url_pattern_index::proto::RULE_TYPE_CSS, rule_type);
122 if (take_css_rule)
123 contents.css_rules.push_back(input->GetCssRule());
124 take_css_rule = !take_css_rule;
125 }
126 }
127
128 EXPECT_EQ(contents, expected_contents);
129 }
130
131 } // namespace
132
// Writes a ruleset in each supported format and checks that reading it back
// yields the same contents. Runs once with the small rule list and once with
// the big one.
TEST(RuleStreamTest, WriteAndReadRuleset) {
  for (const bool use_many_rules : {false, true}) {
    TestRulesetContents all_rules;
    if (use_many_rules)
      all_rules.AppendRules(GetManyRules());
    else
      all_rules.AppendRules(GetSomeRules());

    TestRulesetContents url_rules_only;
    url_rules_only.url_rules = all_rules.url_rules;

    for (auto format : {RulesetFormat::kFilterList, RulesetFormat::kProto,
                        RulesetFormat::kUnindexedRuleset}) {
      ScopedTempRulesetFile ruleset_file(format);
      ruleset_file.WriteRuleset(all_rules);

      // Note: kUnindexedRuleset discards CSS rules, so only the URL rules are
      // expected to come back for that format.
      const bool css_rules_discarded =
          format == RulesetFormat::kUnindexedRuleset;
      const TestRulesetContents& expected =
          css_rules_discarded ? url_rules_only : all_rules;
      EXPECT_EQ(ruleset_file.ReadContents(), expected);
    }
  }
}
155
// Writes the big ruleset in each supported format, then reads it back while
// skipping every second URL rule and every second CSS rule, and checks that
// exactly the even-indexed rules were kept.
TEST(RuleStreamTest, WriteAndReadHalfRuleset) {
  TestRulesetContents all_rules;
  all_rules.AppendRules(GetManyRules());

  // Appends the elements of |from| at indices 0, 2, 4, ... to |to|.
  const auto copy_even_indexed = [](const auto& from, auto* to) {
    for (size_t index = 0; index < from.size(); index += 2)
      to->push_back(from[index]);
  };

  TestRulesetContents expected_half;
  copy_even_indexed(all_rules.url_rules, &expected_half.url_rules);
  copy_even_indexed(all_rules.css_rules, &expected_half.css_rules);

  TestRulesetContents expected_half_url_only;
  expected_half_url_only.url_rules = expected_half.url_rules;

  for (auto format : {RulesetFormat::kFilterList, RulesetFormat::kProto,
                      RulesetFormat::kUnindexedRuleset}) {
    ScopedTempRulesetFile ruleset_file(format);
    ruleset_file.WriteRuleset(all_rules);

    // Note: kUnindexedRuleset discards CSS rules, so only the URL half is
    // expected for that format.
    const bool css_rules_discarded =
        format == RulesetFormat::kUnindexedRuleset;
    ReadHalfRulesOfTestRulesetAndExpectContents(
        ruleset_file,
        css_rules_discarded ? expected_half_url_only : expected_half);
  }
}
180
// Transfers rules with the same output stream passed as both the URL-rule and
// the CSS-rule destination, and expects the target file to end up with the
// complete source contents.
TEST(RuleStreamTest, TransferAllRulesToSameStream) {
  TestRulesetContents all_rules;
  all_rules.AppendRules(GetManyRules());

  ScopedTempRulesetFile source_file(RulesetFormat::kFilterList);
  ScopedTempRulesetFile target_file(RulesetFormat::kFilterList);
  source_file.WriteRuleset(all_rules);

  std::unique_ptr<RuleInputStream> reader = source_file.OpenForInput();
  std::unique_ptr<RuleOutputStream> writer = target_file.OpenForOutput();
  RuleOutputStream* const shared_sink = writer.get();
  TransferRules(reader.get(), shared_sink, shared_sink);
  EXPECT_TRUE(writer->Finish());

  // Destroy both streams before the target file is read back.
  reader.reset();
  writer.reset();

  EXPECT_EQ(target_file.ReadContents(), all_rules);
}
198
// Transfers rules with an output stream supplied only for URL rules (the CSS
// destination is null), and expects the target file to contain just the URL
// rules of the source.
TEST(RuleStreamTest, TransferUrlRulesToOneStream) {
  TestRulesetContents expected;
  expected.AppendRules(GetManyRules());

  ScopedTempRulesetFile source_file(RulesetFormat::kFilterList);
  ScopedTempRulesetFile target_file(RulesetFormat::kFilterList);
  source_file.WriteRuleset(expected);

  std::unique_ptr<RuleInputStream> reader = source_file.OpenForInput();
  std::unique_ptr<RuleOutputStream> url_writer = target_file.OpenForOutput();
  TransferRules(reader.get(), url_writer.get(), nullptr);
  EXPECT_TRUE(url_writer->Finish());

  // Destroy both streams before the target file is read back.
  reader.reset();
  url_writer.reset();

  // No CSS rules are expected in the target file.
  expected.css_rules.clear();
  EXPECT_EQ(target_file.ReadContents(), expected);
}
217
// Transfers rules with an output stream supplied only for CSS rules (the URL
// destination is null), and expects the target file to contain just the CSS
// rules of the source.
TEST(RuleStreamTest, TransferCssRulesToOneStream) {
  TestRulesetContents expected;
  expected.AppendRules(GetManyRules());

  ScopedTempRulesetFile source_file(RulesetFormat::kFilterList);
  ScopedTempRulesetFile target_file(RulesetFormat::kFilterList);
  source_file.WriteRuleset(expected);

  std::unique_ptr<RuleInputStream> reader = source_file.OpenForInput();
  std::unique_ptr<RuleOutputStream> css_writer = target_file.OpenForOutput();
  TransferRules(reader.get(), nullptr, css_writer.get());
  EXPECT_TRUE(css_writer->Finish());

  // Destroy both streams before the target file is read back.
  reader.reset();
  css_writer.reset();

  // No URL rules are expected in the target file.
  expected.url_rules.clear();
  EXPECT_EQ(target_file.ReadContents(), expected);
}
236
// Transfers rules with distinct output streams for URL and CSS rules, and
// expects each target file to contain only the rules of its own kind.
TEST(RuleStreamTest, TransferAllRulesToDifferentStreams) {
  TestRulesetContents all_rules;
  all_rules.AppendRules(GetManyRules());

  ScopedTempRulesetFile source_file(RulesetFormat::kFilterList);
  ScopedTempRulesetFile url_target_file(RulesetFormat::kFilterList);
  ScopedTempRulesetFile css_target_file(RulesetFormat::kFilterList);
  source_file.WriteRuleset(all_rules);

  std::unique_ptr<RuleInputStream> reader = source_file.OpenForInput();
  std::unique_ptr<RuleOutputStream> url_writer =
      url_target_file.OpenForOutput();
  std::unique_ptr<RuleOutputStream> css_writer =
      css_target_file.OpenForOutput();
  TransferRules(reader.get(), url_writer.get(), css_writer.get());
  EXPECT_TRUE(url_writer->Finish());
  EXPECT_TRUE(css_writer->Finish());

  // Destroy all streams before the target files are read back.
  reader.reset();
  url_writer.reset();
  css_writer.reset();

  // The URL target is compared against a copy holding just the URL rules.
  TestRulesetContents expected_url_rules;
  expected_url_rules.url_rules = all_rules.url_rules;
  EXPECT_EQ(url_target_file.ReadContents(), expected_url_rules);

  // The CSS target is compared against the source contents with the URL
  // rules dropped.
  all_rules.url_rules.clear();
  EXPECT_EQ(css_target_file.ReadContents(), all_rules);
}
265
// Transfers only URL rules while targeting Chrome version 54, and expects the
// target file to contain the source URL rules minus those whose pattern type
// is URL_PATTERN_TYPE_REGEXP.
//
// NOTE(review): the input comes from GetManyRules(), which does not appear to
// generate any regexp-style patterns, so the filtering below may be a no-op
// here -- confirm whether this test needs additional regexp rules.
TEST(RuleStreamTest, TransferRulesAndDiscardRegexpRules) {
  TestRulesetContents expected;
  expected.AppendRules(GetManyRules());

  ScopedTempRulesetFile source_file(RulesetFormat::kFilterList);
  ScopedTempRulesetFile target_file(RulesetFormat::kFilterList);
  source_file.WriteRuleset(expected);

  std::unique_ptr<RuleInputStream> reader = source_file.OpenForInput();
  std::unique_ptr<RuleOutputStream> url_writer = target_file.OpenForOutput();
  TransferRules(reader.get(), url_writer.get(), nullptr,
                54 /* chrome_version */);
  EXPECT_TRUE(url_writer->Finish());

  // Destroy both streams before the target file is read back.
  reader.reset();
  url_writer.reset();

  // Build the expectation: no regexp URL rules and no CSS rules.
  const auto is_regexp_rule =
      [](const url_pattern_index::proto::UrlRule& rule) {
        return rule.url_pattern_type() ==
               url_pattern_index::proto::URL_PATTERN_TYPE_REGEXP;
      };
  base::EraseIf(expected.url_rules, is_regexp_rule);
  expected.css_rules.clear();

  EXPECT_EQ(target_file.ReadContents(), expected);
}
289
// For several Chrome versions, transfers the URL rules of a mixed ruleset and
// checks the output against the result of applying DeleteUrlRuleOrAmend() to
// each source URL rule directly.
TEST(RuleStreamTest, TransferRulesChromeVersion) {
  TestRulesetContents source_contents;
  source_contents.AppendRules(GetSomeChromeUnfriendlyRules());
  source_contents.AppendRules(GetManyRules());

  ScopedTempRulesetFile source_file(RulesetFormat::kFilterList);
  source_file.WriteRuleset(source_contents);

  for (const int chrome_version : {0, 54, 59}) {
    // Compute the expectation: amend a copy of every URL rule, and drop the
    // ones DeleteUrlRuleOrAmend() asks to delete.
    TestRulesetContents expected;
    for (const url_pattern_index::proto::UrlRule& original_rule :
         source_contents.url_rules) {
      url_pattern_index::proto::UrlRule amended_rule = original_rule;
      const bool should_delete =
          DeleteUrlRuleOrAmend(&amended_rule, chrome_version);
      if (!should_delete)
        expected.url_rules.push_back(amended_rule);
    }

    ScopedTempRulesetFile target_file(RulesetFormat::kFilterList);
    std::unique_ptr<RuleOutputStream> url_writer = target_file.OpenForOutput();
    std::unique_ptr<RuleInputStream> reader = source_file.OpenForInput();
    TransferRules(reader.get(), url_writer.get(), nullptr, chrome_version);
    EXPECT_TRUE(url_writer->Finish());

    // Destroy both streams before the target file is read back.
    reader.reset();
    url_writer.reset();

    EXPECT_EQ(target_file.ReadContents(), expected);
  }
}
317
// Writes a filter list containing a few lines that are expected to fail
// parsing (unsupported options and an empty line) among valid rules, and
// checks that transferring it yields exactly the rules that parse cleanly.
TEST(RuleStreamTest, TransferRulesFromFilterListWithUnsupportedOptions) {
  std::vector<std::string> text_rules = GetSomeRules();
  const size_t number_of_correct_rules = text_rules.size();

  // Insert several rules with non-critical parse errors.
  text_rules.insert(text_rules.begin(), "host1$donottrack");
  text_rules.push_back("");
  text_rules.insert(text_rules.begin() + text_rules.size() / 2,
                    "host3$collapse");

  ScopedTempRulesetFile source_ruleset(RulesetFormat::kFilterList);
  ScopedTempRulesetFile target_ruleset(RulesetFormat::kFilterList);

  // Output all the rules to the |source_ruleset| file. Stop right away if the
  // file could not be written in full: every check below would otherwise fail
  // with a misleading content mismatch.
  const std::string joined_rules = base::JoinString(text_rules, "\n");
  const int joined_rules_size = static_cast<int>(joined_rules.size());
  ASSERT_EQ(joined_rules_size,
            base::WriteFile(source_ruleset.ruleset_path(), joined_rules.data(),
                            joined_rules_size));

  // Filter out the rules with parse errors, and save the rest to |contents|.
  TestRulesetContents contents;
  contents.AppendRules(text_rules, true /* allow_errors */);

  // Make sure all the rules with no errors were transferred. The streams are
  // scoped so that they are destroyed before the target file is read back.
  {
    std::unique_ptr<RuleInputStream> input = source_ruleset.OpenForInput();
    std::unique_ptr<RuleOutputStream> output = target_ruleset.OpenForOutput();
    TransferRules(input.get(), output.get(), output.get());
    EXPECT_TRUE(output->Finish());
  }

  EXPECT_EQ(number_of_correct_rules,
            contents.url_rules.size() + contents.css_rules.size());
  EXPECT_EQ(target_ruleset.ReadContents(), contents);
}
352
// Table-driven check of DeleteUrlRuleOrAmend() for Chrome versions 0, 54 and
// 59.
//
// For each case, |chrome_54_rule| and |chrome_59_rule| describe the expected
// outcome for the corresponding version:
//   - nullptr: the rule is expected to be deleted;
//   - "#0":    the rule is expected to stay equal to |rule|;
//   - "#54":   the rule is expected to be equal to |chrome_54_rule|;
//   - anything else: the text of the expected amended rule.
// For version 0 the rule is always expected to be kept unchanged.
TEST(RuleStreamTest, DeleteUrlRuleOrAmend) {
  const struct TestCase {
    const char* rule;
    const char* chrome_54_rule;
    const char* chrome_59_rule;
  } kTestCases[] = {
      {"/a[0-9].com/$image", nullptr, nullptr},
      {"a.com$image,popup", "a.com$image,~popup", "#54"},
      {"a.com$popup", nullptr, nullptr},
      {"a.com$~image", "a.com$~image,~popup,~websocket", "#0"},
      {"a.com$~popup", "a.com$~popup,~websocket", "a.com"},
      {"a.com$~image,~popup", "a.com$~image,~popup,~websocket", "#0"},
      {"@@a.com$subdocument,document", "#0", "#0"},
      {"@@a.com$document,generichide", "@@a.com$document", "#54"},
      {"@@a.com$document", "#0", "#0"},
      {"@@a.com$genericblock", "#0", "#0"},
      {"@@a.com$elemhide", nullptr, nullptr},
      {"@@a.com$generichide", nullptr, nullptr},
      {"@@a.com$elemhide,generichide", nullptr, nullptr},
      {"@@a.com$image,elemhide,generichide", "@@a.com$image", "#54"},
      {"a.com$image,~image", nullptr, nullptr},
  };

  // Resolves the "#0" / "#54" placeholders of |expectation| against |test|,
  // then parses the resulting text and returns it as a UrlRule proto.
  auto parse_expected_rule = [](const TestCase& test,
                                std::string expectation) {
    RuleParser expectation_parser;
    if (expectation == "#0") {
      expectation = test.rule;
    } else if (expectation == "#54") {
      expectation = test.chrome_54_rule;
    }
    EXPECT_EQ(url_pattern_index::proto::RULE_TYPE_URL,
              expectation_parser.Parse(expectation));
    return expectation_parser.url_rule().ToProtobuf();
  };

  RuleParser parser;
  for (const auto& test : kTestCases) {
    SCOPED_TRACE(test.rule);
    ASSERT_EQ(url_pattern_index::proto::RULE_TYPE_URL, parser.Parse(test.rule));
    const url_pattern_index::proto::UrlRule original_rule =
        parser.url_rule().ToProtobuf();

    // Version 0: the rule must be kept and left untouched.
    url_pattern_index::proto::UrlRule amended_rule = original_rule;
    EXPECT_FALSE(DeleteUrlRuleOrAmend(&amended_rule, 0));
    EXPECT_TRUE(AreUrlRulesEqual(amended_rule, original_rule));

    // Version 54: deleted iff no expectation is given, otherwise amended as
    // described by |chrome_54_rule|.
    amended_rule = original_rule;
    const bool expect_deleted_for_54 = !test.chrome_54_rule;
    EXPECT_EQ(expect_deleted_for_54, DeleteUrlRuleOrAmend(&amended_rule, 54));
    if (!expect_deleted_for_54) {
      EXPECT_TRUE(AreUrlRulesEqual(
          amended_rule, parse_expected_rule(test, test.chrome_54_rule)));
    }

    // Version 59: same scheme, driven by |chrome_59_rule|.
    amended_rule = original_rule;
    const bool expect_deleted_for_59 = !test.chrome_59_rule;
    EXPECT_EQ(expect_deleted_for_59, DeleteUrlRuleOrAmend(&amended_rule, 59));
    if (!expect_deleted_for_59) {
      EXPECT_TRUE(AreUrlRulesEqual(
          amended_rule, parse_expected_rule(test, test.chrome_59_rule)));
    }
  }
}
413
414 } // namespace subresource_filter
415