1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/supervised_user/supervised_user_url_filter.h"
6 
7 #include <memory>
8 
9 #include "base/bind.h"
10 #include "base/callback_helpers.h"
11 #include "base/memory/ref_counted.h"
12 #include "base/run_loop.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "base/test/task_environment.h"
15 #include "chrome/browser/supervised_user/supervised_user_site_list.h"
16 #include "extensions/buildflags/buildflags.h"
17 #include "testing/gtest/include/gtest/gtest.h"
18 #include "url/gurl.h"
19 
20 class SupervisedUserURLFilterTest : public ::testing::Test,
21                                     public SupervisedUserURLFilter::Observer {
22  public:
SupervisedUserURLFilterTest()23   SupervisedUserURLFilterTest() {
24     filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::BLOCK);
25     filter_.AddObserver(this);
26   }
27 
~SupervisedUserURLFilterTest()28   ~SupervisedUserURLFilterTest() override { filter_.RemoveObserver(this); }
29 
30   // SupervisedUserURLFilter::Observer:
OnSiteListUpdated()31   void OnSiteListUpdated() override { run_loop_.Quit(); }
OnURLChecked(const GURL & url,SupervisedUserURLFilter::FilteringBehavior behavior,supervised_user_error_page::FilteringBehaviorReason reason,bool uncertain)32   void OnURLChecked(const GURL& url,
33                     SupervisedUserURLFilter::FilteringBehavior behavior,
34                     supervised_user_error_page::FilteringBehaviorReason reason,
35                     bool uncertain) override {
36     behavior_ = behavior;
37     reason_ = reason;
38   }
39 
40  protected:
IsURLAllowlisted(const std::string & url)41   bool IsURLAllowlisted(const std::string& url) {
42     return filter_.GetFilteringBehaviorForURL(GURL(url)) ==
43            SupervisedUserURLFilter::ALLOW;
44   }
45 
ExpectURLInDefaultAllowlist(const std::string & url)46   void ExpectURLInDefaultAllowlist(const std::string& url) {
47     ExpectURLCheckMatches(url, SupervisedUserURLFilter::ALLOW,
48                           supervised_user_error_page::DEFAULT);
49   }
50 
ExpectURLInDefaultDenylist(const std::string & url)51   void ExpectURLInDefaultDenylist(const std::string& url) {
52     ExpectURLCheckMatches(url, SupervisedUserURLFilter::BLOCK,
53                           supervised_user_error_page::DEFAULT);
54   }
55 
ExpectURLInManualAllowlist(const std::string & url)56   void ExpectURLInManualAllowlist(const std::string& url) {
57     ExpectURLCheckMatches(url, SupervisedUserURLFilter::ALLOW,
58                           supervised_user_error_page::MANUAL);
59   }
60 
ExpectURLInManualDenylist(const std::string & url)61   void ExpectURLInManualDenylist(const std::string& url) {
62     ExpectURLCheckMatches(url, SupervisedUserURLFilter::BLOCK,
63                           supervised_user_error_page::MANUAL);
64   }
65 
66   base::test::TaskEnvironment task_environment_;
67   base::RunLoop run_loop_;
68   SupervisedUserURLFilter filter_;
69   SupervisedUserURLFilter::FilteringBehavior behavior_;
70   supervised_user_error_page::FilteringBehaviorReason reason_;
71 
72  private:
ExpectURLCheckMatches(const std::string & url,SupervisedUserURLFilter::FilteringBehavior expected_behavior,supervised_user_error_page::FilteringBehaviorReason expected_reason,bool skip_manual_parent_filter=false)73   void ExpectURLCheckMatches(
74       const std::string& url,
75       SupervisedUserURLFilter::FilteringBehavior expected_behavior,
76       supervised_user_error_page::FilteringBehaviorReason expected_reason,
77       bool skip_manual_parent_filter = false) {
78     bool called_synchronously =
79         filter_.GetFilteringBehaviorForURLWithAsyncChecks(
80             GURL(url), base::DoNothing(), skip_manual_parent_filter);
81     ASSERT_TRUE(called_synchronously);
82 
83     EXPECT_EQ(behavior_, expected_behavior);
84     EXPECT_EQ(reason_, expected_reason);
85   }
86 };
87 
// A single bare-host pattern with the fixture's default behavior (BLOCK):
// checks the host itself, its subdomains, unrelated hosts and URLs whose
// scheme is not filtered.
TEST_F(SupervisedUserURLFilterTest, Basic) {
  // A bare host allows the domain and all subdomains, for any filtered scheme.
  const std::vector<std::string> patterns{"google.com"};
  filter_.SetFromPatternsForTesting(patterns);
  run_loop_.Run();

  struct UrlCase {
    const char* url;
    bool expect_allowed;
  };
  const UrlCase kCases[] = {
      {"http://google.com", true},
      {"http://google.com/", true},
      {"http://google.com/whatever", true},
      {"https://google.com/", true},
      {"http://notgoogle.com/", false},
      {"http://mail.google.com", true},
      {"http://x.mail.google.com", true},
      {"https://x.mail.google.com/", true},
      {"http://x.y.google.com/a/b", true},
      {"http://youtube.com/", false},

      // URLs with other schemes are expected to be allowed regardless of the
      // host.
      {"bogus://youtube.com/", true},
      {"chrome://youtube.com/", true},
      {"chrome://extensions/", true},
      {"chrome-extension://foo/main.html", true},
      {"file:///home/chronos/user/Downloads/img.jpg", true},
  };

  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.expect_allowed, IsURLAllowlisted(test_case.url));
  }
}
112 
TEST_F(SupervisedUserURLFilterTest,EffectiveURL)113 TEST_F(SupervisedUserURLFilterTest, EffectiveURL) {
114   std::vector<std::string> list;
115   // Allow domain and all subdomains, for any filtered scheme.
116   list.push_back("example.com");
117   filter_.SetFromPatternsForTesting(list);
118   run_loop_.Run();
119 
120   ASSERT_TRUE(IsURLAllowlisted("http://example.com"));
121   ASSERT_TRUE(IsURLAllowlisted("https://example.com"));
122 
123   // AMP Cache URLs.
124   EXPECT_FALSE(IsURLAllowlisted("https://cdn.ampproject.org"));
125   EXPECT_TRUE(IsURLAllowlisted("https://cdn.ampproject.org/c/example.com"));
126   EXPECT_TRUE(IsURLAllowlisted("https://cdn.ampproject.org/c/www.example.com"));
127   EXPECT_TRUE(
128       IsURLAllowlisted("https://cdn.ampproject.org/c/example.com/path"));
129   EXPECT_TRUE(IsURLAllowlisted("https://cdn.ampproject.org/c/s/example.com"));
130   EXPECT_FALSE(IsURLAllowlisted("https://cdn.ampproject.org/c/other.com"));
131 
132   EXPECT_FALSE(IsURLAllowlisted("https://sub.cdn.ampproject.org"));
133   EXPECT_TRUE(IsURLAllowlisted("https://sub.cdn.ampproject.org/c/example.com"));
134   EXPECT_TRUE(
135       IsURLAllowlisted("https://sub.cdn.ampproject.org/c/www.example.com"));
136   EXPECT_TRUE(
137       IsURLAllowlisted("https://sub.cdn.ampproject.org/c/example.com/path"));
138   EXPECT_TRUE(
139       IsURLAllowlisted("https://sub.cdn.ampproject.org/c/s/example.com"));
140   EXPECT_FALSE(IsURLAllowlisted("https://sub.cdn.ampproject.org/c/other.com"));
141 
142   // Google AMP viewer URLs.
143   EXPECT_FALSE(IsURLAllowlisted("https://www.google.com"));
144   EXPECT_FALSE(IsURLAllowlisted("https://www.google.com/amp/"));
145   EXPECT_TRUE(IsURLAllowlisted("https://www.google.com/amp/example.com"));
146   EXPECT_TRUE(IsURLAllowlisted("https://www.google.com/amp/www.example.com"));
147   EXPECT_TRUE(IsURLAllowlisted("https://www.google.com/amp/s/example.com"));
148   EXPECT_TRUE(
149       IsURLAllowlisted("https://www.google.com/amp/s/example.com/path"));
150   EXPECT_FALSE(IsURLAllowlisted("https://www.google.com/amp/other.com"));
151 
152   // Google web cache URLs.
153   EXPECT_FALSE(IsURLAllowlisted("https://webcache.googleusercontent.com"));
154   EXPECT_FALSE(
155       IsURLAllowlisted("https://webcache.googleusercontent.com/search"));
156   EXPECT_FALSE(IsURLAllowlisted(
157       "https://webcache.googleusercontent.com/search?q=example.com"));
158   EXPECT_TRUE(IsURLAllowlisted(
159       "https://webcache.googleusercontent.com/search?q=cache:example.com"));
160   EXPECT_TRUE(
161       IsURLAllowlisted("https://webcache.googleusercontent.com/"
162                        "search?q=cache:example.com+search_query"));
163   EXPECT_TRUE(
164       IsURLAllowlisted("https://webcache.googleusercontent.com/"
165                        "search?q=cache:123456789-01:example.com+search_query"));
166   EXPECT_FALSE(IsURLAllowlisted(
167       "https://webcache.googleusercontent.com/search?q=cache:other.com"));
168   EXPECT_FALSE(
169       IsURLAllowlisted("https://webcache.googleusercontent.com/"
170                        "search?q=cache:other.com+example.com"));
171   EXPECT_FALSE(
172       IsURLAllowlisted("https://webcache.googleusercontent.com/"
173                        "search?q=cache:123456789-01:other.com+example.com"));
174 
175   // Google Translate URLs.
176   EXPECT_FALSE(IsURLAllowlisted("https://translate.google.com"));
177   EXPECT_FALSE(IsURLAllowlisted("https://translate.googleusercontent.com"));
178   EXPECT_TRUE(
179       IsURLAllowlisted("https://translate.google.com/translate?u=example.com"));
180   EXPECT_TRUE(IsURLAllowlisted(
181       "https://translate.googleusercontent.com/translate?u=example.com"));
182   EXPECT_TRUE(IsURLAllowlisted(
183       "https://translate.google.com/translate?u=www.example.com"));
184   EXPECT_TRUE(IsURLAllowlisted(
185       "https://translate.google.com/translate?u=https://example.com"));
186   EXPECT_FALSE(
187       IsURLAllowlisted("https://translate.google.com/translate?u=other.com"));
188 }
189 
// With ALLOW as the default behavior the filter is effectively inactive:
// both a URL matching the pattern and an unrelated URL must be allowed.
TEST_F(SupervisedUserURLFilterTest, Inactive) {
  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::ALLOW);

  const std::vector<std::string> patterns{"google.com"};
  filter_.SetFromPatternsForTesting(patterns);
  run_loop_.Run();

  // If the filter is inactive, every URL should be allowed.
  const char* const kAllowedUrls[] = {
      "http://google.com",
      "https://www.example.com",
  };
  for (const char* url : kAllowedUrls) {
    SCOPED_TRACE(url);
    EXPECT_TRUE(IsURLAllowlisted(url));
  }
}
202 
// Patterns that name a scheme only match URLs using exactly that scheme; the
// secure counterparts (https, wss) of the listed schemes stay blocked.
TEST_F(SupervisedUserURLFilterTest, Scheme) {
  // Filter only http, ftp and ws schemes.
  const std::vector<std::string> patterns{
      "http://secure.com",
      "ftp://secure.com",
      "ws://secure.com",
  };
  filter_.SetFromPatternsForTesting(patterns);
  run_loop_.Run();

  struct UrlCase {
    const char* url;
    bool expect_allowed;
  };
  const UrlCase kCases[] = {
      {"http://secure.com", true},
      {"http://secure.com/whatever", true},
      {"ftp://secure.com/", true},
      {"ws://secure.com", true},
      {"https://secure.com/", false},
      {"wss://secure.com", false},
      {"http://www.secure.com", true},
      {"https://www.secure.com", false},
      {"wss://www.secure.com", false},
  };

  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.expect_allowed, IsURLAllowlisted(test_case.url));
  }
}
222 
// A pattern with a path component acts as a path prefix: longer paths that
// start with it match, a different path on the same host does not.
TEST_F(SupervisedUserURLFilterTest, Path) {
  // Filter only a certain path prefix.
  const std::vector<std::string> patterns{"path.to/ruin"};
  filter_.SetFromPatternsForTesting(patterns);
  run_loop_.Run();

  struct UrlCase {
    const char* url;
    bool expect_allowed;
  };
  const UrlCase kCases[] = {
      {"http://path.to/ruin", true},
      {"https://path.to/ruin", true},
      {"http://path.to/ruins", true},
      {"http://path.to/ruin/signup", true},
      {"http://www.path.to/ruin", true},
      {"http://path.to/fortune", false},
  };

  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.expect_allowed, IsURLAllowlisted(test_case.url));
  }
}
237 
// A pattern that fixes scheme, host and path prefix: changing any one of the
// three must make the URL fall back to the default (BLOCK).
TEST_F(SupervisedUserURLFilterTest, PathAndScheme) {
  // Filter only a certain path prefix and scheme.
  const std::vector<std::string> patterns{"https://s.aaa.com/path"};
  filter_.SetFromPatternsForTesting(patterns);
  run_loop_.Run();

  struct UrlCase {
    const char* url;
    bool expect_allowed;
  };
  const UrlCase kCases[] = {
      {"https://s.aaa.com/path", true},
      {"https://s.aaa.com/path/bbb", true},
      {"http://s.aaa.com/path", false},
      {"https://aaa.com/path", false},
      {"https://x.aaa.com/path", false},
      {"https://s.aaa.com/bbb", false},
      {"https://s.aaa.com/", false},
  };

  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.expect_allowed, IsURLAllowlisted(test_case.url));
  }
}
253 
// A pattern with a leading dot matches that exact hostname only; neither the
// parent domain nor a sibling subdomain is allowed.
TEST_F(SupervisedUserURLFilterTest, Host) {
  // Filter only a certain hostname, without subdomains.
  const std::vector<std::string> patterns{".www.example.com"};
  filter_.SetFromPatternsForTesting(patterns);
  run_loop_.Run();

  struct UrlCase {
    const char* url;
    bool expect_allowed;
  };
  const UrlCase kCases[] = {
      {"http://www.example.com", true},
      {"http://example.com", false},
      {"http://subdomain.example.com", false},
  };

  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.expect_allowed, IsURLAllowlisted(test_case.url));
  }
}
265 
// An IP address can be used as a pattern; only that exact address matches.
TEST_F(SupervisedUserURLFilterTest, IPAddress) {
  // Filter an ip address.
  const std::vector<std::string> patterns{"123.123.123.123"};
  filter_.SetFromPatternsForTesting(patterns);
  run_loop_.Run();

  const bool listed_address_allowed =
      IsURLAllowlisted("http://123.123.123.123/");
  EXPECT_TRUE(listed_address_allowed);

  const bool neighbouring_address_allowed =
      IsURLAllowlisted("http://123.123.123.124/");
  EXPECT_FALSE(neighbouring_address_allowed);
}
276 
// Manual host/URL rules are stored in canonical form; every non-canonical
// spelling below is expected to resolve to one of the allowed entries.
TEST_F(SupervisedUserURLFilterTest, Canonicalization) {
  // We assume that the hosts and URLs are already canonicalized.
  std::map<std::string, bool> manual_hosts{
      {"www.moose.org", true},
      {"www.xn--n3h.net", true},
  };
  std::map<GURL, bool> manual_urls{
      {GURL("http://www.example.com/foo/"), true},
      {GURL("http://www.example.com/%C3%85t%C3%B8mstr%C3%B6m"), true},
  };
  filter_.SetManualHosts(std::move(manual_hosts));
  filter_.SetManualURLs(std::move(manual_urls));

  const char* const kAllowedUrls[] = {
      // Base cases.
      "http://www.example.com/foo/",
      "http://www.example.com/%C3%85t%C3%B8mstr%C3%B6m",

      // Verify that non-URI characters are escaped.
      "http://www.example.com/\xc3\x85t\xc3\xb8mstr\xc3\xb6m",

      // Verify that unnecessary URI escapes are unescaped.
      "http://www.example.com/%66%6F%6F/",

      // Verify that the default port is removed.
      "http://www.example.com:80/foo/",

      // Verify that scheme and hostname are lowercased.
      "htTp://wWw.eXamPle.com/foo/",
      "HttP://WwW.mOOsE.orG/blurp/",

      // Verify that UTF-8 in hostnames is converted to punycode.
      "http://www.\xe2\x98\x83\x0a.net/bla/",

      // Verify that query and ref are stripped.
      "http://www.example.com/foo/?bar=baz#ref",
  };

  for (const char* url : kAllowedUrls) {
    SCOPED_TRACE(url);
    EXPECT_TRUE(IsURLAllowlisted(url));
  }
}
313 
// HasFilteredScheme() must report true for http, https, ftp, ws and wss, and
// false for the other schemes listed below.
TEST_F(SupervisedUserURLFilterTest, HasFilteredScheme) {
  struct SchemeCase {
    const char* url;
    bool expect_filtered;
  };
  const SchemeCase kCases[] = {
      {"http://example.com", true},
      {"https://example.com", true},
      {"ftp://example.com", true},
      {"ws://example.com", true},
      {"wss://example.com", true},

      {"file://example.com", false},
      {"filesystem://80cols.com", false},
      {"chrome://example.com", false},
      {"wtf://example.com", false},
      {"gopher://example.com", false},
  };

  for (const SchemeCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.expect_filtered,
              SupervisedUserURLFilter::HasFilteredScheme(GURL(test_case.url)));
  }
}
338 
TEST_F(SupervisedUserURLFilterTest,HostMatchesPattern)339 TEST_F(SupervisedUserURLFilterTest, HostMatchesPattern) {
340   EXPECT_TRUE(SupervisedUserURLFilter::HostMatchesPattern("www.google.com",
341                                                           "google.com"));
342   EXPECT_TRUE(
343       SupervisedUserURLFilter::HostMatchesPattern("www.google.com",
344                                                   "*.google.com"));
345   EXPECT_TRUE(
346       SupervisedUserURLFilter::HostMatchesPattern("google.com",
347                                                   "*.google.com"));
348   EXPECT_TRUE(
349       SupervisedUserURLFilter::HostMatchesPattern("accounts.google.com",
350                                                   "*.google.com"));
351   EXPECT_FALSE(
352       SupervisedUserURLFilter::HostMatchesPattern("www.google.de",
353                                                   "*.google.com"));
354   EXPECT_FALSE(
355       SupervisedUserURLFilter::HostMatchesPattern("notgoogle.com",
356                                                   "*.google.com"));
357 
358 
359   EXPECT_TRUE(
360       SupervisedUserURLFilter::HostMatchesPattern("www.google.com",
361                                                   "www.google.*"));
362   EXPECT_TRUE(
363       SupervisedUserURLFilter::HostMatchesPattern("www.google.de",
364                                                   "www.google.*"));
365   EXPECT_TRUE(
366       SupervisedUserURLFilter::HostMatchesPattern("www.google.co.uk",
367                                                   "www.google.*"));
368   EXPECT_FALSE(
369       SupervisedUserURLFilter::HostMatchesPattern("www.google.blogspot.com",
370                                                   "www.google.*"));
371   EXPECT_FALSE(
372       SupervisedUserURLFilter::HostMatchesPattern("www.google",
373                                                   "www.google.*"));
374   EXPECT_FALSE(
375       SupervisedUserURLFilter::HostMatchesPattern("google.com",
376                                                   "www.google.*"));
377   EXPECT_FALSE(
378       SupervisedUserURLFilter::HostMatchesPattern("mail.google.com",
379                                                   "www.google.*"));
380   EXPECT_FALSE(
381       SupervisedUserURLFilter::HostMatchesPattern("www.googleplex.com",
382                                                   "www.google.*"));
383   EXPECT_FALSE(
384       SupervisedUserURLFilter::HostMatchesPattern("www.googleco.uk",
385                                                   "www.google.*"));
386 
387 
388   EXPECT_TRUE(
389       SupervisedUserURLFilter::HostMatchesPattern("www.google.com",
390                                                   "*.google.*"));
391   EXPECT_TRUE(
392       SupervisedUserURLFilter::HostMatchesPattern("google.com",
393                                                   "*.google.*"));
394   EXPECT_TRUE(
395       SupervisedUserURLFilter::HostMatchesPattern("accounts.google.com",
396                                                   "*.google.*"));
397   EXPECT_TRUE(
398       SupervisedUserURLFilter::HostMatchesPattern("mail.google.com",
399                                                   "*.google.*"));
400   EXPECT_TRUE(
401       SupervisedUserURLFilter::HostMatchesPattern("www.google.de",
402                                                   "*.google.*"));
403   EXPECT_TRUE(
404       SupervisedUserURLFilter::HostMatchesPattern("google.de",
405                                                   "*.google.*"));
406   EXPECT_FALSE(
407       SupervisedUserURLFilter::HostMatchesPattern("google.blogspot.com",
408                                                   "*.google.*"));
409   EXPECT_FALSE(
410       SupervisedUserURLFilter::HostMatchesPattern("google", "*.google.*"));
411   EXPECT_FALSE(
412       SupervisedUserURLFilter::HostMatchesPattern("notgoogle.com",
413                                                   "*.google.*"));
414   EXPECT_FALSE(
415       SupervisedUserURLFilter::HostMatchesPattern("www.googleplex.com",
416                                                   "*.google.*"));
417 
418   // Now test a few invalid patterns. They should never match.
419   EXPECT_FALSE(
420       SupervisedUserURLFilter::HostMatchesPattern("www.google.com", ""));
421   EXPECT_FALSE(
422       SupervisedUserURLFilter::HostMatchesPattern("www.google.com", "."));
423   EXPECT_FALSE(
424       SupervisedUserURLFilter::HostMatchesPattern("www.google.com", "*"));
425   EXPECT_FALSE(
426       SupervisedUserURLFilter::HostMatchesPattern("www.google.com", ".*"));
427   EXPECT_FALSE(
428       SupervisedUserURLFilter::HostMatchesPattern("www.google.com", "*."));
429   EXPECT_FALSE(
430       SupervisedUserURLFilter::HostMatchesPattern("www.google.com", "*.*"));
431   EXPECT_FALSE(
432       SupervisedUserURLFilter::HostMatchesPattern("www.google..com", "*..*"));
433   EXPECT_FALSE(
434       SupervisedUserURLFilter::HostMatchesPattern("www.google.com", "*.*.com"));
435   EXPECT_FALSE(
436       SupervisedUserURLFilter::HostMatchesPattern("www.google.com", "www.*.*"));
437   EXPECT_FALSE(
438       SupervisedUserURLFilter::HostMatchesPattern("www.google.com",
439                                                   "*.goo.*le.*"));
440   EXPECT_FALSE(
441       SupervisedUserURLFilter::HostMatchesPattern("www.google.com",
442                                                   "*google*"));
443   EXPECT_FALSE(
444       SupervisedUserURLFilter::HostMatchesPattern("www.google.com",
445                                                   "www.*.google.com"));
446 }
447 
// Manual host rules that never disagree for the same host. Hosts covered by a
// rule keep their verdict under both default behaviors; only the uncovered
// host follows the default.
TEST_F(SupervisedUserURLFilterTest, PatternsWithoutConflicts) {
  // The third rule is redundant with the first, but it's not a conflict
  // since they have the same value (allow).
  std::map<std::string, bool> manual_hosts{
      {"*.google.com", true},
      {"accounts.google.com", false},
      {"mail.google.com", true},
  };
  filter_.SetManualHosts(std::move(manual_hosts));

  struct UrlCase {
    const char* url;
    bool allowed_with_default_block;
    bool allowed_with_default_allow;
  };
  const UrlCase kCases[] = {
      {"http://www.google.com/foo/", true, true},
      {"http://accounts.google.com/bar/", false, false},
      {"http://mail.google.com/moose/", true, true},
      {"http://www.google.co.uk/blurp/", false, true},
  };

  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::BLOCK);
  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.allowed_with_default_block,
              IsURLAllowlisted(test_case.url));
  }

  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::ALLOW);
  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.allowed_with_default_allow,
              IsURLAllowlisted(test_case.url));
  }
}
472 
// Manual host rules where an allow rule and a block rule cover the same host.
// The expected verdicts are identical under both default behaviors.
TEST_F(SupervisedUserURLFilterTest, PatternsWithConflicts) {
  // The fourth rule conflicts with the first for "www.google.com" host.
  // Blocking then takes precedence.
  std::map<std::string, bool> manual_hosts{
      {"*.google.com", true},
      {"accounts.google.com", false},
      {"mail.google.com", true},
      {"www.google.*", false},
  };
  filter_.SetManualHosts(std::move(manual_hosts));

  struct UrlCase {
    const char* url;
    bool expect_allowed;
  };
  const UrlCase kCases[] = {
      {"http://www.google.com/foo/", false},
      {"http://accounts.google.com/bar/", false},
      {"http://mail.google.com/moose/", true},
      {"http://www.google.co.uk/blurp/", false},
  };

  // Every listed URL is covered by a manual rule, so the default behavior
  // must not change any verdict.
  for (const auto default_behavior :
       {SupervisedUserURLFilter::BLOCK, SupervisedUserURLFilter::ALLOW}) {
    filter_.SetDefaultFilteringBehavior(default_behavior);
    for (const UrlCase& test_case : kCases) {
      SCOPED_TRACE(test_case.url);
      EXPECT_EQ(test_case.expect_allowed, IsURLAllowlisted(test_case.url));
    }
  }
}
498 
// Checks the reason reported alongside the filtering behavior: URLs covered
// by a manual host/URL rule report MANUAL, everything else reports DEFAULT
// together with whatever the current default behavior is.
TEST_F(SupervisedUserURLFilterTest, Reason) {
  std::map<std::string, bool> manual_hosts{
      {"youtube.com", true},
      {"*.google.*", true},
  };
  std::map<GURL, bool> manual_urls{
      {GURL("https://youtube.com/robots.txt"), false},
      {GURL("https://google.co.uk/robots.txt"), false},
  };
  filter_.SetManualHosts(std::move(manual_hosts));
  filter_.SetManualURLs(std::move(manual_urls));

  // The manually configured entries are expected to give the same result
  // under either default behavior.
  const auto expect_manual_rules_apply = [this]() {
    ExpectURLInManualAllowlist("https://youtube.com/feed/trending");
    ExpectURLInManualAllowlist("https://google.com/humans.txt");
    ExpectURLInManualDenylist("https://youtube.com/robots.txt");
    ExpectURLInManualDenylist("https://google.co.uk/robots.txt");
  };

  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::BLOCK);
  ExpectURLInDefaultDenylist("https://m.youtube.com/feed/trending");
  ExpectURLInDefaultDenylist("https://com.google");
  expect_manual_rules_apply();

  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::ALLOW);
  ExpectURLInDefaultAllowlist("https://m.youtube.com/feed/trending");
  ExpectURLInDefaultAllowlist("https://com.google");
  expect_manual_rules_apply();
}
528 
// Two site lists with overlapping patterns: GetMatchingAllowlistTitles() must
// return the id -> title entry of every list whose patterns match the URL.
TEST_F(SupervisedUserURLFilterTest, AllowlistsPatterns) {
  const std::string id1 = "ID1";
  const std::string id2 = "ID2";
  const base::string16 title1 = base::ASCIIToUTF16("Title 1");
  const base::string16 title2 = base::ASCIIToUTF16("Title 2");
  const GURL entry_point("https://entry.com");

  // "example.com" is shared by both lists, "google.com" is only in the first.
  const std::vector<std::string> patterns1{"google.com", "example.com"};
  const std::vector<std::string> patterns2{"secure.com", "example.com"};
  const std::vector<std::string> hostname_hashes;

  const scoped_refptr<SupervisedUserSiteList> site_list1 =
      base::WrapRefCounted(new SupervisedUserSiteList(
          id1, title1, entry_point, base::FilePath(), patterns1,
          hostname_hashes));
  const scoped_refptr<SupervisedUserSiteList> site_list2 =
      base::WrapRefCounted(new SupervisedUserSiteList(
          id2, title2, entry_point, base::FilePath(), patterns2,
          hostname_hashes));

  const std::vector<scoped_refptr<SupervisedUserSiteList>> site_lists{
      site_list1, site_list2};

  filter_.SetFromSiteListsForTesting(site_lists);
  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::BLOCK);
  run_loop_.Run();

  // A host present in both lists is reported for both.
  const std::map<std::string, base::string16> expected_for_example{
      {id1, title1},
      {id2, title2},
  };
  const std::map<std::string, base::string16> actual_for_example =
      filter_.GetMatchingAllowlistTitles(GURL("https://example.com"));
  ASSERT_EQ(expected_for_example, actual_for_example);

  // A host present only in the first list is reported for that list alone.
  const std::map<std::string, base::string16> expected_for_google{
      {id1, title1},
  };
  const std::map<std::string, base::string16> actual_for_google =
      filter_.GetMatchingAllowlistTitles(GURL("https://google.com"));
  ASSERT_EQ(expected_for_google, actual_for_google);
}
574 
TEST_F(SupervisedUserURLFilterTest,AllowlistsHostnameHashes)575 TEST_F(SupervisedUserURLFilterTest, AllowlistsHostnameHashes) {
576   std::vector<std::string> patterns1;
577   patterns1.push_back("google.com");
578   patterns1.push_back("example.com");
579 
580   std::vector<std::string> patterns2;
581   patterns2.push_back("secure.com");
582   patterns2.push_back("example.com");
583 
584   std::vector<std::string> patterns3;
585 
586   std::vector<std::string> hostname_hashes1;
587   std::vector<std::string> hostname_hashes2;
588   std::vector<std::string> hostname_hashes3;
589   // example.com
590   hostname_hashes3.push_back("0caaf24ab1a0c33440c06afe99df986365b0781f");
591   // secure.com
592   hostname_hashes3.push_back("529597fa818be828ffc7b59763fb2b185f419fc5");
593 
594   const std::string id1 = "ID1";
595   const std::string id2 = "ID2";
596   const std::string id3 = "ID3";
597   const base::string16 title1 = base::ASCIIToUTF16("Title 1");
598   const base::string16 title2 = base::ASCIIToUTF16("Title 2");
599   const base::string16 title3 = base::ASCIIToUTF16("Title 3");
600   const GURL entry_point("https://entry.com");
601 
602   scoped_refptr<SupervisedUserSiteList> site_list1 = base::WrapRefCounted(
603       new SupervisedUserSiteList(id1, title1, entry_point, base::FilePath(),
604                                  patterns1, hostname_hashes1));
605   scoped_refptr<SupervisedUserSiteList> site_list2 = base::WrapRefCounted(
606       new SupervisedUserSiteList(id2, title2, entry_point, base::FilePath(),
607                                  patterns2, hostname_hashes2));
608   scoped_refptr<SupervisedUserSiteList> site_list3 = base::WrapRefCounted(
609       new SupervisedUserSiteList(id3, title3, entry_point, base::FilePath(),
610                                  patterns3, hostname_hashes3));
611 
612   std::vector<scoped_refptr<SupervisedUserSiteList>> site_lists;
613   site_lists.push_back(site_list1);
614   site_lists.push_back(site_list2);
615   site_lists.push_back(site_list3);
616 
617   filter_.SetFromSiteListsForTesting(site_lists);
618   filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::BLOCK);
619   run_loop_.Run();
620 
621   std::map<std::string, base::string16> expected_allowlists;
622   expected_allowlists[id1] = title1;
623   expected_allowlists[id2] = title2;
624   expected_allowlists[id3] = title3;
625 
626   std::map<std::string, base::string16> actual_allowlists =
627       filter_.GetMatchingAllowlistTitles(GURL("http://example.com"));
628   ASSERT_EQ(expected_allowlists, actual_allowlists);
629 
630   expected_allowlists.erase(id1);
631 
632   actual_allowlists =
633       filter_.GetMatchingAllowlistTitles(GURL("https://secure.com"));
634   ASSERT_EQ(expected_allowlists, actual_allowlists);
635 }
636 
637 #if BUILDFLAG(ENABLE_EXTENSIONS)
// When installing an extension from Chrome Webstore, it tries to download the
// crx file from "https://clients2.google.com/service/update2/", which
// redirects to "https://clients2.googleusercontent.com/crx/blobs/"
// or "https://chrome.google.com/webstore/download/".
// All of these URLs should be allowed regardless of the default filtering
// behavior and of manual host rules.
TEST_F(SupervisedUserURLFilterTest, ChromeWebstoreDownloadsAreAlwaysAllowed) {
  const char* const kCrxDownloadUrls[] = {
      "https://clients2.google.com/service/update2/"
      "crx?response=redirect&os=linux&arch=x64&nacl_arch=x86-64&prod="
      "chromiumcrx&prodchannel=&prodversion=55.0.2882.0&lang=en-US&x=id%"
      "3Dciniambnphakdoflgeamacamhfllbkmo%26installsource%3Dondemand%26uc",

      "https://clients2.googleusercontent.com/crx/blobs/"
      "QgAAAC6zw0qH2DJtnXe8Z7rUJP1iCQF099oik9f2ErAYeFAX7_"
      "CIyrNH5qBru1lUSBNvzmjILCGwUjcIBaJqxgegSNy2melYqfodngLxKtHsGBehAMZSmuWSg6"
      "FupAcPS3Ih6NSVCOB9KNh6Mw/extension_2_0.crx",

      "https://chrome.google.com/webstore/download/"
      "QgAAAC6zw0qH2DJtnXe8Z7rUJP1iCQF099oik9f2ErAYeFAX7_"
      "CIyrNH5qBru1lUSBNvzmjILCGwUjcIBaJqxgegSNy2melYqfodngLxKtHsGBehAMZSmuWSg6"
      "FupAcPS3Ih6NSVCOB9KNh6Mw/extension_2_0.crx",
  };

  // Phase 1: everything is blocked by default.
  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::BLOCK);
  for (const char* download_url : kCrxDownloadUrls) {
    SCOPED_TRACE(download_url);
    EXPECT_EQ(SupervisedUserURLFilter::ALLOW,
              filter_.GetFilteringBehaviorForURL(GURL(download_url)));
  }

  // Phase 2: set explicit host rules to block those websites, and make sure
  // the update URLs still work.
  std::map<std::string, bool> blocked_hosts{
      {"clients2.google.com", false},
      {"clients2.googleusercontent.com", false},
  };
  filter_.SetManualHosts(std::move(blocked_hosts));
  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::ALLOW);
  for (const char* download_url : kCrxDownloadUrls) {
    SCOPED_TRACE(download_url);
    EXPECT_EQ(SupervisedUserURLFilter::ALLOW,
              filter_.GetFilteringBehaviorForURL(GURL(download_url)));
  }
}
683 #endif
684 
// families.google.com is expected to be reachable even when everything is
// blocked by default, but only on the default port and not on subdomains.
TEST_F(SupervisedUserURLFilterTest, GoogleFamiliesAlwaysAllowed) {
  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::BLOCK);

  struct UrlCase {
    const char* url;
    bool expect_allowed;
  };
  const UrlCase kCases[] = {
      {"https://families.google.com/", true},
      {"https://families.google.com", true},
      {"https://families.google.com/something", true},
      {"http://families.google.com/", true},
      {"https://families.google.com:8080/", false},
      {"https://subdomain.families.google.com/", false},
  };

  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.expect_allowed, IsURLAllowlisted(test_case.url));
  }
}
694 
// The Play terms pages on https://play.google.com are expected to be
// reachable even when everything is blocked by default; other schemes,
// subdomains and other paths on the same host stay blocked.
TEST_F(SupervisedUserURLFilterTest, PlayTermsAlwaysAllowed) {
  filter_.SetDefaultFilteringBehavior(SupervisedUserURLFilter::BLOCK);

  struct UrlCase {
    const char* url;
    bool expect_allowed;
  };
  const UrlCase kCases[] = {
      {"https://play.google.com/about/play-terms", true},
      {"https://play.google.com/about/play-terms/", true},
      {"https://play.google.com/intl/pt-BR_pt/about/play-terms/", true},
      {"https://play.google.com/about/play-terms/index.html", true},
      {"http://play.google.com/about/play-terms/", false},
      {"https://subdomain.play.google.com/about/play-terms/", false},
      {"https://play.google.com/", false},
      {"https://play.google.com/about", false},
  };

  for (const UrlCase& test_case : kCases) {
    SCOPED_TRACE(test_case.url);
    EXPECT_EQ(test_case.expect_allowed, IsURLAllowlisted(test_case.url));
  }
}
709