1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/safe_search_api/url_checker.h"
6
7 #include <string>
8 #include <utility>
9 #include <vector>
10
11 #include "base/bind.h"
12 #include "base/callback.h"
13 #include "base/feature_list.h"
14 #include "base/json/json_reader.h"
15 #include "base/logging.h"
16 #include "base/metrics/histogram_macros.h"
17 #include "base/stl_util.h"
18 #include "base/strings/string_piece.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/stringprintf.h"
21 #include "base/time/time.h"
22 #include "base/values.h"
23 #include "components/google/core/common/google_util.h"
24
25 namespace safe_search_api {
26
namespace {

// Cache size used by the single-argument URLChecker constructor when the
// caller does not pick one explicitly.
constexpr size_t kDefaultCacheSize = 1000;

// How long (in seconds) a cached classification stays valid before CheckURL()
// treats it as outdated and purges it. 3600 seconds is one hour.
constexpr size_t kDefaultCacheTimeoutSeconds = 3600;

}  // namespace
33
34 // Consider all URLs within a google domain to be safe.
35 const base::Feature kAllowAllGoogleUrls{"SafeSearchAllowAllGoogleURLs",
36 base::FEATURE_DISABLED_BY_DEFAULT};
37
38 struct URLChecker::Check {
39 Check(const GURL& url, CheckCallback callback);
40 ~Check();
41
42 GURL url;
43 std::vector<CheckCallback> callbacks;
44 };
45
Check(const GURL & url,CheckCallback callback)46 URLChecker::Check::Check(const GURL& url, CheckCallback callback) : url(url) {
47 callbacks.push_back(std::move(callback));
48 }
49
~Check()50 URLChecker::Check::~Check() {
51 for (const CheckCallback& callback : callbacks) {
52 DCHECK(!callback);
53 }
54 }
55
CheckResult(Classification classification,bool uncertain)56 URLChecker::CheckResult::CheckResult(Classification classification,
57 bool uncertain)
58 : classification(classification),
59 uncertain(uncertain),
60 timestamp(base::TimeTicks::Now()) {}
61
URLChecker(std::unique_ptr<URLCheckerClient> async_checker)62 URLChecker::URLChecker(std::unique_ptr<URLCheckerClient> async_checker)
63 : URLChecker(std::move(async_checker), kDefaultCacheSize) {}
64
URLChecker(std::unique_ptr<URLCheckerClient> async_checker,size_t cache_size)65 URLChecker::URLChecker(std::unique_ptr<URLCheckerClient> async_checker,
66 size_t cache_size)
67 : async_checker_(std::move(async_checker)),
68 cache_(cache_size),
69 cache_timeout_(
70 base::TimeDelta::FromSeconds(kDefaultCacheTimeoutSeconds)) {}
71
72 URLChecker::~URLChecker() = default;
73
CheckURL(const GURL & url,CheckCallback callback)74 bool URLChecker::CheckURL(const GURL& url, CheckCallback callback) {
75 if (base::FeatureList::IsEnabled(kAllowAllGoogleUrls)) {
76 // Hack: For now, allow all Google URLs to save QPS.
77 if (google_util::IsGoogleDomainUrl(url, google_util::ALLOW_SUBDOMAIN,
78 google_util::ALLOW_NON_STANDARD_PORTS)) {
79 std::move(callback).Run(url, Classification::SAFE, false);
80 return true;
81 }
82 // Hack: For now, allow all YouTube URLs since YouTube has its own Safety
83 // Mode anyway.
84 if (google_util::IsYoutubeDomainUrl(
85 url, google_util::ALLOW_SUBDOMAIN,
86 google_util::ALLOW_NON_STANDARD_PORTS)) {
87 std::move(callback).Run(url, Classification::SAFE, false);
88 return true;
89 }
90 }
91
92 auto cache_it = cache_.Get(url);
93 if (cache_it != cache_.end()) {
94 const CheckResult& result = cache_it->second;
95 base::TimeDelta age = base::TimeTicks::Now() - result.timestamp;
96 if (age < cache_timeout_) {
97 DVLOG(1) << "Cache hit! " << url.spec() << " is "
98 << (result.classification == Classification::UNSAFE ? "NOT" : "")
99 << " safe; certain: " << !result.uncertain;
100 std::move(callback).Run(url, result.classification, result.uncertain);
101 return true;
102 }
103 DVLOG(1) << "Outdated cache entry for " << url.spec() << ", purging";
104 cache_.Erase(cache_it);
105 }
106
107 // See if we already have a check in progress for this URL.
108 for (const auto& check : checks_in_progress_) {
109 if (check->url == url) {
110 DVLOG(1) << "Adding to pending check for " << url.spec();
111 check->callbacks.push_back(std::move(callback));
112 return false;
113 }
114 }
115
116 auto it = checks_in_progress_.insert(
117 checks_in_progress_.begin(),
118 std::make_unique<Check>(url, std::move(callback)));
119 async_checker_->CheckURL(url,
120 base::BindOnce(&URLChecker::OnAsyncCheckComplete,
121 base::Unretained(this), it));
122
123 return false;
124 }
125
OnAsyncCheckComplete(CheckList::iterator it,const GURL & url,ClientClassification api_classification)126 void URLChecker::OnAsyncCheckComplete(CheckList::iterator it,
127 const GURL& url,
128 ClientClassification api_classification) {
129 bool uncertain = api_classification == ClientClassification::kUnknown;
130
131 // Fallback to a |SAFE| classification when the result is not explicitly
132 // marked as restricted.
133 Classification classification = Classification::SAFE;
134 if (api_classification == ClientClassification::kRestricted) {
135 classification = Classification::UNSAFE;
136 }
137
138 std::vector<CheckCallback> callbacks = std::move(it->get()->callbacks);
139 checks_in_progress_.erase(it);
140
141 cache_.Put(url, CheckResult(classification, uncertain));
142
143 for (size_t i = 0; i < callbacks.size(); i++)
144 std::move(callbacks[i]).Run(url, classification, uncertain);
145 }
146
147 } // namespace safe_search_api
148