1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/safe_search_api/url_checker.h"
6 
7 #include <string>
8 #include <utility>
9 #include <vector>
10 
11 #include "base/bind.h"
12 #include "base/callback.h"
13 #include "base/feature_list.h"
14 #include "base/json/json_reader.h"
15 #include "base/logging.h"
16 #include "base/metrics/histogram_macros.h"
17 #include "base/stl_util.h"
18 #include "base/strings/string_piece.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/stringprintf.h"
21 #include "base/time/time.h"
22 #include "base/values.h"
23 #include "components/google/core/common/google_util.h"
24 
25 namespace safe_search_api {
26 
namespace {

// Cache size handed to the result cache by the URLChecker constructor that
// does not take an explicit |cache_size|.
constexpr size_t kDefaultCacheSize = 1000;

// Age, in seconds, after which CheckURL() treats a cached result as outdated
// and purges it (3600 s == one hour).
constexpr size_t kDefaultCacheTimeoutSeconds = 3600;

}  // namespace
33 
// Consider all URLs within a google domain to be safe. When this feature is
// enabled, CheckURL() reports Google-domain URLs (and, under the same switch,
// YouTube-domain URLs) as SAFE right away, before looking at the cache or
// asking the async checker. Disabled by default.
const base::Feature kAllowAllGoogleUrls{"SafeSearchAllowAllGoogleURLs",
                                        base::FEATURE_DISABLED_BY_DEFAULT};
37 
// Bookkeeping for one URL whose classification has been requested from the
// async checker and has not been answered yet.
struct URLChecker::Check {
  // Starts the record for |url| with |callback| as its first waiter.
  Check(const GURL& url, CheckCallback callback);
  // DCHECKs that none of the stored callbacks is still pending.
  ~Check();

  // The URL being classified.
  GURL url;
  // Everyone waiting for the result of |url|. CheckURL() appends to this list
  // instead of starting a second request while a check for the same URL is
  // already in progress.
  std::vector<CheckCallback> callbacks;
};
45 
Check(const GURL & url,CheckCallback callback)46 URLChecker::Check::Check(const GURL& url, CheckCallback callback) : url(url) {
47   callbacks.push_back(std::move(callback));
48 }
49 
~Check()50 URLChecker::Check::~Check() {
51   for (const CheckCallback& callback : callbacks) {
52     DCHECK(!callback);
53   }
54 }
55 
CheckResult(Classification classification,bool uncertain)56 URLChecker::CheckResult::CheckResult(Classification classification,
57                                      bool uncertain)
58     : classification(classification),
59       uncertain(uncertain),
60       timestamp(base::TimeTicks::Now()) {}
61 
// Convenience constructor: delegates to the two-argument overload, passing
// |kDefaultCacheSize| as the cache size.
URLChecker::URLChecker(std::unique_ptr<URLCheckerClient> async_checker)
    : URLChecker(std::move(async_checker), kDefaultCacheSize) {}
64 
// Takes ownership of |async_checker| and constructs the result cache with
// |cache_size|. The cache timeout is always initialised from
// |kDefaultCacheTimeoutSeconds|; this constructor offers no way to override it.
URLChecker::URLChecker(std::unique_ptr<URLCheckerClient> async_checker,
                       size_t cache_size)
    : async_checker_(std::move(async_checker)),
      cache_(cache_size),
      cache_timeout_(
          base::TimeDelta::FromSeconds(kDefaultCacheTimeoutSeconds)) {}
71 
// Defaulted: the members release what they own.
// NOTE(review): any Check still held in |checks_in_progress_| is destroyed
// here, and ~Check() DCHECKs that its callbacks are already null -- confirm
// that destroying a URLChecker with checks in flight is expected/handled.
URLChecker::~URLChecker() = default;
73 
CheckURL(const GURL & url,CheckCallback callback)74 bool URLChecker::CheckURL(const GURL& url, CheckCallback callback) {
75   if (base::FeatureList::IsEnabled(kAllowAllGoogleUrls)) {
76     // Hack: For now, allow all Google URLs to save QPS.
77     if (google_util::IsGoogleDomainUrl(url, google_util::ALLOW_SUBDOMAIN,
78                                        google_util::ALLOW_NON_STANDARD_PORTS)) {
79       std::move(callback).Run(url, Classification::SAFE, false);
80       return true;
81     }
82     // Hack: For now, allow all YouTube URLs since YouTube has its own Safety
83     // Mode anyway.
84     if (google_util::IsYoutubeDomainUrl(
85             url, google_util::ALLOW_SUBDOMAIN,
86             google_util::ALLOW_NON_STANDARD_PORTS)) {
87       std::move(callback).Run(url, Classification::SAFE, false);
88       return true;
89     }
90   }
91 
92   auto cache_it = cache_.Get(url);
93   if (cache_it != cache_.end()) {
94     const CheckResult& result = cache_it->second;
95     base::TimeDelta age = base::TimeTicks::Now() - result.timestamp;
96     if (age < cache_timeout_) {
97       DVLOG(1) << "Cache hit! " << url.spec() << " is "
98                << (result.classification == Classification::UNSAFE ? "NOT" : "")
99                << " safe; certain: " << !result.uncertain;
100       std::move(callback).Run(url, result.classification, result.uncertain);
101       return true;
102     }
103     DVLOG(1) << "Outdated cache entry for " << url.spec() << ", purging";
104     cache_.Erase(cache_it);
105   }
106 
107   // See if we already have a check in progress for this URL.
108   for (const auto& check : checks_in_progress_) {
109     if (check->url == url) {
110       DVLOG(1) << "Adding to pending check for " << url.spec();
111       check->callbacks.push_back(std::move(callback));
112       return false;
113     }
114   }
115 
116   auto it = checks_in_progress_.insert(
117       checks_in_progress_.begin(),
118       std::make_unique<Check>(url, std::move(callback)));
119   async_checker_->CheckURL(url,
120                            base::BindOnce(&URLChecker::OnAsyncCheckComplete,
121                                           base::Unretained(this), it));
122 
123   return false;
124 }
125 
OnAsyncCheckComplete(CheckList::iterator it,const GURL & url,ClientClassification api_classification)126 void URLChecker::OnAsyncCheckComplete(CheckList::iterator it,
127                                       const GURL& url,
128                                       ClientClassification api_classification) {
129   bool uncertain = api_classification == ClientClassification::kUnknown;
130 
131   // Fallback to a |SAFE| classification when the result is not explicitly
132   // marked as restricted.
133   Classification classification = Classification::SAFE;
134   if (api_classification == ClientClassification::kRestricted) {
135     classification = Classification::UNSAFE;
136   }
137 
138   std::vector<CheckCallback> callbacks = std::move(it->get()->callbacks);
139   checks_in_progress_.erase(it);
140 
141   cache_.Put(url, CheckResult(classification, uncertain));
142 
143   for (size_t i = 0; i < callbacks.size(); i++)
144     std::move(callbacks[i]).Run(url, classification, uncertain);
145 }
146 
147 }  // namespace safe_search_api
148