1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/safe_browsing/core/verdict_cache_manager.h"
6 
7 #include "base/base64.h"
8 #include "base/command_line.h"
9 #include "base/metrics/histogram_functions.h"
10 #include "base/metrics/histogram_macros.h"
11 #include "base/optional.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "base/task/post_task.h"
15 #include "base/time/time.h"
16 #include "components/history/core/browser/history_service_observer.h"
17 #include "components/safe_browsing/core/common/thread_utils.h"
18 #include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
19 #include "components/safe_browsing/core/proto/csd.pb.h"
20 
21 namespace safe_browsing {
22 
23 namespace {
24 
// Keys used when storing verdicts in a DictionaryValue. kCacheCreationTime,
// kVerdictProto and kRealTimeThreatInfoProto name the fields of a single
// verdict entry; kPasswordOnFocusCacheKey and kRealTimeUrlCacheKey name the
// per-type verdict dictionary inside an origin's cache dictionary.
const char kCacheCreationTime[] = "cache_creation_time";
const char kVerdictProto[] = "verdict_proto";
const char kRealTimeThreatInfoProto[] = "rt_threat_info_proto";
const char kPasswordOnFocusCacheKey[] = "password_on_focus_cache_key";
const char kRealTimeUrlCacheKey[] = "real_time_url_cache_key";

// Command-line flag for caching an artificial unsafe verdict.
const char kUnsafeUrlFlag[] = "mark_as_real_time_phishing";

// The maximum number of entries to be removed in a single cleanup. Removing too
// many entries all at once could cause jank.
const int kMaxRemovedEntriesCount = 1000;

// The delay, in seconds, between construction and the first cleanup.
const int kCleanUpIntervalInitSecond = 120;

// The interval, in seconds, between subsequent cleanup tasks.
const int kCleanUpIntervalSecond = 1800;
44 
// Bundles every flag that decides whether a cache entry counts as a match for
// the URL being looked up, so the decision is made in one place.
struct MatchParams {
  // Returns true if the current cache entry should be treated as a match:
  // either fuzzy matching is allowed for the entry, or both the host and the
  // path are identical to those of the URL. Does not modify the params, so it
  // can be called on const instances.
  bool ShouldMatch() const {
    return !is_only_exact_match_allowed || (is_exact_host && is_exact_path);
  }

  // Indicates whether the current cache entry and the url have the same host.
  bool is_exact_host = false;
  // Indicates whether the current cache entry and the url have the same path.
  bool is_exact_path = false;
  // Indicates whether the current cache entry is only applicable for exact
  // match. Defaults to the strictest setting.
  bool is_only_exact_match_allowed = true;
};
65 
66 // Given a URL of either http or https scheme, return its http://hostname.
67 // e.g., "https://www.foo.com:80/bar/test.cgi" -> "http://www.foo.com".
GetHostNameWithHTTPScheme(const GURL & url)68 GURL GetHostNameWithHTTPScheme(const GURL& url) {
69   DCHECK(url.SchemeIsHTTPOrHTTPS());
70   std::string result(url::kHttpScheme);
71   result.append(url::kStandardSchemeSeparator).append(url.host());
72   return GURL(result);
73 }
74 // e.g, ("www.foo.com", "/bar/test.cgi") -> "http://www.foo.com/bar/test/cgi"
GetUrlWithHostAndPath(const std::string & host,const std::string & path)75 GURL GetUrlWithHostAndPath(const std::string& host, const std::string& path) {
76   std::string result(url::kHttpScheme);
77   result.append(url::kStandardSchemeSeparator).append(host).append(path);
78   return GURL(result);
79 }
80 
81 // e.g, "www.foo.com/bar/test/cgi" -> "http://www.foo.com"
GetHostNameFromCacheExpression(const std::string & cache_expression)82 GURL GetHostNameFromCacheExpression(const std::string& cache_expression) {
83   std::string cache_expression_url(url::kHttpScheme);
84   cache_expression_url.append(url::kStandardSchemeSeparator)
85       .append(cache_expression);
86   return GetHostNameWithHTTPScheme(GURL(cache_expression_url));
87 }
88 
89 // Convert a Proto object into a DictionaryValue.
90 template <class T>
CreateDictionaryFromVerdict(const T & verdict,const base::Time & receive_time,const char * proto_name)91 std::unique_ptr<base::DictionaryValue> CreateDictionaryFromVerdict(
92     const T& verdict,
93     const base::Time& receive_time,
94     const char* proto_name) {
95   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
96   std::unique_ptr<base::DictionaryValue> result =
97       std::make_unique<base::DictionaryValue>();
98   result->SetInteger(kCacheCreationTime,
99                      static_cast<int>(receive_time.ToDoubleT()));
100   std::string serialized_proto(verdict.SerializeAsString());
101   // Performs a base64 encoding on the serialized proto.
102   base::Base64Encode(serialized_proto, &serialized_proto);
103   result->SetString(proto_name, serialized_proto);
104   return result;
105 }
106 
107 // Generate path variants of the given URL.
GeneratePathVariantsWithoutQuery(const GURL & url,std::vector<std::string> * paths)108 void GeneratePathVariantsWithoutQuery(const GURL& url,
109                                       std::vector<std::string>* paths) {
110   std::string canonical_path;
111   V4ProtocolManagerUtil::CanonicalizeUrl(
112       url, /*canonicalized_hostname=*/nullptr, &canonical_path,
113       /*canonicalized_query=*/nullptr);
114   V4ProtocolManagerUtil::GeneratePathVariantsToCheck(canonical_path,
115                                                      std::string(), paths);
116 }
117 
118 template <class T>
ParseVerdictEntry(base::Value * verdict_entry,int * out_verdict_received_time,T * out_verdict,const char * proto_name)119 bool ParseVerdictEntry(base::Value* verdict_entry,
120                        int* out_verdict_received_time,
121                        T* out_verdict,
122                        const char* proto_name) {
123   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
124 
125   if (!verdict_entry || !verdict_entry->is_dict() || !out_verdict)
126     return false;
127   base::Value* cache_creation_time_value =
128       verdict_entry->FindKey(kCacheCreationTime);
129 
130   if (!cache_creation_time_value || !cache_creation_time_value->is_int())
131     return false;
132   *out_verdict_received_time = cache_creation_time_value->GetInt();
133 
134   base::Value* verdict_proto_value = verdict_entry->FindKey(proto_name);
135   if (!verdict_proto_value || !verdict_proto_value->is_string())
136     return false;
137   std::string serialized_proto = verdict_proto_value->GetString();
138 
139   return base::Base64Decode(serialized_proto, &serialized_proto) &&
140          out_verdict->ParseFromString(serialized_proto);
141 }
142 
143 // Return the path of the cache expression. e.g.:
144 // "www.google.com"     -> ""
145 // "www.google.com/abc" -> "/abc"
146 // "foo.com/foo/bar/"  -> "/foo/bar/"
GetCacheExpressionPath(const std::string & cache_expression)147 std::string GetCacheExpressionPath(const std::string& cache_expression) {
148   DCHECK(!cache_expression.empty());
149   size_t first_slash_pos = cache_expression.find_first_of("/");
150   if (first_slash_pos == std::string::npos)
151     return "";
152   return cache_expression.substr(first_slash_pos);
153 }
154 
155 // Returns the number of path segments in |cache_expression_path|.
156 // For example, return 0 for "/", since there is no path after the leading
157 // slash; return 3 for "/abc/def/gh.html".
GetPathDepth(const std::string & cache_expression_path)158 size_t GetPathDepth(const std::string& cache_expression_path) {
159   return base::SplitString(base::StringPiece(cache_expression_path), "/",
160                            base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY)
161       .size();
162 }
163 
GetHostDepth(const std::string & hostname)164 size_t GetHostDepth(const std::string& hostname) {
165   return base::SplitString(base::StringPiece(hostname), ".",
166                            base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY)
167       .size();
168 }
169 
PathVariantsMatchCacheExpression(const std::vector<std::string> & generated_paths,const std::string & cache_expression_path)170 bool PathVariantsMatchCacheExpression(
171     const std::vector<std::string>& generated_paths,
172     const std::string& cache_expression_path) {
173   return base::Contains(generated_paths, cache_expression_path);
174 }
175 
IsCacheExpired(int cache_creation_time,int cache_duration)176 bool IsCacheExpired(int cache_creation_time, int cache_duration) {
177   // Note that we assume client's clock is accurate or almost accurate.
178   return base::Time::Now().ToDoubleT() >
179          static_cast<double>(cache_creation_time + cache_duration);
180 }
181 
182 template <class T>
RemoveExpiredEntries(base::Value * verdict_dictionary,const char * proto_name)183 size_t RemoveExpiredEntries(base::Value* verdict_dictionary,
184                             const char* proto_name) {
185   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
186   std::vector<std::string> expired_keys;
187   for (const auto& item : verdict_dictionary->DictItems()) {
188     int verdict_received_time;
189     T verdict;
190     if (!ParseVerdictEntry<T>(&item.second, &verdict_received_time, &verdict,
191                               proto_name) ||
192         IsCacheExpired(verdict_received_time, verdict.cache_duration_sec())) {
193       expired_keys.push_back(item.first);
194     }
195   }
196 
197   for (const std::string& key : expired_keys)
198     verdict_dictionary->RemoveKey(key);
199 
200   return expired_keys.size();
201 }
202 
GetKeyOfTypeFromTriggerType(LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type)203 std::string GetKeyOfTypeFromTriggerType(
204     LoginReputationClientRequest::TriggerType trigger_type,
205     ReusedPasswordAccountType password_type) {
206   return trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
207              ? kPasswordOnFocusCacheKey
208              : base::NumberToString(
209                    static_cast<std::underlying_type_t<
210                        ReusedPasswordAccountType::AccountType>>(
211                        password_type.account_type()));
212 }
213 
214 // If the verdict doesn't have |cache_expression_match_type| field, always
215 // interpret it as exact match only.
216 template <typename T>
IsOnlyExactMatchAllowed(T verdict)217 bool IsOnlyExactMatchAllowed(T verdict) {
218   NOTREACHED();
219   return true;
220 }
221 template <>
IsOnlyExactMatchAllowed(RTLookupResponse::ThreatInfo verdict)222 bool IsOnlyExactMatchAllowed<RTLookupResponse::ThreatInfo>(
223     RTLookupResponse::ThreatInfo verdict) {
224   return verdict.cache_expression_match_type() ==
225          RTLookupResponse::ThreatInfo::EXACT_MATCH;
226 }
227 // Always do fuzzy matching for password protection verdicts.
228 template <>
IsOnlyExactMatchAllowed(LoginReputationClientResponse verdict)229 bool IsOnlyExactMatchAllowed<LoginReputationClientResponse>(
230     LoginReputationClientResponse verdict) {
231   return false;
232 }
233 
234 template <typename T>
GetCacheExpression(T verdict)235 std::string GetCacheExpression(T verdict) {
236   NOTREACHED();
237   return "";
238 }
239 
240 template <>
GetCacheExpression(RTLookupResponse::ThreatInfo verdict)241 std::string GetCacheExpression<RTLookupResponse::ThreatInfo>(
242     RTLookupResponse::ThreatInfo verdict) {
243   // The old cache doesn't have |cache_expression_using_match_type| field
244   // setup, so it should fallback to |cache_expression| field. This check
245   // should be removed once |cache_expression| field is deprecated in
246   // RTLookupResponse.
247   if (verdict.cache_expression_match_type() ==
248       RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED)
249     return verdict.cache_expression();
250   return verdict.cache_expression_using_match_type();
251 }
252 
253 template <>
GetCacheExpression(LoginReputationClientResponse verdict)254 std::string GetCacheExpression<LoginReputationClientResponse>(
255     LoginReputationClientResponse verdict) {
256   return verdict.cache_expression();
257 }
258 
259 template <class T>
GetMostMatchingCachedVerdictWithPathMatching(const GURL & url,const std::string & type_key,scoped_refptr<HostContentSettingsMap> content_settings,const ContentSettingsType contents_setting_type,const char * proto_name,T * out_response,MatchParams match_params)260 typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
261     const GURL& url,
262     const std::string& type_key,
263     scoped_refptr<HostContentSettingsMap> content_settings,
264     const ContentSettingsType contents_setting_type,
265     const char* proto_name,
266     T* out_response,
267     MatchParams match_params) {
268   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
269 
270   GURL hostname = GetHostNameWithHTTPScheme(url);
271   std::unique_ptr<base::DictionaryValue> cache_dictionary =
272       base::DictionaryValue::From(content_settings->GetWebsiteSetting(
273           hostname, GURL(), contents_setting_type, nullptr));
274 
275   if (!cache_dictionary || cache_dictionary->empty())
276     return T::VERDICT_TYPE_UNSPECIFIED;
277 
278   base::Value* verdict_dictionary =
279       cache_dictionary->FindKeyOfType(type_key, base::Value::Type::DICTIONARY);
280   if (!verdict_dictionary) {
281     return T::VERDICT_TYPE_UNSPECIFIED;
282   }
283 
284   std::vector<std::string> paths;
285   GeneratePathVariantsWithoutQuery(url, &paths);
286 
287   std::string root_path;
288   V4ProtocolManagerUtil::CanonicalizeUrl(
289       url, /*canonicalized_hostname*/ nullptr, &root_path,
290       /*canonicalized_query*/ nullptr);
291 
292   int max_path_depth = -1;
293   typename T::VerdictType most_matching_verdict_type =
294       T::VERDICT_TYPE_UNSPECIFIED;
295   // For all the verdicts of the same origin, we key them by |cache_expression|.
296   // Its corresponding value is a DictionaryValue contains its creation time and
297   // the serialized verdict proto.
298   for (const auto& item : verdict_dictionary->DictItems()) {
299     int verdict_received_time;
300     T verdict;
301     // Ignore any entry that we cannot parse. These invalid entries will be
302     // cleaned up during shutdown.
303     if (!ParseVerdictEntry<T>(&item.second, &verdict_received_time, &verdict,
304                               proto_name))
305       continue;
306     // Since verdict content settings are keyed by origin, we only need to
307     // compare the path part of the cache_expression and the given url.
308     std::string cache_expression_path =
309         GetCacheExpressionPath(GetCacheExpression(verdict));
310 
311     match_params.is_only_exact_match_allowed = IsOnlyExactMatchAllowed(verdict);
312     match_params.is_exact_path = (root_path == cache_expression_path);
313     // Finds the most specific match.
314     int path_depth = static_cast<int>(GetPathDepth(cache_expression_path));
315     if (path_depth > max_path_depth &&
316         PathVariantsMatchCacheExpression(paths, cache_expression_path) &&
317         match_params.ShouldMatch()) {
318       max_path_depth = path_depth;
319       // If the most matching verdict is expired, set the result to
320       // VERDICT_TYPE_UNSPECIFIED.
321       most_matching_verdict_type =
322           IsCacheExpired(verdict_received_time, verdict.cache_duration_sec())
323               ? T::VERDICT_TYPE_UNSPECIFIED
324               : verdict.verdict_type();
325       out_response->CopyFrom(verdict);
326     }
327   }
328   return most_matching_verdict_type;
329 }
330 
331 template <class T>
GetMostMatchingCachedVerdictWithHostAndPathMatching(const GURL & url,const std::string & type_key,scoped_refptr<HostContentSettingsMap> content_settings,const ContentSettingsType contents_setting_type,const char * proto_name,T * out_response)332 typename T::VerdictType GetMostMatchingCachedVerdictWithHostAndPathMatching(
333     const GURL& url,
334     const std::string& type_key,
335     scoped_refptr<HostContentSettingsMap> content_settings,
336     const ContentSettingsType contents_setting_type,
337     const char* proto_name,
338     T* out_response) {
339   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
340   auto most_matching_verdict_type = T::VERDICT_TYPE_UNSPECIFIED;
341   MatchParams match_params;
342 
343   std::string root_host, root_path;
344   V4ProtocolManagerUtil::CanonicalizeUrl(url, &root_host, &root_path,
345                                          /*canonicalized_query*/ nullptr);
346   std::vector<std::string> host_variants;
347   V4ProtocolManagerUtil::GenerateHostVariantsToCheck(root_host, &host_variants);
348   int max_path_depth = -1;
349   for (const auto& host : host_variants) {
350     int depth = static_cast<int>(GetHostDepth(host));
351     GURL url_to_check = GetUrlWithHostAndPath(host, root_path);
352     match_params.is_exact_host = (root_host == host);
353     auto verdict_type = GetMostMatchingCachedVerdictWithPathMatching<T>(
354         url_to_check, type_key, content_settings, contents_setting_type,
355         proto_name, out_response, match_params);
356     if (depth > max_path_depth && verdict_type != T::VERDICT_TYPE_UNSPECIFIED) {
357       max_path_depth = depth;
358       most_matching_verdict_type = verdict_type;
359     }
360   }
361 
362   return most_matching_verdict_type;
363 }
364 
365 }  // namespace
366 
VerdictCacheManager(history::HistoryService * history_service,scoped_refptr<HostContentSettingsMap> content_settings)367 VerdictCacheManager::VerdictCacheManager(
368     history::HistoryService* history_service,
369     scoped_refptr<HostContentSettingsMap> content_settings)
370     : stored_verdict_count_password_on_focus_(base::nullopt),
371       stored_verdict_count_password_entry_(base::nullopt),
372       stored_verdict_count_real_time_url_check_(base::nullopt),
373       content_settings_(content_settings) {
374   if (history_service)
375     history_service_observation_.Observe(history_service);
376   if (!content_settings->IsOffTheRecord()) {
377     ScheduleNextCleanUpAfterInterval(
378         base::TimeDelta::FromSeconds(kCleanUpIntervalInitSecond));
379   }
380   CacheArtificialVerdict();
381 }
382 
Shutdown()383 void VerdictCacheManager::Shutdown() {
384   CleanUpExpiredVerdicts();
385   if (history_service_observation_.IsObserving())
386     history_service_observation_.RemoveObservation();
387   weak_factory_.InvalidateWeakPtrs();
388 }
389 
~VerdictCacheManager()390 VerdictCacheManager::~VerdictCacheManager() {}
391 
CachePhishGuardVerdict(LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type,const LoginReputationClientResponse & verdict,const base::Time & receive_time)392 void VerdictCacheManager::CachePhishGuardVerdict(
393     LoginReputationClientRequest::TriggerType trigger_type,
394     ReusedPasswordAccountType password_type,
395     const LoginReputationClientResponse& verdict,
396     const base::Time& receive_time) {
397   DCHECK(content_settings_);
398   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
399          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
400 
401   GURL hostname = GetHostNameFromCacheExpression(GetCacheExpression(verdict));
402 
403   std::unique_ptr<base::DictionaryValue> cache_dictionary =
404       base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
405           hostname, GURL(), ContentSettingsType::PASSWORD_PROTECTION, nullptr));
406 
407   if (!cache_dictionary)
408     cache_dictionary = std::make_unique<base::DictionaryValue>();
409 
410   std::unique_ptr<base::DictionaryValue> verdict_entry(
411       CreateDictionaryFromVerdict<LoginReputationClientResponse>(
412           verdict, receive_time, kVerdictProto));
413 
414   std::string type_key =
415       GetKeyOfTypeFromTriggerType(trigger_type, password_type);
416   base::Value* verdict_dictionary =
417       cache_dictionary->FindKeyOfType(type_key, base::Value::Type::DICTIONARY);
418   if (!verdict_dictionary) {
419     verdict_dictionary = cache_dictionary->SetKey(
420         type_key, base::Value(base::Value::Type::DICTIONARY));
421   }
422 
423   // Increases stored verdict count if we haven't seen this cache expression
424   // before.
425   if (!verdict_dictionary->FindKey(GetCacheExpression(verdict))) {
426     base::Optional<size_t>* stored_verdict_count =
427         trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
428             ? &stored_verdict_count_password_on_focus_
429             : &stored_verdict_count_password_entry_;
430     *stored_verdict_count = GetStoredPhishGuardVerdictCount(trigger_type) + 1;
431   }
432 
433   // If same cache_expression is already in this verdict_dictionary, we simply
434   // override it.
435   verdict_dictionary->SetKey(
436       GetCacheExpression(verdict),
437       base::Value::FromUniquePtrValue(std::move(verdict_entry)));
438   content_settings_->SetWebsiteSettingDefaultScope(
439       hostname, GURL(), ContentSettingsType::PASSWORD_PROTECTION,
440       std::move(cache_dictionary));
441 }
442 
443 LoginReputationClientResponse::VerdictType
GetCachedPhishGuardVerdict(const GURL & url,LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type,LoginReputationClientResponse * out_response)444 VerdictCacheManager::GetCachedPhishGuardVerdict(
445     const GURL& url,
446     LoginReputationClientRequest::TriggerType trigger_type,
447     ReusedPasswordAccountType password_type,
448     LoginReputationClientResponse* out_response) {
449   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
450          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
451 
452   std::string type_key =
453       GetKeyOfTypeFromTriggerType(trigger_type, password_type);
454   return GetMostMatchingCachedVerdictWithHostAndPathMatching<
455       LoginReputationClientResponse>(url, type_key, content_settings_,
456                                      ContentSettingsType::PASSWORD_PROTECTION,
457                                      kVerdictProto, out_response);
458 }
459 
GetStoredPhishGuardVerdictCount(LoginReputationClientRequest::TriggerType trigger_type)460 size_t VerdictCacheManager::GetStoredPhishGuardVerdictCount(
461     LoginReputationClientRequest::TriggerType trigger_type) {
462   DCHECK(content_settings_);
463   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
464          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
465   base::Optional<size_t>* stored_verdict_count =
466       trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
467           ? &stored_verdict_count_password_on_focus_
468           : &stored_verdict_count_password_entry_;
469   // If we have already computed this, return its value.
470   if (stored_verdict_count->has_value())
471     return stored_verdict_count->value();
472 
473   ContentSettingsForOneType settings;
474   content_settings_->GetSettingsForOneType(
475       ContentSettingsType::PASSWORD_PROTECTION, &settings);
476   stored_verdict_count_password_on_focus_ = 0;
477   stored_verdict_count_password_entry_ = 0;
478   for (const ContentSettingPatternSource& source : settings) {
479     for (const auto& item : source.setting_value.DictItems()) {
480       if (item.first == base::StringPiece(kPasswordOnFocusCacheKey)) {
481         stored_verdict_count_password_on_focus_.value() +=
482             item.second.DictSize();
483       } else {
484         stored_verdict_count_password_entry_.value() += item.second.DictSize();
485       }
486     }
487   }
488   return stored_verdict_count->value();
489 }
490 
GetStoredRealTimeUrlCheckVerdictCount()491 size_t VerdictCacheManager::GetStoredRealTimeUrlCheckVerdictCount() {
492   // If we have already computed this, return its value.
493   if (stored_verdict_count_real_time_url_check_.has_value())
494     return stored_verdict_count_real_time_url_check_.value();
495 
496   ContentSettingsForOneType settings;
497   content_settings_->GetSettingsForOneType(
498       ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, &settings);
499   stored_verdict_count_real_time_url_check_ = 0;
500   for (const ContentSettingPatternSource& source : settings) {
501     for (const auto& item : source.setting_value.DictItems()) {
502       if (item.first == base::StringPiece(kRealTimeUrlCacheKey)) {
503         stored_verdict_count_real_time_url_check_.value() +=
504             item.second.DictSize();
505       }
506     }
507   }
508   return stored_verdict_count_real_time_url_check_.value();
509 }
510 
CacheRealTimeUrlVerdict(const GURL & url,const RTLookupResponse & verdict,const base::Time & receive_time,bool store_old_cache)511 void VerdictCacheManager::CacheRealTimeUrlVerdict(
512     const GURL& url,
513     const RTLookupResponse& verdict,
514     const base::Time& receive_time,
515     bool store_old_cache) {
516   std::vector<std::string> visited_cache_expressions;
517   for (const auto& threat_info : verdict.threat_info()) {
518     // If |cache_expression_match_type| is unspecified, ignore this entry.
519     if (threat_info.cache_expression_match_type() ==
520             RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED &&
521         !store_old_cache) {
522       continue;
523     }
524     std::string cache_expression = store_old_cache
525                                        ? threat_info.cache_expression()
526                                        : GetCacheExpression(threat_info);
527     // TODO(crbug.com/1033692): For the same cache_expression, threat_info is in
528     // decreasing order of severity. To avoid lower severity threat being
529     // overridden by higher one, only store threat info that is first seen for a
530     // cache expression.
531     if (base::Contains(visited_cache_expressions, cache_expression))
532       continue;
533 
534     GURL hostname = GetHostNameFromCacheExpression(cache_expression);
535     std::unique_ptr<base::DictionaryValue> cache_dictionary =
536         base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
537             hostname, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
538             nullptr));
539 
540     if (!cache_dictionary)
541       cache_dictionary = std::make_unique<base::DictionaryValue>();
542 
543     base::Value* verdict_dictionary = cache_dictionary->FindKeyOfType(
544         kRealTimeUrlCacheKey, base::Value::Type::DICTIONARY);
545     if (!verdict_dictionary) {
546       verdict_dictionary = cache_dictionary->SetKey(
547           kRealTimeUrlCacheKey, base::Value(base::Value::Type::DICTIONARY));
548     }
549 
550     std::unique_ptr<base::DictionaryValue> threat_info_entry(
551         CreateDictionaryFromVerdict<RTLookupResponse::ThreatInfo>(
552             threat_info, receive_time, kRealTimeThreatInfoProto));
553     // Increases stored verdict count if we haven't seen this cache expression
554     // before.
555     if (!verdict_dictionary->FindKey(cache_expression)) {
556       stored_verdict_count_real_time_url_check_ =
557           GetStoredRealTimeUrlCheckVerdictCount() + 1;
558     }
559 
560     verdict_dictionary->SetKey(
561         cache_expression,
562         base::Value::FromUniquePtrValue(std::move(threat_info_entry)));
563     visited_cache_expressions.push_back(cache_expression);
564 
565     content_settings_->SetWebsiteSettingDefaultScope(
566         hostname, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
567         std::move(cache_dictionary));
568   }
569   base::UmaHistogramCounts10000(
570       "SafeBrowsing.RT.CacheManager.RealTimeVerdictCount",
571       GetStoredRealTimeUrlCheckVerdictCount());
572 }
573 
574 RTLookupResponse::ThreatInfo::VerdictType
GetCachedRealTimeUrlVerdict(const GURL & url,RTLookupResponse::ThreatInfo * out_threat_info)575 VerdictCacheManager::GetCachedRealTimeUrlVerdict(
576     const GURL& url,
577     RTLookupResponse::ThreatInfo* out_threat_info) {
578   return GetMostMatchingCachedVerdictWithHostAndPathMatching<
579       RTLookupResponse::ThreatInfo>(
580       url, kRealTimeUrlCacheKey, content_settings_,
581       ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
582       kRealTimeThreatInfoProto, out_threat_info);
583 }
584 
ScheduleNextCleanUpAfterInterval(base::TimeDelta interval)585 void VerdictCacheManager::ScheduleNextCleanUpAfterInterval(
586     base::TimeDelta interval) {
587   cleanup_timer_.Stop();
588   cleanup_timer_.Start(FROM_HERE, interval, this,
589                        &VerdictCacheManager::CleanUpExpiredVerdicts);
590 }
591 
CleanUpExpiredVerdicts()592 void VerdictCacheManager::CleanUpExpiredVerdicts() {
593   DCHECK(content_settings_);
594   SCOPED_UMA_HISTOGRAM_TIMER("SafeBrowsing.RT.CacheManager.CleanUpTime");
595   CleanUpExpiredPhishGuardVerdicts();
596   CleanUpExpiredRealTimeUrlCheckVerdicts();
597   ScheduleNextCleanUpAfterInterval(
598       base::TimeDelta::FromSeconds(kCleanUpIntervalSecond));
599 }
600 
CleanUpExpiredPhishGuardVerdicts()601 void VerdictCacheManager::CleanUpExpiredPhishGuardVerdicts() {
602   if (GetStoredPhishGuardVerdictCount(
603           LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) <= 0 &&
604       GetStoredPhishGuardVerdictCount(
605           LoginReputationClientRequest::PASSWORD_REUSE_EVENT) <= 0)
606     return;
607 
608   ContentSettingsForOneType password_protection_settings;
609   content_settings_->GetSettingsForOneType(
610       ContentSettingsType::PASSWORD_PROTECTION, &password_protection_settings);
611 
612   int removed_count = 0;
613   for (ContentSettingPatternSource& source : password_protection_settings) {
614     // Find all verdicts associated with this origin.
615     std::unique_ptr<base::Value> cache_dictionary =
616         base::Value::ToUniquePtrValue(std::move(source.setting_value));
617     bool has_expired_password_on_focus_entry = RemoveExpiredPhishGuardVerdicts(
618         LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE,
619         cache_dictionary.get());
620     bool has_expired_password_reuse_entry = RemoveExpiredPhishGuardVerdicts(
621         LoginReputationClientRequest::PASSWORD_REUSE_EVENT,
622         cache_dictionary.get());
623 
624     if (!cache_dictionary->DictEmpty() &&
625         !has_expired_password_on_focus_entry &&
626         !has_expired_password_reuse_entry) {
627       continue;
628     }
629 
630     // Set the website setting of this origin with the updated
631     // |cache_dictionary|.
632     content_settings_->SetWebsiteSettingCustomScope(
633         source.primary_pattern, source.secondary_pattern,
634         ContentSettingsType::PASSWORD_PROTECTION,
635         cache_dictionary->DictEmpty() ? nullptr : std::move(cache_dictionary));
636 
637     if ((++removed_count) == kMaxRemovedEntriesCount) {
638       return;
639     }
640   }
641 }
642 
CleanUpExpiredRealTimeUrlCheckVerdicts()643 void VerdictCacheManager::CleanUpExpiredRealTimeUrlCheckVerdicts() {
644   if (GetStoredRealTimeUrlCheckVerdictCount() == 0) {
645     return;
646   }
647   ContentSettingsForOneType safe_browsing_url_check_data_settings;
648   content_settings_->GetSettingsForOneType(
649       ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
650       &safe_browsing_url_check_data_settings);
651 
652   int removed_count = 0;
653   for (ContentSettingPatternSource& source :
654        safe_browsing_url_check_data_settings) {
655     // Find all verdicts associated with this origin.
656     std::unique_ptr<base::Value> cache_dictionary =
657         base::Value::ToUniquePtrValue(std::move(source.setting_value));
658     bool has_expired_entry =
659         RemoveExpiredRealTimeUrlCheckVerdicts(cache_dictionary.get());
660 
661     if (!cache_dictionary->DictEmpty() && !has_expired_entry) {
662       continue;
663     }
664 
665     // Set the website setting of this origin with the updated
666     // |cache_dictionary|.
667     content_settings_->SetWebsiteSettingCustomScope(
668         source.primary_pattern, source.secondary_pattern,
669         ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
670         cache_dictionary->DictEmpty() ? nullptr : std::move(cache_dictionary));
671 
672     if ((++removed_count) == kMaxRemovedEntriesCount) {
673       return;
674     }
675   }
676 }
677 
678 // Overridden from history::HistoryServiceObserver.
OnURLsDeleted(history::HistoryService * history_service,const history::DeletionInfo & deletion_info)679 void VerdictCacheManager::OnURLsDeleted(
680     history::HistoryService* history_service,
681     const history::DeletionInfo& deletion_info) {
682   base::PostTask(FROM_HERE, CreateTaskTraits(ThreadID::UI),
683                  base::BindRepeating(
684                      &VerdictCacheManager::RemoveContentSettingsOnURLsDeleted,
685                      GetWeakPtr(), deletion_info.IsAllHistory(),
686                      deletion_info.deleted_rows()));
687 }
688 
// Overridden from history::HistoryServiceObserver.
// Stops observing |history_service|. In DCHECK-enabled builds this verifies
// that |history_service| is the source tracked by
// |history_service_observation_| before the observation is removed.
void VerdictCacheManager::HistoryServiceBeingDeleted(
    history::HistoryService* history_service) {
  DCHECK(history_service_observation_.IsObservingSource(history_service));
  history_service_observation_.RemoveObservation();
}
695 
RemoveExpiredPhishGuardVerdicts(LoginReputationClientRequest::TriggerType trigger_type,base::Value * cache_dictionary)696 bool VerdictCacheManager::RemoveExpiredPhishGuardVerdicts(
697     LoginReputationClientRequest::TriggerType trigger_type,
698     base::Value* cache_dictionary) {
699   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
700          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
701   if (!cache_dictionary || cache_dictionary->DictEmpty())
702     return false;
703 
704   size_t verdicts_removed = 0;
705   std::vector<std::string> empty_keys;
706   for (auto item : cache_dictionary->DictItems()) {
707     if (trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE &&
708         item.first == std::string(kPasswordOnFocusCacheKey)) {
709       size_t removed_cnt = RemoveExpiredEntries<LoginReputationClientResponse>(
710           &item.second, kVerdictProto);
711       verdicts_removed += removed_cnt;
712       if (stored_verdict_count_password_on_focus_.has_value())
713         stored_verdict_count_password_on_focus_.value() -= removed_cnt;
714     } else {
715       size_t removed_cnt = RemoveExpiredEntries<LoginReputationClientResponse>(
716           &item.second, kVerdictProto);
717       verdicts_removed += removed_cnt;
718       if (stored_verdict_count_password_entry_.has_value())
719         stored_verdict_count_password_entry_.value() -= removed_cnt;
720     }
721 
722     if (item.second.DictSize() == 0U)
723       empty_keys.push_back(item.first);
724   }
725   for (const auto& key : empty_keys)
726     cache_dictionary->RemoveKey(key);
727 
728   return verdicts_removed > 0U;
729 }
730 
RemoveExpiredRealTimeUrlCheckVerdicts(base::Value * cache_dictionary)731 bool VerdictCacheManager::RemoveExpiredRealTimeUrlCheckVerdicts(
732     base::Value* cache_dictionary) {
733   if (!cache_dictionary || cache_dictionary->DictEmpty())
734     return false;
735 
736   size_t verdicts_removed = 0;
737   std::vector<std::string> empty_keys;
738   for (auto item : cache_dictionary->DictItems()) {
739     size_t removed_cnt = RemoveExpiredEntries<RTLookupResponse::ThreatInfo>(
740         &item.second, kRealTimeThreatInfoProto);
741     verdicts_removed += removed_cnt;
742     if (stored_verdict_count_real_time_url_check_.has_value())
743       stored_verdict_count_real_time_url_check_.value() -= removed_cnt;
744     if (item.second.DictSize() == 0U)
745       empty_keys.push_back(item.first);
746   }
747   for (const auto& key : empty_keys)
748     cache_dictionary->RemoveKey(key);
749 
750   return verdicts_removed > 0U;
751 }
752 
RemoveContentSettingsOnURLsDeleted(bool all_history,const history::URLRows & deleted_rows)753 void VerdictCacheManager::RemoveContentSettingsOnURLsDeleted(
754     bool all_history,
755     const history::URLRows& deleted_rows) {
756   DCHECK(CurrentlyOnThread(ThreadID::UI));
757   DCHECK(content_settings_);
758 
759   if (all_history) {
760     content_settings_->ClearSettingsForOneType(
761         ContentSettingsType::PASSWORD_PROTECTION);
762     stored_verdict_count_password_on_focus_ = 0;
763     stored_verdict_count_password_entry_ = 0;
764     stored_verdict_count_real_time_url_check_ = 0;
765     content_settings_->ClearSettingsForOneType(
766         ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA);
767     return;
768   }
769 
770   // For now, if a URL is deleted from history, we simply remove all the
771   // cached verdicts of the same origin. This is a pretty aggressive deletion.
772   // We might revisit this logic later to decide if we want to only delete the
773   // cached verdict whose cache expression matches this URL.
774   for (const history::URLRow& row : deleted_rows) {
775     if (!row.url().SchemeIsHTTPOrHTTPS())
776       continue;
777 
778     GURL url_key = GetHostNameWithHTTPScheme(row.url());
779     stored_verdict_count_password_on_focus_ =
780         GetStoredPhishGuardVerdictCount(
781             LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) -
782         GetPhishGuardVerdictCountForURL(
783             url_key, LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE);
784     stored_verdict_count_password_entry_ =
785         GetStoredPhishGuardVerdictCount(
786             LoginReputationClientRequest::PASSWORD_REUSE_EVENT) -
787         GetPhishGuardVerdictCountForURL(
788             url_key, LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
789     stored_verdict_count_real_time_url_check_ =
790         GetStoredRealTimeUrlCheckVerdictCount() -
791         GetRealTimeUrlCheckVerdictCountForURL(url_key);
792     content_settings_->SetWebsiteSettingDefaultScope(
793         url_key, GURL(), ContentSettingsType::PASSWORD_PROTECTION, nullptr);
794     content_settings_->SetWebsiteSettingDefaultScope(
795         url_key, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
796         nullptr);
797   }
798 }
799 
GetPhishGuardVerdictCountForURL(const GURL & url,LoginReputationClientRequest::TriggerType trigger_type)800 size_t VerdictCacheManager::GetPhishGuardVerdictCountForURL(
801     const GURL& url,
802     LoginReputationClientRequest::TriggerType trigger_type) {
803   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
804          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
805   std::unique_ptr<base::DictionaryValue> cache_dictionary =
806       base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
807           url, GURL(), ContentSettingsType::PASSWORD_PROTECTION, nullptr));
808   if (!cache_dictionary || cache_dictionary->empty())
809     return 0;
810 
811   int verdict_cnt = 0;
812   if (trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) {
813     base::Value* password_on_focus_dict = nullptr;
814     password_on_focus_dict =
815         cache_dictionary->FindKey(kPasswordOnFocusCacheKey);
816     verdict_cnt +=
817         password_on_focus_dict ? password_on_focus_dict->DictSize() : 0;
818   } else {
819     for (const auto& item : cache_dictionary->DictItems()) {
820       if (item.first == kPasswordOnFocusCacheKey)
821         continue;
822       verdict_cnt += item.second.DictSize();
823     }
824   }
825   return verdict_cnt;
826 }
827 
GetRealTimeUrlCheckVerdictCountForURL(const GURL & url)828 size_t VerdictCacheManager::GetRealTimeUrlCheckVerdictCountForURL(
829     const GURL& url) {
830   std::unique_ptr<base::DictionaryValue> cache_dictionary =
831       base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
832           url, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
833           nullptr));
834   if (!cache_dictionary || cache_dictionary->empty())
835     return 0;
836   base::Value* verdict_dictionary =
837       cache_dictionary->FindKey(kRealTimeUrlCacheKey);
838   return verdict_dictionary ? verdict_dictionary->DictSize() : 0;
839 }
840 
CacheArtificialVerdict()841 void VerdictCacheManager::CacheArtificialVerdict() {
842   std::string phishing_url_string =
843       base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
844           kUnsafeUrlFlag);
845   if (phishing_url_string.empty())
846     return;
847 
848   GURL artificial_unsafe_url(phishing_url_string);
849   if (!artificial_unsafe_url.is_valid())
850     return;
851 
852   has_artificial_unsafe_url_ = true;
853 
854   RTLookupResponse response;
855   RTLookupResponse::ThreatInfo* threat_info = response.add_threat_info();
856   threat_info->set_verdict_type(RTLookupResponse::ThreatInfo::DANGEROUS);
857   threat_info->set_threat_type(
858       RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING);
859   threat_info->set_cache_duration_sec(3000);
860   threat_info->set_cache_expression_using_match_type(
861       artificial_unsafe_url.GetContent());
862   threat_info->set_cache_expression_match_type(
863       RTLookupResponse::ThreatInfo::EXACT_MATCH);
864   RemoveContentSettingsOnURLsDeleted(/*all_history=*/false,
865                                      {history::URLRow(artificial_unsafe_url)});
866   CacheRealTimeUrlVerdict(artificial_unsafe_url, response, base::Time::Now(),
867                           /*store_old_cache=*/false);
868 }
869 
StopCleanUpTimerForTesting()870 void VerdictCacheManager::StopCleanUpTimerForTesting() {
871   if (cleanup_timer_.IsRunning()) {
872     cleanup_timer_.AbandonAndStop();
873   }
874 }
875 
// static
// Starts out false; CacheArtificialVerdict() sets it to true once a valid URL
// has been supplied through the |kUnsafeUrlFlag| command-line switch.
bool VerdictCacheManager::has_artificial_unsafe_url_ = false;
878 
// static
// Returns the current value of |has_artificial_unsafe_url_|.
bool VerdictCacheManager::has_artificial_unsafe_url() {
  return has_artificial_unsafe_url_;
}
883 
884 }  // namespace safe_browsing
885