1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/safe_browsing/core/verdict_cache_manager.h"
6 
7 #include "base/base64.h"
8 #include "base/optional.h"
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/string_split.h"
11 #include "base/task/post_task.h"
12 #include "components/history/core/browser/history_service_observer.h"
13 #include "components/safe_browsing/core/common/thread_utils.h"
14 #include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
15 #include "components/safe_browsing/core/proto/csd.pb.h"
16 
17 namespace safe_browsing {
18 
19 namespace {
20 
// Dictionary keys used when cached verdicts are stored as DictionaryValues.

// Keys inside a single verdict entry: the time the entry was cached, and the
// base64-encoded serialized proto (a password protection verdict or a
// real-time threat info, respectively).
constexpr char kCacheCreationTime[] = "cache_creation_time";
constexpr char kVerdictProto[] = "verdict_proto";
constexpr char kRealTimeThreatInfoProto[] = "rt_threat_info_proto";

// Type keys that group verdict entries inside one origin's cache dictionary:
// password-on-focus verdicts and real-time URL check verdicts.
constexpr char kPasswordOnFocusCacheKey[] = "password_on_focus_cache_key";
constexpr char kRealTimeUrlCacheKey[] = "real_time_url_cache_key";
27 
// Bundles every flag that decides whether a cache entry may be used for the
// URL being looked up, so that the match decision lives in a single place.
// A default-constructed MatchParams only accepts exact host + path matches.
struct MatchParams {
  // Returns true if the current cache entry should be considered a match:
  // either the entry permits non-exact matching, or both the host and the
  // path are exact matches.
  bool ShouldMatch() const {
    return !is_only_exact_match_allowed || (is_exact_host && is_exact_path);
  }

  // Indicates whether the current cache entry and the url have the same host.
  bool is_exact_host = false;
  // Indicates whether the current cache entry and the url have the same path.
  bool is_exact_path = false;
  // Indicates whether the current cache entry is only applicable for exact
  // match.
  bool is_only_exact_match_allowed = true;
};
48 
49 // Given a URL of either http or https scheme, return its http://hostname.
50 // e.g., "https://www.foo.com:80/bar/test.cgi" -> "http://www.foo.com".
GetHostNameWithHTTPScheme(const GURL & url)51 GURL GetHostNameWithHTTPScheme(const GURL& url) {
52   DCHECK(url.SchemeIsHTTPOrHTTPS());
53   std::string result(url::kHttpScheme);
54   result.append(url::kStandardSchemeSeparator).append(url.host());
55   return GURL(result);
56 }
57 // e.g, ("www.foo.com", "/bar/test.cgi") -> "http://www.foo.com/bar/test/cgi"
GetUrlWithHostAndPath(const std::string & host,const std::string & path)58 GURL GetUrlWithHostAndPath(const std::string& host, const std::string& path) {
59   std::string result(url::kHttpScheme);
60   result.append(url::kStandardSchemeSeparator).append(host).append(path);
61   return GURL(result);
62 }
63 
64 // e.g, "www.foo.com/bar/test/cgi" -> "http://www.foo.com"
GetHostNameFromCacheExpression(const std::string & cache_expression)65 GURL GetHostNameFromCacheExpression(const std::string& cache_expression) {
66   std::string cache_expression_url(url::kHttpScheme);
67   cache_expression_url.append(url::kStandardSchemeSeparator)
68       .append(cache_expression);
69   return GetHostNameWithHTTPScheme(GURL(cache_expression_url));
70 }
71 
72 // Convert a Proto object into a DictionaryValue.
73 template <class T>
CreateDictionaryFromVerdict(const T & verdict,const base::Time & receive_time,const char * proto_name)74 std::unique_ptr<base::DictionaryValue> CreateDictionaryFromVerdict(
75     const T& verdict,
76     const base::Time& receive_time,
77     const char* proto_name) {
78   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
79   std::unique_ptr<base::DictionaryValue> result =
80       std::make_unique<base::DictionaryValue>();
81   result->SetInteger(kCacheCreationTime,
82                      static_cast<int>(receive_time.ToDoubleT()));
83   std::string serialized_proto(verdict.SerializeAsString());
84   // Performs a base64 encoding on the serialized proto.
85   base::Base64Encode(serialized_proto, &serialized_proto);
86   result->SetString(proto_name, serialized_proto);
87   return result;
88 }
89 
90 // Generate path variants of the given URL.
GeneratePathVariantsWithoutQuery(const GURL & url,std::vector<std::string> * paths)91 void GeneratePathVariantsWithoutQuery(const GURL& url,
92                                       std::vector<std::string>* paths) {
93   std::string canonical_path;
94   V4ProtocolManagerUtil::CanonicalizeUrl(
95       url, /*canonicalized_hostname=*/nullptr, &canonical_path,
96       /*canonicalized_query=*/nullptr);
97   V4ProtocolManagerUtil::GeneratePathVariantsToCheck(canonical_path,
98                                                      std::string(), paths);
99 }
100 
101 template <class T>
ParseVerdictEntry(base::Value * verdict_entry,int * out_verdict_received_time,T * out_verdict,const char * proto_name)102 bool ParseVerdictEntry(base::Value* verdict_entry,
103                        int* out_verdict_received_time,
104                        T* out_verdict,
105                        const char* proto_name) {
106   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
107 
108   if (!verdict_entry || !verdict_entry->is_dict() || !out_verdict)
109     return false;
110   base::Value* cache_creation_time_value =
111       verdict_entry->FindKey(kCacheCreationTime);
112 
113   if (!cache_creation_time_value || !cache_creation_time_value->is_int())
114     return false;
115   *out_verdict_received_time = cache_creation_time_value->GetInt();
116 
117   base::Value* verdict_proto_value = verdict_entry->FindKey(proto_name);
118   if (!verdict_proto_value || !verdict_proto_value->is_string())
119     return false;
120   std::string serialized_proto = verdict_proto_value->GetString();
121 
122   return base::Base64Decode(serialized_proto, &serialized_proto) &&
123          out_verdict->ParseFromString(serialized_proto);
124 }
125 
// Returns everything in |cache_expression| from its first '/' onwards, or an
// empty string when it contains no '/'. e.g.:
// "www.google.com"     -> ""
// "www.google.com/abc" -> "/abc"
// "foo.com/foo/bar/"  -> "/foo/bar/"
std::string GetCacheExpressionPath(const std::string& cache_expression) {
  DCHECK(!cache_expression.empty());
  const size_t path_start = cache_expression.find('/');
  return path_start == std::string::npos
             ? std::string()
             : cache_expression.substr(path_start);
}
137 
138 // Returns the number of path segments in |cache_expression_path|.
139 // For example, return 0 for "/", since there is no path after the leading
140 // slash; return 3 for "/abc/def/gh.html".
GetPathDepth(const std::string & cache_expression_path)141 size_t GetPathDepth(const std::string& cache_expression_path) {
142   return base::SplitString(base::StringPiece(cache_expression_path), "/",
143                            base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY)
144       .size();
145 }
146 
GetHostDepth(const std::string & hostname)147 size_t GetHostDepth(const std::string& hostname) {
148   return base::SplitString(base::StringPiece(hostname), ".",
149                            base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY)
150       .size();
151 }
152 
PathVariantsMatchCacheExpression(const std::vector<std::string> & generated_paths,const std::string & cache_expression_path)153 bool PathVariantsMatchCacheExpression(
154     const std::vector<std::string>& generated_paths,
155     const std::string& cache_expression_path) {
156   return base::Contains(generated_paths, cache_expression_path);
157 }
158 
IsCacheExpired(int cache_creation_time,int cache_duration)159 bool IsCacheExpired(int cache_creation_time, int cache_duration) {
160   // Note that we assume client's clock is accurate or almost accurate.
161   return base::Time::Now().ToDoubleT() >
162          static_cast<double>(cache_creation_time + cache_duration);
163 }
164 
165 template <class T>
RemoveExpiredEntries(base::Value * verdict_dictionary,const char * proto_name)166 size_t RemoveExpiredEntries(base::Value* verdict_dictionary,
167                             const char* proto_name) {
168   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
169   std::vector<std::string> expired_keys;
170   for (const auto& item : verdict_dictionary->DictItems()) {
171     int verdict_received_time;
172     T verdict;
173     if (!ParseVerdictEntry<T>(&item.second, &verdict_received_time, &verdict,
174                               proto_name) ||
175         IsCacheExpired(verdict_received_time, verdict.cache_duration_sec())) {
176       expired_keys.push_back(item.first);
177     }
178   }
179 
180   for (const std::string& key : expired_keys)
181     verdict_dictionary->RemoveKey(key);
182 
183   return expired_keys.size();
184 }
185 
186 // Helper function to determine if the given origin matches content settings
187 // map's patterns.
OriginMatchPrimaryPattern(const GURL & origin,const ContentSettingsPattern & primary_pattern,const ContentSettingsPattern & secondary_pattern_unused)188 bool OriginMatchPrimaryPattern(
189     const GURL& origin,
190     const ContentSettingsPattern& primary_pattern,
191     const ContentSettingsPattern& secondary_pattern_unused) {
192   return ContentSettingsPattern::FromURLNoWildcard(origin) == primary_pattern;
193 }
194 
GetKeyOfTypeFromTriggerType(LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type)195 std::string GetKeyOfTypeFromTriggerType(
196     LoginReputationClientRequest::TriggerType trigger_type,
197     ReusedPasswordAccountType password_type) {
198   return trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
199              ? kPasswordOnFocusCacheKey
200              : base::NumberToString(
201                    static_cast<std::underlying_type_t<
202                        ReusedPasswordAccountType::AccountType>>(
203                        password_type.account_type()));
204 }
205 
206 // If the verdict doesn't have |cache_expression_match_type| field, always
207 // interpret it as exact match only.
208 template <typename T>
IsOnlyExactMatchAllowed(T verdict)209 bool IsOnlyExactMatchAllowed(T verdict) {
210   NOTREACHED();
211   return true;
212 }
213 template <>
IsOnlyExactMatchAllowed(RTLookupResponse::ThreatInfo verdict)214 bool IsOnlyExactMatchAllowed<RTLookupResponse::ThreatInfo>(
215     RTLookupResponse::ThreatInfo verdict) {
216   return verdict.cache_expression_match_type() ==
217          RTLookupResponse::ThreatInfo::EXACT_MATCH;
218 }
219 // Always do fuzzy matching for password protection verdicts.
220 template <>
IsOnlyExactMatchAllowed(LoginReputationClientResponse verdict)221 bool IsOnlyExactMatchAllowed<LoginReputationClientResponse>(
222     LoginReputationClientResponse verdict) {
223   return false;
224 }
225 
226 template <typename T>
GetCacheExpression(T verdict)227 std::string GetCacheExpression(T verdict) {
228   NOTREACHED();
229   return "";
230 }
231 
232 template <>
GetCacheExpression(RTLookupResponse::ThreatInfo verdict)233 std::string GetCacheExpression<RTLookupResponse::ThreatInfo>(
234     RTLookupResponse::ThreatInfo verdict) {
235   // The old cache doesn't have |cache_expression_using_match_type| field
236   // setup, so it should fallback to |cache_expression| field. This check
237   // should be removed once |cache_expression| field is deprecated in
238   // RTLookupResponse.
239   if (verdict.cache_expression_match_type() ==
240       RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED)
241     return verdict.cache_expression();
242   return verdict.cache_expression_using_match_type();
243 }
244 
245 template <>
GetCacheExpression(LoginReputationClientResponse verdict)246 std::string GetCacheExpression<LoginReputationClientResponse>(
247     LoginReputationClientResponse verdict) {
248   return verdict.cache_expression();
249 }
250 
251 template <class T>
GetMostMatchingCachedVerdictWithPathMatching(const GURL & url,const std::string & type_key,scoped_refptr<HostContentSettingsMap> content_settings,const ContentSettingsType contents_setting_type,const char * proto_name,T * out_response,MatchParams match_params)252 typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
253     const GURL& url,
254     const std::string& type_key,
255     scoped_refptr<HostContentSettingsMap> content_settings,
256     const ContentSettingsType contents_setting_type,
257     const char* proto_name,
258     T* out_response,
259     MatchParams match_params) {
260   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
261 
262   GURL hostname = GetHostNameWithHTTPScheme(url);
263   std::unique_ptr<base::DictionaryValue> cache_dictionary =
264       base::DictionaryValue::From(content_settings->GetWebsiteSetting(
265           hostname, GURL(), contents_setting_type, std::string(), nullptr));
266 
267   if (!cache_dictionary || cache_dictionary->empty())
268     return T::VERDICT_TYPE_UNSPECIFIED;
269 
270   base::Value* verdict_dictionary =
271       cache_dictionary->FindKeyOfType(type_key, base::Value::Type::DICTIONARY);
272   if (!verdict_dictionary) {
273     return T::VERDICT_TYPE_UNSPECIFIED;
274   }
275 
276   std::vector<std::string> paths;
277   GeneratePathVariantsWithoutQuery(url, &paths);
278 
279   std::string root_path;
280   V4ProtocolManagerUtil::CanonicalizeUrl(
281       url, /*canonicalized_hostname*/ nullptr, &root_path,
282       /*canonicalized_query*/ nullptr);
283 
284   int max_path_depth = -1;
285   typename T::VerdictType most_matching_verdict_type =
286       T::VERDICT_TYPE_UNSPECIFIED;
287   // For all the verdicts of the same origin, we key them by |cache_expression|.
288   // Its corresponding value is a DictionaryValue contains its creation time and
289   // the serialized verdict proto.
290   for (const auto& item : verdict_dictionary->DictItems()) {
291     int verdict_received_time;
292     T verdict;
293     // Ignore any entry that we cannot parse. These invalid entries will be
294     // cleaned up during shutdown.
295     if (!ParseVerdictEntry<T>(&item.second, &verdict_received_time, &verdict,
296                               proto_name))
297       continue;
298     // Since verdict content settings are keyed by origin, we only need to
299     // compare the path part of the cache_expression and the given url.
300     std::string cache_expression_path =
301         GetCacheExpressionPath(GetCacheExpression(verdict));
302 
303     match_params.is_only_exact_match_allowed = IsOnlyExactMatchAllowed(verdict);
304     match_params.is_exact_path = (root_path == cache_expression_path);
305     // Finds the most specific match.
306     int path_depth = static_cast<int>(GetPathDepth(cache_expression_path));
307     if (path_depth > max_path_depth &&
308         PathVariantsMatchCacheExpression(paths, cache_expression_path) &&
309         match_params.ShouldMatch()) {
310       max_path_depth = path_depth;
311       // If the most matching verdict is expired, set the result to
312       // VERDICT_TYPE_UNSPECIFIED.
313       most_matching_verdict_type =
314           IsCacheExpired(verdict_received_time, verdict.cache_duration_sec())
315               ? T::VERDICT_TYPE_UNSPECIFIED
316               : verdict.verdict_type();
317       out_response->CopyFrom(verdict);
318     }
319   }
320   return most_matching_verdict_type;
321 }
322 
323 template <class T>
GetMostMatchingCachedVerdictWithHostAndPathMatching(const GURL & url,const std::string & type_key,scoped_refptr<HostContentSettingsMap> content_settings,const ContentSettingsType contents_setting_type,const char * proto_name,T * out_response)324 typename T::VerdictType GetMostMatchingCachedVerdictWithHostAndPathMatching(
325     const GURL& url,
326     const std::string& type_key,
327     scoped_refptr<HostContentSettingsMap> content_settings,
328     const ContentSettingsType contents_setting_type,
329     const char* proto_name,
330     T* out_response) {
331   DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
332   auto most_matching_verdict_type = T::VERDICT_TYPE_UNSPECIFIED;
333   MatchParams match_params;
334 
335   std::string root_host, root_path;
336   V4ProtocolManagerUtil::CanonicalizeUrl(url, &root_host, &root_path,
337                                          /*canonicalized_query*/ nullptr);
338   std::vector<std::string> host_variants;
339   V4ProtocolManagerUtil::GenerateHostVariantsToCheck(root_host, &host_variants);
340   int max_path_depth = -1;
341   for (const auto& host : host_variants) {
342     int depth = static_cast<int>(GetHostDepth(host));
343     GURL url_to_check = GetUrlWithHostAndPath(host, root_path);
344     match_params.is_exact_host = (root_host == host);
345     auto verdict_type = GetMostMatchingCachedVerdictWithPathMatching<T>(
346         url_to_check, type_key, content_settings, contents_setting_type,
347         proto_name, out_response, match_params);
348     if (depth > max_path_depth && verdict_type != T::VERDICT_TYPE_UNSPECIFIED) {
349       max_path_depth = depth;
350       most_matching_verdict_type = verdict_type;
351     }
352   }
353 
354   return most_matching_verdict_type;
355 }
356 
357 }  // namespace
358 
VerdictCacheManager(history::HistoryService * history_service,scoped_refptr<HostContentSettingsMap> content_settings)359 VerdictCacheManager::VerdictCacheManager(
360     history::HistoryService* history_service,
361     scoped_refptr<HostContentSettingsMap> content_settings)
362     : stored_verdict_count_password_on_focus_(base::nullopt),
363       stored_verdict_count_password_entry_(base::nullopt),
364       content_settings_(content_settings) {
365   if (history_service)
366     history_service_observer_.Add(history_service);
367 }
368 
Shutdown()369 void VerdictCacheManager::Shutdown() {
370   CleanUpExpiredVerdicts();
371   history_service_observer_.RemoveAll();
372   weak_factory_.InvalidateWeakPtrs();
373 }
374 
~VerdictCacheManager()375 VerdictCacheManager::~VerdictCacheManager() {}
376 
CachePhishGuardVerdict(LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type,const LoginReputationClientResponse & verdict,const base::Time & receive_time)377 void VerdictCacheManager::CachePhishGuardVerdict(
378     LoginReputationClientRequest::TriggerType trigger_type,
379     ReusedPasswordAccountType password_type,
380     const LoginReputationClientResponse& verdict,
381     const base::Time& receive_time) {
382   DCHECK(content_settings_);
383   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
384          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
385 
386   GURL hostname = GetHostNameFromCacheExpression(GetCacheExpression(verdict));
387 
388   std::unique_ptr<base::DictionaryValue> cache_dictionary =
389       base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
390           hostname, GURL(), ContentSettingsType::PASSWORD_PROTECTION,
391           std::string(), nullptr));
392 
393   if (!cache_dictionary)
394     cache_dictionary = std::make_unique<base::DictionaryValue>();
395 
396   std::unique_ptr<base::DictionaryValue> verdict_entry(
397       CreateDictionaryFromVerdict<LoginReputationClientResponse>(
398           verdict, receive_time, kVerdictProto));
399 
400   std::string type_key =
401       GetKeyOfTypeFromTriggerType(trigger_type, password_type);
402   base::Value* verdict_dictionary =
403       cache_dictionary->FindKeyOfType(type_key, base::Value::Type::DICTIONARY);
404   if (!verdict_dictionary) {
405     verdict_dictionary = cache_dictionary->SetKey(
406         type_key, base::Value(base::Value::Type::DICTIONARY));
407   }
408 
409   // Increases stored verdict count if we haven't seen this cache expression
410   // before.
411   if (!verdict_dictionary->FindKey(GetCacheExpression(verdict))) {
412     base::Optional<size_t>* stored_verdict_count =
413         trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
414             ? &stored_verdict_count_password_on_focus_
415             : &stored_verdict_count_password_entry_;
416     *stored_verdict_count = GetStoredPhishGuardVerdictCount(trigger_type) + 1;
417   }
418 
419   // If same cache_expression is already in this verdict_dictionary, we simply
420   // override it.
421   verdict_dictionary->SetKey(
422       GetCacheExpression(verdict),
423       base::Value::FromUniquePtrValue(std::move(verdict_entry)));
424   content_settings_->SetWebsiteSettingDefaultScope(
425       hostname, GURL(), ContentSettingsType::PASSWORD_PROTECTION, std::string(),
426       std::move(cache_dictionary));
427 }
428 
429 LoginReputationClientResponse::VerdictType
GetCachedPhishGuardVerdict(const GURL & url,LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type,LoginReputationClientResponse * out_response)430 VerdictCacheManager::GetCachedPhishGuardVerdict(
431     const GURL& url,
432     LoginReputationClientRequest::TriggerType trigger_type,
433     ReusedPasswordAccountType password_type,
434     LoginReputationClientResponse* out_response) {
435   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
436          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
437 
438   std::string type_key =
439       GetKeyOfTypeFromTriggerType(trigger_type, password_type);
440   return GetMostMatchingCachedVerdictWithHostAndPathMatching<
441       LoginReputationClientResponse>(url, type_key, content_settings_,
442                                      ContentSettingsType::PASSWORD_PROTECTION,
443                                      kVerdictProto, out_response);
444 }
445 
GetStoredPhishGuardVerdictCount(LoginReputationClientRequest::TriggerType trigger_type)446 size_t VerdictCacheManager::GetStoredPhishGuardVerdictCount(
447     LoginReputationClientRequest::TriggerType trigger_type) {
448   DCHECK(content_settings_);
449   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
450          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
451   base::Optional<size_t>* stored_verdict_count =
452       trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
453           ? &stored_verdict_count_password_on_focus_
454           : &stored_verdict_count_password_entry_;
455   // If we have already computed this, return its value.
456   if (stored_verdict_count->has_value())
457     return stored_verdict_count->value();
458 
459   ContentSettingsForOneType password_protection_settings;
460   content_settings_->GetSettingsForOneType(
461       ContentSettingsType::PASSWORD_PROTECTION, std::string(),
462       &password_protection_settings);
463   stored_verdict_count_password_on_focus_ = 0;
464   stored_verdict_count_password_entry_ = 0;
465   if (password_protection_settings.empty())
466     return 0;
467 
468   for (const ContentSettingPatternSource& source :
469        password_protection_settings) {
470     std::unique_ptr<base::DictionaryValue> cache_dictionary =
471         base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
472             GURL(source.primary_pattern.ToString()), GURL(),
473             ContentSettingsType::PASSWORD_PROTECTION, std::string(), nullptr));
474     if (cache_dictionary.get() && !cache_dictionary->empty()) {
475       for (const auto& item : cache_dictionary->DictItems()) {
476         if (item.first == base::StringPiece(kPasswordOnFocusCacheKey)) {
477           stored_verdict_count_password_on_focus_.value() +=
478               item.second.DictSize();
479         } else {
480           stored_verdict_count_password_entry_.value() +=
481               item.second.DictSize();
482         }
483       }
484     }
485   }
486 
487   return stored_verdict_count->value();
488 }
489 
CacheRealTimeUrlVerdict(const GURL & url,const RTLookupResponse & verdict,const base::Time & receive_time,bool store_old_cache)490 void VerdictCacheManager::CacheRealTimeUrlVerdict(
491     const GURL& url,
492     const RTLookupResponse& verdict,
493     const base::Time& receive_time,
494     bool store_old_cache) {
495   std::vector<std::string> visited_cache_expressions;
496   for (const auto& threat_info : verdict.threat_info()) {
497     // If |cache_expression_match_type| is unspecified, ignore this entry.
498     if (threat_info.cache_expression_match_type() ==
499             RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED &&
500         !store_old_cache) {
501       continue;
502     }
503     std::string cache_expression = store_old_cache
504                                        ? threat_info.cache_expression()
505                                        : GetCacheExpression(threat_info);
506     // TODO(crbug.com/1033692): For the same cache_expression, threat_info is in
507     // decreasing order of severity. To avoid lower severity threat being
508     // overridden by higher one, only store threat info that is first seen for a
509     // cache expression.
510     if (base::Contains(visited_cache_expressions, cache_expression))
511       continue;
512 
513     GURL hostname = GetHostNameFromCacheExpression(cache_expression);
514     std::unique_ptr<base::DictionaryValue> cache_dictionary =
515         base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
516             hostname, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
517             std::string(), nullptr));
518 
519     if (!cache_dictionary)
520       cache_dictionary = std::make_unique<base::DictionaryValue>();
521 
522     base::Value* verdict_dictionary = cache_dictionary->FindKeyOfType(
523         kRealTimeUrlCacheKey, base::Value::Type::DICTIONARY);
524     if (!verdict_dictionary) {
525       verdict_dictionary = cache_dictionary->SetKey(
526           kRealTimeUrlCacheKey, base::Value(base::Value::Type::DICTIONARY));
527     }
528 
529     std::unique_ptr<base::DictionaryValue> threat_info_entry(
530         CreateDictionaryFromVerdict<RTLookupResponse::ThreatInfo>(
531             threat_info, receive_time, kRealTimeThreatInfoProto));
532     // Increases stored verdict count if we haven't seen this cache expression
533     // before.
534     if (!verdict_dictionary->FindKey(cache_expression)) {
535       stored_verdict_count_real_time_url_check_++;
536     }
537 
538     verdict_dictionary->SetKey(
539         cache_expression,
540         base::Value::FromUniquePtrValue(std::move(threat_info_entry)));
541     visited_cache_expressions.push_back(cache_expression);
542 
543     content_settings_->SetWebsiteSettingDefaultScope(
544         hostname, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
545         std::string(), std::move(cache_dictionary));
546   }
547 }
548 
549 RTLookupResponse::ThreatInfo::VerdictType
GetCachedRealTimeUrlVerdict(const GURL & url,RTLookupResponse::ThreatInfo * out_threat_info)550 VerdictCacheManager::GetCachedRealTimeUrlVerdict(
551     const GURL& url,
552     RTLookupResponse::ThreatInfo* out_threat_info) {
553   return GetMostMatchingCachedVerdictWithHostAndPathMatching<
554       RTLookupResponse::ThreatInfo>(
555       url, kRealTimeUrlCacheKey, content_settings_,
556       ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
557       kRealTimeThreatInfoProto, out_threat_info);
558 }
559 
CleanUpExpiredVerdicts()560 void VerdictCacheManager::CleanUpExpiredVerdicts() {
561   DCHECK(content_settings_);
562 
563   CleanUpExpiredPhishGuardVerdicts();
564   CleanUpExpiredRealTimeUrlCheckVerdicts();
565 }
566 
CleanUpExpiredPhishGuardVerdicts()567 void VerdictCacheManager::CleanUpExpiredPhishGuardVerdicts() {
568   if (GetStoredPhishGuardVerdictCount(
569           LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) <= 0 &&
570       GetStoredPhishGuardVerdictCount(
571           LoginReputationClientRequest::PASSWORD_REUSE_EVENT) <= 0)
572     return;
573 
574   ContentSettingsForOneType password_protection_settings;
575   content_settings_->GetSettingsForOneType(
576       ContentSettingsType::PASSWORD_PROTECTION, std::string(),
577       &password_protection_settings);
578 
579   for (const ContentSettingPatternSource& source :
580        password_protection_settings) {
581     GURL primary_pattern_url = GURL(source.primary_pattern.ToString());
582     // Find all verdicts associated with this origin.
583     std::unique_ptr<base::DictionaryValue> cache_dictionary =
584         base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
585             primary_pattern_url, GURL(),
586             ContentSettingsType::PASSWORD_PROTECTION, std::string(), nullptr));
587     bool has_expired_password_on_focus_entry = RemoveExpiredPhishGuardVerdicts(
588         LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE,
589         cache_dictionary.get());
590     bool has_expired_password_reuse_entry = RemoveExpiredPhishGuardVerdicts(
591         LoginReputationClientRequest::PASSWORD_REUSE_EVENT,
592         cache_dictionary.get());
593 
594     if (cache_dictionary->size() == 0u) {
595       content_settings_->ClearSettingsForOneTypeWithPredicate(
596           ContentSettingsType::PASSWORD_PROTECTION, base::Time(),
597           base::Time::Max(),
598           base::BindRepeating(&OriginMatchPrimaryPattern, primary_pattern_url));
599     } else if (has_expired_password_on_focus_entry ||
600                has_expired_password_reuse_entry) {
601       // Set the website setting of this origin with the updated
602       // |cache_dictionary|.
603       content_settings_->SetWebsiteSettingDefaultScope(
604           primary_pattern_url, GURL(), ContentSettingsType::PASSWORD_PROTECTION,
605           std::string(), std::move(cache_dictionary));
606     }
607   }
608 }
609 
CleanUpExpiredRealTimeUrlCheckVerdicts()610 void VerdictCacheManager::CleanUpExpiredRealTimeUrlCheckVerdicts() {
611   ContentSettingsForOneType safe_browsing_url_check_data_settings;
612   content_settings_->GetSettingsForOneType(
613       ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, std::string(),
614       &safe_browsing_url_check_data_settings);
615 
616   for (const ContentSettingPatternSource& source :
617        safe_browsing_url_check_data_settings) {
618     GURL primary_pattern_url = GURL(source.primary_pattern.ToString());
619     // Find all verdicts associated with this origin.
620     std::unique_ptr<base::DictionaryValue> cache_dictionary =
621         base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
622             primary_pattern_url, GURL(),
623             ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, std::string(),
624             nullptr));
625     bool has_expired_entry =
626         RemoveExpiredRealTimeUrlCheckVerdicts(cache_dictionary.get());
627 
628     if (cache_dictionary->size() == 0u) {
629       content_settings_->ClearSettingsForOneTypeWithPredicate(
630           ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, base::Time(),
631           base::Time::Max(),
632           base::BindRepeating(&OriginMatchPrimaryPattern, primary_pattern_url));
633     } else if (has_expired_entry) {
634       // Set the website setting of this origin with the updated
635       // |cache_dictionary|.
636       content_settings_->SetWebsiteSettingDefaultScope(
637           primary_pattern_url, GURL(),
638           ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, std::string(),
639           std::move(cache_dictionary));
640     }
641   }
642 }
643 
644 // Overridden from history::HistoryServiceObserver.
OnURLsDeleted(history::HistoryService * history_service,const history::DeletionInfo & deletion_info)645 void VerdictCacheManager::OnURLsDeleted(
646     history::HistoryService* history_service,
647     const history::DeletionInfo& deletion_info) {
648   base::PostTask(FROM_HERE, CreateTaskTraits(ThreadID::UI),
649                  base::BindRepeating(
650                      &VerdictCacheManager::RemoveContentSettingsOnURLsDeleted,
651                      GetWeakPtr(), deletion_info.IsAllHistory(),
652                      deletion_info.deleted_rows()));
653 }
654 
655 // Overridden from history::HistoryServiceObserver.
HistoryServiceBeingDeleted(history::HistoryService * history_service)656 void VerdictCacheManager::HistoryServiceBeingDeleted(
657     history::HistoryService* history_service) {
658   history_service_observer_.Remove(history_service);
659 }
660 
RemoveExpiredPhishGuardVerdicts(LoginReputationClientRequest::TriggerType trigger_type,base::DictionaryValue * cache_dictionary)661 bool VerdictCacheManager::RemoveExpiredPhishGuardVerdicts(
662     LoginReputationClientRequest::TriggerType trigger_type,
663     base::DictionaryValue* cache_dictionary) {
664   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
665          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
666   if (!cache_dictionary || cache_dictionary->empty())
667     return false;
668 
669   size_t verdicts_removed = 0;
670   std::vector<std::string> empty_keys;
671   for (auto item : cache_dictionary->DictItems()) {
672     if (trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE &&
673         item.first == std::string(kPasswordOnFocusCacheKey)) {
674       size_t removed_cnt = RemoveExpiredEntries<LoginReputationClientResponse>(
675           &item.second, kVerdictProto);
676       verdicts_removed += removed_cnt;
677       if (stored_verdict_count_password_on_focus_.has_value())
678         stored_verdict_count_password_on_focus_.value() -= removed_cnt;
679     } else {
680       size_t removed_cnt = RemoveExpiredEntries<LoginReputationClientResponse>(
681           &item.second, kVerdictProto);
682       verdicts_removed += removed_cnt;
683       if (stored_verdict_count_password_entry_.has_value())
684         stored_verdict_count_password_entry_.value() -= removed_cnt;
685     }
686 
687     if (item.second.DictSize() == 0U)
688       empty_keys.push_back(item.first);
689   }
690   for (const auto& key : empty_keys)
691     cache_dictionary->RemoveKey(key);
692 
693   return verdicts_removed > 0U;
694 }
695 
RemoveExpiredRealTimeUrlCheckVerdicts(base::DictionaryValue * cache_dictionary)696 bool VerdictCacheManager::RemoveExpiredRealTimeUrlCheckVerdicts(
697     base::DictionaryValue* cache_dictionary) {
698   if (!cache_dictionary || cache_dictionary->empty())
699     return false;
700 
701   size_t verdicts_removed = 0;
702   std::vector<std::string> empty_keys;
703   for (auto item : cache_dictionary->DictItems()) {
704     size_t removed_cnt = RemoveExpiredEntries<RTLookupResponse::ThreatInfo>(
705         &item.second, kRealTimeThreatInfoProto);
706     verdicts_removed += removed_cnt;
707     stored_verdict_count_real_time_url_check_ -= removed_cnt;
708     if (item.second.DictSize() == 0U)
709       empty_keys.push_back(item.first);
710   }
711   for (const auto& key : empty_keys)
712     cache_dictionary->RemoveKey(key);
713 
714   return verdicts_removed > 0U;
715 }
716 
RemoveContentSettingsOnURLsDeleted(bool all_history,const history::URLRows & deleted_rows)717 void VerdictCacheManager::RemoveContentSettingsOnURLsDeleted(
718     bool all_history,
719     const history::URLRows& deleted_rows) {
720   DCHECK(CurrentlyOnThread(ThreadID::UI));
721   DCHECK(content_settings_);
722 
723   if (all_history) {
724     content_settings_->ClearSettingsForOneType(
725         ContentSettingsType::PASSWORD_PROTECTION);
726     stored_verdict_count_password_on_focus_ = 0;
727     stored_verdict_count_password_entry_ = 0;
728     stored_verdict_count_real_time_url_check_ = 0;
729     content_settings_->ClearSettingsForOneType(
730         ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA);
731     return;
732   }
733 
734   // For now, if a URL is deleted from history, we simply remove all the
735   // cached verdicts of the same origin. This is a pretty aggressive deletion.
736   // We might revisit this logic later to decide if we want to only delete the
737   // cached verdict whose cache expression matches this URL.
738   for (const history::URLRow& row : deleted_rows) {
739     if (!row.url().SchemeIsHTTPOrHTTPS())
740       continue;
741 
742     GURL url_key = GetHostNameWithHTTPScheme(row.url());
743     stored_verdict_count_password_on_focus_ =
744         GetStoredPhishGuardVerdictCount(
745             LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) -
746         GetPhishGuardVerdictCountForURL(
747             url_key, LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE);
748     stored_verdict_count_password_entry_ =
749         GetStoredPhishGuardVerdictCount(
750             LoginReputationClientRequest::PASSWORD_REUSE_EVENT) -
751         GetPhishGuardVerdictCountForURL(
752             url_key, LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
753     stored_verdict_count_real_time_url_check_ -=
754         GetRealTimeUrlCheckVerdictCountForURL(url_key);
755     content_settings_->ClearSettingsForOneTypeWithPredicate(
756         ContentSettingsType::PASSWORD_PROTECTION, base::Time(),
757         base::Time::Max(),
758         base::BindRepeating(&OriginMatchPrimaryPattern, url_key));
759     content_settings_->ClearSettingsForOneTypeWithPredicate(
760         ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, base::Time(),
761         base::Time::Max(),
762         base::BindRepeating(&OriginMatchPrimaryPattern, url_key));
763   }
764 }
765 
GetPhishGuardVerdictCountForURL(const GURL & url,LoginReputationClientRequest::TriggerType trigger_type)766 size_t VerdictCacheManager::GetPhishGuardVerdictCountForURL(
767     const GURL& url,
768     LoginReputationClientRequest::TriggerType trigger_type) {
769   DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
770          trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
771   std::unique_ptr<base::DictionaryValue> cache_dictionary =
772       base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
773           url, GURL(), ContentSettingsType::PASSWORD_PROTECTION, std::string(),
774           nullptr));
775   if (!cache_dictionary || cache_dictionary->empty())
776     return 0;
777 
778   int verdict_cnt = 0;
779   if (trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) {
780     base::Value* password_on_focus_dict = nullptr;
781     password_on_focus_dict =
782         cache_dictionary->FindKey(kPasswordOnFocusCacheKey);
783     verdict_cnt +=
784         password_on_focus_dict ? password_on_focus_dict->DictSize() : 0;
785   } else {
786     for (const auto& item : cache_dictionary->DictItems()) {
787       if (item.first == kPasswordOnFocusCacheKey)
788         continue;
789       verdict_cnt += item.second.DictSize();
790     }
791   }
792   return verdict_cnt;
793 }
794 
GetRealTimeUrlCheckVerdictCountForURL(const GURL & url)795 size_t VerdictCacheManager::GetRealTimeUrlCheckVerdictCountForURL(
796     const GURL& url) {
797   std::unique_ptr<base::DictionaryValue> cache_dictionary =
798       base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
799           url, GURL(), ContentSettingsType::PASSWORD_PROTECTION, std::string(),
800           nullptr));
801   if (!cache_dictionary || cache_dictionary->empty())
802     return 0;
803   base::Value* verdict_dictionary =
804       cache_dictionary->FindKey(kRealTimeUrlCacheKey);
805   return verdict_dictionary ? verdict_dictionary->DictSize() : 0;
806 }
807 
808 }  // namespace safe_browsing
809