1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/safe_browsing/core/verdict_cache_manager.h"
6
7 #include "base/base64.h"
8 #include "base/optional.h"
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/string_split.h"
11 #include "base/task/post_task.h"
12 #include "components/history/core/browser/history_service_observer.h"
13 #include "components/safe_browsing/core/common/thread_utils.h"
14 #include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
15 #include "components/safe_browsing/core/proto/csd.pb.h"
16
17 namespace safe_browsing {
18
19 namespace {
20
// Dictionary keys used when a verdict is persisted as a DictionaryValue:
// the first three name fields of a single verdict entry, the last two name
// the per-type buckets that hold those entries.
constexpr char kCacheCreationTime[] = "cache_creation_time";
constexpr char kVerdictProto[] = "verdict_proto";
constexpr char kRealTimeThreatInfoProto[] = "rt_threat_info_proto";
constexpr char kPasswordOnFocusCacheKey[] = "password_on_focus_cache_key";
constexpr char kRealTimeUrlCacheKey[] = "real_time_url_cache_key";
27
// Bundles every flag that decides whether a cache entry may be used for a
// URL, so the match policy lives in a single place.
struct MatchParams {
  MatchParams() = default;

  // Returns true when the entry may be used: either fuzzy matching is
  // permitted for it, or both its host and its path equal the URL's.
  bool ShouldMatch() const {
    return !is_only_exact_match_allowed || (is_exact_host && is_exact_path);
  }

  // Indicates whether the current cache entry and the url have the same host.
  bool is_exact_host = false;
  // Indicates whether the current cache entry and the url have the same path.
  bool is_exact_path = false;
  // Indicates whether the current cache entry is only applicable for exact
  // match.
  bool is_only_exact_match_allowed = true;
};
48
49 // Given a URL of either http or https scheme, return its http://hostname.
50 // e.g., "https://www.foo.com:80/bar/test.cgi" -> "http://www.foo.com".
GetHostNameWithHTTPScheme(const GURL & url)51 GURL GetHostNameWithHTTPScheme(const GURL& url) {
52 DCHECK(url.SchemeIsHTTPOrHTTPS());
53 std::string result(url::kHttpScheme);
54 result.append(url::kStandardSchemeSeparator).append(url.host());
55 return GURL(result);
56 }
57 // e.g, ("www.foo.com", "/bar/test.cgi") -> "http://www.foo.com/bar/test/cgi"
GetUrlWithHostAndPath(const std::string & host,const std::string & path)58 GURL GetUrlWithHostAndPath(const std::string& host, const std::string& path) {
59 std::string result(url::kHttpScheme);
60 result.append(url::kStandardSchemeSeparator).append(host).append(path);
61 return GURL(result);
62 }
63
64 // e.g, "www.foo.com/bar/test/cgi" -> "http://www.foo.com"
GetHostNameFromCacheExpression(const std::string & cache_expression)65 GURL GetHostNameFromCacheExpression(const std::string& cache_expression) {
66 std::string cache_expression_url(url::kHttpScheme);
67 cache_expression_url.append(url::kStandardSchemeSeparator)
68 .append(cache_expression);
69 return GetHostNameWithHTTPScheme(GURL(cache_expression_url));
70 }
71
72 // Convert a Proto object into a DictionaryValue.
73 template <class T>
CreateDictionaryFromVerdict(const T & verdict,const base::Time & receive_time,const char * proto_name)74 std::unique_ptr<base::DictionaryValue> CreateDictionaryFromVerdict(
75 const T& verdict,
76 const base::Time& receive_time,
77 const char* proto_name) {
78 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
79 std::unique_ptr<base::DictionaryValue> result =
80 std::make_unique<base::DictionaryValue>();
81 result->SetInteger(kCacheCreationTime,
82 static_cast<int>(receive_time.ToDoubleT()));
83 std::string serialized_proto(verdict.SerializeAsString());
84 // Performs a base64 encoding on the serialized proto.
85 base::Base64Encode(serialized_proto, &serialized_proto);
86 result->SetString(proto_name, serialized_proto);
87 return result;
88 }
89
90 // Generate path variants of the given URL.
GeneratePathVariantsWithoutQuery(const GURL & url,std::vector<std::string> * paths)91 void GeneratePathVariantsWithoutQuery(const GURL& url,
92 std::vector<std::string>* paths) {
93 std::string canonical_path;
94 V4ProtocolManagerUtil::CanonicalizeUrl(
95 url, /*canonicalized_hostname=*/nullptr, &canonical_path,
96 /*canonicalized_query=*/nullptr);
97 V4ProtocolManagerUtil::GeneratePathVariantsToCheck(canonical_path,
98 std::string(), paths);
99 }
100
101 template <class T>
ParseVerdictEntry(base::Value * verdict_entry,int * out_verdict_received_time,T * out_verdict,const char * proto_name)102 bool ParseVerdictEntry(base::Value* verdict_entry,
103 int* out_verdict_received_time,
104 T* out_verdict,
105 const char* proto_name) {
106 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
107
108 if (!verdict_entry || !verdict_entry->is_dict() || !out_verdict)
109 return false;
110 base::Value* cache_creation_time_value =
111 verdict_entry->FindKey(kCacheCreationTime);
112
113 if (!cache_creation_time_value || !cache_creation_time_value->is_int())
114 return false;
115 *out_verdict_received_time = cache_creation_time_value->GetInt();
116
117 base::Value* verdict_proto_value = verdict_entry->FindKey(proto_name);
118 if (!verdict_proto_value || !verdict_proto_value->is_string())
119 return false;
120 std::string serialized_proto = verdict_proto_value->GetString();
121
122 return base::Base64Decode(serialized_proto, &serialized_proto) &&
123 out_verdict->ParseFromString(serialized_proto);
124 }
125
126 // Return the path of the cache expression. e.g.:
127 // "www.google.com" -> ""
128 // "www.google.com/abc" -> "/abc"
129 // "foo.com/foo/bar/" -> "/foo/bar/"
GetCacheExpressionPath(const std::string & cache_expression)130 std::string GetCacheExpressionPath(const std::string& cache_expression) {
131 DCHECK(!cache_expression.empty());
132 size_t first_slash_pos = cache_expression.find_first_of("/");
133 if (first_slash_pos == std::string::npos)
134 return "";
135 return cache_expression.substr(first_slash_pos);
136 }
137
138 // Returns the number of path segments in |cache_expression_path|.
139 // For example, return 0 for "/", since there is no path after the leading
140 // slash; return 3 for "/abc/def/gh.html".
GetPathDepth(const std::string & cache_expression_path)141 size_t GetPathDepth(const std::string& cache_expression_path) {
142 return base::SplitString(base::StringPiece(cache_expression_path), "/",
143 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY)
144 .size();
145 }
146
GetHostDepth(const std::string & hostname)147 size_t GetHostDepth(const std::string& hostname) {
148 return base::SplitString(base::StringPiece(hostname), ".",
149 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY)
150 .size();
151 }
152
PathVariantsMatchCacheExpression(const std::vector<std::string> & generated_paths,const std::string & cache_expression_path)153 bool PathVariantsMatchCacheExpression(
154 const std::vector<std::string>& generated_paths,
155 const std::string& cache_expression_path) {
156 return base::Contains(generated_paths, cache_expression_path);
157 }
158
IsCacheExpired(int cache_creation_time,int cache_duration)159 bool IsCacheExpired(int cache_creation_time, int cache_duration) {
160 // Note that we assume client's clock is accurate or almost accurate.
161 return base::Time::Now().ToDoubleT() >
162 static_cast<double>(cache_creation_time + cache_duration);
163 }
164
165 template <class T>
RemoveExpiredEntries(base::Value * verdict_dictionary,const char * proto_name)166 size_t RemoveExpiredEntries(base::Value* verdict_dictionary,
167 const char* proto_name) {
168 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
169 std::vector<std::string> expired_keys;
170 for (const auto& item : verdict_dictionary->DictItems()) {
171 int verdict_received_time;
172 T verdict;
173 if (!ParseVerdictEntry<T>(&item.second, &verdict_received_time, &verdict,
174 proto_name) ||
175 IsCacheExpired(verdict_received_time, verdict.cache_duration_sec())) {
176 expired_keys.push_back(item.first);
177 }
178 }
179
180 for (const std::string& key : expired_keys)
181 verdict_dictionary->RemoveKey(key);
182
183 return expired_keys.size();
184 }
185
186 // Helper function to determine if the given origin matches content settings
187 // map's patterns.
OriginMatchPrimaryPattern(const GURL & origin,const ContentSettingsPattern & primary_pattern,const ContentSettingsPattern & secondary_pattern_unused)188 bool OriginMatchPrimaryPattern(
189 const GURL& origin,
190 const ContentSettingsPattern& primary_pattern,
191 const ContentSettingsPattern& secondary_pattern_unused) {
192 return ContentSettingsPattern::FromURLNoWildcard(origin) == primary_pattern;
193 }
194
GetKeyOfTypeFromTriggerType(LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type)195 std::string GetKeyOfTypeFromTriggerType(
196 LoginReputationClientRequest::TriggerType trigger_type,
197 ReusedPasswordAccountType password_type) {
198 return trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
199 ? kPasswordOnFocusCacheKey
200 : base::NumberToString(
201 static_cast<std::underlying_type_t<
202 ReusedPasswordAccountType::AccountType>>(
203 password_type.account_type()));
204 }
205
206 // If the verdict doesn't have |cache_expression_match_type| field, always
207 // interpret it as exact match only.
208 template <typename T>
IsOnlyExactMatchAllowed(T verdict)209 bool IsOnlyExactMatchAllowed(T verdict) {
210 NOTREACHED();
211 return true;
212 }
213 template <>
IsOnlyExactMatchAllowed(RTLookupResponse::ThreatInfo verdict)214 bool IsOnlyExactMatchAllowed<RTLookupResponse::ThreatInfo>(
215 RTLookupResponse::ThreatInfo verdict) {
216 return verdict.cache_expression_match_type() ==
217 RTLookupResponse::ThreatInfo::EXACT_MATCH;
218 }
219 // Always do fuzzy matching for password protection verdicts.
220 template <>
IsOnlyExactMatchAllowed(LoginReputationClientResponse verdict)221 bool IsOnlyExactMatchAllowed<LoginReputationClientResponse>(
222 LoginReputationClientResponse verdict) {
223 return false;
224 }
225
226 template <typename T>
GetCacheExpression(T verdict)227 std::string GetCacheExpression(T verdict) {
228 NOTREACHED();
229 return "";
230 }
231
232 template <>
GetCacheExpression(RTLookupResponse::ThreatInfo verdict)233 std::string GetCacheExpression<RTLookupResponse::ThreatInfo>(
234 RTLookupResponse::ThreatInfo verdict) {
235 // The old cache doesn't have |cache_expression_using_match_type| field
236 // setup, so it should fallback to |cache_expression| field. This check
237 // should be removed once |cache_expression| field is deprecated in
238 // RTLookupResponse.
239 if (verdict.cache_expression_match_type() ==
240 RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED)
241 return verdict.cache_expression();
242 return verdict.cache_expression_using_match_type();
243 }
244
245 template <>
GetCacheExpression(LoginReputationClientResponse verdict)246 std::string GetCacheExpression<LoginReputationClientResponse>(
247 LoginReputationClientResponse verdict) {
248 return verdict.cache_expression();
249 }
250
251 template <class T>
GetMostMatchingCachedVerdictWithPathMatching(const GURL & url,const std::string & type_key,scoped_refptr<HostContentSettingsMap> content_settings,const ContentSettingsType contents_setting_type,const char * proto_name,T * out_response,MatchParams match_params)252 typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
253 const GURL& url,
254 const std::string& type_key,
255 scoped_refptr<HostContentSettingsMap> content_settings,
256 const ContentSettingsType contents_setting_type,
257 const char* proto_name,
258 T* out_response,
259 MatchParams match_params) {
260 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
261
262 GURL hostname = GetHostNameWithHTTPScheme(url);
263 std::unique_ptr<base::DictionaryValue> cache_dictionary =
264 base::DictionaryValue::From(content_settings->GetWebsiteSetting(
265 hostname, GURL(), contents_setting_type, std::string(), nullptr));
266
267 if (!cache_dictionary || cache_dictionary->empty())
268 return T::VERDICT_TYPE_UNSPECIFIED;
269
270 base::Value* verdict_dictionary =
271 cache_dictionary->FindKeyOfType(type_key, base::Value::Type::DICTIONARY);
272 if (!verdict_dictionary) {
273 return T::VERDICT_TYPE_UNSPECIFIED;
274 }
275
276 std::vector<std::string> paths;
277 GeneratePathVariantsWithoutQuery(url, &paths);
278
279 std::string root_path;
280 V4ProtocolManagerUtil::CanonicalizeUrl(
281 url, /*canonicalized_hostname*/ nullptr, &root_path,
282 /*canonicalized_query*/ nullptr);
283
284 int max_path_depth = -1;
285 typename T::VerdictType most_matching_verdict_type =
286 T::VERDICT_TYPE_UNSPECIFIED;
287 // For all the verdicts of the same origin, we key them by |cache_expression|.
288 // Its corresponding value is a DictionaryValue contains its creation time and
289 // the serialized verdict proto.
290 for (const auto& item : verdict_dictionary->DictItems()) {
291 int verdict_received_time;
292 T verdict;
293 // Ignore any entry that we cannot parse. These invalid entries will be
294 // cleaned up during shutdown.
295 if (!ParseVerdictEntry<T>(&item.second, &verdict_received_time, &verdict,
296 proto_name))
297 continue;
298 // Since verdict content settings are keyed by origin, we only need to
299 // compare the path part of the cache_expression and the given url.
300 std::string cache_expression_path =
301 GetCacheExpressionPath(GetCacheExpression(verdict));
302
303 match_params.is_only_exact_match_allowed = IsOnlyExactMatchAllowed(verdict);
304 match_params.is_exact_path = (root_path == cache_expression_path);
305 // Finds the most specific match.
306 int path_depth = static_cast<int>(GetPathDepth(cache_expression_path));
307 if (path_depth > max_path_depth &&
308 PathVariantsMatchCacheExpression(paths, cache_expression_path) &&
309 match_params.ShouldMatch()) {
310 max_path_depth = path_depth;
311 // If the most matching verdict is expired, set the result to
312 // VERDICT_TYPE_UNSPECIFIED.
313 most_matching_verdict_type =
314 IsCacheExpired(verdict_received_time, verdict.cache_duration_sec())
315 ? T::VERDICT_TYPE_UNSPECIFIED
316 : verdict.verdict_type();
317 out_response->CopyFrom(verdict);
318 }
319 }
320 return most_matching_verdict_type;
321 }
322
323 template <class T>
GetMostMatchingCachedVerdictWithHostAndPathMatching(const GURL & url,const std::string & type_key,scoped_refptr<HostContentSettingsMap> content_settings,const ContentSettingsType contents_setting_type,const char * proto_name,T * out_response)324 typename T::VerdictType GetMostMatchingCachedVerdictWithHostAndPathMatching(
325 const GURL& url,
326 const std::string& type_key,
327 scoped_refptr<HostContentSettingsMap> content_settings,
328 const ContentSettingsType contents_setting_type,
329 const char* proto_name,
330 T* out_response) {
331 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
332 auto most_matching_verdict_type = T::VERDICT_TYPE_UNSPECIFIED;
333 MatchParams match_params;
334
335 std::string root_host, root_path;
336 V4ProtocolManagerUtil::CanonicalizeUrl(url, &root_host, &root_path,
337 /*canonicalized_query*/ nullptr);
338 std::vector<std::string> host_variants;
339 V4ProtocolManagerUtil::GenerateHostVariantsToCheck(root_host, &host_variants);
340 int max_path_depth = -1;
341 for (const auto& host : host_variants) {
342 int depth = static_cast<int>(GetHostDepth(host));
343 GURL url_to_check = GetUrlWithHostAndPath(host, root_path);
344 match_params.is_exact_host = (root_host == host);
345 auto verdict_type = GetMostMatchingCachedVerdictWithPathMatching<T>(
346 url_to_check, type_key, content_settings, contents_setting_type,
347 proto_name, out_response, match_params);
348 if (depth > max_path_depth && verdict_type != T::VERDICT_TYPE_UNSPECIFIED) {
349 max_path_depth = depth;
350 most_matching_verdict_type = verdict_type;
351 }
352 }
353
354 return most_matching_verdict_type;
355 }
356
357 } // namespace
358
VerdictCacheManager(history::HistoryService * history_service,scoped_refptr<HostContentSettingsMap> content_settings)359 VerdictCacheManager::VerdictCacheManager(
360 history::HistoryService* history_service,
361 scoped_refptr<HostContentSettingsMap> content_settings)
362 : stored_verdict_count_password_on_focus_(base::nullopt),
363 stored_verdict_count_password_entry_(base::nullopt),
364 content_settings_(content_settings) {
365 if (history_service)
366 history_service_observer_.Add(history_service);
367 }
368
Shutdown()369 void VerdictCacheManager::Shutdown() {
370 CleanUpExpiredVerdicts();
371 history_service_observer_.RemoveAll();
372 weak_factory_.InvalidateWeakPtrs();
373 }
374
~VerdictCacheManager()375 VerdictCacheManager::~VerdictCacheManager() {}
376
CachePhishGuardVerdict(LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type,const LoginReputationClientResponse & verdict,const base::Time & receive_time)377 void VerdictCacheManager::CachePhishGuardVerdict(
378 LoginReputationClientRequest::TriggerType trigger_type,
379 ReusedPasswordAccountType password_type,
380 const LoginReputationClientResponse& verdict,
381 const base::Time& receive_time) {
382 DCHECK(content_settings_);
383 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
384 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
385
386 GURL hostname = GetHostNameFromCacheExpression(GetCacheExpression(verdict));
387
388 std::unique_ptr<base::DictionaryValue> cache_dictionary =
389 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
390 hostname, GURL(), ContentSettingsType::PASSWORD_PROTECTION,
391 std::string(), nullptr));
392
393 if (!cache_dictionary)
394 cache_dictionary = std::make_unique<base::DictionaryValue>();
395
396 std::unique_ptr<base::DictionaryValue> verdict_entry(
397 CreateDictionaryFromVerdict<LoginReputationClientResponse>(
398 verdict, receive_time, kVerdictProto));
399
400 std::string type_key =
401 GetKeyOfTypeFromTriggerType(trigger_type, password_type);
402 base::Value* verdict_dictionary =
403 cache_dictionary->FindKeyOfType(type_key, base::Value::Type::DICTIONARY);
404 if (!verdict_dictionary) {
405 verdict_dictionary = cache_dictionary->SetKey(
406 type_key, base::Value(base::Value::Type::DICTIONARY));
407 }
408
409 // Increases stored verdict count if we haven't seen this cache expression
410 // before.
411 if (!verdict_dictionary->FindKey(GetCacheExpression(verdict))) {
412 base::Optional<size_t>* stored_verdict_count =
413 trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
414 ? &stored_verdict_count_password_on_focus_
415 : &stored_verdict_count_password_entry_;
416 *stored_verdict_count = GetStoredPhishGuardVerdictCount(trigger_type) + 1;
417 }
418
419 // If same cache_expression is already in this verdict_dictionary, we simply
420 // override it.
421 verdict_dictionary->SetKey(
422 GetCacheExpression(verdict),
423 base::Value::FromUniquePtrValue(std::move(verdict_entry)));
424 content_settings_->SetWebsiteSettingDefaultScope(
425 hostname, GURL(), ContentSettingsType::PASSWORD_PROTECTION, std::string(),
426 std::move(cache_dictionary));
427 }
428
429 LoginReputationClientResponse::VerdictType
GetCachedPhishGuardVerdict(const GURL & url,LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type,LoginReputationClientResponse * out_response)430 VerdictCacheManager::GetCachedPhishGuardVerdict(
431 const GURL& url,
432 LoginReputationClientRequest::TriggerType trigger_type,
433 ReusedPasswordAccountType password_type,
434 LoginReputationClientResponse* out_response) {
435 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
436 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
437
438 std::string type_key =
439 GetKeyOfTypeFromTriggerType(trigger_type, password_type);
440 return GetMostMatchingCachedVerdictWithHostAndPathMatching<
441 LoginReputationClientResponse>(url, type_key, content_settings_,
442 ContentSettingsType::PASSWORD_PROTECTION,
443 kVerdictProto, out_response);
444 }
445
GetStoredPhishGuardVerdictCount(LoginReputationClientRequest::TriggerType trigger_type)446 size_t VerdictCacheManager::GetStoredPhishGuardVerdictCount(
447 LoginReputationClientRequest::TriggerType trigger_type) {
448 DCHECK(content_settings_);
449 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
450 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
451 base::Optional<size_t>* stored_verdict_count =
452 trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
453 ? &stored_verdict_count_password_on_focus_
454 : &stored_verdict_count_password_entry_;
455 // If we have already computed this, return its value.
456 if (stored_verdict_count->has_value())
457 return stored_verdict_count->value();
458
459 ContentSettingsForOneType password_protection_settings;
460 content_settings_->GetSettingsForOneType(
461 ContentSettingsType::PASSWORD_PROTECTION, std::string(),
462 &password_protection_settings);
463 stored_verdict_count_password_on_focus_ = 0;
464 stored_verdict_count_password_entry_ = 0;
465 if (password_protection_settings.empty())
466 return 0;
467
468 for (const ContentSettingPatternSource& source :
469 password_protection_settings) {
470 std::unique_ptr<base::DictionaryValue> cache_dictionary =
471 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
472 GURL(source.primary_pattern.ToString()), GURL(),
473 ContentSettingsType::PASSWORD_PROTECTION, std::string(), nullptr));
474 if (cache_dictionary.get() && !cache_dictionary->empty()) {
475 for (const auto& item : cache_dictionary->DictItems()) {
476 if (item.first == base::StringPiece(kPasswordOnFocusCacheKey)) {
477 stored_verdict_count_password_on_focus_.value() +=
478 item.second.DictSize();
479 } else {
480 stored_verdict_count_password_entry_.value() +=
481 item.second.DictSize();
482 }
483 }
484 }
485 }
486
487 return stored_verdict_count->value();
488 }
489
CacheRealTimeUrlVerdict(const GURL & url,const RTLookupResponse & verdict,const base::Time & receive_time,bool store_old_cache)490 void VerdictCacheManager::CacheRealTimeUrlVerdict(
491 const GURL& url,
492 const RTLookupResponse& verdict,
493 const base::Time& receive_time,
494 bool store_old_cache) {
495 std::vector<std::string> visited_cache_expressions;
496 for (const auto& threat_info : verdict.threat_info()) {
497 // If |cache_expression_match_type| is unspecified, ignore this entry.
498 if (threat_info.cache_expression_match_type() ==
499 RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED &&
500 !store_old_cache) {
501 continue;
502 }
503 std::string cache_expression = store_old_cache
504 ? threat_info.cache_expression()
505 : GetCacheExpression(threat_info);
506 // TODO(crbug.com/1033692): For the same cache_expression, threat_info is in
507 // decreasing order of severity. To avoid lower severity threat being
508 // overridden by higher one, only store threat info that is first seen for a
509 // cache expression.
510 if (base::Contains(visited_cache_expressions, cache_expression))
511 continue;
512
513 GURL hostname = GetHostNameFromCacheExpression(cache_expression);
514 std::unique_ptr<base::DictionaryValue> cache_dictionary =
515 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
516 hostname, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
517 std::string(), nullptr));
518
519 if (!cache_dictionary)
520 cache_dictionary = std::make_unique<base::DictionaryValue>();
521
522 base::Value* verdict_dictionary = cache_dictionary->FindKeyOfType(
523 kRealTimeUrlCacheKey, base::Value::Type::DICTIONARY);
524 if (!verdict_dictionary) {
525 verdict_dictionary = cache_dictionary->SetKey(
526 kRealTimeUrlCacheKey, base::Value(base::Value::Type::DICTIONARY));
527 }
528
529 std::unique_ptr<base::DictionaryValue> threat_info_entry(
530 CreateDictionaryFromVerdict<RTLookupResponse::ThreatInfo>(
531 threat_info, receive_time, kRealTimeThreatInfoProto));
532 // Increases stored verdict count if we haven't seen this cache expression
533 // before.
534 if (!verdict_dictionary->FindKey(cache_expression)) {
535 stored_verdict_count_real_time_url_check_++;
536 }
537
538 verdict_dictionary->SetKey(
539 cache_expression,
540 base::Value::FromUniquePtrValue(std::move(threat_info_entry)));
541 visited_cache_expressions.push_back(cache_expression);
542
543 content_settings_->SetWebsiteSettingDefaultScope(
544 hostname, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
545 std::string(), std::move(cache_dictionary));
546 }
547 }
548
549 RTLookupResponse::ThreatInfo::VerdictType
GetCachedRealTimeUrlVerdict(const GURL & url,RTLookupResponse::ThreatInfo * out_threat_info)550 VerdictCacheManager::GetCachedRealTimeUrlVerdict(
551 const GURL& url,
552 RTLookupResponse::ThreatInfo* out_threat_info) {
553 return GetMostMatchingCachedVerdictWithHostAndPathMatching<
554 RTLookupResponse::ThreatInfo>(
555 url, kRealTimeUrlCacheKey, content_settings_,
556 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
557 kRealTimeThreatInfoProto, out_threat_info);
558 }
559
CleanUpExpiredVerdicts()560 void VerdictCacheManager::CleanUpExpiredVerdicts() {
561 DCHECK(content_settings_);
562
563 CleanUpExpiredPhishGuardVerdicts();
564 CleanUpExpiredRealTimeUrlCheckVerdicts();
565 }
566
CleanUpExpiredPhishGuardVerdicts()567 void VerdictCacheManager::CleanUpExpiredPhishGuardVerdicts() {
568 if (GetStoredPhishGuardVerdictCount(
569 LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) <= 0 &&
570 GetStoredPhishGuardVerdictCount(
571 LoginReputationClientRequest::PASSWORD_REUSE_EVENT) <= 0)
572 return;
573
574 ContentSettingsForOneType password_protection_settings;
575 content_settings_->GetSettingsForOneType(
576 ContentSettingsType::PASSWORD_PROTECTION, std::string(),
577 &password_protection_settings);
578
579 for (const ContentSettingPatternSource& source :
580 password_protection_settings) {
581 GURL primary_pattern_url = GURL(source.primary_pattern.ToString());
582 // Find all verdicts associated with this origin.
583 std::unique_ptr<base::DictionaryValue> cache_dictionary =
584 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
585 primary_pattern_url, GURL(),
586 ContentSettingsType::PASSWORD_PROTECTION, std::string(), nullptr));
587 bool has_expired_password_on_focus_entry = RemoveExpiredPhishGuardVerdicts(
588 LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE,
589 cache_dictionary.get());
590 bool has_expired_password_reuse_entry = RemoveExpiredPhishGuardVerdicts(
591 LoginReputationClientRequest::PASSWORD_REUSE_EVENT,
592 cache_dictionary.get());
593
594 if (cache_dictionary->size() == 0u) {
595 content_settings_->ClearSettingsForOneTypeWithPredicate(
596 ContentSettingsType::PASSWORD_PROTECTION, base::Time(),
597 base::Time::Max(),
598 base::BindRepeating(&OriginMatchPrimaryPattern, primary_pattern_url));
599 } else if (has_expired_password_on_focus_entry ||
600 has_expired_password_reuse_entry) {
601 // Set the website setting of this origin with the updated
602 // |cache_dictionary|.
603 content_settings_->SetWebsiteSettingDefaultScope(
604 primary_pattern_url, GURL(), ContentSettingsType::PASSWORD_PROTECTION,
605 std::string(), std::move(cache_dictionary));
606 }
607 }
608 }
609
CleanUpExpiredRealTimeUrlCheckVerdicts()610 void VerdictCacheManager::CleanUpExpiredRealTimeUrlCheckVerdicts() {
611 ContentSettingsForOneType safe_browsing_url_check_data_settings;
612 content_settings_->GetSettingsForOneType(
613 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, std::string(),
614 &safe_browsing_url_check_data_settings);
615
616 for (const ContentSettingPatternSource& source :
617 safe_browsing_url_check_data_settings) {
618 GURL primary_pattern_url = GURL(source.primary_pattern.ToString());
619 // Find all verdicts associated with this origin.
620 std::unique_ptr<base::DictionaryValue> cache_dictionary =
621 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
622 primary_pattern_url, GURL(),
623 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, std::string(),
624 nullptr));
625 bool has_expired_entry =
626 RemoveExpiredRealTimeUrlCheckVerdicts(cache_dictionary.get());
627
628 if (cache_dictionary->size() == 0u) {
629 content_settings_->ClearSettingsForOneTypeWithPredicate(
630 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, base::Time(),
631 base::Time::Max(),
632 base::BindRepeating(&OriginMatchPrimaryPattern, primary_pattern_url));
633 } else if (has_expired_entry) {
634 // Set the website setting of this origin with the updated
635 // |cache_dictionary|.
636 content_settings_->SetWebsiteSettingDefaultScope(
637 primary_pattern_url, GURL(),
638 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, std::string(),
639 std::move(cache_dictionary));
640 }
641 }
642 }
643
644 // Overridden from history::HistoryServiceObserver.
OnURLsDeleted(history::HistoryService * history_service,const history::DeletionInfo & deletion_info)645 void VerdictCacheManager::OnURLsDeleted(
646 history::HistoryService* history_service,
647 const history::DeletionInfo& deletion_info) {
648 base::PostTask(FROM_HERE, CreateTaskTraits(ThreadID::UI),
649 base::BindRepeating(
650 &VerdictCacheManager::RemoveContentSettingsOnURLsDeleted,
651 GetWeakPtr(), deletion_info.IsAllHistory(),
652 deletion_info.deleted_rows()));
653 }
654
655 // Overridden from history::HistoryServiceObserver.
HistoryServiceBeingDeleted(history::HistoryService * history_service)656 void VerdictCacheManager::HistoryServiceBeingDeleted(
657 history::HistoryService* history_service) {
658 history_service_observer_.Remove(history_service);
659 }
660
RemoveExpiredPhishGuardVerdicts(LoginReputationClientRequest::TriggerType trigger_type,base::DictionaryValue * cache_dictionary)661 bool VerdictCacheManager::RemoveExpiredPhishGuardVerdicts(
662 LoginReputationClientRequest::TriggerType trigger_type,
663 base::DictionaryValue* cache_dictionary) {
664 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
665 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
666 if (!cache_dictionary || cache_dictionary->empty())
667 return false;
668
669 size_t verdicts_removed = 0;
670 std::vector<std::string> empty_keys;
671 for (auto item : cache_dictionary->DictItems()) {
672 if (trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE &&
673 item.first == std::string(kPasswordOnFocusCacheKey)) {
674 size_t removed_cnt = RemoveExpiredEntries<LoginReputationClientResponse>(
675 &item.second, kVerdictProto);
676 verdicts_removed += removed_cnt;
677 if (stored_verdict_count_password_on_focus_.has_value())
678 stored_verdict_count_password_on_focus_.value() -= removed_cnt;
679 } else {
680 size_t removed_cnt = RemoveExpiredEntries<LoginReputationClientResponse>(
681 &item.second, kVerdictProto);
682 verdicts_removed += removed_cnt;
683 if (stored_verdict_count_password_entry_.has_value())
684 stored_verdict_count_password_entry_.value() -= removed_cnt;
685 }
686
687 if (item.second.DictSize() == 0U)
688 empty_keys.push_back(item.first);
689 }
690 for (const auto& key : empty_keys)
691 cache_dictionary->RemoveKey(key);
692
693 return verdicts_removed > 0U;
694 }
695
RemoveExpiredRealTimeUrlCheckVerdicts(base::DictionaryValue * cache_dictionary)696 bool VerdictCacheManager::RemoveExpiredRealTimeUrlCheckVerdicts(
697 base::DictionaryValue* cache_dictionary) {
698 if (!cache_dictionary || cache_dictionary->empty())
699 return false;
700
701 size_t verdicts_removed = 0;
702 std::vector<std::string> empty_keys;
703 for (auto item : cache_dictionary->DictItems()) {
704 size_t removed_cnt = RemoveExpiredEntries<RTLookupResponse::ThreatInfo>(
705 &item.second, kRealTimeThreatInfoProto);
706 verdicts_removed += removed_cnt;
707 stored_verdict_count_real_time_url_check_ -= removed_cnt;
708 if (item.second.DictSize() == 0U)
709 empty_keys.push_back(item.first);
710 }
711 for (const auto& key : empty_keys)
712 cache_dictionary->RemoveKey(key);
713
714 return verdicts_removed > 0U;
715 }
716
RemoveContentSettingsOnURLsDeleted(bool all_history,const history::URLRows & deleted_rows)717 void VerdictCacheManager::RemoveContentSettingsOnURLsDeleted(
718 bool all_history,
719 const history::URLRows& deleted_rows) {
720 DCHECK(CurrentlyOnThread(ThreadID::UI));
721 DCHECK(content_settings_);
722
723 if (all_history) {
724 content_settings_->ClearSettingsForOneType(
725 ContentSettingsType::PASSWORD_PROTECTION);
726 stored_verdict_count_password_on_focus_ = 0;
727 stored_verdict_count_password_entry_ = 0;
728 stored_verdict_count_real_time_url_check_ = 0;
729 content_settings_->ClearSettingsForOneType(
730 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA);
731 return;
732 }
733
734 // For now, if a URL is deleted from history, we simply remove all the
735 // cached verdicts of the same origin. This is a pretty aggressive deletion.
736 // We might revisit this logic later to decide if we want to only delete the
737 // cached verdict whose cache expression matches this URL.
738 for (const history::URLRow& row : deleted_rows) {
739 if (!row.url().SchemeIsHTTPOrHTTPS())
740 continue;
741
742 GURL url_key = GetHostNameWithHTTPScheme(row.url());
743 stored_verdict_count_password_on_focus_ =
744 GetStoredPhishGuardVerdictCount(
745 LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) -
746 GetPhishGuardVerdictCountForURL(
747 url_key, LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE);
748 stored_verdict_count_password_entry_ =
749 GetStoredPhishGuardVerdictCount(
750 LoginReputationClientRequest::PASSWORD_REUSE_EVENT) -
751 GetPhishGuardVerdictCountForURL(
752 url_key, LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
753 stored_verdict_count_real_time_url_check_ -=
754 GetRealTimeUrlCheckVerdictCountForURL(url_key);
755 content_settings_->ClearSettingsForOneTypeWithPredicate(
756 ContentSettingsType::PASSWORD_PROTECTION, base::Time(),
757 base::Time::Max(),
758 base::BindRepeating(&OriginMatchPrimaryPattern, url_key));
759 content_settings_->ClearSettingsForOneTypeWithPredicate(
760 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, base::Time(),
761 base::Time::Max(),
762 base::BindRepeating(&OriginMatchPrimaryPattern, url_key));
763 }
764 }
765
GetPhishGuardVerdictCountForURL(const GURL & url,LoginReputationClientRequest::TriggerType trigger_type)766 size_t VerdictCacheManager::GetPhishGuardVerdictCountForURL(
767 const GURL& url,
768 LoginReputationClientRequest::TriggerType trigger_type) {
769 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
770 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
771 std::unique_ptr<base::DictionaryValue> cache_dictionary =
772 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
773 url, GURL(), ContentSettingsType::PASSWORD_PROTECTION, std::string(),
774 nullptr));
775 if (!cache_dictionary || cache_dictionary->empty())
776 return 0;
777
778 int verdict_cnt = 0;
779 if (trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) {
780 base::Value* password_on_focus_dict = nullptr;
781 password_on_focus_dict =
782 cache_dictionary->FindKey(kPasswordOnFocusCacheKey);
783 verdict_cnt +=
784 password_on_focus_dict ? password_on_focus_dict->DictSize() : 0;
785 } else {
786 for (const auto& item : cache_dictionary->DictItems()) {
787 if (item.first == kPasswordOnFocusCacheKey)
788 continue;
789 verdict_cnt += item.second.DictSize();
790 }
791 }
792 return verdict_cnt;
793 }
794
GetRealTimeUrlCheckVerdictCountForURL(const GURL & url)795 size_t VerdictCacheManager::GetRealTimeUrlCheckVerdictCountForURL(
796 const GURL& url) {
797 std::unique_ptr<base::DictionaryValue> cache_dictionary =
798 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
799 url, GURL(), ContentSettingsType::PASSWORD_PROTECTION, std::string(),
800 nullptr));
801 if (!cache_dictionary || cache_dictionary->empty())
802 return 0;
803 base::Value* verdict_dictionary =
804 cache_dictionary->FindKey(kRealTimeUrlCacheKey);
805 return verdict_dictionary ? verdict_dictionary->DictSize() : 0;
806 }
807
808 } // namespace safe_browsing
809