1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/safe_browsing/core/verdict_cache_manager.h"
6
7 #include "base/base64.h"
8 #include "base/command_line.h"
9 #include "base/metrics/histogram_functions.h"
10 #include "base/metrics/histogram_macros.h"
11 #include "base/optional.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "base/task/post_task.h"
15 #include "base/time/time.h"
16 #include "components/history/core/browser/history_service_observer.h"
17 #include "components/safe_browsing/core/common/thread_utils.h"
18 #include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
19 #include "components/safe_browsing/core/proto/csd.pb.h"
20
21 namespace safe_browsing {
22
23 namespace {
24
// Dictionary keys used when a cached verdict is stored as a DictionaryValue.
constexpr char kCacheCreationTime[] = "cache_creation_time";
constexpr char kVerdictProto[] = "verdict_proto";
constexpr char kRealTimeThreatInfoProto[] = "rt_threat_info_proto";
constexpr char kPasswordOnFocusCacheKey[] = "password_on_focus_cache_key";
constexpr char kRealTimeUrlCacheKey[] = "real_time_url_cache_key";

// Command-line flag for caching an artificial unsafe verdict.
constexpr char kUnsafeUrlFlag[] = "mark_as_real_time_phishing";

// Cap on how many website settings a single cleanup pass rewrites. Touching
// too many at once could cause jank.
constexpr int kMaxRemovedEntriesCount = 1000;

// Delay, in seconds, between construction and the first cleanup.
constexpr int kCleanUpIntervalInitSecond = 120;

// Delay, in seconds, between one cleanup task and the next.
constexpr int kCleanUpIntervalSecond = 1800;
44
// Bundles the flags that decide whether a cache entry should be treated as a
// match for the URL being looked up, so the decision lives in one place.
// A default-constructed instance requires an exact match and reports neither
// the host nor the path as exact, so ShouldMatch() is false until updated.
struct MatchParams {
  // Returns true if the entry may be used: either non-exact matching is
  // permitted, or both the host and the path match exactly. Does not modify
  // the object, so it is callable on const instances.
  bool ShouldMatch() const {
    return !is_only_exact_match_allowed || (is_exact_host && is_exact_path);
  }

  // Indicates whether the current cache entry and the url have the same host.
  bool is_exact_host = false;
  // Indicates whether the current cache entry and the url have the same path.
  bool is_exact_path = false;
  // Indicates whether the current cache entry is only applicable for exact
  // match.
  bool is_only_exact_match_allowed = true;
};
65
66 // Given a URL of either http or https scheme, return its http://hostname.
67 // e.g., "https://www.foo.com:80/bar/test.cgi" -> "http://www.foo.com".
GetHostNameWithHTTPScheme(const GURL & url)68 GURL GetHostNameWithHTTPScheme(const GURL& url) {
69 DCHECK(url.SchemeIsHTTPOrHTTPS());
70 std::string result(url::kHttpScheme);
71 result.append(url::kStandardSchemeSeparator).append(url.host());
72 return GURL(result);
73 }
74 // e.g, ("www.foo.com", "/bar/test.cgi") -> "http://www.foo.com/bar/test/cgi"
GetUrlWithHostAndPath(const std::string & host,const std::string & path)75 GURL GetUrlWithHostAndPath(const std::string& host, const std::string& path) {
76 std::string result(url::kHttpScheme);
77 result.append(url::kStandardSchemeSeparator).append(host).append(path);
78 return GURL(result);
79 }
80
81 // e.g, "www.foo.com/bar/test/cgi" -> "http://www.foo.com"
GetHostNameFromCacheExpression(const std::string & cache_expression)82 GURL GetHostNameFromCacheExpression(const std::string& cache_expression) {
83 std::string cache_expression_url(url::kHttpScheme);
84 cache_expression_url.append(url::kStandardSchemeSeparator)
85 .append(cache_expression);
86 return GetHostNameWithHTTPScheme(GURL(cache_expression_url));
87 }
88
89 // Convert a Proto object into a DictionaryValue.
90 template <class T>
CreateDictionaryFromVerdict(const T & verdict,const base::Time & receive_time,const char * proto_name)91 std::unique_ptr<base::DictionaryValue> CreateDictionaryFromVerdict(
92 const T& verdict,
93 const base::Time& receive_time,
94 const char* proto_name) {
95 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
96 std::unique_ptr<base::DictionaryValue> result =
97 std::make_unique<base::DictionaryValue>();
98 result->SetInteger(kCacheCreationTime,
99 static_cast<int>(receive_time.ToDoubleT()));
100 std::string serialized_proto(verdict.SerializeAsString());
101 // Performs a base64 encoding on the serialized proto.
102 base::Base64Encode(serialized_proto, &serialized_proto);
103 result->SetString(proto_name, serialized_proto);
104 return result;
105 }
106
107 // Generate path variants of the given URL.
GeneratePathVariantsWithoutQuery(const GURL & url,std::vector<std::string> * paths)108 void GeneratePathVariantsWithoutQuery(const GURL& url,
109 std::vector<std::string>* paths) {
110 std::string canonical_path;
111 V4ProtocolManagerUtil::CanonicalizeUrl(
112 url, /*canonicalized_hostname=*/nullptr, &canonical_path,
113 /*canonicalized_query=*/nullptr);
114 V4ProtocolManagerUtil::GeneratePathVariantsToCheck(canonical_path,
115 std::string(), paths);
116 }
117
118 template <class T>
ParseVerdictEntry(base::Value * verdict_entry,int * out_verdict_received_time,T * out_verdict,const char * proto_name)119 bool ParseVerdictEntry(base::Value* verdict_entry,
120 int* out_verdict_received_time,
121 T* out_verdict,
122 const char* proto_name) {
123 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
124
125 if (!verdict_entry || !verdict_entry->is_dict() || !out_verdict)
126 return false;
127 base::Value* cache_creation_time_value =
128 verdict_entry->FindKey(kCacheCreationTime);
129
130 if (!cache_creation_time_value || !cache_creation_time_value->is_int())
131 return false;
132 *out_verdict_received_time = cache_creation_time_value->GetInt();
133
134 base::Value* verdict_proto_value = verdict_entry->FindKey(proto_name);
135 if (!verdict_proto_value || !verdict_proto_value->is_string())
136 return false;
137 std::string serialized_proto = verdict_proto_value->GetString();
138
139 return base::Base64Decode(serialized_proto, &serialized_proto) &&
140 out_verdict->ParseFromString(serialized_proto);
141 }
142
143 // Return the path of the cache expression. e.g.:
144 // "www.google.com" -> ""
145 // "www.google.com/abc" -> "/abc"
146 // "foo.com/foo/bar/" -> "/foo/bar/"
GetCacheExpressionPath(const std::string & cache_expression)147 std::string GetCacheExpressionPath(const std::string& cache_expression) {
148 DCHECK(!cache_expression.empty());
149 size_t first_slash_pos = cache_expression.find_first_of("/");
150 if (first_slash_pos == std::string::npos)
151 return "";
152 return cache_expression.substr(first_slash_pos);
153 }
154
155 // Returns the number of path segments in |cache_expression_path|.
156 // For example, return 0 for "/", since there is no path after the leading
157 // slash; return 3 for "/abc/def/gh.html".
GetPathDepth(const std::string & cache_expression_path)158 size_t GetPathDepth(const std::string& cache_expression_path) {
159 return base::SplitString(base::StringPiece(cache_expression_path), "/",
160 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY)
161 .size();
162 }
163
GetHostDepth(const std::string & hostname)164 size_t GetHostDepth(const std::string& hostname) {
165 return base::SplitString(base::StringPiece(hostname), ".",
166 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY)
167 .size();
168 }
169
PathVariantsMatchCacheExpression(const std::vector<std::string> & generated_paths,const std::string & cache_expression_path)170 bool PathVariantsMatchCacheExpression(
171 const std::vector<std::string>& generated_paths,
172 const std::string& cache_expression_path) {
173 return base::Contains(generated_paths, cache_expression_path);
174 }
175
IsCacheExpired(int cache_creation_time,int cache_duration)176 bool IsCacheExpired(int cache_creation_time, int cache_duration) {
177 // Note that we assume client's clock is accurate or almost accurate.
178 return base::Time::Now().ToDoubleT() >
179 static_cast<double>(cache_creation_time + cache_duration);
180 }
181
182 template <class T>
RemoveExpiredEntries(base::Value * verdict_dictionary,const char * proto_name)183 size_t RemoveExpiredEntries(base::Value* verdict_dictionary,
184 const char* proto_name) {
185 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
186 std::vector<std::string> expired_keys;
187 for (const auto& item : verdict_dictionary->DictItems()) {
188 int verdict_received_time;
189 T verdict;
190 if (!ParseVerdictEntry<T>(&item.second, &verdict_received_time, &verdict,
191 proto_name) ||
192 IsCacheExpired(verdict_received_time, verdict.cache_duration_sec())) {
193 expired_keys.push_back(item.first);
194 }
195 }
196
197 for (const std::string& key : expired_keys)
198 verdict_dictionary->RemoveKey(key);
199
200 return expired_keys.size();
201 }
202
GetKeyOfTypeFromTriggerType(LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type)203 std::string GetKeyOfTypeFromTriggerType(
204 LoginReputationClientRequest::TriggerType trigger_type,
205 ReusedPasswordAccountType password_type) {
206 return trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
207 ? kPasswordOnFocusCacheKey
208 : base::NumberToString(
209 static_cast<std::underlying_type_t<
210 ReusedPasswordAccountType::AccountType>>(
211 password_type.account_type()));
212 }
213
214 // If the verdict doesn't have |cache_expression_match_type| field, always
215 // interpret it as exact match only.
216 template <typename T>
IsOnlyExactMatchAllowed(T verdict)217 bool IsOnlyExactMatchAllowed(T verdict) {
218 NOTREACHED();
219 return true;
220 }
221 template <>
IsOnlyExactMatchAllowed(RTLookupResponse::ThreatInfo verdict)222 bool IsOnlyExactMatchAllowed<RTLookupResponse::ThreatInfo>(
223 RTLookupResponse::ThreatInfo verdict) {
224 return verdict.cache_expression_match_type() ==
225 RTLookupResponse::ThreatInfo::EXACT_MATCH;
226 }
227 // Always do fuzzy matching for password protection verdicts.
228 template <>
IsOnlyExactMatchAllowed(LoginReputationClientResponse verdict)229 bool IsOnlyExactMatchAllowed<LoginReputationClientResponse>(
230 LoginReputationClientResponse verdict) {
231 return false;
232 }
233
234 template <typename T>
GetCacheExpression(T verdict)235 std::string GetCacheExpression(T verdict) {
236 NOTREACHED();
237 return "";
238 }
239
240 template <>
GetCacheExpression(RTLookupResponse::ThreatInfo verdict)241 std::string GetCacheExpression<RTLookupResponse::ThreatInfo>(
242 RTLookupResponse::ThreatInfo verdict) {
243 // The old cache doesn't have |cache_expression_using_match_type| field
244 // setup, so it should fallback to |cache_expression| field. This check
245 // should be removed once |cache_expression| field is deprecated in
246 // RTLookupResponse.
247 if (verdict.cache_expression_match_type() ==
248 RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED)
249 return verdict.cache_expression();
250 return verdict.cache_expression_using_match_type();
251 }
252
253 template <>
GetCacheExpression(LoginReputationClientResponse verdict)254 std::string GetCacheExpression<LoginReputationClientResponse>(
255 LoginReputationClientResponse verdict) {
256 return verdict.cache_expression();
257 }
258
259 template <class T>
GetMostMatchingCachedVerdictWithPathMatching(const GURL & url,const std::string & type_key,scoped_refptr<HostContentSettingsMap> content_settings,const ContentSettingsType contents_setting_type,const char * proto_name,T * out_response,MatchParams match_params)260 typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
261 const GURL& url,
262 const std::string& type_key,
263 scoped_refptr<HostContentSettingsMap> content_settings,
264 const ContentSettingsType contents_setting_type,
265 const char* proto_name,
266 T* out_response,
267 MatchParams match_params) {
268 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
269
270 GURL hostname = GetHostNameWithHTTPScheme(url);
271 std::unique_ptr<base::DictionaryValue> cache_dictionary =
272 base::DictionaryValue::From(content_settings->GetWebsiteSetting(
273 hostname, GURL(), contents_setting_type, nullptr));
274
275 if (!cache_dictionary || cache_dictionary->empty())
276 return T::VERDICT_TYPE_UNSPECIFIED;
277
278 base::Value* verdict_dictionary =
279 cache_dictionary->FindKeyOfType(type_key, base::Value::Type::DICTIONARY);
280 if (!verdict_dictionary) {
281 return T::VERDICT_TYPE_UNSPECIFIED;
282 }
283
284 std::vector<std::string> paths;
285 GeneratePathVariantsWithoutQuery(url, &paths);
286
287 std::string root_path;
288 V4ProtocolManagerUtil::CanonicalizeUrl(
289 url, /*canonicalized_hostname*/ nullptr, &root_path,
290 /*canonicalized_query*/ nullptr);
291
292 int max_path_depth = -1;
293 typename T::VerdictType most_matching_verdict_type =
294 T::VERDICT_TYPE_UNSPECIFIED;
295 // For all the verdicts of the same origin, we key them by |cache_expression|.
296 // Its corresponding value is a DictionaryValue contains its creation time and
297 // the serialized verdict proto.
298 for (const auto& item : verdict_dictionary->DictItems()) {
299 int verdict_received_time;
300 T verdict;
301 // Ignore any entry that we cannot parse. These invalid entries will be
302 // cleaned up during shutdown.
303 if (!ParseVerdictEntry<T>(&item.second, &verdict_received_time, &verdict,
304 proto_name))
305 continue;
306 // Since verdict content settings are keyed by origin, we only need to
307 // compare the path part of the cache_expression and the given url.
308 std::string cache_expression_path =
309 GetCacheExpressionPath(GetCacheExpression(verdict));
310
311 match_params.is_only_exact_match_allowed = IsOnlyExactMatchAllowed(verdict);
312 match_params.is_exact_path = (root_path == cache_expression_path);
313 // Finds the most specific match.
314 int path_depth = static_cast<int>(GetPathDepth(cache_expression_path));
315 if (path_depth > max_path_depth &&
316 PathVariantsMatchCacheExpression(paths, cache_expression_path) &&
317 match_params.ShouldMatch()) {
318 max_path_depth = path_depth;
319 // If the most matching verdict is expired, set the result to
320 // VERDICT_TYPE_UNSPECIFIED.
321 most_matching_verdict_type =
322 IsCacheExpired(verdict_received_time, verdict.cache_duration_sec())
323 ? T::VERDICT_TYPE_UNSPECIFIED
324 : verdict.verdict_type();
325 out_response->CopyFrom(verdict);
326 }
327 }
328 return most_matching_verdict_type;
329 }
330
331 template <class T>
GetMostMatchingCachedVerdictWithHostAndPathMatching(const GURL & url,const std::string & type_key,scoped_refptr<HostContentSettingsMap> content_settings,const ContentSettingsType contents_setting_type,const char * proto_name,T * out_response)332 typename T::VerdictType GetMostMatchingCachedVerdictWithHostAndPathMatching(
333 const GURL& url,
334 const std::string& type_key,
335 scoped_refptr<HostContentSettingsMap> content_settings,
336 const ContentSettingsType contents_setting_type,
337 const char* proto_name,
338 T* out_response) {
339 DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
340 auto most_matching_verdict_type = T::VERDICT_TYPE_UNSPECIFIED;
341 MatchParams match_params;
342
343 std::string root_host, root_path;
344 V4ProtocolManagerUtil::CanonicalizeUrl(url, &root_host, &root_path,
345 /*canonicalized_query*/ nullptr);
346 std::vector<std::string> host_variants;
347 V4ProtocolManagerUtil::GenerateHostVariantsToCheck(root_host, &host_variants);
348 int max_path_depth = -1;
349 for (const auto& host : host_variants) {
350 int depth = static_cast<int>(GetHostDepth(host));
351 GURL url_to_check = GetUrlWithHostAndPath(host, root_path);
352 match_params.is_exact_host = (root_host == host);
353 auto verdict_type = GetMostMatchingCachedVerdictWithPathMatching<T>(
354 url_to_check, type_key, content_settings, contents_setting_type,
355 proto_name, out_response, match_params);
356 if (depth > max_path_depth && verdict_type != T::VERDICT_TYPE_UNSPECIFIED) {
357 max_path_depth = depth;
358 most_matching_verdict_type = verdict_type;
359 }
360 }
361
362 return most_matching_verdict_type;
363 }
364
365 } // namespace
366
VerdictCacheManager(history::HistoryService * history_service,scoped_refptr<HostContentSettingsMap> content_settings)367 VerdictCacheManager::VerdictCacheManager(
368 history::HistoryService* history_service,
369 scoped_refptr<HostContentSettingsMap> content_settings)
370 : stored_verdict_count_password_on_focus_(base::nullopt),
371 stored_verdict_count_password_entry_(base::nullopt),
372 stored_verdict_count_real_time_url_check_(base::nullopt),
373 content_settings_(content_settings) {
374 if (history_service)
375 history_service_observation_.Observe(history_service);
376 if (!content_settings->IsOffTheRecord()) {
377 ScheduleNextCleanUpAfterInterval(
378 base::TimeDelta::FromSeconds(kCleanUpIntervalInitSecond));
379 }
380 CacheArtificialVerdict();
381 }
382
Shutdown()383 void VerdictCacheManager::Shutdown() {
384 CleanUpExpiredVerdicts();
385 if (history_service_observation_.IsObserving())
386 history_service_observation_.RemoveObservation();
387 weak_factory_.InvalidateWeakPtrs();
388 }
389
~VerdictCacheManager()390 VerdictCacheManager::~VerdictCacheManager() {}
391
CachePhishGuardVerdict(LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type,const LoginReputationClientResponse & verdict,const base::Time & receive_time)392 void VerdictCacheManager::CachePhishGuardVerdict(
393 LoginReputationClientRequest::TriggerType trigger_type,
394 ReusedPasswordAccountType password_type,
395 const LoginReputationClientResponse& verdict,
396 const base::Time& receive_time) {
397 DCHECK(content_settings_);
398 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
399 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
400
401 GURL hostname = GetHostNameFromCacheExpression(GetCacheExpression(verdict));
402
403 std::unique_ptr<base::DictionaryValue> cache_dictionary =
404 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
405 hostname, GURL(), ContentSettingsType::PASSWORD_PROTECTION, nullptr));
406
407 if (!cache_dictionary)
408 cache_dictionary = std::make_unique<base::DictionaryValue>();
409
410 std::unique_ptr<base::DictionaryValue> verdict_entry(
411 CreateDictionaryFromVerdict<LoginReputationClientResponse>(
412 verdict, receive_time, kVerdictProto));
413
414 std::string type_key =
415 GetKeyOfTypeFromTriggerType(trigger_type, password_type);
416 base::Value* verdict_dictionary =
417 cache_dictionary->FindKeyOfType(type_key, base::Value::Type::DICTIONARY);
418 if (!verdict_dictionary) {
419 verdict_dictionary = cache_dictionary->SetKey(
420 type_key, base::Value(base::Value::Type::DICTIONARY));
421 }
422
423 // Increases stored verdict count if we haven't seen this cache expression
424 // before.
425 if (!verdict_dictionary->FindKey(GetCacheExpression(verdict))) {
426 base::Optional<size_t>* stored_verdict_count =
427 trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
428 ? &stored_verdict_count_password_on_focus_
429 : &stored_verdict_count_password_entry_;
430 *stored_verdict_count = GetStoredPhishGuardVerdictCount(trigger_type) + 1;
431 }
432
433 // If same cache_expression is already in this verdict_dictionary, we simply
434 // override it.
435 verdict_dictionary->SetKey(
436 GetCacheExpression(verdict),
437 base::Value::FromUniquePtrValue(std::move(verdict_entry)));
438 content_settings_->SetWebsiteSettingDefaultScope(
439 hostname, GURL(), ContentSettingsType::PASSWORD_PROTECTION,
440 std::move(cache_dictionary));
441 }
442
443 LoginReputationClientResponse::VerdictType
GetCachedPhishGuardVerdict(const GURL & url,LoginReputationClientRequest::TriggerType trigger_type,ReusedPasswordAccountType password_type,LoginReputationClientResponse * out_response)444 VerdictCacheManager::GetCachedPhishGuardVerdict(
445 const GURL& url,
446 LoginReputationClientRequest::TriggerType trigger_type,
447 ReusedPasswordAccountType password_type,
448 LoginReputationClientResponse* out_response) {
449 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
450 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
451
452 std::string type_key =
453 GetKeyOfTypeFromTriggerType(trigger_type, password_type);
454 return GetMostMatchingCachedVerdictWithHostAndPathMatching<
455 LoginReputationClientResponse>(url, type_key, content_settings_,
456 ContentSettingsType::PASSWORD_PROTECTION,
457 kVerdictProto, out_response);
458 }
459
GetStoredPhishGuardVerdictCount(LoginReputationClientRequest::TriggerType trigger_type)460 size_t VerdictCacheManager::GetStoredPhishGuardVerdictCount(
461 LoginReputationClientRequest::TriggerType trigger_type) {
462 DCHECK(content_settings_);
463 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
464 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
465 base::Optional<size_t>* stored_verdict_count =
466 trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE
467 ? &stored_verdict_count_password_on_focus_
468 : &stored_verdict_count_password_entry_;
469 // If we have already computed this, return its value.
470 if (stored_verdict_count->has_value())
471 return stored_verdict_count->value();
472
473 ContentSettingsForOneType settings;
474 content_settings_->GetSettingsForOneType(
475 ContentSettingsType::PASSWORD_PROTECTION, &settings);
476 stored_verdict_count_password_on_focus_ = 0;
477 stored_verdict_count_password_entry_ = 0;
478 for (const ContentSettingPatternSource& source : settings) {
479 for (const auto& item : source.setting_value.DictItems()) {
480 if (item.first == base::StringPiece(kPasswordOnFocusCacheKey)) {
481 stored_verdict_count_password_on_focus_.value() +=
482 item.second.DictSize();
483 } else {
484 stored_verdict_count_password_entry_.value() += item.second.DictSize();
485 }
486 }
487 }
488 return stored_verdict_count->value();
489 }
490
GetStoredRealTimeUrlCheckVerdictCount()491 size_t VerdictCacheManager::GetStoredRealTimeUrlCheckVerdictCount() {
492 // If we have already computed this, return its value.
493 if (stored_verdict_count_real_time_url_check_.has_value())
494 return stored_verdict_count_real_time_url_check_.value();
495
496 ContentSettingsForOneType settings;
497 content_settings_->GetSettingsForOneType(
498 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA, &settings);
499 stored_verdict_count_real_time_url_check_ = 0;
500 for (const ContentSettingPatternSource& source : settings) {
501 for (const auto& item : source.setting_value.DictItems()) {
502 if (item.first == base::StringPiece(kRealTimeUrlCacheKey)) {
503 stored_verdict_count_real_time_url_check_.value() +=
504 item.second.DictSize();
505 }
506 }
507 }
508 return stored_verdict_count_real_time_url_check_.value();
509 }
510
CacheRealTimeUrlVerdict(const GURL & url,const RTLookupResponse & verdict,const base::Time & receive_time,bool store_old_cache)511 void VerdictCacheManager::CacheRealTimeUrlVerdict(
512 const GURL& url,
513 const RTLookupResponse& verdict,
514 const base::Time& receive_time,
515 bool store_old_cache) {
516 std::vector<std::string> visited_cache_expressions;
517 for (const auto& threat_info : verdict.threat_info()) {
518 // If |cache_expression_match_type| is unspecified, ignore this entry.
519 if (threat_info.cache_expression_match_type() ==
520 RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED &&
521 !store_old_cache) {
522 continue;
523 }
524 std::string cache_expression = store_old_cache
525 ? threat_info.cache_expression()
526 : GetCacheExpression(threat_info);
527 // TODO(crbug.com/1033692): For the same cache_expression, threat_info is in
528 // decreasing order of severity. To avoid lower severity threat being
529 // overridden by higher one, only store threat info that is first seen for a
530 // cache expression.
531 if (base::Contains(visited_cache_expressions, cache_expression))
532 continue;
533
534 GURL hostname = GetHostNameFromCacheExpression(cache_expression);
535 std::unique_ptr<base::DictionaryValue> cache_dictionary =
536 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
537 hostname, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
538 nullptr));
539
540 if (!cache_dictionary)
541 cache_dictionary = std::make_unique<base::DictionaryValue>();
542
543 base::Value* verdict_dictionary = cache_dictionary->FindKeyOfType(
544 kRealTimeUrlCacheKey, base::Value::Type::DICTIONARY);
545 if (!verdict_dictionary) {
546 verdict_dictionary = cache_dictionary->SetKey(
547 kRealTimeUrlCacheKey, base::Value(base::Value::Type::DICTIONARY));
548 }
549
550 std::unique_ptr<base::DictionaryValue> threat_info_entry(
551 CreateDictionaryFromVerdict<RTLookupResponse::ThreatInfo>(
552 threat_info, receive_time, kRealTimeThreatInfoProto));
553 // Increases stored verdict count if we haven't seen this cache expression
554 // before.
555 if (!verdict_dictionary->FindKey(cache_expression)) {
556 stored_verdict_count_real_time_url_check_ =
557 GetStoredRealTimeUrlCheckVerdictCount() + 1;
558 }
559
560 verdict_dictionary->SetKey(
561 cache_expression,
562 base::Value::FromUniquePtrValue(std::move(threat_info_entry)));
563 visited_cache_expressions.push_back(cache_expression);
564
565 content_settings_->SetWebsiteSettingDefaultScope(
566 hostname, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
567 std::move(cache_dictionary));
568 }
569 base::UmaHistogramCounts10000(
570 "SafeBrowsing.RT.CacheManager.RealTimeVerdictCount",
571 GetStoredRealTimeUrlCheckVerdictCount());
572 }
573
574 RTLookupResponse::ThreatInfo::VerdictType
GetCachedRealTimeUrlVerdict(const GURL & url,RTLookupResponse::ThreatInfo * out_threat_info)575 VerdictCacheManager::GetCachedRealTimeUrlVerdict(
576 const GURL& url,
577 RTLookupResponse::ThreatInfo* out_threat_info) {
578 return GetMostMatchingCachedVerdictWithHostAndPathMatching<
579 RTLookupResponse::ThreatInfo>(
580 url, kRealTimeUrlCacheKey, content_settings_,
581 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
582 kRealTimeThreatInfoProto, out_threat_info);
583 }
584
ScheduleNextCleanUpAfterInterval(base::TimeDelta interval)585 void VerdictCacheManager::ScheduleNextCleanUpAfterInterval(
586 base::TimeDelta interval) {
587 cleanup_timer_.Stop();
588 cleanup_timer_.Start(FROM_HERE, interval, this,
589 &VerdictCacheManager::CleanUpExpiredVerdicts);
590 }
591
CleanUpExpiredVerdicts()592 void VerdictCacheManager::CleanUpExpiredVerdicts() {
593 DCHECK(content_settings_);
594 SCOPED_UMA_HISTOGRAM_TIMER("SafeBrowsing.RT.CacheManager.CleanUpTime");
595 CleanUpExpiredPhishGuardVerdicts();
596 CleanUpExpiredRealTimeUrlCheckVerdicts();
597 ScheduleNextCleanUpAfterInterval(
598 base::TimeDelta::FromSeconds(kCleanUpIntervalSecond));
599 }
600
CleanUpExpiredPhishGuardVerdicts()601 void VerdictCacheManager::CleanUpExpiredPhishGuardVerdicts() {
602 if (GetStoredPhishGuardVerdictCount(
603 LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) <= 0 &&
604 GetStoredPhishGuardVerdictCount(
605 LoginReputationClientRequest::PASSWORD_REUSE_EVENT) <= 0)
606 return;
607
608 ContentSettingsForOneType password_protection_settings;
609 content_settings_->GetSettingsForOneType(
610 ContentSettingsType::PASSWORD_PROTECTION, &password_protection_settings);
611
612 int removed_count = 0;
613 for (ContentSettingPatternSource& source : password_protection_settings) {
614 // Find all verdicts associated with this origin.
615 std::unique_ptr<base::Value> cache_dictionary =
616 base::Value::ToUniquePtrValue(std::move(source.setting_value));
617 bool has_expired_password_on_focus_entry = RemoveExpiredPhishGuardVerdicts(
618 LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE,
619 cache_dictionary.get());
620 bool has_expired_password_reuse_entry = RemoveExpiredPhishGuardVerdicts(
621 LoginReputationClientRequest::PASSWORD_REUSE_EVENT,
622 cache_dictionary.get());
623
624 if (!cache_dictionary->DictEmpty() &&
625 !has_expired_password_on_focus_entry &&
626 !has_expired_password_reuse_entry) {
627 continue;
628 }
629
630 // Set the website setting of this origin with the updated
631 // |cache_dictionary|.
632 content_settings_->SetWebsiteSettingCustomScope(
633 source.primary_pattern, source.secondary_pattern,
634 ContentSettingsType::PASSWORD_PROTECTION,
635 cache_dictionary->DictEmpty() ? nullptr : std::move(cache_dictionary));
636
637 if ((++removed_count) == kMaxRemovedEntriesCount) {
638 return;
639 }
640 }
641 }
642
CleanUpExpiredRealTimeUrlCheckVerdicts()643 void VerdictCacheManager::CleanUpExpiredRealTimeUrlCheckVerdicts() {
644 if (GetStoredRealTimeUrlCheckVerdictCount() == 0) {
645 return;
646 }
647 ContentSettingsForOneType safe_browsing_url_check_data_settings;
648 content_settings_->GetSettingsForOneType(
649 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
650 &safe_browsing_url_check_data_settings);
651
652 int removed_count = 0;
653 for (ContentSettingPatternSource& source :
654 safe_browsing_url_check_data_settings) {
655 // Find all verdicts associated with this origin.
656 std::unique_ptr<base::Value> cache_dictionary =
657 base::Value::ToUniquePtrValue(std::move(source.setting_value));
658 bool has_expired_entry =
659 RemoveExpiredRealTimeUrlCheckVerdicts(cache_dictionary.get());
660
661 if (!cache_dictionary->DictEmpty() && !has_expired_entry) {
662 continue;
663 }
664
665 // Set the website setting of this origin with the updated
666 // |cache_dictionary|.
667 content_settings_->SetWebsiteSettingCustomScope(
668 source.primary_pattern, source.secondary_pattern,
669 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
670 cache_dictionary->DictEmpty() ? nullptr : std::move(cache_dictionary));
671
672 if ((++removed_count) == kMaxRemovedEntriesCount) {
673 return;
674 }
675 }
676 }
677
678 // Overridden from history::HistoryServiceObserver.
OnURLsDeleted(history::HistoryService * history_service,const history::DeletionInfo & deletion_info)679 void VerdictCacheManager::OnURLsDeleted(
680 history::HistoryService* history_service,
681 const history::DeletionInfo& deletion_info) {
682 base::PostTask(FROM_HERE, CreateTaskTraits(ThreadID::UI),
683 base::BindRepeating(
684 &VerdictCacheManager::RemoveContentSettingsOnURLsDeleted,
685 GetWeakPtr(), deletion_info.IsAllHistory(),
686 deletion_info.deleted_rows()));
687 }
688
689 // Overridden from history::HistoryServiceObserver.
HistoryServiceBeingDeleted(history::HistoryService * history_service)690 void VerdictCacheManager::HistoryServiceBeingDeleted(
691 history::HistoryService* history_service) {
692 DCHECK(history_service_observation_.IsObservingSource(history_service));
693 history_service_observation_.RemoveObservation();
694 }
695
RemoveExpiredPhishGuardVerdicts(LoginReputationClientRequest::TriggerType trigger_type,base::Value * cache_dictionary)696 bool VerdictCacheManager::RemoveExpiredPhishGuardVerdicts(
697 LoginReputationClientRequest::TriggerType trigger_type,
698 base::Value* cache_dictionary) {
699 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
700 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
701 if (!cache_dictionary || cache_dictionary->DictEmpty())
702 return false;
703
704 size_t verdicts_removed = 0;
705 std::vector<std::string> empty_keys;
706 for (auto item : cache_dictionary->DictItems()) {
707 if (trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE &&
708 item.first == std::string(kPasswordOnFocusCacheKey)) {
709 size_t removed_cnt = RemoveExpiredEntries<LoginReputationClientResponse>(
710 &item.second, kVerdictProto);
711 verdicts_removed += removed_cnt;
712 if (stored_verdict_count_password_on_focus_.has_value())
713 stored_verdict_count_password_on_focus_.value() -= removed_cnt;
714 } else {
715 size_t removed_cnt = RemoveExpiredEntries<LoginReputationClientResponse>(
716 &item.second, kVerdictProto);
717 verdicts_removed += removed_cnt;
718 if (stored_verdict_count_password_entry_.has_value())
719 stored_verdict_count_password_entry_.value() -= removed_cnt;
720 }
721
722 if (item.second.DictSize() == 0U)
723 empty_keys.push_back(item.first);
724 }
725 for (const auto& key : empty_keys)
726 cache_dictionary->RemoveKey(key);
727
728 return verdicts_removed > 0U;
729 }
730
RemoveExpiredRealTimeUrlCheckVerdicts(base::Value * cache_dictionary)731 bool VerdictCacheManager::RemoveExpiredRealTimeUrlCheckVerdicts(
732 base::Value* cache_dictionary) {
733 if (!cache_dictionary || cache_dictionary->DictEmpty())
734 return false;
735
736 size_t verdicts_removed = 0;
737 std::vector<std::string> empty_keys;
738 for (auto item : cache_dictionary->DictItems()) {
739 size_t removed_cnt = RemoveExpiredEntries<RTLookupResponse::ThreatInfo>(
740 &item.second, kRealTimeThreatInfoProto);
741 verdicts_removed += removed_cnt;
742 if (stored_verdict_count_real_time_url_check_.has_value())
743 stored_verdict_count_real_time_url_check_.value() -= removed_cnt;
744 if (item.second.DictSize() == 0U)
745 empty_keys.push_back(item.first);
746 }
747 for (const auto& key : empty_keys)
748 cache_dictionary->RemoveKey(key);
749
750 return verdicts_removed > 0U;
751 }
752
RemoveContentSettingsOnURLsDeleted(bool all_history,const history::URLRows & deleted_rows)753 void VerdictCacheManager::RemoveContentSettingsOnURLsDeleted(
754 bool all_history,
755 const history::URLRows& deleted_rows) {
756 DCHECK(CurrentlyOnThread(ThreadID::UI));
757 DCHECK(content_settings_);
758
759 if (all_history) {
760 content_settings_->ClearSettingsForOneType(
761 ContentSettingsType::PASSWORD_PROTECTION);
762 stored_verdict_count_password_on_focus_ = 0;
763 stored_verdict_count_password_entry_ = 0;
764 stored_verdict_count_real_time_url_check_ = 0;
765 content_settings_->ClearSettingsForOneType(
766 ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA);
767 return;
768 }
769
770 // For now, if a URL is deleted from history, we simply remove all the
771 // cached verdicts of the same origin. This is a pretty aggressive deletion.
772 // We might revisit this logic later to decide if we want to only delete the
773 // cached verdict whose cache expression matches this URL.
774 for (const history::URLRow& row : deleted_rows) {
775 if (!row.url().SchemeIsHTTPOrHTTPS())
776 continue;
777
778 GURL url_key = GetHostNameWithHTTPScheme(row.url());
779 stored_verdict_count_password_on_focus_ =
780 GetStoredPhishGuardVerdictCount(
781 LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) -
782 GetPhishGuardVerdictCountForURL(
783 url_key, LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE);
784 stored_verdict_count_password_entry_ =
785 GetStoredPhishGuardVerdictCount(
786 LoginReputationClientRequest::PASSWORD_REUSE_EVENT) -
787 GetPhishGuardVerdictCountForURL(
788 url_key, LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
789 stored_verdict_count_real_time_url_check_ =
790 GetStoredRealTimeUrlCheckVerdictCount() -
791 GetRealTimeUrlCheckVerdictCountForURL(url_key);
792 content_settings_->SetWebsiteSettingDefaultScope(
793 url_key, GURL(), ContentSettingsType::PASSWORD_PROTECTION, nullptr);
794 content_settings_->SetWebsiteSettingDefaultScope(
795 url_key, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
796 nullptr);
797 }
798 }
799
GetPhishGuardVerdictCountForURL(const GURL & url,LoginReputationClientRequest::TriggerType trigger_type)800 size_t VerdictCacheManager::GetPhishGuardVerdictCountForURL(
801 const GURL& url,
802 LoginReputationClientRequest::TriggerType trigger_type) {
803 DCHECK(trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE ||
804 trigger_type == LoginReputationClientRequest::PASSWORD_REUSE_EVENT);
805 std::unique_ptr<base::DictionaryValue> cache_dictionary =
806 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
807 url, GURL(), ContentSettingsType::PASSWORD_PROTECTION, nullptr));
808 if (!cache_dictionary || cache_dictionary->empty())
809 return 0;
810
811 int verdict_cnt = 0;
812 if (trigger_type == LoginReputationClientRequest::UNFAMILIAR_LOGIN_PAGE) {
813 base::Value* password_on_focus_dict = nullptr;
814 password_on_focus_dict =
815 cache_dictionary->FindKey(kPasswordOnFocusCacheKey);
816 verdict_cnt +=
817 password_on_focus_dict ? password_on_focus_dict->DictSize() : 0;
818 } else {
819 for (const auto& item : cache_dictionary->DictItems()) {
820 if (item.first == kPasswordOnFocusCacheKey)
821 continue;
822 verdict_cnt += item.second.DictSize();
823 }
824 }
825 return verdict_cnt;
826 }
827
GetRealTimeUrlCheckVerdictCountForURL(const GURL & url)828 size_t VerdictCacheManager::GetRealTimeUrlCheckVerdictCountForURL(
829 const GURL& url) {
830 std::unique_ptr<base::DictionaryValue> cache_dictionary =
831 base::DictionaryValue::From(content_settings_->GetWebsiteSetting(
832 url, GURL(), ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
833 nullptr));
834 if (!cache_dictionary || cache_dictionary->empty())
835 return 0;
836 base::Value* verdict_dictionary =
837 cache_dictionary->FindKey(kRealTimeUrlCacheKey);
838 return verdict_dictionary ? verdict_dictionary->DictSize() : 0;
839 }
840
CacheArtificialVerdict()841 void VerdictCacheManager::CacheArtificialVerdict() {
842 std::string phishing_url_string =
843 base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
844 kUnsafeUrlFlag);
845 if (phishing_url_string.empty())
846 return;
847
848 GURL artificial_unsafe_url(phishing_url_string);
849 if (!artificial_unsafe_url.is_valid())
850 return;
851
852 has_artificial_unsafe_url_ = true;
853
854 RTLookupResponse response;
855 RTLookupResponse::ThreatInfo* threat_info = response.add_threat_info();
856 threat_info->set_verdict_type(RTLookupResponse::ThreatInfo::DANGEROUS);
857 threat_info->set_threat_type(
858 RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING);
859 threat_info->set_cache_duration_sec(3000);
860 threat_info->set_cache_expression_using_match_type(
861 artificial_unsafe_url.GetContent());
862 threat_info->set_cache_expression_match_type(
863 RTLookupResponse::ThreatInfo::EXACT_MATCH);
864 RemoveContentSettingsOnURLsDeleted(/*all_history=*/false,
865 {history::URLRow(artificial_unsafe_url)});
866 CacheRealTimeUrlVerdict(artificial_unsafe_url, response, base::Time::Now(),
867 /*store_old_cache=*/false);
868 }
869
StopCleanUpTimerForTesting()870 void VerdictCacheManager::StopCleanUpTimerForTesting() {
871 if (cleanup_timer_.IsRunning()) {
872 cleanup_timer_.AbandonAndStop();
873 }
874 }
875
876 // static
877 bool VerdictCacheManager::has_artificial_unsafe_url_ = false;
878
879 // static
has_artificial_unsafe_url()880 bool VerdictCacheManager::has_artificial_unsafe_url() {
881 return has_artificial_unsafe_url_;
882 }
883
884 } // namespace safe_browsing
885