1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/reputation/reputation_service.h"
6 
7 #include <cstddef>
8 #include <string>
9 #include <utility>
10 
11 #include "base/macros.h"
12 #include "base/memory/scoped_refptr.h"
13 #include "base/memory/singleton.h"
14 #include "chrome/browser/lookalikes/lookalike_url_blocking_page.h"
15 #include "chrome/browser/lookalikes/lookalike_url_navigation_throttle.h"
16 #include "chrome/browser/lookalikes/lookalike_url_service.h"
17 #include "chrome/browser/profiles/incognito_helpers.h"
18 #include "chrome/browser/profiles/profile.h"
19 #include "chrome/browser/reputation/local_heuristics.h"
20 #include "chrome/browser/safe_browsing/user_interaction_observer.h"
21 #include "components/keyed_service/content/browser_context_dependency_manager.h"
22 #include "components/keyed_service/content/browser_context_keyed_service_factory.h"
23 #include "components/lookalikes/core/lookalike_url_util.h"
24 #include "components/reputation/core/safety_tips_config.h"
25 #include "components/security_state/core/features.h"
26 #include "components/security_state/core/security_state.h"
27 #include "components/url_formatter/spoof_checks/top_domains/top500_domains.h"
28 #include "services/metrics/public/cpp/ukm_source_id.h"
29 #include "url/url_constants.h"
30 
31 namespace {
32 
33 using security_state::SafetyTipStatus;
34 
35 // This factory helps construct and find the singleton ReputationService linked
36 // to a Profile.
37 class ReputationServiceFactory : public BrowserContextKeyedServiceFactory {
38  public:
GetForProfile(Profile * profile)39   static ReputationService* GetForProfile(Profile* profile) {
40     return static_cast<ReputationService*>(
41         GetInstance()->GetServiceForBrowserContext(profile,
42                                                    /*create_service=*/true));
43   }
GetInstance()44   static ReputationServiceFactory* GetInstance() {
45     return base::Singleton<ReputationServiceFactory>::get();
46   }
47 
48  private:
49   friend struct base::DefaultSingletonTraits<ReputationServiceFactory>;
50 
ReputationServiceFactory()51   ReputationServiceFactory()
52       : BrowserContextKeyedServiceFactory(
53             "ReputationServiceFactory",
54             BrowserContextDependencyManager::GetInstance()) {}
55 
56   ~ReputationServiceFactory() override = default;
57 
58   // BrowserContextKeyedServiceFactory:
BuildServiceInstanceFor(content::BrowserContext * profile) const59   KeyedService* BuildServiceInstanceFor(
60       content::BrowserContext* profile) const override {
61     return new ReputationService(static_cast<Profile*>(profile));
62   }
63 
GetBrowserContextToUse(content::BrowserContext * context) const64   content::BrowserContext* GetBrowserContextToUse(
65       content::BrowserContext* context) const override {
66     return chrome::GetBrowserContextOwnInstanceInIncognito(context);
67   }
68 
69   DISALLOW_COPY_AND_ASSIGN(ReputationServiceFactory);
70 };
71 
72 // Returns whether or not the Safety Tip should be suppressed for the given URL.
73 // Checks SafeBrowsing-style permutations of |url| against the component updater
74 // allowlist, as well as any enterprise-set allowlisting of the hostname, and
75 // returns whether the URL is explicitly allowed. Fails closed, so that warnings
76 // are suppressed if the component is unavailable.
ShouldSuppressWarning(Profile * profile,const GURL & url)77 bool ShouldSuppressWarning(Profile* profile, const GURL& url) {
78   // Check any policy-set allowlist.
79   if (IsAllowedByEnterprisePolicy(profile->GetPrefs(), url)) {
80     return true;
81   }
82 
83   auto* proto = reputation::GetSafetyTipsRemoteConfigProto();
84   if (!proto) {
85     // This happens when the component hasn't downloaded yet. This should only
86     // happen for a short time after initial upgrade to M79.
87     //
88     // Disable all Safety Tips during that time. Otherwise, we would continue to
89     // flag on any known false positives until the client received the update.
90     return true;
91   }
92   return reputation::IsUrlAllowlistedBySafetyTipsComponent(proto, url);
93 }
94 
95 }  // namespace
96 
ReputationService(Profile * profile)97 ReputationService::ReputationService(Profile* profile)
98     : profile_(profile),
99       sensitive_keywords_(top500_domains::kTopKeywords),
100       num_sensitive_keywords_(top500_domains::kNumTopKeywords) {}
101 
102 ReputationService::~ReputationService() = default;
103 
104 // static
Get(Profile * profile)105 ReputationService* ReputationService::Get(Profile* profile) {
106   return ReputationServiceFactory::GetForProfile(profile);
107 }
108 
GetReputationStatus(const GURL & url,content::WebContents * web_contents,ReputationCheckCallback callback)109 void ReputationService::GetReputationStatus(const GURL& url,
110                                             content::WebContents* web_contents,
111                                             ReputationCheckCallback callback) {
112   DCHECK(url.SchemeIsHTTPOrHTTPS());
113 
114   bool has_delayed_warning =
115       !!safe_browsing::SafeBrowsingUserInteractionObserver::FromWebContents(
116           web_contents);
117 
118   LookalikeUrlService* service = LookalikeUrlService::Get(profile_);
119   if (service->EngagedSitesNeedUpdating()) {
120     service->ForceUpdateEngagedSites(
121         base::BindOnce(&ReputationService::GetReputationStatusWithEngagedSites,
122                        weak_factory_.GetWeakPtr(), url, has_delayed_warning,
123                        std::move(callback)));
124     // If the engaged sites need updating, there's nothing to do until callback.
125     return;
126   }
127 
128   GetReputationStatusWithEngagedSites(url, has_delayed_warning,
129                                       std::move(callback),
130                                       service->GetLatestEngagedSites());
131 }
132 
IsIgnored(const GURL & url) const133 bool ReputationService::IsIgnored(const GURL& url) const {
134   return warning_dismissed_origins_.count(url::Origin::Create(url)) > 0;
135 }
136 
SetUserIgnore(const GURL & url)137 void ReputationService::SetUserIgnore(const GURL& url) {
138   warning_dismissed_origins_.insert(url::Origin::Create(url));
139 }
140 
OnUIDisabledFirstVisit(const GURL & url)141 void ReputationService::OnUIDisabledFirstVisit(const GURL& url) {
142   warning_dismissed_origins_.insert(url::Origin::Create(url));
143 }
144 
SetSensitiveKeywordsForTesting(const char * const * new_keywords,size_t num_new_keywords)145 void ReputationService::SetSensitiveKeywordsForTesting(
146     const char* const* new_keywords,
147     size_t num_new_keywords) {
148   sensitive_keywords_ = new_keywords;
149   num_sensitive_keywords_ = num_new_keywords;
150 }
151 
GetReputationStatusWithEngagedSites(const GURL & url,bool has_delayed_warning,ReputationCheckCallback callback,const std::vector<DomainInfo> & engaged_sites)152 void ReputationService::GetReputationStatusWithEngagedSites(
153     const GURL& url,
154     bool has_delayed_warning,
155     ReputationCheckCallback callback,
156     const std::vector<DomainInfo>& engaged_sites) {
157   const DomainInfo navigated_domain = GetDomainInfo(url);
158 
159   ReputationCheckResult result;
160 
161   // We evaluate every heuristic for metrics, but only display the first result
162   // for the UI. We use |done_checking_reputation_status| to track when we've
163   // settled on the safety tip to show in the UI, so as to not overwrite this
164   // decision with other heuristics that may trigger later.
165   bool done_checking_reputation_status = false;
166 
167   // 0. Server-side warning suppression.
168   // If the URL is on the allowlist list, do nothing else. This is only used to
169   // mitigate false positives, so no further processing should be done.
170   if (ShouldSuppressWarning(profile_, url)) {
171     done_checking_reputation_status = true;
172   }
173 
174   // 1. Engagement check
175   // Ensure that this URL is not already engaged. We can't use the synchronous
176   // SiteEngagementService::IsEngagementAtLeast as it has side effects.  This
177   // check intentionally ignores the scheme.
178   const auto already_engaged =
179       std::find_if(engaged_sites.begin(), engaged_sites.end(),
180                    [navigated_domain](const DomainInfo& engaged_domain) {
181                      return (navigated_domain.domain_and_registry ==
182                              engaged_domain.domain_and_registry);
183                    });
184   if (already_engaged != engaged_sites.end()) {
185     done_checking_reputation_status = true;
186   }
187 
188   // 2. Server-side blocklist check.
189   SafetyTipStatus status = reputation::GetSafetyTipUrlBlockType(url);
190   if (status != SafetyTipStatus::kNone) {
191     if (!done_checking_reputation_status) {
192       result.safety_tip_status = status;
193     }
194 
195     result.triggered_heuristics.blocklist_heuristic_triggered = true;
196     done_checking_reputation_status = true;
197   }
198 
199   // 3. Protect against bad false positives by allowing top domains.
200   // Empty domain_and_registry happens on private domains.
201   if (navigated_domain.domain_and_registry.empty() ||
202       IsTopDomain(navigated_domain)) {
203     done_checking_reputation_status = true;
204   }
205 
206   // 4. Lookalike heuristics.
207   GURL safe_url;
208   if (already_engaged == engaged_sites.end() &&
209       ShouldTriggerSafetyTipFromLookalike(url, navigated_domain, engaged_sites,
210                                           &safe_url)) {
211     if (!done_checking_reputation_status) {
212       result.suggested_url = safe_url;
213       result.safety_tip_status = SafetyTipStatus::kLookalike;
214     }
215 
216     result.triggered_heuristics.lookalike_heuristic_triggered = true;
217     done_checking_reputation_status = true;
218   }
219 
220   // 5. Keyword heuristics.
221   if (ShouldTriggerSafetyTipFromKeywordInURL(url, navigated_domain,
222                                              sensitive_keywords_,
223                                              num_sensitive_keywords_)) {
224     if (!done_checking_reputation_status) {
225       result.safety_tip_status = SafetyTipStatus::kBadKeyword;
226     }
227 
228     result.triggered_heuristics.keywords_heuristic_triggered = true;
229     done_checking_reputation_status = true;
230   }
231 
232   // 6. This case is an experimental variation on Safe Browsing delayed warnings
233   // (https://crbug.com/1057157) to measure the effect of simplified domain
234   // display (https://crbug.com/1090393). In this experiment, Chrome delays Safe
235   // Browsing warnings until user interaction to see if the simplified domain
236   // display UI treatment affects how people interact with the page. In this
237   // variation, Chrome shows a Safety Tip on such pages, to try to isolate the
238   // effect of the UI treatment to when people's attention is drawn to the
239   // omnibox.
240   if (has_delayed_warning &&
241       base::FeatureList::IsEnabled(
242           security_state::features::kSafetyTipUIOnDelayedWarning)) {
243     // Intentionally don't check |done_checking_reputation_status| here, as we
244     // want this Safety Tip to take precedence. In this case, where there is a
245     // delayed Safe Browsing warning, we know the page is actually suspicious.
246     result.safety_tip_status = SafetyTipStatus::kBadReputation;
247     result.triggered_heuristics.blocklist_heuristic_triggered = true;
248     done_checking_reputation_status = true;
249   }
250 
251   if (IsIgnored(url)) {
252     if (result.safety_tip_status == SafetyTipStatus::kBadReputation) {
253       result.safety_tip_status = SafetyTipStatus::kBadReputationIgnored;
254     } else if (result.safety_tip_status == SafetyTipStatus::kLookalike) {
255       result.safety_tip_status = SafetyTipStatus::kLookalikeIgnored;
256     }
257     // The local allowlist is used by both the interstitial and safety tips, so
258     // it's possible to hit this case even when we're not in the conditions
259     // above. It's also possible to get kNone here when a domain is added to
260     // the server-side allowlist after it has been ignored. In these cases,
261     // there's no additional action required.
262   }
263   result.url = url;
264 
265   DCHECK(done_checking_reputation_status ||
266          !result.triggered_heuristics.triggered_any());
267   std::move(callback).Run(result);
268 }
269