1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.h"
6 
7 #include <memory>
8 
9 #include "base/metrics/field_trial_params.h"
10 #include "base/metrics/histogram_macros.h"
11 #include "base/rand_util.h"
12 #include "base/stl_util.h"
13 #include "base/strings/stringprintf.h"
14 #include "base/time/time.h"
15 #include "base/timer/timer.h"
16 #include "chrome/browser/browser_process.h"
17 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer.h"
19 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
20 #include "chrome/common/pref_names.h"
21 #include "components/prefs/pref_service.h"
22 #include "components/safe_browsing/content/web_ui/safe_browsing_ui.h"
23 #include "components/safe_browsing/core/common/utils.h"
24 #include "components/safe_browsing/core/features.h"
25 #include "components/sessions/content/session_tab_helper.h"
26 #include "content/public/browser/navigation_details.h"
27 #include "content/public/browser/render_frame_host.h"
28 #include "content/public/browser/render_process_host.h"
29 #include "content/public/browser/web_contents.h"
30 
31 using content::WebContents;
32 
33 namespace safe_browsing {
34 
35 namespace {
36 
// Number of recent navigation events that CountOfRecentNavigationsToAppend()
// hands out when appending is allowed.
constexpr size_t kMaxNumberOfNavigationsToAppend = 5;

// Navigation clean-up runs every 2 minutes, so reporting the clean-up count on
// every run would be excessive. Only this fraction of runs is logged.
constexpr double kNavigationCleanUpSamplingRate = 0.01;
42 
43 // Given when an event happened and its TTL, determine if it is already expired.
44 // Note, if for some reason this event's timestamp is in the future, this
45 // event's timestamp is invalid, hence we treat it as expired.
IsEventExpired(const base::Time & event_time,double ttl_in_second)46 bool IsEventExpired(const base::Time& event_time, double ttl_in_second) {
47   double current_time_in_second = base::Time::Now().ToDoubleT();
48   double event_time_in_second = event_time.ToDoubleT();
49   if (current_time_in_second <= event_time_in_second)
50     return true;
51   return current_time_in_second - event_time_in_second > ttl_in_second;
52 }
53 
54 // Helper function to determine if the URL type should be LANDING_REFERRER or
55 // LANDING_PAGE, and modify AttributionResult accordingly.
GetURLTypeAndAdjustAttributionResult(size_t user_gesture_count,SafeBrowsingNavigationObserverManager::AttributionResult * out_result)56 ReferrerChainEntry::URLType GetURLTypeAndAdjustAttributionResult(
57     size_t user_gesture_count,
58     SafeBrowsingNavigationObserverManager::AttributionResult* out_result) {
59   // Landing page refers to the page user directly interacts with to trigger
60   // this event (e.g. clicking on download button). Landing referrer page is the
61   // one user interacts with right before navigating to the landing page.
62   // Since we are tracing navigations backwards, if we've reached
63   // user gesture limit before this navigation event, this is a navigation
64   // leading to the landing referrer page, otherwise it leads to landing page.
65   if (user_gesture_count == 0) {
66     *out_result = SafeBrowsingNavigationObserverManager::SUCCESS;
67     return ReferrerChainEntry::EVENT_URL;
68   } else if (user_gesture_count == 2) {
69     *out_result =
70         SafeBrowsingNavigationObserverManager::SUCCESS_LANDING_REFERRER;
71     return ReferrerChainEntry::LANDING_REFERRER;
72   } else if (user_gesture_count == 1) {
73     *out_result = SafeBrowsingNavigationObserverManager::SUCCESS_LANDING_PAGE;
74     return ReferrerChainEntry::LANDING_PAGE;
75   } else {
76     *out_result = SafeBrowsingNavigationObserverManager::SUCCESS_REFERRER;
77     return ReferrerChainEntry::REFERRER;
78   }
79 }
80 
GetOrigin(const std::string & url)81 std::string GetOrigin(const std::string& url) {
82   return GURL(url).GetOrigin().spec();
83 }
84 
85 }  // namespace
86 
// Lifetime of a user gesture, in seconds. A gesture older than this is treated
// as expired and no longer attributed to upcoming navigation events.
constexpr double kUserGestureTTLInSecond = 1.0;
// Lifetime of navigation events and resolved IP addresses, in seconds. Records
// older than 2 minutes are considered expired, and the clean-up of these
// navigation footprints is scheduled with the same 2-minute period.
constexpr double kNavigationFootprintTTLInSecond = 120.0;
// Maximum number of most recent NavigationEvents kept, which bounds the memory
// used by navigation tracking. The value was picked based on the UMA metric
// "SafeBrowsing.NavigationObserver.NavigationEventCleanUpCount". Lowering it
// could make room for abuse.
constexpr int kNavigationRecordMaxSize = 100;
// Maximum number of ReferrerChainEntry items in a referrer chain. It limits the
// size of the reports (e.g. ClientDownloadRequest) sent to the SB server.
constexpr int kReferrerChainMaxLength = 10;
103 
104 // -------------------------ReferrerChainData-----------------------
105 
// Out-of-line definition of ReferrerChainData's static key constant. The
// string value of kDownloadReferrerChainDataKey is not used.
// NOTE(review): presumably only the identity (address) of this array serves as
// the lookup key - confirm against the users of this constant.
const char ReferrerChainData::kDownloadReferrerChainDataKey[] =
    "referrer_chain_data_key";
109 
ReferrerChainData(std::unique_ptr<ReferrerChain> referrer_chain,size_t referrer_chain_length,size_t recent_navigations_to_collect)110 ReferrerChainData::ReferrerChainData(
111     std::unique_ptr<ReferrerChain> referrer_chain,
112     size_t referrer_chain_length,
113     size_t recent_navigations_to_collect)
114     : referrer_chain_(std::move(referrer_chain)),
115       referrer_chain_length_(referrer_chain_length),
116       recent_navigations_to_collect_(recent_navigations_to_collect) {}
117 
~ReferrerChainData()118 ReferrerChainData::~ReferrerChainData() {}
119 
GetReferrerChain()120 ReferrerChain* ReferrerChainData::GetReferrerChain() {
121   return referrer_chain_.get();
122 }
123 
124 // -------------------------NavigationEventList---------------------
NavigationEventList(std::size_t size_limit)125 NavigationEventList::NavigationEventList(std::size_t size_limit)
126     : size_limit_(size_limit) {
127   DCHECK_GT(size_limit_, 0U);
128 }
129 
~NavigationEventList()130 NavigationEventList::~NavigationEventList() {}
131 
FindNavigationEvent(const base::Time & last_event_timestamp,const GURL & target_url,const GURL & target_main_frame_url,SessionID target_tab_id)132 NavigationEvent* NavigationEventList::FindNavigationEvent(
133     const base::Time& last_event_timestamp,
134     const GURL& target_url,
135     const GURL& target_main_frame_url,
136     SessionID target_tab_id) {
137   if (target_url.is_empty() && target_main_frame_url.is_empty())
138     return nullptr;
139 
140   // If target_url is empty, we should back trace navigation based on its
141   // main frame URL instead.
142   GURL search_url = target_url.is_empty() ? target_main_frame_url : target_url;
143 
144   // Since navigation events are recorded in chronological order, we traverse
145   // the vector in reverse order to get the latest match.
146   for (auto rit = navigation_events_.rbegin(); rit != navigation_events_.rend();
147        ++rit) {
148     auto* nav_event = rit->get();
149 
150     // The next event cannot come before the previous one.
151     if (nav_event->last_updated > last_event_timestamp)
152       continue;
153 
154     // If tab id is not valid, we only compare url, otherwise we compare both.
155     if (nav_event->GetDestinationUrl() == search_url &&
156         (!target_tab_id.is_valid() ||
157          nav_event->target_tab_id == target_tab_id)) {
158       // If both source_url and source_main_frame_url are empty, we should check
159       // if a retargeting navigation caused this navigation. In this case, we
160       // skip this navigation event and looks for the retargeting navigation
161       // event.
162       if (nav_event->source_url.is_empty() &&
163           nav_event->source_main_frame_url.is_empty()) {
164         NavigationEvent* retargeting_nav_event = FindRetargetingNavigationEvent(
165             nav_event->last_updated, nav_event->target_tab_id);
166         if (!retargeting_nav_event)
167           return nav_event;
168         // If there is a server redirection immediately after retargeting, we
169         // need to adjust our search url to the original request.
170         if (!nav_event->server_redirect_urls.empty()) {
171           // Adjust retargeting navigation event's attributes.
172           retargeting_nav_event->server_redirect_urls.push_back(
173               std::move(search_url));
174         } else {
175           // The retargeting_nav_event original request url is unreliable, since
176           // that navigation can be canceled.
177           retargeting_nav_event->original_request_url = std::move(search_url);
178         }
179         return retargeting_nav_event;
180       } else {
181         return nav_event;
182       }
183     }
184   }
185   return nullptr;
186 }
187 
FindRetargetingNavigationEvent(const base::Time & last_event_timestamp,SessionID target_tab_id)188 NavigationEvent* NavigationEventList::FindRetargetingNavigationEvent(
189     const base::Time& last_event_timestamp,
190     SessionID target_tab_id) {
191   // Since navigation events are recorded in chronological order, we traverse
192   // the vector in reverse order to get the latest match.
193   for (auto rit = navigation_events_.rbegin(); rit != navigation_events_.rend();
194        ++rit) {
195     auto* nav_event = rit->get();
196 
197     // The next event cannot come before the previous one.
198     if (nav_event->last_updated > last_event_timestamp)
199       continue;
200 
201     // In addition to url and tab_id checking, we need to compare the
202     // source_tab_id and target_tab_id to make sure it is a retargeting event.
203     if (nav_event->target_tab_id == target_tab_id &&
204         nav_event->source_tab_id != nav_event->target_tab_id) {
205       return nav_event;
206     }
207   }
208   return nullptr;
209 }
210 
RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event)211 void NavigationEventList::RecordNavigationEvent(
212     std::unique_ptr<NavigationEvent> nav_event) {
213   // Skip page refresh and in-page navigation.
214   if (nav_event->source_url == nav_event->GetDestinationUrl() &&
215       nav_event->source_tab_id == nav_event->target_tab_id)
216     return;
217 
218   if (navigation_events_.size() == size_limit_)
219     navigation_events_.pop_front();
220   navigation_events_.push_back(std::move(nav_event));
221 }
222 
CleanUpNavigationEvents()223 std::size_t NavigationEventList::CleanUpNavigationEvents() {
224   // Remove any stale NavigationEnvent, if it is older than
225   // kNavigationFootprintTTLInSecond.
226   std::size_t removal_count = 0;
227   while (!navigation_events_.empty() &&
228          IsEventExpired(navigation_events_[0]->last_updated,
229                         kNavigationFootprintTTLInSecond)) {
230     navigation_events_.pop_front();
231     removal_count++;
232   }
233   return removal_count;
234 }
235 
236 // -----------------SafeBrowsingNavigationObserverManager-----------
237 // static
IsUserGestureExpired(const base::Time & timestamp)238 bool SafeBrowsingNavigationObserverManager::IsUserGestureExpired(
239     const base::Time& timestamp) {
240   return IsEventExpired(timestamp, kUserGestureTTLInSecond);
241 }
242 
243 // static
ClearURLRef(const GURL & url)244 GURL SafeBrowsingNavigationObserverManager::ClearURLRef(const GURL& url) {
245   if (url.has_ref()) {
246     url::Replacements<char> replacements;
247     replacements.ClearRef();
248     return url.ReplaceComponents(replacements);
249   }
250   return url;
251 }
252 
253 // static
IsEnabledAndReady(Profile * profile)254 bool SafeBrowsingNavigationObserverManager::IsEnabledAndReady(
255     Profile* profile) {
256   return IsSafeBrowsingEnabled(*profile->GetPrefs()) &&
257          g_browser_process->safe_browsing_service() &&
258          g_browser_process->safe_browsing_service()
259              ->navigation_observer_manager();
260 }
261 
262 // static
SanitizeReferrerChain(ReferrerChain * referrer_chain)263 void SafeBrowsingNavigationObserverManager::SanitizeReferrerChain(
264     ReferrerChain* referrer_chain) {
265   for (int i = 0; i < referrer_chain->size(); i++) {
266     ReferrerChainEntry* entry = referrer_chain->Mutable(i);
267     ReferrerChainEntry entry_copy(*entry);
268     entry->Clear();
269     if (entry_copy.has_url())
270       entry->set_url(GetOrigin(entry_copy.url()));
271     if (entry_copy.has_main_frame_url())
272       entry->set_main_frame_url(GetOrigin(entry_copy.main_frame_url()));
273     entry->set_type(entry_copy.type());
274     for (int j = 0; j < entry_copy.ip_addresses_size(); j++)
275       entry->add_ip_addresses(entry_copy.ip_addresses(j));
276     if (entry_copy.has_referrer_url())
277       entry->set_referrer_url(GetOrigin(entry_copy.referrer_url()));
278     if (entry_copy.has_referrer_main_frame_url())
279       entry->set_referrer_main_frame_url(
280           GetOrigin(entry_copy.referrer_main_frame_url()));
281     entry->set_is_retargeting(entry_copy.is_retargeting());
282     entry->set_navigation_time_msec(entry_copy.navigation_time_msec());
283     entry->set_navigation_initiation(entry_copy.navigation_initiation());
284     for (int j = 0; j < entry_copy.server_redirect_chain_size(); j++) {
285       ReferrerChainEntry::ServerRedirect* server_redirect_entry =
286           entry->add_server_redirect_chain();
287       if (entry_copy.server_redirect_chain(j).has_url()) {
288         server_redirect_entry->set_url(
289             GetOrigin(entry_copy.server_redirect_chain(j).url()));
290       }
291     }
292   }
293 }
294 
SafeBrowsingNavigationObserverManager()295 SafeBrowsingNavigationObserverManager::SafeBrowsingNavigationObserverManager()
296     : navigation_event_list_(kNavigationRecordMaxSize) {
297   // Notify WebUIInfoSingleton that a new ReferrerChainProvider was created.
298   WebUIInfoSingleton::GetInstance()->set_referrer_chain_provider(this);
299 
300   // Schedule clean up in 2 minutes.
301   ScheduleNextCleanUpAfterInterval(
302       base::TimeDelta::FromSecondsD(kNavigationFootprintTTLInSecond));
303 }
304 
RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event)305 void SafeBrowsingNavigationObserverManager::RecordNavigationEvent(
306     std::unique_ptr<NavigationEvent> nav_event) {
307   navigation_event_list_.RecordNavigationEvent(std::move(nav_event));
308 }
309 
RecordUserGestureForWebContents(content::WebContents * web_contents)310 void SafeBrowsingNavigationObserverManager::RecordUserGestureForWebContents(
311     content::WebContents* web_contents) {
312   const base::Time timestamp = base::Time::Now();
313   auto insertion_result =
314       user_gesture_map_.insert(std::make_pair(web_contents, timestamp));
315   // Update the timestamp if entry already exists.
316   if (!insertion_result.second)
317     insertion_result.first->second = timestamp;
318 }
319 
OnUserGestureConsumed(content::WebContents * web_contents)320 void SafeBrowsingNavigationObserverManager::OnUserGestureConsumed(
321     content::WebContents* web_contents) {
322   user_gesture_map_.erase(web_contents);
323 }
324 
HasUserGesture(content::WebContents * web_contents)325 bool SafeBrowsingNavigationObserverManager::HasUserGesture(
326     content::WebContents* web_contents) {
327   if (!web_contents)
328     return false;
329   if (user_gesture_map_.find(web_contents) != user_gesture_map_.end())
330     return true;
331   return false;
332 }
333 
HasUnexpiredUserGesture(content::WebContents * web_contents)334 bool SafeBrowsingNavigationObserverManager::HasUnexpiredUserGesture(
335     content::WebContents* web_contents) {
336   if (!web_contents)
337     return false;
338   auto it = user_gesture_map_.find(web_contents);
339   if (it == user_gesture_map_.end())
340     return false;
341   return !IsUserGestureExpired(it->second);
342 }
343 
RecordHostToIpMapping(const std::string & host,const std::string & ip)344 void SafeBrowsingNavigationObserverManager::RecordHostToIpMapping(
345     const std::string& host,
346     const std::string& ip) {
347   auto insert_result = host_to_ip_map_.insert(
348       std::make_pair(host, std::vector<ResolvedIPAddress>()));
349   if (!insert_result.second) {
350     // host_to_ip_map already contains this key.
351     // If this IP is already in the vector, we update its timestamp.
352     for (auto& vector_entry : insert_result.first->second) {
353       if (vector_entry.ip == ip) {
354         vector_entry.timestamp = base::Time::Now();
355         return;
356       }
357     }
358   }
359   // If this is a new IP of this host, and we added to the end of the vector.
360   insert_result.first->second.push_back(
361       ResolvedIPAddress(base::Time::Now(), ip));
362 }
363 
OnWebContentDestroyed(content::WebContents * web_contents)364 void SafeBrowsingNavigationObserverManager::OnWebContentDestroyed(
365     content::WebContents* web_contents) {
366   user_gesture_map_.erase(web_contents);
367 }
368 
CleanUpStaleNavigationFootprints()369 void SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootprints() {
370   CleanUpNavigationEvents();
371   CleanUpUserGestures();
372   CleanUpIpAddresses();
373   ScheduleNextCleanUpAfterInterval(
374       base::TimeDelta::FromSecondsD(kNavigationFootprintTTLInSecond));
375 }
376 
377 SafeBrowsingNavigationObserverManager::AttributionResult
IdentifyReferrerChainByEventURL(const GURL & event_url,SessionID event_tab_id,int user_gesture_count_limit,ReferrerChain * out_referrer_chain)378 SafeBrowsingNavigationObserverManager::IdentifyReferrerChainByEventURL(
379     const GURL& event_url,
380     SessionID event_tab_id,
381     int user_gesture_count_limit,
382     ReferrerChain* out_referrer_chain) {
383   if (!event_url.is_valid())
384     return INVALID_URL;
385 
386   NavigationEvent* nav_event = navigation_event_list_.FindNavigationEvent(
387       base::Time::Now(), ClearURLRef(event_url), GURL(), event_tab_id);
388   if (!nav_event) {
389     // We cannot find a single navigation event related to this event.
390     return NAVIGATION_EVENT_NOT_FOUND;
391   }
392   AttributionResult result = SUCCESS;
393   AddToReferrerChain(out_referrer_chain, nav_event, GURL(),
394                      ReferrerChainEntry::EVENT_URL);
395   int user_gesture_count = 0;
396   GetRemainingReferrerChain(nav_event, user_gesture_count,
397                             user_gesture_count_limit, out_referrer_chain,
398                             &result);
399   return result;
400 }
401 
402 SafeBrowsingNavigationObserverManager::AttributionResult
IdentifyReferrerChainByWebContents(content::WebContents * web_contents,int user_gesture_count_limit,ReferrerChain * out_referrer_chain)403 SafeBrowsingNavigationObserverManager::IdentifyReferrerChainByWebContents(
404     content::WebContents* web_contents,
405     int user_gesture_count_limit,
406     ReferrerChain* out_referrer_chain) {
407   if (!web_contents)
408     return INVALID_URL;
409   GURL last_committed_url = web_contents->GetLastCommittedURL();
410   if (!last_committed_url.is_valid())
411     return INVALID_URL;
412   bool has_user_gesture = HasUserGesture(web_contents);
413   SessionID tab_id = sessions::SessionTabHelper::IdForTab(web_contents);
414   return IdentifyReferrerChainByHostingPage(
415       ClearURLRef(last_committed_url), GURL(), tab_id, has_user_gesture,
416       user_gesture_count_limit, out_referrer_chain);
417 }
418 
419 SafeBrowsingNavigationObserverManager::AttributionResult
IdentifyReferrerChainByHostingPage(const GURL & initiating_frame_url,const GURL & initiating_main_frame_url,SessionID tab_id,bool has_user_gesture,int user_gesture_count_limit,ReferrerChain * out_referrer_chain)420 SafeBrowsingNavigationObserverManager::IdentifyReferrerChainByHostingPage(
421     const GURL& initiating_frame_url,
422     const GURL& initiating_main_frame_url,
423     SessionID tab_id,
424     bool has_user_gesture,
425     int user_gesture_count_limit,
426     ReferrerChain* out_referrer_chain) {
427   if (!initiating_frame_url.is_valid())
428     return INVALID_URL;
429 
430   NavigationEvent* nav_event = navigation_event_list_.FindNavigationEvent(
431       base::Time::Now(), ClearURLRef(initiating_frame_url),
432       ClearURLRef(initiating_main_frame_url), tab_id);
433   if (!nav_event) {
434     // We cannot find a single navigation event related to this hosting page.
435     return NAVIGATION_EVENT_NOT_FOUND;
436   }
437 
438   AttributionResult result = SUCCESS;
439 
440   int user_gesture_count = 0;
441   // If this initiating_frame has user gesture, we consider this as the landing
442   // page of this event.
443   if (has_user_gesture) {
444     user_gesture_count = 1;
445     AddToReferrerChain(
446         out_referrer_chain, nav_event, initiating_main_frame_url,
447         GetURLTypeAndAdjustAttributionResult(user_gesture_count, &result));
448   } else {
449     AddToReferrerChain(out_referrer_chain, nav_event, initiating_main_frame_url,
450                        ReferrerChainEntry::CLIENT_REDIRECT);
451   }
452 
453   GetRemainingReferrerChain(nav_event, user_gesture_count,
454                             user_gesture_count_limit, out_referrer_chain,
455                             &result);
456   return result;
457 }
458 
459 SafeBrowsingNavigationObserverManager::
~SafeBrowsingNavigationObserverManager()460     ~SafeBrowsingNavigationObserverManager() {}
461 
RecordNewWebContents(content::WebContents * source_web_contents,int source_render_process_id,int source_render_frame_id,const GURL & target_url,ui::PageTransition page_transition,content::WebContents * target_web_contents,bool renderer_initiated)462 void SafeBrowsingNavigationObserverManager::RecordNewWebContents(
463     content::WebContents* source_web_contents,
464     int source_render_process_id,
465     int source_render_frame_id,
466     const GURL& target_url,
467     ui::PageTransition page_transition,
468     content::WebContents* target_web_contents,
469     bool renderer_initiated) {
470   DCHECK(source_web_contents);
471   DCHECK(target_web_contents);
472 
473   content::RenderFrameHost* rfh = content::RenderFrameHost::FromID(
474       source_render_process_id, source_render_frame_id);
475   // Remove the "#" at the end of URL, since it does not point to any actual
476   // page fragment ID.
477   GURL cleaned_target_url =
478       SafeBrowsingNavigationObserverManager::ClearURLRef(target_url);
479 
480   std::unique_ptr<NavigationEvent> nav_event =
481       std::make_unique<NavigationEvent>();
482   if (rfh) {
483     nav_event->source_url = SafeBrowsingNavigationObserverManager::ClearURLRef(
484         rfh->GetLastCommittedURL());
485   }
486   nav_event->source_tab_id =
487       sessions::SessionTabHelper::IdForTab(source_web_contents);
488   nav_event->source_main_frame_url =
489       SafeBrowsingNavigationObserverManager::ClearURLRef(
490           source_web_contents->GetLastCommittedURL());
491   nav_event->original_request_url = cleaned_target_url;
492   nav_event->target_tab_id =
493       sessions::SessionTabHelper::IdForTab(target_web_contents);
494   nav_event->frame_id = rfh ? rfh->GetFrameTreeNodeId()
495                             : content::RenderFrameHost::kNoFrameTreeNodeId;
496   nav_event->maybe_launched_by_external_application =
497       ui::PageTransitionCoreTypeIs(page_transition,
498                                    ui::PAGE_TRANSITION_AUTO_TOPLEVEL);
499 
500   if (!renderer_initiated) {
501     nav_event->navigation_initiation = ReferrerChainEntry::BROWSER_INITIATED;
502   } else if (HasUnexpiredUserGesture(source_web_contents)) {
503     OnUserGestureConsumed(source_web_contents);
504     nav_event->navigation_initiation =
505         ReferrerChainEntry::RENDERER_INITIATED_WITH_USER_GESTURE;
506   } else {
507     nav_event->navigation_initiation =
508         ReferrerChainEntry::RENDERER_INITIATED_WITHOUT_USER_GESTURE;
509   }
510 
511   navigation_event_list_.RecordNavigationEvent(std::move(nav_event));
512 }
513 
514 // static
CountOfRecentNavigationsToAppend(const Profile & profile,AttributionResult result)515 size_t SafeBrowsingNavigationObserverManager::CountOfRecentNavigationsToAppend(
516     const Profile& profile,
517     AttributionResult result) {
518   if (!IsExtendedReportingEnabled(*profile.GetPrefs()) ||
519       profile.IsOffTheRecord() || result == SUCCESS_LANDING_REFERRER) {
520     return 0u;
521   }
522   return kMaxNumberOfNavigationsToAppend;
523 }
524 
AppendRecentNavigations(size_t recent_navigation_count,ReferrerChain * out_referrer_chain)525 void SafeBrowsingNavigationObserverManager::AppendRecentNavigations(
526     size_t recent_navigation_count,
527     ReferrerChain* out_referrer_chain) {
528   if (recent_navigation_count <= 0u)
529     return;
530   int current_referrer_chain_size = out_referrer_chain->size();
531   double last_navigation_time_msec =
532       current_referrer_chain_size == 0
533           ? base::Time::Now().ToJavaTime()
534           : out_referrer_chain->Get(current_referrer_chain_size - 1)
535                 .navigation_time_msec();
536   auto it = navigation_event_list_.navigation_events().rbegin();
537   while (it != navigation_event_list_.navigation_events().rend() &&
538          recent_navigation_count > 0u) {
539     // Skip navigations that happened after |last_navigation_time_msec|.
540     if (it->get()->last_updated.ToJavaTime() < last_navigation_time_msec) {
541       AddToReferrerChain(out_referrer_chain, it->get(), GURL(),
542                          ReferrerChainEntry::RECENT_NAVIGATION);
543       recent_navigation_count--;
544     }
545     it++;
546   }
547 }
548 
CleanUpNavigationEvents()549 void SafeBrowsingNavigationObserverManager::CleanUpNavigationEvents() {
550   std::size_t removal_count = navigation_event_list_.CleanUpNavigationEvents();
551 
552   if (base::RandDouble() < kNavigationCleanUpSamplingRate) {
553     UMA_HISTOGRAM_COUNTS_10000(
554         "SafeBrowsing.NavigationObserver.NavigationEventCleanUpCount",
555         removal_count);
556   }
557 }
558 
CleanUpUserGestures()559 void SafeBrowsingNavigationObserverManager::CleanUpUserGestures() {
560   for (auto it = user_gesture_map_.begin(); it != user_gesture_map_.end();) {
561     if (IsEventExpired(it->second, kNavigationFootprintTTLInSecond))
562       it = user_gesture_map_.erase(it);
563     else
564       ++it;
565   }
566 }
567 
CleanUpIpAddresses()568 void SafeBrowsingNavigationObserverManager::CleanUpIpAddresses() {
569   std::size_t remove_count = 0;
570   for (auto it = host_to_ip_map_.begin(); it != host_to_ip_map_.end();) {
571     std::size_t size_before_removal = it->second.size();
572     base::EraseIf(it->second, [](const ResolvedIPAddress& resolved_ip) {
573       return IsEventExpired(resolved_ip.timestamp,
574                             kNavigationFootprintTTLInSecond);
575     });
576     std::size_t size_after_removal = it->second.size();
577     remove_count += (size_before_removal - size_after_removal);
578     if (size_after_removal == 0)
579       it = host_to_ip_map_.erase(it);
580     else
581       ++it;
582   }
583 }
584 
IsCleanUpScheduled() const585 bool SafeBrowsingNavigationObserverManager::IsCleanUpScheduled() const {
586   return cleanup_timer_.IsRunning();
587 }
588 
ScheduleNextCleanUpAfterInterval(base::TimeDelta interval)589 void SafeBrowsingNavigationObserverManager::ScheduleNextCleanUpAfterInterval(
590     base::TimeDelta interval) {
591   DCHECK_GT(interval, base::TimeDelta());
592   cleanup_timer_.Stop();
593   cleanup_timer_.Start(
594       FROM_HERE, interval, this,
595       &SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootprints);
596 }
597 
AddToReferrerChain(ReferrerChain * referrer_chain,NavigationEvent * nav_event,const GURL & destination_main_frame_url,ReferrerChainEntry::URLType type)598 void SafeBrowsingNavigationObserverManager::AddToReferrerChain(
599     ReferrerChain* referrer_chain,
600     NavigationEvent* nav_event,
601     const GURL& destination_main_frame_url,
602     ReferrerChainEntry::URLType type) {
603   std::unique_ptr<ReferrerChainEntry> referrer_chain_entry =
604       std::make_unique<ReferrerChainEntry>();
605   referrer_chain_entry->set_navigation_initiation(
606       nav_event->navigation_initiation);
607   const GURL destination_url = nav_event->GetDestinationUrl();
608   referrer_chain_entry->set_url(ShortURLForReporting(destination_url));
609   if (destination_main_frame_url.is_valid() &&
610       destination_url != destination_main_frame_url)
611     referrer_chain_entry->set_main_frame_url(
612         ShortURLForReporting(destination_main_frame_url));
613   referrer_chain_entry->set_type(type);
614   auto ip_it = host_to_ip_map_.find(destination_url.host());
615   if (ip_it != host_to_ip_map_.end()) {
616     for (const ResolvedIPAddress& entry : ip_it->second) {
617       referrer_chain_entry->add_ip_addresses(entry.ip);
618     }
619   }
620   // Since we only track navigation to landing referrer, we will not log the
621   // referrer of the landing referrer page.
622   if (type != ReferrerChainEntry::LANDING_REFERRER) {
623     referrer_chain_entry->set_referrer_url(
624         ShortURLForReporting(nav_event->source_url));
625     // Only set |referrer_main_frame_url| if it is diff from |referrer_url|.
626     if (nav_event->source_main_frame_url.is_valid() &&
627         nav_event->source_url != nav_event->source_main_frame_url) {
628       referrer_chain_entry->set_referrer_main_frame_url(
629           ShortURLForReporting(nav_event->source_main_frame_url));
630     }
631   }
632   referrer_chain_entry->set_is_retargeting(nav_event->source_tab_id !=
633                                            nav_event->target_tab_id);
634   referrer_chain_entry->set_navigation_time_msec(
635       nav_event->last_updated.ToJavaTime());
636   if (!nav_event->server_redirect_urls.empty()) {
637     // The first entry in |server_redirect_chain| should be the original request
638     // url.
639     ReferrerChainEntry::ServerRedirect* server_redirect =
640         referrer_chain_entry->add_server_redirect_chain();
641     server_redirect->set_url(
642         ShortURLForReporting(nav_event->original_request_url));
643     for (const GURL& redirect : nav_event->server_redirect_urls) {
644       server_redirect = referrer_chain_entry->add_server_redirect_chain();
645       server_redirect->set_url(ShortURLForReporting(redirect));
646     }
647   }
648   referrer_chain_entry->set_maybe_launched_by_external_application(
649       nav_event->maybe_launched_by_external_application);
650   referrer_chain->Add()->Swap(referrer_chain_entry.get());
651 }
652 
GetRemainingReferrerChain(NavigationEvent * last_nav_event_traced,int current_user_gesture_count,int user_gesture_count_limit,ReferrerChain * out_referrer_chain,SafeBrowsingNavigationObserverManager::AttributionResult * out_result)653 void SafeBrowsingNavigationObserverManager::GetRemainingReferrerChain(
654     NavigationEvent* last_nav_event_traced,
655     int current_user_gesture_count,
656     int user_gesture_count_limit,
657     ReferrerChain* out_referrer_chain,
658     SafeBrowsingNavigationObserverManager::AttributionResult* out_result) {
659   GURL last_main_frame_url_traced(last_nav_event_traced->source_main_frame_url);
660   while (current_user_gesture_count < user_gesture_count_limit) {
661     // Back trace to the next nav_event that was initiated by the user.
662     while (!last_nav_event_traced->IsUserInitiated()) {
663       last_nav_event_traced = navigation_event_list_.FindNavigationEvent(
664           last_nav_event_traced->last_updated,
665           last_nav_event_traced->source_url,
666           last_nav_event_traced->source_main_frame_url,
667           last_nav_event_traced->source_tab_id);
668       if (!last_nav_event_traced)
669         return;
670       AddToReferrerChain(out_referrer_chain, last_nav_event_traced,
671                          last_main_frame_url_traced,
672                          ReferrerChainEntry::CLIENT_REDIRECT);
673       // Stop searching if the size of out_referrer_chain already reached its
674       // limit.
675       if (out_referrer_chain->size() == kReferrerChainMaxLength)
676         return;
677       last_main_frame_url_traced = last_nav_event_traced->source_main_frame_url;
678     }
679 
680     current_user_gesture_count++;
681 
682     last_nav_event_traced = navigation_event_list_.FindNavigationEvent(
683         last_nav_event_traced->last_updated, last_nav_event_traced->source_url,
684         last_nav_event_traced->source_main_frame_url,
685         last_nav_event_traced->source_tab_id);
686     if (!last_nav_event_traced)
687       return;
688 
689     AddToReferrerChain(out_referrer_chain, last_nav_event_traced,
690                        last_main_frame_url_traced,
691                        GetURLTypeAndAdjustAttributionResult(
692                            current_user_gesture_count, out_result));
693     // Stop searching if the size of out_referrer_chain already reached its
694     // limit.
695     if (out_referrer_chain->size() == kReferrerChainMaxLength)
696       return;
697     last_main_frame_url_traced = last_nav_event_traced->source_main_frame_url;
698   }
699 }
700 
701 }  // namespace safe_browsing
702