1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/navigation_predictor/navigation_predictor.h"
6 
7 #include <algorithm>
8 #include <memory>
9 
10 #include "base/check_op.h"
11 #include "base/metrics/field_trial_params.h"
12 #include "base/metrics/histogram_functions.h"
13 #include "base/metrics/histogram_macros.h"
14 #include "base/optional.h"
15 #include "base/rand_util.h"
16 #include "base/stl_util.h"
17 #include "base/system/sys_info.h"
18 #include "chrome/browser/navigation_predictor/navigation_predictor_keyed_service.h"
19 #include "chrome/browser/navigation_predictor/navigation_predictor_keyed_service_factory.h"
20 #include "chrome/browser/prefetch/no_state_prefetch/prerender_manager_factory.h"
21 #include "chrome/browser/profiles/profile.h"
22 #include "chrome/browser/search_engines/template_url_service_factory.h"
23 #include "components/no_state_prefetch/browser/prerender_manager.h"
24 #include "components/search_engines/template_url_service.h"
25 #include "content/public/browser/navigation_handle.h"
26 #include "content/public/browser/site_instance.h"
27 #include "content/public/browser/web_contents.h"
28 #include "mojo/public/cpp/bindings/message.h"
29 #include "mojo/public/cpp/bindings/self_owned_receiver.h"
30 #include "services/metrics/public/cpp/metrics_utils.h"
31 #include "services/metrics/public/cpp/ukm_builders.h"
32 #include "services/metrics/public/cpp/ukm_recorder.h"
33 #include "third_party/blink/public/common/features.h"
34 #include "url/gurl.h"
35 #include "url/url_canon.h"
36 
37 namespace {
38 
39 // A feature to allow multiple prerenders. The feature itself is always enabled,
40 // but the params it exposes are variable.
41 const base::Feature kNavigationPredictorMultiplePrerenders{
42     "NavigationPredictorMultiplePrerenders", base::FEATURE_ENABLED_BY_DEFAULT};
43 
GetURLWithoutRefParams(const GURL & gurl)44 std::string GetURLWithoutRefParams(const GURL& gurl) {
45   url::Replacements<char> replacements;
46   replacements.ClearRef();
47   return gurl.ReplaceComponents(replacements).spec();
48 }
49 
50 // Returns true if |a| and |b| are both valid HTTP/HTTPS URLs and have the
51 // same scheme, host, path and query params. This method does not take into
52 // account the ref params of the two URLs.
AreGURLsEqualExcludingRefParams(const GURL & a,const GURL & b)53 bool AreGURLsEqualExcludingRefParams(const GURL& a, const GURL& b) {
54   return GetURLWithoutRefParams(a) == GetURLWithoutRefParams(b);
55 }
56 }  // namespace
57 
58 struct NavigationPredictor::NavigationScore {
NavigationScoreNavigationPredictor::NavigationScore59   NavigationScore(const GURL& url,
60                   double ratio_area,
61                   bool is_url_incremented_by_one,
62                   size_t area_rank,
63                   double score,
64                   double ratio_distance_root_top,
65                   bool contains_image,
66                   bool is_in_iframe,
67                   size_t index)
68       : url(url),
69         ratio_area(ratio_area),
70         is_url_incremented_by_one(is_url_incremented_by_one),
71         area_rank(area_rank),
72         score(score),
73         ratio_distance_root_top(ratio_distance_root_top),
74         contains_image(contains_image),
75         is_in_iframe(is_in_iframe),
76         index(index) {}
77   // URL of the target link.
78   const GURL url;
79 
80   // The ratio between the absolute clickable region of an anchor element and
81   // the document area. This should be in the range [0, 1].
82   const double ratio_area;
83 
84   // Whether the url increments the current page's url by 1.
85   const bool is_url_incremented_by_one;
86 
87   // Rank in terms of anchor element area. It starts at 0, a lower rank implies
88   // a larger area. Capped at 100.
89   const size_t area_rank;
90 
91   // Calculated navigation score, based on |area_rank| and other metrics.
92   double score;
93 
94   // The distance from the top of the document to the anchor element, expressed
95   // as a ratio with the length of the document.
96   const double ratio_distance_root_top;
97 
98   // Multiple anchor elements may point to the same |url|. |contains_image| is
99   // true if at least one of the anchor elements pointing to |url| contains an
100   // image.
101   const bool contains_image;
102 
103   // |is_in_iframe| is true if at least one of the anchor elements point to
104   // |url| is in an iframe.
105   const bool is_in_iframe;
106 
107   // An index reported to UKM.
108   const size_t index;
109 
110   // Rank of the |score| in this document. It starts at 0, a lower rank implies
111   // a higher |score|.
112   base::Optional<size_t> score_rank;
113 };
114 
NavigationPredictor(content::WebContents * web_contents)115 NavigationPredictor::NavigationPredictor(content::WebContents* web_contents)
116     : browser_context_(web_contents->GetBrowserContext()),
117       ratio_area_scale_(base::GetFieldTrialParamByFeatureAsInt(
118           blink::features::kNavigationPredictor,
119           "ratio_area_scale",
120           100)),
121       is_in_iframe_scale_(base::GetFieldTrialParamByFeatureAsInt(
122           blink::features::kNavigationPredictor,
123           "is_in_iframe_scale",
124           0)),
125       is_same_host_scale_(base::GetFieldTrialParamByFeatureAsInt(
126           blink::features::kNavigationPredictor,
127           "is_same_host_scale",
128           0)),
129       contains_image_scale_(base::GetFieldTrialParamByFeatureAsInt(
130           blink::features::kNavigationPredictor,
131           "contains_image_scale",
132           50)),
133       is_url_incremented_scale_(base::GetFieldTrialParamByFeatureAsInt(
134           blink::features::kNavigationPredictor,
135           "is_url_incremented_scale",
136           100)),
137       area_rank_scale_(base::GetFieldTrialParamByFeatureAsInt(
138           blink::features::kNavigationPredictor,
139           "area_rank_scale",
140           100)),
141       ratio_distance_root_top_scale_(base::GetFieldTrialParamByFeatureAsInt(
142           blink::features::kNavigationPredictor,
143           "ratio_distance_root_top_scale",
144           0)),
145       link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
146           blink::features::kNavigationPredictor,
147           "link_total_scale",
148           0)),
149       iframe_link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
150           blink::features::kNavigationPredictor,
151           "iframe_link_total_scale",
152           0)),
153       increment_link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
154           blink::features::kNavigationPredictor,
155           "increment_link_total_scale",
156           0)),
157       same_origin_link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
158           blink::features::kNavigationPredictor,
159           "same_origin_link_total_scale",
160           0)),
161       image_link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
162           blink::features::kNavigationPredictor,
163           "image_link_total_scale",
164           0)),
165       clickable_space_scale_(base::GetFieldTrialParamByFeatureAsInt(
166           blink::features::kNavigationPredictor,
167           "clickable_space_scale",
168           0)),
169       median_link_location_scale_(base::GetFieldTrialParamByFeatureAsInt(
170           blink::features::kNavigationPredictor,
171           "median_link_location_scale",
172           0)),
173       viewport_height_scale_(base::GetFieldTrialParamByFeatureAsInt(
174           blink::features::kNavigationPredictor,
175           "viewport_height_scale",
176           0)),
177       viewport_width_scale_(base::GetFieldTrialParamByFeatureAsInt(
178           blink::features::kNavigationPredictor,
179           "viewport_width_scale",
180           0)),
181       sum_link_scales_(ratio_area_scale_ + is_in_iframe_scale_ +
182                        is_same_host_scale_ + contains_image_scale_ +
183                        is_url_incremented_scale_ + area_rank_scale_ +
184                        ratio_distance_root_top_scale_),
185       sum_page_scales_(link_total_scale_ + iframe_link_total_scale_ +
186                        increment_link_total_scale_ +
187                        same_origin_link_total_scale_ + image_link_total_scale_ +
188                        clickable_space_scale_ + median_link_location_scale_ +
189                        viewport_height_scale_ + viewport_width_scale_),
190       is_low_end_device_(base::SysInfo::IsLowEndDevice()),
191       prefetch_url_score_threshold_(base::GetFieldTrialParamByFeatureAsInt(
192           blink::features::kNavigationPredictor,
193           "prefetch_url_score_threshold",
194           0)),
195       prefetch_enabled_(base::GetFieldTrialParamByFeatureAsBool(
196           blink::features::kNavigationPredictor,
197           "prefetch_after_preconnect",
198           false)),
199       normalize_navigation_scores_(base::GetFieldTrialParamByFeatureAsBool(
200           blink::features::kNavigationPredictor,
201           "normalize_scores",
202           true)) {
203   DCHECK(browser_context_);
204   DETACH_FROM_SEQUENCE(sequence_checker_);
205 
206   if (browser_context_->IsOffTheRecord())
207     return;
208 
209   ukm_recorder_ = ukm::UkmRecorder::Get();
210 
211   current_visibility_ = web_contents->GetVisibility();
212   ukm_source_id_ = web_contents->GetMainFrame()->GetPageUkmSourceId();
213   Observe(web_contents);
214 }
215 
~NavigationPredictor()216 NavigationPredictor::~NavigationPredictor() {
217   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
218   Observe(nullptr);
219 
220   if (prerender_handle_) {
221     prerender_handle_->SetObserver(nullptr);
222     prerender_handle_->OnNavigateAway();
223   }
224 }
225 
Create(content::RenderFrameHost * render_frame_host,mojo::PendingReceiver<blink::mojom::AnchorElementMetricsHost> receiver)226 void NavigationPredictor::Create(
227     content::RenderFrameHost* render_frame_host,
228     mojo::PendingReceiver<blink::mojom::AnchorElementMetricsHost> receiver) {
229   DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
230 
231   // Only valid for the main frame.
232   if (render_frame_host->GetParent())
233     return;
234 
235   content::WebContents* web_contents =
236       content::WebContents::FromRenderFrameHost(render_frame_host);
237   if (!web_contents)
238     return;
239 
240   mojo::MakeSelfOwnedReceiver(
241       std::make_unique<NavigationPredictor>(web_contents), std::move(receiver));
242 }
243 
IsValidMetricFromRenderer(const blink::mojom::AnchorElementMetrics & metric) const244 bool NavigationPredictor::IsValidMetricFromRenderer(
245     const blink::mojom::AnchorElementMetrics& metric) const {
246   return metric.target_url.SchemeIsHTTPOrHTTPS() &&
247          metric.source_url.SchemeIsHTTPOrHTTPS();
248 }
249 
RecordActionAccuracyOnClick(const GURL & target_url) const250 void NavigationPredictor::RecordActionAccuracyOnClick(
251     const GURL& target_url) const {
252   // We don't pre-render default search engine at all, so measuring metrics here
253   // doesn't make sense.
254   if (source_is_default_search_engine_page_)
255     return;
256 
257   bool is_cross_origin =
258       url::Origin::Create(document_url_) != url::Origin::Create(target_url);
259 
260   auto prefetch_result = is_cross_origin ? PrerenderResult::kCrossOriginNotSeen
261                                          : PrerenderResult::kSameOriginNotSeen;
262 
263   if ((prefetch_url_ && prefetch_url_.value() == target_url) ||
264       base::Contains(partial_prerfetches_, target_url)) {
265     prefetch_result = PrerenderResult::kSameOriginPrefetchPartiallyComplete;
266   } else if (base::Contains(urls_prefetched_, target_url)) {
267     prefetch_result = PrerenderResult::kSameOriginPrefetchFinished;
268   } else if (std::find(urls_to_prefetch_.begin(), urls_to_prefetch_.end(),
269                        target_url) != urls_to_prefetch_.end()) {
270     prefetch_result = PrerenderResult::kSameOriginPrefetchInQueue;
271   } else if (!is_cross_origin &&
272              base::Contains(urls_above_threshold_, target_url)) {
273     prefetch_result = PrerenderResult::kSameOriginPrefetchSkipped;
274   } else if (base::Contains(urls_above_threshold_, target_url)) {
275     prefetch_result = PrerenderResult::kCrossOriginAboveThreshold;
276   } else if (!is_cross_origin &&
277              base::Contains(navigation_scores_map_, target_url.spec())) {
278     prefetch_result = PrerenderResult::kSameOriginBelowThreshold;
279   } else if (base::Contains(navigation_scores_map_, target_url.spec())) {
280     prefetch_result = PrerenderResult::kCrossOriginBelowThreshold;
281   }
282 
283   UMA_HISTOGRAM_ENUMERATION("NavigationPredictor.LinkClickedPrerenderResult",
284                             prefetch_result);
285 }
286 
RecordActionAccuracyOnTearDown()287 void NavigationPredictor::RecordActionAccuracyOnTearDown() {
288   auto document_origin = url::Origin::Create(document_url_);
289   int cross_origin_urls_above_threshold =
290       std::count_if(urls_above_threshold_.begin(), urls_above_threshold_.end(),
291                     [document_origin](const GURL& url) {
292                       return document_origin != url::Origin::Create(url);
293                     });
294 
295   UMA_HISTOGRAM_COUNTS_100("NavigationPredictor.CountOfURLsAboveThreshold",
296                            urls_above_threshold_.size());
297 
298   UMA_HISTOGRAM_COUNTS_100(
299       "NavigationPredictor.CountOfURLsAboveThreshold.CrossOrigin",
300       cross_origin_urls_above_threshold);
301 
302   UMA_HISTOGRAM_COUNTS_100(
303       "NavigationPredictor.CountOfURLsAboveThreshold.SameOrigin",
304       urls_above_threshold_.size() - cross_origin_urls_above_threshold);
305 
306   int cross_origin_urls_above_threshold_in_top_n = std::count_if(
307       urls_above_threshold_.begin(),
308       urls_above_threshold_.begin() +
309           std::min(urls_above_threshold_.size(),
310                    static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
311                        kNavigationPredictorMultiplePrerenders,
312                        "prerender_limit", 1))),
313       [document_origin](const GURL& url) {
314         return document_origin != url::Origin::Create(url);
315       });
316 
317   int same_origin_urls_above_threshold_in_top_n =
318       std::min(
319           static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
320               kNavigationPredictorMultiplePrerenders, "prerender_limit", 1)),
321           urls_above_threshold_.size()) -
322       cross_origin_urls_above_threshold_in_top_n;
323 
324   UMA_HISTOGRAM_COUNTS_100(
325       "NavigationPredictor.CountOfURLsInPredictedSet.CrossOrigin",
326       cross_origin_urls_above_threshold_in_top_n);
327   UMA_HISTOGRAM_COUNTS_100(
328       "NavigationPredictor.CountOfURLsInPredictedSet.SameOrigin",
329       same_origin_urls_above_threshold_in_top_n);
330   UMA_HISTOGRAM_COUNTS_100("NavigationPredictor.CountOfURLsInPredictedSet",
331                            cross_origin_urls_above_threshold_in_top_n +
332                                same_origin_urls_above_threshold_in_top_n);
333 
334   UMA_HISTOGRAM_COUNTS_100("NavigationPredictor.CountOfStartedPrerenders",
335                            urls_prefetched_.size());
336 }
337 
OnVisibilityChanged(content::Visibility visibility)338 void NavigationPredictor::OnVisibilityChanged(content::Visibility visibility) {
339   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
340 
341   if (current_visibility_ == visibility)
342     return;
343 
344   // Check if the visibility changed from VISIBLE to HIDDEN. Since navigation
345   // predictor is currently restricted to Android, it is okay to disregard the
346   // occluded state.
347   if (current_visibility_ != content::Visibility::HIDDEN ||
348       visibility != content::Visibility::VISIBLE) {
349     current_visibility_ = visibility;
350 
351     if (prerender_handle_) {
352       prerender_handle_->SetObserver(nullptr);
353       prerender_handle_->OnNavigateAway();
354       prerender_handle_.reset();
355       partial_prerfetches_.emplace(prefetch_url_.value());
356       prefetch_url_ = base::nullopt;
357     }
358     return;
359   }
360 
361   current_visibility_ = visibility;
362 
363   MaybePrefetch();
364 }
365 
DidStartNavigation(content::NavigationHandle * navigation_handle)366 void NavigationPredictor::DidStartNavigation(
367     content::NavigationHandle* navigation_handle) {
368   if (!navigation_handle->IsInMainFrame() ||
369       navigation_handle->IsSameDocument()) {
370     return;
371   }
372 
373   if (next_navigation_started_)
374     return;
375 
376   RecordActionAccuracyOnTearDown();
377 
378   // Don't start new prerenders.
379   next_navigation_started_ = true;
380 
381   // If there is no ongoing prerender, there is nothing to do.
382   if (!prefetch_url_.has_value())
383     return;
384 
385   // Let the prerender continue if it matches the navigation URL.
386   if (navigation_handle->GetURL() == prefetch_url_.value())
387     return;
388 
389   if (!prerender_handle_)
390     return;
391 
392   // Stop prerender to reduce network contention during main frame fetch.
393   prerender_handle_->SetObserver(nullptr);
394   prerender_handle_->OnNavigateAway();
395   prerender_handle_.reset();
396   partial_prerfetches_.emplace(prefetch_url_.value());
397   prefetch_url_ = base::nullopt;
398 }
399 
RecordAction(Action log_action)400 void NavigationPredictor::RecordAction(Action log_action) {
401   std::string action_histogram_name =
402       source_is_default_search_engine_page_
403           ? "NavigationPredictor.OnDSE.ActionTaken"
404           : "NavigationPredictor.OnNonDSE.ActionTaken";
405   base::UmaHistogramEnumeration(action_histogram_name, log_action);
406 }
407 
MaybeSendMetricsToUkm() const408 void NavigationPredictor::MaybeSendMetricsToUkm() const {
409   if (!ukm_recorder_) {
410     return;
411   }
412 
413   ukm::builders::NavigationPredictorPageLinkMetrics page_link_builder(
414       ukm_source_id_);
415 
416   page_link_builder.SetNumberOfAnchors_Total(
417       GetBucketMinForPageMetrics(number_of_anchors_));
418   page_link_builder.SetNumberOfAnchors_SameHost(
419       GetBucketMinForPageMetrics(number_of_anchors_same_host_));
420   page_link_builder.SetNumberOfAnchors_ContainsImage(
421       GetBucketMinForPageMetrics(number_of_anchors_contains_image_));
422   page_link_builder.SetNumberOfAnchors_InIframe(
423       GetBucketMinForPageMetrics(number_of_anchors_in_iframe_));
424   page_link_builder.SetNumberOfAnchors_URLIncremented(
425       GetBucketMinForPageMetrics(number_of_anchors_url_incremented_));
426   page_link_builder.SetTotalClickableSpace(
427       GetBucketMinForPageMetrics(static_cast<int>(total_clickable_space_)));
428   page_link_builder.SetMedianLinkLocation(
429       GetLinearBucketForLinkLocation(median_link_location_));
430   page_link_builder.SetViewport_Height(
431       GetBucketMinForPageMetrics(viewport_size_.height()));
432   page_link_builder.SetViewport_Width(
433       GetBucketMinForPageMetrics(viewport_size_.width()));
434 
435   page_link_builder.Record(ukm_recorder_);
436 
437   for (const auto& navigation_score_tuple : navigation_scores_map_) {
438     const auto& navigation_score = navigation_score_tuple.second;
439     ukm::builders::NavigationPredictorAnchorElementMetrics
440         anchor_element_builder(ukm_source_id_);
441 
442     // Offset index to be 1-based indexing.
443     anchor_element_builder.SetAnchorIndex(navigation_score->index);
444     anchor_element_builder.SetIsInIframe(navigation_score->is_in_iframe);
445     anchor_element_builder.SetIsURLIncrementedByOne(
446         navigation_score->is_url_incremented_by_one);
447     anchor_element_builder.SetContainsImage(navigation_score->contains_image);
448     anchor_element_builder.SetSameOrigin(
449         url::Origin::Create(navigation_score->url) ==
450         url::Origin::Create(document_url_));
451 
452     // Convert the ratio area and ratio distance from [0,1] to [0,100].
453     int percent_ratio_area =
454         static_cast<int>(navigation_score->ratio_area * 100);
455     int percent_ratio_distance_root_top =
456         static_cast<int>(navigation_score->ratio_distance_root_top * 100);
457 
458     anchor_element_builder.SetPercentClickableArea(
459         GetLinearBucketForRatioArea(percent_ratio_area));
460     anchor_element_builder.SetPercentVerticalDistance(
461         GetLinearBucketForLinkLocation(percent_ratio_distance_root_top));
462 
463     anchor_element_builder.Record(ukm_recorder_);
464   }
465 }
466 
GetBucketMinForPageMetrics(int value) const467 int NavigationPredictor::GetBucketMinForPageMetrics(int value) const {
468   return ukm::GetExponentialBucketMin(value, 1.3);
469 }
470 
GetLinearBucketForLinkLocation(int value) const471 int NavigationPredictor::GetLinearBucketForLinkLocation(int value) const {
472   return ukm::GetLinearBucketMin(static_cast<int64_t>(value), 10);
473 }
474 
GetLinearBucketForRatioArea(int value) const475 int NavigationPredictor::GetLinearBucketForRatioArea(int value) const {
476   return ukm::GetLinearBucketMin(static_cast<int64_t>(value), 5);
477 }
478 
MaybeSendClickMetricsToUkm(const std::string & clicked_url) const479 void NavigationPredictor::MaybeSendClickMetricsToUkm(
480     const std::string& clicked_url) const {
481   if (!ukm_recorder_) {
482     return;
483   }
484 
485   if (clicked_count_ > 10)
486     return;
487 
488   auto nav_score = navigation_scores_map_.find(clicked_url);
489 
490   int anchor_element_index = (nav_score == navigation_scores_map_.end())
491                                  ? 0
492                                  : nav_score->second->index;
493 
494   ukm::builders::NavigationPredictorPageLinkClick builder(ukm_source_id_);
495   builder.SetAnchorElementIndex(anchor_element_index);
496   builder.Record(ukm_recorder_);
497 }
498 
GetTemplateURLService() const499 TemplateURLService* NavigationPredictor::GetTemplateURLService() const {
500   return TemplateURLServiceFactory::GetForProfile(
501       Profile::FromBrowserContext(browser_context_));
502 }
503 
ReportAnchorElementMetricsOnClick(blink::mojom::AnchorElementMetricsPtr metrics)504 void NavigationPredictor::ReportAnchorElementMetricsOnClick(
505     blink::mojom::AnchorElementMetricsPtr metrics) {
506   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
507   DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
508 
509   if (browser_context_->IsOffTheRecord())
510     return;
511 
512   if (!IsValidMetricFromRenderer(*metrics)) {
513     mojo::ReportBadMessage("Bad anchor element metrics: onClick.");
514     return;
515   }
516 
517   source_is_default_search_engine_page_ =
518       GetTemplateURLService() &&
519       GetTemplateURLService()->IsSearchResultsPageFromDefaultSearchProvider(
520           metrics->source_url);
521   if (!metrics->source_url.SchemeIsCryptographic() ||
522       !metrics->target_url.SchemeIsCryptographic()) {
523     return;
524   }
525 
526   clicked_count_++;
527 
528   document_url_ = metrics->source_url;
529 
530   RecordActionAccuracyOnClick(metrics->target_url);
531   MaybeSendClickMetricsToUkm(metrics->target_url.spec());
532 
533   // Look up the clicked URL in |navigation_scores_map_|. Record if we find it.
534   auto iter = navigation_scores_map_.find(metrics->target_url.spec());
535   if (iter == navigation_scores_map_.end())
536     return;
537 
538 
539   // Guaranteed to be non-zero since we have found the clicked link in
540   // |navigation_scores_map_|.
541   DCHECK_LT(0, number_of_anchors_);
542 
543   if (source_is_default_search_engine_page_) {
544     UMA_HISTOGRAM_BOOLEAN("AnchorElementMetrics.Clicked.OnDSE.SameHost",
545                           metrics->is_same_host);
546   } else {
547     UMA_HISTOGRAM_BOOLEAN("AnchorElementMetrics.Clicked.OnNonDSE.SameHost",
548                           metrics->is_same_host);
549   }
550 }
551 
MergeMetricsSameTargetUrl(std::vector<blink::mojom::AnchorElementMetricsPtr> * metrics) const552 void NavigationPredictor::MergeMetricsSameTargetUrl(
553     std::vector<blink::mojom::AnchorElementMetricsPtr>* metrics) const {
554   // Maps from target url (href) to anchor element metrics from renderer.
555   std::unordered_map<std::string, blink::mojom::AnchorElementMetricsPtr>
556       metrics_map;
557 
558   // This size reserve is aggressive since |metrics_map| may contain fewer
559   // elements than metrics->size() after merge.
560   metrics_map.reserve(metrics->size());
561 
562   for (auto& metric : *metrics) {
563     // Do not include anchor elements that point to the same URL as the URL of
564     // the current navigation since these are unlikely to be clicked. Also,
565     // exclude the anchor elements that differ from the URL of the current
566     // navigation by only the ref param.
567     if (AreGURLsEqualExcludingRefParams(metric->target_url,
568                                         metric->source_url)) {
569       continue;
570     }
571 
572     if (!metric->target_url.SchemeIsCryptographic())
573       continue;
574 
575     // Currently, all predictions are made based on elements that are within the
576     // main frame since it is unclear if we can pre* the target of the elements
577     // within iframes.
578     if (metric->is_in_iframe)
579       continue;
580 
581     // Skip ref params when merging the anchor elements. This ensures that two
582     // anchor elements which differ only in the ref params are combined
583     // together.
584     const std::string& key = GetURLWithoutRefParams(metric->target_url);
585     auto iter = metrics_map.find(key);
586     if (iter == metrics_map.end()) {
587       metrics_map[key] = std::move(metric);
588     } else {
589       auto& prev_metric = iter->second;
590       prev_metric->ratio_area += metric->ratio_area;
591       prev_metric->ratio_visible_area += metric->ratio_visible_area;
592 
593       // After merging, value of |ratio_area| can go beyond 1.0. This can
594       // happen, e.g., when there are 2 anchor elements pointing to the same
595       // target. The first anchor element occupies 90% of the viewport. The
596       // second one has size 0.8 times the viewport, and only part of it is
597       // visible in the viewport. In that case, |ratio_area| may be 1.7.
598       if (prev_metric->ratio_area > 1.0)
599         prev_metric->ratio_area = 1.0;
600       DCHECK_LE(0.0, prev_metric->ratio_area);
601       DCHECK_GE(1.0, prev_metric->ratio_area);
602 
603       DCHECK_GE(1.0, prev_metric->ratio_visible_area);
604 
605       // Position related metrics are tricky to merge. Another possible way to
606       // merge is simply add up the calculated navigation scores.
607       prev_metric->ratio_distance_root_top =
608           std::min(prev_metric->ratio_distance_root_top,
609                    metric->ratio_distance_root_top);
610       prev_metric->ratio_distance_root_bottom =
611           std::max(prev_metric->ratio_distance_root_bottom,
612                    metric->ratio_distance_root_bottom);
613       prev_metric->ratio_distance_top_to_visible_top =
614           std::min(prev_metric->ratio_distance_top_to_visible_top,
615                    metric->ratio_distance_top_to_visible_top);
616       prev_metric->ratio_distance_center_to_visible_top =
617           std::min(prev_metric->ratio_distance_center_to_visible_top,
618                    metric->ratio_distance_center_to_visible_top);
619 
620       // Anchor element is not considered in an iframe as long as at least one
621       // of them is not in an iframe.
622       prev_metric->is_in_iframe =
623           prev_metric->is_in_iframe && metric->is_in_iframe;
624       prev_metric->contains_image =
625           prev_metric->contains_image || metric->contains_image;
626       DCHECK_EQ(prev_metric->is_same_host, metric->is_same_host);
627     }
628   }
629 
630   metrics->clear();
631 
632   if (metrics_map.empty())
633     return;
634 
635   metrics->reserve(metrics_map.size());
636   for (auto& metric_mapping : metrics_map) {
637     metrics->push_back(std::move(metric_mapping.second));
638   }
639 
640   DCHECK(!metrics->empty());
641   UMA_HISTOGRAM_COUNTS_100(
642       "AnchorElementMetrics.Visible.NumberOfAnchorElementsAfterMerge",
643       metrics->size());
644 }
645 
ReportAnchorElementMetricsOnLoad(std::vector<blink::mojom::AnchorElementMetricsPtr> metrics,const gfx::Size & viewport_size)646 void NavigationPredictor::ReportAnchorElementMetricsOnLoad(
647     std::vector<blink::mojom::AnchorElementMetricsPtr> metrics,
648     const gfx::Size& viewport_size) {
649   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
650   DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
651 
652   // Each document should only report metrics once when page is loaded.
653   DCHECK(navigation_scores_map_.empty());
654 
655   if (browser_context_->IsOffTheRecord())
656     return;
657 
658   if (metrics.empty()) {
659     mojo::ReportBadMessage("Bad anchor element metrics: empty.");
660     return;
661   }
662 
663   for (const auto& metric : metrics) {
664     if (!IsValidMetricFromRenderer(*metric)) {
665       mojo::ReportBadMessage("Bad anchor element metrics: onLoad.");
666       return;
667     }
668   }
669 
670   if (!metrics[0]->source_url.SchemeIsCryptographic())
671     return;
672 
673   source_is_default_search_engine_page_ =
674       GetTemplateURLService() &&
675       GetTemplateURLService()->IsSearchResultsPageFromDefaultSearchProvider(
676           metrics[0]->source_url);
677   MergeMetricsSameTargetUrl(&metrics);
678 
679   if (metrics.empty() || viewport_size.IsEmpty())
680     return;
681 
682   number_of_anchors_ = metrics.size();
683   viewport_size_ = viewport_size;
684 
685   // Count the number of anchors that have specific metrics.
686   std::vector<double> link_locations;
687   link_locations.reserve(metrics.size());
688 
689   for (const auto& metric : metrics) {
690     number_of_anchors_same_host_ += static_cast<int>(metric->is_same_host);
691     number_of_anchors_contains_image_ +=
692         static_cast<int>(metric->contains_image);
693     number_of_anchors_in_iframe_ += static_cast<int>(metric->is_in_iframe);
694     number_of_anchors_url_incremented_ +=
695         static_cast<int>(metric->is_url_incremented_by_one);
696 
697     link_locations.push_back(metric->ratio_distance_top_to_visible_top);
698     total_clickable_space_ += metric->ratio_visible_area * 100.0;
699   }
700 
701   sort(link_locations.begin(), link_locations.end());
702   median_link_location_ = link_locations[link_locations.size() / 2] * 100;
703   double page_metrics_score = GetPageMetricsScore();
704 
705   // Sort metric by area in descending order to get area rank, which is a
706   // derived feature to calculate navigation score.
707   std::sort(metrics.begin(), metrics.end(), [](const auto& a, const auto& b) {
708     return a->ratio_area > b->ratio_area;
709   });
710 
711   // Loop |metrics| to compute navigation scores.
712   std::vector<std::unique_ptr<NavigationScore>> navigation_scores;
713   navigation_scores.reserve(metrics.size());
714   double total_score = 0.0;
715 
716   std::vector<int> indices(metrics.size());
717   std::generate(indices.begin(), indices.end(),
718                 [n = 1]() mutable { return n++; });
719 
720   // Shuffle the indices to keep metrics less identifiable in UKM.
721   base::RandomShuffle(indices.begin(), indices.end());
722 
723   for (size_t i = 0; i != metrics.size(); ++i) {
724     const auto& metric = metrics[i];
725 
726     // Anchor elements with the same area are assigned with the same rank.
727     size_t area_rank = i;
728     if (i > 0 && metric->ratio_area == metrics[i - 1]->ratio_area)
729       area_rank = navigation_scores[navigation_scores.size() - 1]->area_rank;
730 
731     double score =
732         CalculateAnchorNavigationScore(*metric, area_rank) + page_metrics_score;
733     total_score += score;
734 
735     navigation_scores.push_back(std::make_unique<NavigationScore>(
736         metric->target_url, static_cast<double>(metric->ratio_area),
737         metric->is_url_incremented_by_one, area_rank, score,
738         metric->ratio_distance_root_top, metric->contains_image,
739         metric->is_in_iframe, indices[i]));
740   }
741 
742   if (normalize_navigation_scores_) {
743     // Normalize |score| to a total sum of 100.0 across all anchor elements
744     // received.
745     if (total_score > 0.0) {
746       for (auto& navigation_score : navigation_scores) {
747         navigation_score->score = navigation_score->score / total_score * 100.0;
748       }
749     }
750   }
751 
752   // Sort scores by the calculated navigation score in descending order. This
753   // score rank is used by MaybeTakeActionOnLoad, and stored in
754   // |navigation_scores_map_|.
755   std::sort(navigation_scores.begin(), navigation_scores.end(),
756             [](const auto& a, const auto& b) { return a->score > b->score; });
757 
758   document_url_ = metrics[0]->source_url;
759   MaybeTakeActionOnLoad(document_url_, navigation_scores);
760 
761   // Store navigation scores in |navigation_scores_map_| for fast look up upon
762   // clicks.
763   navigation_scores_map_.reserve(navigation_scores.size());
764   for (size_t i = 0; i != navigation_scores.size(); ++i) {
765     navigation_scores[i]->score_rank = base::make_optional(i);
766     std::string url_spec = navigation_scores[i]->url.spec();
767     navigation_scores_map_[url_spec] = std::move(navigation_scores[i]);
768   }
769 
770   MaybeSendMetricsToUkm();
771 }
772 
CalculateAnchorNavigationScore(const blink::mojom::AnchorElementMetrics & metrics,int area_rank) const773 double NavigationPredictor::CalculateAnchorNavigationScore(
774     const blink::mojom::AnchorElementMetrics& metrics,
775     int area_rank) const {
776   DCHECK(!browser_context_->IsOffTheRecord());
777 
778   if (sum_link_scales_ == 0)
779     return 0.0;
780 
781   double area_rank_score =
782       (double)((number_of_anchors_ - area_rank)) / number_of_anchors_;
783 
784   DCHECK_LE(0, metrics.ratio_visible_area);
785   DCHECK_GE(1, metrics.ratio_visible_area);
786 
787   DCHECK_LE(0, metrics.is_in_iframe);
788   DCHECK_GE(1, metrics.is_in_iframe);
789 
790   DCHECK_LE(0, metrics.is_same_host);
791   DCHECK_GE(1, metrics.is_same_host);
792 
793   DCHECK_LE(0, metrics.contains_image);
794   DCHECK_GE(1, metrics.contains_image);
795 
796   DCHECK_LE(0, metrics.is_url_incremented_by_one);
797   DCHECK_GE(1, metrics.is_url_incremented_by_one);
798 
799   DCHECK_LE(0, area_rank_score);
800   DCHECK_GE(1, area_rank_score);
801 
802   double host_score = 0.0;
803   // On pages from default search engine, give higher weight to target URLs that
804   // link to a different host. On non-default search engine pages, give higher
805   // weight to target URLs that link to the same host.
806   if (!source_is_default_search_engine_page_ && metrics.is_same_host) {
807     host_score = is_same_host_scale_;
808   } else if (source_is_default_search_engine_page_ && !metrics.is_same_host) {
809     host_score = is_same_host_scale_;
810   }
811 
812   // TODO(chelu): https://crbug.com/850624/. Experiment with other heuristic
813   // algorithms for computing the anchor elements score.
814   double score =
815       (ratio_area_scale_ * GetLinearBucketForRatioArea(
816                                static_cast<int>(metrics.ratio_area * 100.0))) +
817       (metrics.is_in_iframe ? is_in_iframe_scale_ : 0.0) +
818       (metrics.contains_image ? contains_image_scale_ : 0.0) + host_score +
819       (metrics.is_url_incremented_by_one ? is_url_incremented_scale_ : 0.0) +
820       (area_rank_scale_ * area_rank_score) +
821       (ratio_distance_root_top_scale_ *
822        GetLinearBucketForLinkLocation(
823            static_cast<int>(metrics.ratio_distance_root_top * 100.0)));
824 
825   if (normalize_navigation_scores_) {
826     score = score / sum_link_scales_ * 100.0;
827     DCHECK_LE(0.0, score);
828   }
829 
830   return score;
831 }
832 
GetPageMetricsScore() const833 double NavigationPredictor::GetPageMetricsScore() const {
834   if (sum_page_scales_ == 0.0) {
835     return 0;
836   } else {
837     DCHECK(!viewport_size_.IsEmpty());
838     return (link_total_scale_ *
839             GetBucketMinForPageMetrics(number_of_anchors_)) +
840            (iframe_link_total_scale_ *
841             GetBucketMinForPageMetrics(number_of_anchors_in_iframe_)) +
842            (increment_link_total_scale_ *
843             GetBucketMinForPageMetrics(number_of_anchors_url_incremented_)) +
844            (same_origin_link_total_scale_ *
845             GetBucketMinForPageMetrics(number_of_anchors_same_host_)) +
846            (image_link_total_scale_ *
847             GetBucketMinForPageMetrics(number_of_anchors_contains_image_)) +
848            (clickable_space_scale_ *
849             GetBucketMinForPageMetrics(total_clickable_space_)) +
850            (median_link_location_scale_ *
851             GetLinearBucketForLinkLocation(median_link_location_)) +
852            (viewport_width_scale_ *
853             GetBucketMinForPageMetrics(viewport_size_.width())) +
854            (viewport_height_scale_ *
855             GetBucketMinForPageMetrics(viewport_size_.height()));
856   }
857 }
858 
NotifyPredictionUpdated(const std::vector<std::unique_ptr<NavigationScore>> & sorted_navigation_scores)859 void NavigationPredictor::NotifyPredictionUpdated(
860     const std::vector<std::unique_ptr<NavigationScore>>&
861         sorted_navigation_scores) {
862   // It is possible for this class to still exist while its WebContents and
863   // RenderFrameHost are being destroyed. This can be detected by checking
864   // |web_contents()| which will be nullptr if the WebContents has been
865   // destroyed.
866   if (!web_contents())
867     return;
868 
869   NavigationPredictorKeyedService* service =
870       NavigationPredictorKeyedServiceFactory::GetForProfile(
871           Profile::FromBrowserContext(browser_context_));
872   DCHECK(service);
873   std::vector<GURL> top_urls;
874   top_urls.reserve(sorted_navigation_scores.size());
875   for (const auto& nav_score : sorted_navigation_scores) {
876     top_urls.push_back(nav_score->url);
877   }
878   service->OnPredictionUpdated(
879       web_contents(), document_url_,
880       NavigationPredictorKeyedService::PredictionSource::
881           kAnchorElementsParsedFromWebPage,
882       top_urls);
883 }
884 
MaybeTakeActionOnLoad(const GURL & document_url,const std::vector<std::unique_ptr<NavigationScore>> & sorted_navigation_scores)885 void NavigationPredictor::MaybeTakeActionOnLoad(
886     const GURL& document_url,
887     const std::vector<std::unique_ptr<NavigationScore>>&
888         sorted_navigation_scores) {
889   DCHECK(!browser_context_->IsOffTheRecord());
890 
891   NotifyPredictionUpdated(sorted_navigation_scores);
892 
893   // Try prefetch first.
894   urls_to_prefetch_ = GetUrlsToPrefetch(document_url, sorted_navigation_scores);
895   RecordAction(urls_to_prefetch_.empty() ? Action::kNone : Action::kPrefetch);
896   MaybePrefetch();
897 }
898 
MaybePrefetch()899 void NavigationPredictor::MaybePrefetch() {
900   // If prefetches aren't allowed here, this URL has already
901   // been prefetched, or the current tab is hidden,
902   // we shouldn't prefetch again.
903   if (!prefetch_enabled_ || urls_to_prefetch_.empty() ||
904       current_visibility_ == content::Visibility::HIDDEN) {
905     return;
906   }
907 
908   // Already an on-going prefetch.
909   if (prefetch_url_.has_value())
910     return;
911 
912   // Don't prerender if the next navigation started.
913   if (next_navigation_started_)
914     return;
915 
916   prerender::PrerenderManager* prerender_manager =
917       prerender::PrerenderManagerFactory::GetForBrowserContext(
918           browser_context_);
919 
920   if (prerender_manager) {
921     GURL url_to_prefetch = urls_to_prefetch_.front();
922     urls_to_prefetch_.pop_front();
923     Prefetch(prerender_manager, url_to_prefetch);
924   }
925 }
926 
Prefetch(prerender::PrerenderManager * prerender_manager,const GURL & url_to_prefetch)927 void NavigationPredictor::Prefetch(
928     prerender::PrerenderManager* prerender_manager,
929     const GURL& url_to_prefetch) {
930   DCHECK(!prerender_handle_);
931   DCHECK(!prefetch_url_);
932 
933   // It is possible for this class to still exist while its WebContents and
934   // RenderFrameHost are being destroyed. This can be detected by checking
935   // |web_contents()| which will be nullptr if the WebContents has been
936   // destroyed.
937   if (!web_contents())
938     return;
939 
940   content::SessionStorageNamespace* session_storage_namespace =
941       web_contents()->GetController().GetDefaultSessionStorageNamespace();
942   gfx::Size size = web_contents()->GetContainerBounds().size();
943 
944   prerender_handle_ = prerender_manager->AddPrerenderFromNavigationPredictor(
945       url_to_prefetch, session_storage_namespace, size);
946 
947   // Prerender was prevented for some reason, try next URL.
948   if (!prerender_handle_) {
949     MaybePrefetch();
950     return;
951   }
952 
953   prefetch_url_ = url_to_prefetch;
954   urls_prefetched_.emplace(url_to_prefetch);
955 
956   prerender_handle_->SetObserver(this);
957 }
958 
OnPrerenderStop(prerender::PrerenderHandle * handle)959 void NavigationPredictor::OnPrerenderStop(prerender::PrerenderHandle* handle) {
960   DCHECK_EQ(prerender_handle_.get(), handle);
961   prerender_handle_.reset();
962   prefetch_url_ = base::nullopt;
963 
964   MaybePrefetch();
965 }
966 
GetUrlsToPrefetch(const GURL & document_url,const std::vector<std::unique_ptr<NavigationScore>> & sorted_navigation_scores)967 std::deque<GURL> NavigationPredictor::GetUrlsToPrefetch(
968     const GURL& document_url,
969     const std::vector<std::unique_ptr<NavigationScore>>&
970         sorted_navigation_scores) {
971   urls_above_threshold_.clear();
972   std::deque<GURL> urls_to_prefetch;
973   // Currently, prefetch is disabled on low-end devices since prefetch may
974   // increase memory usage.
975   if (is_low_end_device_)
976     return urls_to_prefetch;
977 
978   // On search engine results page, next navigation is likely to be a different
979   // origin. Currently, the prefetch is only allowed for same orgins. Hence,
980   // prefetch is currently disabled on search engine results page.
981   if (source_is_default_search_engine_page_)
982     return urls_to_prefetch;
983 
984   if (sorted_navigation_scores.empty())
985     return urls_to_prefetch;
986 
987   // Place in order the top n scoring links. If the top n scoring links contain
988   // a cross origin link, only place n-1 links. All links must score above
989   // |prefetch_url_score_threshold_|.
990   for (size_t i = 0; i < sorted_navigation_scores.size(); ++i) {
991     double navigation_score = sorted_navigation_scores[i]->score;
992     GURL url_to_prefetch = sorted_navigation_scores[i]->url;
993 
994     // If the prediction score of the highest scoring URL is less than the
995     // threshold, then return.
996     if (navigation_score < prefetch_url_score_threshold_)
997       break;
998 
999     // Log the links above the threshold.
1000     urls_above_threshold_.push_back(url_to_prefetch);
1001 
1002     if (i >=
1003         static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
1004             kNavigationPredictorMultiplePrerenders, "prerender_limit", 1))) {
1005       continue;
1006     }
1007 
1008     // Only the same origin URLs are eligible for prefetching. If the URL with
1009     // the highest score is from a different origin, then we skip prefetching
1010     // since same origin URLs are not likely to be clicked.
1011     if (url::Origin::Create(url_to_prefetch) !=
1012         url::Origin::Create(document_url)) {
1013       continue;
1014     }
1015 
1016     urls_to_prefetch.emplace_back(url_to_prefetch);
1017   }
1018 
1019   return urls_to_prefetch;
1020 }
1021 
1022