1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/navigation_predictor/navigation_predictor.h"
6
7 #include <algorithm>
8 #include <memory>
9
10 #include "base/check_op.h"
11 #include "base/metrics/field_trial_params.h"
12 #include "base/metrics/histogram_functions.h"
13 #include "base/metrics/histogram_macros.h"
14 #include "base/optional.h"
15 #include "base/rand_util.h"
16 #include "base/stl_util.h"
17 #include "base/system/sys_info.h"
18 #include "chrome/browser/navigation_predictor/navigation_predictor_keyed_service.h"
19 #include "chrome/browser/navigation_predictor/navigation_predictor_keyed_service_factory.h"
20 #include "chrome/browser/prefetch/no_state_prefetch/prerender_manager_factory.h"
21 #include "chrome/browser/profiles/profile.h"
22 #include "chrome/browser/search_engines/template_url_service_factory.h"
23 #include "components/no_state_prefetch/browser/prerender_manager.h"
24 #include "components/search_engines/template_url_service.h"
25 #include "content/public/browser/navigation_handle.h"
26 #include "content/public/browser/site_instance.h"
27 #include "content/public/browser/web_contents.h"
28 #include "mojo/public/cpp/bindings/message.h"
29 #include "mojo/public/cpp/bindings/self_owned_receiver.h"
30 #include "services/metrics/public/cpp/metrics_utils.h"
31 #include "services/metrics/public/cpp/ukm_builders.h"
32 #include "services/metrics/public/cpp/ukm_recorder.h"
33 #include "third_party/blink/public/common/features.h"
34 #include "url/gurl.h"
35 #include "url/url_canon.h"
36
37 namespace {
38
39 // A feature to allow multiple prerenders. The feature itself is always enabled,
40 // but the params it exposes are variable.
41 const base::Feature kNavigationPredictorMultiplePrerenders{
42 "NavigationPredictorMultiplePrerenders", base::FEATURE_ENABLED_BY_DEFAULT};
43
GetURLWithoutRefParams(const GURL & gurl)44 std::string GetURLWithoutRefParams(const GURL& gurl) {
45 url::Replacements<char> replacements;
46 replacements.ClearRef();
47 return gurl.ReplaceComponents(replacements).spec();
48 }
49
50 // Returns true if |a| and |b| are both valid HTTP/HTTPS URLs and have the
51 // same scheme, host, path and query params. This method does not take into
52 // account the ref params of the two URLs.
AreGURLsEqualExcludingRefParams(const GURL & a,const GURL & b)53 bool AreGURLsEqualExcludingRefParams(const GURL& a, const GURL& b) {
54 return GetURLWithoutRefParams(a) == GetURLWithoutRefParams(b);
55 }
56 } // namespace
57
58 struct NavigationPredictor::NavigationScore {
NavigationScoreNavigationPredictor::NavigationScore59 NavigationScore(const GURL& url,
60 double ratio_area,
61 bool is_url_incremented_by_one,
62 size_t area_rank,
63 double score,
64 double ratio_distance_root_top,
65 bool contains_image,
66 bool is_in_iframe,
67 size_t index)
68 : url(url),
69 ratio_area(ratio_area),
70 is_url_incremented_by_one(is_url_incremented_by_one),
71 area_rank(area_rank),
72 score(score),
73 ratio_distance_root_top(ratio_distance_root_top),
74 contains_image(contains_image),
75 is_in_iframe(is_in_iframe),
76 index(index) {}
77 // URL of the target link.
78 const GURL url;
79
80 // The ratio between the absolute clickable region of an anchor element and
81 // the document area. This should be in the range [0, 1].
82 const double ratio_area;
83
84 // Whether the url increments the current page's url by 1.
85 const bool is_url_incremented_by_one;
86
87 // Rank in terms of anchor element area. It starts at 0, a lower rank implies
88 // a larger area. Capped at 100.
89 const size_t area_rank;
90
91 // Calculated navigation score, based on |area_rank| and other metrics.
92 double score;
93
94 // The distance from the top of the document to the anchor element, expressed
95 // as a ratio with the length of the document.
96 const double ratio_distance_root_top;
97
98 // Multiple anchor elements may point to the same |url|. |contains_image| is
99 // true if at least one of the anchor elements pointing to |url| contains an
100 // image.
101 const bool contains_image;
102
103 // |is_in_iframe| is true if at least one of the anchor elements point to
104 // |url| is in an iframe.
105 const bool is_in_iframe;
106
107 // An index reported to UKM.
108 const size_t index;
109
110 // Rank of the |score| in this document. It starts at 0, a lower rank implies
111 // a higher |score|.
112 base::Optional<size_t> score_rank;
113 };
114
NavigationPredictor(content::WebContents * web_contents)115 NavigationPredictor::NavigationPredictor(content::WebContents* web_contents)
116 : browser_context_(web_contents->GetBrowserContext()),
117 ratio_area_scale_(base::GetFieldTrialParamByFeatureAsInt(
118 blink::features::kNavigationPredictor,
119 "ratio_area_scale",
120 100)),
121 is_in_iframe_scale_(base::GetFieldTrialParamByFeatureAsInt(
122 blink::features::kNavigationPredictor,
123 "is_in_iframe_scale",
124 0)),
125 is_same_host_scale_(base::GetFieldTrialParamByFeatureAsInt(
126 blink::features::kNavigationPredictor,
127 "is_same_host_scale",
128 0)),
129 contains_image_scale_(base::GetFieldTrialParamByFeatureAsInt(
130 blink::features::kNavigationPredictor,
131 "contains_image_scale",
132 50)),
133 is_url_incremented_scale_(base::GetFieldTrialParamByFeatureAsInt(
134 blink::features::kNavigationPredictor,
135 "is_url_incremented_scale",
136 100)),
137 area_rank_scale_(base::GetFieldTrialParamByFeatureAsInt(
138 blink::features::kNavigationPredictor,
139 "area_rank_scale",
140 100)),
141 ratio_distance_root_top_scale_(base::GetFieldTrialParamByFeatureAsInt(
142 blink::features::kNavigationPredictor,
143 "ratio_distance_root_top_scale",
144 0)),
145 link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
146 blink::features::kNavigationPredictor,
147 "link_total_scale",
148 0)),
149 iframe_link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
150 blink::features::kNavigationPredictor,
151 "iframe_link_total_scale",
152 0)),
153 increment_link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
154 blink::features::kNavigationPredictor,
155 "increment_link_total_scale",
156 0)),
157 same_origin_link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
158 blink::features::kNavigationPredictor,
159 "same_origin_link_total_scale",
160 0)),
161 image_link_total_scale_(base::GetFieldTrialParamByFeatureAsInt(
162 blink::features::kNavigationPredictor,
163 "image_link_total_scale",
164 0)),
165 clickable_space_scale_(base::GetFieldTrialParamByFeatureAsInt(
166 blink::features::kNavigationPredictor,
167 "clickable_space_scale",
168 0)),
169 median_link_location_scale_(base::GetFieldTrialParamByFeatureAsInt(
170 blink::features::kNavigationPredictor,
171 "median_link_location_scale",
172 0)),
173 viewport_height_scale_(base::GetFieldTrialParamByFeatureAsInt(
174 blink::features::kNavigationPredictor,
175 "viewport_height_scale",
176 0)),
177 viewport_width_scale_(base::GetFieldTrialParamByFeatureAsInt(
178 blink::features::kNavigationPredictor,
179 "viewport_width_scale",
180 0)),
181 sum_link_scales_(ratio_area_scale_ + is_in_iframe_scale_ +
182 is_same_host_scale_ + contains_image_scale_ +
183 is_url_incremented_scale_ + area_rank_scale_ +
184 ratio_distance_root_top_scale_),
185 sum_page_scales_(link_total_scale_ + iframe_link_total_scale_ +
186 increment_link_total_scale_ +
187 same_origin_link_total_scale_ + image_link_total_scale_ +
188 clickable_space_scale_ + median_link_location_scale_ +
189 viewport_height_scale_ + viewport_width_scale_),
190 is_low_end_device_(base::SysInfo::IsLowEndDevice()),
191 prefetch_url_score_threshold_(base::GetFieldTrialParamByFeatureAsInt(
192 blink::features::kNavigationPredictor,
193 "prefetch_url_score_threshold",
194 0)),
195 prefetch_enabled_(base::GetFieldTrialParamByFeatureAsBool(
196 blink::features::kNavigationPredictor,
197 "prefetch_after_preconnect",
198 false)),
199 normalize_navigation_scores_(base::GetFieldTrialParamByFeatureAsBool(
200 blink::features::kNavigationPredictor,
201 "normalize_scores",
202 true)) {
203 DCHECK(browser_context_);
204 DETACH_FROM_SEQUENCE(sequence_checker_);
205
206 if (browser_context_->IsOffTheRecord())
207 return;
208
209 ukm_recorder_ = ukm::UkmRecorder::Get();
210
211 current_visibility_ = web_contents->GetVisibility();
212 ukm_source_id_ = web_contents->GetMainFrame()->GetPageUkmSourceId();
213 Observe(web_contents);
214 }
215
~NavigationPredictor()216 NavigationPredictor::~NavigationPredictor() {
217 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
218 Observe(nullptr);
219
220 if (prerender_handle_) {
221 prerender_handle_->SetObserver(nullptr);
222 prerender_handle_->OnNavigateAway();
223 }
224 }
225
Create(content::RenderFrameHost * render_frame_host,mojo::PendingReceiver<blink::mojom::AnchorElementMetricsHost> receiver)226 void NavigationPredictor::Create(
227 content::RenderFrameHost* render_frame_host,
228 mojo::PendingReceiver<blink::mojom::AnchorElementMetricsHost> receiver) {
229 DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
230
231 // Only valid for the main frame.
232 if (render_frame_host->GetParent())
233 return;
234
235 content::WebContents* web_contents =
236 content::WebContents::FromRenderFrameHost(render_frame_host);
237 if (!web_contents)
238 return;
239
240 mojo::MakeSelfOwnedReceiver(
241 std::make_unique<NavigationPredictor>(web_contents), std::move(receiver));
242 }
243
IsValidMetricFromRenderer(const blink::mojom::AnchorElementMetrics & metric) const244 bool NavigationPredictor::IsValidMetricFromRenderer(
245 const blink::mojom::AnchorElementMetrics& metric) const {
246 return metric.target_url.SchemeIsHTTPOrHTTPS() &&
247 metric.source_url.SchemeIsHTTPOrHTTPS();
248 }
249
RecordActionAccuracyOnClick(const GURL & target_url) const250 void NavigationPredictor::RecordActionAccuracyOnClick(
251 const GURL& target_url) const {
252 // We don't pre-render default search engine at all, so measuring metrics here
253 // doesn't make sense.
254 if (source_is_default_search_engine_page_)
255 return;
256
257 bool is_cross_origin =
258 url::Origin::Create(document_url_) != url::Origin::Create(target_url);
259
260 auto prefetch_result = is_cross_origin ? PrerenderResult::kCrossOriginNotSeen
261 : PrerenderResult::kSameOriginNotSeen;
262
263 if ((prefetch_url_ && prefetch_url_.value() == target_url) ||
264 base::Contains(partial_prerfetches_, target_url)) {
265 prefetch_result = PrerenderResult::kSameOriginPrefetchPartiallyComplete;
266 } else if (base::Contains(urls_prefetched_, target_url)) {
267 prefetch_result = PrerenderResult::kSameOriginPrefetchFinished;
268 } else if (std::find(urls_to_prefetch_.begin(), urls_to_prefetch_.end(),
269 target_url) != urls_to_prefetch_.end()) {
270 prefetch_result = PrerenderResult::kSameOriginPrefetchInQueue;
271 } else if (!is_cross_origin &&
272 base::Contains(urls_above_threshold_, target_url)) {
273 prefetch_result = PrerenderResult::kSameOriginPrefetchSkipped;
274 } else if (base::Contains(urls_above_threshold_, target_url)) {
275 prefetch_result = PrerenderResult::kCrossOriginAboveThreshold;
276 } else if (!is_cross_origin &&
277 base::Contains(navigation_scores_map_, target_url.spec())) {
278 prefetch_result = PrerenderResult::kSameOriginBelowThreshold;
279 } else if (base::Contains(navigation_scores_map_, target_url.spec())) {
280 prefetch_result = PrerenderResult::kCrossOriginBelowThreshold;
281 }
282
283 UMA_HISTOGRAM_ENUMERATION("NavigationPredictor.LinkClickedPrerenderResult",
284 prefetch_result);
285 }
286
RecordActionAccuracyOnTearDown()287 void NavigationPredictor::RecordActionAccuracyOnTearDown() {
288 auto document_origin = url::Origin::Create(document_url_);
289 int cross_origin_urls_above_threshold =
290 std::count_if(urls_above_threshold_.begin(), urls_above_threshold_.end(),
291 [document_origin](const GURL& url) {
292 return document_origin != url::Origin::Create(url);
293 });
294
295 UMA_HISTOGRAM_COUNTS_100("NavigationPredictor.CountOfURLsAboveThreshold",
296 urls_above_threshold_.size());
297
298 UMA_HISTOGRAM_COUNTS_100(
299 "NavigationPredictor.CountOfURLsAboveThreshold.CrossOrigin",
300 cross_origin_urls_above_threshold);
301
302 UMA_HISTOGRAM_COUNTS_100(
303 "NavigationPredictor.CountOfURLsAboveThreshold.SameOrigin",
304 urls_above_threshold_.size() - cross_origin_urls_above_threshold);
305
306 int cross_origin_urls_above_threshold_in_top_n = std::count_if(
307 urls_above_threshold_.begin(),
308 urls_above_threshold_.begin() +
309 std::min(urls_above_threshold_.size(),
310 static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
311 kNavigationPredictorMultiplePrerenders,
312 "prerender_limit", 1))),
313 [document_origin](const GURL& url) {
314 return document_origin != url::Origin::Create(url);
315 });
316
317 int same_origin_urls_above_threshold_in_top_n =
318 std::min(
319 static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
320 kNavigationPredictorMultiplePrerenders, "prerender_limit", 1)),
321 urls_above_threshold_.size()) -
322 cross_origin_urls_above_threshold_in_top_n;
323
324 UMA_HISTOGRAM_COUNTS_100(
325 "NavigationPredictor.CountOfURLsInPredictedSet.CrossOrigin",
326 cross_origin_urls_above_threshold_in_top_n);
327 UMA_HISTOGRAM_COUNTS_100(
328 "NavigationPredictor.CountOfURLsInPredictedSet.SameOrigin",
329 same_origin_urls_above_threshold_in_top_n);
330 UMA_HISTOGRAM_COUNTS_100("NavigationPredictor.CountOfURLsInPredictedSet",
331 cross_origin_urls_above_threshold_in_top_n +
332 same_origin_urls_above_threshold_in_top_n);
333
334 UMA_HISTOGRAM_COUNTS_100("NavigationPredictor.CountOfStartedPrerenders",
335 urls_prefetched_.size());
336 }
337
OnVisibilityChanged(content::Visibility visibility)338 void NavigationPredictor::OnVisibilityChanged(content::Visibility visibility) {
339 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
340
341 if (current_visibility_ == visibility)
342 return;
343
344 // Check if the visibility changed from VISIBLE to HIDDEN. Since navigation
345 // predictor is currently restricted to Android, it is okay to disregard the
346 // occluded state.
347 if (current_visibility_ != content::Visibility::HIDDEN ||
348 visibility != content::Visibility::VISIBLE) {
349 current_visibility_ = visibility;
350
351 if (prerender_handle_) {
352 prerender_handle_->SetObserver(nullptr);
353 prerender_handle_->OnNavigateAway();
354 prerender_handle_.reset();
355 partial_prerfetches_.emplace(prefetch_url_.value());
356 prefetch_url_ = base::nullopt;
357 }
358 return;
359 }
360
361 current_visibility_ = visibility;
362
363 MaybePrefetch();
364 }
365
DidStartNavigation(content::NavigationHandle * navigation_handle)366 void NavigationPredictor::DidStartNavigation(
367 content::NavigationHandle* navigation_handle) {
368 if (!navigation_handle->IsInMainFrame() ||
369 navigation_handle->IsSameDocument()) {
370 return;
371 }
372
373 if (next_navigation_started_)
374 return;
375
376 RecordActionAccuracyOnTearDown();
377
378 // Don't start new prerenders.
379 next_navigation_started_ = true;
380
381 // If there is no ongoing prerender, there is nothing to do.
382 if (!prefetch_url_.has_value())
383 return;
384
385 // Let the prerender continue if it matches the navigation URL.
386 if (navigation_handle->GetURL() == prefetch_url_.value())
387 return;
388
389 if (!prerender_handle_)
390 return;
391
392 // Stop prerender to reduce network contention during main frame fetch.
393 prerender_handle_->SetObserver(nullptr);
394 prerender_handle_->OnNavigateAway();
395 prerender_handle_.reset();
396 partial_prerfetches_.emplace(prefetch_url_.value());
397 prefetch_url_ = base::nullopt;
398 }
399
RecordAction(Action log_action)400 void NavigationPredictor::RecordAction(Action log_action) {
401 std::string action_histogram_name =
402 source_is_default_search_engine_page_
403 ? "NavigationPredictor.OnDSE.ActionTaken"
404 : "NavigationPredictor.OnNonDSE.ActionTaken";
405 base::UmaHistogramEnumeration(action_histogram_name, log_action);
406 }
407
MaybeSendMetricsToUkm() const408 void NavigationPredictor::MaybeSendMetricsToUkm() const {
409 if (!ukm_recorder_) {
410 return;
411 }
412
413 ukm::builders::NavigationPredictorPageLinkMetrics page_link_builder(
414 ukm_source_id_);
415
416 page_link_builder.SetNumberOfAnchors_Total(
417 GetBucketMinForPageMetrics(number_of_anchors_));
418 page_link_builder.SetNumberOfAnchors_SameHost(
419 GetBucketMinForPageMetrics(number_of_anchors_same_host_));
420 page_link_builder.SetNumberOfAnchors_ContainsImage(
421 GetBucketMinForPageMetrics(number_of_anchors_contains_image_));
422 page_link_builder.SetNumberOfAnchors_InIframe(
423 GetBucketMinForPageMetrics(number_of_anchors_in_iframe_));
424 page_link_builder.SetNumberOfAnchors_URLIncremented(
425 GetBucketMinForPageMetrics(number_of_anchors_url_incremented_));
426 page_link_builder.SetTotalClickableSpace(
427 GetBucketMinForPageMetrics(static_cast<int>(total_clickable_space_)));
428 page_link_builder.SetMedianLinkLocation(
429 GetLinearBucketForLinkLocation(median_link_location_));
430 page_link_builder.SetViewport_Height(
431 GetBucketMinForPageMetrics(viewport_size_.height()));
432 page_link_builder.SetViewport_Width(
433 GetBucketMinForPageMetrics(viewport_size_.width()));
434
435 page_link_builder.Record(ukm_recorder_);
436
437 for (const auto& navigation_score_tuple : navigation_scores_map_) {
438 const auto& navigation_score = navigation_score_tuple.second;
439 ukm::builders::NavigationPredictorAnchorElementMetrics
440 anchor_element_builder(ukm_source_id_);
441
442 // Offset index to be 1-based indexing.
443 anchor_element_builder.SetAnchorIndex(navigation_score->index);
444 anchor_element_builder.SetIsInIframe(navigation_score->is_in_iframe);
445 anchor_element_builder.SetIsURLIncrementedByOne(
446 navigation_score->is_url_incremented_by_one);
447 anchor_element_builder.SetContainsImage(navigation_score->contains_image);
448 anchor_element_builder.SetSameOrigin(
449 url::Origin::Create(navigation_score->url) ==
450 url::Origin::Create(document_url_));
451
452 // Convert the ratio area and ratio distance from [0,1] to [0,100].
453 int percent_ratio_area =
454 static_cast<int>(navigation_score->ratio_area * 100);
455 int percent_ratio_distance_root_top =
456 static_cast<int>(navigation_score->ratio_distance_root_top * 100);
457
458 anchor_element_builder.SetPercentClickableArea(
459 GetLinearBucketForRatioArea(percent_ratio_area));
460 anchor_element_builder.SetPercentVerticalDistance(
461 GetLinearBucketForLinkLocation(percent_ratio_distance_root_top));
462
463 anchor_element_builder.Record(ukm_recorder_);
464 }
465 }
466
GetBucketMinForPageMetrics(int value) const467 int NavigationPredictor::GetBucketMinForPageMetrics(int value) const {
468 return ukm::GetExponentialBucketMin(value, 1.3);
469 }
470
GetLinearBucketForLinkLocation(int value) const471 int NavigationPredictor::GetLinearBucketForLinkLocation(int value) const {
472 return ukm::GetLinearBucketMin(static_cast<int64_t>(value), 10);
473 }
474
GetLinearBucketForRatioArea(int value) const475 int NavigationPredictor::GetLinearBucketForRatioArea(int value) const {
476 return ukm::GetLinearBucketMin(static_cast<int64_t>(value), 5);
477 }
478
MaybeSendClickMetricsToUkm(const std::string & clicked_url) const479 void NavigationPredictor::MaybeSendClickMetricsToUkm(
480 const std::string& clicked_url) const {
481 if (!ukm_recorder_) {
482 return;
483 }
484
485 if (clicked_count_ > 10)
486 return;
487
488 auto nav_score = navigation_scores_map_.find(clicked_url);
489
490 int anchor_element_index = (nav_score == navigation_scores_map_.end())
491 ? 0
492 : nav_score->second->index;
493
494 ukm::builders::NavigationPredictorPageLinkClick builder(ukm_source_id_);
495 builder.SetAnchorElementIndex(anchor_element_index);
496 builder.Record(ukm_recorder_);
497 }
498
GetTemplateURLService() const499 TemplateURLService* NavigationPredictor::GetTemplateURLService() const {
500 return TemplateURLServiceFactory::GetForProfile(
501 Profile::FromBrowserContext(browser_context_));
502 }
503
ReportAnchorElementMetricsOnClick(blink::mojom::AnchorElementMetricsPtr metrics)504 void NavigationPredictor::ReportAnchorElementMetricsOnClick(
505 blink::mojom::AnchorElementMetricsPtr metrics) {
506 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
507 DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
508
509 if (browser_context_->IsOffTheRecord())
510 return;
511
512 if (!IsValidMetricFromRenderer(*metrics)) {
513 mojo::ReportBadMessage("Bad anchor element metrics: onClick.");
514 return;
515 }
516
517 source_is_default_search_engine_page_ =
518 GetTemplateURLService() &&
519 GetTemplateURLService()->IsSearchResultsPageFromDefaultSearchProvider(
520 metrics->source_url);
521 if (!metrics->source_url.SchemeIsCryptographic() ||
522 !metrics->target_url.SchemeIsCryptographic()) {
523 return;
524 }
525
526 clicked_count_++;
527
528 document_url_ = metrics->source_url;
529
530 RecordActionAccuracyOnClick(metrics->target_url);
531 MaybeSendClickMetricsToUkm(metrics->target_url.spec());
532
533 // Look up the clicked URL in |navigation_scores_map_|. Record if we find it.
534 auto iter = navigation_scores_map_.find(metrics->target_url.spec());
535 if (iter == navigation_scores_map_.end())
536 return;
537
538
539 // Guaranteed to be non-zero since we have found the clicked link in
540 // |navigation_scores_map_|.
541 DCHECK_LT(0, number_of_anchors_);
542
543 if (source_is_default_search_engine_page_) {
544 UMA_HISTOGRAM_BOOLEAN("AnchorElementMetrics.Clicked.OnDSE.SameHost",
545 metrics->is_same_host);
546 } else {
547 UMA_HISTOGRAM_BOOLEAN("AnchorElementMetrics.Clicked.OnNonDSE.SameHost",
548 metrics->is_same_host);
549 }
550 }
551
MergeMetricsSameTargetUrl(std::vector<blink::mojom::AnchorElementMetricsPtr> * metrics) const552 void NavigationPredictor::MergeMetricsSameTargetUrl(
553 std::vector<blink::mojom::AnchorElementMetricsPtr>* metrics) const {
554 // Maps from target url (href) to anchor element metrics from renderer.
555 std::unordered_map<std::string, blink::mojom::AnchorElementMetricsPtr>
556 metrics_map;
557
558 // This size reserve is aggressive since |metrics_map| may contain fewer
559 // elements than metrics->size() after merge.
560 metrics_map.reserve(metrics->size());
561
562 for (auto& metric : *metrics) {
563 // Do not include anchor elements that point to the same URL as the URL of
564 // the current navigation since these are unlikely to be clicked. Also,
565 // exclude the anchor elements that differ from the URL of the current
566 // navigation by only the ref param.
567 if (AreGURLsEqualExcludingRefParams(metric->target_url,
568 metric->source_url)) {
569 continue;
570 }
571
572 if (!metric->target_url.SchemeIsCryptographic())
573 continue;
574
575 // Currently, all predictions are made based on elements that are within the
576 // main frame since it is unclear if we can pre* the target of the elements
577 // within iframes.
578 if (metric->is_in_iframe)
579 continue;
580
581 // Skip ref params when merging the anchor elements. This ensures that two
582 // anchor elements which differ only in the ref params are combined
583 // together.
584 const std::string& key = GetURLWithoutRefParams(metric->target_url);
585 auto iter = metrics_map.find(key);
586 if (iter == metrics_map.end()) {
587 metrics_map[key] = std::move(metric);
588 } else {
589 auto& prev_metric = iter->second;
590 prev_metric->ratio_area += metric->ratio_area;
591 prev_metric->ratio_visible_area += metric->ratio_visible_area;
592
593 // After merging, value of |ratio_area| can go beyond 1.0. This can
594 // happen, e.g., when there are 2 anchor elements pointing to the same
595 // target. The first anchor element occupies 90% of the viewport. The
596 // second one has size 0.8 times the viewport, and only part of it is
597 // visible in the viewport. In that case, |ratio_area| may be 1.7.
598 if (prev_metric->ratio_area > 1.0)
599 prev_metric->ratio_area = 1.0;
600 DCHECK_LE(0.0, prev_metric->ratio_area);
601 DCHECK_GE(1.0, prev_metric->ratio_area);
602
603 DCHECK_GE(1.0, prev_metric->ratio_visible_area);
604
605 // Position related metrics are tricky to merge. Another possible way to
606 // merge is simply add up the calculated navigation scores.
607 prev_metric->ratio_distance_root_top =
608 std::min(prev_metric->ratio_distance_root_top,
609 metric->ratio_distance_root_top);
610 prev_metric->ratio_distance_root_bottom =
611 std::max(prev_metric->ratio_distance_root_bottom,
612 metric->ratio_distance_root_bottom);
613 prev_metric->ratio_distance_top_to_visible_top =
614 std::min(prev_metric->ratio_distance_top_to_visible_top,
615 metric->ratio_distance_top_to_visible_top);
616 prev_metric->ratio_distance_center_to_visible_top =
617 std::min(prev_metric->ratio_distance_center_to_visible_top,
618 metric->ratio_distance_center_to_visible_top);
619
620 // Anchor element is not considered in an iframe as long as at least one
621 // of them is not in an iframe.
622 prev_metric->is_in_iframe =
623 prev_metric->is_in_iframe && metric->is_in_iframe;
624 prev_metric->contains_image =
625 prev_metric->contains_image || metric->contains_image;
626 DCHECK_EQ(prev_metric->is_same_host, metric->is_same_host);
627 }
628 }
629
630 metrics->clear();
631
632 if (metrics_map.empty())
633 return;
634
635 metrics->reserve(metrics_map.size());
636 for (auto& metric_mapping : metrics_map) {
637 metrics->push_back(std::move(metric_mapping.second));
638 }
639
640 DCHECK(!metrics->empty());
641 UMA_HISTOGRAM_COUNTS_100(
642 "AnchorElementMetrics.Visible.NumberOfAnchorElementsAfterMerge",
643 metrics->size());
644 }
645
ReportAnchorElementMetricsOnLoad(std::vector<blink::mojom::AnchorElementMetricsPtr> metrics,const gfx::Size & viewport_size)646 void NavigationPredictor::ReportAnchorElementMetricsOnLoad(
647 std::vector<blink::mojom::AnchorElementMetricsPtr> metrics,
648 const gfx::Size& viewport_size) {
649 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
650 DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
651
652 // Each document should only report metrics once when page is loaded.
653 DCHECK(navigation_scores_map_.empty());
654
655 if (browser_context_->IsOffTheRecord())
656 return;
657
658 if (metrics.empty()) {
659 mojo::ReportBadMessage("Bad anchor element metrics: empty.");
660 return;
661 }
662
663 for (const auto& metric : metrics) {
664 if (!IsValidMetricFromRenderer(*metric)) {
665 mojo::ReportBadMessage("Bad anchor element metrics: onLoad.");
666 return;
667 }
668 }
669
670 if (!metrics[0]->source_url.SchemeIsCryptographic())
671 return;
672
673 source_is_default_search_engine_page_ =
674 GetTemplateURLService() &&
675 GetTemplateURLService()->IsSearchResultsPageFromDefaultSearchProvider(
676 metrics[0]->source_url);
677 MergeMetricsSameTargetUrl(&metrics);
678
679 if (metrics.empty() || viewport_size.IsEmpty())
680 return;
681
682 number_of_anchors_ = metrics.size();
683 viewport_size_ = viewport_size;
684
685 // Count the number of anchors that have specific metrics.
686 std::vector<double> link_locations;
687 link_locations.reserve(metrics.size());
688
689 for (const auto& metric : metrics) {
690 number_of_anchors_same_host_ += static_cast<int>(metric->is_same_host);
691 number_of_anchors_contains_image_ +=
692 static_cast<int>(metric->contains_image);
693 number_of_anchors_in_iframe_ += static_cast<int>(metric->is_in_iframe);
694 number_of_anchors_url_incremented_ +=
695 static_cast<int>(metric->is_url_incremented_by_one);
696
697 link_locations.push_back(metric->ratio_distance_top_to_visible_top);
698 total_clickable_space_ += metric->ratio_visible_area * 100.0;
699 }
700
701 sort(link_locations.begin(), link_locations.end());
702 median_link_location_ = link_locations[link_locations.size() / 2] * 100;
703 double page_metrics_score = GetPageMetricsScore();
704
705 // Sort metric by area in descending order to get area rank, which is a
706 // derived feature to calculate navigation score.
707 std::sort(metrics.begin(), metrics.end(), [](const auto& a, const auto& b) {
708 return a->ratio_area > b->ratio_area;
709 });
710
711 // Loop |metrics| to compute navigation scores.
712 std::vector<std::unique_ptr<NavigationScore>> navigation_scores;
713 navigation_scores.reserve(metrics.size());
714 double total_score = 0.0;
715
716 std::vector<int> indices(metrics.size());
717 std::generate(indices.begin(), indices.end(),
718 [n = 1]() mutable { return n++; });
719
720 // Shuffle the indices to keep metrics less identifiable in UKM.
721 base::RandomShuffle(indices.begin(), indices.end());
722
723 for (size_t i = 0; i != metrics.size(); ++i) {
724 const auto& metric = metrics[i];
725
726 // Anchor elements with the same area are assigned with the same rank.
727 size_t area_rank = i;
728 if (i > 0 && metric->ratio_area == metrics[i - 1]->ratio_area)
729 area_rank = navigation_scores[navigation_scores.size() - 1]->area_rank;
730
731 double score =
732 CalculateAnchorNavigationScore(*metric, area_rank) + page_metrics_score;
733 total_score += score;
734
735 navigation_scores.push_back(std::make_unique<NavigationScore>(
736 metric->target_url, static_cast<double>(metric->ratio_area),
737 metric->is_url_incremented_by_one, area_rank, score,
738 metric->ratio_distance_root_top, metric->contains_image,
739 metric->is_in_iframe, indices[i]));
740 }
741
742 if (normalize_navigation_scores_) {
743 // Normalize |score| to a total sum of 100.0 across all anchor elements
744 // received.
745 if (total_score > 0.0) {
746 for (auto& navigation_score : navigation_scores) {
747 navigation_score->score = navigation_score->score / total_score * 100.0;
748 }
749 }
750 }
751
752 // Sort scores by the calculated navigation score in descending order. This
753 // score rank is used by MaybeTakeActionOnLoad, and stored in
754 // |navigation_scores_map_|.
755 std::sort(navigation_scores.begin(), navigation_scores.end(),
756 [](const auto& a, const auto& b) { return a->score > b->score; });
757
758 document_url_ = metrics[0]->source_url;
759 MaybeTakeActionOnLoad(document_url_, navigation_scores);
760
761 // Store navigation scores in |navigation_scores_map_| for fast look up upon
762 // clicks.
763 navigation_scores_map_.reserve(navigation_scores.size());
764 for (size_t i = 0; i != navigation_scores.size(); ++i) {
765 navigation_scores[i]->score_rank = base::make_optional(i);
766 std::string url_spec = navigation_scores[i]->url.spec();
767 navigation_scores_map_[url_spec] = std::move(navigation_scores[i]);
768 }
769
770 MaybeSendMetricsToUkm();
771 }
772
CalculateAnchorNavigationScore(const blink::mojom::AnchorElementMetrics & metrics,int area_rank) const773 double NavigationPredictor::CalculateAnchorNavigationScore(
774 const blink::mojom::AnchorElementMetrics& metrics,
775 int area_rank) const {
776 DCHECK(!browser_context_->IsOffTheRecord());
777
778 if (sum_link_scales_ == 0)
779 return 0.0;
780
781 double area_rank_score =
782 (double)((number_of_anchors_ - area_rank)) / number_of_anchors_;
783
784 DCHECK_LE(0, metrics.ratio_visible_area);
785 DCHECK_GE(1, metrics.ratio_visible_area);
786
787 DCHECK_LE(0, metrics.is_in_iframe);
788 DCHECK_GE(1, metrics.is_in_iframe);
789
790 DCHECK_LE(0, metrics.is_same_host);
791 DCHECK_GE(1, metrics.is_same_host);
792
793 DCHECK_LE(0, metrics.contains_image);
794 DCHECK_GE(1, metrics.contains_image);
795
796 DCHECK_LE(0, metrics.is_url_incremented_by_one);
797 DCHECK_GE(1, metrics.is_url_incremented_by_one);
798
799 DCHECK_LE(0, area_rank_score);
800 DCHECK_GE(1, area_rank_score);
801
802 double host_score = 0.0;
803 // On pages from default search engine, give higher weight to target URLs that
804 // link to a different host. On non-default search engine pages, give higher
805 // weight to target URLs that link to the same host.
806 if (!source_is_default_search_engine_page_ && metrics.is_same_host) {
807 host_score = is_same_host_scale_;
808 } else if (source_is_default_search_engine_page_ && !metrics.is_same_host) {
809 host_score = is_same_host_scale_;
810 }
811
812 // TODO(chelu): https://crbug.com/850624/. Experiment with other heuristic
813 // algorithms for computing the anchor elements score.
814 double score =
815 (ratio_area_scale_ * GetLinearBucketForRatioArea(
816 static_cast<int>(metrics.ratio_area * 100.0))) +
817 (metrics.is_in_iframe ? is_in_iframe_scale_ : 0.0) +
818 (metrics.contains_image ? contains_image_scale_ : 0.0) + host_score +
819 (metrics.is_url_incremented_by_one ? is_url_incremented_scale_ : 0.0) +
820 (area_rank_scale_ * area_rank_score) +
821 (ratio_distance_root_top_scale_ *
822 GetLinearBucketForLinkLocation(
823 static_cast<int>(metrics.ratio_distance_root_top * 100.0)));
824
825 if (normalize_navigation_scores_) {
826 score = score / sum_link_scales_ * 100.0;
827 DCHECK_LE(0.0, score);
828 }
829
830 return score;
831 }
832
GetPageMetricsScore() const833 double NavigationPredictor::GetPageMetricsScore() const {
834 if (sum_page_scales_ == 0.0) {
835 return 0;
836 } else {
837 DCHECK(!viewport_size_.IsEmpty());
838 return (link_total_scale_ *
839 GetBucketMinForPageMetrics(number_of_anchors_)) +
840 (iframe_link_total_scale_ *
841 GetBucketMinForPageMetrics(number_of_anchors_in_iframe_)) +
842 (increment_link_total_scale_ *
843 GetBucketMinForPageMetrics(number_of_anchors_url_incremented_)) +
844 (same_origin_link_total_scale_ *
845 GetBucketMinForPageMetrics(number_of_anchors_same_host_)) +
846 (image_link_total_scale_ *
847 GetBucketMinForPageMetrics(number_of_anchors_contains_image_)) +
848 (clickable_space_scale_ *
849 GetBucketMinForPageMetrics(total_clickable_space_)) +
850 (median_link_location_scale_ *
851 GetLinearBucketForLinkLocation(median_link_location_)) +
852 (viewport_width_scale_ *
853 GetBucketMinForPageMetrics(viewport_size_.width())) +
854 (viewport_height_scale_ *
855 GetBucketMinForPageMetrics(viewport_size_.height()));
856 }
857 }
858
NotifyPredictionUpdated(const std::vector<std::unique_ptr<NavigationScore>> & sorted_navigation_scores)859 void NavigationPredictor::NotifyPredictionUpdated(
860 const std::vector<std::unique_ptr<NavigationScore>>&
861 sorted_navigation_scores) {
862 // It is possible for this class to still exist while its WebContents and
863 // RenderFrameHost are being destroyed. This can be detected by checking
864 // |web_contents()| which will be nullptr if the WebContents has been
865 // destroyed.
866 if (!web_contents())
867 return;
868
869 NavigationPredictorKeyedService* service =
870 NavigationPredictorKeyedServiceFactory::GetForProfile(
871 Profile::FromBrowserContext(browser_context_));
872 DCHECK(service);
873 std::vector<GURL> top_urls;
874 top_urls.reserve(sorted_navigation_scores.size());
875 for (const auto& nav_score : sorted_navigation_scores) {
876 top_urls.push_back(nav_score->url);
877 }
878 service->OnPredictionUpdated(
879 web_contents(), document_url_,
880 NavigationPredictorKeyedService::PredictionSource::
881 kAnchorElementsParsedFromWebPage,
882 top_urls);
883 }
884
MaybeTakeActionOnLoad(const GURL & document_url,const std::vector<std::unique_ptr<NavigationScore>> & sorted_navigation_scores)885 void NavigationPredictor::MaybeTakeActionOnLoad(
886 const GURL& document_url,
887 const std::vector<std::unique_ptr<NavigationScore>>&
888 sorted_navigation_scores) {
889 DCHECK(!browser_context_->IsOffTheRecord());
890
891 NotifyPredictionUpdated(sorted_navigation_scores);
892
893 // Try prefetch first.
894 urls_to_prefetch_ = GetUrlsToPrefetch(document_url, sorted_navigation_scores);
895 RecordAction(urls_to_prefetch_.empty() ? Action::kNone : Action::kPrefetch);
896 MaybePrefetch();
897 }
898
MaybePrefetch()899 void NavigationPredictor::MaybePrefetch() {
900 // If prefetches aren't allowed here, this URL has already
901 // been prefetched, or the current tab is hidden,
902 // we shouldn't prefetch again.
903 if (!prefetch_enabled_ || urls_to_prefetch_.empty() ||
904 current_visibility_ == content::Visibility::HIDDEN) {
905 return;
906 }
907
908 // Already an on-going prefetch.
909 if (prefetch_url_.has_value())
910 return;
911
912 // Don't prerender if the next navigation started.
913 if (next_navigation_started_)
914 return;
915
916 prerender::PrerenderManager* prerender_manager =
917 prerender::PrerenderManagerFactory::GetForBrowserContext(
918 browser_context_);
919
920 if (prerender_manager) {
921 GURL url_to_prefetch = urls_to_prefetch_.front();
922 urls_to_prefetch_.pop_front();
923 Prefetch(prerender_manager, url_to_prefetch);
924 }
925 }
926
Prefetch(prerender::PrerenderManager * prerender_manager,const GURL & url_to_prefetch)927 void NavigationPredictor::Prefetch(
928 prerender::PrerenderManager* prerender_manager,
929 const GURL& url_to_prefetch) {
930 DCHECK(!prerender_handle_);
931 DCHECK(!prefetch_url_);
932
933 // It is possible for this class to still exist while its WebContents and
934 // RenderFrameHost are being destroyed. This can be detected by checking
935 // |web_contents()| which will be nullptr if the WebContents has been
936 // destroyed.
937 if (!web_contents())
938 return;
939
940 content::SessionStorageNamespace* session_storage_namespace =
941 web_contents()->GetController().GetDefaultSessionStorageNamespace();
942 gfx::Size size = web_contents()->GetContainerBounds().size();
943
944 prerender_handle_ = prerender_manager->AddPrerenderFromNavigationPredictor(
945 url_to_prefetch, session_storage_namespace, size);
946
947 // Prerender was prevented for some reason, try next URL.
948 if (!prerender_handle_) {
949 MaybePrefetch();
950 return;
951 }
952
953 prefetch_url_ = url_to_prefetch;
954 urls_prefetched_.emplace(url_to_prefetch);
955
956 prerender_handle_->SetObserver(this);
957 }
958
OnPrerenderStop(prerender::PrerenderHandle * handle)959 void NavigationPredictor::OnPrerenderStop(prerender::PrerenderHandle* handle) {
960 DCHECK_EQ(prerender_handle_.get(), handle);
961 prerender_handle_.reset();
962 prefetch_url_ = base::nullopt;
963
964 MaybePrefetch();
965 }
966
GetUrlsToPrefetch(const GURL & document_url,const std::vector<std::unique_ptr<NavigationScore>> & sorted_navigation_scores)967 std::deque<GURL> NavigationPredictor::GetUrlsToPrefetch(
968 const GURL& document_url,
969 const std::vector<std::unique_ptr<NavigationScore>>&
970 sorted_navigation_scores) {
971 urls_above_threshold_.clear();
972 std::deque<GURL> urls_to_prefetch;
973 // Currently, prefetch is disabled on low-end devices since prefetch may
974 // increase memory usage.
975 if (is_low_end_device_)
976 return urls_to_prefetch;
977
978 // On search engine results page, next navigation is likely to be a different
979 // origin. Currently, the prefetch is only allowed for same orgins. Hence,
980 // prefetch is currently disabled on search engine results page.
981 if (source_is_default_search_engine_page_)
982 return urls_to_prefetch;
983
984 if (sorted_navigation_scores.empty())
985 return urls_to_prefetch;
986
987 // Place in order the top n scoring links. If the top n scoring links contain
988 // a cross origin link, only place n-1 links. All links must score above
989 // |prefetch_url_score_threshold_|.
990 for (size_t i = 0; i < sorted_navigation_scores.size(); ++i) {
991 double navigation_score = sorted_navigation_scores[i]->score;
992 GURL url_to_prefetch = sorted_navigation_scores[i]->url;
993
994 // If the prediction score of the highest scoring URL is less than the
995 // threshold, then return.
996 if (navigation_score < prefetch_url_score_threshold_)
997 break;
998
999 // Log the links above the threshold.
1000 urls_above_threshold_.push_back(url_to_prefetch);
1001
1002 if (i >=
1003 static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
1004 kNavigationPredictorMultiplePrerenders, "prerender_limit", 1))) {
1005 continue;
1006 }
1007
1008 // Only the same origin URLs are eligible for prefetching. If the URL with
1009 // the highest score is from a different origin, then we skip prefetching
1010 // since same origin URLs are not likely to be clicked.
1011 if (url::Origin::Create(url_to_prefetch) !=
1012 url::Origin::Create(document_url)) {
1013 continue;
1014 }
1015
1016 urls_to_prefetch.emplace_back(url_to_prefetch);
1017 }
1018
1019 return urls_to_prefetch;
1020 }
1021
1022