1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_NAVIGATION_PREDICTOR_NAVIGATION_PREDICTOR_H_
6 #define CHROME_BROWSER_NAVIGATION_PREDICTOR_NAVIGATION_PREDICTOR_H_
7 
8 #include <deque>
9 #include <set>
10 #include <string>
11 #include <unordered_map>
12 #include <vector>
13 
14 #include "base/macros.h"
15 #include "base/optional.h"
16 #include "base/sequence_checker.h"
17 #include "base/time/time.h"
18 #include "components/no_state_prefetch/browser/prerender_handle.h"
19 #include "content/public/browser/visibility.h"
20 #include "content/public/browser/web_contents_observer.h"
21 #include "mojo/public/cpp/bindings/pending_receiver.h"
22 #include "services/metrics/public/cpp/ukm_recorder.h"
23 #include "services/metrics/public/cpp/ukm_source_id.h"
24 #include "third_party/blink/public/mojom/loader/navigation_predictor.mojom.h"
25 #include "ui/gfx/geometry/size.h"
26 #include "url/origin.h"
27 
// Forward declarations. This header only refers to these types through
// pointers, so their full definitions are not needed here.
namespace content {
class BrowserContext;
class NavigationHandle;
class RenderFrameHost;
}  // namespace content

namespace prerender {
class PrerenderManager;
}  // namespace prerender

class TemplateURLService;
39 
40 // This class gathers metrics of anchor elements from both renderer process
41 // and browser process. Then it uses these metrics to make predictions on what
42 // are the most likely anchor elements that the user will click.
43 class NavigationPredictor : public blink::mojom::AnchorElementMetricsHost,
44                             public content::WebContentsObserver,
45                             public prerender::PrerenderHandle::Observer {
46  public:
47   explicit NavigationPredictor(content::WebContents* web_contents);
48   ~NavigationPredictor() override;
49 
50   // Create and bind NavigationPredictor.
51   static void Create(content::RenderFrameHost* render_frame_host,
52                      mojo::PendingReceiver<AnchorElementMetricsHost> receiver);
53 
54   // Enum describing the possible set of actions that navigation predictor may
55   // take. This enum should remain synchronized with enum
56   // NavigationPredictorActionTaken in enums.xml. Order of enum values should
57   // not be changed since the values are recorded in UMA.
58   enum class Action {
59     kUnknown = 0,
60     kNone = 1,
61     // DEPRECATED: kPreresolve = 2,
62     // DEPRECATED: kPreconnect = 3,
63     kPrefetch = 4,
64     // DEPRECATED: kPreconnectOnVisibilityChange = 5,
65     // DEPRECATED: kPreconnectOnAppForeground = 6,  // Deprecated.
66     // DEPRECATED: kPreconnectAfterTimeout = 7,
67     kMaxValue = kPrefetch,
68   };
69 
70   // Enum to report the prerender result of the clicked link. Changes must be
71   // propagated to enums.xml, and the enum should not be re-ordered.
72   enum class PrerenderResult {
73     // The prerender finished entirely before the link was clicked.
74     kSameOriginPrefetchFinished = 0,
75     // The prerender was started but not finished before the user navigated or
76     // backgrounded the page.
77     kSameOriginPrefetchPartiallyComplete = 1,
78     // The link was waiting to be prerendered while another prerender was in
79     // progress.
80     kSameOriginPrefetchInQueue = 2,
81     // The prerender was attempted, but a prerender mechanism skipped the
82     // prerender.
83     kSameOriginPrefetchSkipped = 3,
84     // The link was same origin, but scored poorly in the decider logic.
85     kSameOriginBelowThreshold = 4,
86     // The URL was not seen in the load event.
87     kSameOriginNotSeen = 5,
88     // The link was cross origin and scored above the threshold, but we did not
89     // prerender it.
90     kCrossOriginAboveThreshold = 6,
91     // The link was cross origin and scored below the threshold.
92     kCrossOriginBelowThreshold = 7,
93     // The URL was not seen in the load event.
94     kCrossOriginNotSeen = 8,
95     kMaxValue = kCrossOriginNotSeen,
96   };
97 
98  private:
99   // Struct holding navigation score, rank and other info of the anchor element.
100   // Used for look up when an anchor element is clicked.
101   struct NavigationScore;
102 
103   // blink::mojom::AnchorElementMetricsHost:
104   void ReportAnchorElementMetricsOnClick(
105       blink::mojom::AnchorElementMetricsPtr metrics) override;
106   void ReportAnchorElementMetricsOnLoad(
107       std::vector<blink::mojom::AnchorElementMetricsPtr> metrics,
108       const gfx::Size& viewport_size) override;
109 
110   // content::WebContentsObserver:
111   void OnVisibilityChanged(content::Visibility visibility) override;
112   void DidStartNavigation(
113       content::NavigationHandle* navigation_handle) override;
114 
115   // prerender::PrerenderHandle::Observer:
116   void OnPrerenderStop(prerender::PrerenderHandle* handle) override;
OnPrerenderStart(prerender::PrerenderHandle * handle)117   void OnPrerenderStart(prerender::PrerenderHandle* handle) override {}
OnPrerenderStopLoading(prerender::PrerenderHandle * handle)118   void OnPrerenderStopLoading(prerender::PrerenderHandle* handle) override {}
OnPrerenderDomContentLoaded(prerender::PrerenderHandle * handle)119   void OnPrerenderDomContentLoaded(
120       prerender::PrerenderHandle* handle) override {}
OnPrerenderNetworkBytesChanged(prerender::PrerenderHandle * handle)121   void OnPrerenderNetworkBytesChanged(
122       prerender::PrerenderHandle* handle) override {}
123 
124   // Returns true if the anchor element metric from the renderer process is
125   // valid.
126   bool IsValidMetricFromRenderer(
127       const blink::mojom::AnchorElementMetrics& metric) const;
128 
129   // Returns template URL service. Guaranteed to be non-null.
130   TemplateURLService* GetTemplateURLService() const;
131 
132   // Merge anchor element metrics that have the same target url (href).
133   void MergeMetricsSameTargetUrl(
134       std::vector<blink::mojom::AnchorElementMetricsPtr>* metrics) const;
135 
136   // Computes and stores document level metrics, including |number_of_anchors_|
137   // etc.
138   void ComputeDocumentMetricsOnLoad(
139       const std::vector<blink::mojom::AnchorElementMetricsPtr>& metrics);
140 
141   // Given metrics of an anchor element from both renderer and browser process,
142   // returns navigation score. Virtual for testing purposes.
143   virtual double CalculateAnchorNavigationScore(
144       const blink::mojom::AnchorElementMetrics& metrics,
145       int area_rank) const;
146 
147   // If |sum_page_scales_| is non-zero, return the page-wide score to add to
148   // all the navigation scores. Computed once per page.
149   double GetPageMetricsScore() const;
150 
151   // Given a vector of navigation scores sorted in descending order, decide what
152   // action to take, or decide not to do anything. Example actions including
153   // preresolve, preload, prerendering, etc.
154   void MaybeTakeActionOnLoad(
155       const GURL& document_url,
156       const std::vector<std::unique_ptr<NavigationScore>>&
157           sorted_navigation_scores);
158 
159   // Decides whether to prefetch a URL and, if yes, calls Prefetch.
160   void MaybePrefetch();
161 
162   // Given a url to prefetch, uses PrerenderManager to start a NoStatePrefetch
163   // of that URL.
164   virtual void Prefetch(prerender::PrerenderManager* prerender_manager,
165                         const GURL& url_to_prefetch);
166 
167   // Returns a collection of URLs that can be prefetched. Only one should be
168   // prefetched at a time.
169   std::deque<GURL> GetUrlsToPrefetch(
170       const GURL& document_url,
171       const std::vector<std::unique_ptr<NavigationScore>>&
172           sorted_navigation_scores);
173 
174   // Record anchor element metrics on page load.
175   void RecordMetricsOnLoad(
176       const blink::mojom::AnchorElementMetrics& metric) const;
177 
178   // Record timing information when an anchor element is clicked.
179   void RecordTimingOnClick();
180 
181   // Records the accuracy of the action taken by the navigator predictor based
182   // on the action taken as well as the URL that was navigated to.
183   // |target_url| is the URL navigated to by the user.
184   void RecordActionAccuracyOnClick(const GURL& target_url) const;
185 
186   // Records metrics on which action the predictor is taking.
187   void RecordAction(Action log_action);
188 
189   // Sends metrics to the UKM id at |ukm_source_id_|.
190   void MaybeSendMetricsToUkm() const;
191 
192   // After an in-page click, sends the index of the url that was clicked to the
193   // UKM id at |ukm_source_id_|.
194   void MaybeSendClickMetricsToUkm(const std::string& clicked_url) const;
195 
196   // Returns the minimum of the bucket that |value| belongs in, for page-wide
197   // metrics, excluding |median_link_location_|.
198   int GetBucketMinForPageMetrics(int value) const;
199 
200   // Returns the minimum of the bucket that |value| belongs in, used for
201   // |median_link_location_| and the |ratio_distance_root_top|.
202   int GetLinearBucketForLinkLocation(int value) const;
203 
204   // Returns the minimum of the bucket that |value| belongs in, used for
205   // |ratio_area|.
206   int GetLinearBucketForRatioArea(int value) const;
207 
208   // Notifies the keyed service of the updated predicted navigation.
209   void NotifyPredictionUpdated(
210       const std::vector<std::unique_ptr<NavigationScore>>&
211           sorted_navigation_scores);
212 
213   // Record metrics about how many prerenders were started and finished.
214   void RecordActionAccuracyOnTearDown();
215 
216   // Used to get keyed services.
217   content::BrowserContext* const browser_context_;
218 
219   // Maps from target url (href) to navigation score.
220   std::unordered_map<std::string, std::unique_ptr<NavigationScore>>
221       navigation_scores_map_;
222 
223   // Total number of anchors that: href has the same host as the document,
224   // contains image, inside an iframe, href incremented by 1 from document url.
225   int number_of_anchors_same_host_ = 0;
226   int number_of_anchors_contains_image_ = 0;
227   int number_of_anchors_in_iframe_ = 0;
228   int number_of_anchors_url_incremented_ = 0;
229   int number_of_anchors_ = 0;
230 
231   // Viewport-related metrics for anchor elements: the viewport size,
232   // the median distance down the viewport of all the links, and the
233   // total clickable space for first viewport links. |total_clickable_space_| is
234   // a percent (between 0 and 100).
235   gfx::Size viewport_size_;
236   int median_link_location_ = 0;
237   float total_clickable_space_ = 0;
238 
239   // Anchor-specific scaling factors used to compute navigation scores.
240   const int ratio_area_scale_;
241   const int is_in_iframe_scale_;
242   const int is_same_host_scale_;
243   const int contains_image_scale_;
244   const int is_url_incremented_scale_;
245   const int area_rank_scale_;
246   const int ratio_distance_root_top_scale_;
247 
248   // Page-wide scaling factors used to compute navigation scores.
249   const int link_total_scale_;
250   const int iframe_link_total_scale_;
251   const int increment_link_total_scale_;
252   const int same_origin_link_total_scale_;
253   const int image_link_total_scale_;
254   const int clickable_space_scale_;
255   const int median_link_location_scale_;
256   const int viewport_height_scale_;
257   const int viewport_width_scale_;
258 
259   // Sum of all scales for individual anchor metrics.
260   // Used to normalize the final computed weight.
261   const int sum_link_scales_;
262 
263   // Sum of all scales for page-wide metrics.
264   const int sum_page_scales_;
265 
266   // True if device is a low end device.
267   const bool is_low_end_device_;
268 
269   // Minimum score that a URL should have for it to be prefetched. Note
270   // that scores of origins are computed differently from scores of URLs, so
271   // they are not comparable.
272   const int prefetch_url_score_threshold_;
273 
274   // True if |this| should use the PrerenderManager to prefetch.
275   const bool prefetch_enabled_;
276 
277   // True by default, otherwise navigation scores will not be normalized
278   // by the sum of metrics weights nor normalized from 0 to 100 across
279   // all navigation scores for a page.
280   const bool normalize_navigation_scores_;
281 
282   // A count of clicks to prevent reporting more than 10 clicks to UKM.
283   size_t clicked_count_ = 0;
284 
285   // Whether a new navigation has started (only set if load event comes before
286   // DidStartNavigation).
287   bool next_navigation_started_ = false;
288 
289   // True if the source webpage (i.e., the page on which we are trying to
290   // predict the next navigation) is a page from user's default search engine.
291   bool source_is_default_search_engine_page_ = false;
292 
293   // Current visibility state of the web contents.
294   content::Visibility current_visibility_;
295 
296   // Current prerender handle.
297   std::unique_ptr<prerender::PrerenderHandle> prerender_handle_;
298 
299   // URL that we decided to prefetch, and are currently prefetching.
300   base::Optional<GURL> prefetch_url_;
301 
302   // An ordered list of URLs that should be prefetched in succession.
303   std::deque<GURL> urls_to_prefetch_;
304 
305   // URLs that were successfully prefetched.
306   std::set<GURL> urls_prefetched_;
307 
308   // URLs that scored above the threshold in sorted order.
309   std::vector<GURL> urls_above_threshold_;
310 
311   // URLs that had a prerender started, but were canceled due to background or
312   // next navigation.
313   std::set<GURL> partial_prerfetches_;
314 
315   // UKM ID for navigation
316   ukm::SourceId ukm_source_id_;
317 
318   // UKM recorder
319   ukm::UkmRecorder* ukm_recorder_ = nullptr;
320 
321   // The URL of the current page.
322   GURL document_url_;
323 
324   // WebContents of the current page.
325   const content::WebContents* web_contents_;
326 
327   SEQUENCE_CHECKER(sequence_checker_);
328 
329   DISALLOW_COPY_AND_ASSIGN(NavigationPredictor);
330 };
331 
332 #endif  // CHROME_BROWSER_NAVIGATION_PREDICTOR_NAVIGATION_PREDICTOR_H_
333