1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/optimization_guide/hints_fetcher.h"
6 
7 #include <memory>
8 #include <utility>
9 
10 #include "base/feature_list.h"
11 #include "base/metrics/histogram_functions.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/time/default_clock.h"
14 #include "components/optimization_guide/hints_processing_util.h"
15 #include "components/optimization_guide/optimization_guide_features.h"
16 #include "components/optimization_guide/optimization_guide_prefs.h"
17 #include "components/optimization_guide/optimization_guide_util.h"
18 #include "components/optimization_guide/proto/hints.pb.h"
19 #include "components/prefs/pref_service.h"
20 #include "components/prefs/scoped_user_pref_update.h"
21 #include "components/variations/net/variations_http_headers.h"
22 #include "content/public/browser/network_service_instance.h"
23 #include "net/base/load_flags.h"
24 #include "net/base/url_util.h"
25 #include "net/http/http_request_headers.h"
26 #include "net/http/http_response_headers.h"
27 #include "net/http/http_status_code.h"
28 #include "net/traffic_annotation/network_traffic_annotation.h"
29 #include "services/network/public/cpp/shared_url_loader_factory.h"
30 #include "services/network/public/cpp/simple_url_loader.h"
31 
32 namespace optimization_guide {
33 
34 namespace {
35 
36 // Returns the string that can be used to record histograms for the request
37 // context.
38 //
39 // Keep in sync with OptimizationGuide.RequestContexts histogram_suffixes in
40 // histograms.xml.
GetStringNameForRequestContext(proto::RequestContext request_context)41 std::string GetStringNameForRequestContext(
42     proto::RequestContext request_context) {
43   switch (request_context) {
44     case proto::RequestContext::CONTEXT_UNSPECIFIED:
45       NOTREACHED();
46       return "Unknown";
47     case proto::RequestContext::CONTEXT_BATCH_UPDATE:
48       return "BatchUpdate";
49     case proto::RequestContext::CONTEXT_PAGE_NAVIGATION:
50       return "PageNavigation";
51   }
52   NOTREACHED();
53   return std::string();
54 }
55 
56 // Returns the subset of URLs from |urls| for which the URL is considered
57 // valid and can be included in a hints fetch.
GetValidURLsForFetching(const std::vector<GURL> & urls)58 std::vector<GURL> GetValidURLsForFetching(const std::vector<GURL>& urls) {
59   std::vector<GURL> valid_urls;
60   for (const auto& url : urls) {
61     if (valid_urls.size() >=
62         features::MaxUrlsForOptimizationGuideServiceHintsFetch()) {
63       break;
64     }
65     if (IsValidURLForURLKeyedHint(url))
66       valid_urls.push_back(url);
67   }
68   return valid_urls;
69 }
70 
RecordRequestStatusHistogram(proto::RequestContext request_context,HintsFetcherRequestStatus status)71 void RecordRequestStatusHistogram(proto::RequestContext request_context,
72                                   HintsFetcherRequestStatus status) {
73   base::UmaHistogramEnumeration(
74       "OptimizationGuide.HintsFetcher.RequestStatus." +
75           GetStringNameForRequestContext(request_context),
76       status);
77 }
78 
79 }  // namespace
80 
HintsFetcher(scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,const GURL & optimization_guide_service_url,PrefService * pref_service)81 HintsFetcher::HintsFetcher(
82     scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
83     const GURL& optimization_guide_service_url,
84     PrefService* pref_service)
85     : optimization_guide_service_url_(net::AppendOrReplaceQueryParameter(
86           optimization_guide_service_url,
87           "key",
88           features::GetOptimizationGuideServiceAPIKey())),
89       pref_service_(pref_service),
90       time_clock_(base::DefaultClock::GetInstance()) {
91   url_loader_factory_ = std::move(url_loader_factory);
92   CHECK(optimization_guide_service_url_.SchemeIs(url::kHttpsScheme));
93   DCHECK(features::IsRemoteFetchingEnabled());
94 }
95 
~HintsFetcher()96 HintsFetcher::~HintsFetcher() {
97   if (active_url_loader_) {
98     if (hints_fetched_callback_)
99       std::move(hints_fetched_callback_).Run(base::nullopt);
100     base::UmaHistogramExactLinear(
101         "OptimizationGuide.HintsFetcher.GetHintsRequest."
102         "ActiveRequestCanceled." +
103             GetStringNameForRequestContext(request_context_),
104         1, 1);
105   }
106 }
107 
108 // static
ClearHostsSuccessfullyFetched(PrefService * pref_service)109 void HintsFetcher::ClearHostsSuccessfullyFetched(PrefService* pref_service) {
110   DictionaryPrefUpdate hosts_fetched_list(
111       pref_service, prefs::kHintsFetcherHostsSuccessfullyFetched);
112   hosts_fetched_list->Clear();
113 }
114 
SetTimeClockForTesting(const base::Clock * time_clock)115 void HintsFetcher::SetTimeClockForTesting(const base::Clock* time_clock) {
116   time_clock_ = time_clock;
117 }
118 
119 // static
WasHostCoveredByFetch(PrefService * pref_service,const std::string & host)120 bool HintsFetcher::WasHostCoveredByFetch(PrefService* pref_service,
121                                          const std::string& host) {
122   return WasHostCoveredByFetch(pref_service, host,
123                                base::DefaultClock::GetInstance());
124 }
125 
126 // static
WasHostCoveredByFetch(PrefService * pref_service,const std::string & host,const base::Clock * time_clock)127 bool HintsFetcher::WasHostCoveredByFetch(PrefService* pref_service,
128                                          const std::string& host,
129                                          const base::Clock* time_clock) {
130   if (!optimization_guide::features::ShouldPersistHintsToDisk()) {
131     // Don't consult the pref if we aren't even persisting hints to disk.
132     return false;
133   }
134 
135   DictionaryPrefUpdate hosts_fetched(
136       pref_service, prefs::kHintsFetcherHostsSuccessfullyFetched);
137   base::Optional<double> value =
138       hosts_fetched->FindDoubleKey(HashHostForDictionary(host));
139   if (!value)
140     return false;
141 
142   base::Time host_valid_time = base::Time::FromDeltaSinceWindowsEpoch(
143       base::TimeDelta::FromSecondsD(*value));
144   return host_valid_time > time_clock->Now();
145 }
146 
FetchOptimizationGuideServiceHints(const std::vector<std::string> & hosts,const std::vector<GURL> & urls,const base::flat_set<optimization_guide::proto::OptimizationType> & optimization_types,optimization_guide::proto::RequestContext request_context,HintsFetchedCallback hints_fetched_callback)147 bool HintsFetcher::FetchOptimizationGuideServiceHints(
148     const std::vector<std::string>& hosts,
149     const std::vector<GURL>& urls,
150     const base::flat_set<optimization_guide::proto::OptimizationType>&
151         optimization_types,
152     optimization_guide::proto::RequestContext request_context,
153     HintsFetchedCallback hints_fetched_callback) {
154   SEQUENCE_CHECKER(sequence_checker_);
155   DCHECK_GT(optimization_types.size(), 0u);
156 
157   if (content::GetNetworkConnectionTracker()->IsOffline()) {
158     RecordRequestStatusHistogram(request_context,
159                                  HintsFetcherRequestStatus::kNetworkOffline);
160     std::move(hints_fetched_callback).Run(base::nullopt);
161     return false;
162   }
163 
164   if (active_url_loader_) {
165     RecordRequestStatusHistogram(request_context,
166                                  HintsFetcherRequestStatus::kFetcherBusy);
167     std::move(hints_fetched_callback).Run(base::nullopt);
168     return false;
169   }
170 
171   std::vector<std::string> filtered_hosts =
172       GetSizeLimitedHostsDueForHintsRefresh(hosts);
173   std::vector<GURL> valid_urls = GetValidURLsForFetching(urls);
174   if (filtered_hosts.empty() && valid_urls.empty()) {
175     RecordRequestStatusHistogram(
176         request_context, HintsFetcherRequestStatus::kNoHostsOrURLsToFetch);
177     std::move(hints_fetched_callback).Run(base::nullopt);
178     return false;
179   }
180 
181   DCHECK_GE(features::MaxHostsForOptimizationGuideServiceHintsFetch(),
182             filtered_hosts.size());
183   DCHECK_GE(features::MaxUrlsForOptimizationGuideServiceHintsFetch(),
184             valid_urls.size());
185 
186   if (optimization_types.empty()) {
187     RecordRequestStatusHistogram(
188         request_context,
189         HintsFetcherRequestStatus::kNoSupportedOptimizationTypes);
190     std::move(hints_fetched_callback).Run(base::nullopt);
191     return false;
192   }
193 
194   hints_fetch_start_time_ = base::TimeTicks::Now();
195   request_context_ = request_context;
196 
197   proto::GetHintsRequest get_hints_request;
198   get_hints_request.add_supported_key_representations(proto::HOST);
199   get_hints_request.add_supported_key_representations(proto::FULL_URL);
200 
201   for (const auto& optimization_type : optimization_types)
202     get_hints_request.add_supported_optimizations(optimization_type);
203 
204   get_hints_request.set_context(request_context_);
205 
206   *get_hints_request.mutable_active_field_trials() =
207       GetActiveFieldTrialsAllowedForFetch();
208 
209   for (const auto& url : valid_urls)
210     get_hints_request.add_urls()->set_url(url.spec());
211 
212   for (const auto& host : filtered_hosts) {
213     proto::HostInfo* host_info = get_hints_request.add_hosts();
214     host_info->set_host(host);
215   }
216 
217   std::string serialized_request;
218   get_hints_request.SerializeToString(&serialized_request);
219 
220   net::NetworkTrafficAnnotationTag traffic_annotation =
221       net::DefineNetworkTrafficAnnotation("hintsfetcher_gethintsrequest", R"(
222         semantics {
223           sender: "HintsFetcher"
224           description:
225             "Requests Hints from the Optimization Guide Service for use in "
226             "providing data saving and pageload optimizations for Chrome."
227           trigger:
228             "Requested periodically if Data Saver is enabled and the browser "
229             "has Hints that are older than a threshold set by "
230             "the server."
231           data: "A list of the user's most engaged websites."
232           destination: GOOGLE_OWNED_SERVICE
233         }
234         policy {
235           cookies_allowed: NO
236           setting:
237             "Users can control Data Saver on Android via 'Data Saver' setting. "
238             "Data Saver is not available on iOS."
239           policy_exception_justification: "Not implemented."
240         })");
241 
242   auto resource_request = std::make_unique<network::ResourceRequest>();
243 
244   resource_request->url = optimization_guide_service_url_;
245 
246   resource_request->method = "POST";
247   resource_request->credentials_mode = network::mojom::CredentialsMode::kOmit;
248 
249   active_url_loader_ = variations::CreateSimpleURLLoaderWithVariationsHeader(
250       std::move(resource_request),
251       // This is always InIncognito::kNo as the OptimizationGuideKeyedService is
252       // not enabled on incognito sessions and is rechecked before each fetch.
253       variations::InIncognito::kNo, variations::SignedIn::kNo,
254       traffic_annotation);
255 
256   active_url_loader_->AttachStringForUpload(serialized_request,
257                                             "application/x-protobuf");
258 
259   UMA_HISTOGRAM_COUNTS_100(
260       "OptimizationGuide.HintsFetcher.GetHintsRequest.HostCount",
261       filtered_hosts.size());
262   UMA_HISTOGRAM_COUNTS_100(
263       "OptimizationGuide.HintsFetcher.GetHintsRequest.UrlCount",
264       valid_urls.size());
265 
266   // |active_url_loader_| should not retry on 5xx errors since the server may
267   // already be overloaded. |active_url_loader_| should retry on network changes
268   // since the network stack may receive the connection change event later than
269   // |this|.
270   static const int kMaxRetries = 1;
271   active_url_loader_->SetRetryOptions(
272       kMaxRetries, network::SimpleURLLoader::RETRY_ON_NETWORK_CHANGE);
273 
274   // It's safe to use |base::Unretained(this)| here because |this| owns
275   // |active_url_loader_| and the callback will be canceled if
276   // |active_url_loader_| is destroyed.
277   active_url_loader_->DownloadToStringOfUnboundedSizeUntilCrashAndDie(
278       url_loader_factory_.get(),
279       base::BindOnce(&HintsFetcher::OnURLLoadComplete, base::Unretained(this)));
280 
281   hints_fetched_callback_ = std::move(hints_fetched_callback);
282   hosts_fetched_ = filtered_hosts;
283   return true;
284 }
285 
HandleResponse(const std::string & get_hints_response_data,int net_status,int response_code)286 void HintsFetcher::HandleResponse(const std::string& get_hints_response_data,
287                                   int net_status,
288                                   int response_code) {
289   SEQUENCE_CHECKER(sequence_checker_);
290 
291   std::unique_ptr<proto::GetHintsResponse> get_hints_response =
292       std::make_unique<proto::GetHintsResponse>();
293 
294   UMA_HISTOGRAM_ENUMERATION(
295       "OptimizationGuide.HintsFetcher.GetHintsRequest.Status",
296       static_cast<net::HttpStatusCode>(response_code),
297       net::HTTP_VERSION_NOT_SUPPORTED);
298   // Net error codes are negative but histogram enums must be positive.
299   base::UmaHistogramSparse(
300       "OptimizationGuide.HintsFetcher.GetHintsRequest.NetErrorCode",
301       -net_status);
302 
303   if (net_status == net::OK && response_code == net::HTTP_OK &&
304       get_hints_response->ParseFromString(get_hints_response_data)) {
305     UMA_HISTOGRAM_COUNTS_100(
306         "OptimizationGuide.HintsFetcher.GetHintsRequest.HintCount",
307         get_hints_response->hints_size());
308     base::TimeDelta fetch_latency =
309         base::TimeTicks::Now() - hints_fetch_start_time_;
310     UMA_HISTOGRAM_MEDIUM_TIMES(
311         "OptimizationGuide.HintsFetcher.GetHintsRequest.FetchLatency",
312         fetch_latency);
313     base::UmaHistogramMediumTimes(
314         "OptimizationGuide.HintsFetcher.GetHintsRequest.FetchLatency." +
315             GetStringNameForRequestContext(request_context_),
316         fetch_latency);
317     base::TimeDelta valid_duration =
318         features::StoredFetchedHintsFreshnessDuration();
319     if (get_hints_response->has_max_cache_duration()) {
320       valid_duration = base::TimeDelta::FromSeconds(
321           get_hints_response->max_cache_duration().seconds());
322     }
323     UpdateHostsSuccessfullyFetched(valid_duration);
324     RecordRequestStatusHistogram(request_context_,
325                                  HintsFetcherRequestStatus::kSuccess);
326     std::move(hints_fetched_callback_).Run(std::move(get_hints_response));
327   } else {
328     hosts_fetched_.clear();
329     RecordRequestStatusHistogram(request_context_,
330                                  HintsFetcherRequestStatus::kResponseError);
331     std::move(hints_fetched_callback_).Run(base::nullopt);
332   }
333 }
334 
UpdateHostsSuccessfullyFetched(base::TimeDelta valid_duration)335 void HintsFetcher::UpdateHostsSuccessfullyFetched(
336     base::TimeDelta valid_duration) {
337   if (!optimization_guide::features::ShouldPersistHintsToDisk()) {
338     // Do not persist any state if we aren't persisting hints to disk.
339     return;
340   }
341 
342   DictionaryPrefUpdate hosts_fetched_list(
343       pref_service_, prefs::kHintsFetcherHostsSuccessfullyFetched);
344 
345   // Remove any expired hosts.
346   std::vector<std::string> entries_to_remove;
347   for (const auto& it : hosts_fetched_list->DictItems()) {
348     if (base::Time::FromDeltaSinceWindowsEpoch(base::TimeDelta::FromSecondsD(
349             it.second.GetDouble())) < time_clock_->Now()) {
350       entries_to_remove.emplace_back(it.first);
351     }
352   }
353   for (const auto& host : entries_to_remove) {
354     hosts_fetched_list->Remove(host, nullptr);
355   }
356 
357   if (hosts_fetched_.empty())
358     return;
359 
360   // Ensure there is enough space in the dictionary pref for the
361   // most recent set of hosts to be stored.
362   if (hosts_fetched_list->size() + hosts_fetched_.size() >
363       features::MaxHostsForRecordingSuccessfullyCovered()) {
364     entries_to_remove.clear();
365     size_t num_entries_to_remove =
366         hosts_fetched_list->size() + hosts_fetched_.size() -
367         features::MaxHostsForRecordingSuccessfullyCovered();
368     for (const auto& it : hosts_fetched_list->DictItems()) {
369       if (entries_to_remove.size() >= num_entries_to_remove)
370         break;
371       entries_to_remove.emplace_back(it.first);
372     }
373     for (const auto& host : entries_to_remove) {
374       hosts_fetched_list->Remove(host, nullptr);
375     }
376   }
377 
378   // Add the covered hosts in |hosts_fetched_| to the dictionary pref.
379   base::Time host_invalid_time = time_clock_->Now() + valid_duration;
380   for (const std::string& host : hosts_fetched_) {
381     hosts_fetched_list->SetDoubleKey(
382         HashHostForDictionary(host),
383         host_invalid_time.ToDeltaSinceWindowsEpoch().InSecondsF());
384   }
385   DCHECK_LE(hosts_fetched_list->size(),
386             features::MaxHostsForRecordingSuccessfullyCovered());
387   hosts_fetched_.clear();
388 }
389 
390 // Callback is only invoked if |active_url_loader_| is bound and still alive.
OnURLLoadComplete(std::unique_ptr<std::string> response_body)391 void HintsFetcher::OnURLLoadComplete(
392     std::unique_ptr<std::string> response_body) {
393   SEQUENCE_CHECKER(sequence_checker_);
394 
395   int response_code = -1;
396   if (active_url_loader_->ResponseInfo() &&
397       active_url_loader_->ResponseInfo()->headers) {
398     response_code =
399         active_url_loader_->ResponseInfo()->headers->response_code();
400   }
401   auto net_error = active_url_loader_->NetError();
402   // Reset the active URL loader here since actions happening during response
403   // handling may destroy |this|.
404   active_url_loader_.reset();
405 
406   HandleResponse(response_body ? *response_body : "", net_error, response_code);
407 }
408 
GetSizeLimitedHostsDueForHintsRefresh(const std::vector<std::string> & hosts) const409 std::vector<std::string> HintsFetcher::GetSizeLimitedHostsDueForHintsRefresh(
410     const std::vector<std::string>& hosts) const {
411   SEQUENCE_CHECKER(sequence_checker_);
412 
413   DictionaryPrefUpdate hosts_fetched(
414       pref_service_, prefs::kHintsFetcherHostsSuccessfullyFetched);
415 
416   std::vector<std::string> target_hosts;
417   target_hosts.reserve(hosts.size());
418 
419   for (const auto& host : hosts) {
420     // Skip over localhosts, IP addresses, and invalid hosts.
421     if (net::HostStringIsLocalhost(host))
422       continue;
423     url::CanonHostInfo host_info;
424     std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
425     if (host_info.IsIPAddress() ||
426         !net::IsCanonicalizedHostCompliant(canonicalized_host)) {
427       continue;
428     }
429 
430     bool host_hints_due_for_refresh = true;
431 
432     base::Optional<double> value =
433         hosts_fetched->FindDoubleKey(HashHostForDictionary(host));
434     if (value && optimization_guide::features::ShouldPersistHintsToDisk()) {
435       base::Time host_valid_time = base::Time::FromDeltaSinceWindowsEpoch(
436           base::TimeDelta::FromSecondsD(*value));
437       host_hints_due_for_refresh =
438           (host_valid_time - features::GetHintsFetchRefreshDuration() <=
439            time_clock_->Now());
440     }
441     if (host_hints_due_for_refresh)
442       target_hosts.push_back(host);
443 
444     if (target_hosts.size() >=
445         features::MaxHostsForOptimizationGuideServiceHintsFetch()) {
446       break;
447     }
448   }
449   DCHECK_GE(features::MaxHostsForOptimizationGuideServiceHintsFetch(),
450             target_hosts.size());
451   return target_hosts;
452 }
453 
454 }  // namespace optimization_guide
455