1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/optimization_guide/hints_fetcher.h"
6
7 #include <memory>
8 #include <utility>
9
10 #include "base/feature_list.h"
11 #include "base/metrics/histogram_functions.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/time/default_clock.h"
14 #include "components/optimization_guide/hints_processing_util.h"
15 #include "components/optimization_guide/optimization_guide_features.h"
16 #include "components/optimization_guide/optimization_guide_prefs.h"
17 #include "components/optimization_guide/optimization_guide_util.h"
18 #include "components/optimization_guide/proto/hints.pb.h"
19 #include "components/prefs/pref_service.h"
20 #include "components/prefs/scoped_user_pref_update.h"
21 #include "components/variations/net/variations_http_headers.h"
22 #include "content/public/browser/network_service_instance.h"
23 #include "net/base/load_flags.h"
24 #include "net/base/url_util.h"
25 #include "net/http/http_request_headers.h"
26 #include "net/http/http_response_headers.h"
27 #include "net/http/http_status_code.h"
28 #include "net/traffic_annotation/network_traffic_annotation.h"
29 #include "services/network/public/cpp/shared_url_loader_factory.h"
30 #include "services/network/public/cpp/simple_url_loader.h"
31
32 namespace optimization_guide {
33
34 namespace {
35
36 // Returns the string that can be used to record histograms for the request
37 // context.
38 //
39 // Keep in sync with OptimizationGuide.RequestContexts histogram_suffixes in
40 // histograms.xml.
GetStringNameForRequestContext(proto::RequestContext request_context)41 std::string GetStringNameForRequestContext(
42 proto::RequestContext request_context) {
43 switch (request_context) {
44 case proto::RequestContext::CONTEXT_UNSPECIFIED:
45 NOTREACHED();
46 return "Unknown";
47 case proto::RequestContext::CONTEXT_BATCH_UPDATE:
48 return "BatchUpdate";
49 case proto::RequestContext::CONTEXT_PAGE_NAVIGATION:
50 return "PageNavigation";
51 }
52 NOTREACHED();
53 return std::string();
54 }
55
56 // Returns the subset of URLs from |urls| for which the URL is considered
57 // valid and can be included in a hints fetch.
GetValidURLsForFetching(const std::vector<GURL> & urls)58 std::vector<GURL> GetValidURLsForFetching(const std::vector<GURL>& urls) {
59 std::vector<GURL> valid_urls;
60 for (const auto& url : urls) {
61 if (valid_urls.size() >=
62 features::MaxUrlsForOptimizationGuideServiceHintsFetch()) {
63 break;
64 }
65 if (IsValidURLForURLKeyedHint(url))
66 valid_urls.push_back(url);
67 }
68 return valid_urls;
69 }
70
RecordRequestStatusHistogram(proto::RequestContext request_context,HintsFetcherRequestStatus status)71 void RecordRequestStatusHistogram(proto::RequestContext request_context,
72 HintsFetcherRequestStatus status) {
73 base::UmaHistogramEnumeration(
74 "OptimizationGuide.HintsFetcher.RequestStatus." +
75 GetStringNameForRequestContext(request_context),
76 status);
77 }
78
79 } // namespace
80
HintsFetcher(scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,const GURL & optimization_guide_service_url,PrefService * pref_service)81 HintsFetcher::HintsFetcher(
82 scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
83 const GURL& optimization_guide_service_url,
84 PrefService* pref_service)
85 : optimization_guide_service_url_(net::AppendOrReplaceQueryParameter(
86 optimization_guide_service_url,
87 "key",
88 features::GetOptimizationGuideServiceAPIKey())),
89 pref_service_(pref_service),
90 time_clock_(base::DefaultClock::GetInstance()) {
91 url_loader_factory_ = std::move(url_loader_factory);
92 CHECK(optimization_guide_service_url_.SchemeIs(url::kHttpsScheme));
93 DCHECK(features::IsRemoteFetchingEnabled());
94 }
95
~HintsFetcher()96 HintsFetcher::~HintsFetcher() {
97 if (active_url_loader_) {
98 if (hints_fetched_callback_)
99 std::move(hints_fetched_callback_).Run(base::nullopt);
100 base::UmaHistogramExactLinear(
101 "OptimizationGuide.HintsFetcher.GetHintsRequest."
102 "ActiveRequestCanceled." +
103 GetStringNameForRequestContext(request_context_),
104 1, 1);
105 }
106 }
107
108 // static
ClearHostsSuccessfullyFetched(PrefService * pref_service)109 void HintsFetcher::ClearHostsSuccessfullyFetched(PrefService* pref_service) {
110 DictionaryPrefUpdate hosts_fetched_list(
111 pref_service, prefs::kHintsFetcherHostsSuccessfullyFetched);
112 hosts_fetched_list->Clear();
113 }
114
SetTimeClockForTesting(const base::Clock * time_clock)115 void HintsFetcher::SetTimeClockForTesting(const base::Clock* time_clock) {
116 time_clock_ = time_clock;
117 }
118
119 // static
WasHostCoveredByFetch(PrefService * pref_service,const std::string & host)120 bool HintsFetcher::WasHostCoveredByFetch(PrefService* pref_service,
121 const std::string& host) {
122 return WasHostCoveredByFetch(pref_service, host,
123 base::DefaultClock::GetInstance());
124 }
125
126 // static
WasHostCoveredByFetch(PrefService * pref_service,const std::string & host,const base::Clock * time_clock)127 bool HintsFetcher::WasHostCoveredByFetch(PrefService* pref_service,
128 const std::string& host,
129 const base::Clock* time_clock) {
130 if (!optimization_guide::features::ShouldPersistHintsToDisk()) {
131 // Don't consult the pref if we aren't even persisting hints to disk.
132 return false;
133 }
134
135 DictionaryPrefUpdate hosts_fetched(
136 pref_service, prefs::kHintsFetcherHostsSuccessfullyFetched);
137 base::Optional<double> value =
138 hosts_fetched->FindDoubleKey(HashHostForDictionary(host));
139 if (!value)
140 return false;
141
142 base::Time host_valid_time = base::Time::FromDeltaSinceWindowsEpoch(
143 base::TimeDelta::FromSecondsD(*value));
144 return host_valid_time > time_clock->Now();
145 }
146
FetchOptimizationGuideServiceHints(const std::vector<std::string> & hosts,const std::vector<GURL> & urls,const base::flat_set<optimization_guide::proto::OptimizationType> & optimization_types,optimization_guide::proto::RequestContext request_context,HintsFetchedCallback hints_fetched_callback)147 bool HintsFetcher::FetchOptimizationGuideServiceHints(
148 const std::vector<std::string>& hosts,
149 const std::vector<GURL>& urls,
150 const base::flat_set<optimization_guide::proto::OptimizationType>&
151 optimization_types,
152 optimization_guide::proto::RequestContext request_context,
153 HintsFetchedCallback hints_fetched_callback) {
154 SEQUENCE_CHECKER(sequence_checker_);
155 DCHECK_GT(optimization_types.size(), 0u);
156
157 if (content::GetNetworkConnectionTracker()->IsOffline()) {
158 RecordRequestStatusHistogram(request_context,
159 HintsFetcherRequestStatus::kNetworkOffline);
160 std::move(hints_fetched_callback).Run(base::nullopt);
161 return false;
162 }
163
164 if (active_url_loader_) {
165 RecordRequestStatusHistogram(request_context,
166 HintsFetcherRequestStatus::kFetcherBusy);
167 std::move(hints_fetched_callback).Run(base::nullopt);
168 return false;
169 }
170
171 std::vector<std::string> filtered_hosts =
172 GetSizeLimitedHostsDueForHintsRefresh(hosts);
173 std::vector<GURL> valid_urls = GetValidURLsForFetching(urls);
174 if (filtered_hosts.empty() && valid_urls.empty()) {
175 RecordRequestStatusHistogram(
176 request_context, HintsFetcherRequestStatus::kNoHostsOrURLsToFetch);
177 std::move(hints_fetched_callback).Run(base::nullopt);
178 return false;
179 }
180
181 DCHECK_GE(features::MaxHostsForOptimizationGuideServiceHintsFetch(),
182 filtered_hosts.size());
183 DCHECK_GE(features::MaxUrlsForOptimizationGuideServiceHintsFetch(),
184 valid_urls.size());
185
186 if (optimization_types.empty()) {
187 RecordRequestStatusHistogram(
188 request_context,
189 HintsFetcherRequestStatus::kNoSupportedOptimizationTypes);
190 std::move(hints_fetched_callback).Run(base::nullopt);
191 return false;
192 }
193
194 hints_fetch_start_time_ = base::TimeTicks::Now();
195 request_context_ = request_context;
196
197 proto::GetHintsRequest get_hints_request;
198 get_hints_request.add_supported_key_representations(proto::HOST);
199 get_hints_request.add_supported_key_representations(proto::FULL_URL);
200
201 for (const auto& optimization_type : optimization_types)
202 get_hints_request.add_supported_optimizations(optimization_type);
203
204 get_hints_request.set_context(request_context_);
205
206 *get_hints_request.mutable_active_field_trials() =
207 GetActiveFieldTrialsAllowedForFetch();
208
209 for (const auto& url : valid_urls)
210 get_hints_request.add_urls()->set_url(url.spec());
211
212 for (const auto& host : filtered_hosts) {
213 proto::HostInfo* host_info = get_hints_request.add_hosts();
214 host_info->set_host(host);
215 }
216
217 std::string serialized_request;
218 get_hints_request.SerializeToString(&serialized_request);
219
220 net::NetworkTrafficAnnotationTag traffic_annotation =
221 net::DefineNetworkTrafficAnnotation("hintsfetcher_gethintsrequest", R"(
222 semantics {
223 sender: "HintsFetcher"
224 description:
225 "Requests Hints from the Optimization Guide Service for use in "
226 "providing data saving and pageload optimizations for Chrome."
227 trigger:
228 "Requested periodically if Data Saver is enabled and the browser "
229 "has Hints that are older than a threshold set by "
230 "the server."
231 data: "A list of the user's most engaged websites."
232 destination: GOOGLE_OWNED_SERVICE
233 }
234 policy {
235 cookies_allowed: NO
236 setting:
237 "Users can control Data Saver on Android via 'Data Saver' setting. "
238 "Data Saver is not available on iOS."
239 policy_exception_justification: "Not implemented."
240 })");
241
242 auto resource_request = std::make_unique<network::ResourceRequest>();
243
244 resource_request->url = optimization_guide_service_url_;
245
246 resource_request->method = "POST";
247 resource_request->credentials_mode = network::mojom::CredentialsMode::kOmit;
248
249 active_url_loader_ = variations::CreateSimpleURLLoaderWithVariationsHeader(
250 std::move(resource_request),
251 // This is always InIncognito::kNo as the OptimizationGuideKeyedService is
252 // not enabled on incognito sessions and is rechecked before each fetch.
253 variations::InIncognito::kNo, variations::SignedIn::kNo,
254 traffic_annotation);
255
256 active_url_loader_->AttachStringForUpload(serialized_request,
257 "application/x-protobuf");
258
259 UMA_HISTOGRAM_COUNTS_100(
260 "OptimizationGuide.HintsFetcher.GetHintsRequest.HostCount",
261 filtered_hosts.size());
262 UMA_HISTOGRAM_COUNTS_100(
263 "OptimizationGuide.HintsFetcher.GetHintsRequest.UrlCount",
264 valid_urls.size());
265
266 // |active_url_loader_| should not retry on 5xx errors since the server may
267 // already be overloaded. |active_url_loader_| should retry on network changes
268 // since the network stack may receive the connection change event later than
269 // |this|.
270 static const int kMaxRetries = 1;
271 active_url_loader_->SetRetryOptions(
272 kMaxRetries, network::SimpleURLLoader::RETRY_ON_NETWORK_CHANGE);
273
274 // It's safe to use |base::Unretained(this)| here because |this| owns
275 // |active_url_loader_| and the callback will be canceled if
276 // |active_url_loader_| is destroyed.
277 active_url_loader_->DownloadToStringOfUnboundedSizeUntilCrashAndDie(
278 url_loader_factory_.get(),
279 base::BindOnce(&HintsFetcher::OnURLLoadComplete, base::Unretained(this)));
280
281 hints_fetched_callback_ = std::move(hints_fetched_callback);
282 hosts_fetched_ = filtered_hosts;
283 return true;
284 }
285
HandleResponse(const std::string & get_hints_response_data,int net_status,int response_code)286 void HintsFetcher::HandleResponse(const std::string& get_hints_response_data,
287 int net_status,
288 int response_code) {
289 SEQUENCE_CHECKER(sequence_checker_);
290
291 std::unique_ptr<proto::GetHintsResponse> get_hints_response =
292 std::make_unique<proto::GetHintsResponse>();
293
294 UMA_HISTOGRAM_ENUMERATION(
295 "OptimizationGuide.HintsFetcher.GetHintsRequest.Status",
296 static_cast<net::HttpStatusCode>(response_code),
297 net::HTTP_VERSION_NOT_SUPPORTED);
298 // Net error codes are negative but histogram enums must be positive.
299 base::UmaHistogramSparse(
300 "OptimizationGuide.HintsFetcher.GetHintsRequest.NetErrorCode",
301 -net_status);
302
303 if (net_status == net::OK && response_code == net::HTTP_OK &&
304 get_hints_response->ParseFromString(get_hints_response_data)) {
305 UMA_HISTOGRAM_COUNTS_100(
306 "OptimizationGuide.HintsFetcher.GetHintsRequest.HintCount",
307 get_hints_response->hints_size());
308 base::TimeDelta fetch_latency =
309 base::TimeTicks::Now() - hints_fetch_start_time_;
310 UMA_HISTOGRAM_MEDIUM_TIMES(
311 "OptimizationGuide.HintsFetcher.GetHintsRequest.FetchLatency",
312 fetch_latency);
313 base::UmaHistogramMediumTimes(
314 "OptimizationGuide.HintsFetcher.GetHintsRequest.FetchLatency." +
315 GetStringNameForRequestContext(request_context_),
316 fetch_latency);
317 base::TimeDelta valid_duration =
318 features::StoredFetchedHintsFreshnessDuration();
319 if (get_hints_response->has_max_cache_duration()) {
320 valid_duration = base::TimeDelta::FromSeconds(
321 get_hints_response->max_cache_duration().seconds());
322 }
323 UpdateHostsSuccessfullyFetched(valid_duration);
324 RecordRequestStatusHistogram(request_context_,
325 HintsFetcherRequestStatus::kSuccess);
326 std::move(hints_fetched_callback_).Run(std::move(get_hints_response));
327 } else {
328 hosts_fetched_.clear();
329 RecordRequestStatusHistogram(request_context_,
330 HintsFetcherRequestStatus::kResponseError);
331 std::move(hints_fetched_callback_).Run(base::nullopt);
332 }
333 }
334
UpdateHostsSuccessfullyFetched(base::TimeDelta valid_duration)335 void HintsFetcher::UpdateHostsSuccessfullyFetched(
336 base::TimeDelta valid_duration) {
337 if (!optimization_guide::features::ShouldPersistHintsToDisk()) {
338 // Do not persist any state if we aren't persisting hints to disk.
339 return;
340 }
341
342 DictionaryPrefUpdate hosts_fetched_list(
343 pref_service_, prefs::kHintsFetcherHostsSuccessfullyFetched);
344
345 // Remove any expired hosts.
346 std::vector<std::string> entries_to_remove;
347 for (const auto& it : hosts_fetched_list->DictItems()) {
348 if (base::Time::FromDeltaSinceWindowsEpoch(base::TimeDelta::FromSecondsD(
349 it.second.GetDouble())) < time_clock_->Now()) {
350 entries_to_remove.emplace_back(it.first);
351 }
352 }
353 for (const auto& host : entries_to_remove) {
354 hosts_fetched_list->Remove(host, nullptr);
355 }
356
357 if (hosts_fetched_.empty())
358 return;
359
360 // Ensure there is enough space in the dictionary pref for the
361 // most recent set of hosts to be stored.
362 if (hosts_fetched_list->size() + hosts_fetched_.size() >
363 features::MaxHostsForRecordingSuccessfullyCovered()) {
364 entries_to_remove.clear();
365 size_t num_entries_to_remove =
366 hosts_fetched_list->size() + hosts_fetched_.size() -
367 features::MaxHostsForRecordingSuccessfullyCovered();
368 for (const auto& it : hosts_fetched_list->DictItems()) {
369 if (entries_to_remove.size() >= num_entries_to_remove)
370 break;
371 entries_to_remove.emplace_back(it.first);
372 }
373 for (const auto& host : entries_to_remove) {
374 hosts_fetched_list->Remove(host, nullptr);
375 }
376 }
377
378 // Add the covered hosts in |hosts_fetched_| to the dictionary pref.
379 base::Time host_invalid_time = time_clock_->Now() + valid_duration;
380 for (const std::string& host : hosts_fetched_) {
381 hosts_fetched_list->SetDoubleKey(
382 HashHostForDictionary(host),
383 host_invalid_time.ToDeltaSinceWindowsEpoch().InSecondsF());
384 }
385 DCHECK_LE(hosts_fetched_list->size(),
386 features::MaxHostsForRecordingSuccessfullyCovered());
387 hosts_fetched_.clear();
388 }
389
390 // Callback is only invoked if |active_url_loader_| is bound and still alive.
OnURLLoadComplete(std::unique_ptr<std::string> response_body)391 void HintsFetcher::OnURLLoadComplete(
392 std::unique_ptr<std::string> response_body) {
393 SEQUENCE_CHECKER(sequence_checker_);
394
395 int response_code = -1;
396 if (active_url_loader_->ResponseInfo() &&
397 active_url_loader_->ResponseInfo()->headers) {
398 response_code =
399 active_url_loader_->ResponseInfo()->headers->response_code();
400 }
401 auto net_error = active_url_loader_->NetError();
402 // Reset the active URL loader here since actions happening during response
403 // handling may destroy |this|.
404 active_url_loader_.reset();
405
406 HandleResponse(response_body ? *response_body : "", net_error, response_code);
407 }
408
GetSizeLimitedHostsDueForHintsRefresh(const std::vector<std::string> & hosts) const409 std::vector<std::string> HintsFetcher::GetSizeLimitedHostsDueForHintsRefresh(
410 const std::vector<std::string>& hosts) const {
411 SEQUENCE_CHECKER(sequence_checker_);
412
413 DictionaryPrefUpdate hosts_fetched(
414 pref_service_, prefs::kHintsFetcherHostsSuccessfullyFetched);
415
416 std::vector<std::string> target_hosts;
417 target_hosts.reserve(hosts.size());
418
419 for (const auto& host : hosts) {
420 // Skip over localhosts, IP addresses, and invalid hosts.
421 if (net::HostStringIsLocalhost(host))
422 continue;
423 url::CanonHostInfo host_info;
424 std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
425 if (host_info.IsIPAddress() ||
426 !net::IsCanonicalizedHostCompliant(canonicalized_host)) {
427 continue;
428 }
429
430 bool host_hints_due_for_refresh = true;
431
432 base::Optional<double> value =
433 hosts_fetched->FindDoubleKey(HashHostForDictionary(host));
434 if (value && optimization_guide::features::ShouldPersistHintsToDisk()) {
435 base::Time host_valid_time = base::Time::FromDeltaSinceWindowsEpoch(
436 base::TimeDelta::FromSecondsD(*value));
437 host_hints_due_for_refresh =
438 (host_valid_time - features::GetHintsFetchRefreshDuration() <=
439 time_clock_->Now());
440 }
441 if (host_hints_due_for_refresh)
442 target_hosts.push_back(host);
443
444 if (target_hosts.size() >=
445 features::MaxHostsForOptimizationGuideServiceHintsFetch()) {
446 break;
447 }
448 }
449 DCHECK_GE(features::MaxHostsForOptimizationGuideServiceHintsFetch(),
450 target_hosts.size());
451 return target_hosts;
452 }
453
454 } // namespace optimization_guide
455