1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/federated_learning/floc_id_provider_impl.h"
6
7 #include <unordered_set>
8
9 #include "chrome/browser/browser_process.h"
10 #include "chrome/browser/content_settings/cookie_settings_factory.h"
11 #include "chrome/browser/federated_learning/floc_remote_permission_service.h"
12 #include "chrome/browser/history/history_service_factory.h"
13 #include "chrome/browser/net/profile_network_context_service.h"
14 #include "chrome/browser/net/profile_network_context_service_factory.h"
15 #include "chrome/browser/sync/profile_sync_service_factory.h"
16 #include "chrome/browser/sync/user_event_service_factory.h"
17 #include "chrome/common/chrome_features.h"
18 #include "components/content_settings/core/browser/cookie_settings.h"
19 #include "components/history/core/browser/history_service.h"
20 #include "components/sync/driver/profile_sync_service.h"
21 #include "components/sync_user_events/user_event_service.h"
22 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
23
24 namespace federated_learning {
25
26 namespace {
27
28 constexpr size_t kMinHistoryDomainSizeToReportFlocId = 1;
29 constexpr base::TimeDelta kFlocScheduledUpdateInterval =
30 base::TimeDelta::FromDays(1);
31 constexpr int kQueryHistoryWindowInDays = 7;
32
33 // The placeholder sorting-lsh version when the sorting-lsh feature is disabled.
34 constexpr uint32_t kSortingLshVersionPlaceholder = 0;
35
36 } // namespace
37
FlocIdProviderImpl(syncer::SyncService * sync_service,scoped_refptr<content_settings::CookieSettings> cookie_settings,FlocRemotePermissionService * floc_remote_permission_service,history::HistoryService * history_service,syncer::UserEventService * user_event_service)38 FlocIdProviderImpl::FlocIdProviderImpl(
39 syncer::SyncService* sync_service,
40 scoped_refptr<content_settings::CookieSettings> cookie_settings,
41 FlocRemotePermissionService* floc_remote_permission_service,
42 history::HistoryService* history_service,
43 syncer::UserEventService* user_event_service)
44 : sync_service_(sync_service),
45 cookie_settings_(std::move(cookie_settings)),
46 floc_remote_permission_service_(floc_remote_permission_service),
47 history_service_(history_service),
48 user_event_service_(user_event_service) {
49 history_service->AddObserver(this);
50 sync_service_->AddObserver(this);
51 g_browser_process->floc_sorting_lsh_clusters_service()->AddObserver(this);
52
53 OnStateChanged(sync_service);
54
55 if (g_browser_process->floc_sorting_lsh_clusters_service()
56 ->IsSortingLshClustersFileReady()) {
57 OnSortingLshClustersFileReady();
58 }
59 }
60
// Defaulted destructor. The observer registrations made in the constructor are
// removed in Shutdown(), not here.
FlocIdProviderImpl::~FlocIdProviderImpl() = default;
62
GetInterestCohortForJsApi(const url::Origin & requesting_origin,const net::SiteForCookies & site_for_cookies) const63 std::string FlocIdProviderImpl::GetInterestCohortForJsApi(
64 const url::Origin& requesting_origin,
65 const net::SiteForCookies& site_for_cookies) const {
66 // These checks could be / become unnecessary, as we are planning on
67 // invalidating the |floc_id_| whenever a setting is disabled. Check them
68 // anyway to be safe.
69 if (!IsSyncHistoryEnabled() || !AreThirdPartyCookiesAllowed())
70 return std::string();
71
72 // Only allow floc access if cookie access is allowed.
73 if (!cookie_settings_->IsCookieAccessAllowed(
74 requesting_origin.GetURL(), site_for_cookies.RepresentativeUrl(),
75 base::nullopt)) {
76 return std::string();
77 }
78
79 if (!floc_id_.IsValid())
80 return std::string();
81
82 return floc_id_.ToStringForJsApi();
83 }
84
OnComputeFlocCompleted(ComputeFlocTrigger trigger,ComputeFlocResult result)85 void FlocIdProviderImpl::OnComputeFlocCompleted(ComputeFlocTrigger trigger,
86 ComputeFlocResult result) {
87 DCHECK(floc_computation_in_progress_);
88 floc_computation_in_progress_ = false;
89
90 // History-delete event came in when this computation was in progress. Ignore
91 // this computation completely and recompute.
92 if (need_recompute_) {
93 need_recompute_ = false;
94 ComputeFloc(trigger);
95 return;
96 }
97
98 LogFlocComputedEvent(trigger, result);
99 floc_id_ = result.floc_id;
100
101 // Abandon the scheduled task if any, and schedule a new compute-floc task
102 // that is |kFlocScheduledUpdateInterval| from now.
103 compute_floc_timer_.Start(
104 FROM_HERE, kFlocScheduledUpdateInterval,
105 base::BindOnce(&FlocIdProviderImpl::OnComputeFlocScheduledUpdate,
106 weak_ptr_factory_.GetWeakPtr()));
107 }
108
LogFlocComputedEvent(ComputeFlocTrigger trigger,const ComputeFlocResult & result)109 void FlocIdProviderImpl::LogFlocComputedEvent(ComputeFlocTrigger trigger,
110 const ComputeFlocResult& result) {
111 if (!base::FeatureList::IsEnabled(features::kFlocIdComputedEventLogging))
112 return;
113
114 // Don't log if it's the 1st computation and sim_hash is not computed. This
115 // is likely due to sync just gets enabled but some floc permission settings
116 // are disabled. We don't want to mess up with the initial user event
117 // messagings (and some sync integration tests would fail otherwise).
118 if (trigger == ComputeFlocTrigger::kBrowserStart && !result.sim_hash_computed)
119 return;
120
121 auto specifics = std::make_unique<sync_pb::UserEventSpecifics>();
122 specifics->set_event_time_usec(
123 base::Time::Now().ToDeltaSinceWindowsEpoch().InMicroseconds());
124
125 sync_pb::UserEventSpecifics_FlocIdComputed* const floc_id_computed_event =
126 specifics->mutable_floc_id_computed_event();
127
128 sync_pb::UserEventSpecifics_FlocIdComputed_EventTrigger event_trigger;
129 switch (trigger) {
130 case ComputeFlocTrigger::kBrowserStart:
131 event_trigger =
132 sync_pb::UserEventSpecifics_FlocIdComputed_EventTrigger_NEW;
133 break;
134 case ComputeFlocTrigger::kScheduledUpdate:
135 event_trigger =
136 sync_pb::UserEventSpecifics_FlocIdComputed_EventTrigger_REFRESHED;
137 break;
138 case ComputeFlocTrigger::kHistoryDelete:
139 event_trigger = sync_pb::
140 UserEventSpecifics_FlocIdComputed_EventTrigger_HISTORY_DELETE;
141 break;
142 }
143
144 floc_id_computed_event->set_event_trigger(event_trigger);
145
146 if (result.sim_hash_computed)
147 floc_id_computed_event->set_floc_id(result.sim_hash);
148
149 user_event_service_->RecordUserEvent(std::move(specifics));
150 }
151
Shutdown()152 void FlocIdProviderImpl::Shutdown() {
153 if (sync_service_)
154 sync_service_->RemoveObserver(this);
155 sync_service_ = nullptr;
156
157 if (history_service_)
158 history_service_->RemoveObserver(this);
159 history_service_ = nullptr;
160
161 g_browser_process->floc_sorting_lsh_clusters_service()->RemoveObserver(this);
162 }
163
OnURLsDeleted(history::HistoryService * history_service,const history::DeletionInfo & deletion_info)164 void FlocIdProviderImpl::OnURLsDeleted(
165 history::HistoryService* history_service,
166 const history::DeletionInfo& deletion_info) {
167 // Set the |need_recompute_| flag so that we will recompute the floc
168 // immediately after the in-progress one finishes, so as to avoid potential
169 // data races.
170 if (floc_computation_in_progress_) {
171 DCHECK(first_floc_computation_triggered_);
172 need_recompute_ = true;
173 return;
174 }
175
176 if (!floc_id_.IsValid())
177 return;
178
179 // Only invalidate the floc if it's delete-all or if the time range overlaps
180 // with the time range of the history used to compute the current floc.
181 if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid()) {
182 return;
183 }
184
185 if (deletion_info.time_range().begin() > floc_id_.history_end_time() ||
186 deletion_info.time_range().end() < floc_id_.history_begin_time()) {
187 return;
188 }
189
190 // We log the invalidation event although it's technically not a recompute.
191 // It'd give us a better idea how often the floc is invalidated due to
192 // history-delete.
193 LogFlocComputedEvent(ComputeFlocTrigger::kHistoryDelete, ComputeFlocResult());
194 floc_id_ = FlocId();
195 }
196
OnSortingLshClustersFileReady()197 void FlocIdProviderImpl::OnSortingLshClustersFileReady() {
198 if (first_sorting_lsh_file_ready_seen_)
199 return;
200
201 first_sorting_lsh_file_ready_seen_ = true;
202
203 MaybeTriggerFirstFlocComputation();
204 }
205
OnStateChanged(syncer::SyncService * sync_service)206 void FlocIdProviderImpl::OnStateChanged(syncer::SyncService* sync_service) {
207 if (first_sync_history_enabled_seen_)
208 return;
209
210 if (!IsSyncHistoryEnabled())
211 return;
212
213 first_sync_history_enabled_seen_ = true;
214
215 MaybeTriggerFirstFlocComputation();
216 }
217
MaybeTriggerFirstFlocComputation()218 void FlocIdProviderImpl::MaybeTriggerFirstFlocComputation() {
219 if (first_floc_computation_triggered_)
220 return;
221
222 bool sorting_lsh_ready_or_not_required =
223 !base::FeatureList::IsEnabled(
224 features::kFlocIdSortingLshBasedComputation) ||
225 first_sorting_lsh_file_ready_seen_;
226
227 if (!first_sync_history_enabled_seen_ || !sorting_lsh_ready_or_not_required)
228 return;
229
230 ComputeFloc(ComputeFlocTrigger::kBrowserStart);
231 }
232
OnComputeFlocScheduledUpdate()233 void FlocIdProviderImpl::OnComputeFlocScheduledUpdate() {
234 DCHECK(!floc_computation_in_progress_);
235 ComputeFloc(ComputeFlocTrigger::kScheduledUpdate);
236 }
237
ComputeFloc(ComputeFlocTrigger trigger)238 void FlocIdProviderImpl::ComputeFloc(ComputeFlocTrigger trigger) {
239 DCHECK(trigger == ComputeFlocTrigger::kBrowserStart ||
240 (trigger == ComputeFlocTrigger::kScheduledUpdate &&
241 first_floc_computation_triggered_));
242
243 DCHECK(!floc_computation_in_progress_);
244
245 floc_computation_in_progress_ = true;
246 first_floc_computation_triggered_ = true;
247
248 auto compute_floc_completed_callback =
249 base::BindOnce(&FlocIdProviderImpl::OnComputeFlocCompleted,
250 weak_ptr_factory_.GetWeakPtr(), trigger);
251
252 CheckCanComputeFloc(
253 base::BindOnce(&FlocIdProviderImpl::OnCheckCanComputeFlocCompleted,
254 weak_ptr_factory_.GetWeakPtr(),
255 std::move(compute_floc_completed_callback)));
256 }
257
CheckCanComputeFloc(CanComputeFlocCallback callback)258 void FlocIdProviderImpl::CheckCanComputeFloc(CanComputeFlocCallback callback) {
259 if (!IsSyncHistoryEnabled() || !AreThirdPartyCookiesAllowed()) {
260 std::move(callback).Run(false);
261 return;
262 }
263
264 IsSwaaNacAccountEnabled(std::move(callback));
265 }
266
OnCheckCanComputeFlocCompleted(ComputeFlocCompletedCallback callback,bool can_compute_floc)267 void FlocIdProviderImpl::OnCheckCanComputeFlocCompleted(
268 ComputeFlocCompletedCallback callback,
269 bool can_compute_floc) {
270 if (!can_compute_floc) {
271 std::move(callback).Run(ComputeFlocResult());
272 return;
273 }
274
275 GetRecentlyVisitedURLs(
276 base::BindOnce(&FlocIdProviderImpl::OnGetRecentlyVisitedURLsCompleted,
277 weak_ptr_factory_.GetWeakPtr(), std::move(callback)));
278 }
279
IsSyncHistoryEnabled() const280 bool FlocIdProviderImpl::IsSyncHistoryEnabled() const {
281 syncer::SyncUserSettings* setting = sync_service_->GetUserSettings();
282 DCHECK(setting);
283
284 return sync_service_->IsSyncFeatureActive() &&
285 sync_service_->GetActiveDataTypes().Has(
286 syncer::HISTORY_DELETE_DIRECTIVES);
287 }
288
AreThirdPartyCookiesAllowed() const289 bool FlocIdProviderImpl::AreThirdPartyCookiesAllowed() const {
290 return !cookie_settings_->ShouldBlockThirdPartyCookies();
291 }
292
// Asks the remote permission service whether floc may be computed for this
// account and forwards the answer to |callback|. The permissions being
// queried are the ones listed in the traffic annotation below.
void FlocIdProviderImpl::IsSwaaNacAccountEnabled(
    CanComputeFlocCallback callback) {
  // Partial annotation describing the network request; it is handed to the
  // permission service together with the query.
  net::PartialNetworkTrafficAnnotationTag partial_traffic_annotation =
      net::DefinePartialNetworkTrafficAnnotation(
          "floc_id_provider_impl", "floc_remote_permission_service",
          R"(
        semantics {
          description:
            "Queries google to find out if user has enabled 'web and app "
            "activity' and 'ad personalization', and if the account type is "
            "NOT a child account. Those permission bits will be checked before "
            "computing the FLoC (Federated Learning of Cohorts) ID - an "
            "anonymous similarity hash value of user’s navigation history. "
            "This ensures that the FLoC ID is derived from data that Google "
            "already owns and the user has explicitly granted permission on "
            "what they will be used for."
          trigger:
            "This request is sent at each time a FLoC (Federated Learning of "
            "Cohorts) ID is to be computed. A FLoC ID is an anonymous "
            "similarity hash value of user’s navigation history. It'll be "
            "computed at the start of each browser profile session and will be "
            "refreshed every 24 hours during that session."
          data:
            "Google credentials if user is signed in."
        }
        policy {
          setting:
            "This feature cannot be disabled in settings, but disabling sync "
            "or third-party cookies will prevent it."
        })");

  // Ownership of |callback| moves to the permission service.
  floc_remote_permission_service_->QueryFlocPermission(
      std::move(callback), partial_traffic_annotation);
}
327
328 void FlocIdProviderImpl::GetRecentlyVisitedURLs(
329 GetRecentlyVisitedURLsCallback callback) {
330 history::QueryOptions options;
331 options.SetRecentDayRange(kQueryHistoryWindowInDays);
332 options.duplicate_policy = history::QueryOptions::KEEP_ALL_DUPLICATES;
333
334 history_service_->QueryHistory(base::string16(), options, std::move(callback),
335 &history_task_tracker_);
336 }
337
338 void FlocIdProviderImpl::OnGetRecentlyVisitedURLsCompleted(
339 ComputeFlocCompletedCallback callback,
340 history::QueryResults results) {
341 std::unordered_set<std::string> domains;
342
343 base::Time history_begin_time = base::Time::Max();
344 base::Time history_end_time = base::Time::Min();
345
346 for (const history::URLResult& url_result : results) {
347 if (!url_result.publicly_routable())
348 continue;
349
350 if (url_result.visit_time() < history_begin_time)
351 history_begin_time = url_result.visit_time();
352
353 if (url_result.visit_time() > history_end_time)
354 history_end_time = url_result.visit_time();
355
356 domains.insert(net::registry_controlled_domains::GetDomainAndRegistry(
357 url_result.url(),
358 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES));
359 }
360
361 if (domains.size() < kMinHistoryDomainSizeToReportFlocId) {
362 std::move(callback).Run(ComputeFlocResult());
363 return;
364 }
365
366 ApplySortingLshPostProcessing(std::move(callback),
367 FlocId::SimHashHistory(domains),
368 history_begin_time, history_end_time);
369 }
370
371 void FlocIdProviderImpl::ApplySortingLshPostProcessing(
372 ComputeFlocCompletedCallback callback,
373 uint64_t sim_hash,
374 base::Time history_begin_time,
375 base::Time history_end_time) {
376 if (!base::FeatureList::IsEnabled(
377 features::kFlocIdSortingLshBasedComputation)) {
378 std::move(callback).Run(ComputeFlocResult(
379 sim_hash, FlocId(sim_hash, history_begin_time, history_end_time,
380 kSortingLshVersionPlaceholder)));
381 return;
382 }
383
384 g_browser_process->floc_sorting_lsh_clusters_service()->ApplySortingLsh(
385 sim_hash,
386 base::BindOnce(&FlocIdProviderImpl::DidApplySortingLshPostProcessing,
387 weak_ptr_factory_.GetWeakPtr(), std::move(callback),
388 sim_hash, history_begin_time, history_end_time));
389 }
390
391 void FlocIdProviderImpl::DidApplySortingLshPostProcessing(
392 ComputeFlocCompletedCallback callback,
393 uint64_t sim_hash,
394 base::Time history_begin_time,
395 base::Time history_end_time,
396 base::Optional<uint64_t> final_hash,
397 base::Version version) {
398 if (!final_hash) {
399 std::move(callback).Run(ComputeFlocResult(sim_hash, FlocId()));
400 return;
401 }
402
403 std::move(callback).Run(ComputeFlocResult(
404 sim_hash, FlocId(final_hash.value(), history_begin_time, history_end_time,
405 version.components().front())));
406 }
407
408 } // namespace federated_learning
409