1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/federated_learning/floc_id_provider_impl.h"
6 
7 #include <unordered_set>
8 
9 #include "chrome/browser/browser_process.h"
10 #include "chrome/browser/content_settings/cookie_settings_factory.h"
11 #include "chrome/browser/federated_learning/floc_remote_permission_service.h"
12 #include "chrome/browser/history/history_service_factory.h"
13 #include "chrome/browser/net/profile_network_context_service.h"
14 #include "chrome/browser/net/profile_network_context_service_factory.h"
15 #include "chrome/browser/sync/profile_sync_service_factory.h"
16 #include "chrome/browser/sync/user_event_service_factory.h"
17 #include "chrome/common/chrome_features.h"
18 #include "components/content_settings/core/browser/cookie_settings.h"
19 #include "components/history/core/browser/history_service.h"
20 #include "components/sync/driver/profile_sync_service.h"
21 #include "components/sync_user_events/user_event_service.h"
22 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
23 
24 namespace federated_learning {
25 
namespace {

// Minimum number of distinct history domains needed to compute a floc. When
// the recent history yields fewer domains, an empty result is reported.
constexpr size_t kMinHistoryDomainSizeToReportFlocId = 1;

// Delay between the completion of one floc computation and the start of the
// next scheduled one.
constexpr base::TimeDelta kFlocScheduledUpdateInterval =
    base::TimeDelta::FromDays(1);

// Number of most recent days of history queried as input to the floc.
constexpr int kQueryHistoryWindowInDays = 7;

// The placeholder sorting-lsh version when the sorting-lsh feature is disabled.
constexpr uint32_t kSortingLshVersionPlaceholder = 0;

}  // namespace
37 
FlocIdProviderImpl(syncer::SyncService * sync_service,scoped_refptr<content_settings::CookieSettings> cookie_settings,FlocRemotePermissionService * floc_remote_permission_service,history::HistoryService * history_service,syncer::UserEventService * user_event_service)38 FlocIdProviderImpl::FlocIdProviderImpl(
39     syncer::SyncService* sync_service,
40     scoped_refptr<content_settings::CookieSettings> cookie_settings,
41     FlocRemotePermissionService* floc_remote_permission_service,
42     history::HistoryService* history_service,
43     syncer::UserEventService* user_event_service)
44     : sync_service_(sync_service),
45       cookie_settings_(std::move(cookie_settings)),
46       floc_remote_permission_service_(floc_remote_permission_service),
47       history_service_(history_service),
48       user_event_service_(user_event_service) {
49   history_service->AddObserver(this);
50   sync_service_->AddObserver(this);
51   g_browser_process->floc_sorting_lsh_clusters_service()->AddObserver(this);
52 
53   OnStateChanged(sync_service);
54 
55   if (g_browser_process->floc_sorting_lsh_clusters_service()
56           ->IsSortingLshClustersFileReady()) {
57     OnSortingLshClustersFileReady();
58   }
59 }
60 
// Nothing to release explicitly here: observer registrations are undone in
// Shutdown().
FlocIdProviderImpl::~FlocIdProviderImpl() = default;
62 
GetInterestCohortForJsApi(const url::Origin & requesting_origin,const net::SiteForCookies & site_for_cookies) const63 std::string FlocIdProviderImpl::GetInterestCohortForJsApi(
64     const url::Origin& requesting_origin,
65     const net::SiteForCookies& site_for_cookies) const {
66   // These checks could be / become unnecessary, as we are planning on
67   // invalidating the |floc_id_| whenever a setting is disabled. Check them
68   // anyway to be safe.
69   if (!IsSyncHistoryEnabled() || !AreThirdPartyCookiesAllowed())
70     return std::string();
71 
72   // Only allow floc access if cookie access is allowed.
73   if (!cookie_settings_->IsCookieAccessAllowed(
74           requesting_origin.GetURL(), site_for_cookies.RepresentativeUrl(),
75           base::nullopt)) {
76     return std::string();
77   }
78 
79   if (!floc_id_.IsValid())
80     return std::string();
81 
82   return floc_id_.ToStringForJsApi();
83 }
84 
OnComputeFlocCompleted(ComputeFlocTrigger trigger,ComputeFlocResult result)85 void FlocIdProviderImpl::OnComputeFlocCompleted(ComputeFlocTrigger trigger,
86                                                 ComputeFlocResult result) {
87   DCHECK(floc_computation_in_progress_);
88   floc_computation_in_progress_ = false;
89 
90   // History-delete event came in when this computation was in progress. Ignore
91   // this computation completely and recompute.
92   if (need_recompute_) {
93     need_recompute_ = false;
94     ComputeFloc(trigger);
95     return;
96   }
97 
98   LogFlocComputedEvent(trigger, result);
99   floc_id_ = result.floc_id;
100 
101   // Abandon the scheduled task if any, and schedule a new compute-floc task
102   // that is |kFlocScheduledUpdateInterval| from now.
103   compute_floc_timer_.Start(
104       FROM_HERE, kFlocScheduledUpdateInterval,
105       base::BindOnce(&FlocIdProviderImpl::OnComputeFlocScheduledUpdate,
106                      weak_ptr_factory_.GetWeakPtr()));
107 }
108 
LogFlocComputedEvent(ComputeFlocTrigger trigger,const ComputeFlocResult & result)109 void FlocIdProviderImpl::LogFlocComputedEvent(ComputeFlocTrigger trigger,
110                                               const ComputeFlocResult& result) {
111   if (!base::FeatureList::IsEnabled(features::kFlocIdComputedEventLogging))
112     return;
113 
114   // Don't log if it's the 1st computation and sim_hash is not computed. This
115   // is likely due to sync just gets enabled but some floc permission settings
116   // are disabled. We don't want to mess up with the initial user event
117   // messagings (and some sync integration tests would fail otherwise).
118   if (trigger == ComputeFlocTrigger::kBrowserStart && !result.sim_hash_computed)
119     return;
120 
121   auto specifics = std::make_unique<sync_pb::UserEventSpecifics>();
122   specifics->set_event_time_usec(
123       base::Time::Now().ToDeltaSinceWindowsEpoch().InMicroseconds());
124 
125   sync_pb::UserEventSpecifics_FlocIdComputed* const floc_id_computed_event =
126       specifics->mutable_floc_id_computed_event();
127 
128   sync_pb::UserEventSpecifics_FlocIdComputed_EventTrigger event_trigger;
129   switch (trigger) {
130     case ComputeFlocTrigger::kBrowserStart:
131       event_trigger =
132           sync_pb::UserEventSpecifics_FlocIdComputed_EventTrigger_NEW;
133       break;
134     case ComputeFlocTrigger::kScheduledUpdate:
135       event_trigger =
136           sync_pb::UserEventSpecifics_FlocIdComputed_EventTrigger_REFRESHED;
137       break;
138     case ComputeFlocTrigger::kHistoryDelete:
139       event_trigger = sync_pb::
140           UserEventSpecifics_FlocIdComputed_EventTrigger_HISTORY_DELETE;
141       break;
142   }
143 
144   floc_id_computed_event->set_event_trigger(event_trigger);
145 
146   if (result.sim_hash_computed)
147     floc_id_computed_event->set_floc_id(result.sim_hash);
148 
149   user_event_service_->RecordUserEvent(std::move(specifics));
150 }
151 
Shutdown()152 void FlocIdProviderImpl::Shutdown() {
153   if (sync_service_)
154     sync_service_->RemoveObserver(this);
155   sync_service_ = nullptr;
156 
157   if (history_service_)
158     history_service_->RemoveObserver(this);
159   history_service_ = nullptr;
160 
161   g_browser_process->floc_sorting_lsh_clusters_service()->RemoveObserver(this);
162 }
163 
OnURLsDeleted(history::HistoryService * history_service,const history::DeletionInfo & deletion_info)164 void FlocIdProviderImpl::OnURLsDeleted(
165     history::HistoryService* history_service,
166     const history::DeletionInfo& deletion_info) {
167   // Set the |need_recompute_| flag so that we will recompute the floc
168   // immediately after the in-progress one finishes, so as to avoid potential
169   // data races.
170   if (floc_computation_in_progress_) {
171     DCHECK(first_floc_computation_triggered_);
172     need_recompute_ = true;
173     return;
174   }
175 
176   if (!floc_id_.IsValid())
177     return;
178 
179   // Only invalidate the floc if it's delete-all or if the time range overlaps
180   // with the time range of the history used to compute the current floc.
181   if (!deletion_info.IsAllHistory() && !deletion_info.time_range().IsValid()) {
182     return;
183   }
184 
185   if (deletion_info.time_range().begin() > floc_id_.history_end_time() ||
186       deletion_info.time_range().end() < floc_id_.history_begin_time()) {
187     return;
188   }
189 
190   // We log the invalidation event although it's technically not a recompute.
191   // It'd give us a better idea how often the floc is invalidated due to
192   // history-delete.
193   LogFlocComputedEvent(ComputeFlocTrigger::kHistoryDelete, ComputeFlocResult());
194   floc_id_ = FlocId();
195 }
196 
OnSortingLshClustersFileReady()197 void FlocIdProviderImpl::OnSortingLshClustersFileReady() {
198   if (first_sorting_lsh_file_ready_seen_)
199     return;
200 
201   first_sorting_lsh_file_ready_seen_ = true;
202 
203   MaybeTriggerFirstFlocComputation();
204 }
205 
OnStateChanged(syncer::SyncService * sync_service)206 void FlocIdProviderImpl::OnStateChanged(syncer::SyncService* sync_service) {
207   if (first_sync_history_enabled_seen_)
208     return;
209 
210   if (!IsSyncHistoryEnabled())
211     return;
212 
213   first_sync_history_enabled_seen_ = true;
214 
215   MaybeTriggerFirstFlocComputation();
216 }
217 
MaybeTriggerFirstFlocComputation()218 void FlocIdProviderImpl::MaybeTriggerFirstFlocComputation() {
219   if (first_floc_computation_triggered_)
220     return;
221 
222   bool sorting_lsh_ready_or_not_required =
223       !base::FeatureList::IsEnabled(
224           features::kFlocIdSortingLshBasedComputation) ||
225       first_sorting_lsh_file_ready_seen_;
226 
227   if (!first_sync_history_enabled_seen_ || !sorting_lsh_ready_or_not_required)
228     return;
229 
230   ComputeFloc(ComputeFlocTrigger::kBrowserStart);
231 }
232 
// Timer callback for the periodic refresh: starts a floc computation with the
// scheduled-update trigger. The timer is armed only from
// OnComputeFlocCompleted(), i.e. after a computation has finished, hence no
// computation is expected to be in progress here.
void FlocIdProviderImpl::OnComputeFlocScheduledUpdate() {
  DCHECK(!floc_computation_in_progress_);
  ComputeFloc(ComputeFlocTrigger::kScheduledUpdate);
}
237 
ComputeFloc(ComputeFlocTrigger trigger)238 void FlocIdProviderImpl::ComputeFloc(ComputeFlocTrigger trigger) {
239   DCHECK(trigger == ComputeFlocTrigger::kBrowserStart ||
240          (trigger == ComputeFlocTrigger::kScheduledUpdate &&
241           first_floc_computation_triggered_));
242 
243   DCHECK(!floc_computation_in_progress_);
244 
245   floc_computation_in_progress_ = true;
246   first_floc_computation_triggered_ = true;
247 
248   auto compute_floc_completed_callback =
249       base::BindOnce(&FlocIdProviderImpl::OnComputeFlocCompleted,
250                      weak_ptr_factory_.GetWeakPtr(), trigger);
251 
252   CheckCanComputeFloc(
253       base::BindOnce(&FlocIdProviderImpl::OnCheckCanComputeFlocCompleted,
254                      weak_ptr_factory_.GetWeakPtr(),
255                      std::move(compute_floc_completed_callback)));
256 }
257 
CheckCanComputeFloc(CanComputeFlocCallback callback)258 void FlocIdProviderImpl::CheckCanComputeFloc(CanComputeFlocCallback callback) {
259   if (!IsSyncHistoryEnabled() || !AreThirdPartyCookiesAllowed()) {
260     std::move(callback).Run(false);
261     return;
262   }
263 
264   IsSwaaNacAccountEnabled(std::move(callback));
265 }
266 
OnCheckCanComputeFlocCompleted(ComputeFlocCompletedCallback callback,bool can_compute_floc)267 void FlocIdProviderImpl::OnCheckCanComputeFlocCompleted(
268     ComputeFlocCompletedCallback callback,
269     bool can_compute_floc) {
270   if (!can_compute_floc) {
271     std::move(callback).Run(ComputeFlocResult());
272     return;
273   }
274 
275   GetRecentlyVisitedURLs(
276       base::BindOnce(&FlocIdProviderImpl::OnGetRecentlyVisitedURLsCompleted,
277                      weak_ptr_factory_.GetWeakPtr(), std::move(callback)));
278 }
279 
IsSyncHistoryEnabled() const280 bool FlocIdProviderImpl::IsSyncHistoryEnabled() const {
281   syncer::SyncUserSettings* setting = sync_service_->GetUserSettings();
282   DCHECK(setting);
283 
284   return sync_service_->IsSyncFeatureActive() &&
285          sync_service_->GetActiveDataTypes().Has(
286              syncer::HISTORY_DELETE_DIRECTIVES);
287 }
288 
AreThirdPartyCookiesAllowed() const289 bool FlocIdProviderImpl::AreThirdPartyCookiesAllowed() const {
290   return !cookie_settings_->ShouldBlockThirdPartyCookies();
291 }
292 
// Asks the remote permission service whether the floc may be computed for the
// current account and hands |callback| over to it. Per the traffic annotation
// below, the remote check covers "web and app activity", "ad personalization"
// and the account not being a child account.
void FlocIdProviderImpl::IsSwaaNacAccountEnabled(
    CanComputeFlocCallback callback) {
  // Partial annotation describing the network request issued by the
  // permission service on behalf of this provider.
  net::PartialNetworkTrafficAnnotationTag partial_traffic_annotation =
      net::DefinePartialNetworkTrafficAnnotation(
          "floc_id_provider_impl", "floc_remote_permission_service",
          R"(
        semantics {
          description:
            "Queries google to find out if user has enabled 'web and app "
            "activity' and 'ad personalization', and if the account type is "
            "NOT a child account. Those permission bits will be checked before "
            "computing the FLoC (Federated Learning of Cohorts) ID - an "
            "anonymous similarity hash value of users navigation history. "
            "This ensures that the FLoC ID is derived from data that Google "
            "already owns and the user has explicitly granted permission on "
            "what they will be used for."
          trigger:
            "This request is sent at each time a FLoC (Federated Learning of "
            "Cohorts) ID is to be computed. A FLoC ID is an anonymous "
            "similarity hash value of users navigation history. It'll be "
            "computed at the start of each browser profile session and will be "
            "refreshed every 24 hours during that session."
          data:
            "Google credentials if user is signed in."
        }
        policy {
            setting:
              "This feature cannot be disabled in settings, but disabling sync "
              "or third-party cookies will prevent it."
        })");

  // The permission service takes ownership of |callback|.
  floc_remote_permission_service_->QueryFlocPermission(
      std::move(callback), partial_traffic_annotation);
}
327 
328 void FlocIdProviderImpl::GetRecentlyVisitedURLs(
329     GetRecentlyVisitedURLsCallback callback) {
330   history::QueryOptions options;
331   options.SetRecentDayRange(kQueryHistoryWindowInDays);
332   options.duplicate_policy = history::QueryOptions::KEEP_ALL_DUPLICATES;
333 
334   history_service_->QueryHistory(base::string16(), options, std::move(callback),
335                                  &history_task_tracker_);
336 }
337 
338 void FlocIdProviderImpl::OnGetRecentlyVisitedURLsCompleted(
339     ComputeFlocCompletedCallback callback,
340     history::QueryResults results) {
341   std::unordered_set<std::string> domains;
342 
343   base::Time history_begin_time = base::Time::Max();
344   base::Time history_end_time = base::Time::Min();
345 
346   for (const history::URLResult& url_result : results) {
347     if (!url_result.publicly_routable())
348       continue;
349 
350     if (url_result.visit_time() < history_begin_time)
351       history_begin_time = url_result.visit_time();
352 
353     if (url_result.visit_time() > history_end_time)
354       history_end_time = url_result.visit_time();
355 
356     domains.insert(net::registry_controlled_domains::GetDomainAndRegistry(
357         url_result.url(),
358         net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES));
359   }
360 
361   if (domains.size() < kMinHistoryDomainSizeToReportFlocId) {
362     std::move(callback).Run(ComputeFlocResult());
363     return;
364   }
365 
366   ApplySortingLshPostProcessing(std::move(callback),
367                                 FlocId::SimHashHistory(domains),
368                                 history_begin_time, history_end_time);
369 }
370 
371 void FlocIdProviderImpl::ApplySortingLshPostProcessing(
372     ComputeFlocCompletedCallback callback,
373     uint64_t sim_hash,
374     base::Time history_begin_time,
375     base::Time history_end_time) {
376   if (!base::FeatureList::IsEnabled(
377           features::kFlocIdSortingLshBasedComputation)) {
378     std::move(callback).Run(ComputeFlocResult(
379         sim_hash, FlocId(sim_hash, history_begin_time, history_end_time,
380                          kSortingLshVersionPlaceholder)));
381     return;
382   }
383 
384   g_browser_process->floc_sorting_lsh_clusters_service()->ApplySortingLsh(
385       sim_hash,
386       base::BindOnce(&FlocIdProviderImpl::DidApplySortingLshPostProcessing,
387                      weak_ptr_factory_.GetWeakPtr(), std::move(callback),
388                      sim_hash, history_begin_time, history_end_time));
389 }
390 
391 void FlocIdProviderImpl::DidApplySortingLshPostProcessing(
392     ComputeFlocCompletedCallback callback,
393     uint64_t sim_hash,
394     base::Time history_begin_time,
395     base::Time history_end_time,
396     base::Optional<uint64_t> final_hash,
397     base::Version version) {
398   if (!final_hash) {
399     std::move(callback).Run(ComputeFlocResult(sim_hash, FlocId()));
400     return;
401   }
402 
403   std::move(callback).Run(ComputeFlocResult(
404       sim_hash, FlocId(final_hash.value(), history_begin_time, history_end_time,
405                        version.components().front())));
406 }
407 
408 }  // namespace federated_learning
409