1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_FEDERATED_LEARNING_FLOC_ID_PROVIDER_IMPL_H_
6 #define CHROME_BROWSER_FEDERATED_LEARNING_FLOC_ID_PROVIDER_IMPL_H_
7 
8 #include "base/gtest_prod_util.h"
9 #include "base/task/cancelable_task_tracker.h"
10 #include "base/timer/timer.h"
11 #include "chrome/browser/federated_learning/floc_id_provider.h"
12 #include "components/federated_learning/floc_sorting_lsh_clusters_service.h"
13 #include "components/history/core/browser/history_service.h"
14 #include "components/history/core/browser/history_service_observer.h"
15 #include "components/sync/driver/sync_service_observer.h"
16 
17 namespace content_settings {
18 class CookieSettings;
19 }
20 
21 namespace syncer {
22 class UserEventService;
23 }
24 
25 namespace federated_learning {
26 
27 class FlocRemotePermissionService;
28 
29 // A service that regularly computes the floc id and logs it in a user event. A
30 // computed floc can be in either a valid or invalid state, based on whether all
31 // the prerequisites are met:
32 // 1) Sync & sync-history are enabled.
33 // 2) 3rd party cookies are NOT blocked.
34 // 3) Supplemental Web and App Activity is enabled.
35 // 4) Supplemental Ad Personalization is enabled.
36 // 5) The account type is NOT a child account.
37 //
38 // When all the prerequisites are met, the floc will be computed by sim-hashing
39 // navigation URL domains in the last 7 days; otherwise, an invalid floc will be
40 // given. The floc can be further translated or blocked with the SortingLSH
41 // post-processing.
42 //
43 // The floc will be first computed after sync & sync-history are enabled. After
44 // each computation, another computation will be scheduled 24 hours later. In
45 // the event of history deletion, the floc will be invalidated immediately
46 // if the time range of the deletion overlaps with the time range used to
47 // compute the existing floc.
48 class FlocIdProviderImpl : public FlocIdProvider,
49                            public FlocSortingLshClustersService::Observer,
50                            public history::HistoryServiceObserver,
51                            public syncer::SyncServiceObserver {
52  public:
53   enum class ComputeFlocTrigger {
54     kBrowserStart,
55     kScheduledUpdate,
56     kHistoryDelete,
57   };
58 
59   struct ComputeFlocResult {
60     ComputeFlocResult() = default;
61 
ComputeFlocResultComputeFlocResult62     ComputeFlocResult(uint64_t sim_hash, const FlocId& floc_id)
63         : sim_hash_computed(true), sim_hash(sim_hash), floc_id(floc_id) {}
64 
65     bool sim_hash_computed = false;
66 
67     // Sim-hash of the browsing history. This is the baseline value where the
68     // |floc_id| field should be derived from. We'll log this field for the
69     // server to calculate the sorting-lsh cutting points.
70     uint64_t sim_hash = 0;
71 
72     // The floc to be exposed to JS API. It's derived from applying the
73     // sorting-lsh & blocklist post-processing on the |sim_hash|.
74     FlocId floc_id;
75   };
76 
77   using CanComputeFlocCallback = base::OnceCallback<void(bool)>;
78   using ComputeFlocCompletedCallback =
79       base::OnceCallback<void(ComputeFlocResult)>;
80   using GetRecentlyVisitedURLsCallback =
81       history::HistoryService::QueryHistoryCallback;
82 
83   FlocIdProviderImpl(
84       syncer::SyncService* sync_service,
85       scoped_refptr<content_settings::CookieSettings> cookie_settings,
86       FlocRemotePermissionService* floc_remote_permission_service,
87       history::HistoryService* history_service,
88       syncer::UserEventService* user_event_service);
89   ~FlocIdProviderImpl() override;
90   FlocIdProviderImpl(const FlocIdProviderImpl&) = delete;
91   FlocIdProviderImpl& operator=(const FlocIdProviderImpl&) = delete;
92 
93   std::string GetInterestCohortForJsApi(
94       const url::Origin& requesting_origin,
95       const net::SiteForCookies& site_for_cookies) const override;
96 
97  protected:
98   // protected virtual for testing.
99   virtual void OnComputeFlocCompleted(ComputeFlocTrigger trigger,
100                                       ComputeFlocResult result);
101   virtual void LogFlocComputedEvent(ComputeFlocTrigger trigger,
102                                     const ComputeFlocResult& result);
103 
104  private:
105   friend class FlocIdProviderUnitTest;
106   friend class FlocIdProviderBrowserTest;
107 
108   // KeyedService:
109   void Shutdown() override;
110 
111   // history::HistoryServiceObserver
112   //
113   // On history deletion, recompute the floc if the current floc is speculated
114   // to be derived from the deleted history.
115   void OnURLsDeleted(history::HistoryService* history_service,
116                      const history::DeletionInfo& deletion_info) override;
117 
118   // FlocSortingLshClustersService::Observer
119   void OnSortingLshClustersFileReady() override;
120 
121   // syncer::SyncServiceObserver:
122   void OnStateChanged(syncer::SyncService* sync_service) override;
123 
124   void MaybeTriggerFirstFlocComputation();
125 
126   void OnComputeFlocScheduledUpdate();
127 
128   void ComputeFloc(ComputeFlocTrigger trigger);
129 
130   void CheckCanComputeFloc(CanComputeFlocCallback callback);
131   void OnCheckCanComputeFlocCompleted(ComputeFlocCompletedCallback callback,
132                                       bool can_compute_floc);
133 
134   bool IsSyncHistoryEnabled() const;
135   bool AreThirdPartyCookiesAllowed() const;
136 
137   void IsSwaaNacAccountEnabled(CanComputeFlocCallback callback);
138 
139   void GetRecentlyVisitedURLs(GetRecentlyVisitedURLsCallback callback);
140   void OnGetRecentlyVisitedURLsCompleted(ComputeFlocCompletedCallback callback,
141                                          history::QueryResults results);
142 
143   // Apply the sorting-lsh post processing to compute the final versioned floc.
144   // The final floc may be invalid if the file is corrupted or the floc end up
145   // being blocked.
146   void ApplySortingLshPostProcessing(ComputeFlocCompletedCallback callback,
147                                      uint64_t sim_hash,
148                                      base::Time history_begin_time,
149                                      base::Time history_end_time);
150   void DidApplySortingLshPostProcessing(ComputeFlocCompletedCallback callback,
151                                         uint64_t sim_hash,
152                                         base::Time history_begin_time,
153                                         base::Time history_end_time,
154                                         base::Optional<uint64_t> final_hash,
155                                         base::Version version);
156 
157   // The id to be exposed to the JS API.
158   FlocId floc_id_;
159 
160   bool floc_computation_in_progress_ = false;
161   bool first_floc_computation_triggered_ = false;
162 
163   // True if history-delete occurs during an in-progress computation. When the
164   // in-progress one finishes, we would disregard the result (i.e. no loggings
165   // or floc update), and compute again. Potentially we could maintain extra
166   // states to tell if the history-delete would have impact on the in-progress
167   // result, but since this would only happen in rare race situations, we just
168   // always recompute to keep things simple.
169   bool need_recompute_ = false;
170 
171   bool first_sorting_lsh_file_ready_seen_ = false;
172   bool first_sync_history_enabled_seen_ = false;
173 
174   syncer::SyncService* sync_service_;
175   scoped_refptr<content_settings::CookieSettings> cookie_settings_;
176   FlocRemotePermissionService* floc_remote_permission_service_;
177   history::HistoryService* history_service_;
178   syncer::UserEventService* user_event_service_;
179 
180   // Used for the async tasks querying the HistoryService.
181   base::CancelableTaskTracker history_task_tracker_;
182 
183   // The timer used to schedule a floc computation.
184   base::OneShotTimer compute_floc_timer_;
185 
186   base::WeakPtrFactory<FlocIdProviderImpl> weak_ptr_factory_{this};
187 };
188 
189 }  // namespace federated_learning
190 
191 #endif  // CHROME_BROWSER_FEDERATED_LEARNING_FLOC_ID_PROVIDER_IMPL_H_
192