1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/feed/core/common/user_classifier.h"
6 
7 #include <algorithm>
8 #include <cfloat>
9 #include <cmath>
10 #include <string>
11 
12 #include "base/metrics/histogram_macros.h"
13 #include "base/numerics/ranges.h"
14 #include "base/stl_util.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/time/clock.h"
17 #include "components/feed/core/common/pref_names.h"
18 #include "components/prefs/pref_registry_simple.h"
19 #include "components/prefs/pref_service.h"
20 
21 namespace feed {
22 
23 namespace {
24 
25 // The discount rate for computing the discounted-average rates. Must be
26 // strictly larger than 0 and strictly smaller than 1!
27 constexpr double kDiscountRatePerDay = 0.25;
28 static_assert(kDiscountRatePerDay > 0 && kDiscountRatePerDay < 1,
29               "invalid value");
30 // Compute discount_rate_per_hour such that
31 //   kDiscountRatePerDay = 1 - e^{-kDiscountRatePerHour * 24}.
32 const double kDiscountRatePerHour =
33     std::log(1.0 / (1.0 - kDiscountRatePerDay)) / 24.0;
34 
35 // Never consider any larger interval than this (so that extreme situations such
36 // as losing your phone or going for a long offline vacation do not skew the
37 // average too much).
38 const double kMaxHours = 7 * 24;
39 
40 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is
41 // the length of the browsing session where subsequent events of the same type
42 // do not count again).
43 const double kMinHours = 0.5;
44 
45 // Classification constants.
46 const double kActiveConsumerClicksAtLeastOncePerHours = 96;
47 const double kRareUserViewsAtMostOncePerHours = 96;
48 
49 // Histograms for logging the estimated average hours to next event. During
50 // launch these must match legacy histogram names.
51 const char kHistogramAverageHoursToOpenNTP[] =
52     "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
53 const char kHistogramAverageHoursToUseSuggestions[] =
54     "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
55 
56 // List of all Events used for iteration.
57 const UserClassifier::Event kEvents[] = {
58     UserClassifier::Event::kSuggestionsViewed,
59     UserClassifier::Event::kSuggestionsUsed};
60 static_assert(base::size(kEvents) ==
61                   static_cast<int>(UserClassifier::Event::kMaxValue) + 1,
62               "kEvents should have all enum values.");
63 
GetRateKey(UserClassifier::Event event)64 const char* GetRateKey(UserClassifier::Event event) {
65   switch (event) {
66     case UserClassifier::Event::kSuggestionsViewed:
67       return prefs::kUserClassifierAverageSuggestionsViwedPerHour;
68     case UserClassifier::Event::kSuggestionsUsed:
69       return prefs::kUserClassifierAverageSuggestionsUsedPerHour;
70   }
71 }
72 
GetLastTimeKey(UserClassifier::Event event)73 const char* GetLastTimeKey(UserClassifier::Event event) {
74   switch (event) {
75     case UserClassifier::Event::kSuggestionsViewed:
76       return prefs::kUserClassifierLastTimeToViewSuggestions;
77     case UserClassifier::Event::kSuggestionsUsed:
78       return prefs::kUserClassifierLastTimeToUseSuggestions;
79   }
80 }
81 
GetInitialHoursBetweenEvents(UserClassifier::Event event)82 double GetInitialHoursBetweenEvents(UserClassifier::Event event) {
83   switch (event) {
84     case UserClassifier::Event::kSuggestionsViewed:
85       return 24;
86     case UserClassifier::Event::kSuggestionsUsed:
87       return 120;
88   }
89 }
90 
91 // Returns the new value of the rate using its |old_value|, assuming
92 // |hours_since_last_time| hours have passed since it was last discounted.
DiscountRate(double old_value,double hours_since_last_time,double discount_rate_per_hour)93 double DiscountRate(double old_value,
94                     double hours_since_last_time,
95                     double discount_rate_per_hour) {
96   // Compute the new discounted average according to the formula
97   //   avg_events := e^{-discount_rate_per_hour * hours_since} * avg_events
98   return std::exp(-discount_rate_per_hour * hours_since_last_time) * old_value;
99 }
100 
101 // Compute the number of hours between two events for the given rate value
102 // assuming the events were equally distributed.
GetEstimateHoursBetweenEvents(double rate,double discount_rate_per_hour,double min_hours,double max_hours)103 double GetEstimateHoursBetweenEvents(double rate,
104                                      double discount_rate_per_hour,
105                                      double min_hours,
106                                      double max_hours) {
107   // The computation below is well-defined only for |rate| > 1 (log of
108   // negative value or division by zero). When |rate| -> 1, the estimate
109   // below -> infinity, so max_hours is a natural result, here.
110   if (rate <= 1) {
111     return max_hours;
112   }
113 
114   // This is the estimate with the assumption that last event happened right
115   // now and the system is in the steady-state. Solve estimate_hours in the
116   // steady-state equation:
117   //   rate = 1 + e^{-discount_rate * estimate_hours} * rate,
118   // i.e.
119   //   -discount_rate * estimate_hours = log((rate - 1) / rate),
120   //   discount_rate * estimate_hours = log(rate / (rate - 1)),
121   //   estimate_hours = log(rate / (rate - 1)) / discount_rate.
122   double estimate_hours = std::log(rate / (rate - 1)) / discount_rate_per_hour;
123   return base::ClampToRange(estimate_hours, min_hours, max_hours);
124 }
125 
126 // The inverse of GetEstimateHoursBetweenEvents().
GetRateForEstimateHoursBetweenEvents(double estimate_hours,double discount_rate_per_hour,double min_hours,double max_hours)127 double GetRateForEstimateHoursBetweenEvents(double estimate_hours,
128                                             double discount_rate_per_hour,
129                                             double min_hours,
130                                             double max_hours) {
131   // Keep the input value within [min_hours, max_hours].
132   estimate_hours = base::ClampToRange(estimate_hours, min_hours, max_hours);
133   // Return |rate| such that GetEstimateHoursBetweenEvents for |rate| returns
134   // |estimate_hours|. Thus, solve |rate| in
135   //   rate = 1 + e^{-discount_rate * estimate_hours} * rate,
136   // i.e.
137   //   rate * (1 - e^{-discount_rate * estimate_hours}) = 1,
138   //   rate = 1 / (1 - e^{-discount_rate * estimate_hours}).
139   return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours));
140 }
141 
142 }  // namespace
143 
UserClassifier(PrefService * pref_service,const base::Clock * clock)144 UserClassifier::UserClassifier(PrefService* pref_service,
145                                const base::Clock* clock)
146     : pref_service_(pref_service), clock_(clock) {
147   // The pref_service_ can be null in tests.
148   if (!pref_service_) {
149     return;
150   }
151 
152   // TODO(jkrcal): Store the current discount rate per hour into prefs. If it
153   // differs from the previous value, rescale the rate values so that the
154   // expectation does not change abruptly!
155 
156   // Initialize the prefs storing the last time: the counter has just started!
157   for (const Event event : kEvents) {
158     if (!HasLastTime(event)) {
159       SetLastTimeToNow(event);
160     }
161   }
162 }
163 
164 UserClassifier::~UserClassifier() = default;
165 
166 // static
RegisterProfilePrefs(PrefRegistrySimple * registry)167 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
168   for (Event event : kEvents) {
169     double default_rate = GetRateForEstimateHoursBetweenEvents(
170         GetInitialHoursBetweenEvents(event), kDiscountRatePerHour, kMinHours,
171         kMaxHours);
172     registry->RegisterDoublePref(GetRateKey(event), default_rate);
173     registry->RegisterTimePref(GetLastTimeKey(event), base::Time());
174   }
175 }
176 
OnEvent(Event event)177 void UserClassifier::OnEvent(Event event) {
178   double metric_value = UpdateRateOnEvent(event);
179   double avg = GetEstimateHoursBetweenEvents(metric_value, kDiscountRatePerHour,
180                                              kMinHours, kMaxHours);
181   // We use kMaxHours as the max value below as the maximum value for the
182   // histograms must be constant.
183   switch (event) {
184     case Event::kSuggestionsViewed:
185       UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
186                                   kMaxHours, 50);
187       break;
188     case Event::kSuggestionsUsed:
189       UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg,
190                                   1, kMaxHours, 50);
191       break;
192   }
193 }
194 
GetEstimatedAvgTime(Event event) const195 double UserClassifier::GetEstimatedAvgTime(Event event) const {
196   double rate = GetUpToDateRate(event);
197   return GetEstimateHoursBetweenEvents(rate, kDiscountRatePerHour, kMinHours,
198                                        kMaxHours);
199 }
200 
GetUserClass() const201 UserClass UserClassifier::GetUserClass() const {
202   // The pref_service_ can be null in tests.
203   if (!pref_service_) {
204     return UserClass::kActiveSuggestionsViewer;
205   }
206 
207   if (GetEstimatedAvgTime(Event::kSuggestionsViewed) >=
208       kRareUserViewsAtMostOncePerHours) {
209     return UserClass::kRareSuggestionsViewer;
210   }
211 
212   if (GetEstimatedAvgTime(Event::kSuggestionsUsed) <=
213       kActiveConsumerClicksAtLeastOncePerHours) {
214     return UserClass::kActiveSuggestionsConsumer;
215   }
216 
217   return UserClass::kActiveSuggestionsViewer;
218 }
219 
GetUserClassDescriptionForDebugging() const220 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
221   switch (GetUserClass()) {
222     case UserClass::kRareSuggestionsViewer:
223       return "Rare viewer of Feed articles";
224     case UserClass::kActiveSuggestionsViewer:
225       return "Active viewer of Feed articles";
226     case UserClass::kActiveSuggestionsConsumer:
227       return "Active consumer of Feed articles";
228   }
229   NOTREACHED();
230   return std::string();
231 }
232 
ClearClassificationForDebugging()233 void UserClassifier::ClearClassificationForDebugging() {
234   // The pref_service_ can be null in tests.
235   if (!pref_service_) {
236     return;
237   }
238 
239   for (const Event& event : kEvents) {
240     ClearRate(event);
241     SetLastTimeToNow(event);
242   }
243 }
244 
UpdateRateOnEvent(Event event)245 double UserClassifier::UpdateRateOnEvent(Event event) {
246   // The pref_service_ can be null in tests.
247   if (!pref_service_) {
248     return 0;
249   }
250 
251   double hours_since_last_time =
252       std::min(kMaxHours, GetHoursSinceLastTime(event));
253   // Ignore events within the same "browsing session".
254   if (hours_since_last_time < kMinHours) {
255     return GetUpToDateRate(event);
256   }
257 
258   SetLastTimeToNow(event);
259 
260   double rate = GetRate(event);
261   // Add 1 to the discounted rate as the event has happened right now.
262   double new_rate =
263       1 + DiscountRate(rate, hours_since_last_time, kDiscountRatePerHour);
264   SetRate(event, new_rate);
265   return new_rate;
266 }
267 
GetUpToDateRate(Event event) const268 double UserClassifier::GetUpToDateRate(Event event) const {
269   // The pref_service_ can be null in tests.
270   if (!pref_service_) {
271     return 0;
272   }
273 
274   const double hours_since_last_time =
275       std::min(kMaxHours, GetHoursSinceLastTime(event));
276 
277   const double rate = GetRate(event);
278   return DiscountRate(rate, hours_since_last_time, kDiscountRatePerHour);
279 }
280 
GetHoursSinceLastTime(Event event) const281 double UserClassifier::GetHoursSinceLastTime(Event event) const {
282   if (!HasLastTime(event)) {
283     return 0;
284   }
285 
286   base::TimeDelta since_last_time =
287       clock_->Now() - pref_service_->GetTime(GetLastTimeKey(event));
288   return since_last_time.InSecondsF() / 3600;
289 }
290 
HasLastTime(Event event) const291 bool UserClassifier::HasLastTime(Event event) const {
292   return pref_service_->HasPrefPath(GetLastTimeKey(event));
293 }
294 
SetLastTimeToNow(Event event)295 void UserClassifier::SetLastTimeToNow(Event event) {
296   pref_service_->SetTime(GetLastTimeKey(event), clock_->Now());
297 }
298 
GetRate(Event event) const299 double UserClassifier::GetRate(Event event) const {
300   return pref_service_->GetDouble(GetRateKey(event));
301 }
302 
SetRate(Event event,double rate)303 void UserClassifier::SetRate(Event event, double rate) {
304   pref_service_->SetDouble(GetRateKey(event), rate);
305 }
306 
ClearRate(Event event)307 void UserClassifier::ClearRate(Event event) {
308   pref_service_->ClearPref(GetRateKey(event));
309 }
310 
311 }  // namespace feed
312