1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/feed/core/common/user_classifier.h"
6
7 #include <algorithm>
8 #include <cfloat>
9 #include <cmath>
10 #include <string>
11
12 #include "base/metrics/histogram_macros.h"
13 #include "base/numerics/ranges.h"
14 #include "base/stl_util.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/time/clock.h"
17 #include "components/feed/core/common/pref_names.h"
18 #include "components/prefs/pref_registry_simple.h"
19 #include "components/prefs/pref_service.h"
20
21 namespace feed {
22
23 namespace {
24
25 // The discount rate for computing the discounted-average rates. Must be
26 // strictly larger than 0 and strictly smaller than 1!
27 constexpr double kDiscountRatePerDay = 0.25;
28 static_assert(kDiscountRatePerDay > 0 && kDiscountRatePerDay < 1,
29 "invalid value");
30 // Compute discount_rate_per_hour such that
31 // kDiscountRatePerDay = 1 - e^{-kDiscountRatePerHour * 24}.
32 const double kDiscountRatePerHour =
33 std::log(1.0 / (1.0 - kDiscountRatePerDay)) / 24.0;
34
35 // Never consider any larger interval than this (so that extreme situations such
36 // as losing your phone or going for a long offline vacation do not skew the
37 // average too much).
38 const double kMaxHours = 7 * 24;
39
40 // Ignore events within |kMinHours| hours since the last event (|kMinHours| is
41 // the length of the browsing session where subsequent events of the same type
42 // do not count again).
43 const double kMinHours = 0.5;
44
45 // Classification constants.
46 const double kActiveConsumerClicksAtLeastOncePerHours = 96;
47 const double kRareUserViewsAtMostOncePerHours = 96;
48
49 // Histograms for logging the estimated average hours to next event. During
50 // launch these must match legacy histogram names.
51 const char kHistogramAverageHoursToOpenNTP[] =
52 "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
53 const char kHistogramAverageHoursToUseSuggestions[] =
54 "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
55
56 // List of all Events used for iteration.
57 const UserClassifier::Event kEvents[] = {
58 UserClassifier::Event::kSuggestionsViewed,
59 UserClassifier::Event::kSuggestionsUsed};
60 static_assert(base::size(kEvents) ==
61 static_cast<int>(UserClassifier::Event::kMaxValue) + 1,
62 "kEvents should have all enum values.");
63
GetRateKey(UserClassifier::Event event)64 const char* GetRateKey(UserClassifier::Event event) {
65 switch (event) {
66 case UserClassifier::Event::kSuggestionsViewed:
67 return prefs::kUserClassifierAverageSuggestionsViwedPerHour;
68 case UserClassifier::Event::kSuggestionsUsed:
69 return prefs::kUserClassifierAverageSuggestionsUsedPerHour;
70 }
71 }
72
GetLastTimeKey(UserClassifier::Event event)73 const char* GetLastTimeKey(UserClassifier::Event event) {
74 switch (event) {
75 case UserClassifier::Event::kSuggestionsViewed:
76 return prefs::kUserClassifierLastTimeToViewSuggestions;
77 case UserClassifier::Event::kSuggestionsUsed:
78 return prefs::kUserClassifierLastTimeToUseSuggestions;
79 }
80 }
81
GetInitialHoursBetweenEvents(UserClassifier::Event event)82 double GetInitialHoursBetweenEvents(UserClassifier::Event event) {
83 switch (event) {
84 case UserClassifier::Event::kSuggestionsViewed:
85 return 24;
86 case UserClassifier::Event::kSuggestionsUsed:
87 return 120;
88 }
89 }
90
91 // Returns the new value of the rate using its |old_value|, assuming
92 // |hours_since_last_time| hours have passed since it was last discounted.
DiscountRate(double old_value,double hours_since_last_time,double discount_rate_per_hour)93 double DiscountRate(double old_value,
94 double hours_since_last_time,
95 double discount_rate_per_hour) {
96 // Compute the new discounted average according to the formula
97 // avg_events := e^{-discount_rate_per_hour * hours_since} * avg_events
98 return std::exp(-discount_rate_per_hour * hours_since_last_time) * old_value;
99 }
100
101 // Compute the number of hours between two events for the given rate value
102 // assuming the events were equally distributed.
GetEstimateHoursBetweenEvents(double rate,double discount_rate_per_hour,double min_hours,double max_hours)103 double GetEstimateHoursBetweenEvents(double rate,
104 double discount_rate_per_hour,
105 double min_hours,
106 double max_hours) {
107 // The computation below is well-defined only for |rate| > 1 (log of
108 // negative value or division by zero). When |rate| -> 1, the estimate
109 // below -> infinity, so max_hours is a natural result, here.
110 if (rate <= 1) {
111 return max_hours;
112 }
113
114 // This is the estimate with the assumption that last event happened right
115 // now and the system is in the steady-state. Solve estimate_hours in the
116 // steady-state equation:
117 // rate = 1 + e^{-discount_rate * estimate_hours} * rate,
118 // i.e.
119 // -discount_rate * estimate_hours = log((rate - 1) / rate),
120 // discount_rate * estimate_hours = log(rate / (rate - 1)),
121 // estimate_hours = log(rate / (rate - 1)) / discount_rate.
122 double estimate_hours = std::log(rate / (rate - 1)) / discount_rate_per_hour;
123 return base::ClampToRange(estimate_hours, min_hours, max_hours);
124 }
125
126 // The inverse of GetEstimateHoursBetweenEvents().
GetRateForEstimateHoursBetweenEvents(double estimate_hours,double discount_rate_per_hour,double min_hours,double max_hours)127 double GetRateForEstimateHoursBetweenEvents(double estimate_hours,
128 double discount_rate_per_hour,
129 double min_hours,
130 double max_hours) {
131 // Keep the input value within [min_hours, max_hours].
132 estimate_hours = base::ClampToRange(estimate_hours, min_hours, max_hours);
133 // Return |rate| such that GetEstimateHoursBetweenEvents for |rate| returns
134 // |estimate_hours|. Thus, solve |rate| in
135 // rate = 1 + e^{-discount_rate * estimate_hours} * rate,
136 // i.e.
137 // rate * (1 - e^{-discount_rate * estimate_hours}) = 1,
138 // rate = 1 / (1 - e^{-discount_rate * estimate_hours}).
139 return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours));
140 }
141
142 } // namespace
143
UserClassifier(PrefService * pref_service,const base::Clock * clock)144 UserClassifier::UserClassifier(PrefService* pref_service,
145 const base::Clock* clock)
146 : pref_service_(pref_service), clock_(clock) {
147 // The pref_service_ can be null in tests.
148 if (!pref_service_) {
149 return;
150 }
151
152 // TODO(jkrcal): Store the current discount rate per hour into prefs. If it
153 // differs from the previous value, rescale the rate values so that the
154 // expectation does not change abruptly!
155
156 // Initialize the prefs storing the last time: the counter has just started!
157 for (const Event event : kEvents) {
158 if (!HasLastTime(event)) {
159 SetLastTimeToNow(event);
160 }
161 }
162 }
163
164 UserClassifier::~UserClassifier() = default;
165
166 // static
RegisterProfilePrefs(PrefRegistrySimple * registry)167 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
168 for (Event event : kEvents) {
169 double default_rate = GetRateForEstimateHoursBetweenEvents(
170 GetInitialHoursBetweenEvents(event), kDiscountRatePerHour, kMinHours,
171 kMaxHours);
172 registry->RegisterDoublePref(GetRateKey(event), default_rate);
173 registry->RegisterTimePref(GetLastTimeKey(event), base::Time());
174 }
175 }
176
OnEvent(Event event)177 void UserClassifier::OnEvent(Event event) {
178 double metric_value = UpdateRateOnEvent(event);
179 double avg = GetEstimateHoursBetweenEvents(metric_value, kDiscountRatePerHour,
180 kMinHours, kMaxHours);
181 // We use kMaxHours as the max value below as the maximum value for the
182 // histograms must be constant.
183 switch (event) {
184 case Event::kSuggestionsViewed:
185 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
186 kMaxHours, 50);
187 break;
188 case Event::kSuggestionsUsed:
189 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg,
190 1, kMaxHours, 50);
191 break;
192 }
193 }
194
GetEstimatedAvgTime(Event event) const195 double UserClassifier::GetEstimatedAvgTime(Event event) const {
196 double rate = GetUpToDateRate(event);
197 return GetEstimateHoursBetweenEvents(rate, kDiscountRatePerHour, kMinHours,
198 kMaxHours);
199 }
200
GetUserClass() const201 UserClass UserClassifier::GetUserClass() const {
202 // The pref_service_ can be null in tests.
203 if (!pref_service_) {
204 return UserClass::kActiveSuggestionsViewer;
205 }
206
207 if (GetEstimatedAvgTime(Event::kSuggestionsViewed) >=
208 kRareUserViewsAtMostOncePerHours) {
209 return UserClass::kRareSuggestionsViewer;
210 }
211
212 if (GetEstimatedAvgTime(Event::kSuggestionsUsed) <=
213 kActiveConsumerClicksAtLeastOncePerHours) {
214 return UserClass::kActiveSuggestionsConsumer;
215 }
216
217 return UserClass::kActiveSuggestionsViewer;
218 }
219
GetUserClassDescriptionForDebugging() const220 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
221 switch (GetUserClass()) {
222 case UserClass::kRareSuggestionsViewer:
223 return "Rare viewer of Feed articles";
224 case UserClass::kActiveSuggestionsViewer:
225 return "Active viewer of Feed articles";
226 case UserClass::kActiveSuggestionsConsumer:
227 return "Active consumer of Feed articles";
228 }
229 NOTREACHED();
230 return std::string();
231 }
232
ClearClassificationForDebugging()233 void UserClassifier::ClearClassificationForDebugging() {
234 // The pref_service_ can be null in tests.
235 if (!pref_service_) {
236 return;
237 }
238
239 for (const Event& event : kEvents) {
240 ClearRate(event);
241 SetLastTimeToNow(event);
242 }
243 }
244
UpdateRateOnEvent(Event event)245 double UserClassifier::UpdateRateOnEvent(Event event) {
246 // The pref_service_ can be null in tests.
247 if (!pref_service_) {
248 return 0;
249 }
250
251 double hours_since_last_time =
252 std::min(kMaxHours, GetHoursSinceLastTime(event));
253 // Ignore events within the same "browsing session".
254 if (hours_since_last_time < kMinHours) {
255 return GetUpToDateRate(event);
256 }
257
258 SetLastTimeToNow(event);
259
260 double rate = GetRate(event);
261 // Add 1 to the discounted rate as the event has happened right now.
262 double new_rate =
263 1 + DiscountRate(rate, hours_since_last_time, kDiscountRatePerHour);
264 SetRate(event, new_rate);
265 return new_rate;
266 }
267
GetUpToDateRate(Event event) const268 double UserClassifier::GetUpToDateRate(Event event) const {
269 // The pref_service_ can be null in tests.
270 if (!pref_service_) {
271 return 0;
272 }
273
274 const double hours_since_last_time =
275 std::min(kMaxHours, GetHoursSinceLastTime(event));
276
277 const double rate = GetRate(event);
278 return DiscountRate(rate, hours_since_last_time, kDiscountRatePerHour);
279 }
280
GetHoursSinceLastTime(Event event) const281 double UserClassifier::GetHoursSinceLastTime(Event event) const {
282 if (!HasLastTime(event)) {
283 return 0;
284 }
285
286 base::TimeDelta since_last_time =
287 clock_->Now() - pref_service_->GetTime(GetLastTimeKey(event));
288 return since_last_time.InSecondsF() / 3600;
289 }
290
HasLastTime(Event event) const291 bool UserClassifier::HasLastTime(Event event) const {
292 return pref_service_->HasPrefPath(GetLastTimeKey(event));
293 }
294
SetLastTimeToNow(Event event)295 void UserClassifier::SetLastTimeToNow(Event event) {
296 pref_service_->SetTime(GetLastTimeKey(event), clock_->Now());
297 }
298
GetRate(Event event) const299 double UserClassifier::GetRate(Event event) const {
300 return pref_service_->GetDouble(GetRateKey(event));
301 }
302
SetRate(Event event,double rate)303 void UserClassifier::SetRate(Event event, double rate) {
304 pref_service_->SetDouble(GetRateKey(event), rate);
305 }
306
ClearRate(Event event)307 void UserClassifier::ClearRate(Event event) {
308 pref_service_->ClearPref(GetRateKey(event));
309 }
310
311 } // namespace feed
312