1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include "components/ntp_snippets/user_classifier.h"

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <string>

#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/ranges.h"
#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/time/clock.h"
#include "components/ntp_snippets/features.h"
#include "components/ntp_snippets/pref_names.h"
#include "components/ntp_snippets/time_serialization.h"
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/pref_service.h"
#include "components/variations/variations_associated_data.h"
23
24 namespace ntp_snippets {
25
26 namespace {
27
// The discount rate for computing the discounted-average metrics. Must be
// strictly larger than 0 and strictly smaller than 1! This is the built-in
// default; GetDiscountRatePerHour() reads the variation parameter named below
// and falls back to this value when the parameter is out of range.
const double kDiscountRatePerDay = 0.25;
const char kDiscountRatePerDayParam[] = "user_classifier_discount_rate_per_day";

// Never consider any larger interval than this (so that extreme situations such
// as losing your phone or going for a long offline vacation do not skew the
// average too much).
// When overriding via variation parameters, it is better to use smaller values
// than |kMaxHours| as this is the maximum value reported in the histograms.
const double kMaxHours = 7 * 24;
const char kMaxHoursParam[] = "user_classifier_max_hours";

// Ignore events within |kMinHours| hours since the last event (|kMinHours| is
// the length of the browsing session where subsequent events of the same type
// do not count again).
const double kMinHours = 0.5;
const char kMinHoursParam[] = "user_classifier_min_hours";

// Classification constants.
// A user whose estimated interval between SUGGESTIONS_USED events is at most
// this many hours is classified as ACTIVE_SUGGESTIONS_CONSUMER (see
// GetUserClass()).
const double kActiveConsumerClicksAtLeastOncePerHours = 96;
const char kActiveConsumerClicksAtLeastOncePerHoursParam[] =
    "user_classifier_active_consumer_clicks_at_least_once_per_hours";

// A user whose estimated interval between NTP_OPENED events is at least this
// many hours is classified as RARE_NTP_USER (see GetUserClass()).
// The previous value in production was 66, i.e. 2.75 days. The new value is a
// shift in the direction we want (having more active users).
const double kRareUserOpensNTPAtMostOncePerHours = 96;
const char kRareUserOpensNTPAtMostOncePerHoursParam[] =
    "user_classifier_rare_user_opens_ntp_at_most_once_per_hours";

// Histograms for logging the estimated average hours to next event. One
// histogram per metric; they are recorded in UserClassifier::OnEvent().
const char kHistogramAverageHoursToOpenNTP[] =
    "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
const char kHistogramAverageHoursToShowSuggestions[] =
    "NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
const char kHistogramAverageHoursToUseSuggestions[] =
    "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
65
66 // The enum used for iteration.
67 const UserClassifier::Metric kMetrics[] = {
68 UserClassifier::Metric::NTP_OPENED,
69 UserClassifier::Metric::SUGGESTIONS_SHOWN,
70 UserClassifier::Metric::SUGGESTIONS_USED};
71
72 // The summary of the prefs.
73 const char* kMetricKeys[] = {
74 prefs::kUserClassifierAverageNTPOpenedPerHour,
75 prefs::kUserClassifierAverageSuggestionsShownPerHour,
76 prefs::kUserClassifierAverageSuggestionsUsedPerHour};
77 const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP,
78 prefs::kUserClassifierLastTimeToShowSuggestions,
79 prefs::kUserClassifierLastTimeToUseSuggestions};
80
81 // Default lengths of the intervals for new users for the metrics.
82 const double kInitialHoursBetweenEvents[] = {24, 48, 120};
83 const char* kInitialHoursBetweenEventsParams[] = {
84 "user_classifier_default_interval_ntp_opened",
85 "user_classifier_default_interval_suggestions_shown",
86 "user_classifier_default_interval_suggestions_used"};
87
88 static_assert(base::size(kMetrics) ==
89 static_cast<int>(UserClassifier::Metric::COUNT) &&
90 base::size(kMetricKeys) ==
91 static_cast<int>(UserClassifier::Metric::COUNT) &&
92 base::size(kLastTimeKeys) ==
93 static_cast<int>(UserClassifier::Metric::COUNT) &&
94 base::size(kInitialHoursBetweenEvents) ==
95 static_cast<int>(UserClassifier::Metric::COUNT) &&
96 base::size(kInitialHoursBetweenEventsParams) ==
97 static_cast<int>(UserClassifier::Metric::COUNT),
98 "Fill in info for all metrics.");
99
100 // Computes the discount rate.
GetDiscountRatePerHour()101 double GetDiscountRatePerHour() {
102 double discount_rate_per_day = variations::GetVariationParamByFeatureAsDouble(
103 kArticleSuggestionsFeature, kDiscountRatePerDayParam,
104 kDiscountRatePerDay);
105 // Check for illegal values.
106 if (discount_rate_per_day <= 0 || discount_rate_per_day >= 1) {
107 DLOG(WARNING) << "Illegal value " << discount_rate_per_day
108 << " for the parameter " << kDiscountRatePerDayParam
109 << " (must be strictly between 0 and 1; the default "
110 << kDiscountRatePerDay << " is used, instead).";
111 discount_rate_per_day = kDiscountRatePerDay;
112 }
113 // Compute discount_rate_per_hour such that
114 // discount_rate_per_day = 1 - e^{-discount_rate_per_hour * 24}.
115 return std::log(1.0 / (1.0 - discount_rate_per_day)) / 24.0;
116 }
117
GetInitialHoursBetweenEvents(UserClassifier::Metric metric)118 double GetInitialHoursBetweenEvents(UserClassifier::Metric metric) {
119 return variations::GetVariationParamByFeatureAsDouble(
120 kArticleSuggestionsFeature,
121 kInitialHoursBetweenEventsParams[static_cast<int>(metric)],
122 kInitialHoursBetweenEvents[static_cast<int>(metric)]);
123 }
124
GetMinHours()125 double GetMinHours() {
126 return variations::GetVariationParamByFeatureAsDouble(
127 kArticleSuggestionsFeature, kMinHoursParam, kMinHours);
128 }
129
GetMaxHours()130 double GetMaxHours() {
131 return variations::GetVariationParamByFeatureAsDouble(
132 kArticleSuggestionsFeature, kMaxHoursParam, kMaxHours);
133 }
134
// Applies exponential discounting to |old_value|: the result is the value of
// the metric after |hours_since_last_time| hours have elapsed since it was
// last discounted, given the rate |discount_rate_per_hour|.
double DiscountMetric(double old_value,
                      double hours_since_last_time,
                      double discount_rate_per_hour) {
  // avg_events := e^{-discount_rate_per_hour * hours_since} * avg_events
  const double decay_factor =
      std::exp(-discount_rate_per_hour * hours_since_last_time);
  return decay_factor * old_value;
}
144
// Compute the number of hours between two events for the given metric value
// assuming the events were equally distributed.
//
// |metric_value| is the discounted-average metric, |discount_rate_per_hour|
// the rate it is discounted with. The result is clamped into
// [|min_hours|, |max_hours|]; for |metric_value| <= 1 it is |max_hours|.
double GetEstimateHoursBetweenEvents(double metric_value,
                                     double discount_rate_per_hour,
                                     double min_hours,
                                     double max_hours) {
  // The computation below is well-defined only for |metric_value| > 1 (log of
  // negative value or division by zero). When |metric_value| -> 1, the estimate
  // below -> infinity, so max_hours is a natural result, here.
  if (metric_value <= 1) {
    return max_hours;
  }

  // This is the estimate with the assumption that last event happened right
  // now and the system is in the steady-state. Solve estimate_hours in the
  // steady-state equation:
  //   metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
  // i.e.
  //   estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
  //
  // The logarithm is evaluated as log1p(1 / (metric_value - 1)), which is the
  // same quantity (m / (m - 1) == 1 + 1 / (m - 1)) but stays accurate when
  // |metric_value| is large and the quotient is indistinguishable from 1.
  const double estimate_hours =
      std::log1p(1.0 / (metric_value - 1.0)) / discount_rate_per_hour;

  // Clamp into [min_hours, max_hours]; this is the same min(max(...)) form
  // that base::ClampToRange() evaluates.
  return std::min(std::max(estimate_hours, min_hours), max_hours);
}
170
// The inverse of GetEstimateHoursBetweenEvents(): returns the metric value
// for which the estimated interval between events equals |estimate_hours|
// (after clamping |estimate_hours| into [|min_hours|, |max_hours|]).
double GetMetricValueForEstimateHoursBetweenEvents(
    double estimate_hours,
    double discount_rate_per_hour,
    double min_hours,
    double max_hours) {
  // Clamp into [min_hours, max_hours]; this is the same min(max(...)) form
  // that base::ClampToRange() evaluates.
  estimate_hours = std::min(std::max(estimate_hours, min_hours), max_hours);

  // Return |metric_value| such that GetEstimateHoursBetweenEvents for
  // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
  //   metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
  // i.e.
  //   metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1,
  //   metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
  //
  // 1 - e^{-x} is evaluated as -expm1(-x): the direct subtraction cancels
  // most significant digits when x is small, expm1() does not.
  return -1.0 / std::expm1(-discount_rate_per_hour * estimate_hours);
}
186
187 } // namespace
188
UserClassifier(PrefService * pref_service,base::Clock * clock)189 UserClassifier::UserClassifier(PrefService* pref_service, base::Clock* clock)
190 : pref_service_(pref_service),
191 clock_(clock),
192 discount_rate_per_hour_(GetDiscountRatePerHour()),
193 min_hours_(GetMinHours()),
194 max_hours_(GetMaxHours()),
195 active_consumer_clicks_at_least_once_per_hours_(
196 variations::GetVariationParamByFeatureAsDouble(
197 kArticleSuggestionsFeature,
198 kActiveConsumerClicksAtLeastOncePerHoursParam,
199 kActiveConsumerClicksAtLeastOncePerHours)),
200 rare_user_opens_ntp_at_most_once_per_hours_(
201 variations::GetVariationParamByFeatureAsDouble(
202 kArticleSuggestionsFeature,
203 kRareUserOpensNTPAtMostOncePerHoursParam,
204 kRareUserOpensNTPAtMostOncePerHours)) {
205 // The pref_service_ can be null in tests.
206 if (!pref_service_) {
207 return;
208 }
209
210 // TODO(jkrcal): Store the current discount rate per hour into prefs. If it
211 // differs from the previous value, rescale the metric values so that the
212 // expectation does not change abruptly!
213
214 // Initialize the prefs storing the last time: the counter has just started!
215 for (const Metric metric : kMetrics) {
216 if (!HasLastTime(metric)) {
217 SetLastTimeToNow(metric);
218 }
219 }
220 }
221
222 UserClassifier::~UserClassifier() = default;
223
224 // static
RegisterProfilePrefs(PrefRegistrySimple * registry)225 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
226 double discount_rate = GetDiscountRatePerHour();
227 double min_hours = GetMinHours();
228 double max_hours = GetMaxHours();
229
230 for (Metric metric : kMetrics) {
231 double default_metric_value = GetMetricValueForEstimateHoursBetweenEvents(
232 GetInitialHoursBetweenEvents(metric), discount_rate, min_hours,
233 max_hours);
234 registry->RegisterDoublePref(kMetricKeys[static_cast<int>(metric)],
235 default_metric_value);
236 registry->RegisterInt64Pref(kLastTimeKeys[static_cast<int>(metric)], 0);
237 }
238 }
239
OnEvent(Metric metric)240 void UserClassifier::OnEvent(Metric metric) {
241 DCHECK_NE(metric, Metric::COUNT);
242 double metric_value = UpdateMetricOnEvent(metric);
243
244 double avg = GetEstimateHoursBetweenEvents(
245 metric_value, discount_rate_per_hour_, min_hours_, max_hours_);
246 // We use kMaxHours as the max value below as the maximum value for the
247 // histograms must be constant.
248 switch (metric) {
249 case Metric::NTP_OPENED:
250 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
251 kMaxHours, 50);
252 break;
253 case Metric::SUGGESTIONS_SHOWN:
254 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg,
255 1, kMaxHours, 50);
256 break;
257 case Metric::SUGGESTIONS_USED:
258 UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg,
259 1, kMaxHours, 50);
260 break;
261 case Metric::COUNT:
262 NOTREACHED();
263 break;
264 }
265 }
266
GetEstimatedAvgTime(Metric metric) const267 double UserClassifier::GetEstimatedAvgTime(Metric metric) const {
268 DCHECK_NE(metric, Metric::COUNT);
269 double metric_value = GetUpToDateMetricValue(metric);
270 return GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_,
271 min_hours_, max_hours_);
272 }
273
GetUserClass() const274 UserClassifier::UserClass UserClassifier::GetUserClass() const {
275 // The pref_service_ can be null in tests.
276 if (!pref_service_) {
277 return UserClass::ACTIVE_NTP_USER;
278 }
279
280 if (GetEstimatedAvgTime(Metric::NTP_OPENED) >=
281 rare_user_opens_ntp_at_most_once_per_hours_) {
282 return UserClass::RARE_NTP_USER;
283 }
284
285 if (GetEstimatedAvgTime(Metric::SUGGESTIONS_USED) <=
286 active_consumer_clicks_at_least_once_per_hours_) {
287 return UserClass::ACTIVE_SUGGESTIONS_CONSUMER;
288 }
289
290 return UserClass::ACTIVE_NTP_USER;
291 }
292
GetUserClassDescriptionForDebugging() const293 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
294 switch (GetUserClass()) {
295 case UserClass::RARE_NTP_USER:
296 return "Rare user of the NTP";
297 case UserClass::ACTIVE_NTP_USER:
298 return "Active user of the NTP";
299 case UserClass::ACTIVE_SUGGESTIONS_CONSUMER:
300 return "Active consumer of NTP suggestions";
301 }
302 NOTREACHED();
303 return std::string();
304 }
305
ClearClassificationForDebugging()306 void UserClassifier::ClearClassificationForDebugging() {
307 // The pref_service_ can be null in tests.
308 if (!pref_service_) {
309 return;
310 }
311
312 for (const Metric& metric : kMetrics) {
313 ClearMetricValue(metric);
314 SetLastTimeToNow(metric);
315 }
316 }
317
UpdateMetricOnEvent(Metric metric)318 double UserClassifier::UpdateMetricOnEvent(Metric metric) {
319 // The pref_service_ can be null in tests.
320 if (!pref_service_) {
321 return 0;
322 }
323
324 double hours_since_last_time =
325 std::min(max_hours_, GetHoursSinceLastTime(metric));
326 // Ignore events within the same "browsing session".
327 if (hours_since_last_time < min_hours_) {
328 return GetUpToDateMetricValue(metric);
329 }
330
331 SetLastTimeToNow(metric);
332
333 double metric_value = GetMetricValue(metric);
334 // Add 1 to the discounted metric as the event has happened right now.
335 double new_metric_value =
336 1 + DiscountMetric(metric_value, hours_since_last_time,
337 discount_rate_per_hour_);
338 SetMetricValue(metric, new_metric_value);
339 return new_metric_value;
340 }
341
GetUpToDateMetricValue(Metric metric) const342 double UserClassifier::GetUpToDateMetricValue(Metric metric) const {
343 // The pref_service_ can be null in tests.
344 if (!pref_service_) {
345 return 0;
346 }
347
348 double hours_since_last_time =
349 std::min(max_hours_, GetHoursSinceLastTime(metric));
350
351 double metric_value = GetMetricValue(metric);
352 return DiscountMetric(metric_value, hours_since_last_time,
353 discount_rate_per_hour_);
354 }
355
GetHoursSinceLastTime(Metric metric) const356 double UserClassifier::GetHoursSinceLastTime(Metric metric) const {
357 if (!HasLastTime(metric)) {
358 return 0;
359 }
360
361 const base::TimeDelta since_last_time =
362 clock_->Now() - DeserializeTime(pref_service_->GetInt64(
363 kLastTimeKeys[static_cast<int>(metric)]));
364 return since_last_time / base::TimeDelta::FromHours(1);
365 }
366
HasLastTime(Metric metric) const367 bool UserClassifier::HasLastTime(Metric metric) const {
368 return pref_service_->HasPrefPath(kLastTimeKeys[static_cast<int>(metric)]);
369 }
370
SetLastTimeToNow(Metric metric)371 void UserClassifier::SetLastTimeToNow(Metric metric) {
372 pref_service_->SetInt64(kLastTimeKeys[static_cast<int>(metric)],
373 SerializeTime(clock_->Now()));
374 }
375
GetMetricValue(Metric metric) const376 double UserClassifier::GetMetricValue(Metric metric) const {
377 return pref_service_->GetDouble(kMetricKeys[static_cast<int>(metric)]);
378 }
379
SetMetricValue(Metric metric,double metric_value)380 void UserClassifier::SetMetricValue(Metric metric, double metric_value) {
381 pref_service_->SetDouble(kMetricKeys[static_cast<int>(metric)], metric_value);
382 }
383
ClearMetricValue(Metric metric)384 void UserClassifier::ClearMetricValue(Metric metric) {
385 pref_service_->ClearPref(kMetricKeys[static_cast<int>(metric)]);
386 }
387
388 } // namespace ntp_snippets
389