1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/ntp_snippets/user_classifier.h"
6 
7 #include <algorithm>
8 #include <cfloat>
9 #include <string>
10 
11 #include "base/logging.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/numerics/ranges.h"
14 #include "base/stl_util.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/time/clock.h"
17 #include "components/ntp_snippets/features.h"
18 #include "components/ntp_snippets/pref_names.h"
19 #include "components/ntp_snippets/time_serialization.h"
20 #include "components/prefs/pref_registry_simple.h"
21 #include "components/prefs/pref_service.h"
22 #include "components/variations/variations_associated_data.h"
23 
24 namespace ntp_snippets {
25 
26 namespace {
27 
// Default daily discount rate for the discounted-average metrics. Any value
// used in its place must be strictly larger than 0 and strictly smaller than 1!
constexpr double kDiscountRatePerDay = 0.25;
constexpr char kDiscountRatePerDayParam[] =
    "user_classifier_discount_rate_per_day";

// Upper bound (in hours) on any interval that is taken into account, so that
// extreme situations such as losing your phone or going for a long offline
// vacation do not skew the average too much.
// When overriding via variation parameters, it is better to use values smaller
// than |kMaxHours|, as this is the maximum value reported in the histograms.
constexpr double kMaxHours = 7 * 24;
constexpr char kMaxHoursParam[] = "user_classifier_max_hours";

// Events that happen within |kMinHours| hours of the last event are ignored
// (|kMinHours| is the length of the browsing session in which subsequent
// events of the same type do not count again).
constexpr double kMinHours = 0.5;
constexpr char kMinHoursParam[] = "user_classifier_min_hours";

// Classification thresholds (in hours).
constexpr double kActiveConsumerClicksAtLeastOncePerHours = 96;
constexpr char kActiveConsumerClicksAtLeastOncePerHoursParam[] =
    "user_classifier_active_consumer_clicks_at_least_once_per_hours";

// The previous value in production was 66, i.e. 2.75 days. The new value is a
// shift in the direction we want (having more active users).
constexpr double kRareUserOpensNTPAtMostOncePerHours = 96;
constexpr char kRareUserOpensNTPAtMostOncePerHoursParam[] =
    "user_classifier_rare_user_opens_ntp_at_most_once_per_hours";
57 
// Names of the histograms that record the estimated average number of hours
// until the next event, one per metric.
constexpr char kHistogramAverageHoursToOpenNTP[] =
    "NewTabPage.UserClassifier.AverageHoursToOpenNTP";
constexpr char kHistogramAverageHoursToShowSuggestions[] =
    "NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
constexpr char kHistogramAverageHoursToUseSuggestions[] =
    "NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
65 
66 // The enum used for iteration.
67 const UserClassifier::Metric kMetrics[] = {
68     UserClassifier::Metric::NTP_OPENED,
69     UserClassifier::Metric::SUGGESTIONS_SHOWN,
70     UserClassifier::Metric::SUGGESTIONS_USED};
71 
72 // The summary of the prefs.
73 const char* kMetricKeys[] = {
74     prefs::kUserClassifierAverageNTPOpenedPerHour,
75     prefs::kUserClassifierAverageSuggestionsShownPerHour,
76     prefs::kUserClassifierAverageSuggestionsUsedPerHour};
77 const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP,
78                                prefs::kUserClassifierLastTimeToShowSuggestions,
79                                prefs::kUserClassifierLastTimeToUseSuggestions};
80 
81 // Default lengths of the intervals for new users for the metrics.
82 const double kInitialHoursBetweenEvents[] = {24, 48, 120};
83 const char* kInitialHoursBetweenEventsParams[] = {
84     "user_classifier_default_interval_ntp_opened",
85     "user_classifier_default_interval_suggestions_shown",
86     "user_classifier_default_interval_suggestions_used"};
87 
88 static_assert(base::size(kMetrics) ==
89                       static_cast<int>(UserClassifier::Metric::COUNT) &&
90                   base::size(kMetricKeys) ==
91                       static_cast<int>(UserClassifier::Metric::COUNT) &&
92                   base::size(kLastTimeKeys) ==
93                       static_cast<int>(UserClassifier::Metric::COUNT) &&
94                   base::size(kInitialHoursBetweenEvents) ==
95                       static_cast<int>(UserClassifier::Metric::COUNT) &&
96                   base::size(kInitialHoursBetweenEventsParams) ==
97                       static_cast<int>(UserClassifier::Metric::COUNT),
98               "Fill in info for all metrics.");
99 
100 // Computes the discount rate.
GetDiscountRatePerHour()101 double GetDiscountRatePerHour() {
102   double discount_rate_per_day = variations::GetVariationParamByFeatureAsDouble(
103       kArticleSuggestionsFeature, kDiscountRatePerDayParam,
104       kDiscountRatePerDay);
105   // Check for illegal values.
106   if (discount_rate_per_day <= 0 || discount_rate_per_day >= 1) {
107     DLOG(WARNING) << "Illegal value " << discount_rate_per_day
108                   << " for the parameter " << kDiscountRatePerDayParam
109                   << " (must be strictly between 0 and 1; the default "
110                   << kDiscountRatePerDay << " is used, instead).";
111     discount_rate_per_day = kDiscountRatePerDay;
112   }
113   // Compute discount_rate_per_hour such that
114   //   discount_rate_per_day = 1 - e^{-discount_rate_per_hour * 24}.
115   return std::log(1.0 / (1.0 - discount_rate_per_day)) / 24.0;
116 }
117 
GetInitialHoursBetweenEvents(UserClassifier::Metric metric)118 double GetInitialHoursBetweenEvents(UserClassifier::Metric metric) {
119   return variations::GetVariationParamByFeatureAsDouble(
120       kArticleSuggestionsFeature,
121       kInitialHoursBetweenEventsParams[static_cast<int>(metric)],
122       kInitialHoursBetweenEvents[static_cast<int>(metric)]);
123 }
124 
GetMinHours()125 double GetMinHours() {
126   return variations::GetVariationParamByFeatureAsDouble(
127       kArticleSuggestionsFeature, kMinHoursParam, kMinHours);
128 }
129 
GetMaxHours()130 double GetMaxHours() {
131   return variations::GetVariationParamByFeatureAsDouble(
132       kArticleSuggestionsFeature, kMaxHoursParam, kMaxHours);
133 }
134 
// Discounts |old_value| of a metric for the |hours_since_last_time| hours that
// have passed since it was last discounted and returns the result.
double DiscountMetric(double old_value,
                      double hours_since_last_time,
                      double discount_rate_per_hour) {
  // Exponential decay:
  //   avg_events := e^{-discount_rate_per_hour * hours_since} * avg_events
  const double decay_factor =
      std::exp(-discount_rate_per_hour * hours_since_last_time);
  return decay_factor * old_value;
}
144 
145 // Compute the number of hours between two events for the given metric value
146 // assuming the events were equally distributed.
GetEstimateHoursBetweenEvents(double metric_value,double discount_rate_per_hour,double min_hours,double max_hours)147 double GetEstimateHoursBetweenEvents(double metric_value,
148                                      double discount_rate_per_hour,
149                                      double min_hours,
150                                      double max_hours) {
151   // The computation below is well-defined only for |metric_value| > 1 (log of
152   // negative value or division by zero). When |metric_value| -> 1, the estimate
153   // below -> infinity, so max_hours is a natural result, here.
154   if (metric_value <= 1) {
155     return max_hours;
156   }
157 
158   // This is the estimate with the assumption that last event happened right
159   // now and the system is in the steady-state. Solve estimate_hours in the
160   // steady-state equation:
161   //   metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
162   // i.e.
163   //   -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
164   //   discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
165   //   estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
166   double estimate_hours =
167       std::log(metric_value / (metric_value - 1)) / discount_rate_per_hour;
168   return base::ClampToRange(estimate_hours, min_hours, max_hours);
169 }
170 
171 // The inverse of GetEstimateHoursBetweenEvents().
GetMetricValueForEstimateHoursBetweenEvents(double estimate_hours,double discount_rate_per_hour,double min_hours,double max_hours)172 double GetMetricValueForEstimateHoursBetweenEvents(
173     double estimate_hours,
174     double discount_rate_per_hour,
175     double min_hours,
176     double max_hours) {
177   estimate_hours = base::ClampToRange(estimate_hours, min_hours, max_hours);
178   // Return |metric_value| such that GetEstimateHoursBetweenEvents for
179   // |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
180   //   metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
181   // i.e.
182   //   metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1,
183   //   metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
184   return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours));
185 }
186 
187 }  // namespace
188 
UserClassifier(PrefService * pref_service,base::Clock * clock)189 UserClassifier::UserClassifier(PrefService* pref_service, base::Clock* clock)
190     : pref_service_(pref_service),
191       clock_(clock),
192       discount_rate_per_hour_(GetDiscountRatePerHour()),
193       min_hours_(GetMinHours()),
194       max_hours_(GetMaxHours()),
195       active_consumer_clicks_at_least_once_per_hours_(
196           variations::GetVariationParamByFeatureAsDouble(
197               kArticleSuggestionsFeature,
198               kActiveConsumerClicksAtLeastOncePerHoursParam,
199               kActiveConsumerClicksAtLeastOncePerHours)),
200       rare_user_opens_ntp_at_most_once_per_hours_(
201           variations::GetVariationParamByFeatureAsDouble(
202               kArticleSuggestionsFeature,
203               kRareUserOpensNTPAtMostOncePerHoursParam,
204               kRareUserOpensNTPAtMostOncePerHours)) {
205   // The pref_service_ can be null in tests.
206   if (!pref_service_) {
207     return;
208   }
209 
210   // TODO(jkrcal): Store the current discount rate per hour into prefs. If it
211   // differs from the previous value, rescale the metric values so that the
212   // expectation does not change abruptly!
213 
214   // Initialize the prefs storing the last time: the counter has just started!
215   for (const Metric metric : kMetrics) {
216     if (!HasLastTime(metric)) {
217       SetLastTimeToNow(metric);
218     }
219   }
220 }
221 
222 UserClassifier::~UserClassifier() = default;
223 
224 // static
RegisterProfilePrefs(PrefRegistrySimple * registry)225 void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
226   double discount_rate = GetDiscountRatePerHour();
227   double min_hours = GetMinHours();
228   double max_hours = GetMaxHours();
229 
230   for (Metric metric : kMetrics) {
231     double default_metric_value = GetMetricValueForEstimateHoursBetweenEvents(
232         GetInitialHoursBetweenEvents(metric), discount_rate, min_hours,
233         max_hours);
234     registry->RegisterDoublePref(kMetricKeys[static_cast<int>(metric)],
235                                  default_metric_value);
236     registry->RegisterInt64Pref(kLastTimeKeys[static_cast<int>(metric)], 0);
237   }
238 }
239 
OnEvent(Metric metric)240 void UserClassifier::OnEvent(Metric metric) {
241   DCHECK_NE(metric, Metric::COUNT);
242   double metric_value = UpdateMetricOnEvent(metric);
243 
244   double avg = GetEstimateHoursBetweenEvents(
245       metric_value, discount_rate_per_hour_, min_hours_, max_hours_);
246   // We use kMaxHours as the max value below as the maximum value for the
247   // histograms must be constant.
248   switch (metric) {
249     case Metric::NTP_OPENED:
250       UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP, avg, 1,
251                                   kMaxHours, 50);
252       break;
253     case Metric::SUGGESTIONS_SHOWN:
254       UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions, avg,
255                                   1, kMaxHours, 50);
256       break;
257     case Metric::SUGGESTIONS_USED:
258       UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions, avg,
259                                   1, kMaxHours, 50);
260       break;
261     case Metric::COUNT:
262       NOTREACHED();
263       break;
264   }
265 }
266 
GetEstimatedAvgTime(Metric metric) const267 double UserClassifier::GetEstimatedAvgTime(Metric metric) const {
268   DCHECK_NE(metric, Metric::COUNT);
269   double metric_value = GetUpToDateMetricValue(metric);
270   return GetEstimateHoursBetweenEvents(metric_value, discount_rate_per_hour_,
271                                        min_hours_, max_hours_);
272 }
273 
GetUserClass() const274 UserClassifier::UserClass UserClassifier::GetUserClass() const {
275   // The pref_service_ can be null in tests.
276   if (!pref_service_) {
277     return UserClass::ACTIVE_NTP_USER;
278   }
279 
280   if (GetEstimatedAvgTime(Metric::NTP_OPENED) >=
281       rare_user_opens_ntp_at_most_once_per_hours_) {
282     return UserClass::RARE_NTP_USER;
283   }
284 
285   if (GetEstimatedAvgTime(Metric::SUGGESTIONS_USED) <=
286       active_consumer_clicks_at_least_once_per_hours_) {
287     return UserClass::ACTIVE_SUGGESTIONS_CONSUMER;
288   }
289 
290   return UserClass::ACTIVE_NTP_USER;
291 }
292 
GetUserClassDescriptionForDebugging() const293 std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
294   switch (GetUserClass()) {
295     case UserClass::RARE_NTP_USER:
296       return "Rare user of the NTP";
297     case UserClass::ACTIVE_NTP_USER:
298       return "Active user of the NTP";
299     case UserClass::ACTIVE_SUGGESTIONS_CONSUMER:
300       return "Active consumer of NTP suggestions";
301   }
302   NOTREACHED();
303   return std::string();
304 }
305 
ClearClassificationForDebugging()306 void UserClassifier::ClearClassificationForDebugging() {
307   // The pref_service_ can be null in tests.
308   if (!pref_service_) {
309     return;
310   }
311 
312   for (const Metric& metric : kMetrics) {
313     ClearMetricValue(metric);
314     SetLastTimeToNow(metric);
315   }
316 }
317 
UpdateMetricOnEvent(Metric metric)318 double UserClassifier::UpdateMetricOnEvent(Metric metric) {
319   // The pref_service_ can be null in tests.
320   if (!pref_service_) {
321     return 0;
322   }
323 
324   double hours_since_last_time =
325       std::min(max_hours_, GetHoursSinceLastTime(metric));
326   // Ignore events within the same "browsing session".
327   if (hours_since_last_time < min_hours_) {
328     return GetUpToDateMetricValue(metric);
329   }
330 
331   SetLastTimeToNow(metric);
332 
333   double metric_value = GetMetricValue(metric);
334   // Add 1 to the discounted metric as the event has happened right now.
335   double new_metric_value =
336       1 + DiscountMetric(metric_value, hours_since_last_time,
337                          discount_rate_per_hour_);
338   SetMetricValue(metric, new_metric_value);
339   return new_metric_value;
340 }
341 
GetUpToDateMetricValue(Metric metric) const342 double UserClassifier::GetUpToDateMetricValue(Metric metric) const {
343   // The pref_service_ can be null in tests.
344   if (!pref_service_) {
345     return 0;
346   }
347 
348   double hours_since_last_time =
349       std::min(max_hours_, GetHoursSinceLastTime(metric));
350 
351   double metric_value = GetMetricValue(metric);
352   return DiscountMetric(metric_value, hours_since_last_time,
353                         discount_rate_per_hour_);
354 }
355 
GetHoursSinceLastTime(Metric metric) const356 double UserClassifier::GetHoursSinceLastTime(Metric metric) const {
357   if (!HasLastTime(metric)) {
358     return 0;
359   }
360 
361   const base::TimeDelta since_last_time =
362       clock_->Now() - DeserializeTime(pref_service_->GetInt64(
363                           kLastTimeKeys[static_cast<int>(metric)]));
364   return since_last_time / base::TimeDelta::FromHours(1);
365 }
366 
HasLastTime(Metric metric) const367 bool UserClassifier::HasLastTime(Metric metric) const {
368   return pref_service_->HasPrefPath(kLastTimeKeys[static_cast<int>(metric)]);
369 }
370 
SetLastTimeToNow(Metric metric)371 void UserClassifier::SetLastTimeToNow(Metric metric) {
372   pref_service_->SetInt64(kLastTimeKeys[static_cast<int>(metric)],
373                           SerializeTime(clock_->Now()));
374 }
375 
GetMetricValue(Metric metric) const376 double UserClassifier::GetMetricValue(Metric metric) const {
377   return pref_service_->GetDouble(kMetricKeys[static_cast<int>(metric)]);
378 }
379 
SetMetricValue(Metric metric,double metric_value)380 void UserClassifier::SetMetricValue(Metric metric, double metric_value) {
381   pref_service_->SetDouble(kMetricKeys[static_cast<int>(metric)], metric_value);
382 }
383 
ClearMetricValue(Metric metric)384 void UserClassifier::ClearMetricValue(Metric metric) {
385   pref_service_->ClearPref(kMetricKeys[static_cast<int>(metric)]);
386 }
387 
388 }  // namespace ntp_snippets
389