1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/ntp_snippets/category_rankers/click_based_category_ranker.h"
6 
7 #include <algorithm>
8 #include <string>
9 #include <utility>
10 
11 #include "base/strings/string_number_conversions.h"
12 #include "base/strings/string_util.h"
13 #include "base/values.h"
14 #include "components/ntp_snippets/category_rankers/constant_category_ranker.h"
15 #include "components/ntp_snippets/content_suggestions_metrics.h"
16 #include "components/ntp_snippets/pref_names.h"
17 #include "components/ntp_snippets/time_serialization.h"
18 #include "components/prefs/pref_registry_simple.h"
19 #include "components/prefs/pref_service.h"
20 
21 namespace ntp_snippets {
22 
23 namespace {
24 
25 // In order to increase stability and predictability of the order, an extra
26 // level of "confidence" is required before moving a category upwards. In other
27 // words, the category is moved not when it reaches the previous one, but rather
28 // when it leads by some amount. We refer to this required extra "confidence" as
29 // a passing margin. Each position has its own passing margin. The category is
30 // moved upwards (i.e. passes another category) when it has at least passing
31 // margin of the previous category position more clicks.
32 const int kPassingMargin = 5;
33 
34 // The first categories get more attention and, therefore, here more stability
35 // is needed. The passing margin of such categories is increased and they are
36 // referred to as top categories (with extra margin). Only category position
37 // defines whether a category is top, but not its content.
38 const int kNumTopCategoriesWithExtraMargin = 3;
39 
40 // The increase of passing margin for each top category compared to the next
41 // category (e.g. the first top category has passing margin larger by this value
42 // than the second top category, the last top category has it larger by this
43 // value than the first non-top category).
44 const int kExtraPassingMargin = 2;
45 
46 // The ranker must "forget" history with time, so that changes in the user
47 // behavior are reflected by the order in reasonable time. This is done using
48 // click count decay with time. However, if there is not enough data, there is
49 // no need in "forgetting" it. This value defines how many total clicks (across
50 // categories) are considered enough to decay.
51 const int kMinNumClicksToDecay = 30;
52 
53 // Time between two consecutive decays (assuming enough clicks).
54 constexpr base::TimeDelta kTimeBetweenDecays =
55     base::TimeDelta::FromHours(24);  // 24 hours = 1 day
56 
57 // Decay factor as a fraction. The current value approximates the seventh root
58 // of 0.5. This yields a 50% decay per seven decays. Seven weak decays are used
59 // instead of one 50% decay in order to decrease difference of click weight in
60 // time.
61 const int kDecayFactorNumerator = 91;
62 const int kDecayFactorDenominator = 100;  // pow(0.91, 7) = 0.517
63 
64 // Number of positions by which a dismissed category is downgraded.
65 const int kDismissedCategoryPenalty = 1;
66 
67 const char kCategoryIdKey[] = "category";
68 const char kClicksKey[] = "clicks";
69 const char kLastDismissedKey[] = "last_dismissed";
70 
71 }  // namespace
72 
ClickBasedCategoryRanker(PrefService * pref_service,base::Clock * clock)73 ClickBasedCategoryRanker::ClickBasedCategoryRanker(PrefService* pref_service,
74                                                    base::Clock* clock)
75     : pref_service_(pref_service), clock_(clock) {
76   if (!ReadOrderFromPrefs(&ordered_categories_)) {
77     // TODO(crbug.com/676273): Handle adding new hardcoded KnownCategories to
78     // existing order from prefs. Currently such new category is completely
79     // ignored and may be never shown.
80     RestoreDefaultOrder();
81   }
82 
83   if (ReadLastDecayTimeFromPrefs() == DeserializeTime(0)) {
84     StoreLastDecayTimeToPrefs(clock_->Now());
85   }
86 }
87 
88 ClickBasedCategoryRanker::~ClickBasedCategoryRanker() = default;
89 
Compare(Category left,Category right) const90 bool ClickBasedCategoryRanker::Compare(Category left, Category right) const {
91   if (!ContainsCategory(left)) {
92     LOG(DFATAL) << "The category with ID " << left.id()
93                 << " has not been added using AppendCategoryIfNecessary.";
94   }
95   if (!ContainsCategory(right)) {
96     LOG(DFATAL) << "The category with ID " << right.id()
97                 << " has not been added using AppendCategoryIfNecessary.";
98   }
99   if (left == right) {
100     return false;
101   }
102   for (const RankedCategory& ranked_category : ordered_categories_) {
103     if (ranked_category.category == left) {
104       return true;
105     }
106     if (ranked_category.category == right) {
107       return false;
108     }
109   }
110   // This fallback is provided only to satisfy "Compare" contract if by mistake
111   // categories are not added using AppendCategoryIfNecessary. One should not
112   // rely on this, instead the order must be defined explicitly using
113   // AppendCategoryIfNecessary.
114   return left.id() < right.id();
115 }
116 
ClearHistory(base::Time begin,base::Time end)117 void ClickBasedCategoryRanker::ClearHistory(base::Time begin, base::Time end) {
118   // Ignore all partial removals and react only to "entire" history removal.
119   bool is_entire_history = (begin == base::Time() && end == base::Time::Max());
120   if (!is_entire_history) {
121     return;
122   }
123 
124   StoreLastDecayTimeToPrefs(DeserializeTime(0));
125 
126   // The categories added through |AppendCategoryIfNecessary| cannot be
127   // completely removed, since no one is required to reregister them. Instead
128   // they are preserved in the default order (sorted by id).
129   std::vector<RankedCategory> old_categories = ordered_categories_;
130   RestoreDefaultOrder();
131 
132   std::vector<Category> added_categories;
133   for (const RankedCategory& old_category : old_categories) {
134     auto it =
135         std::find_if(ordered_categories_.begin(), ordered_categories_.end(),
136                      [old_category](const RankedCategory& other) {
137                        return other.category == old_category.category;
138                      });
139     if (it == ordered_categories_.end()) {
140       added_categories.push_back(old_category.category);
141     }
142   }
143 
144   // Sort added categories by id to make their order history independent.
145   std::sort(added_categories.begin(), added_categories.end(),
146             Category::CompareByID());
147 
148   for (Category added_category : added_categories) {
149     ordered_categories_.push_back(RankedCategory(
150         added_category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
151   }
152 
153   StoreOrderToPrefs(ordered_categories_);
154 }
155 
AppendCategoryIfNecessary(Category category)156 void ClickBasedCategoryRanker::AppendCategoryIfNecessary(Category category) {
157   if (!ContainsCategory(category)) {
158     ordered_categories_.push_back(RankedCategory(
159         category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
160     StoreOrderToPrefs(ordered_categories_);
161   }
162 }
163 
InsertCategoryBeforeIfNecessary(Category category_to_insert,Category anchor)164 void ClickBasedCategoryRanker::InsertCategoryBeforeIfNecessary(
165     Category category_to_insert,
166     Category anchor) {
167   InsertCategoryRelativeToIfNecessary(category_to_insert, anchor,
168                                       /*after=*/false);
169 }
170 
InsertCategoryAfterIfNecessary(Category category_to_insert,Category anchor)171 void ClickBasedCategoryRanker::InsertCategoryAfterIfNecessary(
172     Category category_to_insert,
173     Category anchor) {
174   InsertCategoryRelativeToIfNecessary(category_to_insert, anchor,
175                                       /*after=*/true);
176 }
177 
178 std::vector<CategoryRanker::DebugDataItem>
GetDebugData()179 ClickBasedCategoryRanker::GetDebugData() {
180   std::vector<CategoryRanker::DebugDataItem> result;
181   result.push_back(
182       CategoryRanker::DebugDataItem("Type", "ClickBasedCategoryRanker"));
183 
184   std::vector<std::string> category_strings;
185   for (const auto& ranked_category : ordered_categories_) {
186     category_strings.push_back(base::ReplaceStringPlaceholders(
187         "($1; $2)",
188         {base::NumberToString(ranked_category.category.id()),
189          base::NumberToString(ranked_category.clicks)},
190         /*offsets=*/nullptr));
191   }
192   result.push_back(
193       CategoryRanker::DebugDataItem("Current order (with click counts)",
194                                     base::JoinString(category_strings, ", ")));
195 
196   return result;
197 }
198 
OnSuggestionOpened(Category category)199 void ClickBasedCategoryRanker::OnSuggestionOpened(Category category) {
200   if (!ContainsCategory(category)) {
201     LOG(DFATAL) << "The category with ID " << category.id()
202                 << " has not been added using AppendCategoryIfNecessary.";
203     return;
204   }
205 
206   DecayClicksIfNeeded();
207 
208   auto current = FindCategory(category);
209   DCHECK_GE(current->clicks, 0);
210   // The overflow is ignored. It is unlikely to happen, because of click count
211   // decay.
212   current->clicks++;
213 
214   // Move the category up if appropriate.
215   if (current != ordered_categories_.begin()) {
216     auto previous = current - 1;
217     const int passing_margin = GetPositionPassingMargin(previous);
218     if (current->clicks >= previous->clicks + passing_margin) {
219       const int new_index = previous - ordered_categories_.begin();
220       ntp_snippets::metrics::OnCategoryMovedUp(new_index);
221       // It is intended to move only by one position per click in order to avoid
222       // dramatic changes, which could confuse the user.
223       std::swap(*current, *previous);
224     }
225   }
226 
227   StoreOrderToPrefs(ordered_categories_);
228 }
229 
OnCategoryDismissed(Category category)230 void ClickBasedCategoryRanker::OnCategoryDismissed(Category category) {
231   if (!ContainsCategory(category)) {
232     LOG(DFATAL) << "The category with ID " << category.id()
233                 << " has not been added using AppendCategoryIfNecessary.";
234     return;
235   }
236 
237   const int penalty = GetDismissedCategoryPenalty();
238   if (penalty != 0) {  // Dismissed category penalty is turned on?
239     auto current = FindCategory(category);
240     for (int downgrade = 0; downgrade < penalty; ++downgrade) {
241       auto next = current + 1;
242       if (next == ordered_categories_.end()) {
243         break;
244       }
245       std::swap(*current, *next);
246       current = next;
247     }
248 
249     DCHECK(current != ordered_categories_.begin());
250     auto previous = current - 1;
251     int new_clicks = std::max(previous->clicks - GetPassingMargin(), 0);
252     // The previous category may have more clicks (but not enough to pass the
253     // margin, this is possible when penalty >= 2), therefore, we ensure that
254     // for this category we don't increase clicks.
255     current->clicks = std::min(current->clicks, new_clicks);
256   }
257   FindCategory(category)->last_dismissed = clock_->Now();
258   StoreOrderToPrefs(ordered_categories_);
259 }
260 
GetLastDecayTime() const261 base::Time ClickBasedCategoryRanker::GetLastDecayTime() const {
262   return ReadLastDecayTimeFromPrefs();
263 }
264 
265 // static
RegisterProfilePrefs(PrefRegistrySimple * registry)266 void ClickBasedCategoryRanker::RegisterProfilePrefs(
267     PrefRegistrySimple* registry) {
268   registry->RegisterListPref(prefs::kClickBasedCategoryRankerOrderWithClicks);
269   registry->RegisterInt64Pref(prefs::kClickBasedCategoryRankerLastDecayTime,
270                               /*default_value=*/0);
271 }
272 
273 // static
GetPassingMargin()274 int ClickBasedCategoryRanker::GetPassingMargin() {
275   return kPassingMargin;
276 }
277 
278 // static
GetNumTopCategoriesWithExtraMargin()279 int ClickBasedCategoryRanker::GetNumTopCategoriesWithExtraMargin() {
280   return kNumTopCategoriesWithExtraMargin;
281 }
282 
283 // static
GetDismissedCategoryPenalty()284 int ClickBasedCategoryRanker::GetDismissedCategoryPenalty() {
285   return kDismissedCategoryPenalty;
286 }
287 
RankedCategory(Category category,int clicks,const base::Time & last_dismissed)288 ClickBasedCategoryRanker::RankedCategory::RankedCategory(
289     Category category,
290     int clicks,
291     const base::Time& last_dismissed)
292     : category(category), clicks(clicks), last_dismissed(last_dismissed) {}
293 
294 // Returns passing margin for a given position taking into account whether it is
295 // a top category.
GetPositionPassingMargin(std::vector<RankedCategory>::const_iterator category_position) const296 int ClickBasedCategoryRanker::GetPositionPassingMargin(
297     std::vector<RankedCategory>::const_iterator category_position) const {
298   int index = category_position - ordered_categories_.cbegin();
299   int passing_margin_increase = 0;
300   const int num_top_categories_with_extra_margin =
301       GetNumTopCategoriesWithExtraMargin();
302   if (index < num_top_categories_with_extra_margin) {
303     passing_margin_increase =
304         kExtraPassingMargin * (num_top_categories_with_extra_margin - index);
305   }
306   return GetPassingMargin() + passing_margin_increase;
307 }
308 
RestoreDefaultOrder()309 void ClickBasedCategoryRanker::RestoreDefaultOrder() {
310   ordered_categories_.clear();
311 
312   std::vector<KnownCategories> ordered_known_categories =
313       ConstantCategoryRanker::GetKnownCategoriesDefaultOrder();
314 
315   for (KnownCategories known_category : ordered_known_categories) {
316     AppendKnownCategory(known_category);
317   }
318 
319   StoreOrderToPrefs(ordered_categories_);
320 }
321 
AppendKnownCategory(KnownCategories known_category)322 void ClickBasedCategoryRanker::AppendKnownCategory(
323     KnownCategories known_category) {
324   Category category = Category::FromKnownCategory(known_category);
325   DCHECK(!ContainsCategory(category));
326   ordered_categories_.push_back(
327       RankedCategory(category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
328 }
329 
330 namespace {
331 
ParseLastDismissedDate(const base::DictionaryValue & value)332 base::Time ParseLastDismissedDate(const base::DictionaryValue& value) {
333   // We don't expect the last-dismissed value to be present in all cases (we
334   // added this after the fact).
335   std::string serialized_value;
336   int64_t parsed_value;
337   if (value.GetString(kLastDismissedKey, &serialized_value) &&
338       base::StringToInt64(serialized_value, &parsed_value)) {
339     return DeserializeTime(parsed_value);
340   }
341   return base::Time();
342 }
343 
344 }  // namespace
345 
ReadOrderFromPrefs(std::vector<RankedCategory> * result_categories) const346 bool ClickBasedCategoryRanker::ReadOrderFromPrefs(
347     std::vector<RankedCategory>* result_categories) const {
348   result_categories->clear();
349   const base::ListValue* list =
350       pref_service_->GetList(prefs::kClickBasedCategoryRankerOrderWithClicks);
351   if (!list || list->GetSize() == 0) {
352     return false;
353   }
354 
355   for (const base::Value& value : *list) {
356     const base::DictionaryValue* dictionary;
357     if (!value.GetAsDictionary(&dictionary)) {
358       LOG(DFATAL) << "Failed to parse category data from prefs param "
359                   << prefs::kClickBasedCategoryRankerOrderWithClicks
360                   << " into dictionary.";
361       return false;
362     }
363     int category_id, clicks;
364     if (!dictionary->GetInteger(kCategoryIdKey, &category_id)) {
365       LOG(DFATAL) << "Dictionary does not have '" << kCategoryIdKey << "' key.";
366       return false;
367     }
368     if (!dictionary->GetInteger(kClicksKey, &clicks)) {
369       LOG(DFATAL) << "Dictionary does not have '" << kClicksKey << "' key.";
370       return false;
371     }
372     base::Time last_dismissed = ParseLastDismissedDate(*dictionary);
373     Category category = Category::FromIDValue(category_id);
374     result_categories->push_back(
375         RankedCategory(category, clicks, last_dismissed));
376   }
377   return true;
378 }
379 
StoreOrderToPrefs(const std::vector<RankedCategory> & ordered_categories)380 void ClickBasedCategoryRanker::StoreOrderToPrefs(
381     const std::vector<RankedCategory>& ordered_categories) {
382   base::ListValue list;
383   for (const RankedCategory& category : ordered_categories) {
384     auto dictionary = std::make_unique<base::DictionaryValue>();
385     dictionary->SetInteger(kCategoryIdKey, category.category.id());
386     dictionary->SetInteger(kClicksKey, category.clicks);
387     dictionary->SetString(
388         kLastDismissedKey,
389         base::NumberToString(SerializeTime(category.last_dismissed)));
390     list.Append(std::move(dictionary));
391   }
392   pref_service_->Set(prefs::kClickBasedCategoryRankerOrderWithClicks, list);
393 }
394 
395 std::vector<ClickBasedCategoryRanker::RankedCategory>::iterator
FindCategory(Category category)396 ClickBasedCategoryRanker::FindCategory(Category category) {
397   return std::find_if(ordered_categories_.begin(), ordered_categories_.end(),
398                       [category](const RankedCategory& ranked_category) {
399                         return category == ranked_category.category;
400                       });
401 }
402 
ContainsCategory(Category category) const403 bool ClickBasedCategoryRanker::ContainsCategory(Category category) const {
404   for (const auto& ranked_category : ordered_categories_) {
405     if (category == ranked_category.category) {
406       return true;
407     }
408   }
409   return false;
410 }
411 
InsertCategoryRelativeToIfNecessary(Category category_to_insert,Category anchor,bool after)412 void ClickBasedCategoryRanker::InsertCategoryRelativeToIfNecessary(
413     Category category_to_insert,
414     Category anchor,
415     bool after) {
416   DCHECK(ContainsCategory(anchor));
417   if (ContainsCategory(category_to_insert)) {
418     return;
419   }
420 
421   auto anchor_it = FindCategory(anchor);
422   ordered_categories_.insert(anchor_it + (after ? 1 : 0),
423                              RankedCategory(category_to_insert,
424                                             /*clicks=*/anchor_it->clicks,
425                                             /*last_dismissed=*/base::Time()));
426   StoreOrderToPrefs(ordered_categories_);
427 }
428 
ReadLastDecayTimeFromPrefs() const429 base::Time ClickBasedCategoryRanker::ReadLastDecayTimeFromPrefs() const {
430   return DeserializeTime(
431       pref_service_->GetInt64(prefs::kClickBasedCategoryRankerLastDecayTime));
432 }
433 
StoreLastDecayTimeToPrefs(base::Time last_decay_time)434 void ClickBasedCategoryRanker::StoreLastDecayTimeToPrefs(
435     base::Time last_decay_time) {
436   pref_service_->SetInt64(prefs::kClickBasedCategoryRankerLastDecayTime,
437                           SerializeTime(last_decay_time));
438 }
439 
IsEnoughClicksToDecay() const440 bool ClickBasedCategoryRanker::IsEnoughClicksToDecay() const {
441   int64_t num_clicks = 0;
442   for (const RankedCategory& ranked_category : ordered_categories_) {
443     num_clicks += ranked_category.clicks;
444   }
445   return num_clicks >= kMinNumClicksToDecay;
446 }
447 
DecayClicksIfNeeded()448 bool ClickBasedCategoryRanker::DecayClicksIfNeeded() {
449   base::Time now = clock_->Now();
450   base::Time last_decay = ReadLastDecayTimeFromPrefs();
451   if (last_decay == base::Time::FromInternalValue(0)) {
452     // No last decay time, start from now.
453     StoreLastDecayTimeToPrefs(clock_->Now());
454     return false;
455   }
456   DCHECK_LE(last_decay, now);
457 
458   int num_pending_decays = (now - last_decay) / kTimeBetweenDecays;
459   int executed_decays = 0;
460   while (executed_decays < num_pending_decays && IsEnoughClicksToDecay()) {
461     for (RankedCategory& ranked_category : ordered_categories_) {
462       DCHECK_GE(ranked_category.clicks, 0);
463       const int64_t old_clicks = static_cast<int64_t>(ranked_category.clicks);
464       ranked_category.clicks =
465           old_clicks * kDecayFactorNumerator / kDecayFactorDenominator;
466     }
467 
468     ++executed_decays;
469   }
470 
471   // No matter how many decays were actually executed, all of them are marked
472   // done. Even if some were ignored due to absense of clicks, they would have
473   // no effect anyway for the same reason.
474   StoreLastDecayTimeToPrefs(last_decay +
475                             num_pending_decays * kTimeBetweenDecays);
476 
477   if (executed_decays > 0) {
478     StoreOrderToPrefs(ordered_categories_);
479     return true;
480   }
481   return false;
482 }
483 
484 }  // namespace ntp_snippets
485