1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/ntp_snippets/category_rankers/click_based_category_ranker.h"
6
7 #include <algorithm>
8 #include <string>
9 #include <utility>
10
11 #include "base/strings/string_number_conversions.h"
12 #include "base/strings/string_util.h"
13 #include "base/values.h"
14 #include "components/ntp_snippets/category_rankers/constant_category_ranker.h"
15 #include "components/ntp_snippets/content_suggestions_metrics.h"
16 #include "components/ntp_snippets/pref_names.h"
17 #include "components/ntp_snippets/time_serialization.h"
18 #include "components/prefs/pref_registry_simple.h"
19 #include "components/prefs/pref_service.h"
20
21 namespace ntp_snippets {
22
23 namespace {
24
25 // In order to increase stability and predictability of the order, an extra
26 // level of "confidence" is required before moving a category upwards. In other
27 // words, the category is moved not when it reaches the previous one, but rather
28 // when it leads by some amount. We refer to this required extra "confidence" as
29 // a passing margin. Each position has its own passing margin. The category is
30 // moved upwards (i.e. passes another category) when it has at least passing
31 // margin of the previous category position more clicks.
32 const int kPassingMargin = 5;
33
34 // The first categories get more attention and, therefore, here more stability
35 // is needed. The passing margin of such categories is increased and they are
36 // referred to as top categories (with extra margin). Only category position
37 // defines whether a category is top, but not its content.
38 const int kNumTopCategoriesWithExtraMargin = 3;
39
40 // The increase of passing margin for each top category compared to the next
41 // category (e.g. the first top category has passing margin larger by this value
42 // than the second top category, the last top category has it larger by this
43 // value than the first non-top category).
44 const int kExtraPassingMargin = 2;
45
46 // The ranker must "forget" history with time, so that changes in the user
47 // behavior are reflected by the order in reasonable time. This is done using
48 // click count decay with time. However, if there is not enough data, there is
49 // no need in "forgetting" it. This value defines how many total clicks (across
50 // categories) are considered enough to decay.
51 const int kMinNumClicksToDecay = 30;
52
53 // Time between two consecutive decays (assuming enough clicks).
54 constexpr base::TimeDelta kTimeBetweenDecays =
55 base::TimeDelta::FromHours(24); // 24 hours = 1 day
56
57 // Decay factor as a fraction. The current value approximates the seventh root
58 // of 0.5. This yields a 50% decay per seven decays. Seven weak decays are used
59 // instead of one 50% decay in order to decrease difference of click weight in
60 // time.
61 const int kDecayFactorNumerator = 91;
62 const int kDecayFactorDenominator = 100; // pow(0.91, 7) = 0.517
63
64 // Number of positions by which a dismissed category is downgraded.
65 const int kDismissedCategoryPenalty = 1;
66
67 const char kCategoryIdKey[] = "category";
68 const char kClicksKey[] = "clicks";
69 const char kLastDismissedKey[] = "last_dismissed";
70
71 } // namespace
72
ClickBasedCategoryRanker(PrefService * pref_service,base::Clock * clock)73 ClickBasedCategoryRanker::ClickBasedCategoryRanker(PrefService* pref_service,
74 base::Clock* clock)
75 : pref_service_(pref_service), clock_(clock) {
76 if (!ReadOrderFromPrefs(&ordered_categories_)) {
77 // TODO(crbug.com/676273): Handle adding new hardcoded KnownCategories to
78 // existing order from prefs. Currently such new category is completely
79 // ignored and may be never shown.
80 RestoreDefaultOrder();
81 }
82
83 if (ReadLastDecayTimeFromPrefs() == DeserializeTime(0)) {
84 StoreLastDecayTimeToPrefs(clock_->Now());
85 }
86 }
87
88 ClickBasedCategoryRanker::~ClickBasedCategoryRanker() = default;
89
Compare(Category left,Category right) const90 bool ClickBasedCategoryRanker::Compare(Category left, Category right) const {
91 if (!ContainsCategory(left)) {
92 LOG(DFATAL) << "The category with ID " << left.id()
93 << " has not been added using AppendCategoryIfNecessary.";
94 }
95 if (!ContainsCategory(right)) {
96 LOG(DFATAL) << "The category with ID " << right.id()
97 << " has not been added using AppendCategoryIfNecessary.";
98 }
99 if (left == right) {
100 return false;
101 }
102 for (const RankedCategory& ranked_category : ordered_categories_) {
103 if (ranked_category.category == left) {
104 return true;
105 }
106 if (ranked_category.category == right) {
107 return false;
108 }
109 }
110 // This fallback is provided only to satisfy "Compare" contract if by mistake
111 // categories are not added using AppendCategoryIfNecessary. One should not
112 // rely on this, instead the order must be defined explicitly using
113 // AppendCategoryIfNecessary.
114 return left.id() < right.id();
115 }
116
ClearHistory(base::Time begin,base::Time end)117 void ClickBasedCategoryRanker::ClearHistory(base::Time begin, base::Time end) {
118 // Ignore all partial removals and react only to "entire" history removal.
119 bool is_entire_history = (begin == base::Time() && end == base::Time::Max());
120 if (!is_entire_history) {
121 return;
122 }
123
124 StoreLastDecayTimeToPrefs(DeserializeTime(0));
125
126 // The categories added through |AppendCategoryIfNecessary| cannot be
127 // completely removed, since no one is required to reregister them. Instead
128 // they are preserved in the default order (sorted by id).
129 std::vector<RankedCategory> old_categories = ordered_categories_;
130 RestoreDefaultOrder();
131
132 std::vector<Category> added_categories;
133 for (const RankedCategory& old_category : old_categories) {
134 auto it =
135 std::find_if(ordered_categories_.begin(), ordered_categories_.end(),
136 [old_category](const RankedCategory& other) {
137 return other.category == old_category.category;
138 });
139 if (it == ordered_categories_.end()) {
140 added_categories.push_back(old_category.category);
141 }
142 }
143
144 // Sort added categories by id to make their order history independent.
145 std::sort(added_categories.begin(), added_categories.end(),
146 Category::CompareByID());
147
148 for (Category added_category : added_categories) {
149 ordered_categories_.push_back(RankedCategory(
150 added_category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
151 }
152
153 StoreOrderToPrefs(ordered_categories_);
154 }
155
AppendCategoryIfNecessary(Category category)156 void ClickBasedCategoryRanker::AppendCategoryIfNecessary(Category category) {
157 if (!ContainsCategory(category)) {
158 ordered_categories_.push_back(RankedCategory(
159 category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
160 StoreOrderToPrefs(ordered_categories_);
161 }
162 }
163
InsertCategoryBeforeIfNecessary(Category category_to_insert,Category anchor)164 void ClickBasedCategoryRanker::InsertCategoryBeforeIfNecessary(
165 Category category_to_insert,
166 Category anchor) {
167 InsertCategoryRelativeToIfNecessary(category_to_insert, anchor,
168 /*after=*/false);
169 }
170
InsertCategoryAfterIfNecessary(Category category_to_insert,Category anchor)171 void ClickBasedCategoryRanker::InsertCategoryAfterIfNecessary(
172 Category category_to_insert,
173 Category anchor) {
174 InsertCategoryRelativeToIfNecessary(category_to_insert, anchor,
175 /*after=*/true);
176 }
177
178 std::vector<CategoryRanker::DebugDataItem>
GetDebugData()179 ClickBasedCategoryRanker::GetDebugData() {
180 std::vector<CategoryRanker::DebugDataItem> result;
181 result.push_back(
182 CategoryRanker::DebugDataItem("Type", "ClickBasedCategoryRanker"));
183
184 std::vector<std::string> category_strings;
185 for (const auto& ranked_category : ordered_categories_) {
186 category_strings.push_back(base::ReplaceStringPlaceholders(
187 "($1; $2)",
188 {base::NumberToString(ranked_category.category.id()),
189 base::NumberToString(ranked_category.clicks)},
190 /*offsets=*/nullptr));
191 }
192 result.push_back(
193 CategoryRanker::DebugDataItem("Current order (with click counts)",
194 base::JoinString(category_strings, ", ")));
195
196 return result;
197 }
198
OnSuggestionOpened(Category category)199 void ClickBasedCategoryRanker::OnSuggestionOpened(Category category) {
200 if (!ContainsCategory(category)) {
201 LOG(DFATAL) << "The category with ID " << category.id()
202 << " has not been added using AppendCategoryIfNecessary.";
203 return;
204 }
205
206 DecayClicksIfNeeded();
207
208 auto current = FindCategory(category);
209 DCHECK_GE(current->clicks, 0);
210 // The overflow is ignored. It is unlikely to happen, because of click count
211 // decay.
212 current->clicks++;
213
214 // Move the category up if appropriate.
215 if (current != ordered_categories_.begin()) {
216 auto previous = current - 1;
217 const int passing_margin = GetPositionPassingMargin(previous);
218 if (current->clicks >= previous->clicks + passing_margin) {
219 const int new_index = previous - ordered_categories_.begin();
220 ntp_snippets::metrics::OnCategoryMovedUp(new_index);
221 // It is intended to move only by one position per click in order to avoid
222 // dramatic changes, which could confuse the user.
223 std::swap(*current, *previous);
224 }
225 }
226
227 StoreOrderToPrefs(ordered_categories_);
228 }
229
OnCategoryDismissed(Category category)230 void ClickBasedCategoryRanker::OnCategoryDismissed(Category category) {
231 if (!ContainsCategory(category)) {
232 LOG(DFATAL) << "The category with ID " << category.id()
233 << " has not been added using AppendCategoryIfNecessary.";
234 return;
235 }
236
237 const int penalty = GetDismissedCategoryPenalty();
238 if (penalty != 0) { // Dismissed category penalty is turned on?
239 auto current = FindCategory(category);
240 for (int downgrade = 0; downgrade < penalty; ++downgrade) {
241 auto next = current + 1;
242 if (next == ordered_categories_.end()) {
243 break;
244 }
245 std::swap(*current, *next);
246 current = next;
247 }
248
249 DCHECK(current != ordered_categories_.begin());
250 auto previous = current - 1;
251 int new_clicks = std::max(previous->clicks - GetPassingMargin(), 0);
252 // The previous category may have more clicks (but not enough to pass the
253 // margin, this is possible when penalty >= 2), therefore, we ensure that
254 // for this category we don't increase clicks.
255 current->clicks = std::min(current->clicks, new_clicks);
256 }
257 FindCategory(category)->last_dismissed = clock_->Now();
258 StoreOrderToPrefs(ordered_categories_);
259 }
260
GetLastDecayTime() const261 base::Time ClickBasedCategoryRanker::GetLastDecayTime() const {
262 return ReadLastDecayTimeFromPrefs();
263 }
264
265 // static
RegisterProfilePrefs(PrefRegistrySimple * registry)266 void ClickBasedCategoryRanker::RegisterProfilePrefs(
267 PrefRegistrySimple* registry) {
268 registry->RegisterListPref(prefs::kClickBasedCategoryRankerOrderWithClicks);
269 registry->RegisterInt64Pref(prefs::kClickBasedCategoryRankerLastDecayTime,
270 /*default_value=*/0);
271 }
272
273 // static
GetPassingMargin()274 int ClickBasedCategoryRanker::GetPassingMargin() {
275 return kPassingMargin;
276 }
277
278 // static
GetNumTopCategoriesWithExtraMargin()279 int ClickBasedCategoryRanker::GetNumTopCategoriesWithExtraMargin() {
280 return kNumTopCategoriesWithExtraMargin;
281 }
282
283 // static
GetDismissedCategoryPenalty()284 int ClickBasedCategoryRanker::GetDismissedCategoryPenalty() {
285 return kDismissedCategoryPenalty;
286 }
287
RankedCategory(Category category,int clicks,const base::Time & last_dismissed)288 ClickBasedCategoryRanker::RankedCategory::RankedCategory(
289 Category category,
290 int clicks,
291 const base::Time& last_dismissed)
292 : category(category), clicks(clicks), last_dismissed(last_dismissed) {}
293
294 // Returns passing margin for a given position taking into account whether it is
295 // a top category.
GetPositionPassingMargin(std::vector<RankedCategory>::const_iterator category_position) const296 int ClickBasedCategoryRanker::GetPositionPassingMargin(
297 std::vector<RankedCategory>::const_iterator category_position) const {
298 int index = category_position - ordered_categories_.cbegin();
299 int passing_margin_increase = 0;
300 const int num_top_categories_with_extra_margin =
301 GetNumTopCategoriesWithExtraMargin();
302 if (index < num_top_categories_with_extra_margin) {
303 passing_margin_increase =
304 kExtraPassingMargin * (num_top_categories_with_extra_margin - index);
305 }
306 return GetPassingMargin() + passing_margin_increase;
307 }
308
RestoreDefaultOrder()309 void ClickBasedCategoryRanker::RestoreDefaultOrder() {
310 ordered_categories_.clear();
311
312 std::vector<KnownCategories> ordered_known_categories =
313 ConstantCategoryRanker::GetKnownCategoriesDefaultOrder();
314
315 for (KnownCategories known_category : ordered_known_categories) {
316 AppendKnownCategory(known_category);
317 }
318
319 StoreOrderToPrefs(ordered_categories_);
320 }
321
AppendKnownCategory(KnownCategories known_category)322 void ClickBasedCategoryRanker::AppendKnownCategory(
323 KnownCategories known_category) {
324 Category category = Category::FromKnownCategory(known_category);
325 DCHECK(!ContainsCategory(category));
326 ordered_categories_.push_back(
327 RankedCategory(category, /*clicks=*/0, /*last_dismissed=*/base::Time()));
328 }
329
330 namespace {
331
ParseLastDismissedDate(const base::DictionaryValue & value)332 base::Time ParseLastDismissedDate(const base::DictionaryValue& value) {
333 // We don't expect the last-dismissed value to be present in all cases (we
334 // added this after the fact).
335 std::string serialized_value;
336 int64_t parsed_value;
337 if (value.GetString(kLastDismissedKey, &serialized_value) &&
338 base::StringToInt64(serialized_value, &parsed_value)) {
339 return DeserializeTime(parsed_value);
340 }
341 return base::Time();
342 }
343
344 } // namespace
345
ReadOrderFromPrefs(std::vector<RankedCategory> * result_categories) const346 bool ClickBasedCategoryRanker::ReadOrderFromPrefs(
347 std::vector<RankedCategory>* result_categories) const {
348 result_categories->clear();
349 const base::ListValue* list =
350 pref_service_->GetList(prefs::kClickBasedCategoryRankerOrderWithClicks);
351 if (!list || list->GetSize() == 0) {
352 return false;
353 }
354
355 for (const base::Value& value : *list) {
356 const base::DictionaryValue* dictionary;
357 if (!value.GetAsDictionary(&dictionary)) {
358 LOG(DFATAL) << "Failed to parse category data from prefs param "
359 << prefs::kClickBasedCategoryRankerOrderWithClicks
360 << " into dictionary.";
361 return false;
362 }
363 int category_id, clicks;
364 if (!dictionary->GetInteger(kCategoryIdKey, &category_id)) {
365 LOG(DFATAL) << "Dictionary does not have '" << kCategoryIdKey << "' key.";
366 return false;
367 }
368 if (!dictionary->GetInteger(kClicksKey, &clicks)) {
369 LOG(DFATAL) << "Dictionary does not have '" << kClicksKey << "' key.";
370 return false;
371 }
372 base::Time last_dismissed = ParseLastDismissedDate(*dictionary);
373 Category category = Category::FromIDValue(category_id);
374 result_categories->push_back(
375 RankedCategory(category, clicks, last_dismissed));
376 }
377 return true;
378 }
379
StoreOrderToPrefs(const std::vector<RankedCategory> & ordered_categories)380 void ClickBasedCategoryRanker::StoreOrderToPrefs(
381 const std::vector<RankedCategory>& ordered_categories) {
382 base::ListValue list;
383 for (const RankedCategory& category : ordered_categories) {
384 auto dictionary = std::make_unique<base::DictionaryValue>();
385 dictionary->SetInteger(kCategoryIdKey, category.category.id());
386 dictionary->SetInteger(kClicksKey, category.clicks);
387 dictionary->SetString(
388 kLastDismissedKey,
389 base::NumberToString(SerializeTime(category.last_dismissed)));
390 list.Append(std::move(dictionary));
391 }
392 pref_service_->Set(prefs::kClickBasedCategoryRankerOrderWithClicks, list);
393 }
394
395 std::vector<ClickBasedCategoryRanker::RankedCategory>::iterator
FindCategory(Category category)396 ClickBasedCategoryRanker::FindCategory(Category category) {
397 return std::find_if(ordered_categories_.begin(), ordered_categories_.end(),
398 [category](const RankedCategory& ranked_category) {
399 return category == ranked_category.category;
400 });
401 }
402
ContainsCategory(Category category) const403 bool ClickBasedCategoryRanker::ContainsCategory(Category category) const {
404 for (const auto& ranked_category : ordered_categories_) {
405 if (category == ranked_category.category) {
406 return true;
407 }
408 }
409 return false;
410 }
411
InsertCategoryRelativeToIfNecessary(Category category_to_insert,Category anchor,bool after)412 void ClickBasedCategoryRanker::InsertCategoryRelativeToIfNecessary(
413 Category category_to_insert,
414 Category anchor,
415 bool after) {
416 DCHECK(ContainsCategory(anchor));
417 if (ContainsCategory(category_to_insert)) {
418 return;
419 }
420
421 auto anchor_it = FindCategory(anchor);
422 ordered_categories_.insert(anchor_it + (after ? 1 : 0),
423 RankedCategory(category_to_insert,
424 /*clicks=*/anchor_it->clicks,
425 /*last_dismissed=*/base::Time()));
426 StoreOrderToPrefs(ordered_categories_);
427 }
428
ReadLastDecayTimeFromPrefs() const429 base::Time ClickBasedCategoryRanker::ReadLastDecayTimeFromPrefs() const {
430 return DeserializeTime(
431 pref_service_->GetInt64(prefs::kClickBasedCategoryRankerLastDecayTime));
432 }
433
StoreLastDecayTimeToPrefs(base::Time last_decay_time)434 void ClickBasedCategoryRanker::StoreLastDecayTimeToPrefs(
435 base::Time last_decay_time) {
436 pref_service_->SetInt64(prefs::kClickBasedCategoryRankerLastDecayTime,
437 SerializeTime(last_decay_time));
438 }
439
IsEnoughClicksToDecay() const440 bool ClickBasedCategoryRanker::IsEnoughClicksToDecay() const {
441 int64_t num_clicks = 0;
442 for (const RankedCategory& ranked_category : ordered_categories_) {
443 num_clicks += ranked_category.clicks;
444 }
445 return num_clicks >= kMinNumClicksToDecay;
446 }
447
DecayClicksIfNeeded()448 bool ClickBasedCategoryRanker::DecayClicksIfNeeded() {
449 base::Time now = clock_->Now();
450 base::Time last_decay = ReadLastDecayTimeFromPrefs();
451 if (last_decay == base::Time::FromInternalValue(0)) {
452 // No last decay time, start from now.
453 StoreLastDecayTimeToPrefs(clock_->Now());
454 return false;
455 }
456 DCHECK_LE(last_decay, now);
457
458 int num_pending_decays = (now - last_decay) / kTimeBetweenDecays;
459 int executed_decays = 0;
460 while (executed_decays < num_pending_decays && IsEnoughClicksToDecay()) {
461 for (RankedCategory& ranked_category : ordered_categories_) {
462 DCHECK_GE(ranked_category.clicks, 0);
463 const int64_t old_clicks = static_cast<int64_t>(ranked_category.clicks);
464 ranked_category.clicks =
465 old_clicks * kDecayFactorNumerator / kDecayFactorDenominator;
466 }
467
468 ++executed_decays;
469 }
470
471 // No matter how many decays were actually executed, all of them are marked
472 // done. Even if some were ignored due to absense of clicks, they would have
473 // no effect anyway for the same reason.
474 StoreLastDecayTimeToPrefs(last_decay +
475 num_pending_decays * kTimeBetweenDecays);
476
477 if (executed_decays > 0) {
478 StoreOrderToPrefs(ordered_categories_);
479 return true;
480 }
481 return false;
482 }
483
484 } // namespace ntp_snippets
485