1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4"use strict";
5
6const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
7
8ChromeUtils.defineModuleGetter(
9  this,
10  "PlacesUtils",
11  "resource://gre/modules/PlacesUtils.jsm"
12);
13
14const DEFAULT_TIME_SEGMENTS = [
15  { id: "hour", startTime: 3600, endTime: 0, weightPosition: 1 },
16  { id: "day", startTime: 86400, endTime: 3600, weightPosition: 0.75 },
17  { id: "week", startTime: 604800, endTime: 86400, weightPosition: 0.5 },
18  { id: "weekPlus", startTime: 0, endTime: 604800, weightPosition: 0.25 },
19  { id: "alltime", startTime: 0, endTime: 0, weightPosition: 0.25 },
20];
21
22const DEFAULT_PARAMETER_SETS = {
23  "linear-frequency": {
24    recencyFactor: 0.4,
25    frequencyFactor: 0.5,
26    combinedDomainFactor: 0.5,
27    perfectFrequencyVisits: 10,
28    perfectCombinedDomainScore: 2,
29    multiDomainBoost: 0.1,
30    itemScoreFactor: 0,
31  },
32};
33
34const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;
35
36function merge(...args) {
37  return Object.assign.apply(this, args);
38}
39
40/**
41 * Provides functionality to personalize content recommendations by calculating
42 * user domain affinity scores. These scores are used to calculate relevance
43 * scores for items/recs/stories that have domain affinities.
44 *
45 * The algorithm works as follows:
46 *
47 * - The recommendation endpoint returns a settings object containing
48 * timeSegments and parametersets.
49 *
50 * - For every time segment we calculate the corresponding domain visit counts,
51 * yielding result objects of the following structure: {"mozilla.org": 12,
52 * "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
53 *
54 * - These visit counts are transformed to domain affinity scores for all
55 * provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
56 * "paramSet2": 0.9}, "mozilla.org": {"paramSet1": 1, "paramSet2": 0.9}}
57 * (see UserDomainAffinityProvider#calculateScoresForParameterSets)
58 *
59 * - The parameter sets provide factors for weighting which allows for
60 * flexible targeting. The functionality to calculate final scores can
61 * be seen in UserDomainAffinityProvider#calculateScores
62 *
63 * - The user domain affinity scores are summed up across all time segments
64 * see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores
65 *
66 * - An item's domain affinities are matched to the user's domain affinity
67 * scores by calculating an item relevance score
68 * (see UserDomainAffinityProvider#calculateItemRelevanceScore)
69 *
70 * - The item relevance scores are used to sort items (see TopStoriesFeed for
71 * more details)
72 *
73 * - The data structure was chosen to allow for fast cache lookups during
74 * relevance score calculation. While user domain affinities are calculated
75 * infrequently (i.e. only once a day), the item relevance score (potentially)
76 * needs to be calculated every time the feed updates. Therefore allowing cache
77 * lookups of scores[domain][parameterSet] is beneficial
78 */
79this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
80  constructor(
81    timeSegments = DEFAULT_TIME_SEGMENTS,
82    parameterSets = DEFAULT_PARAMETER_SETS,
83    maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
84    version,
85    scores
86  ) {
87    this.timeSegments = timeSegments;
88    this.maxHistoryQueryResults = maxHistoryQueryResults;
89    this.version = version;
90    if (scores) {
91      this.parameterSets = parameterSets;
92      this.scores = scores;
93    } else {
94      this.parameterSets = this.prepareParameterSets(parameterSets);
95      this.scores = this.calculateAllUserDomainAffinityScores();
96    }
97  }
98
99  /**
100   * Adds dynamic parameters to the given parameter sets that need to be
101   * computed based on time segments.
102   *
103   * @param ps The parameter sets
104   * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights)
105   */
106  prepareParameterSets(ps) {
107    return (
108      Object.keys(ps)
109        // Add timeSegmentWeight fields to param sets e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
110        .map(k => ({
111          [k]: merge(ps[k], {
112            timeSegmentWeights: this.calculateTimeSegmentWeights(
113              ps[k].recencyFactor
114            ),
115          }),
116        }))
117        .reduce((acc, cur) => merge(acc, cur))
118    );
119  }
120
121  /**
122   * Calculates a time segment weight based on the provided recencyFactor.
123   *
124   * @param recencyFactor The recency factor indicating how to weigh recency
125   * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
126   */
127  calculateTimeSegmentWeights(recencyFactor) {
128    return this.timeSegments.reduce(
129      (acc, cur) =>
130        merge(acc, {
131          [cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor),
132        }),
133      {}
134    );
135  }
136
137  /**
138   * Calculates user domain affinity scores based on browsing history and the
139   * available times segments and parameter sets.
140   */
141  calculateAllUserDomainAffinityScores() {
142    return (
143      this.timeSegments
144        // Calculate parameter set specific domain scores for each time segment
145        // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
146        .map(ts => this.calculateUserDomainAffinityScores(ts))
147        // Keep format, but reduce to single object, with combined scores across all time segments
148        // => "{a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}""
149        .reduce((acc, cur) => this._combineScores(acc, cur))
150    );
151  }
152
153  /**
154   * Calculates the user domain affinity scores for the given time segment.
155   *
156   * @param ts The time segment
157   * @return The parameter specific scores for all domains with visits in
158   * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
159   */
160  calculateUserDomainAffinityScores(ts) {
161    // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
162    let visits = this.queryVisits(ts);
163
164    return Object.keys(visits).reduce(
165      (acc, d) =>
166        merge(acc, {
167          [d]: this.calculateScoresForParameterSets(ts, visits[d]),
168        }),
169      {}
170    );
171  }
172
173  /**
174   * Calculates the scores for all parameter sets for the given time segment
175   * and domain visit count.
176   *
177   * @param ts The time segment
178   * @param vc The domain visit count in the given time segment
179   * @return The parameter specific scores for the visit count in
180   * this time segment: {"ps1": 12, "ps2": 34}
181   */
182  calculateScoresForParameterSets(ts, vc) {
183    return Object.keys(this.parameterSets).reduce(
184      (acc, ps) =>
185        merge(acc, {
186          [ps]: this.calculateScoreForParameterSet(
187            ts,
188            vc,
189            this.parameterSets[ps]
190          ),
191        }),
192      {}
193    );
194  }
195
196  /**
197   * Calculates the final affinity score in the given time segment for the given parameter set
198   *
199   * @param timeSegment The time segment
200   * @param visitCount The domain visit count in the given time segment
201   * @param parameterSet The parameter set to use for scoring
202   * @return The final score
203   */
204  calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
205    return this.calculateScore(
206      visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
207      parameterSet.perfectFrequencyVisits,
208      parameterSet.frequencyFactor
209    );
210  }
211
212  /**
213   * Keeps the same format, but reduces the two objects to a single object, with
214   * combined scores across all time segments  => {a.com":{"ps1":2,"ps2":2},
215   * "b.com":{"ps1":3,"ps2":3}}
216   */
217  _combineScores(a, b) {
218    // Merge both score objects so we get a combined object holding all domains.
219    // This is so we can combine them without missing domains that are in a and not in b and vice versa.
220    const c = merge({}, a, b);
221    return Object.keys(c).reduce(
222      (acc, d) => merge(acc, this._combine(a, b, c, d)),
223      {}
224    );
225  }
226
227  _combine(a, b, c, d) {
228    return (
229      Object.keys(c[d])
230        // Summing up the parameter set specific scores of each domain
231        .map(ps => ({
232          [d]: {
233            [ps]: Math.min(
234              1,
235              ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0)
236            ),
237          },
238        }))
239        // Reducing from an array of objects with a single parameter set to a single object
240        // [{"a.com":{"ps1":11}}, {"a.com: {"ps2":12}}] => {"a.com":{"ps1":11,"ps2":12}}
241        .reduce((acc, cur) => ({ [d]: merge(acc[d], cur[d]) }))
242    );
243  }
244
245  /**
246   * Calculates a value on the curve described by the provided parameters. The curve we're using is
247   * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
248   *
249   * @param {number} score A value between 0 and maxScore, representing x.
250   * @param {number} maxScore Highest possible score.
251   * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
252   * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
253   * a slope of exactly 0.5 is linear.
254   * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
255   * effect of the slope can be. This represents b in the formula above.
256   * @return {number} the final score
257   */
258  calculateScore(score, maxScore, factor, ease = 2) {
259    let a = 0;
260    let x = Math.max(0, score / maxScore);
261
262    if (x >= 1) {
263      return 1;
264    }
265
266    if (factor === 0.5) {
267      return x;
268    }
269
270    if (factor < 0.5) {
271      // We want a log-shaped curve so we scale "a" between 0 and .99
272      a = (factor / 0.5) * 0.49;
273    } else if (factor > 0.5) {
274      // We want an exp-shaped curve so we scale "a" between 1.01 and 10
275      a = 1 + ((factor - 0.5) / 0.5) * 9;
276    }
277
278    return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
279  }
280
281  /**
282   * Queries the visit counts in the given time segment.
283   *
284   * @param ts the time segment
285   * @return the visit count object: {"a.com": 1, "b.com": 2}
286   */
287  queryVisits(ts) {
288    const visitCounts = {};
289    const query = PlacesUtils.history.getNewQuery();
290    if (!query) {
291      return visitCounts;
292    }
293    const wwwRegEx = /^www\./;
294
295    query.beginTimeReference = query.TIME_RELATIVE_NOW;
296    query.beginTime =
297      ts.startTime && ts.startTime !== 0
298        ? -(ts.startTime * 1000 * 1000)
299        : -(Date.now() * 1000);
300
301    query.endTimeReference = query.TIME_RELATIVE_NOW;
302    query.endTime =
303      ts.endTime && ts.endTime !== 0 ? -(ts.endTime * 1000 * 1000) : 0;
304
305    const options = PlacesUtils.history.getNewQueryOptions();
306    options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
307    options.maxResults = this.maxHistoryQueryResults;
308
309    const { root } = PlacesUtils.history.executeQuery(query, options);
310    root.containerOpen = true;
311    for (let i = 0; i < root.childCount; i++) {
312      let node = root.getChild(i);
313      let host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
314      if (!visitCounts[host]) {
315        visitCounts[host] = 0;
316      }
317      visitCounts[host] += node.accessCount;
318    }
319    root.containerOpen = false;
320    return visitCounts;
321  }
322
323  /**
324   * Calculates an item's relevance score.
325   *
326   * @param item the item (story), must contain domain affinities, otherwise a
327   * score of 1 is returned.
328   * @return the calculated item's score or 1 if item has no domain_affinities
329   * or references an unknown parameter set.
330   */
331  calculateItemRelevanceScore(item) {
332    const params = this.parameterSets[item.parameter_set];
333    if (!item.domain_affinities || !params) {
334      return item.item_score;
335    }
336
337    const scores = Object.keys(item.domain_affinities).reduce(
338      (acc, d) => {
339        let userDomainAffinityScore = this.scores[d]
340          ? this.scores[d][item.parameter_set]
341          : false;
342        if (userDomainAffinityScore) {
343          acc.combinedDomainScore +=
344            userDomainAffinityScore * item.domain_affinities[d];
345          acc.matchingDomainsCount++;
346        }
347        return acc;
348      },
349      { combinedDomainScore: 0, matchingDomainsCount: 0 }
350    );
351
352    // Boost the score as configured in the provided parameter set
353    const boostedCombinedDomainScore =
354      scores.combinedDomainScore *
355      Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);
356
357    // Calculate what the score would be if the item score is ignored
358    const normalizedCombinedDomainScore = this.calculateScore(
359      boostedCombinedDomainScore,
360      params.perfectCombinedDomainScore,
361      params.combinedDomainFactor
362    );
363
364    // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
365    // allows weighting the item score in relation to the normalizedCombinedDomainScore:
366    // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
367    // An itemScoreFactor of 0.5 results in the the average of item score and combined domain score
368    // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
369    return (
370      params.itemScoreFactor *
371        (item.item_score - normalizedCombinedDomainScore) +
372      normalizedCombinedDomainScore
373    );
374  }
375
376  /**
377   * Returns an object holding the settings and affinity scores of this provider instance.
378   */
379  getAffinities() {
380    return {
381      timeSegments: this.timeSegments,
382      parameterSets: this.parameterSets,
383      maxHistoryQueryResults: this.maxHistoryQueryResults,
384      version: this.version,
385      scores: this.scores,
386    };
387  }
388};
389
390const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];
391