1/* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4"use strict"; 5 6const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm"); 7 8ChromeUtils.defineModuleGetter( 9 this, 10 "PlacesUtils", 11 "resource://gre/modules/PlacesUtils.jsm" 12); 13 14const DEFAULT_TIME_SEGMENTS = [ 15 { id: "hour", startTime: 3600, endTime: 0, weightPosition: 1 }, 16 { id: "day", startTime: 86400, endTime: 3600, weightPosition: 0.75 }, 17 { id: "week", startTime: 604800, endTime: 86400, weightPosition: 0.5 }, 18 { id: "weekPlus", startTime: 0, endTime: 604800, weightPosition: 0.25 }, 19 { id: "alltime", startTime: 0, endTime: 0, weightPosition: 0.25 }, 20]; 21 22const DEFAULT_PARAMETER_SETS = { 23 "linear-frequency": { 24 recencyFactor: 0.4, 25 frequencyFactor: 0.5, 26 combinedDomainFactor: 0.5, 27 perfectFrequencyVisits: 10, 28 perfectCombinedDomainScore: 2, 29 multiDomainBoost: 0.1, 30 itemScoreFactor: 0, 31 }, 32}; 33 34const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000; 35 36function merge(...args) { 37 return Object.assign.apply(this, args); 38} 39 40/** 41 * Provides functionality to personalize content recommendations by calculating 42 * user domain affinity scores. These scores are used to calculate relevance 43 * scores for items/recs/stories that have domain affinities. 44 * 45 * The algorithm works as follows: 46 * 47 * - The recommendation endpoint returns a settings object containing 48 * timeSegments and parametersets. 49 * 50 * - For every time segment we calculate the corresponding domain visit counts, 51 * yielding result objects of the following structure: {"mozilla.org": 12, 52 * "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits) 53 * 54 * - These visit counts are transformed to domain affinity scores for all 55 * provided parameter sets: {"mozilla.org": {"paramSet1": 0.8, 56 * "paramSet2": 0.9}, "mozilla.org": {"paramSet1": 1, "paramSet2": 0.9}} 57 * (see UserDomainAffinityProvider#calculateScoresForParameterSets) 58 * 59 * - The parameter sets provide factors for weighting which allows for 60 * flexible targeting. The functionality to calculate final scores can 61 * be seen in UserDomainAffinityProvider#calculateScores 62 * 63 * - The user domain affinity scores are summed up across all time segments 64 * see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores 65 * 66 * - An item's domain affinities are matched to the user's domain affinity 67 * scores by calculating an item relevance score 68 * (see UserDomainAffinityProvider#calculateItemRelevanceScore) 69 * 70 * - The item relevance scores are used to sort items (see TopStoriesFeed for 71 * more details) 72 * 73 * - The data structure was chosen to allow for fast cache lookups during 74 * relevance score calculation. While user domain affinities are calculated 75 * infrequently (i.e. only once a day), the item relevance score (potentially) 76 * needs to be calculated every time the feed updates. Therefore allowing cache 77 * lookups of scores[domain][parameterSet] is beneficial 78 */ 79this.UserDomainAffinityProvider = class UserDomainAffinityProvider { 80 constructor( 81 timeSegments = DEFAULT_TIME_SEGMENTS, 82 parameterSets = DEFAULT_PARAMETER_SETS, 83 maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS, 84 version, 85 scores 86 ) { 87 this.timeSegments = timeSegments; 88 this.maxHistoryQueryResults = maxHistoryQueryResults; 89 this.version = version; 90 if (scores) { 91 this.parameterSets = parameterSets; 92 this.scores = scores; 93 } else { 94 this.parameterSets = this.prepareParameterSets(parameterSets); 95 this.scores = this.calculateAllUserDomainAffinityScores(); 96 } 97 } 98 99 /** 100 * Adds dynamic parameters to the given parameter sets that need to be 101 * computed based on time segments. 102 * 103 * @param ps The parameter sets 104 * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights) 105 */ 106 prepareParameterSets(ps) { 107 return ( 108 Object.keys(ps) 109 // Add timeSegmentWeight fields to param sets e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...} 110 .map(k => ({ 111 [k]: merge(ps[k], { 112 timeSegmentWeights: this.calculateTimeSegmentWeights( 113 ps[k].recencyFactor 114 ), 115 }), 116 })) 117 .reduce((acc, cur) => merge(acc, cur)) 118 ); 119 } 120 121 /** 122 * Calculates a time segment weight based on the provided recencyFactor. 123 * 124 * @param recencyFactor The recency factor indicating how to weigh recency 125 * @return An object containing time segment weights: {"hour": 0.987, "day": 1} 126 */ 127 calculateTimeSegmentWeights(recencyFactor) { 128 return this.timeSegments.reduce( 129 (acc, cur) => 130 merge(acc, { 131 [cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor), 132 }), 133 {} 134 ); 135 } 136 137 /** 138 * Calculates user domain affinity scores based on browsing history and the 139 * available times segments and parameter sets. 140 */ 141 calculateAllUserDomainAffinityScores() { 142 return ( 143 this.timeSegments 144 // Calculate parameter set specific domain scores for each time segment 145 // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...] 146 .map(ts => this.calculateUserDomainAffinityScores(ts)) 147 // Keep format, but reduce to single object, with combined scores across all time segments 148 // => "{a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}"" 149 .reduce((acc, cur) => this._combineScores(acc, cur)) 150 ); 151 } 152 153 /** 154 * Calculates the user domain affinity scores for the given time segment. 155 * 156 * @param ts The time segment 157 * @return The parameter specific scores for all domains with visits in 158 * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...} 159 */ 160 calculateUserDomainAffinityScores(ts) { 161 // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2} 162 let visits = this.queryVisits(ts); 163 164 return Object.keys(visits).reduce( 165 (acc, d) => 166 merge(acc, { 167 [d]: this.calculateScoresForParameterSets(ts, visits[d]), 168 }), 169 {} 170 ); 171 } 172 173 /** 174 * Calculates the scores for all parameter sets for the given time segment 175 * and domain visit count. 176 * 177 * @param ts The time segment 178 * @param vc The domain visit count in the given time segment 179 * @return The parameter specific scores for the visit count in 180 * this time segment: {"ps1": 12, "ps2": 34} 181 */ 182 calculateScoresForParameterSets(ts, vc) { 183 return Object.keys(this.parameterSets).reduce( 184 (acc, ps) => 185 merge(acc, { 186 [ps]: this.calculateScoreForParameterSet( 187 ts, 188 vc, 189 this.parameterSets[ps] 190 ), 191 }), 192 {} 193 ); 194 } 195 196 /** 197 * Calculates the final affinity score in the given time segment for the given parameter set 198 * 199 * @param timeSegment The time segment 200 * @param visitCount The domain visit count in the given time segment 201 * @param parameterSet The parameter set to use for scoring 202 * @return The final score 203 */ 204 calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) { 205 return this.calculateScore( 206 visitCount * parameterSet.timeSegmentWeights[timeSegment.id], 207 parameterSet.perfectFrequencyVisits, 208 parameterSet.frequencyFactor 209 ); 210 } 211 212 /** 213 * Keeps the same format, but reduces the two objects to a single object, with 214 * combined scores across all time segments => {a.com":{"ps1":2,"ps2":2}, 215 * "b.com":{"ps1":3,"ps2":3}} 216 */ 217 _combineScores(a, b) { 218 // Merge both score objects so we get a combined object holding all domains. 219 // This is so we can combine them without missing domains that are in a and not in b and vice versa. 220 const c = merge({}, a, b); 221 return Object.keys(c).reduce( 222 (acc, d) => merge(acc, this._combine(a, b, c, d)), 223 {} 224 ); 225 } 226 227 _combine(a, b, c, d) { 228 return ( 229 Object.keys(c[d]) 230 // Summing up the parameter set specific scores of each domain 231 .map(ps => ({ 232 [d]: { 233 [ps]: Math.min( 234 1, 235 ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0) 236 ), 237 }, 238 })) 239 // Reducing from an array of objects with a single parameter set to a single object 240 // [{"a.com":{"ps1":11}}, {"a.com: {"ps2":12}}] => {"a.com":{"ps1":11,"ps2":12}} 241 .reduce((acc, cur) => ({ [d]: merge(acc[d], cur[d]) })) 242 ); 243 } 244 245 /** 246 * Calculates a value on the curve described by the provided parameters. The curve we're using is 247 * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp 248 * 249 * @param {number} score A value between 0 and maxScore, representing x. 250 * @param {number} maxScore Highest possible score. 251 * @param {number} factor The slope describing the curve to get to maxScore. A low slope value 252 * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve, 253 * a slope of exactly 0.5 is linear. 254 * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum 255 * effect of the slope can be. This represents b in the formula above. 256 * @return {number} the final score 257 */ 258 calculateScore(score, maxScore, factor, ease = 2) { 259 let a = 0; 260 let x = Math.max(0, score / maxScore); 261 262 if (x >= 1) { 263 return 1; 264 } 265 266 if (factor === 0.5) { 267 return x; 268 } 269 270 if (factor < 0.5) { 271 // We want a log-shaped curve so we scale "a" between 0 and .99 272 a = (factor / 0.5) * 0.49; 273 } else if (factor > 0.5) { 274 // We want an exp-shaped curve so we scale "a" between 1.01 and 10 275 a = 1 + ((factor - 0.5) / 0.5) * 9; 276 } 277 278 return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1); 279 } 280 281 /** 282 * Queries the visit counts in the given time segment. 283 * 284 * @param ts the time segment 285 * @return the visit count object: {"a.com": 1, "b.com": 2} 286 */ 287 queryVisits(ts) { 288 const visitCounts = {}; 289 const query = PlacesUtils.history.getNewQuery(); 290 if (!query) { 291 return visitCounts; 292 } 293 const wwwRegEx = /^www\./; 294 295 query.beginTimeReference = query.TIME_RELATIVE_NOW; 296 query.beginTime = 297 ts.startTime && ts.startTime !== 0 298 ? -(ts.startTime * 1000 * 1000) 299 : -(Date.now() * 1000); 300 301 query.endTimeReference = query.TIME_RELATIVE_NOW; 302 query.endTime = 303 ts.endTime && ts.endTime !== 0 ? -(ts.endTime * 1000 * 1000) : 0; 304 305 const options = PlacesUtils.history.getNewQueryOptions(); 306 options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING; 307 options.maxResults = this.maxHistoryQueryResults; 308 309 const { root } = PlacesUtils.history.executeQuery(query, options); 310 root.containerOpen = true; 311 for (let i = 0; i < root.childCount; i++) { 312 let node = root.getChild(i); 313 let host = Services.io.newURI(node.uri).host.replace(wwwRegEx, ""); 314 if (!visitCounts[host]) { 315 visitCounts[host] = 0; 316 } 317 visitCounts[host] += node.accessCount; 318 } 319 root.containerOpen = false; 320 return visitCounts; 321 } 322 323 /** 324 * Calculates an item's relevance score. 325 * 326 * @param item the item (story), must contain domain affinities, otherwise a 327 * score of 1 is returned. 328 * @return the calculated item's score or 1 if item has no domain_affinities 329 * or references an unknown parameter set. 330 */ 331 calculateItemRelevanceScore(item) { 332 const params = this.parameterSets[item.parameter_set]; 333 if (!item.domain_affinities || !params) { 334 return item.item_score; 335 } 336 337 const scores = Object.keys(item.domain_affinities).reduce( 338 (acc, d) => { 339 let userDomainAffinityScore = this.scores[d] 340 ? this.scores[d][item.parameter_set] 341 : false; 342 if (userDomainAffinityScore) { 343 acc.combinedDomainScore += 344 userDomainAffinityScore * item.domain_affinities[d]; 345 acc.matchingDomainsCount++; 346 } 347 return acc; 348 }, 349 { combinedDomainScore: 0, matchingDomainsCount: 0 } 350 ); 351 352 // Boost the score as configured in the provided parameter set 353 const boostedCombinedDomainScore = 354 scores.combinedDomainScore * 355 Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount); 356 357 // Calculate what the score would be if the item score is ignored 358 const normalizedCombinedDomainScore = this.calculateScore( 359 boostedCombinedDomainScore, 360 params.perfectCombinedDomainScore, 361 params.combinedDomainFactor 362 ); 363 364 // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor 365 // allows weighting the item score in relation to the normalizedCombinedDomainScore: 366 // An itemScoreFactor of 1 results in the item score and ignores the combined domain score 367 // An itemScoreFactor of 0.5 results in the the average of item score and combined domain score 368 // An itemScoreFactor of 0 results in the combined domain score and ignores the item score 369 return ( 370 params.itemScoreFactor * 371 (item.item_score - normalizedCombinedDomainScore) + 372 normalizedCombinedDomainScore 373 ); 374 } 375 376 /** 377 * Returns an object holding the settings and affinity scores of this provider instance. 378 */ 379 getAffinities() { 380 return { 381 timeSegments: this.timeSegments, 382 parameterSets: this.parameterSets, 383 maxHistoryQueryResults: this.maxHistoryQueryResults, 384 version: this.version, 385 scores: this.scores, 386 }; 387 } 388}; 389 390const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"]; 391