newtab/lib/UserDomainAffinityProvider.jsm

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";

const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");

ChromeUtils.defineModuleGetter(
  this,
  "PlacesUtils",
  "resource://gre/modules/PlacesUtils.jsm"
);

const DEFAULT_TIME_SEGMENTS = [
  { id: "hour", startTime: 3600, endTime: 0, weightPosition: 1 },
  { id: "day", startTime: 86400, endTime: 3600, weightPosition: 0.75 },
  { id: "week", startTime: 604800, endTime: 86400, weightPosition: 0.5 },
  { id: "weekPlus", startTime: 0, endTime: 604800, weightPosition: 0.25 },
  { id: "alltime", startTime: 0, endTime: 0, weightPosition: 0.25 },
];

const DEFAULT_PARAMETER_SETS = {
  "linear-frequency": {
    recencyFactor: 0.4,
    frequencyFactor: 0.5,
    combinedDomainFactor: 0.5,
    perfectFrequencyVisits: 10,
    perfectCombinedDomainScore: 2,
    multiDomainBoost: 0.1,
    itemScoreFactor: 0,
  },
};

const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;

function merge(...args) {
  return Object.assign.apply(this, args);
}

/**
 * Provides functionality to personalize content recommendations by calculating
 * user domain affinity scores. These scores are used to calculate relevance
 * scores for items/recs/stories that have domain affinities.
 *
 * The algorithm works as follows:
 *
 * - The recommendation endpoint returns a settings object containing
 * timeSegments and parametersets.
 *
 * - For every time segment we calculate the corresponding domain visit counts,
 * yielding result objects of the following structure: {"mozilla.org": 12,
 * "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
 *
 * - These visit counts are transformed to domain affinity scores for all
 * provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
 * "paramSet2": 0.9}, "mozilla.org": {"paramSet1": 1, "paramSet2": 0.9}}
 * (see UserDomainAffinityProvider#calculateScoresForParameterSets)
 *
 * - The parameter sets provide factors for weighting which allows for
 * flexible targeting. The functionality to calculate final scores can
 * be seen in UserDomainAffinityProvider#calculateScores
 *
 * - The user domain affinity scores are summed up across all time segments
 * see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores
 *
 * - An item's domain affinities are matched to the user's domain affinity
 * scores by calculating an item relevance score
 * (see UserDomainAffinityProvider#calculateItemRelevanceScore)
 *
 * - The item relevance scores are used to sort items (see TopStoriesFeed for
 * more details)
 *
 * - The data structure was chosen to allow for fast cache lookups during
 * relevance score calculation. While user domain affinities are calculated
 * infrequently (i.e. only once a day), the item relevance score (potentially)
 * needs to be calculated every time the feed updates. Therefore allowing cache
 * lookups of scores[domain][parameterSet] is beneficial
 */
this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
  constructor(
    timeSegments = DEFAULT_TIME_SEGMENTS,
    parameterSets = DEFAULT_PARAMETER_SETS,
    maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
    version,
    scores
  ) {
    this.timeSegments = timeSegments;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    if (scores) {
      this.parameterSets = parameterSets;
      this.scores = scores;
    } else {
      this.parameterSets = this.prepareParameterSets(parameterSets);
      this.scores = this.calculateAllUserDomainAffinityScores();
    }
  }

  /**
   * Adds dynamic parameters to the given parameter sets that need to be
   * computed based on time segments.
   *
   * @param ps The parameter sets
   * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights)
   */
  prepareParameterSets(ps) {
    return (
      Object.keys(ps)
        // Add timeSegmentWeight fields to param sets e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
        .map(k => ({
          [k]: merge(ps[k], {
            timeSegmentWeights: this.calculateTimeSegmentWeights(
              ps[k].recencyFactor
            ),
          }),
        }))
        .reduce((acc, cur) => merge(acc, cur))
    );
  }

  /**
   * Calculates a time segment weight based on the provided recencyFactor.
   *
   * @param recencyFactor The recency factor indicating how to weigh recency
   * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
   */
  calculateTimeSegmentWeights(recencyFactor) {
    return this.timeSegments.reduce(
      (acc, cur) =>
        merge(acc, {
          [cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor),
        }),
      {}
    );
  }

  /**
   * Calculates user domain affinity scores based on browsing history and the
   * available times segments and parameter sets.
   */
  calculateAllUserDomainAffinityScores() {
    return (
      this.timeSegments
        // Calculate parameter set specific domain scores for each time segment
        // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
        .map(ts => this.calculateUserDomainAffinityScores(ts))
        // Keep format, but reduce to single object, with combined scores across all time segments
        // => "{a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}""
        .reduce((acc, cur) => this._combineScores(acc, cur))
    );
  }

  /**
   * Calculates the user domain affinity scores for the given time segment.
   *
   * @param ts The time segment
   * @return The parameter specific scores for all domains with visits in
   * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
   */
  calculateUserDomainAffinityScores(ts) {
    // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
    let visits = this.queryVisits(ts);

    return Object.keys(visits).reduce(
      (acc, d) =>
        merge(acc, {
          [d]: this.calculateScoresForParameterSets(ts, visits[d]),
        }),
      {}
    );
  }

  /**
   * Calculates the scores for all parameter sets for the given time segment
   * and domain visit count.
   *
   * @param ts The time segment
   * @param vc The domain visit count in the given time segment
   * @return The parameter specific scores for the visit count in
   * this time segment: {"ps1": 12, "ps2": 34}
   */
  calculateScoresForParameterSets(ts, vc) {
    return Object.keys(this.parameterSets).reduce(
      (acc, ps) =>
        merge(acc, {
          [ps]: this.calculateScoreForParameterSet(
            ts,
            vc,
            this.parameterSets[ps]
          ),
        }),
      {}
    );
  }

  /**
   * Calculates the final affinity score in the given time segment for the given parameter set
   *
   * @param timeSegment The time segment
   * @param visitCount The domain visit count in the given time segment
   * @param parameterSet The parameter set to use for scoring
   * @return The final score
   */
  calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
    return this.calculateScore(
      visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
      parameterSet.perfectFrequencyVisits,
      parameterSet.frequencyFactor
    );
  }

  /**
   * Keeps the same format, but reduces the two objects to a single object, with
   * combined scores across all time segments  => {a.com":{"ps1":2,"ps2":2},
   * "b.com":{"ps1":3,"ps2":3}}
   */
  _combineScores(a, b) {
    // Merge both score objects so we get a combined object holding all domains.
    // This is so we can combine them without missing domains that are in a and not in b and vice versa.
    const c = merge({}, a, b);
    return Object.keys(c).reduce(
      (acc, d) => merge(acc, this._combine(a, b, c, d)),
      {}
    );
  }

  _combine(a, b, c, d) {
    return (
      Object.keys(c[d])
        // Summing up the parameter set specific scores of each domain
        .map(ps => ({
          [d]: {
            [ps]: Math.min(
              1,
              ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0)
            ),
          },
        }))
        // Reducing from an array of objects with a single parameter set to a single object
        // [{"a.com":{"ps1":11}}, {"a.com: {"ps2":12}}] => {"a.com":{"ps1":11,"ps2":12}}
        .reduce((acc, cur) => ({ [d]: merge(acc[d], cur[d]) }))
    );
  }

  /**
   * Calculates a value on the curve described by the provided parameters. The curve we're using is
   * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
   *
   * @param {number} score A value between 0 and maxScore, representing x.
   * @param {number} maxScore Highest possible score.
   * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
   * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
   * a slope of exactly 0.5 is linear.
   * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
   * effect of the slope can be. This represents b in the formula above.
   * @return {number} the final score
   */
  calculateScore(score, maxScore, factor, ease = 2) {
    let a = 0;
    let x = Math.max(0, score / maxScore);

    if (x >= 1) {
      return 1;
    }

    if (factor === 0.5) {
      return x;
    }

    if (factor < 0.5) {
      // We want a log-shaped curve so we scale "a" between 0 and .99
      a = (factor / 0.5) * 0.49;
    } else if (factor > 0.5) {
      // We want an exp-shaped curve so we scale "a" between 1.01 and 10
      a = 1 + ((factor - 0.5) / 0.5) * 9;
    }

    return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
  }

  /**
   * Queries the visit counts in the given time segment.
   *
   * @param ts the time segment
   * @return the visit count object: {"a.com": 1, "b.com": 2}
   */
  queryVisits(ts) {
    const visitCounts = {};
    const query = PlacesUtils.history.getNewQuery();
    if (!query) {
      return visitCounts;
    }
    const wwwRegEx = /^www\./;

    query.beginTimeReference = query.TIME_RELATIVE_NOW;
    query.beginTime =
      ts.startTime && ts.startTime !== 0
        ? -(ts.startTime * 1000 * 1000)
        : -(Date.now() * 1000);

    query.endTimeReference = query.TIME_RELATIVE_NOW;
    query.endTime =
      ts.endTime && ts.endTime !== 0 ? -(ts.endTime * 1000 * 1000) : 0;

    const options = PlacesUtils.history.getNewQueryOptions();
    options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
    options.maxResults = this.maxHistoryQueryResults;

    const { root } = PlacesUtils.history.executeQuery(query, options);
    root.containerOpen = true;
    for (let i = 0; i < root.childCount; i++) {
      let node = root.getChild(i);
      let host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
      if (!visitCounts[host]) {
        visitCounts[host] = 0;
      }
      visitCounts[host] += node.accessCount;
    }
    root.containerOpen = false;
    return visitCounts;
  }

  /**
   * Calculates an item's relevance score.
   *
   * @param item the item (story), must contain domain affinities, otherwise a
   * score of 1 is returned.
   * @return the calculated item's score or 1 if item has no domain_affinities
   * or references an unknown parameter set.
   */
  calculateItemRelevanceScore(item) {
    const params = this.parameterSets[item.parameter_set];
    if (!item.domain_affinities || !params) {
      return item.item_score;
    }

    const scores = Object.keys(item.domain_affinities).reduce(
      (acc, d) => {
        let userDomainAffinityScore = this.scores[d]
          ? this.scores[d][item.parameter_set]
          : false;
        if (userDomainAffinityScore) {
          acc.combinedDomainScore +=
            userDomainAffinityScore * item.domain_affinities[d];
          acc.matchingDomainsCount++;
        }
        return acc;
      },
      { combinedDomainScore: 0, matchingDomainsCount: 0 }
    );

    // Boost the score as configured in the provided parameter set
    const boostedCombinedDomainScore =
      scores.combinedDomainScore *
      Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);

    // Calculate what the score would be if the item score is ignored
    const normalizedCombinedDomainScore = this.calculateScore(
      boostedCombinedDomainScore,
      params.perfectCombinedDomainScore,
      params.combinedDomainFactor
    );

    // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
    // allows weighting the item score in relation to the normalizedCombinedDomainScore:
    // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
    // An itemScoreFactor of 0.5 results in the the average of item score and combined domain score
    // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
    return (
      params.itemScoreFactor *
        (item.item_score - normalizedCombinedDomainScore) +
      normalizedCombinedDomainScore
    );
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: this.scores,
    };
  }
};

const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];