1 //--------------------------------------------------------------------------------- 2 // GBM alteration by Stefan Schroedl (schroedl@a9.com) 3 // 4 // File: pairwise 5 // 6 // Contains: Distribution object to implement pairwise distributions for ranking 7 // 8 // History: 12/15/2011 Created 9 // 10 //--------------------------------------------------------------------------------- 11 12 // This file implements the LambdaMart algorithm for learning ranking functions. 13 // The main idea is to model p_ij, the probability that item i should rank higher 14 // than j, as 15 // p_ij = 1 / (1 + exp(s_i - s_j)), 16 // where s_i, s_j are the model scores for the two items. 17 // 18 // While scores are still generated one item at a time, gradients for learning 19 // depend on _pairs_ of items. The algorithm is aware of _groups_; all pairs of items 20 // with different labels, belonging to the same group, are used for training. A 21 // typical application is ranking for web search: groups correspond to user queries, 22 // and items to (feature vectors of) web pages in the associated match set. 23 // 24 // Different IR measures can be chosen, to weight instances based on their rank. 25 // Generally, changes in top ranks should have more influence than changes at the 26 // bottom of the result list. This function provides the following options: 27 // 28 // * CONC (concordance index, fraction of correctly raked pairs. This is a generalization 29 // of Area under the ROC Curve (AUC) from binary to multivalued labels. 30 // * Normalized Discounted Cumulative Gain (NDCG) 31 // * Mean Reciprocal Rank (MRR) of the highest-ranked positive instance. 32 // * Mean Average Precision (MAP), a generalization of MRR to multiple positive instances. 33 // 34 // While MRR and MAP expect binary target labels, CONC and NDCG can equally work with 35 // continuous values. More precisely, NDCG is defined as 36 // \Sum_{r=1..n} val_r / log2(r+1), 37 // where val_r is the user-specified target for the item at rank r. Note that this is 38 // contrast to some definitions of NDCG that assume integer targets s_i, and 39 // implicitly transform val_r = 2^{s+i}-1. 40 // 41 // Groups are specified using an integer vector of the same length as the training instances. 42 // 43 // Optionally, item weights can be supplied; it is assumed that all instances belonging 44 // to the same group have the same weight. 45 // 46 // For background information on LambdaMart, please see e.g. the following papers: 47 // 48 // * Burges, C., "From RankNet to LambdaRank to LambdaMART: An Overview", Microsoft 49 // Research Technical Report MSR-TR-2010-82, 2010 50 // * Donmez, P., K. Svore, K., and Burges, C., "On the Local Optimality of 51 // LambdaRank", SIGIR 2009 52 // * Burges, C., Ragno, R., and Le, Q., "Learning to Rank with Non-Smooth Cost 53 // Functions", NIPS 2006 54 55 #ifndef PAIRWISE_H 56 #define PAIRWISE_H 57 58 #include "distribution.h" 59 #include "buildinfo.h" 60 61 // A class to rerank groups based on (intermediate) scores 62 // Note: Smaller ranks are better, the top rank is 1 63 64 class CRanker 65 { 66 public: 67 // Auxiliary structure to store score and rank 68 typedef std::pair<double, unsigned int> CDoubleUintPair; 69 70 // Buffer memory allocation 71 void Init(unsigned int cMaxItemsPerGroup); 72 73 // Initialize ranker with scores of items belonging to the same group 74 // - adScores is a score array, (at least) cNumItems long 75 bool SetGroupScores(const double* const adScores, unsigned int cNumItems); 76 77 // Perform the ranking 78 // - Return true if any item changed its rank 79 bool Rank(); 80 81 // Getter / setter GetNumItems()82 unsigned int GetNumItems() const { return cNumItems; } GetRank(int i)83 unsigned int GetRank(int i) const { return vecdipScoreRank[i].second; } GetItem(unsigned int iRank)84 unsigned int GetItem(unsigned int iRank) const { return (vecpdipScoreRank[iRank-1] - &(vecdipScoreRank[0])); } SetRank(int i,unsigned int r)85 void SetRank(int i, unsigned int r) { vecdipScoreRank[i].second = r; } AddToScore(int i,double delta)86 void AddToScore(int i, double delta) { vecdipScoreRank[i].first += delta; } 87 88 protected: 89 // Number of items in current group 90 unsigned int cNumItems; 91 92 // Pairs of (score, rank) for current group 93 vector<CDoubleUintPair> vecdipScoreRank; 94 95 // Array of pointers to elements of vecdipScoreRank, used for sorting 96 // Note: We need a separate array for sorting in order to be able to 97 // quickly look up the rank for any given item. 98 vector<CDoubleUintPair*> vecpdipScoreRank; 99 }; 100 101 102 // Abstract base class for all IR Measures 103 104 class CIRMeasure 105 { 106 public: 107 // Constructor CIRMeasure()108 CIRMeasure() : cRankCutoff(UINT_MAX) {} 109 110 // Destructor ~CIRMeasure()111 virtual ~CIRMeasure() { } 112 113 // Getter / Setter GetCutoffRank()114 unsigned int GetCutoffRank() const { return cRankCutoff; } SetCutoffRank(unsigned int cRankCutoff)115 void SetCutoffRank(unsigned int cRankCutoff) { this->cRankCutoff = cRankCutoff; } 116 117 // Auxiliary function for sanity check AnyPairs(const double * const adY,unsigned int cNumItems)118 bool AnyPairs(const double* const adY, unsigned int cNumItems) const 119 { 120 return (cNumItems >= 2 // at least two instances 121 && adY[0] > 0.0 // at least one positive example (targets are non-increasing) 122 && adY[cNumItems-1] != adY[0]); // at least two different targets 123 } 124 125 // Memory allocation 126 virtual void Init(unsigned long cMaxGroup, unsigned long cNumItems, unsigned int cRankCutoff = UINT_MAX) { this->cRankCutoff = cRankCutoff; } 127 128 // Calculate the IR measure for the group of items set in the ranker. 129 // Precondition: CRanker::SetGroupScores() has been called 130 // - adY are the target scores 131 virtual double Measure(const double* const adY, const CRanker& ranker) = 0; 132 133 // Calculate the maximum achievable IR measure for a given group. 134 // Side effect: the ranker state might change 135 // Default implementation for MRR and MAP: if any positive items exist, 136 // ranking them at the top yields a perfect measure of 1. MaxMeasure(unsigned int iGroup,const double * const adY,unsigned int cNumItems)137 virtual double MaxMeasure(unsigned int iGroup, const double* const adY, unsigned int cNumItems) 138 { 139 return (AnyPairs(adY, cNumItems) ? 1.0 : 0.0); 140 } 141 142 // Calculate the difference in the IR measure caused by swapping the ranks of two items. 143 // Assumptions: 144 // * iItemBetter has a higher label than iItemWorse (i.e., adY[iItemBetter] > adY[iItemWorse]). 145 // * ranker.setGroup() has been called. 146 virtual double SwapCost(int iItemBetter, int iItemWorse, const double* const adY, const CRanker& ranker) const = 0; 147 148 protected: 149 // Cut-off rank below which items are ignored for measure 150 unsigned int cRankCutoff; 151 }; 152 153 // Class to implement IR Measure 'CONC' (fraction of concordant pairs). For the case of binary labels, this is 154 // equivalent to the area under the ROC curve (AUC). 155 156 class CConc : public CIRMeasure 157 { 158 public: ~CConc()159 virtual ~CConc() { } 160 161 void Init(unsigned long cMaxGroup, unsigned long cNumItems, unsigned int cRankCutoff = UINT_MAX); 162 163 double Measure(const double* const adY, const CRanker& ranker); 164 165 // The maximum number of correctly classified pairs is simply all pairs with different labels MaxMeasure(unsigned int iGroup,const double * const adY,unsigned int cNumItems)166 double MaxMeasure(unsigned int iGroup, const double* const adY, unsigned int cNumItems) 167 { 168 return PairCount(iGroup, adY, cNumItems); 169 } 170 171 // (Cached) calculation of the number of pairs with different labels 172 unsigned int PairCount(unsigned int iGroup, const double* const adY, unsigned int cNumItems); 173 174 double SwapCost(int iItemBetter, int iItemWorse, const double* const adY, const CRanker& ranker) const; 175 176 protected: 177 // Calculate the number of pairs with different labels 178 int ComputePairCount(const double* const adY, unsigned int cNumItems); 179 180 // Caches the number of pairs with different labels, for each group 181 vector<int> veccPairCount; 182 }; 183 184 // Class to implement IR Measure 'Normalized Discounted Cumulative Gain' 185 // Note: Labels can have any non-negative value 186 187 class CNDCG : public CIRMeasure 188 { 189 public: 190 191 void Init(unsigned long cMaxGroup, unsigned long cNumItems, unsigned int cRankCutoff = UINT_MAX); 192 193 // Compute DCG 194 double Measure(const double* const adY, const CRanker& ranker); 195 196 // Compute best possible DCG 197 double MaxMeasure(unsigned int iGroup, const double* const adY, unsigned int cNumItems); 198 199 double SwapCost(int iItemBetter, int iItemWorse, const double* const adY, const CRanker& ranker) const; 200 201 protected: 202 // Lookup table for rank weight (w(rank) = 1/log2(1+rank)) 203 vector<double> vecdRankWeight; 204 205 // Caches the maximum achievable DCG, for each group 206 vector<double> vecdMaxDCG; 207 }; 208 209 // Class to implement IR Measure 'Mean Reciprocal Rank' 210 // Assumption: Labels are 0 or 1 211 212 class CMRR : public CIRMeasure 213 { 214 public: 215 double Measure(const double* const adY, const CRanker& ranker); 216 217 double SwapCost(int iItemPos, int iItemNeg, const double* const adY, const CRanker& ranker) const; 218 219 }; 220 221 222 // Class to implement IR Measure 'Mean Average Precision' 223 // Assumption: Labels are 0 or 1 224 225 class CMAP : public CIRMeasure 226 { 227 public: 228 229 void Init(unsigned long cMaxGroup, unsigned long cNumItems, unsigned int cRankCutoff = UINT_MAX); 230 231 double Measure(const double* const adY, const CRanker& ranker); 232 233 double SwapCost(int iItemPos, int iItemNeg, const double* const adY, const CRanker& ranker) const; 234 protected: 235 236 // Buffer to hold positions of positive examples 237 mutable vector<int> veccRankPos; 238 }; 239 240 241 // Main class for 'pairwise' distribution 242 // Notes and Assumptions: 243 // * The items are sorted such that 244 // * Instances belonging to the same group occur in 245 // a contiguous range 246 // * Within a group, labels are non-increasing. 247 // * adGroup supplies the group ID (positive integer, but double 248 // format for compliance with the base class interface). 249 // * The targets adY are non-negative values, and binary {0,1} 250 // for measures MRR and MAP. 251 // * Higher IR measures are better. 252 // * Only pairs with different labels are used for training. 253 // * Instance weights (adWeight) are constant among groups. 254 // * CPairwise::Initialize() is called before any of the other 255 // functions, with same values for adY, adGroup, adWeight, and 256 // nTrain. Certain values have to be precomputed for 257 // efficiency. 258 259 class CPairwise : public CDistribution 260 { 261 public: 262 263 // Constructor: determine IR measure as either "conc", "map", "mrr", or "ndcg" 264 CPairwise(const char* szIRMeasure); 265 266 virtual ~CPairwise(); 267 268 GBMRESULT Initialize(double *adY, 269 double *adGroup, 270 double *adOffset, 271 double *adWeight, 272 unsigned long cLength); 273 UpdateParams(double * adF,double * adOffset,double * adWeight,unsigned long cLength)274 GBMRESULT UpdateParams(double *adF, 275 double *adOffset, 276 double *adWeight, 277 unsigned long cLength) 278 { 279 return GBM_OK; 280 }; 281 282 GBMRESULT ComputeWorkingResponse(double *adY, 283 double *adGroup, 284 double *adOffset, 285 double *adF, 286 double *adZ, 287 double *adWeight, 288 bool *afInBag, 289 unsigned long nTrain, 290 int cIdxOff); 291 292 double Deviance(double *adY, 293 double *adGroup, 294 double *adOffset, 295 double *adWeight, 296 double *adF, 297 unsigned long cLength, 298 int cIdxOff); 299 300 GBMRESULT InitF(double *adY, 301 double *adGroup, 302 double *adOffset, 303 double *adWeight, 304 double &dInitF, 305 unsigned long cLength); 306 307 GBMRESULT FitBestConstant(double *adY, 308 double *adGroup, 309 double *adOffset, 310 double *adW, 311 double *adF, 312 double *adZ, 313 unsigned long *aiNodeAssign, 314 unsigned long nTrain, 315 VEC_P_NODETERMINAL vecpTermNodes, 316 unsigned long cTermNodes, 317 unsigned long cMinObsInNode, 318 bool *afInBag, 319 double *adFadj, 320 int cIdxOff); 321 322 double BagImprovement(double *adY, 323 double *adGroup, 324 double *adOffset, 325 double *adWeight, 326 double *adF, 327 double *adFadj, 328 bool *afInBag, 329 double dStepSize, 330 unsigned long nTrain); 331 332 protected: 333 334 // Calculate and accumulate up the gradients and Hessians from all training pairs 335 void ComputeLambdas(int iGroup, unsigned int cNumItems, const double* const adY, const double* const adF, const double* const adWeight, double* adZ, double* adDeriv); 336 337 CIRMeasure* pirm; // The IR measure to use 338 CRanker ranker; // The ranker 339 340 vector<double> vecdHessian; // Second derivative of loss function, for each training instance; used for Newton step 341 342 vector<double> vecdNum; // Buffer used for numerator in FitBestConstant(), for each node 343 vector<double> vecdDenom; // Buffer used for denominator in FitBestConstant(), for each node 344 345 vector<double> vecdFPlusOffset; // Temporary buffer for (adF + adOffset), if the latter is not null 346 }; 347 348 #endif // PAIRWISE_H 349