1 /*!
2   \file  evaluate.c
3   \brief Various routines to evaluate classification performance
4 
5   \author George
6   \date 9/23/2008
7   \version\verbatim $Id: evaluate.c 13328 2012-12-31 14:57:40Z karypis $ \endverbatim
8 */
9 
10 #include <GKlib.h>
11 
12 /**********************************************************************
13  * This function computes the max accuracy score of a ranked list,
14  * given +1/-1 class list
15  **********************************************************************/
ComputeAccuracy(int n,gk_fkv_t * list)16 float ComputeAccuracy(int n, gk_fkv_t *list)
17 {
18   int i, P, N, TP, FN = 0;
19   float bAccuracy = 0.0;
20   float acc;
21 
22   for (P=0, i=0;i<n;i++)
23     P += (list[i].val == 1? 1 : 0);
24   N = n - P;
25 
26   TP = FN = 0;
27 
28   for(i=0; i<n; i++){
29     if (list[i].val == 1)
30       TP++;
31     else
32       FN++;
33 
34     acc = (TP + N - FN) * 100.0/ (P + N) ;
35     if (acc > bAccuracy)
36       bAccuracy = acc;
37   }
38 
39   return bAccuracy;
40 }
41 
42 
43 /*****************************************************************************
44  * This function computes the ROC score of a ranked list, given a +1/-1 class
45  * list.
46  ******************************************************************************/
ComputeROCn(int n,int maxN,gk_fkv_t * list)47 float ComputeROCn(int n, int maxN, gk_fkv_t *list)
48 {
49   int i, P, TP, FP, TPprev, FPprev, AUC;
50   float prev;
51 
52   FP = TP = FPprev = TPprev = AUC = 0;
53   prev = list[0].key -1;
54 
55   for (P=0, i=0; i<n; i++)
56     P += (list[i].val == 1 ? 1 : 0);
57 
58   for (i=0; i<n && FP < maxN; i++) {
59     if (list[i].key != prev) {
60       AUC += (TP+TPprev)*(FP-FPprev)/2;
61       prev = list[i].key;
62       FPprev = FP;
63       TPprev = TP;
64     }
65     if (list[i].val == 1)
66       TP++;
67     else {
68       FP++;
69     }
70   }
71   AUC += (TP+TPprev)*(FP-FPprev)/2;
72 
73   return (TP*FP > 0 ? (float)(1.0*AUC/(P*FP)) : 0.0);
74 }
75 
76 
77 /*****************************************************************************
78 * This function computes the median rate of false positive for each positive
79 * instance.
80 ******************************************************************************/
ComputeMedianRFP(int n,gk_fkv_t * list)81 float ComputeMedianRFP(int n, gk_fkv_t *list)
82 {
83   int i, P, N, TP, FP;
84 
85   P = N = 0;
86   for (i=0; i<n; i++) {
87     if (list[i].val == 1)
88       P++;
89     else
90       N++;
91   }
92 
93   FP = TP = 0;
94   for (i=0; i<n && TP < (P+1)/2; i++) {
95     if (list[i].val == 1)
96       TP++;
97     else
98       FP++;
99   }
100 
101   return 1.0*FP/N;
102 }
103 
104 /*********************************************************
105  * Compute the mean
106  ********************************************************/
ComputeMean(int n,float * values)107 float ComputeMean (int n, float *values)
108 {
109   int i;
110   float mean = 0.0;
111 
112   for(i=0; i < n; i++)
113     mean += values[i];
114 
115   return 1.0 * mean/ n;
116 }
117 
118 /********************************************************
119  * Compute the standard deviation
120  ********************************************************/
ComputeStdDev(int n,float * values)121 float ComputeStdDev(int  n, float *values)
122 {
123   int i;
124   float mean = ComputeMean(n, values);
125   float stdDev = 0;
126 
127   for(i=0;i<n;i++){
128     stdDev += (values[i] - mean)* (values[i] - mean);
129   }
130 
131   return sqrt(1.0 * stdDev/n);
132 }
133