1 /***********************************************************************/
2 /*                                                                     */
3 /*   svm_classify.c                                                    */
4 /*                                                                     */
5 /*   Classification module of Support Vector Machine.                  */
6 /*                                                                     */
7 /*   Author: Thorsten Joachims                                         */
8 /*   Date: 02.07.02                                                    */
9 /*                                                                     */
10 /*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       */
11 /*                                                                     */
12 /*   This software is available for non-commercial use only. It must   */
13 /*   not be modified and distributed without prior permission of the   */
14 /*   author. The author is not responsible for implications from the   */
15 /*   use of this software.                                             */
16 /*                                                                     */
17 /************************************************************************/
18 
19 # include "svm_common.h"
20 
/* File names for one run of the classifier.  main() passes these three
   buffers to read_input_parameters(), which fills them from the command
   line. */
char docfile[200];          /* test examples; opened for reading in main() */
char modelfile[200];        /* model file handed to read_model() in main() */
char predictionsfile[200];  /* prediction output; opened for writing in main() */

/* Forward declarations of the two helpers defined after main().
   read_input_parameters() receives argc/argv, the three file name buffers
   above, and pointers to the verbosity level and the prediction format. */
void read_input_parameters(int, char **, char *, char *, char *, long *,
			   long *);
void print_help(void);
28 
29 
main(int argc,char * argv[])30 int main (int argc, char* argv[])
31 {
32   DOC *doc;   /* test example */
33   WORD *words;
34   long max_docs,max_words_doc,lld;
35   long totdoc=0,queryid,slackid;
36   long correct=0,incorrect=0,no_accuracy=0;
37   long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
38   long j;
39   double t1,runtime=0;
40   double dist,doc_label,costfactor;
41   char *line,*comment;
42   FILE *predfl,*docfl;
43   MODEL *model;
44 
45   read_input_parameters(argc,argv,docfile,modelfile,predictionsfile,
46 			&verbosity,&pred_format);
47 
48   nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
49   max_words_doc+=2;
50   lld+=2;
51 
52   line = (char *)my_malloc(sizeof(char)*lld);
53   words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
54 
55   model=read_model(modelfile);
56 
57   if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
58     /* compute weight vector */
59     add_weight_vector_to_linear_model(model);
60   }
61 
62   if(verbosity>=2) {
63     printf("Classifying test examples.."); fflush(stdout);
64   }
65 
66   if ((docfl = fopen (docfile, "r")) == NULL)
67   { perror (docfile); exit (1); }
68   if ((predfl = fopen (predictionsfile, "w")) == NULL)
69   { perror (predictionsfile); exit (1); }
70 
71   while((!feof(docfl)) && fgets(line,(int)lld,docfl)) {
72     if(line[0] == '#') continue;  /* line contains comments */
73     parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
74 		   max_words_doc,&comment);
75     totdoc++;
76     if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
77       for(j=0;(words[j]).wnum != 0;j++) {  /* Check if feature numbers   */
78 	if((words[j]).wnum>model->totwords) /* are not larger than in     */
79 	  (words[j]).wnum=0;               /* model. Remove feature if   */
80       }                                        /* necessary.                 */
81       doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
82       t1=get_runtime();
83       dist=classify_example_linear(model,doc);
84       runtime+=(get_runtime()-t1);
85       free_example(doc,1);
86     }
87     else {                             /* non-linear kernel */
88       doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
89       t1=get_runtime();
90       dist=classify_example(model,doc);
91       runtime+=(get_runtime()-t1);
92       free_example(doc,1);
93     }
94     if(dist>0) {
95       if(pred_format==0) { /* old weired output format */
96 	fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
97       }
98       if(doc_label>0) correct++; else incorrect++;
99       if(doc_label>0) res_a++; else res_b++;
100     }
101     else {
102       if(pred_format==0) { /* old weired output format */
103 	fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
104       }
105       if(doc_label<0) correct++; else incorrect++;
106       if(doc_label>0) res_c++; else res_d++;
107     }
108     if(pred_format==1) { /* output the value of decision function */
109       fprintf(predfl,"%.8g\n",dist);
110     }
111     if((int)(0.01+(doc_label*doc_label)) != 1)
112       { no_accuracy=1; } /* test data is not binary labeled */
113     if(verbosity>=2) {
114       if(totdoc % 100 == 0) {
115 	printf("%ld..",totdoc); fflush(stdout);
116       }
117     }
118   }
119   free(line);
120   free(words);
121   free_model(model,1);
122 
123   if(verbosity>=2) {
124     printf("done\n");
125 
126 /*   Note by Gary Boone                     Date: 29 April 2000        */
127 /*      o Timing is inaccurate. The timer has 0.01 second resolution.  */
128 /*        Because classification of a single vector takes less than    */
129 /*        0.01 secs, the timer was underflowing.                       */
130     printf("Runtime (without IO) in cpu-seconds: %.2f\n",
131 	   (float)(runtime/100.0));
132 
133   }
134   if((!no_accuracy) && (verbosity>=1)) {
135     printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
136     printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
137   }
138 
139   return(0);
140 }
141 
/* Copies the file name src into dst, which must provide room for dstsize
   characters including the terminating '\0'.  A name that does not fit
   is reported on stderr and terminates the program with status 1 instead
   of overflowing the buffer. */
static void copy_filename(char *dst, const char *src, size_t dstsize)
{
  if(strlen(src) >= dstsize) {
    fprintf(stderr,"\nFile name too long (at most %lu characters): %s\n\n",
            (unsigned long)(dstsize-1),src);
    exit(1);
  }
  strcpy(dst,src);
}

/* Parses the command line of svm_classify.

   Leading arguments that start with '-' are options:
     -h      print the help text and exit
     -v <n>  verbosity level, stored in *verbosity (default 2)
     -f <n>  prediction output format, stored in *pred_format (default 1;
             only the values 0 and 1 are accepted)
   They are followed by the positional arguments
     example_file model_file [output_file]
   which are copied into docfile, modelfile and predictionsfile.  When
   output_file is omitted, predictionsfile keeps the default
   "svm_predictions".

   docfile, modelfile and predictionsfile must each point to a buffer of
   at least 200 characters (the size of the arrays main() passes in).

   On a usage error the help text is printed and the program exits; these
   paths keep their historical exit status 0.  An over-long file name
   exits with status 1. */
void read_input_parameters(int argc, char **argv, char *docfile,
                           char *modelfile, char *predictionsfile,
                           long int *verbosity, long int *pred_format)
{
  const size_t filename_size=200;  /* capacity of each file name buffer */
  long i;

  /* set default */
  strcpy (modelfile, "svm_model");
  strcpy (predictionsfile, "svm_predictions");
  (*verbosity)=2;
  (*pred_format)=1;

  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
    switch ((argv[i])[1])
      {
      case 'h': print_help(); exit(0);
      case 'v':
        i++;
        /* A trailing "-v" has no value (argv[argc] is a null pointer).
           Skip the conversion; i is then past the end, so the "not enough
           input parameters" check below reports the problem. */
        if(i<argc) (*verbosity)=atol(argv[i]);
        break;
      case 'f':
        i++;
        /* Same guard as for -v. */
        if(i<argc) (*pred_format)=atol(argv[i]);
        break;
      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
               print_help();
               exit(0);
      }
  }
  /* At least example_file and model_file must follow the options. */
  if((i+1)>=argc) {
    printf("\nNot enough input parameters!\n\n");
    print_help();
    exit(0);
  }
  copy_filename(docfile,argv[i],filename_size);
  copy_filename(modelfile,argv[i+1],filename_size);
  if((i+2)<argc) {
    copy_filename(predictionsfile,argv[i+2],filename_size);
  }
  if(((*pred_format) != 0) && ((*pred_format) != 1)) {
    printf("\nOutput format can only take the values 0 or 1!\n\n");
    print_help();
    exit(0);
  }
}
181 
print_help(void)182 void print_help(void)
183 {
184   printf("\nSVM-light %s: Support Vector Machine, classification module     %s\n",VERSION_SVMLIGHT, VERSION_DATE_SVMLIGHT);
185   copyright_notice();
186   printf("   usage: svm_classify [options] example_file model_file output_file\n\n");
187   printf("options: -h         -> this help\n");
188   printf("         -v [0..3]  -> verbosity level (default 2)\n");
189   printf("         -f [0,1]   -> 0: old output format of V1.0\n");
190   printf("                    -> 1: output the value of decision function (default)\n\n");
191 }
192 
193 
194 
195 
196