1 #include <stdio.h>
2 #include <ctype.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <errno.h>
6 #include "linear.h"
7
/* Feature array for the instance currently being predicted; allocated in
 * main() and grown on demand inside do_predict(). */
struct feature_node *x;
/* Number of feature_node slots currently allocated for x. */
int max_nr_attr = 64;

/* Model loaded by main() from the model file named on the command line. */
struct model *model_;
/* Set from the -b option; non-zero requests probability estimates. */
int flag_predict_probability = 0;
13
/*
 * Report a malformed input line on stderr and terminate the program.
 *
 * line_num: number of the offending line as counted by the caller.
 * Does not return; the process exits with status 1.
 */
void exit_input_error(int line_num)
{
	fprintf(stderr, "Wrong input format at line %d\n", line_num);
	exit(1);
}
19
/* Shared line buffer filled by readline(). The caller allocates it and sets
 * max_line_len before the first call; readline() may move it while growing. */
static char *line = NULL;
/* Current capacity of `line`, in bytes. */
static int max_line_len;

/*
 * Read one complete line from `input` into the shared `line` buffer,
 * doubling the buffer until a newline has been read or the input ends.
 *
 * Returns `line` (the trailing '\n' is kept when the input had one), or
 * NULL when nothing more can be read. Exits with status 1 if the buffer
 * cannot be grown.
 */
static char* readline(FILE *input)
{
	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	// no newline yet: the line did not fit, or the input ended without one
	while(strchr(line,'\n') == NULL)
	{
		int len;
		char *grown;

		// keep the old pointer until realloc is known to have succeeded
		grown = (char *) realloc(line,(size_t) max_line_len*2);
		if(grown == NULL)
		{
			fprintf(stderr,"can't allocate memory for input line\n");
			exit(1);
		}
		line = grown;
		max_line_len *= 2;

		// continue reading right after what is already buffered
		len = (int) strlen(line);
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
40
do_predict(FILE * input,FILE * output,struct model * model_)41 void do_predict(FILE *input, FILE *output, struct model* model_)
42 {
43 int correct = 0;
44 int total = 0;
45
46 int nr_class=get_nr_class(model_);
47 double *prob_estimates=NULL;
48 int j, n;
49 int nr_feature=get_nr_feature(model_);
50 if(model_->bias>=0)
51 n=nr_feature+1;
52 else
53 n=nr_feature;
54
55 if(flag_predict_probability)
56 {
57 int *labels;
58
59 if(!check_probability_model(model_))
60 {
61 fprintf(stderr, "probability output is only supported for logistic regression\n");
62 exit(1);
63 }
64
65 labels=(int *) malloc(nr_class*sizeof(int));
66 get_labels(model_,labels);
67 prob_estimates = (double *) malloc(nr_class*sizeof(double));
68 fprintf(output,"labels");
69 for(j=0;j<nr_class;j++)
70 fprintf(output," %d",labels[j]);
71 fprintf(output,"\n");
72 free(labels);
73 }
74
75 max_line_len = 1024;
76 line = (char *)malloc(max_line_len*sizeof(char));
77 while(readline(input) != NULL)
78 {
79 int i = 0;
80 int target_label, predict_label;
81 char *idx, *val, *label, *endptr;
82 int inst_max_index = 0; // strtol gives 0 if wrong format
83
84 label = strtok(line," \t\n");
85 if(label == NULL) // empty line
86 exit_input_error(total+1);
87
88 target_label = (int) strtol(label,&endptr,10);
89 if(endptr == label || *endptr != '\0')
90 exit_input_error(total+1);
91
92 while(1)
93 {
94 if(i>=max_nr_attr-2) // need one more for index = -1
95 {
96 max_nr_attr *= 2;
97 x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
98 }
99
100 idx = strtok(NULL,":");
101 val = strtok(NULL," \t");
102
103 if(val == NULL)
104 break;
105 errno = 0;
106 x[i].index = (int) strtol(idx,&endptr,10);
107 if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
108 exit_input_error(total+1);
109 else
110 inst_max_index = x[i].index;
111
112 errno = 0;
113 x[i].value = strtod(val,&endptr);
114 if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
115 exit_input_error(total+1);
116
117 // feature indices larger than those in training are not used
118 if(x[i].index <= nr_feature)
119 ++i;
120 }
121
122 if(model_->bias>=0)
123 {
124 x[i].index = n;
125 x[i].value = model_->bias;
126 i++;
127 }
128 x[i].index = -1;
129
130 if(flag_predict_probability)
131 {
132 int j;
133 predict_label = predict_probability(model_,x,prob_estimates);
134 fprintf(output,"%d",predict_label);
135 for(j=0;j<model_->nr_class;j++)
136 fprintf(output," %g",prob_estimates[j]);
137 fprintf(output,"\n");
138 }
139 else
140 {
141 predict_label = predict(model_,x);
142 fprintf(output,"%d\n",predict_label);
143 }
144
145 if(predict_label == target_label)
146 ++correct;
147 ++total;
148 }
149 printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
150 if(flag_predict_probability)
151 free(prob_estimates);
152 }
153
/*
 * Print the command-line usage summary on stdout and terminate the program
 * with exit status 1. Does not return.
 */
void exit_with_help()
{
	static const char usage[] =
		"Usage: predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0)\n";

	fputs(usage, stdout);
	exit(1);
}
163
main(int argc,char ** argv)164 int main(int argc, char **argv)
165 {
166 FILE *input, *output;
167 int i;
168
169 // parse options
170 for(i=1;i<argc;i++)
171 {
172 if(argv[i][0] != '-') break;
173 ++i;
174 switch(argv[i-1][1])
175 {
176 case 'b':
177 flag_predict_probability = atoi(argv[i]);
178 break;
179
180 default:
181 fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
182 exit_with_help();
183 break;
184 }
185 }
186 if(i>=argc)
187 exit_with_help();
188
189 input = fopen(argv[i],"r");
190 if(input == NULL)
191 {
192 fprintf(stderr,"can't open input file %s\n",argv[i]);
193 exit(1);
194 }
195
196 output = fopen(argv[i+2],"w");
197 if(output == NULL)
198 {
199 fprintf(stderr,"can't open output file %s\n",argv[i+2]);
200 exit(1);
201 }
202
203 if((model_=load_model(argv[i+1]))==0)
204 {
205 fprintf(stderr,"can't open model file %s\n",argv[i+1]);
206 exit(1);
207 }
208
209 x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));
210 do_predict(input, output, model_);
211 free_and_destroy_model(&model_);
212 free(line);
213 free(x);
214 fclose(input);
215 fclose(output);
216 return 0;
217 }
218
219