1 #include <stdio.h>
2 #include <ctype.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <errno.h>
6 #include "linear.h"
7 
// Feature-node buffer holding the instance currently being predicted.
// Allocated in main() and grown inside do_predict() when a line has more features.
struct feature_node *x;
// Current capacity of x, in feature_node elements; doubled whenever x is grown.
int max_nr_attr = 64;

// Model loaded by main() through load_model() and released before main() returns.
struct model* model_;
// Non-zero when probability estimates were requested with the -b option.
int flag_predict_probability=0;
13 
/*
 * Report a malformed input line on stderr and terminate the program.
 *
 * line_num: number of the offending line, as counted by the caller.
 *
 * Never returns; the process exits with status 1.
 */
void exit_input_error(int line_num)
{
	const int failure_status = 1;

	fprintf(stderr, "Wrong input format at line %d\n", line_num);
	exit(failure_status);
}
19 
// Shared line buffer filled by readline(). It must point to a heap block of
// max_line_len bytes before readline() is called; readline() may move it.
static char *line = NULL;
// Current capacity of `line` in bytes; doubled by readline() on long lines.
static int max_line_len;

/*
 * Read one complete line from `input` into the shared `line` buffer.
 *
 * The buffer is doubled as often as needed so that lines of any length fit.
 * The trailing newline, if present, is kept in the returned string; the last
 * line of a file that lacks a final newline is returned as-is.
 *
 * Returns `line`, or NULL when nothing more can be read from `input`.
 * If the buffer cannot be grown, an error is printed and the program exits
 * with status 1.
 */
static char* readline(FILE *input)
{
	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	// fgets stops when the buffer is full: no newline yet means the line was
	// cut short (or the file ended), so enlarge the buffer and keep appending
	while(strrchr(line,'\n') == NULL)
	{
		char *grown;
		int len;

		max_line_len *= 2;
		// keep the old pointer until realloc is known to have succeeded
		grown = (char *) realloc(line,max_line_len);
		if(grown == NULL)
		{
			fprintf(stderr,"can't allocate memory for input line\n");
			exit(1);
		}
		line = grown;

		len = (int) strlen(line);
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
40 
do_predict(FILE * input,FILE * output,struct model * model_)41 void do_predict(FILE *input, FILE *output, struct model* model_)
42 {
43 	int correct = 0;
44 	int total = 0;
45 
46 	int nr_class=get_nr_class(model_);
47 	double *prob_estimates=NULL;
48 	int j, n;
49 	int nr_feature=get_nr_feature(model_);
50 	if(model_->bias>=0)
51 		n=nr_feature+1;
52 	else
53 		n=nr_feature;
54 
55 	if(flag_predict_probability)
56 	{
57 		int *labels;
58 
59 		if(!check_probability_model(model_))
60 		{
61 			fprintf(stderr, "probability output is only supported for logistic regression\n");
62 			exit(1);
63 		}
64 
65 		labels=(int *) malloc(nr_class*sizeof(int));
66 		get_labels(model_,labels);
67 		prob_estimates = (double *) malloc(nr_class*sizeof(double));
68 		fprintf(output,"labels");
69 		for(j=0;j<nr_class;j++)
70 			fprintf(output," %d",labels[j]);
71 		fprintf(output,"\n");
72 		free(labels);
73 	}
74 
75 	max_line_len = 1024;
76 	line = (char *)malloc(max_line_len*sizeof(char));
77 	while(readline(input) != NULL)
78 	{
79 		int i = 0;
80 		int target_label, predict_label;
81 		char *idx, *val, *label, *endptr;
82 		int inst_max_index = 0; // strtol gives 0 if wrong format
83 
84 		label = strtok(line," \t\n");
85 		if(label == NULL) // empty line
86 			exit_input_error(total+1);
87 
88 		target_label = (int) strtol(label,&endptr,10);
89 		if(endptr == label || *endptr != '\0')
90 			exit_input_error(total+1);
91 
92 		while(1)
93 		{
94 			if(i>=max_nr_attr-2)	// need one more for index = -1
95 			{
96 				max_nr_attr *= 2;
97 				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
98 			}
99 
100 			idx = strtok(NULL,":");
101 			val = strtok(NULL," \t");
102 
103 			if(val == NULL)
104 				break;
105 			errno = 0;
106 			x[i].index = (int) strtol(idx,&endptr,10);
107 			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
108 				exit_input_error(total+1);
109 			else
110 				inst_max_index = x[i].index;
111 
112 			errno = 0;
113 			x[i].value = strtod(val,&endptr);
114 			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
115 				exit_input_error(total+1);
116 
117 			// feature indices larger than those in training are not used
118 			if(x[i].index <= nr_feature)
119 				++i;
120 		}
121 
122 		if(model_->bias>=0)
123 		{
124 			x[i].index = n;
125 			x[i].value = model_->bias;
126 			i++;
127 		}
128 		x[i].index = -1;
129 
130 		if(flag_predict_probability)
131 		{
132 			int j;
133 			predict_label = predict_probability(model_,x,prob_estimates);
134 			fprintf(output,"%d",predict_label);
135 			for(j=0;j<model_->nr_class;j++)
136 				fprintf(output," %g",prob_estimates[j]);
137 			fprintf(output,"\n");
138 		}
139 		else
140 		{
141 			predict_label = predict(model_,x);
142 			fprintf(output,"%d\n",predict_label);
143 		}
144 
145 		if(predict_label == target_label)
146 			++correct;
147 		++total;
148 	}
149 	printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
150 	if(flag_predict_probability)
151 		free(prob_estimates);
152 }
153 
/*
 * Print the command-line usage summary to stdout and terminate the program.
 *
 * Never returns; the process exits with status 1.
 */
void exit_with_help()
{
	static const char usage_text[] =
		"Usage: predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0)\n";

	fputs(usage_text, stdout);
	exit(1);
}
163 
main(int argc,char ** argv)164 int main(int argc, char **argv)
165 {
166 	FILE *input, *output;
167 	int i;
168 
169 	// parse options
170 	for(i=1;i<argc;i++)
171 	{
172 		if(argv[i][0] != '-') break;
173 		++i;
174 		switch(argv[i-1][1])
175 		{
176 			case 'b':
177 				flag_predict_probability = atoi(argv[i]);
178 				break;
179 
180 			default:
181 				fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
182 				exit_with_help();
183 				break;
184 		}
185 	}
186 	if(i>=argc)
187 		exit_with_help();
188 
189 	input = fopen(argv[i],"r");
190 	if(input == NULL)
191 	{
192 		fprintf(stderr,"can't open input file %s\n",argv[i]);
193 		exit(1);
194 	}
195 
196 	output = fopen(argv[i+2],"w");
197 	if(output == NULL)
198 	{
199 		fprintf(stderr,"can't open output file %s\n",argv[i+2]);
200 		exit(1);
201 	}
202 
203 	if((model_=load_model(argv[i+1]))==0)
204 	{
205 		fprintf(stderr,"can't open model file %s\n",argv[i+1]);
206 		exit(1);
207 	}
208 
209 	x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));
210 	do_predict(input, output, model_);
211 	free_and_destroy_model(&model_);
212 	free(line);
213 	free(x);
214 	fclose(input);
215 	fclose(output);
216 	return 0;
217 }
218 
219