1 /*-----------------------------------------------------------------------
2 * linclassif.c: Implementation of linear classification rule classifying
3 * examples from the SVM^light format.
4 *
5 * Copyright (C) 2008-2012 Vojtech Franc, xfrancv@cmp.felk.cvut.cz
6 * Soeren Sonnenburg, soeren.sonnenburg@first.fraunhofer.de
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License as published by the Free Software Foundation;
11 *-------------------------------------------------------------------- */
12
13
14 #include <stdio.h>
15 #include <string.h>
16 #include <stdint.h>
17 #include <math.h>
18 #include <stdlib.h>
19 #include <stdint.h>
20
21 #include "lib_svmlight_format.h"
22 #include "libocas.h"
23 #include "version.h"
24
25 #define MODELFILE_MAXLINELEN 1000000
26
print_usage(void)27 void print_usage(void)
28 {
29 printf("LINCLASSIF: Predict labels by linear classication rule\n"
30 " " OCAS_VERSION "\n"
31 "\n"
32 " usage: linclass [options] example_file model_file\n"
33 "\n"
34 "Arguments:\n"
35 " example_file text file with testing examples stored in SVM^light format\n"
36 " model_file text file which contains either binary (two-class) linear\n"
37 " rule f(x)=w'*x+w0 or multi-class rule f(x)=W'*x which are\n"
38 " produced by svmocas and msvmocas, respectively\n"
39 "Options:\n"
40 " -e prints classification error computed from predicted\n"
41 " labels and labels contained in the example_file.\n"
42 " -h this help\n"
43 " -o output_file save predictions to output_file rather than to stdout.\n"
44 " -t [0,1] output type: 0 .. predicted labels (default)\n"
45 " 1 .. discriminant values\n"
46 " -v [0,1] verbosity level (default 0).\n"
47 "\n"
48 "Examples\n"
49 " Train SVM classifier from riply_trn.light with regularization constant C = 10,\n"
50 " bias switched on, verbosity switched off and model saved to svmocas.model\n"
51 " ./svmocas -c 10 -b 1 -v 0 ./data/riply_trn.light ./data/svmocas.model \n"
52 "\n"
53 " Compute testing error of the classifier stored in svmocas.model using testing\n"
54 " examples from riply_tst.light and save predicted labels to riply_tst.pred\n"
55 " ./linclassif -e -o ./data/riply_tst.pred ./data/riply_tst.light ./data/svmocas.model\n"
56 "\n"
57 );
58 }
59
main(int argc,char * argv[])60 int main(int argc, char *argv[])
61 {
62 uint32_t i, j;
63 int len;
64 int recognized;
65 int exitflag = 1;
66 int verb;
67 int binary_problem;
68 int output_type;
69 int print_error;
70 char *line;
71
72 char *model_fname;
73 char *input_fname;
74 char *output_fname;
75 FILE *fid, *fout;
76
77 double *feat_val;
78 uint32_t *feat_idx;
79
80 int go = 1;
81 char *endptr, *begptr;
82 int nLines = 0;
83 int nCols = 0, tmp_nCols;
84 double val;
85 double *W;
86 double W0;
87 uint32_t nY, nDim;
88
89 long line_cnt = 0;
90 int label, pred_label=0;
91 long max_dim = 0;
92 long nnzf;
93 double dfce, max_dfce;
94 long nErrors = 0;
95 long *nClassErrors, *nClass;
96
97 /* init */
98 fid = NULL;
99 fout = NULL;
100 feat_val = NULL;
101 feat_idx = NULL;
102 W = NULL;
103 W0 = 0;
104 input_fname = NULL;
105 model_fname = NULL;
106 output_fname = NULL;
107 nClassErrors = NULL;
108 nClass = NULL;
109
110 /* default setting of input arguments*/
111 verb = 0;
112 print_error = 0;
113 output_type = 0;
114
115 /* Allocate memory */
116 line = calloc(MODELFILE_MAXLINELEN, sizeof(char));
117 if( line == NULL )
118 {
119 fprintf(stderr,"Not enough memmory to allocate line buffer.\n");
120 goto clean_up;
121 }
122
123 feat_idx = calloc(LIBSLF_MAXLINELEN, sizeof(uint32_t));
124 if( feat_idx == NULL )
125 {
126 fprintf(stderr,"Not enough memmory to allocate feat_idx.\n");
127 goto clean_up;
128 }
129
130 feat_val = calloc(LIBSLF_MAXLINELEN, sizeof(double));
131 if( feat_val == NULL )
132 {
133 fprintf(stderr,"Not enough memmory to allocate feat_val.\n");
134 goto clean_up;
135 }
136
137
138 /*-----------------------------------------------------------
139 Process input arguments
140 ------------------------------------------------------------*/
141 if(argc ==1 || strcmp(argv[1], "-h") == 0)
142 {
143 print_usage();
144 goto clean_up;
145 }
146
147 if(argc < 2)
148 {
149 fprintf(stderr,"Not enough input arguments.\n\n");
150 goto clean_up;
151 }
152
153
154 for (i = 1; i < argc-2; i++)
155 {
156 recognized = 0;
157 if (strcmp(argv[i], "-h") == 0)
158 {
159 print_usage();
160 goto clean_up;
161 }
162
163 if (strcmp(argv[i], "-e") == 0)
164 {
165 print_error = 1;
166 recognized = 1;
167 continue;
168 }
169
170 if (strcmp(argv[i], "-v") == 0)
171 {
172 if(i+1 >= argc-2)
173 {
174 fprintf(stderr,"You have to specify a value after argument -v\n");
175 goto clean_up;
176 }
177 verb = atoi(argv[i+1]);
178 if(verb < 0 || verb > 1)
179 {
180 fprintf(stderr,"A value after the argument -v must be either 0 or 1.\n");
181 goto clean_up;
182 }
183
184 i++;
185 recognized = 1;
186 continue;
187 }
188
189 if (strcmp(argv[i], "-t") == 0)
190 {
191 if(i+1 >= argc-2)
192 {
193 fprintf(stderr,"You have to specify a value after argument -t\n");
194 goto clean_up;
195 }
196 output_type = atoi(argv[i+1]);
197 if(output_type != 0 && output_type != 1)
198 {
199 fprintf(stderr,"A value after the argument -t must be either 0 or 1.\n");
200 goto clean_up;
201 }
202
203 i++;
204 recognized = 1;
205 continue;
206 }
207
208 if (strcmp(argv[i], "-o") == 0)
209 {
210 if(i+1 >= argc-2)
211 {
212 fprintf(stderr,"You have to specify a string after argument -o\n");
213 goto clean_up;
214 }
215
216 len = strlen(argv[i+1]);
217 output_fname = calloc(len+1,sizeof(char));
218 strcpy(output_fname, argv[i+1]);
219
220 i++;
221 recognized = 1;
222 continue;
223 }
224
225 if(recognized == 0)
226 {
227 fprintf(stderr,"Unknown input argument: %s\n", argv[i]);
228 goto clean_up;
229 }
230
231 }
232
233 len = strlen(argv[argc-2]);
234 input_fname = calloc(len+1,sizeof(char));
235 strcpy(input_fname, argv[argc-2]);
236
237 len = strlen(argv[argc-1]);
238 model_fname = calloc(len+1,sizeof(char));
239 strcpy(model_fname, argv[argc-1]);
240
241 if(verb)
242 {
243 printf("Verbosity: %d\n", verb);
244 printf("Output type: %d\n", output_type);
245 printf("Print error: %d\n", print_error);
246 printf("Example file: %s\n", input_fname);
247 printf("Model file: %s\n", model_fname);
248 if( output_fname != NULL)
249 printf("Output file: %s\n", output_fname);
250 else
251 printf("Output file: stdout\n");
252 }
253
254
255 /*----------------------------------------------------------------
256 Load classification rule which is either
257 vector [nDim x 1] + bias [1x1]
258 or
259 matrix [nDim x nY]
260 -------------------------------------------------------------------*/
261
262 /* load W from model file */
263 fid = fopen(model_fname, "r");
264 if(fid == NULL) {
265 fprintf(stderr,"Cannot open model file.\n");
266 perror("fopen error ");
267 goto clean_up;
268 }
269
270 if(verb)
271 {
272 printf("Analysing model file... ");
273 fflush(stdout);
274 }
275
276 /* read the first line */
277 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
278 {
279 fprintf(stderr,"Empty example file.\n");
280 fclose(fid);
281 goto clean_up;
282 }
283 else
284 {
285 nLines = 1;
286 begptr = line;
287 while(1)
288 {
289 val = strtod(begptr, &endptr);
290
291 if(val == 0 && begptr == endptr)
292 break;
293
294 nCols++;
295 begptr = endptr;
296 }
297 }
298
299 go = 1;
300 while(go)
301 {
302 begptr = line;
303
304 tmp_nCols = 0;
305 while(1)
306 {
307 val = strtod(begptr, &endptr);
308
309 if(val == 0 && begptr == endptr)
310 break;
311
312 tmp_nCols++;
313 begptr = endptr;
314 }
315 if( tmp_nCols != nCols)
316 {
317 fprintf(stderr,"Error: Model file contains lines with different number of colums.\n");
318 fclose(fid);
319 goto clean_up;
320 }
321
322 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
323 {
324 go = 0;
325 }
326 else
327 nLines++;
328 }
329
330 if(verb)
331 printf("done.\n"
332 "Number of lines: %d\n"
333 "Number of columns: %d\n",
334 nLines,nCols);
335
336 if(nCols == 1)
337 {
338 nY = 2;
339 nDim = nLines-1;
340 binary_problem = 1;
341
342 /* learned weight vector */
343 W = (double*)calloc(nDim,sizeof(double));
344 if(W == NULL)
345 {
346 fprintf(stderr,"Not enough memory for vector W.\n");
347 fclose(fid);
348 goto clean_up;
349 }
350
351 if(verb)
352 {
353 printf("Model file contains binary classification rule.\n");
354 printf("Reading model file...");
355 }
356
357 fseek(fid,0,SEEK_SET);
358 for(i=0; i <= nDim; i++)
359 {
360 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
361 {
362 fprintf(stderr,"Model file corrupted.\n");
363 fclose(fid);
364 goto clean_up;
365 }
366
367 begptr = line;
368 val = strtod(begptr, &endptr);
369
370 if(val == 0 && begptr == endptr)
371 {
372 fprintf(stderr,"Model file corrupted.\n");
373 fclose(fid);
374 goto clean_up;
375 }
376
377 if(i < nDim)
378 W[i] = val;
379 else
380 W0 = val;
381 }
382
383 if(verb)
384 printf("done.\n");
385
386 }
387 else
388 {
389 nY = nCols;
390 nDim = nLines;
391 binary_problem = 0;
392
393 /* learned weight vector */
394 W = (double*)calloc(nDim*nY,sizeof(double));
395 if(W == NULL)
396 {
397 fprintf(stderr,"Not enough memory for matrix W.\n");
398 fclose(fid);
399 goto clean_up;
400 }
401
402 if(verb)
403 {
404 printf("Model file contains multi-class classification rule.\n");
405 printf("Reading model file...");
406 }
407
408 fseek(fid,0,SEEK_SET);
409 for(i=0; i < nDim; i++)
410 {
411 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
412 {
413 fprintf(stderr,"Model file corrupted.\n");
414 fclose(fid);
415 goto clean_up;
416 }
417
418 begptr = line;
419 for(j=0; j < nY; j++)
420 {
421 val = strtod(begptr, &endptr);
422
423 if(val == 0 && begptr == endptr)
424 {
425 fprintf(stderr,"Model file corrupted.\n");
426 fclose(fid);
427 goto clean_up;
428 }
429 begptr = endptr;
430
431
432 W[LIBOCAS_INDEX(i,j,nDim)] = val;
433 }
434 }
435 if(verb)
436 printf("done.\n");
437 }
438
439 fclose(fid);
440
441 /* printf("W0=%f, W = [ ", W0);*/
442 /* for(i=0; i < nDim; i++)*/
443 /* printf("%f ", W[i]);*/
444 /* printf("]\n");*/
445 /* printf("W = [\n");*/
446 /* for(j=0; j < nDim; j++)*/
447 /* {*/
448 /* for(i=0; i < nY; i++)*/
449 /* printf("%f ", W[LIBOCAS_INDEX(j,i,nDim)]);*/
450 /* printf("\n");*/
451 /* }*/
452
453 /* load_time = get_time() - load_time;*/
454
455 /*-----------------------------------------------------
456 Read examples and classify them.
457 -----------------------------------------------------*/
458
459 fid = fopen(input_fname, "r");
460 if(fid == NULL) {
461 fprintf(stderr,"Cannot open input file.\n");
462 perror("fopen error ");
463 goto clean_up;
464 }
465
466 if(output_fname == NULL)
467 fout = stdout;
468 else
469 {
470 fout = fopen(output_fname, "w+");
471 if(fout == NULL) {
472 fprintf(stderr,"Cannot open output file.\n");
473 perror("fopen error ");
474 fclose(fid);
475 goto clean_up;
476 }
477 }
478
479 if(verb)
480 {
481 if(output_fname != NULL)
482 printf("Classifying...");
483 else
484 printf("Outputs:\n");
485 }
486
487 nClassErrors = (long*)calloc(nY,sizeof(long));
488 if(nClassErrors == NULL)
489 {
490 fprintf(stderr,"Not enough memory for vector nClassError.\n");
491 fclose(fid);
492 if(output_fname != NULL)
493 fclose(fout);
494 goto clean_up;
495 }
496
497 nClass = (long*)calloc(nY,sizeof(long));
498 if(nClass == NULL)
499 {
500 fprintf(stderr,"Not enough memory for vector nClass.\n");
501 fclose(fid);
502 if(output_fname != NULL)
503 fclose(fout);
504 goto clean_up;
505 }
506
507
508 go = 1;
509 while(go) {
510
511 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
512 {
513 go = 0;
514 }
515 else
516 {
517 line_cnt ++;
518 nnzf = svmlight_format_parse_line(line, &label, feat_idx, feat_val);
519
520 if(nnzf == -1)
521 {
522 fprintf(stderr,"Parsing error on line %ld .\n", line_cnt);
523 fprintf(stderr,"Probably defective input file.\n");
524 fclose(fid);
525 if(output_fname != NULL)
526 fclose(fout);
527 goto clean_up;
528 }
529
530 max_dim = LIBOCAS_MAX(max_dim,feat_idx[nnzf-1]);
531
532 if(binary_problem == 1)
533 {
534 dfce = W0;
535 for(i=0; i < nnzf; i++)
536 {
537 if(feat_idx[i]-1 < nDim)
538 dfce += feat_val[i]*W[feat_idx[i]-1];
539 }
540
541 if(label == +1)
542 nClass[0]++;
543 else
544 nClass[1]++;
545
546 if(dfce >=0 && label == -1)
547 {
548 nClassErrors[1]++;
549 nErrors++;
550 }
551 else if (dfce < 0 && label== +1)
552 {
553 nClassErrors[0]++;
554 nErrors++;
555 }
556
557 if(output_type == 0)
558 {
559 if(dfce >=0 )
560 fprintf(fout,"+1\n");
561 else
562 fprintf(fout,"-1\n");
563
564 }
565 else
566 fprintf(fout,"%.20f\n", dfce);
567 }
568 else
569 {
570 max_dfce = -LIBOCAS_PLUS_INF;
571 for(j=0; j < nY; j++)
572 {
573 dfce = 0;
574 for(i=0; i < nnzf; i++)
575 {
576 if(feat_idx[i]-1 < nDim)
577 dfce += feat_val[i]*W[LIBOCAS_INDEX(feat_idx[i]-1,j,nDim)];
578 }
579 if(output_type==1)
580 fprintf(fout,"%.20f ", dfce);
581
582 if(max_dfce < dfce)
583 {
584 max_dfce = dfce;
585 pred_label = j+1;
586 }
587 }
588 if(output_type==0)
589 fprintf(fout,"%d", pred_label);
590
591 fprintf(fout,"\n");
592
593 nClass[label-1]++;
594
595 if(label != pred_label)
596 {
597 nErrors++;
598 nClassErrors[label-1]++;
599 }
600
601 }
602 }
603 }
604
605 if(verb)
606 {
607 if(output_fname != NULL)
608 printf("done.\n");
609
610 printf("Number of examples: %ld\n"
611 "Maximal dimensionality: %ld\n", line_cnt, max_dim);
612 }
613 if(print_error)
614 {
615 printf("Classification error: %f%%(%ld/%ld)\n", 100.0*(double)nErrors/(double)line_cnt,nErrors,line_cnt);
616 printf("Per-class errors: ");
617 if(binary_problem)
618 {
619 printf("+1: %f%%(%ld/%ld) -1: %f%%(%ld/%ld)\n",
620 100.0*(double)nClassErrors[0]/(double)nClass[0], nClassErrors[0],nClass[0],
621 100.0*(double)nClassErrors[1]/(double)nClass[1], nClassErrors[1],nClass[1]);
622 }
623 else
624 {
625 for(i=0; i < nY; i++)
626 printf("%d: %f%%(%ld/%ld) ", i+1, 100.0*(double)nClassErrors[i]/(double)nClass[i],
627 nClassErrors[i],nClass[i]);
628 printf("\n");
629 }
630 }
631
632
633 fclose(fid);
634 if(output_fname != NULL)
635 fclose(fout);
636
637 exitflag = 0;
638
639 clean_up:
640
641 free(input_fname);
642 free(model_fname);
643 free(output_fname);
644 free(W);
645 free(line);
646 free(feat_val);
647 free(feat_idx);
648 free(nClassErrors);
649 free(nClass);
650
651 return(exitflag);
652 }
653
654
655
656