1 /*-----------------------------------------------------------------------
2 * ocas_helper.c: Implementation of helper functions for the OCAS solver.
3 *
4 *-------------------------------------------------------------------- */
5
6 #define _FILE_OFFSET_BITS 64
7
8 #include <pthread.h>
9
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdint.h>
13 #include <sys/time.h>
14 #include <stdlib.h>
15 #include <time.h>
16
17 #include "lib_svmlight_format.h"
18 #include "libocas.h"
19 #include "ocas_helper.h"
20
21 mxArray *data_X;
22 uint32_t nDim, nData, nY;
23 double *data_y;
24 cutting_plane_buf_T sparse_A;
25 double *full_A;
26 double *W;
27 double *oldW;
28 double *new_a;
29
30 double *A0;
31 double W0;
32 double oldW0;
33 double X0;
34
35 /* parallelization via threads */
36 struct thread_params_output
37 {
38 double* output;
39 uint32_t start;
40 uint32_t end;
41 };
42
43 struct thread_qsort
44 {
45 double* output;
46 /* uint32_t* index;*/
47 double* data;
48 uint32_t size;
49 };
50
51 struct thread_params_add
52 {
53 double *new_a;
54 uint32_t *new_cut;
55 uint32_t start;
56 uint32_t end;
57 };
58
59
60 typedef enum
61 {
62 FALSE = 0,
63 TRUE = 1
64 }
65 boolean;
66
67
68 static int qsort_threads;
69 static pthread_t* threads = NULL;
70 static uint32_t* thread_slices = NULL;
71 static int num_threads;
72 static const int sort_limit=4096;
73 static struct thread_params_output* params_output;
74 static struct thread_params_add* params_add;
75
76 /* use multi-threads only if minimal number of examples to add is higher than the constant*/
77 static const uint32_t MinimalParallelCutLenght = 100;
78
79
80 /*----------------------------------------------------------------------
81 ----------------------------------------------------------------------*/
full_add_nnw_constr(uint32_t idx,uint32_t nSel,void * user_data)82 int full_add_nnw_constr(uint32_t idx, uint32_t nSel, void* user_data)
83 {
84 full_A[LIBOCAS_INDEX(idx,nSel,nDim)] = 1.0;
85 A0[nSel] = 0.0;
86
87 return( 0 );
88 }
89
90
91 /*----------------------------------------------------------------------
92 ----------------------------------------------------------------------*/
sparse_add_nnw_constr(uint32_t idx,uint32_t nSel,void * user_data)93 int sparse_add_nnw_constr(uint32_t idx, uint32_t nSel, void* user_data)
94 {
95 sparse_A.nz_dims[nSel] = 1;
96 sparse_A.index[nSel] = NULL;
97 sparse_A.value[nSel] = NULL;
98 sparse_A.index[nSel] = mxCalloc(1,sizeof(uint32_t));
99 sparse_A.value[nSel] = mxCalloc(1,sizeof(double));
100 if(sparse_A.index[nSel]==NULL || sparse_A.value[nSel]==NULL)
101 {
102 mxFree(sparse_A.index[nSel]);
103 mxFree(sparse_A.value[nSel]);
104 return(-1);
105 }
106
107 sparse_A.index[nSel][0] = idx;
108 sparse_A.value[nSel][0] = 1.0;
109
110 A0[nSel] = 0.0;
111 return( 0 );
112 }
113
114
115 /*----------------------------------------------------------------------
116 ----------------------------------------------------------------------*/
clip_neg_W(uint32_t num_pw_constr,uint32_t * pw_idx,void * user_data)117 void clip_neg_W( uint32_t num_pw_constr, uint32_t *pw_idx, void* user_data )
118 {
119 uint32_t i;
120 for(i=0; i< num_pw_constr; i++ ) {
121 W[LIBOCAS_INDEX(pw_idx[i],0,nDim)] = LIBOCAS_MAX(0,W[LIBOCAS_INDEX(pw_idx[i],0,nDim)]);
122 }
123 }
124
125
126 /*----------------------------------------------------------------------
127 sq_norm_W = sparse_compute_W( alpha, nSel ) does the following:
128
129 oldW = W;
130 W = sparse_A(:,1:nSel)'*alpha;
131 sq_norm_W = W'*W;
132 dp_WoldW = W'*oldW';
133
134 ----------------------------------------------------------------------*/
sparse_compute_W(double * sq_norm_W,double * dp_WoldW,double * alpha,uint32_t nSel,void * user_data)135 void sparse_compute_W( double *sq_norm_W,
136 double *dp_WoldW,
137 double *alpha,
138 uint32_t nSel,
139 void* user_data )
140 {
141 uint32_t i,j, nz_dims;
142
143 memcpy(oldW, W, sizeof(double)*nDim );
144 memset(W, 0, sizeof(double)*nDim);
145
146 oldW0 = W0;
147 W0 = 0;
148
149 for(i=0; i < nSel; i++) {
150 nz_dims = sparse_A.nz_dims[i];
151 if(nz_dims > 0 && alpha[i] > 0) {
152 for(j=0; j < nz_dims; j++) {
153 W[sparse_A.index[i][j]] += alpha[i]*sparse_A.value[i][j];
154 }
155 }
156 W0 += A0[i]*alpha[i];
157 }
158
159 *sq_norm_W = W0*W0;
160 *dp_WoldW = W0*oldW0;
161 for(j=0; j < nDim; j++) {
162 *sq_norm_W += W[j]*W[j];
163 *dp_WoldW += W[j]*oldW[j];
164 }
165
166 return;
167 }
168
169
170 /*----------------------------------------------------------------------
171 sq_norm_W = sparse_compute_W( alpha, nSel ) does the following:
172
173 oldW = W;
174 W = sparse_A(:,1:nSel)'*alpha;
175 sq_norm_W = W'*W;
176 dp_WoldW = W'*oldW';
177
178 ----------------------------------------------------------------------*/
msvm_sparse_compute_W(double * sq_norm_W,double * dp_WoldW,double * alpha,uint32_t nSel,void * user_data)179 void msvm_sparse_compute_W( double *sq_norm_W,
180 double *dp_WoldW,
181 double *alpha,
182 uint32_t nSel,
183 void* user_data )
184 {
185 uint32_t i,j, nz_dims;
186
187 memcpy(oldW, W, sizeof(double)*nY*nDim );
188 memset(W, 0, sizeof(double)*nY*nDim );
189
190 for(i=0; i < nSel; i++) {
191 nz_dims = sparse_A.nz_dims[i];
192 if(nz_dims > 0 && alpha[i] > 0) {
193 for(j=0; j < nz_dims; j++) {
194 W[sparse_A.index[i][j]] += alpha[i]*sparse_A.value[i][j];
195 }
196 }
197 }
198
199 *sq_norm_W = 0;
200 *dp_WoldW = 0;
201 for(j=0; j < nY*nDim; j++)
202 {
203 *sq_norm_W += W[j]*W[j];
204 *dp_WoldW += W[j]*oldW[j];
205 }
206
207 return;
208 }
209
210
211 /*----------------------------------------------------------------------------------
212 sq_norm_W = sparse_update_W( t ) does the following:
213
214 W = oldW*(1-t) + t*W;
215 sq_norm_W = W'*W;
216
217 ---------------------------------------------------------------------------------*/
msvm_update_W(double t,void * user_data)218 double msvm_update_W( double t, void* user_data )
219 {
220 uint32_t j;
221 double sq_norm_W;
222
223 sq_norm_W = 0;
224
225 for(j=0; j < nY*nDim; j++) {
226 W[j] = oldW[j]*(1-t) + t*W[j];
227 sq_norm_W += W[j]*W[j];
228 }
229
230 return( sq_norm_W );
231 }
232
233
234 /*-------------------------------------------------------------------------
235 sq_norm_W = full_update_W( t ) does the following:
236
237 W = oldW*(1-t) + t*W;
238 sq_norm_W = W'*W;
239 ---------------------------------------------------------------------------*/
update_W(double t,void * user_data)240 double update_W( double t, void* user_data )
241 {
242 uint32_t j;
243 double sq_norm_W;
244
245 W0 = oldW0*(1-t) + t*W0;
246 sq_norm_W = W0*W0;
247
248 for(j=0; j <nDim; j++) {
249 W[j] = oldW[j]*(1-t) + t*W[j];
250 sq_norm_W += W[j]*W[j];
251 }
252
253 return( sq_norm_W );
254 }
255
256
257 /*----------------------------------------------------------------------
258 sq_norm_W = full_compute_W( alpha, nSel ) does the following:
259
260 oldW = W;
261 W = full_A(:,1:nSel)'*alpha;
262 sq_norm_W = W'*W;
263 dp_WoldW = W'*oldW';
264
265 ----------------------------------------------------------------------*/
msvm_full_compute_W(double * sq_norm_W,double * dp_WoldW,double * alpha,uint32_t nSel,void * user_data)266 void msvm_full_compute_W( double *sq_norm_W, double *dp_WoldW, double *alpha, uint32_t nSel, void* user_data )
267 {
268 uint32_t i,j;
269
270 memcpy(oldW, W, sizeof(double)*nDim*nY );
271 memset(W, 0, sizeof(double)*nDim*nY);
272
273 for(i=0; i < nSel; i++) {
274 if( alpha[i] > 0 ) {
275 for(j=0; j< nDim*nY; j++ ) {
276 W[j] += alpha[i]*full_A[LIBOCAS_INDEX(j,i,nDim*nY)];
277 }
278
279 }
280 }
281
282 *sq_norm_W = 0;
283 *dp_WoldW = 0;
284 for(j=0; j < nDim*nY; j++) {
285 *sq_norm_W += W[j]*W[j];
286 *dp_WoldW += W[j]*oldW[j];
287 }
288
289 return;
290 }
291
292
293 /*-----------------------------------------------------------------------
294 Print statistics.
295 -----------------------------------------------------------------------*/
ocas_print(ocas_return_value_T value)296 void ocas_print(ocas_return_value_T value)
297 {
298 mexPrintf("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n",
299 value.nIter,value.ocas_time, value.Q_P,value.Q_D,value.Q_P-value.Q_D,(value.Q_P-value.Q_D)/LIBOCAS_ABS(value.Q_P),
300 value.nNZAlpha, 100*(double)value.trn_err/(double)nData, value.qp_exitflag );
301 }
302
ocas_print_null(ocas_return_value_T value)303 void ocas_print_null(ocas_return_value_T value)
304 {
305 return;
306 }
307
308
309 /*-----------------------------------------------------------------------
310 Get absolute time in seconds.
311 -----------------------------------------------------------------------*/
get_time()312 double get_time()
313 {
314 struct timeval tv;
315 if (gettimeofday(&tv, NULL)==0)
316 return tv.tv_sec+((double)(tv.tv_usec))/1e6;
317 else
318 return 0.0;
319 }
320
321
322 /*----------------------------------------------------------------------
323 in-place computes sparse_mat(:,col)= alpha * sparse_mat(:,col)
324 where alpha is a scalar and sparse_mat is Matlab sparse matrix.
325 ----------------------------------------------------------------------*/
mul_sparse_col(double alpha,mxArray * sparse_mat,uint32_t col)326 void mul_sparse_col(double alpha, mxArray *sparse_mat, uint32_t col)
327 {
328 uint32_t nItems, ptr, i;
329 INDEX_TYPE_T *Jc;
330 double *Pr;
331
332 Jc = mxGetJc(sparse_mat);
333 Pr = mxGetPr(sparse_mat);
334
335 nItems = Jc[col+1] - Jc[col];
336 ptr = Jc[col];
337
338 for(i=0; i < nItems; i++)
339 Pr[ptr++]*=alpha;
340 }
341
342
343 /*----------------------------------------------------------------------
344 It computes full_vec = full_vec + sparse_mat(:,col)
345 where full_vec is a double array and sparse_mat is Matlab
346 sparse matrix.
347 ----------------------------------------------------------------------*/
add_sparse_col(double * full_vec,mxArray * sparse_mat,uint32_t col)348 void add_sparse_col(double *full_vec, mxArray *sparse_mat, uint32_t col)
349 {
350 uint32_t nItems, ptr, i, row;
351 INDEX_TYPE_T *Ir, *Jc;
352 double *Pr, val;
353
354 Ir = mxGetIr(sparse_mat);
355 Jc = mxGetJc(sparse_mat);
356 Pr = mxGetPr(sparse_mat);
357
358 nItems = Jc[col+1] - Jc[col];
359 ptr = Jc[col];
360
361 for(i=0; i < nItems; i++) {
362 val = Pr[ptr];
363 row = Ir[ptr++];
364
365 full_vec[row] += val;
366 }
367 }
368
369
370 /*----------------------------------------------------------------------
371 It computes full_vec = full_vec - sparse_mat(:,col)
372 where full_vec is a double array and sparse_mat is Matlab
373 sparse matrix.
374 ----------------------------------------------------------------------*/
subtract_sparse_col(double * full_vec,mxArray * sparse_mat,uint32_t col)375 void subtract_sparse_col(double *full_vec, mxArray *sparse_mat, uint32_t col)
376 {
377 uint32_t nItems, ptr, i, row;
378 INDEX_TYPE_T *Ir, *Jc;
379 double *Pr, val;
380
381 Ir = mxGetIr(sparse_mat);
382 Jc = mxGetJc(sparse_mat);
383 Pr = mxGetPr(sparse_mat);
384
385 nItems = Jc[col+1] - Jc[col];
386 ptr = Jc[col];
387
388 for(i=0; i < nItems; i++) {
389 val = Pr[ptr];
390 row = Ir[ptr++];
391
392 full_vec[row] -= val;
393 }
394 }
395
396
397 /*----------------------------------------------------------------------
398 It computes dp = full_vec'*sparse_mat(:,col)
399 where full_vec is a double array and sparse_mat is Matlab
400 sparse matrix.
401 ----------------------------------------------------------------------*/
dp_sparse_col(double * full_vec,mxArray * sparse_mat,uint32_t col)402 double dp_sparse_col(double *full_vec, mxArray *sparse_mat, uint32_t col)
403 {
404 uint32_t nItems, ptr, i, row;
405 INDEX_TYPE_T *Ir, *Jc;
406 double *Pr, val, dp;
407
408 Ir = mxGetIr(sparse_mat);
409 Jc = mxGetJc(sparse_mat);
410 Pr = mxGetPr(sparse_mat);
411
412 dp = 0;
413 nItems = Jc[col+1] - Jc[col];
414 ptr = Jc[col];
415
416 for(i=0; i < nItems; i++) {
417 val = Pr[ptr];
418 row = Ir[ptr++];
419
420 dp += full_vec[row]*val;
421 }
422
423 return(dp);
424 }
425
426
427 /*----------------------------------------------------------------------
428 sq_norm_W = full_compute_W( alpha, nSel ) does the following:
429
430 oldW = W;
431 W = full_A(:,1:nSel)'*alpha;
432 sq_norm_W = W'*W;
433 dp_WoldW = W'*oldW';
434
435 ----------------------------------------------------------------------*/
full_compute_W(double * sq_norm_W,double * dp_WoldW,double * alpha,uint32_t nSel,void * user_data)436 void full_compute_W( double *sq_norm_W, double *dp_WoldW, double *alpha, uint32_t nSel, void* user_data )
437 {
438 uint32_t i,j;
439
440 memcpy(oldW, W, sizeof(double)*nDim );
441 memset(W, 0, sizeof(double)*nDim);
442
443 oldW0 = W0;
444 W0 = 0;
445
446 for(i=0; i < nSel; i++) {
447 if( alpha[i] > 0 ) {
448 for(j=0; j< nDim; j++ ) {
449 W[j] += alpha[i]*full_A[LIBOCAS_INDEX(j,i,nDim)];
450 }
451
452 W0 += A0[i]*alpha[i];
453 }
454 }
455
456 *sq_norm_W = W0*W0;
457 *dp_WoldW = W0*oldW0;
458 for(j=0; j < nDim; j++) {
459 *sq_norm_W += W[j]*W[j];
460 *dp_WoldW += W[j]*oldW[j];
461 }
462
463 return;
464 }
465
466
467
468 /*----------------------------------------------------------------------
469 parallel_sparse_compute_output( output, user_data ) does the following
470
471 output = data_X'*W + W0*X0*y[i];
472 ----------------------------------------------------------------------*/
473
parallel_compute_output_helper(void * p)474 static void* parallel_compute_output_helper(void* p)
475 {
476 struct thread_params_output* params = (struct thread_params_output*) p;
477 double* output=params->output;
478 uint32_t start=params->start;
479 uint32_t end=params->end;
480 uint32_t i;
481
482 for(i=start; i < end; i++)
483 output[i] = data_y[i]*X0*W0 + dp_sparse_col(W, data_X, i);
484
485 return(NULL);
486 }
487
parallel_sparse_compute_output(double * output,void * user_data)488 int parallel_sparse_compute_output( double *output, void* user_data )
489 {
490 /* one-thraed code looks like:
491
492 uint32_t i;
493
494 for(i=0; i < nData; i++) {
495 output[i] = data_y[i]*X0*W0 + dp_sparse_col(W, data_X, i);
496 }
497 */
498
499 /* struct thread_params_output params;*/
500
501 int nthreads=num_threads-1;
502 int end=0;
503 int t;
504
505 if (nData < num_threads)
506 {
507 nthreads=nData-1;
508 }
509
510 for (t=0; t<nthreads; t++)
511 {
512 params_output[t].output = output;
513 if (t==0)
514 params_output[t].start = 0;
515 else
516 params_output[t].start = thread_slices[t-1];
517 params_output[t].end = thread_slices[t];
518
519 if (pthread_create(&threads[t], NULL, parallel_compute_output_helper, (void*)¶ms_output[t]) != 0)
520 {
521 mexPrintf("\nError: Thread creation failed.\n");
522 return(-1);
523 }
524
525 end=params_output[t].end;
526 }
527
528 params_output[t].output = output;
529 params_output[t].start = end;
530 params_output[t].end = nData;
531
532 parallel_compute_output_helper(¶ms_output[t]);
533
534 for (t=0; t<nthreads; t++)
535 {
536 if (pthread_join(threads[t], NULL) != 0)
537 {
538 mexPrintf("\nError: pthread_join failed.\n");
539 return(-1);
540 }
541 }
542
543 return 0;
544 }
545
546 /* Ihis function initialize parallel processing . It must be
547 called prior to paralel_XXXX OCAS heleper functions. */
init_parallel_ocas(int number_of_threads)548 int init_parallel_ocas(int number_of_threads)
549 {
550 num_threads = number_of_threads;
551
552 thread_slices = (uint32_t*)mxCalloc(num_threads,sizeof(uint32_t));
553 if(thread_slices == NULL)
554 {
555 mexPrintf("Not enough memory for vector num_threads.");
556 goto clean_up;
557
558 }
559
560 threads = (pthread_t*)mxCalloc(num_threads,sizeof(pthread_t));
561 if(threads== NULL)
562 {
563 mexPrintf("Not enough memory for threads structure.");
564 goto clean_up;
565 }
566
567 params_output = (struct thread_params_output*)mxCalloc(num_threads,sizeof(struct thread_params_output));
568 if(params_output== NULL)
569 {
570 mexPrintf("Not enough memory for params structure.");
571 goto clean_up;
572 }
573
574 params_add = (struct thread_params_add*)mxCalloc(num_threads,sizeof(struct thread_params_add));
575 if(params_add== NULL)
576 {
577 mexPrintf("Not enough memory for params structure.");
578 goto clean_up;
579 }
580
581 /* The following code finds splits of the data such that each
582 data slice contains approximately the same number of nonzero elements. */
583 uint32_t i;
584 int nnz_split = 0;
585 INDEX_TYPE_T* Jc = mxGetJc(data_X);
586
587 for(i=0; i < nData; i++)
588 nnz_split += Jc[i+1] - Jc[i];
589
590 nnz_split/=num_threads;
591 uint64_t accum_nnz = 0;
592 int thr = 0;
593
594 mexPrintf("Data slices: ");
595 for(i=0; i < nData; i++)
596 {
597 INDEX_TYPE_T nItems = Jc[i+1] - Jc[i];
598
599 if (accum_nnz < nnz_split*(thr+1))
600 accum_nnz+=nItems;
601 else
602 {
603 thread_slices[thr]=i;
604 mexPrintf("%d ", thread_slices[thr]);
605 accum_nnz+=nItems;
606 thr++;
607 }
608 }
609 mexPrintf("\n");
610
611 return(0);
612
613 clean_up:
614
615 mxFree(threads);
616 mxFree(thread_slices);
617
618 return(-1);
619 }
620
621 /* release memory allocated for parallelized functions */
destroy_parallel_ocas(void)622 void destroy_parallel_ocas(void)
623 {
624 mxFree(params_add);
625 mxFree(params_output);
626 mxFree(threads);
627 mxFree(thread_slices);
628 }
629
630 /*----------------------------------------------------------------------------------
631 Parallel version of
632 sparse_add_new_cut( new_col_H, new_cut, cut_length, nSel ) does the following:
633
634 new_a = sum(data_X(:,find(new_cut ~=0 )),2);
635 new_col_H = [sparse_A(:,1:nSel)'*new_a ; new_a'*new_a];
636 sparse_A(:,nSel+1) = new_a;
637
638 ---------------------------------------------------------------------------------*/
639
parallel_sparse_add_helper(void * p)640 static void* parallel_sparse_add_helper(void* p)
641 {
642 struct thread_params_add* params = (struct thread_params_add*) p;
643 double* local_new_a=params->new_a;
644 uint32_t* new_cut=params->new_cut;
645 uint32_t start=params->start;
646 uint32_t end=params->end;
647 uint32_t i;
648
649 for(i=start; i <= end; i++)
650 add_sparse_col(local_new_a, data_X, new_cut[i]);
651
652 return(NULL);
653 }
654
parallel_sparse_add_new_cut(double * new_col_H,uint32_t * new_cut,uint32_t cut_length,uint32_t nSel,void * user_data)655 int parallel_sparse_add_new_cut( double *new_col_H,
656 uint32_t *new_cut,
657 uint32_t cut_length,
658 uint32_t nSel,
659 void* user_data )
660 {
661 /* double *new_a, */
662 double sq_norm_a;
663 uint32_t i, j, nz_dims, ptr;
664
665 /* temporary vector */
666 /* new_a = (double*)mxCalloc(nDim,sizeof(double));*/
667 /* if(new_a == NULL) */
668 /* return(-1); */
669 memset(new_a, 0, sizeof(double)*nDim*num_threads);
670
671 if((cut_length < MinimalParallelCutLenght) && (cut_length >= num_threads))
672 {
673
674 for(i=0; i < cut_length; i++) {
675 add_sparse_col(new_a, data_X, new_cut[i]);
676
677 A0[nSel] += X0*data_y[new_cut[i]];
678 }
679 }
680 else
681 {
682
683 /* struct thread_params_add params;*/
684 /* params.new_cut = new_cut;*/
685
686 uint32_t chunk = cut_length/num_threads;
687 uint32_t start = 0;
688 uint32_t end = chunk-1;
689 int t;
690 for(t = 0; t < num_threads-1; t++)
691 {
692 params_add[t].start = start;
693 params_add[t].end = end;
694 params_add[t].new_a = &new_a[nDim*(t+1)];
695 params_add[t].new_cut = new_cut;
696
697 start = end+1;
698 end = end+chunk;
699
700 if (pthread_create(&threads[t], NULL, parallel_sparse_add_helper, (void*)¶ms_add[t]) != 0)
701 {
702 mexPrintf("\nError: Thread creation failed.\n");
703 return(-1);
704 }
705
706 }
707
708 params_add[t].start = start;
709 params_add[t].end = cut_length-1;
710 params_add[t].new_a = new_a;
711 params_add[t].new_cut = new_cut;
712
713 parallel_sparse_add_helper((void*)¶ms_add[t]);
714
715 for (t=0; t<num_threads-1; t++)
716 {
717 if (pthread_join(threads[t], NULL) != 0)
718 {
719 return(-1);
720 }
721
722 double* a = &new_a[nDim*(t+1)];
723
724 for (i=0; i<nDim; i++)
725 new_a[i]+=a[i];
726 }
727
728 for(i=0; i < cut_length; i++)
729 A0[nSel] += X0*data_y[new_cut[i]];
730 }
731
732 /* compute new_a'*new_a and count number of non-zero dimensions */
733 nz_dims = 0;
734 sq_norm_a = A0[nSel]*A0[nSel];
735 for(j=0; j < nDim; j++ ) {
736 if(new_a[j] != 0) {
737 nz_dims++;
738 sq_norm_a += new_a[j]*new_a[j];
739 }
740 }
741
742 /* sparsify new_a and insert it to the last column of sparse_A */
743 sparse_A.nz_dims[nSel] = nz_dims;
744 if(nz_dims > 0) {
745 sparse_A.index[nSel] = NULL;
746 sparse_A.value[nSel] = NULL;
747 sparse_A.index[nSel] = mxCalloc(nz_dims,sizeof(uint32_t));
748 sparse_A.value[nSel] = mxCalloc(nz_dims,sizeof(double));
749 if(sparse_A.index[nSel]==NULL || sparse_A.value[nSel]==NULL)
750 {
751 /* mexErrMsgTxt("Not enough memory for vector sparse_A.index[nSel], sparse_A.value[nSel].");*/
752 mxFree(sparse_A.index[nSel]);
753 mxFree(sparse_A.value[nSel]);
754 return(-1);
755 }
756
757 ptr = 0;
758 for(j=0; j < nDim; j++ ) {
759 if(new_a[j] != 0) {
760 sparse_A.index[nSel][ptr] = j;
761 sparse_A.value[nSel][ptr++] = new_a[j];
762 }
763 }
764 }
765
766 new_col_H[nSel] = sq_norm_a;
767 for(i=0; i < nSel; i++) {
768 double tmp = A0[nSel]*A0[i];
769
770 for(j=0; j < sparse_A.nz_dims[i]; j++) {
771 tmp += new_a[sparse_A.index[i][j]]*sparse_A.value[i][j];
772 }
773
774 new_col_H[i] = tmp;
775 }
776
777 /* mxFree( new_a );*/
778
779 return 0;
780 }
781
782
783 /*=======================================================================
784 OCAS helper functions for sorting numbers.
785 =======================================================================*/
swapf(double * a,double * b)786 static void swapf(double* a, double* b)
787 {
788 double dummy=*b;
789 *b=*a;
790 *a=dummy;
791 }
792
793
794 /* sort arrays value and data according to value in ascending order */
qsort_data(double * value,double * data,uint32_t size)795 int qsort_data(double* value, double* data, uint32_t size)
796 {
797 if(size == 1)
798 return 0;
799
800 if (size==2)
801 {
802 if (value[0] > value[1])
803 {
804 swapf(&value[0], &value[1]);
805 /* swapi(&data[0], &data[1]);*/
806 swapf(&data[0], &data[1]);
807 }
808 return 0;
809 }
810 double split=value[size/2];
811
812 uint32_t left=0;
813 uint32_t right=size-1;
814
815 while (left<=right)
816 {
817 while (value[left] < split)
818 left++;
819 while (value[right] > split)
820 right--;
821
822 if (left<=right)
823 {
824 swapf(&value[left], &value[right]);
825 /* swapi(&data[left], &data[right]);*/
826 swapf(&data[left], &data[right]);
827 left++;
828 right--;
829 }
830 }
831
832 if (right+1> 1)
833 qsort_data(value,data,right+1);
834
835 if (size-left> 1)
836 qsort_data(&value[left],&data[left], size-left);
837
838
839 return 0;
840 }
841
842 /*-------------------------------------------------------------------------
843 Parallel version of qsort_data.
844 -------------------------------------------------------------------------*/
parallel_qsort_helper(void * p)845 void* parallel_qsort_helper(void* p)
846 {
847 struct thread_qsort* ps=(struct thread_qsort*) p;
848 double* output=ps->output;
849 /* uint32_t* data=ps->data;*/
850 double* data=ps->data;
851 uint32_t size=ps->size;
852
853 if(size == 1)
854 return 0;
855
856
857 if (size==2)
858 {
859 if (output[0] > output [1])
860 {
861 swapf(&output[0], &output[1]);
862 /* swapi(&data[0], &data[1]);*/
863 swapf(&data[0], &data[1]);
864 }
865 return(NULL);
866 }
867 /*double split=output[(((uint64_t) size)*rand())/(((uint64_t)RAND_MAX)+1)];*/
868 double split=output[size/2];
869
870 uint32_t left=0;
871 uint32_t right=size-1;
872
873 while (left<=right)
874 {
875 while (output[left] < split)
876 left++;
877 while (output[right] > split)
878 right--;
879
880 if (left<=right)
881 {
882 swapf(&output[left], &output[right]);
883 /* swapi(&index[left], &index[right]);*/
884 swapf(&data[left], &data[right]);
885 left++;
886 right--;
887 }
888 }
889 boolean lthread_start=FALSE;
890 boolean rthread_start=FALSE;
891 pthread_t lthread;
892 pthread_t rthread;
893 struct thread_qsort t1;
894 struct thread_qsort t2;
895
896 if (right+1> 1 && (right+1< sort_limit || qsort_threads >= num_threads-1))
897 qsort_data(output,data,right+1);
898 else if (right+1> 1)
899 {
900 qsort_threads++;
901 lthread_start=TRUE;
902 t1.output=output;
903 t1.data=data;
904 t1.size=right+1;
905 if (pthread_create(<hread, NULL, parallel_qsort_helper, (void*) &t1) != 0)
906 {
907 lthread_start=FALSE;
908 qsort_threads--;
909 qsort_data(output,data,right+1);
910 }
911 }
912
913
914 if (size-left> 1 && (size-left< sort_limit || qsort_threads >= num_threads-1))
915 qsort_data(&output[left],&data[left], size-left);
916 else if (size-left> 1)
917 {
918 qsort_threads++;
919 rthread_start=TRUE;
920 t2.output=&output[left];
921 t2.data=&data[left];
922 t2.size=size-left;
923 if (pthread_create(&rthread, NULL, parallel_qsort_helper, (void*)&t2) != 0)
924 {
925 rthread_start=FALSE;
926 qsort_threads--;
927 qsort_data(&output[left],&data[left], size-left);
928 }
929 }
930
931 if (lthread_start)
932 {
933 pthread_join(lthread, NULL);
934 qsort_threads--;
935 }
936
937 if (rthread_start)
938 {
939 pthread_join(rthread, NULL);
940 qsort_threads--;
941 }
942
943 return(NULL);
944 }
945
946
parallel_qsort_data(double * value,double * data,uint32_t size)947 int parallel_qsort_data(double* value, double* data, uint32_t size)
948 {
949 struct thread_qsort qthr;
950
951 qsort_threads=0;
952
953 qthr.output=value;
954 qthr.data=data;
955 qthr.size=size;
956 parallel_qsort_helper((void*)&qthr);
957
958 return(0);
959 }
960
961
962
963
964 /* ---------------------------------------------------------------------------------
965 This function loads regularization constants from a text file. Each line contains
966 a single constant.
967 ---------------------------------------------------------------------------------*/
load_regconsts(char * fname,double ** vec_C,uint32_t * len_vec_C,int verb)968 int load_regconsts(char *fname, double **vec_C, uint32_t *len_vec_C, int verb)
969 {
970 double C;
971 char *line = NULL;
972 int exitflag = 0;
973 FILE *fid;
974
975 if(verb) mexPrintf("Input file: %s\n", fname);
976
977 fid = fopen(fname, "r");
978 if(fid == NULL) {
979 perror("fopen error ");
980 mexPrintf("Cannot open input file.\n");
981 exitflag = -1;
982 goto clean_up;
983 }
984
985 line = mxCalloc(LIBSLF_MAXLINELEN, sizeof(char));
986 if( line == NULL )
987 {
988 mexPrintf("Not enough memmory to allocate line buffer.\n");
989 exitflag = -1;
990 goto clean_up;
991 }
992
993
994 if(verb) mexPrintf("Counting regularization constants...");
995 int go = 1;
996 long line_cnt = 0;
997 while(go) {
998
999 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
1000 {
1001 go = 0;
1002 if(verb)
1003 {
1004 if( (line_cnt % 1000) != 0)
1005 mexPrintf(" %ld", line_cnt);
1006 mexPrintf(" EOF.\n");
1007 }
1008
1009 }
1010 else
1011 {
1012 line_cnt ++;
1013
1014 C = atof(line);
1015
1016 if(verb)
1017 {
1018 if( (line_cnt % 1000) == 0) {
1019 mexPrintf(" %ld", line_cnt);
1020 fflush(NULL);
1021 }
1022 }
1023 }
1024 }
1025
1026 *vec_C = (double*)mxCalloc(line_cnt, sizeof(double));
1027 if( vec_C == NULL )
1028 {
1029 mexPrintf("Not enough memmory to allocate vec_C.\n");
1030 exitflag = -1;
1031 goto clean_up;
1032 }
1033
1034 fseek(fid, 0, SEEK_SET);
1035
1036 if(verb) mexPrintf("Reading regularization constants...");
1037 go = 1;
1038 line_cnt = 0;
1039 while(go)
1040 {
1041
1042 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
1043 {
1044 go = 0;
1045 if(verb)
1046 {
1047 if( (line_cnt % 1000) != 0)
1048 mexPrintf(" %ld", line_cnt);
1049 mexPrintf(" EOF.\n");
1050 }
1051
1052 }
1053 else
1054 {
1055 (*vec_C)[line_cnt] = atof(line);
1056 line_cnt ++;
1057
1058 if(verb)
1059 {
1060 if( (line_cnt % 1000) == 0) {
1061 mexPrintf(" %ld", line_cnt);
1062 fflush(NULL);
1063 }
1064 }
1065 }
1066 }
1067
1068 fclose(fid);
1069 *len_vec_C = line_cnt;
1070
1071 clean_up:
1072 mxFree(line);
1073
1074 return(exitflag);
1075 }
1076
1077
1078
1079 /* ---------------------------------------------------------------------------------
1080 This function loads SVMlight data file to sparse matlab matrix data_X and
1081 dense vector data_y which both are assumed to global variables.
1082 ---------------------------------------------------------------------------------*/
load_svmlight_file(char * fname,int verb)1083 int load_svmlight_file(char *fname, int verb)
1084 {
1085 char *line = NULL;
1086 FILE *fid;
1087 double *feat_val = NULL;
1088 double sparse_memory_requirement, full_memory_requirement;
1089 uint32_t *feat_idx = NULL;
1090 long nnzf;
1091 int max_dim = 0;
1092 long j;
1093 uint64_t nnz = 0;
1094 mwSize *irs = NULL, *jcs = NULL;
1095 int exitflag = 0;
1096 double *sr = NULL;
1097
1098 /* mexPrintf("Input file: %s\n", fname);*/
1099
1100 fid = fopen(fname, "r");
1101 if(fid == NULL) {
1102 perror("fopen error ");
1103 mexPrintf("Cannot open input file.\n");
1104 exitflag = -1;
1105 goto clean_up;
1106 }
1107
1108 line = mxCalloc(LIBSLF_MAXLINELEN, sizeof(char));
1109 if( line == NULL )
1110 {
1111 mexPrintf("Not enough memmory to allocate line buffer.\n");
1112 exitflag = -1;
1113 goto clean_up;
1114 }
1115
1116 feat_idx = mxCalloc(LIBSLF_MAXLINELEN, sizeof(uint32_t));
1117 if( feat_idx == NULL )
1118 {
1119 mexPrintf("Not enough memmory to allocate feat_idx.\n");
1120 exitflag = -1;
1121 goto clean_up;
1122 }
1123
1124 feat_val = mxCalloc(LIBSLF_MAXLINELEN, sizeof(double));
1125 if( feat_val == NULL )
1126 {
1127 mexPrintf("Not enough memmory to allocate feat_val.\n");
1128 exitflag = -1;
1129 goto clean_up;
1130 }
1131
1132 if(verb) mexPrintf("Analysing input data...");
1133 int label;
1134 int go = 1;
1135 long line_cnt = 0;
1136
1137 while(go) {
1138
1139 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
1140 {
1141 go = 0;
1142 if(verb)
1143 {
1144 if( (line_cnt % 1000) != 0)
1145 mexPrintf(" %ld", line_cnt);
1146 mexPrintf(" EOF.\n");
1147 }
1148
1149 }
1150 else
1151 {
1152 line_cnt ++;
1153 nnzf = svmlight_format_parse_line(line, &label, feat_idx, feat_val);
1154
1155 if(nnzf == -1)
1156 {
1157 mexPrintf("Parsing error on line %ld .\n", line_cnt);
1158 mexPrintf("Probably defective input file.\n");
1159 exitflag = -1;
1160 goto clean_up;
1161 }
1162
1163 max_dim = LIBOCAS_MAX(max_dim,feat_idx[nnzf-1]);
1164 nnz += nnzf;
1165
1166 if(verb)
1167 {
1168 if( (line_cnt % 1000) == 0) {
1169 mexPrintf(" %ld", line_cnt);
1170 fflush(NULL);
1171 }
1172 }
1173 }
1174 }
1175
1176 fclose(fid);
1177 if(verb)
1178 {
1179 mexPrintf("Data statistics:\n");
1180 mexPrintf("# of examples: %ld\n", line_cnt);
1181 mexPrintf("dimensionality: %d\n", max_dim);
1182 mexPrintf("nnz: %ld, density: %f%%\n", (long)nnz, 100*(double)nnz/((double)max_dim*(double)line_cnt));
1183 }
1184
1185 sparse_memory_requirement = ((double)nnz*((double)sizeof(double)+(double)sizeof(mwSize)))/(1024.0*1024.0);
1186 full_memory_requirement = sizeof(double)*(double)max_dim*(double)line_cnt/(1024.0*1024.0);
1187
1188 if(verb)
1189 {
1190 mexPrintf("Memory requirements for sparse matrix: %.3f MB\n", sparse_memory_requirement);
1191 mexPrintf("Memory requirements for full matrix: %.3f MB\n", full_memory_requirement);
1192 }
1193
1194 if( full_memory_requirement < sparse_memory_requirement)
1195 {
1196 if(verb)
1197 mexPrintf("Full matrix represenation used.\n");
1198
1199 data_X = mxCreateDoubleMatrix(max_dim, line_cnt, mxREAL);
1200
1201 if( data_X == NULL)
1202 {
1203 mexPrintf("Not enough memory to allocate data_X .\n");
1204 exitflag = -1;
1205 goto clean_up;
1206 }
1207
1208 }
1209 else
1210 {
1211 if(verb)
1212 mexPrintf("Sparse matrix represenation used.\n");
1213
1214 data_X = mxCreateSparse(max_dim, line_cnt, nnz, mxREAL);
1215 if( data_X == NULL)
1216 {
1217 mexPrintf("Not enough memory to allocate data_X .\n");
1218 exitflag = -1;
1219 goto clean_up;
1220 }
1221
1222 sr = mxGetPr(data_X);
1223 irs = (mwSize*)mxGetIr(data_X);
1224 jcs = (mwSize*)mxGetJc(data_X);
1225
1226 }
1227
1228
1229 /* mexPrintf("Required memory: %.3f MB\n", */
1230 /* ((double)nnz*((double)sizeof(double)+(double)sizeof(mwSize)))/(1024.0*1024.0));*/
1231
1232 /*---------------------------------------------*/
1233
1234 data_y = mxCalloc(line_cnt, sizeof(double));
1235 if(data_y == NULL)
1236 {
1237 mexPrintf("Not enough memory to allocate data_y.\n");
1238 exitflag = -1;
1239 goto clean_up;
1240 }
1241
1242 fid = fopen(fname, "r");
1243 if(fid == NULL) {
1244 perror("fopen error ");
1245 mexPrintf("Cannot open input file.\n");
1246 exitflag = -1;
1247 goto clean_up;
1248 }
1249
1250 if(verb)
1251 mexPrintf("Reading examples...");
1252
1253 go = 1;
1254 line_cnt = 0;
1255 long k=0;
1256 while(go) {
1257 if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
1258 {
1259 go = 0;
1260 if(verb)
1261 {
1262 if( (line_cnt % 1000) != 0)
1263 mexPrintf(" %ld", line_cnt);
1264 mexPrintf(" EOF.\n");
1265 }
1266 }
1267 else
1268 {
1269 line_cnt ++;
1270 nnzf = svmlight_format_parse_line(line, &label, feat_idx, feat_val);
1271
1272 if(nnzf == -1)
1273 {
1274 mexPrintf("Parsing error on line %ld .\n", line_cnt);
1275 mexPrintf("Defective input file.\n");
1276 exitflag = -1;
1277 goto clean_up;
1278 }
1279
1280 data_y[line_cnt-1] = (double)label;
1281
1282 if( mxIsSparse( data_X) )
1283 {
1284 jcs[line_cnt-1] = k;
1285
1286 for(j = 0; j < nnzf; j++) {
1287 sr[k] = feat_val[j];
1288 irs[k] = feat_idx[j]-1;
1289 k++;
1290 }
1291 }
1292 else
1293 {
1294 double *ptr = mxGetPr(data_X);
1295 for(j=0; j < nnzf; j++ ) {
1296 ptr[LIBOCAS_INDEX(feat_idx[j]-1,line_cnt-1,max_dim)] = feat_val[j];
1297 }
1298
1299 }
1300
1301 if(verb)
1302 {
1303 if( (line_cnt % 1000) == 0) {
1304 mexPrintf(" %ld", line_cnt);
1305 fflush(NULL);
1306 }
1307 }
1308 }
1309 }
1310
1311 fclose(fid);
1312
1313 if( mxIsSparse( data_X) )
1314 jcs[line_cnt] = k;
1315
1316 /* mexPrintf("\n");*/
1317
1318 if(verb)
1319 mexPrintf("Leaving svmlight reading function.\n");
1320
1321 exitflag = 0;
1322
1323 clean_up:
1324
1325 mxFree(line);
1326 mxFree(feat_val);
1327 mxFree(feat_idx);
1328
1329 return(exitflag);
1330 }
1331
1332
1333 /*----------------------------------------------------------------------
1334 Compute area under ROC (1st class label[i]==1; 2nd class label[i] != 1).
1335 ----------------------------------------------------------------------*/
compute_auc(double * score,int * label,uint32_t nData)1336 double compute_auc(double *score, int *label, uint32_t nData)
1337 {
1338 double *sorted_score = NULL;
1339 double *sorted_lab = NULL;
1340 uint32_t i;
1341 uint32_t neg, pos;
1342 double auc = -1;
1343
1344 sorted_score = mxCalloc(nData, sizeof(double));
1345 if( sorted_score == NULL ) {
1346 mexPrintf("Not enough memmory to allocate sorted_score when computing AUC.");
1347 goto clean_up;
1348 }
1349
1350 sorted_lab = mxCalloc(nData, sizeof(double));
1351 if( sorted_lab == NULL )
1352 {
1353 mexPrintf("Not enough memmory to allocate sorted_lab when computing AUC.");
1354 goto clean_up;
1355 }
1356
1357 for(i=0; i < nData; i++)
1358 if(label[i] == 1) sorted_lab[i] = 1.0; else sorted_lab[i] = 0.0;
1359
1360
1361 memcpy(sorted_score,score,sizeof(double)*nData);
1362
1363 qsort_data(sorted_score, sorted_lab, nData);
1364
1365 pos = 0;
1366 neg = 0;
1367 auc = 0;
1368
1369 for(i = 0; i < nData; i++)
1370 {
1371 if(sorted_lab[i] ==1.0 )
1372 {
1373 pos ++;
1374 }
1375 else
1376 {
1377 neg ++;
1378 auc += (double)pos;
1379 }
1380 }
1381 auc = 1 - auc/((double)neg*(double)pos);
1382
1383 clean_up:
1384 mxFree(sorted_score);
1385 mxFree(sorted_lab);
1386
1387 return(auc);
1388 }
1389
1390