1 /*-----------------------------------------------------------------------
2  * ocas_helper.c: Implementation of helper functions for the OCAS solver.
3  *
4  *-------------------------------------------------------------------- */
5 
6 #define _FILE_OFFSET_BITS  64
7 
8 #include <pthread.h>
9 
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdint.h>
13 #include <sys/time.h>
14 #include <stdlib.h>
15 #include <time.h>
16 
17 #include "lib_svmlight_format.h"
18 #include "libocas.h"
19 #include "ocas_helper.h"
20 
21 mxArray *data_X;
22 uint32_t nDim, nData, nY;
23 double *data_y;
24 cutting_plane_buf_T sparse_A;
25 double *full_A;
26 double *W;
27 double *oldW;
28 double *new_a;
29 
30 double *A0;
31 double W0;
32 double oldW0;
33 double X0;
34 
35 /* parallelization via threads */
36 struct thread_params_output
37 {
38 	double* output;
39 	uint32_t start;
40 	uint32_t end;
41 };
42 
43 struct thread_qsort
44 {
45 	double* output;
46 /*	uint32_t* index;*/
47 	double* data;
48 	uint32_t size;
49 };
50 
51 struct thread_params_add
52 {
53   double *new_a;
54   uint32_t *new_cut;
55   uint32_t start;
56   uint32_t end;
57 };
58 
59 
60 typedef enum
61 {
62   FALSE = 0,
63   TRUE = 1
64 }
65 boolean;
66 
67 
68 static int qsort_threads;
69 static pthread_t* threads = NULL;
70 static uint32_t* thread_slices = NULL;
71 static int num_threads;
72 static const int sort_limit=4096;
73 static struct thread_params_output* params_output;
74 static struct thread_params_add* params_add;
75 
76 /* use multi-threads only if minimal number of examples to add is higher than the constant*/
77 static const uint32_t MinimalParallelCutLenght = 100;
78 
79 
80 /*----------------------------------------------------------------------
81   ----------------------------------------------------------------------*/
full_add_nnw_constr(uint32_t idx,uint32_t nSel,void * user_data)82 int full_add_nnw_constr(uint32_t idx, uint32_t nSel, void* user_data)
83 {
84     full_A[LIBOCAS_INDEX(idx,nSel,nDim)] = 1.0;
85     A0[nSel] = 0.0;
86 
87     return( 0 );
88 }
89 
90 
91 /*----------------------------------------------------------------------
92   ----------------------------------------------------------------------*/
sparse_add_nnw_constr(uint32_t idx,uint32_t nSel,void * user_data)93 int sparse_add_nnw_constr(uint32_t idx, uint32_t nSel, void* user_data)
94 {
95   sparse_A.nz_dims[nSel] = 1;
96   sparse_A.index[nSel] = NULL;
97   sparse_A.value[nSel] = NULL;
98   sparse_A.index[nSel] = mxCalloc(1,sizeof(uint32_t));
99   sparse_A.value[nSel] = mxCalloc(1,sizeof(double));
100   if(sparse_A.index[nSel]==NULL || sparse_A.value[nSel]==NULL)
101   {
102       mxFree(sparse_A.index[nSel]);
103       mxFree(sparse_A.value[nSel]);
104       return(-1);
105   }
106 
107   sparse_A.index[nSel][0] = idx;
108   sparse_A.value[nSel][0] = 1.0;
109 
110   A0[nSel] = 0.0;
111   return( 0 );
112 }
113 
114 
115 /*----------------------------------------------------------------------
116   ----------------------------------------------------------------------*/
clip_neg_W(uint32_t num_pw_constr,uint32_t * pw_idx,void * user_data)117 void clip_neg_W( uint32_t num_pw_constr, uint32_t *pw_idx, void* user_data )
118 {
119     uint32_t i;
120     for(i=0; i< num_pw_constr; i++ ) {
121         W[LIBOCAS_INDEX(pw_idx[i],0,nDim)] = LIBOCAS_MAX(0,W[LIBOCAS_INDEX(pw_idx[i],0,nDim)]);
122     }
123 }
124 
125 
126 /*----------------------------------------------------------------------
127   sq_norm_W = sparse_compute_W( alpha, nSel ) does the following:
128 
129   oldW = W;
130   W = sparse_A(:,1:nSel)'*alpha;
131   sq_norm_W = W'*W;
132   dp_WoldW = W'*oldW';
133 
134   ----------------------------------------------------------------------*/
sparse_compute_W(double * sq_norm_W,double * dp_WoldW,double * alpha,uint32_t nSel,void * user_data)135 void sparse_compute_W( double *sq_norm_W,
136                        double *dp_WoldW,
137                        double *alpha,
138                        uint32_t nSel,
139                        void* user_data )
140 {
141   uint32_t i,j, nz_dims;
142 
143   memcpy(oldW, W, sizeof(double)*nDim );
144   memset(W, 0, sizeof(double)*nDim);
145 
146   oldW0 = W0;
147   W0 = 0;
148 
149   for(i=0; i < nSel; i++) {
150     nz_dims = sparse_A.nz_dims[i];
151     if(nz_dims > 0 && alpha[i] > 0) {
152       for(j=0; j < nz_dims; j++) {
153         W[sparse_A.index[i][j]] += alpha[i]*sparse_A.value[i][j];
154       }
155     }
156     W0 += A0[i]*alpha[i];
157   }
158 
159   *sq_norm_W = W0*W0;
160   *dp_WoldW = W0*oldW0;
161   for(j=0; j < nDim; j++) {
162     *sq_norm_W += W[j]*W[j];
163     *dp_WoldW += W[j]*oldW[j];
164   }
165 
166   return;
167 }
168 
169 
170 /*----------------------------------------------------------------------
171   sq_norm_W = sparse_compute_W( alpha, nSel ) does the following:
172 
173   oldW = W;
174   W = sparse_A(:,1:nSel)'*alpha;
175   sq_norm_W = W'*W;
176   dp_WoldW = W'*oldW';
177 
178   ----------------------------------------------------------------------*/
msvm_sparse_compute_W(double * sq_norm_W,double * dp_WoldW,double * alpha,uint32_t nSel,void * user_data)179 void msvm_sparse_compute_W( double *sq_norm_W,
180                        double *dp_WoldW,
181                        double *alpha,
182                        uint32_t nSel,
183                        void* user_data )
184 {
185   uint32_t i,j, nz_dims;
186 
187   memcpy(oldW, W, sizeof(double)*nY*nDim );
188   memset(W, 0, sizeof(double)*nY*nDim );
189 
190   for(i=0; i < nSel; i++) {
191     nz_dims = sparse_A.nz_dims[i];
192     if(nz_dims > 0 && alpha[i] > 0) {
193       for(j=0; j < nz_dims; j++) {
194         W[sparse_A.index[i][j]] += alpha[i]*sparse_A.value[i][j];
195       }
196     }
197   }
198 
199   *sq_norm_W = 0;
200   *dp_WoldW = 0;
201   for(j=0; j < nY*nDim; j++)
202   {
203     *sq_norm_W += W[j]*W[j];
204     *dp_WoldW += W[j]*oldW[j];
205   }
206 
207   return;
208 }
209 
210 
211 /*----------------------------------------------------------------------------------
212   sq_norm_W = sparse_update_W( t ) does the following:
213 
214   W = oldW*(1-t) + t*W;
215   sq_norm_W = W'*W;
216 
217   ---------------------------------------------------------------------------------*/
msvm_update_W(double t,void * user_data)218 double msvm_update_W( double t, void* user_data )
219 {
220   uint32_t j;
221   double sq_norm_W;
222 
223   sq_norm_W = 0;
224 
225   for(j=0; j < nY*nDim; j++) {
226     W[j] = oldW[j]*(1-t) + t*W[j];
227     sq_norm_W += W[j]*W[j];
228   }
229 
230   return( sq_norm_W );
231 }
232 
233 
234 /*-------------------------------------------------------------------------
235   sq_norm_W = full_update_W( t ) does the following:
236 
237   W = oldW*(1-t) + t*W;
238   sq_norm_W = W'*W;
239 ---------------------------------------------------------------------------*/
update_W(double t,void * user_data)240 double update_W( double t, void* user_data )
241 {
242   uint32_t j;
243   double sq_norm_W;
244 
245   W0 = oldW0*(1-t) + t*W0;
246   sq_norm_W = W0*W0;
247 
248   for(j=0; j <nDim; j++) {
249     W[j] = oldW[j]*(1-t) + t*W[j];
250     sq_norm_W += W[j]*W[j];
251   }
252 
253   return( sq_norm_W );
254 }
255 
256 
257 /*----------------------------------------------------------------------
258   sq_norm_W = full_compute_W( alpha, nSel ) does the following:
259 
260   oldW = W;
261   W = full_A(:,1:nSel)'*alpha;
262   sq_norm_W = W'*W;
263   dp_WoldW = W'*oldW';
264 
265   ----------------------------------------------------------------------*/
msvm_full_compute_W(double * sq_norm_W,double * dp_WoldW,double * alpha,uint32_t nSel,void * user_data)266 void msvm_full_compute_W( double *sq_norm_W, double *dp_WoldW, double *alpha, uint32_t nSel, void* user_data )
267 {
268   uint32_t i,j;
269 
270   memcpy(oldW, W, sizeof(double)*nDim*nY );
271   memset(W, 0, sizeof(double)*nDim*nY);
272 
273   for(i=0; i < nSel; i++) {
274     if( alpha[i] > 0 ) {
275       for(j=0; j< nDim*nY; j++ ) {
276         W[j] += alpha[i]*full_A[LIBOCAS_INDEX(j,i,nDim*nY)];
277       }
278 
279     }
280   }
281 
282   *sq_norm_W = 0;
283   *dp_WoldW = 0;
284   for(j=0; j < nDim*nY; j++) {
285     *sq_norm_W += W[j]*W[j];
286     *dp_WoldW += W[j]*oldW[j];
287   }
288 
289   return;
290 }
291 
292 
293 /*-----------------------------------------------------------------------
294   Print statistics.
295   -----------------------------------------------------------------------*/
ocas_print(ocas_return_value_T value)296 void ocas_print(ocas_return_value_T value)
297 {
298   mexPrintf("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n",
299             value.nIter,value.ocas_time, value.Q_P,value.Q_D,value.Q_P-value.Q_D,(value.Q_P-value.Q_D)/LIBOCAS_ABS(value.Q_P),
300             value.nNZAlpha, 100*(double)value.trn_err/(double)nData, value.qp_exitflag );
301 }
302 
ocas_print_null(ocas_return_value_T value)303 void ocas_print_null(ocas_return_value_T value)
304 {
305   return;
306 }
307 
308 
309 /*-----------------------------------------------------------------------
310   Get absolute time in seconds.
311   -----------------------------------------------------------------------*/
get_time()312 double get_time()
313 {
314 	struct timeval tv;
315 	if (gettimeofday(&tv, NULL)==0)
316 		return tv.tv_sec+((double)(tv.tv_usec))/1e6;
317 	else
318 		return 0.0;
319 }
320 
321 
322 /*----------------------------------------------------------------------
323   in-place computes sparse_mat(:,col)= alpha * sparse_mat(:,col)
324   where alpha is a scalar and sparse_mat is Matlab sparse matrix.
325   ----------------------------------------------------------------------*/
mul_sparse_col(double alpha,mxArray * sparse_mat,uint32_t col)326 void mul_sparse_col(double alpha, mxArray *sparse_mat, uint32_t col)
327 {
328 	uint32_t nItems, ptr, i;
329 	INDEX_TYPE_T *Jc;
330 	double *Pr;
331 
332 	Jc = mxGetJc(sparse_mat);
333 	Pr = mxGetPr(sparse_mat);
334 
335 	nItems = Jc[col+1] - Jc[col];
336 	ptr = Jc[col];
337 
338 	for(i=0; i < nItems; i++)
339 		Pr[ptr++]*=alpha;
340 }
341 
342 
343 /*----------------------------------------------------------------------
344  It computes full_vec = full_vec + sparse_mat(:,col)
345  where full_vec is a double array and sparse_mat is Matlab
346  sparse matrix.
347   ----------------------------------------------------------------------*/
add_sparse_col(double * full_vec,mxArray * sparse_mat,uint32_t col)348 void add_sparse_col(double *full_vec, mxArray *sparse_mat, uint32_t col)
349 {
350   uint32_t nItems, ptr, i, row;
351   INDEX_TYPE_T *Ir, *Jc;
352   double *Pr, val;
353 
354   Ir = mxGetIr(sparse_mat);
355   Jc = mxGetJc(sparse_mat);
356   Pr = mxGetPr(sparse_mat);
357 
358   nItems = Jc[col+1] - Jc[col];
359   ptr = Jc[col];
360 
361   for(i=0; i < nItems; i++) {
362     val = Pr[ptr];
363     row = Ir[ptr++];
364 
365     full_vec[row] += val;
366   }
367 }
368 
369 
370 /*----------------------------------------------------------------------
371  It computes full_vec = full_vec - sparse_mat(:,col)
372  where full_vec is a double array and sparse_mat is Matlab
373  sparse matrix.
374   ----------------------------------------------------------------------*/
subtract_sparse_col(double * full_vec,mxArray * sparse_mat,uint32_t col)375 void subtract_sparse_col(double *full_vec, mxArray *sparse_mat, uint32_t col)
376 {
377   uint32_t nItems, ptr, i, row;
378   INDEX_TYPE_T *Ir, *Jc;
379   double *Pr, val;
380 
381   Ir = mxGetIr(sparse_mat);
382   Jc = mxGetJc(sparse_mat);
383   Pr = mxGetPr(sparse_mat);
384 
385   nItems = Jc[col+1] - Jc[col];
386   ptr = Jc[col];
387 
388   for(i=0; i < nItems; i++) {
389     val = Pr[ptr];
390     row = Ir[ptr++];
391 
392     full_vec[row] -= val;
393   }
394 }
395 
396 
397 /*----------------------------------------------------------------------
398  It computes dp = full_vec'*sparse_mat(:,col)
399  where full_vec is a double array and sparse_mat is Matlab
400  sparse matrix.
401   ----------------------------------------------------------------------*/
dp_sparse_col(double * full_vec,mxArray * sparse_mat,uint32_t col)402 double dp_sparse_col(double *full_vec, mxArray *sparse_mat, uint32_t col)
403 {
404   uint32_t nItems, ptr, i, row;
405   INDEX_TYPE_T *Ir, *Jc;
406   double *Pr, val, dp;
407 
408   Ir = mxGetIr(sparse_mat);
409   Jc = mxGetJc(sparse_mat);
410   Pr = mxGetPr(sparse_mat);
411 
412   dp = 0;
413   nItems = Jc[col+1] - Jc[col];
414   ptr = Jc[col];
415 
416   for(i=0; i < nItems; i++) {
417     val = Pr[ptr];
418     row = Ir[ptr++];
419 
420     dp += full_vec[row]*val;
421   }
422 
423   return(dp);
424 }
425 
426 
427 /*----------------------------------------------------------------------
428   sq_norm_W = full_compute_W( alpha, nSel ) does the following:
429 
430   oldW = W;
431   W = full_A(:,1:nSel)'*alpha;
432   sq_norm_W = W'*W;
433   dp_WoldW = W'*oldW';
434 
435   ----------------------------------------------------------------------*/
full_compute_W(double * sq_norm_W,double * dp_WoldW,double * alpha,uint32_t nSel,void * user_data)436 void full_compute_W( double *sq_norm_W, double *dp_WoldW, double *alpha, uint32_t nSel, void* user_data )
437 {
438   uint32_t i,j;
439 
440   memcpy(oldW, W, sizeof(double)*nDim );
441   memset(W, 0, sizeof(double)*nDim);
442 
443   oldW0 = W0;
444   W0 = 0;
445 
446   for(i=0; i < nSel; i++) {
447     if( alpha[i] > 0 ) {
448       for(j=0; j< nDim; j++ ) {
449         W[j] += alpha[i]*full_A[LIBOCAS_INDEX(j,i,nDim)];
450       }
451 
452       W0 += A0[i]*alpha[i];
453     }
454   }
455 
456   *sq_norm_W = W0*W0;
457   *dp_WoldW = W0*oldW0;
458   for(j=0; j < nDim; j++) {
459     *sq_norm_W += W[j]*W[j];
460     *dp_WoldW += W[j]*oldW[j];
461   }
462 
463   return;
464 }
465 
466 
467 
468 /*----------------------------------------------------------------------
469   parallel_sparse_compute_output( output, user_data ) does the following
470 
471   output = data_X'*W + W0*X0*y[i];
472   ----------------------------------------------------------------------*/
473 
parallel_compute_output_helper(void * p)474 static void* parallel_compute_output_helper(void* p)
475 {
476 	struct thread_params_output* params = (struct thread_params_output*) p;
477 	double* output=params->output;
478 	uint32_t start=params->start;
479 	uint32_t end=params->end;
480 	uint32_t i;
481 
482     for(i=start; i < end; i++)
483       output[i] = data_y[i]*X0*W0 + dp_sparse_col(W, data_X, i);
484 
485     return(NULL);
486 }
487 
parallel_sparse_compute_output(double * output,void * user_data)488 int parallel_sparse_compute_output( double *output, void* user_data )
489 {
490   /*  one-thraed code looks like:
491 
492   uint32_t i;
493 
494   for(i=0; i < nData; i++) {
495     output[i] = data_y[i]*X0*W0 + dp_sparse_col(W, data_X, i);
496   }
497   */
498 
499 /*  struct thread_params_output params;*/
500 
501   int nthreads=num_threads-1;
502   int end=0;
503   int t;
504 
505   if (nData < num_threads)
506   {
507     nthreads=nData-1;
508   }
509 
510   for (t=0; t<nthreads; t++)
511   {
512     params_output[t].output = output;
513     if (t==0)
514       params_output[t].start = 0;
515     else
516       params_output[t].start = thread_slices[t-1];
517     params_output[t].end = thread_slices[t];
518 
519     if (pthread_create(&threads[t], NULL, parallel_compute_output_helper, (void*)&params_output[t]) != 0)
520     {
521       mexPrintf("\nError: Thread creation failed.\n");
522       return(-1);
523     }
524 
525     end=params_output[t].end;
526   }
527 
528   params_output[t].output = output;
529   params_output[t].start = end;
530   params_output[t].end = nData;
531 
532   parallel_compute_output_helper(&params_output[t]);
533 
534   for (t=0; t<nthreads; t++)
535   {
536     if (pthread_join(threads[t], NULL) != 0)
537     {
538       mexPrintf("\nError: pthread_join failed.\n");
539       return(-1);
540     }
541   }
542 
543   return 0;
544 }
545 
546 /* Ihis function initialize parallel processing . It must be
547  called prior to paralel_XXXX  OCAS heleper functions. */
init_parallel_ocas(int number_of_threads)548 int init_parallel_ocas(int number_of_threads)
549 {
550   num_threads = number_of_threads;
551 
552   thread_slices = (uint32_t*)mxCalloc(num_threads,sizeof(uint32_t));
553   if(thread_slices == NULL)
554   {
555     mexPrintf("Not enough memory for vector num_threads.");
556     goto clean_up;
557 
558   }
559 
560   threads = (pthread_t*)mxCalloc(num_threads,sizeof(pthread_t));
561   if(threads== NULL)
562   {
563     mexPrintf("Not enough memory for threads structure.");
564     goto clean_up;
565   }
566 
567   params_output = (struct thread_params_output*)mxCalloc(num_threads,sizeof(struct thread_params_output));
568   if(params_output== NULL)
569   {
570     mexPrintf("Not enough memory for params structure.");
571     goto clean_up;
572   }
573 
574   params_add = (struct thread_params_add*)mxCalloc(num_threads,sizeof(struct thread_params_add));
575   if(params_add== NULL)
576   {
577     mexPrintf("Not enough memory for params structure.");
578     goto clean_up;
579   }
580 
581   /* The following code finds splits of the data such that each
582   data slice contains approximately the same number of nonzero elements. */
583   uint32_t i;
584   int nnz_split = 0;
585   INDEX_TYPE_T* Jc = mxGetJc(data_X);
586 
587   for(i=0; i < nData; i++)
588     nnz_split += Jc[i+1] - Jc[i];
589 
590   nnz_split/=num_threads;
591   uint64_t accum_nnz = 0;
592   int thr = 0;
593 
594   mexPrintf("Data slices: ");
595   for(i=0; i < nData; i++)
596   {
597     INDEX_TYPE_T nItems = Jc[i+1] - Jc[i];
598 
599     if (accum_nnz < nnz_split*(thr+1))
600       accum_nnz+=nItems;
601     else
602     {
603       thread_slices[thr]=i;
604       mexPrintf("%d ", thread_slices[thr]);
605       accum_nnz+=nItems;
606       thr++;
607     }
608   }
609   mexPrintf("\n");
610 
611   return(0);
612 
613 clean_up:
614 
615   mxFree(threads);
616   mxFree(thread_slices);
617 
618   return(-1);
619 }
620 
621 /* release memory allocated for parallelized functions */
destroy_parallel_ocas(void)622 void destroy_parallel_ocas(void)
623 {
624   mxFree(params_add);
625   mxFree(params_output);
626   mxFree(threads);
627   mxFree(thread_slices);
628 }
629 
630 /*----------------------------------------------------------------------------------
631   Parallel version of
632   sparse_add_new_cut( new_col_H, new_cut, cut_length, nSel ) does the following:
633 
634     new_a = sum(data_X(:,find(new_cut ~=0 )),2);
635     new_col_H = [sparse_A(:,1:nSel)'*new_a ; new_a'*new_a];
636     sparse_A(:,nSel+1) = new_a;
637 
638   ---------------------------------------------------------------------------------*/
639 
parallel_sparse_add_helper(void * p)640 static void* parallel_sparse_add_helper(void* p)
641 {
642 	struct thread_params_add* params = (struct thread_params_add*) p;
643 	double* local_new_a=params->new_a;
644     uint32_t* new_cut=params->new_cut;
645 	uint32_t start=params->start;
646 	uint32_t end=params->end;
647 	uint32_t i;
648 
649     for(i=start; i <= end; i++)
650       add_sparse_col(local_new_a, data_X, new_cut[i]);
651 
652     return(NULL);
653 }
654 
parallel_sparse_add_new_cut(double * new_col_H,uint32_t * new_cut,uint32_t cut_length,uint32_t nSel,void * user_data)655 int parallel_sparse_add_new_cut( double *new_col_H,
656                          uint32_t *new_cut,
657                          uint32_t cut_length,
658                          uint32_t nSel,
659                          void* user_data )
660 {
661 /*  double *new_a, */
662   double sq_norm_a;
663   uint32_t i, j, nz_dims, ptr;
664 
665   /* temporary vector */
666 /*  new_a = (double*)mxCalloc(nDim,sizeof(double));*/
667 /*  if(new_a == NULL) */
668 /*    return(-1); */
669   memset(new_a, 0, sizeof(double)*nDim*num_threads);
670 
671   if((cut_length < MinimalParallelCutLenght) && (cut_length >= num_threads))
672   {
673 
674     for(i=0; i < cut_length; i++) {
675       add_sparse_col(new_a, data_X, new_cut[i]);
676 
677       A0[nSel] += X0*data_y[new_cut[i]];
678     }
679   }
680   else
681   {
682 
683 /*    struct thread_params_add params;*/
684 /*    params.new_cut = new_cut;*/
685 
686     uint32_t chunk = cut_length/num_threads;
687     uint32_t start = 0;
688     uint32_t end = chunk-1;
689     int t;
690     for(t = 0; t < num_threads-1; t++)
691     {
692       params_add[t].start = start;
693       params_add[t].end = end;
694       params_add[t].new_a = &new_a[nDim*(t+1)];
695       params_add[t].new_cut = new_cut;
696 
697       start = end+1;
698       end = end+chunk;
699 
700       if (pthread_create(&threads[t], NULL, parallel_sparse_add_helper, (void*)&params_add[t]) != 0)
701       {
702         mexPrintf("\nError: Thread creation failed.\n");
703         return(-1);
704       }
705 
706     }
707 
708     params_add[t].start = start;
709     params_add[t].end = cut_length-1;
710     params_add[t].new_a = new_a;
711     params_add[t].new_cut = new_cut;
712 
713     parallel_sparse_add_helper((void*)&params_add[t]);
714 
715     for (t=0; t<num_threads-1; t++)
716     {
717       if (pthread_join(threads[t], NULL) != 0)
718       {
719         return(-1);
720       }
721 
722       double* a = &new_a[nDim*(t+1)];
723 
724       for (i=0; i<nDim; i++)
725         new_a[i]+=a[i];
726     }
727 
728     for(i=0; i < cut_length; i++)
729       A0[nSel] += X0*data_y[new_cut[i]];
730   }
731 
732   /* compute new_a'*new_a and count number of non-zero dimensions */
733   nz_dims = 0;
734   sq_norm_a = A0[nSel]*A0[nSel];
735   for(j=0; j < nDim; j++ ) {
736     if(new_a[j] != 0) {
737       nz_dims++;
738       sq_norm_a += new_a[j]*new_a[j];
739     }
740   }
741 
742   /* sparsify new_a and insert it to the last column  of sparse_A */
743   sparse_A.nz_dims[nSel] = nz_dims;
744   if(nz_dims > 0) {
745     sparse_A.index[nSel] = NULL;
746     sparse_A.value[nSel] = NULL;
747     sparse_A.index[nSel] = mxCalloc(nz_dims,sizeof(uint32_t));
748     sparse_A.value[nSel] = mxCalloc(nz_dims,sizeof(double));
749     if(sparse_A.index[nSel]==NULL || sparse_A.value[nSel]==NULL)
750     {
751 /*      mexErrMsgTxt("Not enough memory for vector sparse_A.index[nSel], sparse_A.value[nSel].");*/
752       mxFree(sparse_A.index[nSel]);
753       mxFree(sparse_A.value[nSel]);
754       return(-1);
755     }
756 
757     ptr = 0;
758     for(j=0; j < nDim; j++ ) {
759       if(new_a[j] != 0) {
760         sparse_A.index[nSel][ptr] = j;
761         sparse_A.value[nSel][ptr++] = new_a[j];
762       }
763     }
764   }
765 
766   new_col_H[nSel] = sq_norm_a;
767   for(i=0; i < nSel; i++) {
768     double tmp = A0[nSel]*A0[i];
769 
770     for(j=0; j < sparse_A.nz_dims[i]; j++) {
771       tmp += new_a[sparse_A.index[i][j]]*sparse_A.value[i][j];
772     }
773 
774     new_col_H[i] = tmp;
775   }
776 
777 /*  mxFree( new_a );*/
778 
779   return 0;
780 }
781 
782 
783 /*=======================================================================
784  OCAS helper functions for sorting numbers.
785 =======================================================================*/
swapf(double * a,double * b)786 static void swapf(double* a, double* b)
787 {
788 	double dummy=*b;
789 	*b=*a;
790 	*a=dummy;
791 }
792 
793 
794 /* sort arrays value and data according to value in ascending order */
qsort_data(double * value,double * data,uint32_t size)795 int qsort_data(double* value, double* data, uint32_t size)
796 {
797     if(size == 1)
798       return 0;
799 
800 	if (size==2)
801 	{
802 		if (value[0] > value[1])
803 		{
804 			swapf(&value[0], &value[1]);
805 /*			swapi(&data[0], &data[1]);*/
806 			swapf(&data[0], &data[1]);
807 		}
808 		return 0;
809 	}
810 	double split=value[size/2];
811 
812 	uint32_t left=0;
813 	uint32_t right=size-1;
814 
815 	while (left<=right)
816 	{
817 		while (value[left] < split)
818 			left++;
819 		while (value[right] > split)
820 			right--;
821 
822 		if (left<=right)
823 		{
824 			swapf(&value[left], &value[right]);
825 /*			swapi(&data[left], &data[right]);*/
826 			swapf(&data[left], &data[right]);
827 			left++;
828 			right--;
829 		}
830 	}
831 
832 	if (right+1> 1)
833 		qsort_data(value,data,right+1);
834 
835 	if (size-left> 1)
836 		qsort_data(&value[left],&data[left], size-left);
837 
838 
839     return 0;
840 }
841 
842 /*-------------------------------------------------------------------------
843  Parallel version of qsort_data.
844   -------------------------------------------------------------------------*/
parallel_qsort_helper(void * p)845 void* parallel_qsort_helper(void* p)
846 {
847 	struct thread_qsort* ps=(struct thread_qsort*) p;
848 	double* output=ps->output;
849 /*	uint32_t* data=ps->data;*/
850 	double* data=ps->data;
851 	uint32_t size=ps->size;
852 
853     if(size == 1)
854       return 0;
855 
856 
857 	if (size==2)
858 	{
859 		if (output[0] > output [1])
860 		{
861 			swapf(&output[0], &output[1]);
862 /*			swapi(&data[0], &data[1]);*/
863 			swapf(&data[0], &data[1]);
864 		}
865 		return(NULL);
866 	}
867 	/*double split=output[(((uint64_t) size)*rand())/(((uint64_t)RAND_MAX)+1)];*/
868 	double split=output[size/2];
869 
870 	uint32_t left=0;
871 	uint32_t right=size-1;
872 
873 	while (left<=right)
874 	{
875 		while (output[left] < split)
876 			left++;
877 		while (output[right] > split)
878 			right--;
879 
880 		if (left<=right)
881 		{
882 			swapf(&output[left], &output[right]);
883 /*			swapi(&index[left], &index[right]);*/
884 			swapf(&data[left], &data[right]);
885 			left++;
886 			right--;
887 		}
888 	}
889 	boolean lthread_start=FALSE;
890 	boolean rthread_start=FALSE;
891 	pthread_t lthread;
892 	pthread_t rthread;
893 	struct thread_qsort t1;
894 	struct thread_qsort t2;
895 
896 	if (right+1> 1 && (right+1< sort_limit || qsort_threads >= num_threads-1))
897 		qsort_data(output,data,right+1);
898 	else if (right+1> 1)
899 	{
900 		qsort_threads++;
901 		lthread_start=TRUE;
902 		t1.output=output;
903 		t1.data=data;
904 		t1.size=right+1;
905 		if (pthread_create(&lthread, NULL, parallel_qsort_helper, (void*) &t1) != 0)
906 		{
907 			lthread_start=FALSE;
908 			qsort_threads--;
909 			qsort_data(output,data,right+1);
910 		}
911 	}
912 
913 
914 	if (size-left> 1 && (size-left< sort_limit || qsort_threads >= num_threads-1))
915 		qsort_data(&output[left],&data[left], size-left);
916 	else if (size-left> 1)
917 	{
918 		qsort_threads++;
919 		rthread_start=TRUE;
920 		t2.output=&output[left];
921 		t2.data=&data[left];
922 		t2.size=size-left;
923 		if (pthread_create(&rthread, NULL, parallel_qsort_helper, (void*)&t2) != 0)
924 		{
925 			rthread_start=FALSE;
926 			qsort_threads--;
927 			qsort_data(&output[left],&data[left], size-left);
928 		}
929 	}
930 
931 	if (lthread_start)
932 	{
933 		pthread_join(lthread, NULL);
934 		qsort_threads--;
935 	}
936 
937 	if (rthread_start)
938 	{
939 		pthread_join(rthread, NULL);
940 		qsort_threads--;
941 	}
942 
943     return(NULL);
944 }
945 
946 
parallel_qsort_data(double * value,double * data,uint32_t size)947 int parallel_qsort_data(double* value, double* data, uint32_t size)
948 {
949   struct thread_qsort qthr;
950 
951   qsort_threads=0;
952 
953   qthr.output=value;
954   qthr.data=data;
955   qthr.size=size;
956   parallel_qsort_helper((void*)&qthr);
957 
958   return(0);
959 }
960 
961 
962 
963 
964 /* ---------------------------------------------------------------------------------
965 This function loads regularization constants from a text file. Each line contains
966 a single constant.
967   ---------------------------------------------------------------------------------*/
load_regconsts(char * fname,double ** vec_C,uint32_t * len_vec_C,int verb)968 int load_regconsts(char *fname, double **vec_C, uint32_t *len_vec_C, int verb)
969 {
970   double C;
971   char *line = NULL;
972   int exitflag = 0;
973   FILE *fid;
974 
975   if(verb) mexPrintf("Input file: %s\n", fname);
976 
977   fid = fopen(fname, "r");
978   if(fid == NULL) {
979     perror("fopen error ");
980     mexPrintf("Cannot open input file.\n");
981     exitflag = -1;
982     goto clean_up;
983   }
984 
985   line = mxCalloc(LIBSLF_MAXLINELEN, sizeof(char));
986   if( line == NULL )
987   {
988     mexPrintf("Not enough memmory to allocate line buffer.\n");
989     exitflag = -1;
990     goto clean_up;
991   }
992 
993 
994   if(verb) mexPrintf("Counting regularization constants...");
995   int go = 1;
996   long line_cnt = 0;
997   while(go) {
998 
999     if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
1000     {
1001       go = 0;
1002       if(verb)
1003       {
1004         if( (line_cnt % 1000) != 0)
1005           mexPrintf(" %ld", line_cnt);
1006         mexPrintf(" EOF.\n");
1007       }
1008 
1009     }
1010     else
1011     {
1012       line_cnt ++;
1013 
1014       C = atof(line);
1015 
1016       if(verb)
1017       {
1018         if( (line_cnt % 1000) == 0) {
1019           mexPrintf(" %ld", line_cnt);
1020           fflush(NULL);
1021         }
1022       }
1023     }
1024   }
1025 
1026   *vec_C = (double*)mxCalloc(line_cnt, sizeof(double));
1027   if( vec_C == NULL )
1028   {
1029     mexPrintf("Not enough memmory to allocate vec_C.\n");
1030     exitflag = -1;
1031     goto clean_up;
1032   }
1033 
1034   fseek(fid, 0, SEEK_SET);
1035 
1036   if(verb) mexPrintf("Reading regularization constants...");
1037   go = 1;
1038   line_cnt = 0;
1039   while(go)
1040   {
1041 
1042     if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
1043     {
1044       go = 0;
1045       if(verb)
1046       {
1047         if( (line_cnt % 1000) != 0)
1048           mexPrintf(" %ld", line_cnt);
1049         mexPrintf(" EOF.\n");
1050       }
1051 
1052     }
1053     else
1054     {
1055       (*vec_C)[line_cnt] = atof(line);
1056       line_cnt ++;
1057 
1058       if(verb)
1059       {
1060         if( (line_cnt % 1000) == 0) {
1061           mexPrintf(" %ld", line_cnt);
1062           fflush(NULL);
1063         }
1064       }
1065     }
1066   }
1067 
1068   fclose(fid);
1069   *len_vec_C = line_cnt;
1070 
1071 clean_up:
1072   mxFree(line);
1073 
1074   return(exitflag);
1075 }
1076 
1077 
1078 
1079 /* ---------------------------------------------------------------------------------
1080 This function loads SVMlight data file to sparse matlab matrix data_X and
1081 dense vector data_y which both are assumed to global variables.
1082   ---------------------------------------------------------------------------------*/
load_svmlight_file(char * fname,int verb)1083 int load_svmlight_file(char *fname, int verb)
1084 {
1085   char *line = NULL;
1086   FILE *fid;
1087   double *feat_val = NULL;
1088   double sparse_memory_requirement, full_memory_requirement;
1089   uint32_t *feat_idx = NULL;
1090   long nnzf;
1091   int max_dim = 0;
1092   long j;
1093   uint64_t nnz = 0;
1094   mwSize *irs = NULL, *jcs = NULL;
1095   int exitflag = 0;
1096   double *sr = NULL;
1097 
1098 /*  mexPrintf("Input file: %s\n", fname);*/
1099 
1100   fid = fopen(fname, "r");
1101   if(fid == NULL) {
1102     perror("fopen error ");
1103     mexPrintf("Cannot open input file.\n");
1104     exitflag = -1;
1105     goto clean_up;
1106   }
1107 
1108   line = mxCalloc(LIBSLF_MAXLINELEN, sizeof(char));
1109   if( line == NULL )
1110   {
1111     mexPrintf("Not enough memmory to allocate line buffer.\n");
1112     exitflag = -1;
1113     goto clean_up;
1114   }
1115 
1116   feat_idx = mxCalloc(LIBSLF_MAXLINELEN, sizeof(uint32_t));
1117   if( feat_idx == NULL )
1118   {
1119     mexPrintf("Not enough memmory to allocate feat_idx.\n");
1120     exitflag = -1;
1121     goto clean_up;
1122   }
1123 
1124   feat_val = mxCalloc(LIBSLF_MAXLINELEN, sizeof(double));
1125   if( feat_val == NULL )
1126   {
1127     mexPrintf("Not enough memmory to allocate feat_val.\n");
1128     exitflag = -1;
1129     goto clean_up;
1130   }
1131 
1132   if(verb) mexPrintf("Analysing input data...");
1133   int label;
1134   int go = 1;
1135   long line_cnt = 0;
1136 
1137   while(go) {
1138 
1139     if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
1140     {
1141       go = 0;
1142       if(verb)
1143       {
1144         if( (line_cnt % 1000) != 0)
1145           mexPrintf(" %ld", line_cnt);
1146         mexPrintf(" EOF.\n");
1147       }
1148 
1149     }
1150     else
1151     {
1152       line_cnt ++;
1153       nnzf = svmlight_format_parse_line(line, &label, feat_idx, feat_val);
1154 
1155       if(nnzf == -1)
1156       {
1157          mexPrintf("Parsing error on line %ld .\n", line_cnt);
1158          mexPrintf("Probably defective input file.\n");
1159          exitflag = -1;
1160          goto clean_up;
1161       }
1162 
1163       max_dim = LIBOCAS_MAX(max_dim,feat_idx[nnzf-1]);
1164       nnz += nnzf;
1165 
1166       if(verb)
1167       {
1168         if( (line_cnt % 1000) == 0) {
1169           mexPrintf(" %ld", line_cnt);
1170           fflush(NULL);
1171         }
1172       }
1173     }
1174   }
1175 
1176   fclose(fid);
1177   if(verb)
1178   {
1179     mexPrintf("Data statistics:\n");
1180     mexPrintf("# of examples: %ld\n", line_cnt);
1181     mexPrintf("dimensionality: %d\n", max_dim);
1182     mexPrintf("nnz: %ld, density: %f%%\n", (long)nnz, 100*(double)nnz/((double)max_dim*(double)line_cnt));
1183   }
1184 
1185   sparse_memory_requirement = ((double)nnz*((double)sizeof(double)+(double)sizeof(mwSize)))/(1024.0*1024.0);
1186   full_memory_requirement = sizeof(double)*(double)max_dim*(double)line_cnt/(1024.0*1024.0);
1187 
1188   if(verb)
1189   {
1190     mexPrintf("Memory requirements for sparse matrix: %.3f MB\n", sparse_memory_requirement);
1191     mexPrintf("Memory requirements for full matrix: %.3f MB\n", full_memory_requirement);
1192   }
1193 
1194   if( full_memory_requirement < sparse_memory_requirement)
1195   {
1196     if(verb)
1197       mexPrintf("Full matrix represenation used.\n");
1198 
1199     data_X = mxCreateDoubleMatrix(max_dim, line_cnt, mxREAL);
1200 
1201     if( data_X == NULL)
1202     {
1203       mexPrintf("Not enough memory to allocate data_X .\n");
1204       exitflag = -1;
1205       goto clean_up;
1206     }
1207 
1208   }
1209   else
1210   {
1211     if(verb)
1212       mexPrintf("Sparse matrix represenation used.\n");
1213 
1214     data_X = mxCreateSparse(max_dim, line_cnt, nnz, mxREAL);
1215     if( data_X == NULL)
1216     {
1217       mexPrintf("Not enough memory to allocate data_X .\n");
1218       exitflag = -1;
1219       goto clean_up;
1220     }
1221 
1222     sr  = mxGetPr(data_X);
1223     irs = (mwSize*)mxGetIr(data_X);
1224     jcs = (mwSize*)mxGetJc(data_X);
1225 
1226   }
1227 
1228 
1229 /*  mexPrintf("Required memory: %.3f MB\n", */
1230 /*    ((double)nnz*((double)sizeof(double)+(double)sizeof(mwSize)))/(1024.0*1024.0));*/
1231 
1232   /*---------------------------------------------*/
1233 
1234   data_y = mxCalloc(line_cnt, sizeof(double));
1235   if(data_y == NULL)
1236   {
1237     mexPrintf("Not enough memory to allocate data_y.\n");
1238     exitflag = -1;
1239     goto clean_up;
1240   }
1241 
1242   fid = fopen(fname, "r");
1243   if(fid == NULL) {
1244     perror("fopen error ");
1245     mexPrintf("Cannot open input file.\n");
1246     exitflag = -1;
1247     goto clean_up;
1248   }
1249 
1250   if(verb)
1251     mexPrintf("Reading examples...");
1252 
1253   go = 1;
1254   line_cnt = 0;
1255   long k=0;
1256   while(go) {
1257     if(fgets(line,LIBSLF_MAXLINELEN, fid) == NULL )
1258     {
1259       go = 0;
1260       if(verb)
1261       {
1262         if( (line_cnt % 1000) != 0)
1263           mexPrintf(" %ld", line_cnt);
1264         mexPrintf(" EOF.\n");
1265       }
1266     }
1267     else
1268     {
1269       line_cnt ++;
1270       nnzf = svmlight_format_parse_line(line, &label, feat_idx, feat_val);
1271 
1272       if(nnzf == -1)
1273       {
1274          mexPrintf("Parsing error on line %ld .\n", line_cnt);
1275          mexPrintf("Defective input file.\n");
1276          exitflag = -1;
1277          goto clean_up;
1278       }
1279 
1280       data_y[line_cnt-1] = (double)label;
1281 
1282       if( mxIsSparse( data_X) )
1283       {
1284         jcs[line_cnt-1] = k;
1285 
1286         for(j = 0; j < nnzf; j++) {
1287           sr[k] = feat_val[j];
1288           irs[k] = feat_idx[j]-1;
1289           k++;
1290         }
1291       }
1292       else
1293       {
1294         double *ptr = mxGetPr(data_X);
1295         for(j=0; j < nnzf; j++ ) {
1296           ptr[LIBOCAS_INDEX(feat_idx[j]-1,line_cnt-1,max_dim)] = feat_val[j];
1297         }
1298 
1299       }
1300 
1301       if(verb)
1302       {
1303         if( (line_cnt % 1000) == 0) {
1304           mexPrintf(" %ld", line_cnt);
1305           fflush(NULL);
1306         }
1307       }
1308     }
1309   }
1310 
1311   fclose(fid);
1312 
1313   if( mxIsSparse( data_X) )
1314     jcs[line_cnt] = k;
1315 
1316 /*  mexPrintf("\n");*/
1317 
1318   if(verb)
1319     mexPrintf("Leaving svmlight reading function.\n");
1320 
1321   exitflag = 0;
1322 
1323 clean_up:
1324 
1325   mxFree(line);
1326   mxFree(feat_val);
1327   mxFree(feat_idx);
1328 
1329   return(exitflag);
1330 }
1331 
1332 
1333 /*----------------------------------------------------------------------
1334  Compute area under ROC (1st class label[i]==1; 2nd class label[i] != 1).
1335   ----------------------------------------------------------------------*/
compute_auc(double * score,int * label,uint32_t nData)1336 double compute_auc(double *score, int *label, uint32_t nData)
1337 {
1338   double *sorted_score = NULL;
1339   double *sorted_lab = NULL;
1340   uint32_t i;
1341   uint32_t neg, pos;
1342   double auc = -1;
1343 
1344   sorted_score = mxCalloc(nData, sizeof(double));
1345   if( sorted_score == NULL ) {
1346       mexPrintf("Not enough memmory to allocate sorted_score when computing AUC.");
1347       goto clean_up;
1348   }
1349 
1350   sorted_lab = mxCalloc(nData, sizeof(double));
1351   if( sorted_lab == NULL )
1352   {
1353       mexPrintf("Not enough memmory to allocate sorted_lab when computing AUC.");
1354       goto clean_up;
1355   }
1356 
1357   for(i=0; i < nData; i++)
1358     if(label[i] == 1) sorted_lab[i] = 1.0; else sorted_lab[i] = 0.0;
1359 
1360 
1361   memcpy(sorted_score,score,sizeof(double)*nData);
1362 
1363   qsort_data(sorted_score, sorted_lab, nData);
1364 
1365   pos = 0;
1366   neg = 0;
1367   auc = 0;
1368 
1369   for(i = 0; i < nData; i++)
1370   {
1371     if(sorted_lab[i] ==1.0 )
1372     {
1373       pos ++;
1374     }
1375     else
1376     {
1377       neg ++;
1378       auc += (double)pos;
1379     }
1380   }
1381   auc = 1 - auc/((double)neg*(double)pos);
1382 
1383 clean_up:
1384   mxFree(sorted_score);
1385   mxFree(sorted_lab);
1386 
1387   return(auc);
1388 }
1389 
1390