1 /*********************************************************/
2 /* TAUCS                                                 */
3 /* Author: Sivan Toledo                                  */
4 /* File  : taucs_ccs_xxt.c                               */
5 /* Description: computes the Cholesky factor of A^-1     */
6 /*********************************************************/
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <assert.h>
11 #include <math.h>
12 #include "taucs.h"
13 
14 #ifdef TAUCS_CORE_DOUBLE
15 
16 typedef struct {
17   int     length;
18   int*    indices;
19   int*    bitmap;
20   double* values;
21 } spa;
22 
23 /*********************************************************/
24 /* Returns the strictly upper part of A                  */
25 /*********************************************************/
26 
27 static
28 taucs_ccs_matrix*
ccs_syml_to_symu(taucs_ccs_matrix * A)29 ccs_syml_to_symu(taucs_ccs_matrix* A) {
30   taucs_ccs_matrix* U;
31   int n;
32   int* temp;
33   int i,j,ip;/*kp,k,jp omer*/
34   double v;
35 
36   n = A->n;
37 
38   temp = (int*) taucs_malloc(n * sizeof(int));
39   if (!temp) return NULL;
40 
41   U = taucs_dtl(ccs_create)(n, n, (A->colptr)[n] - n);
42   if (!U) {
43     taucs_free(temp);
44     return NULL;
45   }
46 
47   U->flags = TAUCS_SYMMETRIC | TAUCS_UPPER;
48 
49   for (j=0; j<=n; j++) (U->colptr)[j] = 0;
50   for (j=0; j<n; j++)  temp[j] = 0;
51 
52   for (j=0; j<n; j++) {
53     for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
54       i = (A->rowind)[ip];
55       if (i!=j) temp[i]++;
56     }
57   }
58 
59   for (j=1; j<=n; j++) (U->colptr)[j] = (U->colptr)[j-1] + temp[j-1];
60   for (j=0; j< n; j++) temp[j] = (U->colptr)[j];
61 
62   for (j=0; j<n; j++) {
63     for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
64       i = (A->rowind)[ip];
65       v = (A->taucs_values)[ip];
66       if (i!=j) {
67 	(U->rowind)[ temp[i] ] = j;
68 	(U->taucs_values)[ temp[i] ] = v;
69 	temp[i]++;
70       }
71     }
72   }
73 
74   /*
75   taucs_ccs_write_ijv(A,"AA.ijv");
76   taucs_ccs_write_ijv(U,"UU.ijv");
77   */
78 
79   assert((U->colptr)[n] == (A->colptr)[n] - n);
80 
81   return U;
82 }
83 
84 /*********************************************************/
85 /*                                                       */
86 /*********************************************************/
87 
spa_create(int n)88 static spa* spa_create(int n)
89 {
90   int i;
91   spa* s;
92 
93   s = (spa*) taucs_malloc( sizeof(spa) );
94   if ( !s ) return NULL;
95 
96   s->indices = (int*)    taucs_malloc( n * sizeof(int) );
97   s->bitmap  = (int*)    taucs_malloc( n * sizeof(int) );
98   s->values  = (double*) taucs_malloc( n * sizeof(double) );
99 
100   if ( !(s->indices) || !(s->values) || !(s->bitmap) ) {
101     taucs_printf("chol: cannot create spa\n");
102     taucs_free( s->indices );
103     taucs_free( s->bitmap  );
104     taucs_free( s->values  );
105     taucs_free( s );
106     return NULL;
107   }
108 
109   s->length = 0;
110 
111   for (i=0; i<n; i++) (s->bitmap)[i] = -1;
112 
113   return s;
114 }
115 
spa_free(spa * s)116 static void spa_free(spa* s)
117 {
118   taucs_free( s->indices );
119   taucs_free( s->values  );
120   taucs_free( s );
121 }
122 
spa_set_lu(spa * s,taucs_ccs_matrix * L,taucs_ccs_matrix * U,int j)123 static void spa_set_lu(spa* s, taucs_ccs_matrix* L, taucs_ccs_matrix* U, int j)
124 {
125   int i, ip, next;
126   double Aij;
127 
128   assert(j < L->n);
129 
130   next = 0;
131   for (ip = (U->colptr)[j]; ip < (U->colptr)[j+1]; ip++) {
132     i   = (U->rowind)[ip];
133     Aij = (U->taucs_values)[ip];
134 
135     assert( i < j ); /* U must be strictly upper */
136 
137     (s->indices)[ next ] = i;
138     (s->values) [ i    ] = Aij;
139     (s->bitmap) [ i    ] = j;
140     next++;
141   }
142   for (ip = (L->colptr)[j]; ip < (L->colptr)[j+1]; ip++) {
143     i   = (L->rowind)[ip];
144     Aij = (L->taucs_values)[ip];
145 
146     assert( i >= j ); /* A must be lower */
147 
148     (s->indices)[ next ] = i;
149     (s->values) [ i    ] = Aij;
150     (s->bitmap) [ i    ] = j;
151     next++;
152   }
153 
154   s->length = next;
155 }
156 
spa_scale_add(spa * s,int j,taucs_ccs_matrix * A,int k,double alpha)157 static void spa_scale_add(spa* s, int j, taucs_ccs_matrix* A, int k, double alpha)
158 {
159   int i, ip, next;
160   double Aik;
161 
162   assert(k < A->n);
163 
164   /*
165   printf("spa_scale_add: updating column %d with column %d\n",j,k);
166   printf("spa_scale_add: colptr %d to %d-1\n",(A->colptr)[k],(A->colptr)[k+1]);
167   */
168 
169   next = 0;
170   for (ip = (A->colptr)[k]; ip < (A->colptr)[k+1]; ip++) {
171     i   = (A->rowind)[ip];
172     /*if (i < j) continue;*/
173     Aik = (A->taucs_values)[ip];
174 
175     if ( (s->bitmap)[ i ] < j ) {
176       /*printf("fill in (%d,%d)\n",i,j);*/
177       (s->bitmap)[i] = j;
178       (s->values)[i] = 0.0;
179       (s->indices)[ s->length ] = i;
180       (s->length)++;
181     }
182 
183     (s->values)[ i ] += alpha * Aik;
184 
185     /*printf("spa_scale_add: A(%d,%d) -= %lg * %lg ==> %lg\n",i,j,alpha,Aik,(s->values)[i]);*/
186   }
187 }
188 
spa_dot(spa * s,int j,taucs_ccs_matrix * A,int k)189 static double spa_dot(spa* s, int j, taucs_ccs_matrix* A, int k)
190 {
191   int i, ip;
192   double Aik;
193   double x = 0.0;
194 
195   assert(k < A->n);
196 
197   /*
198   printf("spa_dot: updating column %d with column %d\n",j,k);
199   printf("spa_dot: colptr %d to %d-1\n",(A->colptr)[k],(A->colptr)[k+1]);
200   */
201 
202   for (ip = (A->colptr)[k]; ip < (A->colptr)[k+1]; ip++) {
203     i   = (A->rowind)[ip];
204     Aik = (A->taucs_values)[ip];
205 
206 
207     if ( (s->bitmap)[ i ] == j ) {
208       /*printf("j=%d, i=%d k=%d ::: %lg, %lg\n",j,i,k,Aik,(s->values)[ i ]);*/
209       x += Aik * (s->values)[ i ];
210     } else {
211       /*printf("@@@ j=%d, i=%d k=%d ::: %lg, %lg\n",j,i,k,Aik,(s->values)[ i ]);*/
212     }
213   }
214 
215   return x;
216 }
217 
spa_A_norm(spa * s,int j,taucs_ccs_matrix * A)218 static double spa_A_norm(spa* s, int j, taucs_ccs_matrix* A)
219 {
220   int i, ip, k, kp;
221   double Aik;
222   double x = 0.0;
223 
224   assert(A->flags | TAUCS_SYMMETRIC);
225   assert(A->flags | TAUCS_LOWER);
226 
227   /*
228   printf("spa_scale_add: updating column %d with column %d\n",j,k);
229   printf("spa_scale_add: colptr %d to %d-1\n",(A->colptr)[k],(A->colptr)[k+1]);
230   */
231 
232   for (kp=0; kp<s->length; kp++) {
233     k = (s->indices)[kp];
234 
235     for (ip = (A->colptr)[k]; ip < (A->colptr)[k+1]; ip++) {
236       i   = (A->rowind)[ip];
237       Aik = (A->taucs_values)[ip];
238 
239       if ( (s->bitmap)[ i ] == j ) {
240 	/*printf("j=%d, i=%d k=%d ::: %lg, %lg\n",j,i,k,Aik,(s->values)[ i ]);*/
241 	if (i == k)
242 	  x += (s->values)[k] * Aik * (s->values)[ i ];
243 	else
244 	  x += 2.0 * (s->values)[k] * Aik * (s->values)[ i ];
245       }
246     }
247   }
248 
249   return x;
250 }
251 
252 /*********************************************************/
253 /*                                                       */
254 /*********************************************************/
255 
256 int*    rowlist;
257 int*    rowlist_next;
258 int*    rowlist_colind;
259 double* rowlist_values;
260 
261 int     rowlist_freelist;
262 int     rowlist_size;
263 
rowlist_create(int n)264 static int rowlist_create(int n)
265 {
266   int i;
267 
268   rowlist = (int*) taucs_malloc( n * sizeof(int) );
269 
270   rowlist_size    = 1000;
271   rowlist_next    = (int*)    taucs_malloc( rowlist_size * sizeof(int) );
272   rowlist_colind = (int*)    taucs_malloc( rowlist_size * sizeof(int) );
273   rowlist_values  = (double*) taucs_malloc( rowlist_size * sizeof(double) );
274 
275   for (i=0; i<n; i++) rowlist[i] = -1; /* no list yet for row i */
276 
277   /* free list */
278   rowlist_freelist = 0;
279   for (i=0; i<rowlist_size-1; i++) rowlist_next[i] = i+1;
280   rowlist_next[rowlist_size-1] = -1;
281 
282   return 0;
283 }
284 
rowlist_free()285 static void rowlist_free()
286 {
287   taucs_free(rowlist);
288   taucs_free(rowlist_next);
289   taucs_free(rowlist_colind);
290   taucs_free(rowlist_values);
291 }
292 
293 /*static void rowlist_freerow(int i){}*/
294 
rowlist_add(int i,int j,double v)295 static void rowlist_add(int i,int j,double v)
296 {
297   int l;
298 
299   if (rowlist_freelist == -1) {
300     int inc = 1000;
301     int ii;
302 
303     rowlist_next   = (int*)    taucs_realloc( rowlist_next,   (rowlist_size+inc) * sizeof(int) );
304     rowlist_colind = (int*)    taucs_realloc( rowlist_colind, (rowlist_size+inc) * sizeof(int) );
305     rowlist_values = (double*) taucs_realloc( rowlist_values, (rowlist_size+inc) * sizeof(double) );
306 
307     rowlist_freelist = rowlist_size;
308     for (ii=rowlist_size; ii<rowlist_size+inc-1; ii++)
309       rowlist_next[ii] = ii+1;
310     rowlist_next[ rowlist_size+inc-1 ] = -1;
311 
312     rowlist_size    += inc;
313   }
314 
315   l = rowlist_freelist;
316   rowlist_freelist = rowlist_next[ rowlist_freelist ];
317 
318   rowlist_next  [ l ] = rowlist[ i ];
319   rowlist_colind[ l ] = j;
320   rowlist_values[ l ] = v;
321 
322   rowlist[ i ] = l;
323 }
324 
rowlist_getfirst(int i)325 static int rowlist_getfirst(int i)
326 {
327   return rowlist[ i ];
328 }
329 
rowlist_getnext(int l)330 static int rowlist_getnext(int l)
331 {
332   return rowlist_next[ l ];
333 }
334 
rowlist_getcolind(int l)335 static int rowlist_getcolind(int l)
336 {
337   return rowlist_colind[ l ];
338 }
339 
340 /*
341 static double rowlist_getvalue(int l)
342 {
343   return rowlist_values[ l ];
344 }
345 */
346 
347 /*********************************************************/
348 /* Inverse Cholesky factorization                        */
349 /*********************************************************/
350 
351 taucs_ccs_matrix*
taucs_ccs_factor_xxt(taucs_ccs_matrix * A)352 taucs_ccs_factor_xxt(taucs_ccs_matrix* A)
353 {
354   int            i,j,k,l,n,ip,next,Lnnz;
355   double v;/*Lkj,pivot,norm omer*/
356   spa*           s;
357   spa*           Aej;
358   taucs_ccs_matrix* L;
359   taucs_ccs_matrix* U;
360   /*int Aj_nnz;omer*/
361   /*double flops = 0.0;*/
362   double x;
363   int* bitmap;
364 
365   if (!(A->flags & TAUCS_SYMMETRIC)) {
366     taucs_printf("taucs_ccs_factor_xxt: matrix must be symmetric\n");
367     return NULL;
368   }
369   if (!(A->flags & TAUCS_LOWER)) {
370     taucs_printf("taucs_ccs_factor_xxt: lower part must be represented\n");
371     return NULL;
372   }
373 
374   if (!(A->flags & TAUCS_DOUBLE)) {
375     taucs_printf("taucs_ccs_factor_xxt: only works for double-precision real matrices\n");
376     return NULL;
377   }
378 
379   n = A->n;
380 
381   taucs_printf("taucs_ccs_factor_xxt: starting n=%d\n",n);
382 
383   bitmap = (int*) taucs_malloc(n * sizeof(int));
384   if (!bitmap) return NULL;
385   for (i=0; i<n; i++) bitmap[i] = -1;
386 
387   U = ccs_syml_to_symu(A);
388 
389 
390   L = taucs_dtl(ccs_create)(n,n,1000);
391   /*  L->flags = TAUCS_TRIANGULAR | TAUCS_LOWER; */
392   L->flags = 0;
393 
394   Lnnz = 1000;
395   next = 0;
396 
397   s   = spa_create(n);
398   Aej = spa_create(n);
399   rowlist_create(n);
400 
401   for (j=0; j<n; j++) {
402 
403     /* set the spa to ej */
404 
405     s->length = 1;
406     (s->values)[j] = 1.0;
407     (s->bitmap)[j] = j;
408     (s->indices)[0] = j;
409 
410     /* compute A*ej, get both upper and lower parts! */
411 
412     spa_set_lu(Aej,A,U,j);
413 
414     /*for (k=0; k<j; k++) {*/
415 
416     for (ip=0; ip<Aej->length; ip++) {
417       i = (Aej->indices)[ip];
418 
419       for (l = rowlist_getfirst(i);
420 	   l != -1;
421 	   l = rowlist_getnext(l)) {
422 	k   = rowlist_getcolind(l);
423 
424 	if (bitmap[k] == j) continue;
425 	bitmap[k] = j;
426 
427 	/* inner product of column k of X with A*ej */
428 
429 	x = spa_dot(Aej,j,L,k);
430 	if (x != 0.0) {
431 	  /*printf("adding column %d to e_%d, before=%d\n",k,j,s->length);*/
432 	  spa_scale_add(s,j,L,k,-x); /* L_*j -= x * L_*k  */
433 	}
434       }
435     }
436 
437     /* normalize the column to unit A-norm */
438 
439     x = sqrt(spa_A_norm(s,j,A));
440     /*printf("A-norm of column %d = %lg\n",j,x);*/
441 
442     for (ip = 0; ip < s->length; ip++) {
443       i = (s->indices)[ip];
444       (s->values)[i] /= x;
445     }
446 
447     /* we now add the j'th column of L to the taucs_ccs */
448 
449     if ( next+(s->length) > Lnnz ) {
450       int*    rowind;
451       double* values;
452       int inc = max( 8192, s->length );
453 
454       Lnnz += inc;
455 
456       rowind = (int*)    taucs_realloc( L->rowind, Lnnz * sizeof(int) );
457       values = (double*) taucs_realloc( L->taucs_values, Lnnz * sizeof(double) );
458       /* check for errors */
459       assert( rowind && values );
460       L->rowind = rowind;
461       L->taucs_values = values;
462     }
463 
464     (L->colptr)[j] = next;
465 
466     for (ip = 0; ip < s->length; ip++) {
467       i = (s->indices)[ip];
468       v = (s->values)[i];
469 
470       (L->rowind)[next] = i;
471       (L->taucs_values)[next] = v;
472       next++;
473       rowlist_add(i,j,v);
474     }
475 
476     (L->colptr)[j+1] = next;
477   }
478 
479   (L->colptr)[n] = next;
480 
481   taucs_free(bitmap);
482   rowlist_free();
483   spa_free(Aej);
484   spa_free(s);
485   taucs_ccs_free(U);
486 
487   taucs_printf("taucs_ccs_factor_xxt: done; nnz(L) = %d\n",(L->colptr)[n]);
488 
489   return L;
490 }
491 
492 /*********************************************************/
493 /* XXT Solve                                             */
494 /*********************************************************/
495 
496 int
taucs_ccs_solve_xxt(void * vX,double * x,double * b)497 taucs_ccs_solve_xxt(void* vX, double* x, double* b)
498 {
499   taucs_ccs_matrix* X = (taucs_ccs_matrix*) vX;
500   int n;
501   int i,j,ip;
502   double v;
503   double* y;
504 
505   if (!(X->flags & TAUCS_TRIANGULAR)
506       || !(X->flags & TAUCS_LOWER)
507       || !(X->flags & TAUCS_DOUBLE)
508       ) {
509     taucs_printf("taucs_ccs_solve_xxt: matrix must be lower triangular double-precision real\n");
510     return 0;
511   }
512 
513   n = X->n;
514 
515   y = (double*) taucs_malloc(n * sizeof(double));
516   if (!y) return -1;
517 
518   /* multiply by X' */
519 
520   for (j=0; j<n; j++) {
521     y[j] = 0.0;
522 
523     for (ip=(X->colptr)[j]; ip<(X->colptr)[j+1]; ip++) {
524       i = (X->rowind)[ip];
525       v = (X->taucs_values)[ip];
526       y[j] += v*b[i];
527     }
528   }
529 
530   for (i=0; i<n; i++) x[i] = 0.0;
531 
532   /* multiply by X */
533 
534   for (j=0; j<n; j++) {
535     for (ip=(X->colptr)[j]; ip<(X->colptr)[j+1]; ip++) {
536       i = (X->rowind)[ip];
537       v = (X->taucs_values)[ip];
538       x[i] += v*y[j];
539     }
540   }
541 
542   taucs_free(y);
543 
544   return 0;
545 }
546 
547 #endif /* TAUCS_CORE_DOUBLE */
548