1 /*********************************************************/
2 /* TAUCS */
3 /* Author: Sivan Toledo */
4 /* File : taucs_ccs_xxt.c */
5 /* Description: computes the Cholesky factor of A^-1 */
6 /*********************************************************/
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <assert.h>
11 #include <math.h>
12 #include "taucs.h"
13
14 #ifdef TAUCS_CORE_DOUBLE
15
16 typedef struct {
17 int length;
18 int* indices;
19 int* bitmap;
20 double* values;
21 } spa;
22
23 /*********************************************************/
24 /* Returns the strictly upper part of A */
25 /*********************************************************/
26
27 static
28 taucs_ccs_matrix*
ccs_syml_to_symu(taucs_ccs_matrix * A)29 ccs_syml_to_symu(taucs_ccs_matrix* A) {
30 taucs_ccs_matrix* U;
31 int n;
32 int* temp;
33 int i,j,ip;/*kp,k,jp omer*/
34 double v;
35
36 n = A->n;
37
38 temp = (int*) taucs_malloc(n * sizeof(int));
39 if (!temp) return NULL;
40
41 U = taucs_dtl(ccs_create)(n, n, (A->colptr)[n] - n);
42 if (!U) {
43 taucs_free(temp);
44 return NULL;
45 }
46
47 U->flags = TAUCS_SYMMETRIC | TAUCS_UPPER;
48
49 for (j=0; j<=n; j++) (U->colptr)[j] = 0;
50 for (j=0; j<n; j++) temp[j] = 0;
51
52 for (j=0; j<n; j++) {
53 for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
54 i = (A->rowind)[ip];
55 if (i!=j) temp[i]++;
56 }
57 }
58
59 for (j=1; j<=n; j++) (U->colptr)[j] = (U->colptr)[j-1] + temp[j-1];
60 for (j=0; j< n; j++) temp[j] = (U->colptr)[j];
61
62 for (j=0; j<n; j++) {
63 for (ip=(A->colptr)[j]; ip<(A->colptr)[j+1]; ip++) {
64 i = (A->rowind)[ip];
65 v = (A->taucs_values)[ip];
66 if (i!=j) {
67 (U->rowind)[ temp[i] ] = j;
68 (U->taucs_values)[ temp[i] ] = v;
69 temp[i]++;
70 }
71 }
72 }
73
74 /*
75 taucs_ccs_write_ijv(A,"AA.ijv");
76 taucs_ccs_write_ijv(U,"UU.ijv");
77 */
78
79 assert((U->colptr)[n] == (A->colptr)[n] - n);
80
81 return U;
82 }
83
84 /*********************************************************/
85 /* */
86 /*********************************************************/
87
spa_create(int n)88 static spa* spa_create(int n)
89 {
90 int i;
91 spa* s;
92
93 s = (spa*) taucs_malloc( sizeof(spa) );
94 if ( !s ) return NULL;
95
96 s->indices = (int*) taucs_malloc( n * sizeof(int) );
97 s->bitmap = (int*) taucs_malloc( n * sizeof(int) );
98 s->values = (double*) taucs_malloc( n * sizeof(double) );
99
100 if ( !(s->indices) || !(s->values) || !(s->bitmap) ) {
101 taucs_printf("chol: cannot create spa\n");
102 taucs_free( s->indices );
103 taucs_free( s->bitmap );
104 taucs_free( s->values );
105 taucs_free( s );
106 return NULL;
107 }
108
109 s->length = 0;
110
111 for (i=0; i<n; i++) (s->bitmap)[i] = -1;
112
113 return s;
114 }
115
spa_free(spa * s)116 static void spa_free(spa* s)
117 {
118 taucs_free( s->indices );
119 taucs_free( s->values );
120 taucs_free( s );
121 }
122
spa_set_lu(spa * s,taucs_ccs_matrix * L,taucs_ccs_matrix * U,int j)123 static void spa_set_lu(spa* s, taucs_ccs_matrix* L, taucs_ccs_matrix* U, int j)
124 {
125 int i, ip, next;
126 double Aij;
127
128 assert(j < L->n);
129
130 next = 0;
131 for (ip = (U->colptr)[j]; ip < (U->colptr)[j+1]; ip++) {
132 i = (U->rowind)[ip];
133 Aij = (U->taucs_values)[ip];
134
135 assert( i < j ); /* U must be strictly upper */
136
137 (s->indices)[ next ] = i;
138 (s->values) [ i ] = Aij;
139 (s->bitmap) [ i ] = j;
140 next++;
141 }
142 for (ip = (L->colptr)[j]; ip < (L->colptr)[j+1]; ip++) {
143 i = (L->rowind)[ip];
144 Aij = (L->taucs_values)[ip];
145
146 assert( i >= j ); /* A must be lower */
147
148 (s->indices)[ next ] = i;
149 (s->values) [ i ] = Aij;
150 (s->bitmap) [ i ] = j;
151 next++;
152 }
153
154 s->length = next;
155 }
156
spa_scale_add(spa * s,int j,taucs_ccs_matrix * A,int k,double alpha)157 static void spa_scale_add(spa* s, int j, taucs_ccs_matrix* A, int k, double alpha)
158 {
159 int i, ip, next;
160 double Aik;
161
162 assert(k < A->n);
163
164 /*
165 printf("spa_scale_add: updating column %d with column %d\n",j,k);
166 printf("spa_scale_add: colptr %d to %d-1\n",(A->colptr)[k],(A->colptr)[k+1]);
167 */
168
169 next = 0;
170 for (ip = (A->colptr)[k]; ip < (A->colptr)[k+1]; ip++) {
171 i = (A->rowind)[ip];
172 /*if (i < j) continue;*/
173 Aik = (A->taucs_values)[ip];
174
175 if ( (s->bitmap)[ i ] < j ) {
176 /*printf("fill in (%d,%d)\n",i,j);*/
177 (s->bitmap)[i] = j;
178 (s->values)[i] = 0.0;
179 (s->indices)[ s->length ] = i;
180 (s->length)++;
181 }
182
183 (s->values)[ i ] += alpha * Aik;
184
185 /*printf("spa_scale_add: A(%d,%d) -= %lg * %lg ==> %lg\n",i,j,alpha,Aik,(s->values)[i]);*/
186 }
187 }
188
spa_dot(spa * s,int j,taucs_ccs_matrix * A,int k)189 static double spa_dot(spa* s, int j, taucs_ccs_matrix* A, int k)
190 {
191 int i, ip;
192 double Aik;
193 double x = 0.0;
194
195 assert(k < A->n);
196
197 /*
198 printf("spa_dot: updating column %d with column %d\n",j,k);
199 printf("spa_dot: colptr %d to %d-1\n",(A->colptr)[k],(A->colptr)[k+1]);
200 */
201
202 for (ip = (A->colptr)[k]; ip < (A->colptr)[k+1]; ip++) {
203 i = (A->rowind)[ip];
204 Aik = (A->taucs_values)[ip];
205
206
207 if ( (s->bitmap)[ i ] == j ) {
208 /*printf("j=%d, i=%d k=%d ::: %lg, %lg\n",j,i,k,Aik,(s->values)[ i ]);*/
209 x += Aik * (s->values)[ i ];
210 } else {
211 /*printf("@@@ j=%d, i=%d k=%d ::: %lg, %lg\n",j,i,k,Aik,(s->values)[ i ]);*/
212 }
213 }
214
215 return x;
216 }
217
spa_A_norm(spa * s,int j,taucs_ccs_matrix * A)218 static double spa_A_norm(spa* s, int j, taucs_ccs_matrix* A)
219 {
220 int i, ip, k, kp;
221 double Aik;
222 double x = 0.0;
223
224 assert(A->flags | TAUCS_SYMMETRIC);
225 assert(A->flags | TAUCS_LOWER);
226
227 /*
228 printf("spa_scale_add: updating column %d with column %d\n",j,k);
229 printf("spa_scale_add: colptr %d to %d-1\n",(A->colptr)[k],(A->colptr)[k+1]);
230 */
231
232 for (kp=0; kp<s->length; kp++) {
233 k = (s->indices)[kp];
234
235 for (ip = (A->colptr)[k]; ip < (A->colptr)[k+1]; ip++) {
236 i = (A->rowind)[ip];
237 Aik = (A->taucs_values)[ip];
238
239 if ( (s->bitmap)[ i ] == j ) {
240 /*printf("j=%d, i=%d k=%d ::: %lg, %lg\n",j,i,k,Aik,(s->values)[ i ]);*/
241 if (i == k)
242 x += (s->values)[k] * Aik * (s->values)[ i ];
243 else
244 x += 2.0 * (s->values)[k] * Aik * (s->values)[ i ];
245 }
246 }
247 }
248
249 return x;
250 }
251
252 /*********************************************************/
253 /* */
254 /*********************************************************/
255
256 int* rowlist;
257 int* rowlist_next;
258 int* rowlist_colind;
259 double* rowlist_values;
260
261 int rowlist_freelist;
262 int rowlist_size;
263
rowlist_create(int n)264 static int rowlist_create(int n)
265 {
266 int i;
267
268 rowlist = (int*) taucs_malloc( n * sizeof(int) );
269
270 rowlist_size = 1000;
271 rowlist_next = (int*) taucs_malloc( rowlist_size * sizeof(int) );
272 rowlist_colind = (int*) taucs_malloc( rowlist_size * sizeof(int) );
273 rowlist_values = (double*) taucs_malloc( rowlist_size * sizeof(double) );
274
275 for (i=0; i<n; i++) rowlist[i] = -1; /* no list yet for row i */
276
277 /* free list */
278 rowlist_freelist = 0;
279 for (i=0; i<rowlist_size-1; i++) rowlist_next[i] = i+1;
280 rowlist_next[rowlist_size-1] = -1;
281
282 return 0;
283 }
284
rowlist_free()285 static void rowlist_free()
286 {
287 taucs_free(rowlist);
288 taucs_free(rowlist_next);
289 taucs_free(rowlist_colind);
290 taucs_free(rowlist_values);
291 }
292
293 /*static void rowlist_freerow(int i){}*/
294
rowlist_add(int i,int j,double v)295 static void rowlist_add(int i,int j,double v)
296 {
297 int l;
298
299 if (rowlist_freelist == -1) {
300 int inc = 1000;
301 int ii;
302
303 rowlist_next = (int*) taucs_realloc( rowlist_next, (rowlist_size+inc) * sizeof(int) );
304 rowlist_colind = (int*) taucs_realloc( rowlist_colind, (rowlist_size+inc) * sizeof(int) );
305 rowlist_values = (double*) taucs_realloc( rowlist_values, (rowlist_size+inc) * sizeof(double) );
306
307 rowlist_freelist = rowlist_size;
308 for (ii=rowlist_size; ii<rowlist_size+inc-1; ii++)
309 rowlist_next[ii] = ii+1;
310 rowlist_next[ rowlist_size+inc-1 ] = -1;
311
312 rowlist_size += inc;
313 }
314
315 l = rowlist_freelist;
316 rowlist_freelist = rowlist_next[ rowlist_freelist ];
317
318 rowlist_next [ l ] = rowlist[ i ];
319 rowlist_colind[ l ] = j;
320 rowlist_values[ l ] = v;
321
322 rowlist[ i ] = l;
323 }
324
rowlist_getfirst(int i)325 static int rowlist_getfirst(int i)
326 {
327 return rowlist[ i ];
328 }
329
rowlist_getnext(int l)330 static int rowlist_getnext(int l)
331 {
332 return rowlist_next[ l ];
333 }
334
rowlist_getcolind(int l)335 static int rowlist_getcolind(int l)
336 {
337 return rowlist_colind[ l ];
338 }
339
340 /*
341 static double rowlist_getvalue(int l)
342 {
343 return rowlist_values[ l ];
344 }
345 */
346
347 /*********************************************************/
348 /* Inverse Cholesky factorization */
349 /*********************************************************/
350
351 taucs_ccs_matrix*
taucs_ccs_factor_xxt(taucs_ccs_matrix * A)352 taucs_ccs_factor_xxt(taucs_ccs_matrix* A)
353 {
354 int i,j,k,l,n,ip,next,Lnnz;
355 double v;/*Lkj,pivot,norm omer*/
356 spa* s;
357 spa* Aej;
358 taucs_ccs_matrix* L;
359 taucs_ccs_matrix* U;
360 /*int Aj_nnz;omer*/
361 /*double flops = 0.0;*/
362 double x;
363 int* bitmap;
364
365 if (!(A->flags & TAUCS_SYMMETRIC)) {
366 taucs_printf("taucs_ccs_factor_xxt: matrix must be symmetric\n");
367 return NULL;
368 }
369 if (!(A->flags & TAUCS_LOWER)) {
370 taucs_printf("taucs_ccs_factor_xxt: lower part must be represented\n");
371 return NULL;
372 }
373
374 if (!(A->flags & TAUCS_DOUBLE)) {
375 taucs_printf("taucs_ccs_factor_xxt: only works for double-precision real matrices\n");
376 return NULL;
377 }
378
379 n = A->n;
380
381 taucs_printf("taucs_ccs_factor_xxt: starting n=%d\n",n);
382
383 bitmap = (int*) taucs_malloc(n * sizeof(int));
384 if (!bitmap) return NULL;
385 for (i=0; i<n; i++) bitmap[i] = -1;
386
387 U = ccs_syml_to_symu(A);
388
389
390 L = taucs_dtl(ccs_create)(n,n,1000);
391 /* L->flags = TAUCS_TRIANGULAR | TAUCS_LOWER; */
392 L->flags = 0;
393
394 Lnnz = 1000;
395 next = 0;
396
397 s = spa_create(n);
398 Aej = spa_create(n);
399 rowlist_create(n);
400
401 for (j=0; j<n; j++) {
402
403 /* set the spa to ej */
404
405 s->length = 1;
406 (s->values)[j] = 1.0;
407 (s->bitmap)[j] = j;
408 (s->indices)[0] = j;
409
410 /* compute A*ej, get both upper and lower parts! */
411
412 spa_set_lu(Aej,A,U,j);
413
414 /*for (k=0; k<j; k++) {*/
415
416 for (ip=0; ip<Aej->length; ip++) {
417 i = (Aej->indices)[ip];
418
419 for (l = rowlist_getfirst(i);
420 l != -1;
421 l = rowlist_getnext(l)) {
422 k = rowlist_getcolind(l);
423
424 if (bitmap[k] == j) continue;
425 bitmap[k] = j;
426
427 /* inner product of column k of X with A*ej */
428
429 x = spa_dot(Aej,j,L,k);
430 if (x != 0.0) {
431 /*printf("adding column %d to e_%d, before=%d\n",k,j,s->length);*/
432 spa_scale_add(s,j,L,k,-x); /* L_*j -= x * L_*k */
433 }
434 }
435 }
436
437 /* normalize the column to unit A-norm */
438
439 x = sqrt(spa_A_norm(s,j,A));
440 /*printf("A-norm of column %d = %lg\n",j,x);*/
441
442 for (ip = 0; ip < s->length; ip++) {
443 i = (s->indices)[ip];
444 (s->values)[i] /= x;
445 }
446
447 /* we now add the j'th column of L to the taucs_ccs */
448
449 if ( next+(s->length) > Lnnz ) {
450 int* rowind;
451 double* values;
452 int inc = max( 8192, s->length );
453
454 Lnnz += inc;
455
456 rowind = (int*) taucs_realloc( L->rowind, Lnnz * sizeof(int) );
457 values = (double*) taucs_realloc( L->taucs_values, Lnnz * sizeof(double) );
458 /* check for errors */
459 assert( rowind && values );
460 L->rowind = rowind;
461 L->taucs_values = values;
462 }
463
464 (L->colptr)[j] = next;
465
466 for (ip = 0; ip < s->length; ip++) {
467 i = (s->indices)[ip];
468 v = (s->values)[i];
469
470 (L->rowind)[next] = i;
471 (L->taucs_values)[next] = v;
472 next++;
473 rowlist_add(i,j,v);
474 }
475
476 (L->colptr)[j+1] = next;
477 }
478
479 (L->colptr)[n] = next;
480
481 taucs_free(bitmap);
482 rowlist_free();
483 spa_free(Aej);
484 spa_free(s);
485 taucs_ccs_free(U);
486
487 taucs_printf("taucs_ccs_factor_xxt: done; nnz(L) = %d\n",(L->colptr)[n]);
488
489 return L;
490 }
491
492 /*********************************************************/
493 /* XXT Solve */
494 /*********************************************************/
495
496 int
taucs_ccs_solve_xxt(void * vX,double * x,double * b)497 taucs_ccs_solve_xxt(void* vX, double* x, double* b)
498 {
499 taucs_ccs_matrix* X = (taucs_ccs_matrix*) vX;
500 int n;
501 int i,j,ip;
502 double v;
503 double* y;
504
505 if (!(X->flags & TAUCS_TRIANGULAR)
506 || !(X->flags & TAUCS_LOWER)
507 || !(X->flags & TAUCS_DOUBLE)
508 ) {
509 taucs_printf("taucs_ccs_solve_xxt: matrix must be lower triangular double-precision real\n");
510 return 0;
511 }
512
513 n = X->n;
514
515 y = (double*) taucs_malloc(n * sizeof(double));
516 if (!y) return -1;
517
518 /* multiply by X' */
519
520 for (j=0; j<n; j++) {
521 y[j] = 0.0;
522
523 for (ip=(X->colptr)[j]; ip<(X->colptr)[j+1]; ip++) {
524 i = (X->rowind)[ip];
525 v = (X->taucs_values)[ip];
526 y[j] += v*b[i];
527 }
528 }
529
530 for (i=0; i<n; i++) x[i] = 0.0;
531
532 /* multiply by X */
533
534 for (j=0; j<n; j++) {
535 for (ip=(X->colptr)[j]; ip<(X->colptr)[j+1]; ip++) {
536 i = (X->rowind)[ip];
537 v = (X->taucs_values)[ip];
538 x[i] += v*y[j];
539 }
540 }
541
542 taucs_free(y);
543
544 return 0;
545 }
546
547 #endif /* TAUCS_CORE_DOUBLE */
548