1 #include "mrilib.h"
2
3 /****************************************************************************/
4 /****** Generic functions to process a sparse matrix in rcmat format. *******/
5 /****************************************************************************/
6
7 /*--------------------------------------------------------------------------*/
8 /*! Create a rcmat struct, ready to be filled up. */
9
rcmat_init(int n)10 rcmat * rcmat_init( int n )
11 {
12 rcmat *rcm ;
13
14 ENTRY("rcmat_init") ;
15
16 if( n <= 1 ) RETURN(NULL) ;
17
18 rcm = (rcmat *)malloc( sizeof(rcmat) ) ;
19 rcm->nrc = n ;
20 rcm->len = (LENTYP * )calloc( n , sizeof(LENTYP ) ) ;
21 rcm->rc = (double **)calloc( n , sizeof(double *) ) ;
22 RETURN(rcm) ;
23 }
24
25 /*--------------------------------------------------------------------------*/
26 /*! Delete a rcmat structure. */
27
rcmat_destroy(rcmat * rcm)28 void rcmat_destroy( rcmat *rcm )
29 {
30 int nn , ii ;
31 double **rc ;
32 LENTYP *len ;
33
34 if( rcm == NULL ) return ;
35
36 nn = rcm->nrc ; rc = rcm->rc ; len = rcm->len ;
37 if( rc != NULL ){
38 for( ii=0 ; ii < nn ; ii++ ) if( rc[ii] != NULL ) free((void *)rc[ii]) ;
39 free((void *)rc) ;
40 }
41 if( len != NULL ) free((void *)len) ;
42 free((void *)rcm) ;
43 return ;
44 }
45
46 /*--------------------------------------------------------------------------*/
47 /*! Average row length in an rcmat. */
48
rcmat_avglen(rcmat * rcm)49 float rcmat_avglen( rcmat *rcm )
50 {
51 int nn , ii ;
52 float sum=0.0f ;
53 LENTYP *len ;
54
55 if( rcm == NULL ) return sum ;
56 nn = rcm->nrc ; len = rcm->len ; if( nn == 0 || len == NULL ) return sum ;
57 for( ii=0 ; ii < nn ; ii++ ) sum += (float)len[ii] ;
58 sum /= nn ; return sum ;
59 }
60
61 /*--------------------------------------------------------------------------*/
62 /*! Duplicate a rcmat struct. */
63
rcmat_copy(rcmat * rcm)64 rcmat * rcmat_copy( rcmat *rcm )
65 {
66 rcmat *qcm ;
67 int ii,nn ;
68
69 if( !ISVALID_RCMAT(rcm) ) return NULL ;
70
71 nn = rcm->nrc ;
72 qcm = rcmat_init(nn) ;
73 AAmemcpy( qcm->len , rcm->len , sizeof(LENTYP)*nn ) ;
74 for( ii=0 ; ii < nn ; ii++ ){
75 qcm->rc[ii] = malloc( sizeof(double)*qcm->len[ii] ) ;
76 AAmemcpy( qcm->rc[ii] , rcm->rc[ii] , sizeof(double)*qcm->len[ii] ) ;
77 }
78 return qcm ;
79 }
80
81 /*--------------------------------------------------------------------------*/
82 /*! Consider a rcmat struct as a symmetric matrix, and
83 Choleski factor it in place. Return value is 0 if all is OK.
84 A positive return indicates the row/column that had trouble. */
85
rcmat_choleski(rcmat * rcm)86 int rcmat_choleski( rcmat *rcm )
87 {
88 int ii,jj,kk , nn , jbot,kbot ; LENTYP *len ;
89 double sum , **rc , *rii , *rjj ;
90
91 if( !ISVALID_RCMAT(rcm) ) return 999999999 ;
92
93 nn = rcm->nrc ;
94 rc = rcm->rc ;
95 len = rcm->len ;
96
97 for( ii=0 ; ii < nn ; ii++ ){
98 if( len[ii] == 1 ){
99 if( rc[ii][0] <= 0.0 ) return (ii+1) ;
100 rc[ii][0] = sqrt(rc[ii][0]) ; continue ;
101 }
102 jbot = ii - len[ii] + 1 ;
103 rii = rc[ii] - jbot ;
104 for( jj=jbot ; jj <= ii ; jj++ ){
105 if( len[jj] == 1 ){
106 rii[jj] = rii[jj] / rc[jj][0] ;
107 continue ;
108 }
109 kbot = jj - len[jj] + 1 ;
110 rjj = rc[jj] - kbot ;
111 sum = rii[jj] ;
112 if( kbot < jbot ) kbot = jbot ;
113 for( kk=kbot ; kk < jj ; kk++ ) sum -= rii[kk]*rjj[kk] ;
114 if( jj < ii ){
115 rii[jj] = sum / rjj[jj] ;
116 } else {
117 if( sum <= 0.0 ) return (ii+1) ;
118 rii[ii] = sqrt(sum) ;
119 }
120 }
121 }
122 return 0 ;
123 }
124
125 /*--------------------------------------------------------------------------*/
126
rcmat_lowert_vecmul(rcmat * rcm,double * vec)127 void rcmat_lowert_vecmul( rcmat *rcm , double *vec ) /* 02 Oct 2009 */
128 {
129 int nn , jbot ; LENTYP *len ; int ii,jj ;
130 double **rc ; double *rii , sum , *vv , *uu ;
131
132 if( !ISVALID_RCMAT(rcm) || vec == NULL ) return ;
133
134 nn = rcm->nrc ;
135 rc = rcm->rc ;
136 len = rcm->len ;
137 vv = vec ;
138 uu = (double *)malloc(sizeof(double)*nn) ;
139
140 for( ii=0 ; ii < nn ; ii++ ){
141 if( len[ii] == 1 ){
142 sum = rc[ii][0] * vv[ii] ;
143 } else {
144 jbot = ii - len[ii] + 1 ; rii = rc[ii] - jbot ; sum = 0.0 ;
145 for( jj=jbot ; jj <= ii ; jj++ ) sum += rii[jj]*vv[jj] ;
146 }
147 uu[ii] = sum ;
148 }
149
150 for( ii=0 ; ii < nn ; ii++ ) vv[ii] = uu[ii] ;
151 free(uu) ;
152 return ;
153 }
154
155 /*--------------------------------------------------------------------------*/
156 #undef RV
157 #define RV(k) rii[k]*vv[k]
158
159 #define UNROLL
160
161 /*! Consider a rcmat struct as a lower triangular matrix,
162 and solve the matrix-vector equation [rcm][x] = [vec], in place. */
163
rcmat_lowert_solve(rcmat * rcm,double * vec)164 void rcmat_lowert_solve( rcmat *rcm , double *vec )
165 {
166 int nn , jbot ; LENTYP *len ; int ii,jj ;
167 double **rc ; double *rii , sum , *vv ;
168 #ifdef UNROLL
169 int im1 ;
170 #endif
171
172 if( !ISVALID_RCMAT(rcm) || vec == NULL ) return ;
173
174 nn = rcm->nrc ;
175 #if 0
176 if( nn > 999 ){ rcmat_lowert_solve_unrolled(rcm,vec) ; return ; }
177 #endif
178 rc = rcm->rc ;
179 len = rcm->len ;
180 vv = vec ;
181
182 for( ii=0 ; ii < nn ; ii++ ){
183 if( len[ii] == 1 ){
184 vv[ii] = vv[ii] / rc[ii][0] ; continue ;
185 }
186 jbot = ii - len[ii] + 1 ; rii = rc[ii] - jbot ;
187 sum = vv[ii] ;
188 #ifndef UNROLL
189 for( jj=jbot ; jj < ii ; jj++ ) sum -= RV(jj) ;
190 #else
191 im1 = ii-1 ;
192 for( jj=jbot ; jj < im1 ; jj+=2 ) sum -= RV(jj)+RV(jj+1) ;
193 if( jj == im1 ) sum -= RV(jj) ;
194 #endif
195 vv[ii] = sum / rii[ii] ;
196 }
197 return ;
198 }
199
200 /*--------------------------------------------------------------------------*/
201 /*! Consider a rcmat struct as a lower triangular matrix,
202 and solve the matrix-vector equation [rcm][x] = [vec], in place.
203 Innermost vector loop unrolled. (Doesn't seem to speed things up?!) */
204
rcmat_lowert_solve_unrolled(rcmat * rcm,double * vec)205 void rcmat_lowert_solve_unrolled( rcmat *rcm , double *vec )
206 {
207 int nn , jbot ; LENTYP *len ; int ii,jj,ll ;
208 double **rc ; double *rii , sum , *vv ;
209
210 if( !ISVALID_RCMAT(rcm) || vec == NULL ) return ;
211
212 nn = rcm->nrc ;
213 rc = rcm->rc ;
214 len = rcm->len ;
215 vv = vec ;
216
217 for( ii=0 ; ii < nn ; ii++ ){
218 ll = len[ii] ;
219 if( ll == 1 ){
220 vv[ii] = vv[ii] / rc[ii][0] ; continue ;
221 }
222 jbot = ii - ll + 1 ; rii = rc[ii] - jbot ;
223 switch( ll ){
224 default:
225 sum = vv[ii] ;
226 for( jj=jbot ; jj+1 < ii ; jj+=2 ) sum -= RV(jj)+RV(jj+1) ;
227 if( jj < ii ) sum -= RV(jj) ;
228 break ;
229 case 2:
230 sum = vv[ii]-RV(ii-1) ; break ;
231 case 3:
232 sum = vv[ii]-RV(ii-2)-RV(ii-1) ; break ;
233 case 4:
234 sum = vv[ii]-RV(ii-3)-RV(ii-2)-RV(ii-1) ; break ;
235 case 5:
236 sum = vv[ii]-RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ; break ;
237 case 6:
238 sum = vv[ii]-RV(ii-5)-RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ; break ;
239 case 7:
240 sum = vv[ii]-RV(ii-6)-RV(ii-5)-RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ; break ;
241 case 8:
242 sum = vv[ii]-RV(ii-7)-RV(ii-6)-RV(ii-5)-RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
243 break ;
244 case 9:
245 sum = vv[ii]-RV(ii-8)-RV(ii-7)-RV(ii-6)-RV(ii-5)
246 -RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
247 break ;
248 case 10:
249 sum = vv[ii]-RV(ii-9)-RV(ii-8)-RV(ii-7)-RV(ii-6)-RV(ii-5)
250 -RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
251 break ;
252 case 11:
253 sum = vv[ii]-RV(ii-10)-RV(ii-9)-RV(ii-8)-RV(ii-7)-RV(ii-6)-RV(ii-5)
254 -RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
255 break ;
256 case 12:
257 sum = vv[ii]-RV(ii-11)-RV(ii-10)-RV(ii-9)
258 -RV(ii-8)-RV(ii-7)-RV(ii-6)-RV(ii-5)
259 -RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
260 break ;
261 }
262 vv[ii] = sum / rii[ii] ;
263 }
264 return ;
265 }
266
267 /*--------------------------------------------------------------------------*/
268 /*! Consider a rcmat struct as an upper triangular matrix,
269 and solve the matrix-vector equation [rcm][x] = [vec], in place. */
270
rcmat_uppert_solve(rcmat * rcm,double * vec)271 void rcmat_uppert_solve( rcmat *rcm , double *vec )
272 {
273 int nn , ibot ; LENTYP *len ; int ii,jj ;
274 double **rc ; double *rjj , *vv , xj ;
275 #ifdef UNROLL
276 int jm1 ;
277 #endif
278
279 if( !ISVALID_RCMAT(rcm) || vec == NULL ) return ;
280
281 nn = rcm->nrc ;
282 rc = rcm->rc ;
283 len = rcm->len ;
284 vv = vec ;
285
286 for( jj=nn-1 ; jj >= 0 ; jj-- ){
287 ibot = jj - len[jj] + 1 ;
288 rjj = rc[jj] - ibot ;
289 xj = vv[jj] = vv[jj] / rjj[jj] ;
290 #ifndef UNROLL
291 for( ii=ibot ; ii < jj ; ii++ ) vv[ii] -= rjj[ii] * xj ;
292 #else
293 jm1 = jj-1 ;
294 for( ii=ibot ; ii < jm1 ; ii+=2 ){
295 vv[ii] -= rjj[ii] * xj ;
296 vv[ii+1] -= rjj[ii+1] * xj ;
297 }
298 if( ii == jm1 ) vv[ii] -= rjj[ii] * xj ;
299 #endif
300 }
301 return ;
302 }
303
304 /*--------------------------------------------------------------------------*/
305 /* The code below is not good for use inside OpenMP, since it
306 uses static variables, and also uses malloc/free repeatedly.
307 *//*------------------------------------------------------------------------*/
308
309 #undef EPS
310 #define EPS 1.e-12
311 #undef DM
312 #define DM(a,b) ((double)(a))*((double)(b)) /* double multiply */
313
314 static int *rbot = NULL ; /* first nonzero entry in each column */
315 static int *rtop = NULL ; /* last one */
316 static double *rfac = NULL ; /* scale factor for each column */
317
318 /*---------------------- Destroy the above arrays. ----------------------*/
319
free_rbotop(void)320 static void free_rbotop(void)
321 {
322 if( rbot != NULL ){ free(rbot) ; rbot = NULL ; }
323 if( rtop != NULL ){ free(rtop) ; rtop = NULL ; }
324 if( rfac != NULL ){ free(rfac) ; rfac = NULL ; }
325 }
326
327 /*------------ Set the first/last elements and scale factors ------------*/
328
set_rbotop(int npt,int nref,float * ref[])329 static void set_rbotop( int npt , int nref , float *ref[] )
330 {
331 int jj , ii , nerr ; float *rj ; double sum ;
332
333 free_rbotop() ;
334 if( npt < 1 || nref < 1 || ref == NULL ) return ;
335
336 /* find first and last nonzero entries in each column;
337 N.B.: If column #j is all zero, then rbot[j] > rtop[j].
338 However, this means that we can't do Choleski least squares. */
339
340 rbot = (int *)malloc(sizeof(int)*nref) ;
341 for( jj=0 ; jj < nref ; jj++ ){
342 rj = ref[jj] ; if( rj == NULL ){ free_rbotop(); return; }
343 for( ii=0 ; ii < npt && rj[ii] == 0.0f ; ii++ ) ; /*nada*/
344 rbot[jj] = ii ;
345 }
346
347 rtop = (int *)malloc(sizeof(int)*nref) ;
348 for( jj=0 ; jj < nref ; jj++ ){
349 rj = ref[jj] ;
350 for( ii=npt-1; ii >= 0 && rj[ii] == 0.0f ; ii-- ) ; /*nada */
351 rtop[jj] = ii ;
352 }
353
354 /* compute reciprocal of L2 norm of each column;
355 any zero value will abort the calculations and kill everything */
356
357 rfac = (double *)malloc(sizeof(double)*nref) ;
358 for( jj=0 ; jj < nref ; jj++ ){
359 rj = ref[jj] ;
360 for( sum=0.0,ii=rbot[jj] ; ii <= rtop[jj] ; ii++ ) sum += DM(rj[ii],rj[ii]) ;
361 if( sum == 0.0 ){ free_rbotop(); return; }
362 rfac[jj] = 1.0/sqrt(sum) ;
363 }
364
365 return ;
366 }
367
368 /*--------------------------------------------------------------------------*/
369 /*! Construct sparse normal equations. */
370
rcmat_normeqn(int npt,int nref,float * ref[])371 rcmat * rcmat_normeqn( int npt , int nref , float *ref[] )
372 {
373 rcmat *rcm=NULL ;
374 LENTYP *len ;
375 double **rc ;
376 int jj,kk , kbot,ibot,itop,rjb,rjt,rkb,rkt ; int ii ;
377 float *rj, *rk ;
378 double sum , *rcjj ;
379
380 ENTRY("rcmat_normeqn") ;
381
382 if( npt < 1 || nref < 1 || ref == NULL ) RETURN(NULL) ;
383
384 /* find first and last nonzero entries in each column;
385 N.B.: if column #j is all zero, then rbot[j] > rtop[j] */
386
387 set_rbotop(npt,nref,ref) ; if( rbot == NULL ) RETURN(NULL) ;
388
389 /* create sparse matrix */
390
391 rcm = rcmat_init( nref ) ;
392 len = rcm->len ;
393 rc = rcm->rc ;
394
395 /* create first row */
396
397 jj = 0 ;
398 len[jj] = 1 ; rc[jj] = malloc(sizeof(double)) ;
399 rc[jj][0] = 1.0 + EPS ;
400
401 /* create subsequent rows */
402
403 for( jj=1 ; jj < nref ; jj++ ){
404 rj = ref[jj] ; rjb = rbot[jj] ; rjt = rtop[jj] ;
405
406 for( kk=0 ; kk < jj ; kk++ ){ /* find 1st ref[kk] that overlaps ref[jj] */
407 rkb = rbot[kk] ; rkt = rtop[kk] ;
408 if( rkb > rkt || rkb > rjt || rkt < rjb ) continue ;
409 break ;
410 }
411
412 kbot = kk ; /* earliest index in jj-th row */
413 len[jj] = jj+1-kbot ; /* number of entries in jj-th row */
414 rc[jj] = (double *)calloc(sizeof(double),len[jj]) ; /* the jj-th row */
415 rcjj = rc[jj] - kbot ; /* shifted pointer to the jj-th row */
416
417 for( kk=kbot ; kk < jj ; kk++ ){ /* dot product of ref[kk] & ref[jj] */
418 rkb = rbot[kk] ; rkt = rtop[kk] ; if( rkb > rkt ) continue ; /* zero */
419 ibot = MAX(rkb,rjb) ; itop = MIN(rkt,rjt) ; rk = ref[kk] ;
420 for( sum=0.0,ii=ibot ; ii <= itop ; ii++ ) sum += DM(rj[ii],rk[ii]) ;
421 rcjj[kk] = sum * rfac[jj] * rfac[kk] ; /* scale by L2 norms */
422 }
423
424 rcjj[jj] = 1.0 + EPS ; /* diagonal element, scaled by L2 norms */
425 }
426
427 #if 0
428 fprintf(stderr,"rcmat_normeqn\n") ;
429 for( ii=0 ; ii < nref ; ii++ ){
430 fprintf(stderr,"%2d:",ii) ;
431 kk = ii - len[ii] + 1 ;
432 for( jj=0 ; jj <= ii ; jj++ )
433 fprintf(stderr," %11.4g", (jj < kk) ? 0.0 : rc[ii][jj-kk] ) ;
434 fprintf(stderr,"\n") ;
435 }
436 for( ii=0 ; ii < nref ; ii++ ){
437 fprintf(stderr,"%2d: rbot=%d rtop=%d rfac=%11.4g len=%d\n",
438 ii,rbot[ii],rtop[ii],rfac[ii],len[ii]) ;
439 }
440 #endif
441
442 RETURN(rcm) ;
443 }
444
445 /*--------------------------------------------------------------------------*/
446
rcmat_lsqfit(int npt,float * far,int nref,float * ref[])447 float * rcmat_lsqfit( int npt , float *far ,
448 int nref , float *ref[] )
449 {
450 rcmat *rcm ; int ii,jj,rib,rit ; float *wt,*ri ; double *dt , sum ;
451
452 ENTRY("rcmat_lsqfit") ;
453
454 if( far == NULL ) RETURN(NULL) ;
455
456 free_rbotop() ;
457
458 STATUS("normal eqns") ;
459 rcm = rcmat_normeqn( npt , nref , ref ) ; if( rcm == NULL ) RETURN(NULL) ;
460
461 STATUS("choleski") ;
462 ii = rcmat_choleski( rcm ) ;
463 if( ii != 0 ){
464 ERROR_message("Choleski fails in rcmat_lsqfit at row %d",ii) ;
465 free_rbotop() ; rcmat_destroy(rcm) ; RETURN(NULL) ;
466 }
467
468 STATUS("make dt") ;
469 dt = (double *)calloc(sizeof(double),nref) ;
470 for( ii=0 ; ii < nref ; ii++ ){
471 ri = ref[ii] ; rib = rbot[ii] ; rit = rtop[ii] ;
472 for( sum=0.0,jj=rib ; jj <= rit ; jj++ ) sum += DM(far[jj],ri[jj]) ;
473 dt[ii] = sum * rfac[ii] ;
474 }
475 STATUS("solve") ;
476 rcmat_lowert_solve( rcm , dt ) ;
477 rcmat_uppert_solve( rcm , dt ) ;
478 rcmat_destroy(rcm) ;
479
480 STATUS("copy") ;
481 wt = (float *)malloc(sizeof(float)*nref) ;
482 for( ii=0 ; ii < nref ; ii++ ) wt[ii] = dt[ii] * rfac[ii] ;
483
484 free_rbotop() ; free(dt) ; RETURN(wt) ;
485 }
486
487 /*--------------------------------------------------------------------------*/
488 /*! Create an rcmat struct from a set of float rows. For each rr[ii],
489 only rr[ii][jj] for jj=0..ii (inclusive) will be accessed
490 (since an rcmat is for symmetric matrices).
491 *//*------------------------------------------------------------------------*/
492
rcmat_from_rows(int nn,float * rr[])493 rcmat * rcmat_from_rows( int nn , float *rr[] )
494 {
495 rcmat *rcm ;
496 LENTYP *len ;
497 double **rc , *rcii ;
498 int ii , jj,jbot ;
499
500 ENTRY("rcmat_from_columns") ;
501
502 if( nn < 1 || rr == NULL ) RETURN(NULL) ;
503
504 /* create sparse matrix struct */
505
506 rcm = rcmat_init( nn ) ;
507 len = rcm->len ;
508 rc = rcm->rc ;
509
510 /* create first row */
511
512 ii = 0 ;
513 len[ii] = 1 ; rc[ii] = malloc(sizeof(double)) ;
514 rc[ii][0] = (double)rr[ii][0] ;
515
516 /* create subsequent rows */
517
518 for( ii=1 ; ii < nn ; ii++ ){
519 for( jj=0 ; jj < ii && rr[ii][jj] == 0.0f ; jj++ ) ; /*nada*/
520 jbot = jj ; /* first nonzero entry */
521 len[ii] = ii+1-jbot ; /* number of nonzero entries */
522 rc[ii] = (double *)calloc(sizeof(double),len[ii]) ;
523 rcii = rc[ii] - jbot ; /* shifted ptr to the ii-th row */
524 for( jj=jbot ; jj <= ii ; jj++ ) /* copy data into row */
525 rcii[jj] = (double)rr[ii][jj] ;
526 }
527
528 RETURN(rcm) ;
529 }
530