1 #include "mrilib.h"
2 
3 /****************************************************************************/
4 /****** Generic functions to process a sparse matrix in rcmat format. *******/
5 /****************************************************************************/
6 
7 /*--------------------------------------------------------------------------*/
8 /*! Create a rcmat struct, ready to be filled up. */
9 
rcmat_init(int n)10 rcmat * rcmat_init( int n )
11 {
12    rcmat *rcm ;
13 
14 ENTRY("rcmat_init") ;
15 
16    if( n <= 1 ) RETURN(NULL) ;
17 
18    rcm      = (rcmat *)malloc( sizeof(rcmat) ) ;
19    rcm->nrc = n ;
20    rcm->len = (LENTYP * )calloc( n , sizeof(LENTYP  ) ) ;
21    rcm->rc  = (double **)calloc( n , sizeof(double *) ) ;
22    RETURN(rcm) ;
23 }
24 
25 /*--------------------------------------------------------------------------*/
26 /*! Delete a rcmat structure. */
27 
rcmat_destroy(rcmat * rcm)28 void rcmat_destroy( rcmat *rcm )
29 {
30    int nn , ii ;
31    double **rc ;
32    LENTYP *len ;
33 
34    if( rcm == NULL ) return ;
35 
36    nn = rcm->nrc ; rc = rcm->rc ; len = rcm->len ;
37    if( rc != NULL ){
38      for( ii=0 ; ii < nn ; ii++ ) if( rc[ii] != NULL ) free((void *)rc[ii]) ;
39      free((void *)rc) ;
40    }
41    if( len != NULL ) free((void *)len) ;
42    free((void *)rcm) ;
43    return ;
44 }
45 
46 /*--------------------------------------------------------------------------*/
47 /*! Average row length in an rcmat. */
48 
rcmat_avglen(rcmat * rcm)49 float rcmat_avglen( rcmat *rcm )
50 {
51    int nn , ii ;
52    float sum=0.0f ;
53    LENTYP *len ;
54 
55    if( rcm == NULL ) return sum ;
56    nn = rcm->nrc ; len = rcm->len ; if( nn == 0 || len == NULL ) return sum ;
57    for( ii=0 ; ii < nn ; ii++ ) sum += (float)len[ii] ;
58    sum /= nn ; return sum ;
59 }
60 
61 /*--------------------------------------------------------------------------*/
62 /*! Duplicate a rcmat struct. */
63 
rcmat_copy(rcmat * rcm)64 rcmat * rcmat_copy( rcmat *rcm )
65 {
66    rcmat *qcm ;
67    int ii,nn ;
68 
69    if( !ISVALID_RCMAT(rcm) ) return NULL ;
70 
71    nn  = rcm->nrc ;
72    qcm = rcmat_init(nn) ;
73    AAmemcpy( qcm->len , rcm->len , sizeof(LENTYP)*nn ) ;
74    for( ii=0 ; ii < nn ; ii++ ){
75      qcm->rc[ii] = malloc( sizeof(double)*qcm->len[ii] ) ;
76      AAmemcpy( qcm->rc[ii] , rcm->rc[ii] , sizeof(double)*qcm->len[ii] ) ;
77    }
78    return qcm ;
79 }
80 
81 /*--------------------------------------------------------------------------*/
82 /*! Consider a rcmat struct as a symmetric matrix, and
83     Choleski factor it in place.  Return value is 0 if all is OK.
84     A positive return indicates the row/column that had trouble. */
85 
rcmat_choleski(rcmat * rcm)86 int rcmat_choleski( rcmat *rcm )
87 {
88    int ii,jj,kk , nn , jbot,kbot ; LENTYP *len ;
89    double sum , **rc , *rii , *rjj ;
90 
91    if( !ISVALID_RCMAT(rcm) ) return 999999999 ;
92 
93    nn  = rcm->nrc ;
94    rc  = rcm->rc ;
95    len = rcm->len ;
96 
97    for( ii=0 ; ii < nn ; ii++ ){
98      if( len[ii] == 1 ){
99        if( rc[ii][0] <= 0.0 ) return (ii+1) ;
100        rc[ii][0] = sqrt(rc[ii][0]) ; continue ;
101      }
102      jbot = ii - len[ii] + 1 ;
103      rii  = rc[ii] - jbot ;
104      for( jj=jbot ; jj <= ii ; jj++ ){
105        if( len[jj] == 1 ){
106          rii[jj] = rii[jj] / rc[jj][0] ;
107          continue ;
108        }
109        kbot = jj - len[jj] + 1 ;
110        rjj  = rc[jj] - kbot ;
111        sum  = rii[jj] ;
112        if( kbot < jbot ) kbot = jbot ;
113        for( kk=kbot ; kk < jj ; kk++ ) sum -= rii[kk]*rjj[kk] ;
114        if( jj < ii ){
115          rii[jj] = sum / rjj[jj] ;
116        } else {
117          if( sum <= 0.0 ) return (ii+1) ;
118          rii[ii] = sqrt(sum) ;
119        }
120      }
121    }
122    return 0 ;
123 }
124 
125 /*--------------------------------------------------------------------------*/
126 
rcmat_lowert_vecmul(rcmat * rcm,double * vec)127 void rcmat_lowert_vecmul( rcmat *rcm , double *vec )  /* 02 Oct 2009 */
128 {
129    int nn , jbot ; LENTYP *len ; int ii,jj ;
130    double **rc ; double *rii , sum , *vv , *uu ;
131 
132    if( !ISVALID_RCMAT(rcm) || vec == NULL ) return ;
133 
134    nn  = rcm->nrc ;
135    rc  = rcm->rc ;
136    len = rcm->len ;
137    vv  = vec ;
138    uu  = (double *)malloc(sizeof(double)*nn) ;
139 
140    for( ii=0 ; ii < nn ; ii++ ){
141      if( len[ii] == 1 ){
142        sum = rc[ii][0] * vv[ii] ;
143      } else {
144        jbot = ii - len[ii] + 1 ; rii = rc[ii] - jbot ; sum = 0.0 ;
145        for( jj=jbot ; jj <= ii ; jj++ ) sum += rii[jj]*vv[jj] ;
146      }
147      uu[ii] = sum ;
148    }
149 
150    for( ii=0 ; ii < nn ; ii++ ) vv[ii] = uu[ii] ;
151    free(uu) ;
152    return ;
153 }
154 
155 /*--------------------------------------------------------------------------*/
156 #undef  RV
157 #define RV(k) rii[k]*vv[k]
158 
159 #define UNROLL
160 
161 /*! Consider a rcmat struct as a lower triangular matrix,
162     and solve the matrix-vector equation [rcm][x] = [vec], in place. */
163 
rcmat_lowert_solve(rcmat * rcm,double * vec)164 void rcmat_lowert_solve( rcmat *rcm , double *vec )
165 {
166    int nn , jbot ; LENTYP *len ; int ii,jj ;
167    double **rc ; double *rii , sum , *vv ;
168 #ifdef UNROLL
169    int im1 ;
170 #endif
171 
172    if( !ISVALID_RCMAT(rcm) || vec == NULL ) return ;
173 
174    nn  = rcm->nrc ;
175 #if 0
176    if( nn > 999 ){ rcmat_lowert_solve_unrolled(rcm,vec) ; return ; }
177 #endif
178    rc  = rcm->rc ;
179    len = rcm->len ;
180    vv  = vec ;
181 
182    for( ii=0 ; ii < nn ; ii++ ){
183      if( len[ii] == 1 ){
184        vv[ii] = vv[ii] / rc[ii][0] ; continue ;
185      }
186      jbot = ii - len[ii] + 1 ; rii = rc[ii] - jbot ;
187      sum = vv[ii] ;
188 #ifndef UNROLL
189      for( jj=jbot ; jj < ii ; jj++ ) sum -= RV(jj) ;
190 #else
191      im1 = ii-1 ;
192      for( jj=jbot ; jj < im1 ; jj+=2 ) sum -= RV(jj)+RV(jj+1) ;
193      if( jj == im1 ) sum -= RV(jj) ;
194 #endif
195      vv[ii] = sum / rii[ii] ;
196    }
197    return ;
198 }
199 
200 /*--------------------------------------------------------------------------*/
201 /*! Consider a rcmat struct as a lower triangular matrix,
202     and solve the matrix-vector equation [rcm][x] = [vec], in place.
203     Innermost vector loop unrolled.  (Doesn't seem to speed things up?!) */
204 
rcmat_lowert_solve_unrolled(rcmat * rcm,double * vec)205 void rcmat_lowert_solve_unrolled( rcmat *rcm , double *vec )
206 {
207    int nn , jbot ; LENTYP *len ; int ii,jj,ll ;
208    double **rc ; double *rii , sum , *vv ;
209 
210    if( !ISVALID_RCMAT(rcm) || vec == NULL ) return ;
211 
212    nn  = rcm->nrc ;
213    rc  = rcm->rc ;
214    len = rcm->len ;
215    vv  = vec ;
216 
217    for( ii=0 ; ii < nn ; ii++ ){
218      ll = len[ii] ;
219      if( ll == 1 ){
220        vv[ii] = vv[ii] / rc[ii][0] ; continue ;
221      }
222      jbot = ii - ll + 1 ; rii = rc[ii] - jbot ;
223      switch( ll ){
224        default:
225          sum = vv[ii] ;
226          for( jj=jbot ; jj+1 < ii ; jj+=2 ) sum -= RV(jj)+RV(jj+1) ;
227          if( jj < ii ) sum -= RV(jj) ;
228        break ;
229        case 2:
230          sum = vv[ii]-RV(ii-1) ; break ;
231        case 3:
232          sum = vv[ii]-RV(ii-2)-RV(ii-1) ; break ;
233        case 4:
234          sum = vv[ii]-RV(ii-3)-RV(ii-2)-RV(ii-1) ; break ;
235        case 5:
236          sum = vv[ii]-RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ; break ;
237        case 6:
238          sum = vv[ii]-RV(ii-5)-RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ; break ;
239        case 7:
240          sum = vv[ii]-RV(ii-6)-RV(ii-5)-RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ; break ;
241        case 8:
242          sum = vv[ii]-RV(ii-7)-RV(ii-6)-RV(ii-5)-RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
243        break ;
244        case 9:
245          sum = vv[ii]-RV(ii-8)-RV(ii-7)-RV(ii-6)-RV(ii-5)
246                      -RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
247        break ;
248        case 10:
249          sum = vv[ii]-RV(ii-9)-RV(ii-8)-RV(ii-7)-RV(ii-6)-RV(ii-5)
250                      -RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
251        break ;
252        case 11:
253          sum = vv[ii]-RV(ii-10)-RV(ii-9)-RV(ii-8)-RV(ii-7)-RV(ii-6)-RV(ii-5)
254                      -RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
255        break ;
256        case 12:
257          sum = vv[ii]-RV(ii-11)-RV(ii-10)-RV(ii-9)
258                      -RV(ii-8)-RV(ii-7)-RV(ii-6)-RV(ii-5)
259                      -RV(ii-4)-RV(ii-3)-RV(ii-2)-RV(ii-1) ;
260        break ;
261      }
262      vv[ii] = sum / rii[ii] ;
263    }
264    return ;
265 }
266 
267 /*--------------------------------------------------------------------------*/
268 /*! Consider a rcmat struct as an upper triangular matrix,
269     and solve the matrix-vector equation [rcm][x] = [vec], in place. */
270 
rcmat_uppert_solve(rcmat * rcm,double * vec)271 void rcmat_uppert_solve( rcmat *rcm , double *vec )
272 {
273    int nn , ibot ; LENTYP *len ; int ii,jj ;
274    double **rc ; double *rjj , *vv , xj ;
275 #ifdef UNROLL
276    int jm1 ;
277 #endif
278 
279    if( !ISVALID_RCMAT(rcm) || vec == NULL ) return ;
280 
281    nn  = rcm->nrc ;
282    rc  = rcm->rc ;
283    len = rcm->len ;
284    vv  = vec ;
285 
286    for( jj=nn-1 ; jj >= 0 ; jj-- ){
287      ibot = jj - len[jj] + 1 ;
288      rjj  = rc[jj] - ibot ;
289      xj   = vv[jj] = vv[jj] / rjj[jj] ;
290 #ifndef UNROLL
291      for( ii=ibot ; ii < jj ; ii++ ) vv[ii] -= rjj[ii] * xj ;
292 #else
293      jm1 = jj-1 ;
294      for( ii=ibot ; ii < jm1 ; ii+=2 ){
295        vv[ii]   -= rjj[ii]   * xj ;
296        vv[ii+1] -= rjj[ii+1] * xj ;
297      }
298      if( ii == jm1 ) vv[ii] -= rjj[ii] * xj ;
299 #endif
300    }
301    return ;
302 }
303 
304 /*--------------------------------------------------------------------------*/
305 /* The code below is not good for use inside OpenMP, since it
306    uses static variables, and also uses malloc/free repeatedly.
307 *//*------------------------------------------------------------------------*/
308 
309 #undef  EPS
310 #define EPS 1.e-12
311 #undef  DM
312 #define DM(a,b) ((double)(a))*((double)(b))  /* double multiply */
313 
314 static int    *rbot = NULL ;  /* first nonzero entry in each column */
315 static int    *rtop = NULL ;  /* last one */
316 static double *rfac = NULL ;  /* scale factor for each column */
317 
318 /*---------------------- Destroy the above arrays. ----------------------*/
319 
free_rbotop(void)320 static void free_rbotop(void)
321 {
322   if( rbot != NULL ){ free(rbot) ; rbot = NULL ; }
323   if( rtop != NULL ){ free(rtop) ; rtop = NULL ; }
324   if( rfac != NULL ){ free(rfac) ; rfac = NULL ; }
325 }
326 
327 /*------------ Set the first/last elements and scale factors ------------*/
328 
set_rbotop(int npt,int nref,float * ref[])329 static void set_rbotop( int npt , int nref , float *ref[] )
330 {
331    int jj , ii , nerr ; float *rj ; double sum ;
332 
333    free_rbotop() ;
334    if( npt < 1 || nref < 1 || ref == NULL ) return ;
335 
336    /* find first and last nonzero entries in each column;
337       N.B.: If column #j is all zero, then rbot[j] > rtop[j].
338             However, this means that we can't do Choleski least squares. */
339 
340    rbot = (int *)malloc(sizeof(int)*nref) ;
341    for( jj=0 ; jj < nref ; jj++ ){
342      rj = ref[jj] ; if( rj == NULL ){ free_rbotop(); return; }
343      for( ii=0 ; ii < npt && rj[ii] == 0.0f ; ii++ ) ; /*nada*/
344      rbot[jj] = ii ;
345    }
346 
347    rtop = (int *)malloc(sizeof(int)*nref) ;
348    for( jj=0 ; jj < nref ; jj++ ){
349      rj = ref[jj] ;
350      for( ii=npt-1; ii >= 0 && rj[ii] == 0.0f ; ii-- ) ; /*nada */
351      rtop[jj] = ii ;
352    }
353 
354    /* compute reciprocal of L2 norm of each column;
355       any zero value will abort the calculations and kill everything */
356 
357    rfac = (double *)malloc(sizeof(double)*nref) ;
358    for( jj=0 ; jj < nref ; jj++ ){
359      rj = ref[jj] ;
360      for( sum=0.0,ii=rbot[jj] ; ii <= rtop[jj] ; ii++ ) sum += DM(rj[ii],rj[ii]) ;
361      if( sum == 0.0 ){ free_rbotop(); return; }
362      rfac[jj] = 1.0/sqrt(sum) ;
363    }
364 
365    return ;
366 }
367 
368 /*--------------------------------------------------------------------------*/
369 /*! Construct sparse normal equations. */
370 
rcmat_normeqn(int npt,int nref,float * ref[])371 rcmat * rcmat_normeqn( int npt , int nref , float *ref[] )
372 {
373    rcmat  *rcm=NULL ;
374    LENTYP *len ;
375    double **rc ;
376    int jj,kk , kbot,ibot,itop,rjb,rjt,rkb,rkt ; int ii ;
377    float *rj, *rk ;
378    double sum , *rcjj ;
379 
380 ENTRY("rcmat_normeqn") ;
381 
382    if( npt < 1 || nref < 1 || ref == NULL ) RETURN(NULL) ;
383 
384    /* find first and last nonzero entries in each column;
385       N.B.: if column #j is all zero, then rbot[j] > rtop[j] */
386 
387    set_rbotop(npt,nref,ref) ; if( rbot == NULL ) RETURN(NULL) ;
388 
389    /* create sparse matrix */
390 
391    rcm = rcmat_init( nref ) ;
392    len = rcm->len ;
393    rc  = rcm->rc ;
394 
395    /* create first row */
396 
397    jj = 0 ;
398    len[jj] = 1 ; rc[jj] = malloc(sizeof(double)) ;
399    rc[jj][0] = 1.0 + EPS ;
400 
401    /* create subsequent rows */
402 
403    for( jj=1 ; jj < nref ; jj++ ){
404      rj = ref[jj] ; rjb = rbot[jj] ; rjt = rtop[jj] ;
405 
406      for( kk=0 ; kk < jj ; kk++ ){ /* find 1st ref[kk] that overlaps ref[jj] */
407        rkb = rbot[kk] ; rkt = rtop[kk] ;
408        if( rkb > rkt || rkb > rjt || rkt < rjb ) continue ;
409        break ;
410      }
411 
412      kbot    = kk ;                           /* earliest index in jj-th row */
413      len[jj] = jj+1-kbot ;                 /* number of entries in jj-th row */
414      rc[jj]  = (double *)calloc(sizeof(double),len[jj]) ;   /* the jj-th row */
415      rcjj    = rc[jj] - kbot ;           /* shifted pointer to the jj-th row */
416 
417      for( kk=kbot ; kk < jj ; kk++ ){    /* dot product of ref[kk] & ref[jj] */
418        rkb = rbot[kk] ; rkt = rtop[kk] ; if( rkb > rkt ) continue ;  /* zero */
419        ibot = MAX(rkb,rjb) ; itop = MIN(rkt,rjt) ; rk = ref[kk] ;
420        for( sum=0.0,ii=ibot ; ii <= itop ; ii++ ) sum += DM(rj[ii],rk[ii]) ;
421        rcjj[kk] = sum * rfac[jj] * rfac[kk] ;           /* scale by L2 norms */
422      }
423 
424      rcjj[jj] = 1.0 + EPS ;          /* diagonal element, scaled by L2 norms */
425    }
426 
427 #if 0
428 fprintf(stderr,"rcmat_normeqn\n") ;
429 for( ii=0 ; ii < nref ; ii++ ){
430   fprintf(stderr,"%2d:",ii) ;
431   kk = ii - len[ii] + 1 ;
432   for( jj=0 ; jj <= ii ; jj++ )
433     fprintf(stderr," %11.4g", (jj < kk) ? 0.0 : rc[ii][jj-kk] ) ;
434   fprintf(stderr,"\n") ;
435 }
436 for( ii=0 ; ii < nref ; ii++ ){
437   fprintf(stderr,"%2d: rbot=%d rtop=%d rfac=%11.4g len=%d\n",
438           ii,rbot[ii],rtop[ii],rfac[ii],len[ii]) ;
439 }
440 #endif
441 
442    RETURN(rcm) ;
443 }
444 
445 /*--------------------------------------------------------------------------*/
446 
rcmat_lsqfit(int npt,float * far,int nref,float * ref[])447 float * rcmat_lsqfit( int npt  , float *far   ,
448                       int nref , float *ref[]  )
449 {
450    rcmat *rcm ; int ii,jj,rib,rit ; float *wt,*ri ; double *dt , sum ;
451 
452 ENTRY("rcmat_lsqfit") ;
453 
454    if( far == NULL ) RETURN(NULL) ;
455 
456    free_rbotop() ;
457 
458 STATUS("normal eqns") ;
459    rcm = rcmat_normeqn( npt , nref , ref ) ; if( rcm == NULL ) RETURN(NULL) ;
460 
461 STATUS("choleski") ;
462    ii = rcmat_choleski( rcm ) ;
463    if( ii != 0 ){
464      ERROR_message("Choleski fails in rcmat_lsqfit at row %d",ii) ;
465      free_rbotop() ; rcmat_destroy(rcm) ; RETURN(NULL) ;
466    }
467 
468 STATUS("make dt") ;
469    dt = (double *)calloc(sizeof(double),nref) ;
470    for( ii=0 ; ii < nref ; ii++ ){
471      ri = ref[ii] ; rib = rbot[ii] ; rit = rtop[ii] ;
472      for( sum=0.0,jj=rib ; jj <= rit ; jj++ ) sum += DM(far[jj],ri[jj]) ;
473      dt[ii] = sum * rfac[ii] ;
474    }
475 STATUS("solve") ;
476    rcmat_lowert_solve( rcm , dt ) ;
477    rcmat_uppert_solve( rcm , dt ) ;
478    rcmat_destroy(rcm) ;
479 
480 STATUS("copy") ;
481    wt = (float *)malloc(sizeof(float)*nref) ;
482    for( ii=0 ; ii < nref ; ii++ ) wt[ii] = dt[ii] * rfac[ii] ;
483 
484    free_rbotop() ; free(dt) ; RETURN(wt) ;
485 }
486 
487 /*--------------------------------------------------------------------------*/
488 /*! Create an rcmat struct from a set of float rows.  For each rr[ii],
489     only rr[ii][jj] for jj=0..ii (inclusive) will be accessed
490     (since an rcmat is for symmetric matrices).
491 *//*------------------------------------------------------------------------*/
492 
rcmat_from_rows(int nn,float * rr[])493 rcmat * rcmat_from_rows( int nn , float *rr[] )
494 {
495    rcmat *rcm ;
496    LENTYP *len ;
497    double **rc , *rcii ;
498    int ii , jj,jbot ;
499 
500 ENTRY("rcmat_from_columns") ;
501 
502    if( nn < 1 || rr == NULL ) RETURN(NULL) ;
503 
504    /* create sparse matrix struct */
505 
506    rcm = rcmat_init( nn ) ;
507    len = rcm->len ;
508    rc  = rcm->rc ;
509 
510    /* create first row */
511 
512    ii = 0 ;
513    len[ii] = 1 ; rc[ii] = malloc(sizeof(double)) ;
514    rc[ii][0] = (double)rr[ii][0] ;
515 
516    /* create subsequent rows */
517 
518    for( ii=1 ; ii < nn ; ii++ ){
519      for( jj=0 ; jj < ii && rr[ii][jj] == 0.0f ; jj++ ) ; /*nada*/
520      jbot    = jj ;                      /* first nonzero entry */
521      len[ii] = ii+1-jbot ;         /* number of nonzero entries */
522      rc[ii]  = (double *)calloc(sizeof(double),len[ii]) ;
523      rcii    = rc[ii] - jbot ;  /* shifted ptr to the ii-th row */
524      for( jj=jbot ; jj <= ii ; jj++ )     /* copy data into row */
525        rcii[jj] = (double)rr[ii][jj] ;
526    }
527 
528    RETURN(rcm) ;
529 }
530