1 #ifndef _CS_PV_ALREADY_INCLUDED_
2 #define _CS_PV_ALREADY_INCLUDED_
3 /******************************************************************************/
4 
5 #include "mrilib.h"
6 
7 #ifdef USE_OMP
8 # include <omp.h>
9 #endif
10 
11 #define MTH 999 /* threshold for using multiple threads in OpenMP code below */
12 #define NTH  99
13 
14 /*----------------------------------------------------------------------------*/
15 
16 static float symeig_sim1( int n  , float *asym , float *vec ,
17                           float *ws , unsigned short xran[] ) ;
18 static void  symeig_2D  ( double *a , double *e   , int dovec  ) ;
19 static void  symeig_3D  ( double *a , double *e   , int dovec  ) ;
20 
21 static float_pair symeig_sim2( int nn, float *asym, float *vec, float *wec,
22                                float *ws , unsigned short xran[] ) ;
23 
24 /*----------------------------------------------------------------------------*/
25 /* Create workspace for a principal_vector() job.  free() this when done.
26    The reason for doing it this way is to avoid a lot of malloc/free
27    cycles in 3dLocalPV.
28 *//*--------------------------------------------------------------------------*/
29 
pv_get_workspace(int n,int m)30 void * pv_get_workspace( int n , int m )
31 {
32    UAint64 nn = (UAint64)n , mm = (UAint64)m ;
33    UAint64 mmm , nb,nt ; void *ws ;
34 
35    nb  = MIN(nn,mm) ; nt = MAX(nn,mm) ;
36    mmm = nb*nb + 16*nt ;
37 #if 0
38 ININFO_message("   pv_get_workspace %lld bytes",(long long)(sizeof(float)*(size_t)mmm) ) ;
39 #endif
40    ws  = malloc( sizeof(float)*(size_t)mmm ) ;
41    return (ws) ;
42 }
43 
44 /*----------------------------------------------------------------------------*/
45 
46 /* nsym X nsym matrix element (i,j) */
47 #undef  A
48 #define A(i,j) asym[(i)+(j)*nsym]
49 
50 /* pointer to the q'th column of length nn (for q=0..mm-1) */
51 /* Method of getting pointer depends on the xtyp variable  */
52 #undef  XPT
53 #define XPT(q) ( (xtyp<=0) ? xx+(q)*nn : xar[q] )
54 
55 /*----------------------------------------------------------------------------*/
56 /*! Compute the mean vector of a set of m columns, each of length n.
57    * If xtyp <=0, the columns are stored in one big array:
58       ((float *)xp)[i+j*n] for i=0..n-1, j=0..m-1.
59    * If xtyp > 0, the columns are stored in an array of arrays:
60       ((float **)xp)[j][i]
61    * The return value is the L2-norm of the vector, and the vector is
62      stored into uvec.  The output vector is NOT normalized. (That's on you.)
63 *//*--------------------------------------------------------------------------*/
64 
mean_vector(int n,int m,int xtyp,void * xp,float * uvec)65 float mean_vector( int n , int m , int xtyp , void *xp , float *uvec )
66 {
67    UAint64 nn=n , mm=m , jj , ii ;
68    float *xj , fac,sum ; float *xx=NULL , **xar=NULL ;
69 
70 ENTRY("mean_vector") ;
71 
72    if( nn < 1 || mm < 1 || xp == NULL || uvec == NULL ) RETURN( -1.0f ) ;
73 
74    if( xtyp <= 0 ) xx  = (float * )xp ;
75    else            xar = (float **)xp ;
76 
77    for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = 0.0f ;
78 
79    for( jj=0 ; jj < mm ; jj++ ){
80      xj = XPT(jj) ;
81      for( ii=0 ; ii < nn ; ii++ ) uvec[ii] += xj[ii] ;
82    }
83 
84    fac = 1.0f / nn ; sum = 0.0f ;
85    for( ii=0 ; ii < nn ; ii++ ){ uvec[ii] *= fac; sum += uvec[ii]*uvec[ii]; }
86    RETURN( sqrtf(sum) ) ;
87 }
88 
89 /*----------------------------------------------------------------------------*/
90 /* If you can't figure out what this does without help, please
91    back away slowly from the code and take up competive toenail clipping.
92 *//*--------------------------------------------------------------------------*/
93 
is_allzero(int nf,float * ff)94 static int is_allzero( int nf , float *ff )
95 {
96    int ii ;
97    if( nf == 0 || ff == NULL ) return 1 ;
98    for( ii=0 ; ii < nf ; ii++ ) if( ff[ii] != 0.0f ) return 0 ;
99    return 1 ;
100 }
101 
102 /*----------------------------------------------------------------------------*/
103 /*! Compute the principal singular vector of a set of m columns, each
104     of length n (normally, n = time series length, m = number of voxels).
105    * If xtyp <=0, the columns are stored in one big array:
106       ((float *)xp)[i+j*n] for i=0..n-1, j=0..m-1.
107    * If xtyp > 0, the columns are stored in an array of arrays:
108       ((float **)xp)[j][i]
109    * If ws is not NULL, then workspace will be allocated and discarded
110      internally. The point of ws is that it can be pre-allocated by
111      pv_get_workspace() and then re-used, avoiding the overhead of
112      many malloc/free cycles in 3dLocalPV (e.g.).
113    * The singular value is returned, and the vector is stored into uvec[].
114    * tvec is a vector so that the sign of uvec dot tvec will be non-negative.
115    * If the return value is not positive, something ugly happened.
116 *//*--------------------------------------------------------------------------*/
117 
principal_vector(int n,int m,int xtyp,void * xp,float * uvec,float * tvec,float * ws,unsigned short xran[])118 float principal_vector( int n , int m , int xtyp , void *xp ,
119                                 float *uvec , float *tvec ,
120                                 float *ws , unsigned short xran[] )
121 {
122    UAint64 nn=n , mm=m , nsym , jj,kk,qq , ii ;
123    float *asym ;
124    float sum,qsum ; float *xj,*xk,*xi ;
125    float sval , *xx=NULL , **xar=NULL ;
126    float *wws=ws ; UAint64 nws=0 ;
127 
128    nsym = MIN(nn,mm) ;  /* size of the symmetric matrix to create */
129 
130    if( nsym < 1 || xp == NULL || uvec == NULL ) return (-666.0f) ;
131 
132    if( xtyp <= 0 ) xx  = (float * )xp ;  /* later will create a local xar */
133    else            xar = (float **)xp ;
134 
135    if( nsym == 1 ){  /*----- trivial case -----*/
136 
137      if( mm == 1 ){  /* really trivial: only 1 vector */
138        xj = XPT(0) ; qsum = sum = 0.0f ;
139        for( ii=0; ii < nn; ii++ ){
140          uvec[ii] = xj[ii] ; qsum += uvec[ii]*uvec[ii] ;
141          if( tvec != NULL ) sum += tvec[ii]*uvec[ii] ;
142        }
143        sval = sqrtf(qsum) ;
144        if( sval > 0.0f ){
145          qsum = 1.0f / sval ;
146          if( sum < 0.0f ) qsum = -qsum ;
147          for( ii=0 ; ii < nn ; ii++ ) uvec[ii] *= qsum ;
148        } else {
149          qsum = sqrtf(1.0f/nn) ;
150          for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = qsum ;
151        }
152      } else {  /* nn==1 and have mm > 1 such 'vectors' */
153        uvec[0] = (tvec != NULL && tvec[0] < 0.0f) ? -1.0f : 1.0f ;
154        for( sval=0.0f,ii=0 ; ii < mm ; ii++ ){
155          xj = XPT(ii) ; sval += xj[0]*xj[0] ;
156        }
157        sval = sqrtf(sval) ;
158      }
159      return (sval) ;
160 
161    } /*----- end of trivial case -----*/
162 
163    if( xtyp <= 0 ){  /* create xar for this storage type */
164      xar = (float **)malloc(sizeof(float *)*mm) ;
165      for( ii=0 ; ii < mm ; ii++ ) xar[ii] = XPT(ii) ;
166    }
167 
168    /* need locally allocated workspace? */
169 
170    if( wws == NULL ) wws = pv_get_workspace(nn,mm) ;
171 
172    asym = wws ; nws = nsym*nsym ;  /* symmetric matrix A, in workspace */
173                                    /* nws = how much of wws we've used */
174 
175    /** setup matrix to eigensolve: choose smaller of [X]'[X] and [X][X]' **/
176    /**     since [X] is n x m, [X]'[X] is m x m and [X][X]' is n x n     **/
177 
178    if( nn > mm ){                       /* more rows than columns:  */
179                                         /* so [A] = [X]'[X] = m x m */
180                                         /* in FMRI, this would be unusual */
181      for( jj=0 ; jj < mm ; jj++ ){
182        xj = xar[jj] ;
183        for( kk=0 ; kk <= jj ; kk++ ){
184          xk = xar[jj] ;
185          for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += xj[ii]*xk[ii] ;
186          A(jj,kk) = sum ; if( kk < jj ) A(kk,jj) = sum ;
187        }
188      }
189 
190    } else {                             /* more columns than rows:  */
191                                         /* so [A] = [X][X]' = n x n */
192                                         /* in FMRI, this would be common: */
193                                         /* more voxels (m) than time points (n) */
194 
195      memset( asym , 0 , sizeof(float)*nsym*nsym ) ; /* zero out A=asym matrix */
196 
197 #ifdef USE_OMP
198 #pragma omp parallel if( mm > MTH && nn > NTH )
199  {
200 #pragma omp master
201   { int nthr = omp_get_num_threads() ;
202     if( mm > MTH && nn > NTH )
203       ININFO_message(" PV: nn=%lld < mm=%lld -- using %d threads to compute A matrix",
204                      (long long)nn , (long long)mm , nthr    ) ;
205  } }
206 #endif
207 
208      for( ii=0 ; ii < mm ; ii++ ){  /* loop over columns = voxels */
209        xi = xar[ii] ;
210 
211        if( mm > MTH && nn > NTH && ii > 0 && ii%10000 == 0 )
212          ININFO_message("  start sums using column/voxel ii=%lld",(long long)ii) ;
213 
214 AFNI_OMP_START ;
215 #pragma omp parallel if( mm > MTH && nn > NTH )
216 {      UAint64 jj,kk,qq , qtop ;
217        qtop = nn*(nn+1)/2 ;
218        /* In the code below:
219             qq = kk + jj*(jj+1)/2 = lower triangular index in symmetric matrix
220                                             kk=0,1 ->
221                                      jj=0 [ 0       ]
222                                      jj=1 [ 1 2     ]
223                                           [ 3 4 5   ]
224                                           [ 6 7 8 9 ]
225            and we want to compute (jj,kk) given qq -- the reason for this
226            convoluted code is to allow this loop to be parallelized, and
227            for that to happen, it has to be (1) a single loop - not a double
228            loop over (jj,kk), and (2) it has to have a pre-computable number
229            of iterates (qtop) determinable by the OpenMP software.
230            * The result is jj = [ sqrt(8*qq+1)-1 ] / 2 and then kk follows
231            * For example, qq=7: sqrt(7*8+1) - 1 = 6.54983
232                                 ditto / 2       = 3.27492
233                                 floor(ditto)    = 3 = correct row index
234            * Twisted counting logic for (hopeful) efficiency when there's
235              a lot of computation going on (mm and nn big). [RWC - 29 Nov 2021]
236            * This method is probably slower for moderate
237              nn and mm, but then it is so fast that who cares?
238        */
239 #pragma omp for
240        for( qq=0 ; qq < qtop ; qq++ ){ /* loop over (jj,kk): lower triangle */
241          jj = (UAint64)floor((sqrt(8.0*(double)qq+1.000001)-0.999999)*0.5) ;
242          kk = qq - jj*(jj+1)/2 ;
243          A(jj,kk) += xi[jj]*xi[kk] ;
244        }
245 }
246 AFNI_OMP_END ;
247      }
248      for( jj=0 ; jj < nn ; jj++ ){           /* reflect result to upper triangle */
249        for( kk=0 ; kk < jj ; kk++ ) A(kk,jj) = A(jj,kk) ;
250      }
251 
252    }
253 
254    if( is_allzero(nsym*nsym,asym) ){
255      for( jj=0 ; jj < nn ; jj++ ) uvec[jj] = 0.0f ;
256      if( xtyp <= 0 ) free(xar) ;
257      ININFO_message("   PV -- %d x %d matrix is all zero :(",nsym,nsym) ;
258      return 0.0f ;
259    }
260 
261    /** SVD is [X] = [U] [S] [V]', where [U] = desired output vectors
262 
263        case n <= m: [A] = [X][X]' = [U] [S][S]' [U]'
264                     so [A][U] = [U] [S][S]'
265                     so eigenvectors of [A] are just [U]
266 
267        case n > m:  [A] = [X]'[X] = [V] [S]'[S] [V]'
268                     so [A][V] = [V] [S'][S]
269                     so eigenvectors of [A] are [V], but we want [U]
270                     note that [X][V] = [U] [S]
271                     so pre-multiplying each column vector in [V] by matrix [X]
272                     will give the corresponding column in [U], but scaled;
273                     below, just L2-normalize the column to get output vector **/
274 
275    if( nn <= mm ){                   /* copy eigenvector into output directly */
276                                      /* (e.g., more voxels than time points)  */
277 
278      (void)mean_vector( nsym , nsym , 0 , asym , uvec ) ;  /* initialize=mean */
279      sval = symeig_sim1( (int)nsym , asym , uvec , wws+nws , xran ) ;
280 
281    } else {       /* n > m: transform eigenvector to get left singular vector */
282                   /* (e.g., more time points than voxels) */
283 
284      float *qvec ;
285 
286      qvec = wws + nws ; nws += nsym ; /* qvec = mean vector, in workspace */
287      (void)mean_vector( nsym , nsym , 0 , asym , qvec ) ;  /* initialize=mean */
288      sval = symeig_sim1( (int)nsym , asym , qvec , wws+nws , xran ) ;
289      for( qsum=0.0f,ii=0 ; ii < nn ; ii++ ){
290        for( sum=0.0f,kk=0 ; kk < mm ; kk++ ) sum += xar[kk][ii] * qvec[kk] ;
291        uvec[ii] = sum ; qsum += sum*sum ;
292      }
293      if( qsum > 0.0f ){       /* L2 normalize */
294        sum = 1.0f / sqrtf(qsum) ;
295        for( ii=0 ; ii < nn ; ii++ ) uvec[ii] *= sum ;
296      }
297      nws -= nsym ;
298    }
299 
300    /** make it so that uvec dotted into tvec is positive **/
301 
302    if( tvec != NULL ){
303      for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += tvec[ii]*uvec[ii] ;
304      if( sum < 0.0f ){
305        for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = -uvec[ii] ;
306      }
307    }
308 
309    /** free at last!!! **/
310 
311    if( xtyp <= 0 ) free(xar) ;
312    if( wws != ws ) free(wws) ;
313 
314    return (sqrtf(sval)) ;
315 }
316 
317 /*----------------------------------------------------------------------------*/
318 /* Carry out simultaneous iteration (generalized power iteration) on an
319    nn X nn matrix stored in asym, with the initial vector stored in vec,
320    and the final vector stored there.  Goal is to compute the dominant
321    eigenvalue/vector -- the return value is the eigenvalue.  Three vectors
322    are iterated to speed up the convergence.
323 *//*--------------------------------------------------------------------------*/
324 
325 #undef  FEPS
326 #define FEPS 0.000000123456f  /* convergence test */
327 
328 #undef  DEPS
329 #undef  DEPSQ
330 #define DEPS  1.e-8
331 #define DEPSQ 1.e-4   /* sqrt(DEPS) */
332 
symeig_sim1(int nn,float * asym,float * vec,float * ws,unsigned short xran[])333 static float symeig_sim1( int nn , float *asym , float *vec , float *ws ,
334                           unsigned short xran[] )
335 {
336    float *u1,*u2,*u3 , *v1,*v2,*v3 , *aj ;
337    float sum1,sum2,sum3 , q1,q2,q3 , r1,r2,r3 , s1,s2,s3 ;
338    int ii , jj , nite=0 , n=nn ;
339    double bb[9] , ev[3] , evold=0.0 ;
340    double g11,g12,g13,g22,g23,g33 ;
341    double h11,h12,h13,h22,h23,h33 ;
342 
343    if( n < 1 || asym == NULL || vec == NULL ) return -666.0f ;
344 
345    /* special cases: 1x1 and 2x2 and 3x3 matrices */
346 
347    if( n == 1 ){
348      vec[0] = 1 ; return asym[0] ;
349    } else if( n == 2 ){
350      for( ii=0 ; ii < 4 ; ii++ ) bb[ii] = asym[ii] ;
351      symeig_2D( bb , ev , 1 ) ;
352      vec[0] = bb[0] ; vec[1] = bb[1] ; return ev[0] ;
353    } else if( n == 3 ){
354      for( ii=0 ; ii < 9 ; ii++ ) bb[ii] = asym[ii] ;
355      symeig_3D( bb , ev , 1 ) ;
356      vec[0] = bb[0] ; vec[1] = bb[1] ; vec[2] = bb[2] ; return ev[0] ;
357    }
358 
359    /* allocate iteration vectors in the workspace ws */
360 
361    u1 = ws ; u2 = u1+n ; u3 = u2+n ; v1 = u3+n ; v2 = v1+n ; v3 = v2+n ;
362 
363    /* initialize u1 vector from input vec */
364 
365    for( sum1=0.0f,ii=0 ; ii < n ; ii++ ){
366      u1[ii] = vec[ii] ; sum1 += u1[ii]*u1[ii] ;
367    }
368    if( sum1 == 0.0f ){  /* backup if input vector is all zero */
369      for( ii=0 ; ii < n ; ii++ ){
370        u1[ii] = erand48(xran)-0.3 ; sum1 += u1[ii]*u1[ii] ;
371      }
372    }
373    sum1 = 1.0f / sqrtf(sum1) ;
374    for( ii=0 ; ii < n ; ii++ ) u1[ii] *= sum1 ;
375 
376    /* initialize u2, u3 by flipping signs in u1 and adding in some junk*/
377 
378    sum1 = 0.02468f / n ;
379    for( ii=0 ; ii < n ; ii++ ){
380      jj   = (int)jrand48(xran) ;
381      sum2 = sum1 * ((jj >> 1)%4 - 1.5f) ;
382      sum3 = sum1 * ((jj >> 5)%4 - 1.5f) ;
383      u2[ii] = (((jj >> 3)%2 == 0) ? u1[ii] : -u1[ii]) + sum2 ;
384      u3[ii] = (((jj >> 7)%2 == 0) ? u1[ii] : -u1[ii]) + sum3 ;
385    }
386 
387    /*----- iteration loop -----*/
388 
389    while(1){
390      /* form V = A U */
391 
392      for( ii=0 ; ii < n ; ii++ ){ v1[ii] = v2[ii] = v3[ii] = 0.0f ; }
393      for( jj=0 ; jj < n ; jj++ ){
394        aj = asym + jj*n ;            /* ptr to jj-th column of asym */
395        sum1 = u1[jj] ; sum2 = u2[jj] ; sum3 = u3[jj] ;
396        for( ii=0 ; ii < n ; ii++ ){
397          v1[ii] += aj[ii] * sum1 ;
398          v2[ii] += aj[ii] * sum2 ;
399          v3[ii] += aj[ii] * sum3 ;
400        }
401      }
402 
403      /* form G = U' U   and   H = U' V */
404 
405      g11 = g12 = g22 = g13 = g23 = g33 = 0.0 ;
406      h11 = h12 = h22 = h13 = h23 = h33 = 0.0 ;
407      for( ii=0 ; ii < n ; ii++ ){
408        g11 += u1[ii] * u1[ii] ; g12 += u1[ii] * u2[ii] ;
409        g22 += u2[ii] * u2[ii] ; g13 += u1[ii] * u3[ii] ;
410        g23 += u2[ii] * u3[ii] ; g33 += u3[ii] * u3[ii] ;
411        h11 += u1[ii] * v1[ii] ; h12 += u1[ii] * v2[ii] ;
412        h22 += u2[ii] * v2[ii] ; h13 += u1[ii] * v3[ii] ;
413        h23 += u2[ii] * v3[ii] ; h33 += u3[ii] * v3[ii] ;
414      }
415 
416      /* Choleski-ize G = L L' (in place) */
417 
418      g11 = (g11 <= 0.0) ? DEPSQ : sqrt(g11) ;
419      g12 = g12 / g11 ;
420      g22 = g22 - g12*g12 ; g22 = (g22 <= 0.0) ? DEPSQ : sqrt(g22) ;
421      g13 = g13 / g11 ;
422      g23 = (g23 - g12*g13) / g22 ;
423      g33 = g33 - g13*g13 * g23*g23 ; g33 = (g33 <= 0.0) ? DEPSQ : sqrt(g33) ;
424 
425      /* invert lower triangular L (in place) */
426 
427      g13 = ( g12*g23 - g22*g13 ) / (g11*g22*g33) ;
428      g11 = 1.0 / g11 ; g22 = 1.0 / g22 ; g33 = 1.0 / g33 ;
429      g23 = -g23 * g33 * g22 ;
430      g12 = -g12 * g11 * g22 ;
431 
432      /* form B = inv[L] H inv[L]' (code from Maple 9.5) */
433 
434     { double t1, t3, t5, t13, t18, t34, t42, t47 ;
435       t1  = g11 * g11 ; t3  = g11 * h11 ; t5  = g11 * h12 ;
436       t13 = g12 * g12 ; t18 = g22 * g22 ; t34 = g13 * g13 ;
437       t42 = g23 * g23 ; t47 = g33 * g33 ;
438       bb[0] = t1 * h11 ;
439       bb[4] = t13 * h11 + 2.0 * g12 * g22 * h12 + t18 * h22 ;
440       bb[8] = t34 * h11 + 2.0 * g13 * g23 * h12 + 2.0 * g13 * g33 * h13
441             + t42 * h22 + 2.0 * g23 * g33 * h23 + t47 * h33 ;
442       bb[1] = bb[3] = t3 * g12 + t5 * g22 ;
443       bb[2] = bb[6] = t3 * g13 + t5 * g23 + g11 * h13 * g33 ;
444       bb[5] = bb[7] = g13 * g12 * h11 + g13 * g22 * h12 + g23 * g12 * h12
445                     + g23 * g22 * h22 + g33 * g12 * h13 + g33 * g22 * h23 ;
446     }
447 
448      /* eigensolve B P  = P D */
449 
450      symeig_3D( bb , ev , 1 ) ;  /* P is in bb, D is in ev */
451 
452 #if 0
453 fprintf(stderr,"nite=%d  ev=%.9g %.9g %.9g\n",nite,ev[0],ev[1],ev[2]) ;
454 fprintf(stderr,"         bb=%.5g %.5g %.5g\n"
455                "            %.5g %.5g %.5g\n"
456                "            %.5g %.5g %.5g\n",
457         bb[0],bb[1],bb[2],bb[3],bb[4],bb[5],bb[6],bb[7],bb[8]) ;
458 #endif
459 
460      /* form Q = inv[L]' P */
461 
462      q1 = g11*bb[0] + g12*bb[1] + g13*bb[2] ;
463      q2 =             g22*bb[1] + g23*bb[2] ;
464      q3 =                         g33*bb[2] ;
465 
466      /* form U = V Q and normalize it */
467 
468      for( sum1=0.0f,ii=0 ; ii < n ; ii++ ){
469        u1[ii] = q1 * v1[ii] + q2 * v2[ii] + q3 * v3[ii]; sum1 += u1[ii]*u1[ii];
470      }
471      sum1 = 1.0f / sqrtf(sum1) ;
472      for( ii=0 ; ii < n ; ii++ ) u1[ii] *= sum1 ;
473 
474      /* check first eigenvalue in D for convergence */
475 
476      if( nite > 266 ||
477          ( nite > 0 && fabs(ev[0]-evold) <= FEPS*(fabs(evold)+FEPS) ) ) break ;
478 
479      /* not done yet ==> iterate */
480 
481      nite++ ; evold = ev[0] ;
482 
483      /* finish u2 and u3 vectors in U */
484 
485      r1 = g11*bb[3] + g12*bb[4] + g13*bb[5] ;
486      r2 =             g22*bb[4] + g23*bb[5] ;
487      r3 =                         g33*bb[5] ;
488 
489      s1 = g11*bb[6] + g12*bb[7] + g13*bb[8] ;
490      s2 =             g22*bb[7] + g23*bb[8] ;
491      s3 =                         g33*bb[8] ;
492 
493      for( sum2=sum3=0.0f,ii=0 ; ii < n ; ii++ ){
494        u2[ii] = r1 * v1[ii] + r2 * v2[ii] + r3 * v3[ii]; sum2 += u2[ii]*u2[ii];
495        u3[ii] = s1 * v1[ii] + s2 * v2[ii] + s3 * v3[ii]; sum3 += u3[ii]*u3[ii];
496      }
497      sum2 = 1.0f / sqrtf(sum2) ; sum3 = 1.0f / sqrtf(sum3) ;
498      for( ii=0 ; ii < n ; ii++ ){ u2[ii] *= sum2 ; u3[ii] *= sum3 ; }
499 
500    } /*----- end of iteration loop -----*/
501 
502    /* result vector is u1 */
503 
504    for( ii=0 ; ii < n ; ii++ ) vec[ii] = u1[ii] ;
505 
506 #if 0
507    { static int first=1 ;
508      if( first ){ fprintf(stderr,"nite=%d nsym=%d\n",nite,n) ; first=0 ; }
509    }
510 #endif
511 
512    return (float)ev[0] ;
513 }
514 
515 /*----------------------------------------------------------------------------*/
516 /*! Compute the first 2 principal singular vectors of a set of m columns,
517     each of length n.
518    * If xtyp <=0, the columns are stored in one big array:
519       ((float *)xp)[i+j*n] for i=0..n-1, j=0..m-1.
520    * If xtyp > 0, the columns are stored in an array of arrays:
521       ((float **)xp)[j][i]
522    * The singular value pair is returned, and the vectors are stored into
523        uvec[] and vvec[]
524    * tvec is a vector so that the sign of uvec dot tvec will be non-negative.
525    * If the two return values are not positive, something ugly happened.
526    * This function is very similar to principal_vector(), with a few
527      changes to deal with the desire for TWO outputs rather than ONE.
528 *//*--------------------------------------------------------------------------*/
529 
principal_vector_pair(int n,int m,int xtyp,void * xp,float * uvec,float * vvec,float * tvec,float * ws,unsigned short xran[])530 float_pair principal_vector_pair( int n , int m , int xtyp , void *xp ,
531                                   float *uvec, float *vvec, float *tvec,
532                                   float *ws , unsigned short xran[] )
533 {
534    UAint64 nn=(UAint64)n , mm=(UAint64)m , nsym , jj,kk,qq , ii ;
535    float *asym ;
536    float sum,qsum ; float *xi,*xj,*xk ;
537    float sval , *xx=NULL , **xar=NULL ;
538    float_pair svout = {-666.0f,-666.0f} ;
539    float *wws=(float *)ws ; int64_t nws=0 ;
540 
541 ENTRY("principal_vector_pair") ;
542 
543    nsym = MIN(nn,mm) ;  /* size of the symmetric matrix to create */
544 
545    if( nsym < 1 || xp == NULL || uvec == NULL || vvec == NULL ) RETURN (svout);
546 
547    if( xtyp <= 0 ) xx  = (float * )xp ;
548    else            xar = (float **)xp ;
549 
550    if( nsym == 1 ){  /*----- trivial case -----*/
551 
552      for( ii=0 ; ii < nn ; ii++ ) vvec[ii] = 0.0f ; /* nothing to do here */
553 
554      if( mm == 1 ){  /* really trivial: only 1 vector */
555        xj = XPT(0) ; qsum = sum = 0.0f ;
556        for( ii=0; ii < nn; ii++ ){
557          uvec[ii] = xj[ii] ; qsum += uvec[ii]*uvec[ii] ;
558          if( tvec != NULL ) sum += tvec[ii]*uvec[ii] ;
559        }
560        sval = sqrtf(qsum) ;
561        if( sval > 0.0f ){
562          qsum = 1.0f / sval ; if( sum < 0.0f ) qsum = -qsum ;
563          for( ii=0 ; ii < nn ; ii++ ) uvec[ii] *= qsum ;
564        } else {
565          qsum = sqrtf(1.0f/nn) ;
566          for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = qsum ;
567        }
568      } else {  /* nn==1 and have mm > 1 such 'vectors' */
569        uvec[0] = (tvec != NULL && tvec[0] < 0.0f) ? -1.0f : 1.0f ;
570        for( sval=0.0f,ii=0 ; ii < mm ; ii++ ){
571          xj = XPT(ii) ; sval += xj[0]*xj[0] ;
572        }
573        sval = sqrtf(sval) ;
574      }
575 
576      svout.a = sval ; svout.b = 0.0f ; RETURN (svout) ;
577 
578    } /*----- end of trivial case -----*/
579 
580    if( xtyp <= 0 ){  /* create xar for this storage type */
581      xar = (float **)malloc(sizeof(float *)*mm) ;
582      for( ii=0 ; ii < mm ; ii++ ) xar[ii] = XPT(ii) ;
583    }
584 
585    if( wws == NULL ) wws = (float *)pv_get_workspace(nn,mm) ;
586 
587    asym = wws ; nws = nsym*nsym ;  /* symmetric matrix */
588 
589    /** setup matrix to eigensolve: choose smaller of [X]'[X] and [X][X]' **/
590    /**     since [X] is n x m, [X]'[X] is m x m and [X][X]' is n x n     **/
591 
592    if( nn > mm ){                       /* more rows than columns:  */
593                                         /* so [A] = [X]'[X] = m x m */
594      for( jj=0 ; jj < mm ; jj++ ){
595        xj = xar[jj] ;
596        for( kk=0 ; kk <= jj ; kk++ ){
597          xk = xar[kk] ;
598          for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += xj[ii]*xk[ii] ;
599          A(jj,kk) = sum ; if( kk < jj ) A(kk,jj) = sum ;
600        }
601      }
602 
603    } else {                             /* more columns than rows:  */
604                                         /* so [A] = [X][X]' = n x n */
605 
606      memset( asym , 0 , sizeof(float)*nsym*nsym ) ; /* zero out A=asym matrix */
607 
608 #ifdef USE_OMP
609 #pragma omp parallel if( mm > MTH && nn > NTH )
610  {
611 #pragma omp master
612   { int nthr = omp_get_num_threads() ;
613     if( mm > MTH && nn > NTH )
614       ININFO_message(" PVpair: nn=%lld < mm=%lld -- using %d threads to compute A matrix",
615                      (long long)nn , (long long)mm , nthr    ) ;
616  } }
617 #endif
618 
619      for( ii=0 ; ii < mm ; ii++ ){
620        xi = xar[ii] ;
621 
622        if( mm > MTH && nn > NTH && ii > 0 && ii%10000 == 0 )
623          ININFO_message("  start sums using column/voxel ii=%lld",(long long)ii) ;
624 
625 AFNI_OMP_START ;
626 #pragma omp parallel if( mm > MTH && nn > NTH )
627 {      UAint64 jj,kk,qq , qtop ;
628        qtop = nn*(nn+1)/2 ;
629        /* In the code below:
630             qq = kk + jj*(jj+1)/2 = lower triangular index in symmetric matrix
631                                             kk=0,1 ->
632                                      jj=0 [ 0       ]
633                                      jj=1 [ 1 2     ]
634                                           [ 3 4 5   ]
635                                           [ 6 7 8 9 ]
636            and we want to compute (jj,kk) given qq -- the reason for this
637            convoluted code is to allow this loop to be parallelized, and
638            for that to happen, it has to be (1) a single loop - not a double
639            loop over (jj,kk), and (2) it has to have a pre-computable number
640            of iterates (qtop) determinable by the OpenMP software.
641            * The result is jj = [ sqrt(8*qq+1)-1 ] / 2 and then kk follows
642            * For example, qq=7: sqrt(7*8+1) - 1 = 6.54983
643                                 ditto / 2       = 3.27492
644                                 floor(ditto)    = 3 = correct row index
645            * Twisted counting logic for (hopeful) efficiency when there's
646              a lot of computation going on (mm and nn big). [RWC - 29 Nov 2021]
647        */
648 #pragma omp for
649        for( qq=0 ; qq < qtop ; qq++ ){ /* loop over (jj,kk): lower triangle */
650          jj = (UAint64)floor((sqrt(8.0*(double)qq+1.000001)-0.999999)*0.5) ;
651          kk = qq - jj*(jj+1)/2 ;
652          A(jj,kk) += xi[jj]*xi[kk] ;
653        }
654 }
655 AFNI_OMP_END ;
656      }
657      for( jj=0 ; jj < nn ; jj++ ){           /* reflect result to upper triangle */
658        for( kk=0 ; kk < jj ; kk++ ) A(kk,jj) = A(jj,kk) ;
659      }
660 
661    } /* at this point, asym[] contains the symmetric matrix */
662 
663    if( is_allzero(nsym*nsym,asym) ){
664      for( jj=0 ; jj < nn ; jj++ ) uvec[jj] = vvec[jj] = 0.0f ;
665      svout.a = svout.b = 0.0f ;
666      ININFO_message("   PVpair -- %d x %d matrix is all zero :(",nsym,nsym) ;
667      if( xtyp <= 0 ) free(xar) ;
668      RETURN( svout );
669    }
670 
671    /** SVD is [X] = [U] [S] [V]', where [U] = desired output vectors
672 
673        case n <= m: [A] = [X][X]' = [U] [S][S]' [U]'
674                     so [A][U] = [U] [S][S]'
675                     so eigenvectors of [A] are just [U]
676 
677        case n > m:  [A] = [X]'[X] = [V] [S]'[S] [V]'
678                     so [A][V] = [V] [S'][S]
679                     so eigenvectors of [A] are [V], but we want [U]
680                     note that [X][V] = [U] [S]
681                     so pre-multiplying each column vector in [V] by matrix [X]
682                     will give the corresponding column in [U], but scaled;
683                     below, just L2-normalize the column to get output vector **/
684 
685    if( nn <= mm ){                    /* copy eigenvector into output directly */
686                                       /* (e.g., more vectors than time points) */
687 
688      (void)mean_vector( nsym , nsym , 0 , asym , uvec ) ;
689      svout = symeig_sim2( (int)nsym , asym , uvec , vvec , wws+nws , xran ) ;
690 
691    } else {  /* n > m: transform eigenvector to get left singular vector */
692              /* (e.g., more time points than vectors) */
693 
694      float *qvec , *rvec , rsum , ssum ;
695 
696      qvec = wws + nws ; nws += nsym ;
697      rvec = wws + nws ; nws += nsym ;
698      (void)mean_vector( nsym , nsym , 0 , asym , qvec ) ;
699      svout = symeig_sim2( (int)nsym , asym , qvec , rvec , wws+nws , xran ) ;
700      for( rsum=qsum=0.0f,ii=0 ; ii < nn ; ii++ ){
701        ssum = sum = 0.0f ;
702        for( kk=0 ; kk < mm ; kk++ ){
703           sum += xar[kk][ii] * qvec[kk] ;
704          ssum += xar[kk][ii] * rvec[kk] ;
705        }
706        uvec[ii] = sum ; vvec[ii] = ssum ; qsum += sum*sum ; rsum += ssum*ssum ;
707      }
708      if( qsum > 0.0f ){       /* L2 normalize uvec */
709        sum = 1.0f / sqrtf(qsum) ;
710        for( ii=0 ; ii < nn ; ii++ ) uvec[ii] *= sum ;
711      }
712      if( rsum > 0.0f ){       /* L2 normalize vvec */
713        sum = 1.0f / sqrtf(rsum) ;
714        for( ii=0 ; ii < nn ; ii++ ) vvec[ii] *= sum ;
715      }
716      nws -= 2*nsym ;
717    }
718 
719    /** make it so that uvec dotted into the mean vector is positive **/
720 
721    if( tvec != NULL ){
722      for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += tvec[ii]*uvec[ii] ;
723      if( sum < 0.0f ){
724        for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = -uvec[ii] ;
725      }
726      for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += tvec[ii]*vvec[ii] ;
727      if( sum < 0.0f ){
728        for( ii=0 ; ii < nn ; ii++ ) vvec[ii] = -vvec[ii] ;
729      }
730    }
731 
732    /** free at last!!! **/
733 
734    if( wws != ws ) free(wws) ;
735    if( xtyp <= 0 ) free(xar) ;
736 
737    svout.a = sqrtf(svout.a) ; svout.b = sqrtf(svout.b) ; RETURN (svout) ;
738 }
739 
740 /*---------------------------------------------------------------------------*/
741 /* Same as symeig_sim1() but converge to top TWO eigenvectors. */
742 
symeig_sim2(int nn,float * asym,float * vec,float * wec,float * ws,unsigned short xran[])743 static float_pair symeig_sim2( int nn, float *asym, float *vec,
744                                float *wec, float *ws , unsigned short xran[] )
745 {
746    float *u1,*u2,*u3 , *v1,*v2,*v3 , *aj ;
747    float sum1,sum2,sum3 , q1,q2,q3 , r1,r2,r3 , s1,s2,s3 ;
748    int ii , jj , nite=0 , n=nn ;
749    double bb[9] , ev[3] , evold1=0.0 , evold2=0.0 ;
750    double g11,g12,g13,g22,g23,g33 ;
751    double h11,h12,h13,h22,h23,h33 ;
752    float_pair evout = {-666.0f,-666.0f} ;
753 
754    if( n < 1 || asym == NULL || vec == NULL || wec == NULL ) return (evout) ;
755 
756    /* special cases: 1x1 and 2x2 and 3x3 matrices */
757 
758    if( n == 1 ){
759      vec[0] = 1.0f ; wec[0] = 0.0f ;
760      evout.a = asym[0] ; evout.b = 0.0f ; return (evout) ;
761    } else if( n == 2 ){
762      for( ii=0 ; ii < 4 ; ii++ ) bb[ii] = asym[ii] ;
763      symeig_2D( bb , ev , 1 ) ;
764      vec[0] = bb[0] ; vec[1] = bb[1] ;
765      wec[0] = bb[2] ; wec[1] = bb[3] ;
766      evout.a = (float)ev[0] ; evout.b = (float)ev[1] ; return (evout) ;
767    } else if( n == 3 ){
768      for( ii=0 ; ii < 9 ; ii++ ) bb[ii] = asym[ii] ;
769      symeig_3D( bb , ev , 1 ) ;
770      vec[0] = bb[0] ; vec[1] = bb[1] ; vec[2] = bb[2] ;
771      wec[0] = bb[3] ; wec[1] = bb[4] ; wec[2] = bb[5] ;
772      evout.a = (float)ev[0] ; evout.b = (float)ev[1] ; return (evout) ;
773    }
774 
775    /* allocate iteration vectors */
776 
777    u1 = ws ; u2 = u1+n ; u3 = u2+n ; v1 = u3+n ; v2 = v1+n ; v3 = v2+n ;
778 
779    /* initialize u1 vector from input vec */
780 
781    for( sum1=0.0f,ii=0 ; ii < n ; ii++ ){
782      u1[ii] = vec[ii] ; sum1 += u1[ii]*u1[ii] ;
783    }
784    if( sum1 == 0.0f ){  /* backup if input vector is all zero */
785      for( ii=0 ; ii < n ; ii++ ){
786        u1[ii] = erand48(xran)-0.3 ; sum1 += u1[ii]*u1[ii] ;
787      }
788    }
789    sum1 = 1.0f / sqrtf(sum1) ;
790    for( ii=0 ; ii < n ; ii++ ) u1[ii] *= sum1 ;
791 
792    /* initialize u2, u3 by flipping signs in u1 */
793 
794    sum1 = 0.02468f / n ;
795    for( ii=0 ; ii < n ; ii++ ){
796      jj   = (int)jrand48(xran) ;
797      sum2 = sum1 * ((jj >> 1)%4 - 1.5f) ;
798      sum3 = sum1 * ((jj >> 5)%4 - 1.5f) ;
799      u2[ii] = (((jj >> 3)%2 == 0) ? u1[ii] : -u1[ii]) + sum2 ;
800      u3[ii] = (((jj >> 7)%2 == 0) ? u1[ii] : -u1[ii]) + sum3 ;
801    }
802 
803    /*----- iteration loop -----*/
804 
805    while(1){
806      /* form V = A U */
807 
808      for( ii=0 ; ii < n ; ii++ ){ v1[ii] = v2[ii] = v3[ii] = 0.0f ; }
809      for( jj=0 ; jj < n ; jj++ ){
810        aj = asym + jj*n ;            /* ptr to jj-th column of asym */
811        sum1 = u1[jj] ; sum2 = u2[jj] ; sum3 = u3[jj] ;
812        for( ii=0 ; ii < n ; ii++ ){
813          v1[ii] += aj[ii] * sum1 ;
814          v2[ii] += aj[ii] * sum2 ;
815          v3[ii] += aj[ii] * sum3 ;
816        }
817      }
818 
819      /* form G = U' U   and   H = U' V */
820 
821      g11 = g12 = g22 = g13 = g23 = g33 = 0.0 ;
822      h11 = h12 = h22 = h13 = h23 = h33 = 0.0 ;
823      for( ii=0 ; ii < n ; ii++ ){
824        g11 += u1[ii] * u1[ii] ; g12 += u1[ii] * u2[ii] ;
825        g22 += u2[ii] * u2[ii] ; g13 += u1[ii] * u3[ii] ;
826        g23 += u2[ii] * u3[ii] ; g33 += u3[ii] * u3[ii] ;
827        h11 += u1[ii] * v1[ii] ; h12 += u1[ii] * v2[ii] ;
828        h22 += u2[ii] * v2[ii] ; h13 += u1[ii] * v3[ii] ;
829        h23 += u2[ii] * v3[ii] ; h33 += u3[ii] * v3[ii] ;
830      }
831 
832      /* Choleski-ize G = L L' (in place) */
833 
834      g11 = (g11 <= 0.0) ? DEPSQ : sqrt(g11) ;
835      g12 = g12 / g11 ;
836      g22 = g22 - g12*g12 ; g22 = (g22 <= 0.0) ? DEPSQ : sqrt(g22) ;
837      g13 = g13 / g11 ;
838      g23 = (g23 - g12*g13) / g22 ;
839      g33 = g33 - g13*g13 * g23*g23 ; g33 = (g33 <= 0.0) ? DEPSQ : sqrt(g33) ;
840 
841      /* invert lower triangular L (in place) */
842 
843      g13 = ( g12*g23 - g22*g13 ) / (g11*g22*g33) ;
844      g11 = 1.0 / g11 ; g22 = 1.0 / g22 ; g33 = 1.0 / g33 ;
845      g23 = -g23 * g33 * g22 ;
846      g12 = -g12 * g11 * g22 ;
847 
848      /* form B = inv[L] H inv[L]' (code from Maple 9.5) */
849 
850     { double t1, t3, t5, t13, t18, t34, t42, t47 ;
851       t1  = g11 * g11 ; t3  = g11 * h11 ; t5  = g11 * h12 ;
852       t13 = g12 * g12 ; t18 = g22 * g22 ; t34 = g13 * g13 ;
853       t42 = g23 * g23 ; t47 = g33 * g33 ;
854       bb[0] = t1 * h11 ;
855       bb[4] = t13 * h11 + 2.0 * g12 * g22 * h12 + t18 * h22 ;
856       bb[8] = t34 * h11 + 2.0 * g13 * g23 * h12 + 2.0 * g13 * g33 * h13
857             + t42 * h22 + 2.0 * g23 * g33 * h23 + t47 * h33 ;
858       bb[1] = bb[3] = t3 * g12 + t5 * g22 ;
859       bb[2] = bb[6] = t3 * g13 + t5 * g23 + g11 * h13 * g33 ;
860       bb[5] = bb[7] = g13 * g12 * h11 + g13 * g22 * h12 + g23 * g12 * h12
861                     + g23 * g22 * h22 + g33 * g12 * h13 + g33 * g22 * h23 ;
862     }
863 
864      /* eigensolve B P  = P D */
865 
866      symeig_3D( bb , ev , 1 ) ;  /* P is in bb, D is in ev */
867 
868 #if 0
869 fprintf(stderr,"nite=%d  ev=%.9g %.9g %.9g\n",nite,ev[0],ev[1],ev[2]) ;
870 fprintf(stderr,"         bb=%.5g %.5g %.5g\n"
871                "            %.5g %.5g %.5g\n"
872                "            %.5g %.5g %.5g\n",
873         bb[0],bb[1],bb[2],bb[3],bb[4],bb[5],bb[6],bb[7],bb[8]) ;
874 #endif
875 
876      /* form Q = inv[L]' P */
877 
878      q1 = g11*bb[0] + g12*bb[1] + g13*bb[2] ;
879      q2 =             g22*bb[1] + g23*bb[2] ;
880      q3 =                         g33*bb[2] ;
881 
882      r1 = g11*bb[3] + g12*bb[4] + g13*bb[5] ;
883      r2 =             g22*bb[4] + g23*bb[5] ;
884      r3 =                         g33*bb[5] ;
885 
886      s1 = g11*bb[6] + g12*bb[7] + g13*bb[8] ;
887      s2 =             g22*bb[7] + g23*bb[8] ;
888      s3 =                         g33*bb[8] ;
889 
890      /* form U = V Q and normalize it */
891 
892      for( sum1=sum2=sum3=0.0f,ii=0 ; ii < n ; ii++ ){
893        u1[ii] = q1 * v1[ii] + q2 * v2[ii] + q3 * v3[ii]; sum1 += u1[ii]*u1[ii];
894        u2[ii] = r1 * v1[ii] + r2 * v2[ii] + r3 * v3[ii]; sum2 += u2[ii]*u2[ii];
895        u3[ii] = s1 * v1[ii] + s2 * v2[ii] + s3 * v3[ii]; sum3 += u3[ii]*u3[ii];
896      }
897      sum1 = 1.0f/sqrtf(sum1); sum2 = 1.0f/sqrtf(sum2); sum3 = 1.0f/sqrtf(sum3);
898      for( ii=0 ; ii < n ; ii++ ){ u1[ii] *= sum1; u2[ii] *= sum2; u3[ii] *= sum3; }
899 
900      /* check first two eigenvalues in D for convergence */
901 
902      if( nite > 266 ||
903          ( nite > 0                                       &&
904            fabs(ev[0]-evold1) <= FEPS*(fabs(evold1)+FEPS) &&
905            fabs(ev[1]-evold2) <= FEPS*(fabs(evold2)+FEPS)   ) ) break ;
906 
907      /* not done yet ==> iterate */
908 
909      nite++ ; evold1 = ev[0] ; evold2 = ev[1] ;
910 
911    } /*----- end of iteration loop -----*/
912 
913    /* result vectors are u1, u2 */
914 
915    for( ii=0 ; ii < n ; ii++ ){ vec[ii] = u1[ii] ; wec[ii] = u2[ii] ; }
916 
917    evout.a = (float)ev[0] ; evout.b = (float)ev[1] ; return (evout) ;
918 }
919 
920 /*---------------------------------------------------------------------------*/
921 
922 #undef  SQR
923 #define SQR(a)  ((a)*(a))
924 
925 #undef  DET3
926 #define DET3(m) ( m[0]*m[4]*m[8]-m[0]*m[7]*m[5]-m[1]*m[3]*m[8] \
927                  +m[1]*m[6]*m[5]+m[2]*m[3]*m[7]-m[2]*m[6]*m[4] )
928 
929 #undef  PI
930 #define PI 3.14159265358979323846
931 
932 #undef  SWAP
933 #define SWAP(x,y) (th=(x),(x)=(y),(y)=th)
934 
935 #undef  CSWAP       /* swap columns in a[] starting at indexes i and j */
936 #define CSWAP(i,j) (SWAP(a[i],a[j]),SWAP(a[i+1],a[j+1]),SWAP(a[i+2],a[j+2]))
937 
938 /*----------------------------------------------------------------------------*/
939 /*! Do a 3x3 symmetric eigen-problem as fast as possible, using cubic formula.
940      - INPUT: double a[9] = input matrix; a[i+3*j] = matrix (i,j) element
941               (actually, only the 0,1,2,4,5,8 elements of a[] are used).
942      - OUTPUT: e[i] = i'th eigenvalue, with e[0] >= e[1] >= e[2].
943      - OUTPUT: if(dovec) then a[] is replaced with eigenvectors,
944                and this orthogonal matrix will have determinant=1.
945                Vector #0 is in a[0..2] , #1 in a[3..5], #3 in a[6..8].
946 *//*--------------------------------------------------------------------------*/
947 
symeig_3D(double * a,double * e,int dovec)948 static void symeig_3D( double *a , double *e , int dovec )
949 {
950    double aa,bb,cc,dd,ee,ff ;
951    double a1,a2,a3 , qq,rr, qs,th , lam1,lam2,lam3 ;
952    double aba,abb,abc,abd,abe,abf , ann ;
953    double d12,d13,d23 ;
954    double u1,u2,u3 , v1,v2,v3 , w1,w2,w3 , t1,t2,t3 , tn ;
955    double anni ;
956 
957    if( a == NULL || e == NULL ) return ;
958 
959    /*----- unload matrix into local variables -----*/
960 
961    aa = a[0] ; bb = a[1] ; cc = a[2] ;  /* matrix is [ aa bb cc ]  */
962    dd = a[4] ; ee = a[5] ; ff = a[8] ;  /*           [ bb dd ee ]  */
963                                         /*           [ cc ee ff ]  */
964    aba = fabs(aa) ; abb = fabs(bb) ; abc = fabs(cc) ;
965    abd = fabs(dd) ; abe = fabs(ee) ; abf = fabs(ff) ;
966    ann = aba+abb+abc+abd+abe+abf   ;                 /* matrix 'norm' */
967 
968    if( ann == 0.0 ){             /* matrix is all zero! */
969      e[0] = e[1] = e[2] = 0.0 ;
970      if( dovec ){
971        a[0] = a[4] = a[8] = 1.0 ;
972        a[1] = a[2] = a[3] = a[5] = a[6] = a[7] = 0.0 ;
973      }
974      return ;
975    }
976 
977    /*----- check for matrix that is essentially diagonal -----*/
978 
979    if( abb+abc+abe == 0.0 ||
980        ( DEPS*aba > (abb+abc) && DEPS*abd > (abb+abe) && DEPS*abf > (abc+abe) ) ){
981 
982      lam1 = aa ; lam2 = dd ; lam3 = ff ;
983 
984      if( dovec ){
985        a[0] = a[4] = a[8] = 1.0 ;
986        a[1] = a[2] = a[3] = a[5] = a[6] = a[7] = 0.0 ;
987 
988        if( lam1 < lam2 ){ SWAP(lam1,lam2) ; CSWAP(0,3) ; }
989        if( lam1 < lam3 ){ SWAP(lam1,lam3) ; CSWAP(0,6) ; }
990        if( lam2 < lam3 ){ SWAP(lam2,lam3) ; CSWAP(3,6) ; }
991        if( DET3(a) < 0.0 ){ a[6] = -a[6]; a[7] = -a[7]; a[8] = -a[8]; }
992      } else {
993        if( lam1 < lam2 )  SWAP(lam1,lam2) ;
994        if( lam1 < lam3 )  SWAP(lam1,lam3) ;
995        if( lam2 < lam3 )  SWAP(lam2,lam3) ;
996      }
997      e[0] = lam1 ; e[1] = lam2 ; e[2] = lam3 ;
998      return ;
999    }
1000 
1001    /*-- Scale matrix so abs sum is 1; unscale e[i] on output [26 Oct 2005] --*/
1002 
1003    anni = 1.0 / ann ;                      /* ann != 0, from above */
1004    aa *= anni ; bb *= anni ; cc *= anni ;
1005    dd *= anni ; ee *= anni ; ff *= anni ;
1006 
1007    /*----- not diagonal ==> must solve cubic polynomial for eigenvalues -----*/
1008    /*      the cubic polynomial is x**3 + a1*x**2 + a2*x + a3 = 0            */
1009 
1010    a1 = -(aa+dd+ff) ;
1011    a2 =  (aa*ff+aa*dd+dd*ff - bb*bb-cc*cc-ee*ee) ;
1012    a3 =  ( aa*(ee*ee-dd*ff) + bb*(bb*ff-cc*ee) + cc*(cc*dd-bb*ee) ) ;
1013 
1014    /*-- Rewrite classical formula for qq as a sum of squares [26 Oct 2005] --*/
1015 #if 0
1016    qq = (a1*a1 - 3.0*a2) / 9.0 ;
1017 #else
1018    qq = (  0.5 * ( SQR(dd-aa) + SQR(ff-aa) + SQR(ff-dd) )
1019          + 3.0 * ( bb*bb      + cc*cc      + ee*ee      ) ) / 9.0 ;
1020 #endif
1021    rr = (2.0*a1*a1*a1 - 9.0*a1*a2 + 27.0*a3) / 54.0 ;
1022 
1023    if( qq <= 0.0 ){       /*** This should never happen!!! ***/
1024 #ifndef USE_OMP
1025      {
1026      static int nerr=0 ;
1027      if( ++nerr < 4 )
1028        fprintf(stderr,"** ERROR in symeig_3D: discrim=%g numer=%g\n",qq,rr) ;
1029      }
1030 #endif
1031      qs = qq = rr = 0.0 ;
1032    } else {
1033      qs = sqrt(qq) ; rr = rr / (qs*qq) ;
1034      if( rr < -1.0 ) rr = -1.0 ; else if( rr > 1.0 ) rr = 1.0 ;
1035    }
1036    th = acos(rr) ;
1037 
1038    lam1 = -2.0 * qs * cos(  th        /3.0 ) - a1 / 3.0 ;
1039    lam2 = -2.0 * qs * cos( (th+2.0*PI)/3.0 ) - a1 / 3.0 ;
1040    lam3 = -2.0 * qs * cos( (th+4.0*PI)/3.0 ) - a1 / 3.0 ;
1041 
1042    /*-- if not doing eigenvectors, just sort the eigenvalues to be done --*/
1043 
1044    if( !dovec ){
1045      if( lam1 < lam2 ) SWAP(lam1,lam2) ;
1046      if( lam1 < lam3 ) SWAP(lam1,lam3) ;
1047      if( lam2 < lam3 ) SWAP(lam2,lam3) ;
1048      e[0] = ann*lam1 ; e[1] = ann*lam2 ; e[2] = ann*lam3 ;
1049      return ;
1050    }
1051 
1052    /*-- are doing eigenvectors; must do double root as a special case --*/
1053 
1054 #undef  CROSS  /* cross product (x1,x2,x3) X (y1,y2,y3) -> (z1,z2,z3) */
1055 #define CROSS(x1,x2,x3,y1,y2,y3,z1,z2,z3) \
1056  ( (z1)=(x2)*(y3)-(x3)*(y2), (z2)=(x3)*(y1)-(x1)*(y3), (z3)=(x1)*(y2)-(x2)*(y1) )
1057 
1058    d12 = fabs(lam1-lam2) ; d13 = fabs(lam1-lam3) ; d23 = fabs(lam2-lam3) ;
1059    rr  = MIN(d12,d13)    ; rr  = MIN(rr,d23)     ;
1060 
1061    if( rr > DEPS*ann ){  /*---- not a double root ----*/
1062 
1063      if( lam1 < lam2 )  SWAP(lam1,lam2) ;  /* start by sorting eigenvalues */
1064      if( lam1 < lam3 )  SWAP(lam1,lam3) ;
1065      if( lam2 < lam3 )  SWAP(lam2,lam3) ;
1066      e[0] = ann*lam1 ; e[1] = ann*lam2 ; e[2] = ann*lam3 ;
1067 
1068      /* find eigenvector for lam1 by computing Ay-lam1*y for
1069         vectors y1=[1,0,0], y2=[0,1,0], and y3=[0,0,1]; the eigenvector
1070         is orthogonal to all of these, so use the cross product to get it */
1071 
1072      u1 = aa-lam1 ; u2 = bb      ; u3 = cc ;   /* A*y1 - lam1*y1 */
1073      v1 = bb      ; v2 = dd-lam1 ; v3 = ee ;   /* A*y2 - lam1*y2 */
1074      CROSS(u1,u2,u3 , v1,v2,v3 , t1,t2,t3 ) ;     tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1075      if( tn < DEPSQ*ann ){                     /* u and v were parallel? */
1076        w1 = cc ; w2 = ee ; w3 = ff-lam1 ;      /* A*y3 - lam1*y3 */
1077        CROSS(u1,u2,u3 , w1,w2,w3 , t1,t2,t3 ) ;   tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1078        if( tn < DEPSQ*ann ){                   /* u and w were parallel? */
1079          CROSS(v1,v2,v3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1080        }
1081      }
1082      a[0] = t1/tn ; a[1] = t2/tn ; a[2] = t3/tn ;  /* normalize */
1083 
1084      /* do same for lam2 */
1085 
1086      u1 = aa-lam2 ; u2 = bb      ; u3 = cc ;
1087      v1 = bb      ; v2 = dd-lam2 ; v3 = ee ;
1088      CROSS(u1,u2,u3 , v1,v2,v3 , t1,t2,t3 ) ;     tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1089      if( tn < DEPSQ*ann ){
1090        w1 = cc ; w2 = ee ; w3 = ff-lam2 ;
1091        CROSS(u1,u2,u3 , w1,w2,w3 , t1,t2,t3 ) ;   tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1092        if( tn < DEPSQ*ann ){
1093          CROSS(v1,v2,v3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1094        }
1095      }
1096      a[3] = t1/tn ; a[4] = t2/tn ; a[5] = t3/tn ;
1097 
1098      /* orthgonality of eigenvectors ==> can get last one by cross product */
1099 
1100 #if 1
1101      CROSS( a[0],a[1],a[2] , a[3],a[4],a[5] , a[6],a[7],a[8] ) ;
1102 #else
1103      u1 = aa-lam3 ; u2 = bb      ; u3 = cc ;
1104      v1 = bb      ; v2 = dd-lam3 ; v3 = ee ;
1105      CROSS(u1,u2,u3 , v1,v2,v3 , t1,t2,t3 ) ;     tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1106      if( tn < DEPSQ*ann ){
1107        w1 = cc ; w2 = ee ; w3 = ff-lam3 ;
1108        CROSS(u1,u2,u3 , w1,w2,w3 , t1,t2,t3 ) ;   tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1109        if( tn < DEPSQ*ann ){
1110          CROSS(v1,v2,v3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1111        }
1112      }
1113      a[6] = t1/tn ; a[7] = t2/tn ; a[8] = t3/tn ;
1114 #endif
1115 
1116      return ;
1117 
1118    } else { /*---- if here, we have a double root ----*/
1119 
1120      /* make sure that we have lam1=lam2 and lam3 is the outlier */
1121 
1122           if( d13 < d12 && d13 < d23 ) SWAP(lam2,lam3) ;
1123      else if( d23 < d12 && d23 < d13 ) SWAP(lam1,lam3) ;
1124      lam1 = lam2 = 0.5*(lam1+lam2) ;
1125 
1126      /* compute eigenvector for lam3 using method as above */
1127 
1128      u1 = aa-lam3 ; u2 = bb      ; u3 = cc ;
1129      v1 = bb      ; v2 = dd-lam3 ; v3 = ee ;
1130      CROSS(u1,u2,u3 , v1,v2,v3 , t1,t2,t3 ) ;     tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1131      if( tn < DEPSQ*ann ){
1132        w1 = cc ; w2 = ee ; w3 = ff-lam3 ;
1133        CROSS(u1,u2,u3 , w1,w2,w3 , t1,t2,t3 ) ;   tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1134        if( tn < DEPSQ*ann ){
1135          CROSS(v1,v2,v3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1136        }
1137      }
1138      w1 = a[6] = t1/tn ; w2 = a[7] = t2/tn ; w3 = a[8] = t3/tn ;
1139 
1140      /* find a vector orthogonal to it */
1141 
1142      CROSS(w1,w2,w3 , 1,0,0 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1143      if( tn < DEPSQ ){
1144        CROSS(w1,w2,w3 , 0,1,0 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1145        if( tn < DEPSQ ){
1146          CROSS(w1,w2,w3 , 0,0,1 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1147        }
1148      }
1149      a[0] = t1/tn ; a[1] = t2/tn ; a[2] = t3/tn ;
1150 
1151      /* and the final vector is the cross product of these two */
1152 
1153      CROSS( w1,w2,w3 , a[0],a[1],a[2] , a[3],a[4],a[5] ) ;
1154 
1155      /* sort results (we know lam1==lam2) */
1156 
1157      if( lam1 < lam3 ){
1158        SWAP(lam1,lam3) ; CSWAP(0,6) ;
1159        if( DET3(a) < 0.0 ){ a[6] = -a[6]; a[7] = -a[7]; a[8] = -a[8]; }
1160      }
1161 
1162      e[0] = ann*lam1 ; e[1] = ann*lam2 ; e[2] = ann*lam3 ;
1163      return ;
1164    }
1165 }
1166 
1167 /*---------------------------------------------------------------------------*/
1168 /*! 2x2 symmetric eigenvalue/vector problem, like symeig_3D() above. */
1169 
symeig_2D(double * a,double * e,int dovec)1170 static void symeig_2D( double *a , double *e , int dovec )
1171 {
1172    double sxx,sxy,syy , lam1,lam2 , ss,tt , x,y ;
1173 
1174    if( a == NULL || e == NULL ) return ;
1175 
1176    /*----- unload matrix into local variables -----*/
1177 
1178    sxx = a[0] ; sxy = a[1] ; syy = a[3] ;
1179 
1180    ss = fabs(sxx) ; tt = fabs(syy) ; if( ss > tt ) ss = tt ;
1181 
1182    if( fabs(sxy) < DEPS*ss ){   /*--- essentially a diagonal matrix ---*/
1183      if( sxx >= syy ){
1184        lam1 = sxx ; lam2 = syy ;
1185        if( dovec ){ a[0]=a[3]=1.0; a[1]=a[2]=0.0; }
1186      } else {
1187        lam1 = syy ; lam2 = sxx ;
1188        if( dovec ){ a[0]=a[3]=1.0 ; a[1]=a[2]=1.0; }
1189      }
1190      e[0] = lam1 ; e[1] = lam2 ;
1191      return ;
1192    }
1193 
1194    /*--- non-diagonal matrix ==> solve quadratic equation for eigenvalues ---*/
1195 
1196    ss = sqrt( (sxx-syy)*(sxx-syy) + 4.0*sxy*sxy ) ;   /* positive */
1197    lam1 = 0.5 * ( sxx + syy + ss ) ;                  /* larger */
1198    lam2 = 0.5 * ( sxx + syy - ss ) ;                  /* smaller */
1199 
1200    if( dovec ){
1201      x = 2.0*sxy ; y = syy - sxx + ss ; tt = sqrt(x*x+y*y) ;
1202      a[0] = x/tt ; a[1] = y/tt ;
1203 
1204      y = syy - sxx - ss ; tt = sqrt(x*x+y*y) ;
1205      a[2] = x/tt ; a[3] = y/tt ;
1206    }
1207    e[0] = lam1 ; e[1] = lam2 ;
1208    return ;
1209 }
1210 
1211 /******************************************************************************/
1212 #endif /* _CS_PV_ALREADY_INCLUDED_ */
1213