1 #ifndef _CS_PV_ALREADY_INCLUDED_
2 #define _CS_PV_ALREADY_INCLUDED_
3 /******************************************************************************/
4
5 #include "mrilib.h"
6
7 #ifdef USE_OMP
8 # include <omp.h>
9 #endif
10
11 #define MTH 999 /* threshold for using multiple threads in OpenMP code below */
12 #define NTH 99
13
14 /*----------------------------------------------------------------------------*/
15
16 static float symeig_sim1( int n , float *asym , float *vec ,
17 float *ws , unsigned short xran[] ) ;
18 static void symeig_2D ( double *a , double *e , int dovec ) ;
19 static void symeig_3D ( double *a , double *e , int dovec ) ;
20
21 static float_pair symeig_sim2( int nn, float *asym, float *vec, float *wec,
22 float *ws , unsigned short xran[] ) ;
23
24 /*----------------------------------------------------------------------------*/
25 /* Create workspace for a principal_vector() job. free() this when done.
26 The reason for doing it this way is to avoid a lot of malloc/free
27 cycles in 3dLocalPV.
28 *//*--------------------------------------------------------------------------*/
29
pv_get_workspace(int n,int m)30 void * pv_get_workspace( int n , int m )
31 {
32 UAint64 nn = (UAint64)n , mm = (UAint64)m ;
33 UAint64 mmm , nb,nt ; void *ws ;
34
35 nb = MIN(nn,mm) ; nt = MAX(nn,mm) ;
36 mmm = nb*nb + 16*nt ;
37 #if 0
38 ININFO_message(" pv_get_workspace %lld bytes",(long long)(sizeof(float)*(size_t)mmm) ) ;
39 #endif
40 ws = malloc( sizeof(float)*(size_t)mmm ) ;
41 return (ws) ;
42 }
43
44 /*----------------------------------------------------------------------------*/
45
46 /* nsym X nsym matrix element (i,j) */
47 #undef A
48 #define A(i,j) asym[(i)+(j)*nsym]
49
50 /* pointer to the q'th column of length nn (for q=0..mm-1) */
51 /* Method of getting pointer depends on the xtyp variable */
52 #undef XPT
53 #define XPT(q) ( (xtyp<=0) ? xx+(q)*nn : xar[q] )
54
55 /*----------------------------------------------------------------------------*/
56 /*! Compute the mean vector of a set of m columns, each of length n.
57 * If xtyp <=0, the columns are stored in one big array:
58 ((float *)xp)[i+j*n] for i=0..n-1, j=0..m-1.
59 * If xtyp > 0, the columns are stored in an array of arrays:
60 ((float **)xp)[j][i]
61 * The return value is the L2-norm of the vector, and the vector is
62 stored into uvec. The output vector is NOT normalized. (That's on you.)
63 *//*--------------------------------------------------------------------------*/
64
mean_vector(int n,int m,int xtyp,void * xp,float * uvec)65 float mean_vector( int n , int m , int xtyp , void *xp , float *uvec )
66 {
67 UAint64 nn=n , mm=m , jj , ii ;
68 float *xj , fac,sum ; float *xx=NULL , **xar=NULL ;
69
70 ENTRY("mean_vector") ;
71
72 if( nn < 1 || mm < 1 || xp == NULL || uvec == NULL ) RETURN( -1.0f ) ;
73
74 if( xtyp <= 0 ) xx = (float * )xp ;
75 else xar = (float **)xp ;
76
77 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = 0.0f ;
78
79 for( jj=0 ; jj < mm ; jj++ ){
80 xj = XPT(jj) ;
81 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] += xj[ii] ;
82 }
83
84 fac = 1.0f / nn ; sum = 0.0f ;
85 for( ii=0 ; ii < nn ; ii++ ){ uvec[ii] *= fac; sum += uvec[ii]*uvec[ii]; }
86 RETURN( sqrtf(sum) ) ;
87 }
88
89 /*----------------------------------------------------------------------------*/
90 /* If you can't figure out what this does without help, please
91 back away slowly from the code and take up competive toenail clipping.
92 *//*--------------------------------------------------------------------------*/
93
is_allzero(int nf,float * ff)94 static int is_allzero( int nf , float *ff )
95 {
96 int ii ;
97 if( nf == 0 || ff == NULL ) return 1 ;
98 for( ii=0 ; ii < nf ; ii++ ) if( ff[ii] != 0.0f ) return 0 ;
99 return 1 ;
100 }
101
102 /*----------------------------------------------------------------------------*/
103 /*! Compute the principal singular vector of a set of m columns, each
104 of length n (normally, n = time series length, m = number of voxels).
105 * If xtyp <=0, the columns are stored in one big array:
106 ((float *)xp)[i+j*n] for i=0..n-1, j=0..m-1.
107 * If xtyp > 0, the columns are stored in an array of arrays:
108 ((float **)xp)[j][i]
109 * If ws is not NULL, then workspace will be allocated and discarded
110 internally. The point of ws is that it can be pre-allocated by
111 pv_get_workspace() and then re-used, avoiding the overhead of
112 many malloc/free cycles in 3dLocalPV (e.g.).
113 * The singular value is returned, and the vector is stored into uvec[].
114 * tvec is a vector so that the sign of uvec dot tvec will be non-negative.
115 * If the return value is not positive, something ugly happened.
116 *//*--------------------------------------------------------------------------*/
117
principal_vector(int n,int m,int xtyp,void * xp,float * uvec,float * tvec,float * ws,unsigned short xran[])118 float principal_vector( int n , int m , int xtyp , void *xp ,
119 float *uvec , float *tvec ,
120 float *ws , unsigned short xran[] )
121 {
122 UAint64 nn=n , mm=m , nsym , jj,kk,qq , ii ;
123 float *asym ;
124 float sum,qsum ; float *xj,*xk,*xi ;
125 float sval , *xx=NULL , **xar=NULL ;
126 float *wws=ws ; UAint64 nws=0 ;
127
128 nsym = MIN(nn,mm) ; /* size of the symmetric matrix to create */
129
130 if( nsym < 1 || xp == NULL || uvec == NULL ) return (-666.0f) ;
131
132 if( xtyp <= 0 ) xx = (float * )xp ; /* later will create a local xar */
133 else xar = (float **)xp ;
134
135 if( nsym == 1 ){ /*----- trivial case -----*/
136
137 if( mm == 1 ){ /* really trivial: only 1 vector */
138 xj = XPT(0) ; qsum = sum = 0.0f ;
139 for( ii=0; ii < nn; ii++ ){
140 uvec[ii] = xj[ii] ; qsum += uvec[ii]*uvec[ii] ;
141 if( tvec != NULL ) sum += tvec[ii]*uvec[ii] ;
142 }
143 sval = sqrtf(qsum) ;
144 if( sval > 0.0f ){
145 qsum = 1.0f / sval ;
146 if( sum < 0.0f ) qsum = -qsum ;
147 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] *= qsum ;
148 } else {
149 qsum = sqrtf(1.0f/nn) ;
150 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = qsum ;
151 }
152 } else { /* nn==1 and have mm > 1 such 'vectors' */
153 uvec[0] = (tvec != NULL && tvec[0] < 0.0f) ? -1.0f : 1.0f ;
154 for( sval=0.0f,ii=0 ; ii < mm ; ii++ ){
155 xj = XPT(ii) ; sval += xj[0]*xj[0] ;
156 }
157 sval = sqrtf(sval) ;
158 }
159 return (sval) ;
160
161 } /*----- end of trivial case -----*/
162
163 if( xtyp <= 0 ){ /* create xar for this storage type */
164 xar = (float **)malloc(sizeof(float *)*mm) ;
165 for( ii=0 ; ii < mm ; ii++ ) xar[ii] = XPT(ii) ;
166 }
167
168 /* need locally allocated workspace? */
169
170 if( wws == NULL ) wws = pv_get_workspace(nn,mm) ;
171
172 asym = wws ; nws = nsym*nsym ; /* symmetric matrix A, in workspace */
173 /* nws = how much of wws we've used */
174
175 /** setup matrix to eigensolve: choose smaller of [X]'[X] and [X][X]' **/
176 /** since [X] is n x m, [X]'[X] is m x m and [X][X]' is n x n **/
177
178 if( nn > mm ){ /* more rows than columns: */
179 /* so [A] = [X]'[X] = m x m */
180 /* in FMRI, this would be unusual */
181 for( jj=0 ; jj < mm ; jj++ ){
182 xj = xar[jj] ;
183 for( kk=0 ; kk <= jj ; kk++ ){
184 xk = xar[jj] ;
185 for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += xj[ii]*xk[ii] ;
186 A(jj,kk) = sum ; if( kk < jj ) A(kk,jj) = sum ;
187 }
188 }
189
190 } else { /* more columns than rows: */
191 /* so [A] = [X][X]' = n x n */
192 /* in FMRI, this would be common: */
193 /* more voxels (m) than time points (n) */
194
195 memset( asym , 0 , sizeof(float)*nsym*nsym ) ; /* zero out A=asym matrix */
196
197 #ifdef USE_OMP
198 #pragma omp parallel if( mm > MTH && nn > NTH )
199 {
200 #pragma omp master
201 { int nthr = omp_get_num_threads() ;
202 if( mm > MTH && nn > NTH )
203 ININFO_message(" PV: nn=%lld < mm=%lld -- using %d threads to compute A matrix",
204 (long long)nn , (long long)mm , nthr ) ;
205 } }
206 #endif
207
208 for( ii=0 ; ii < mm ; ii++ ){ /* loop over columns = voxels */
209 xi = xar[ii] ;
210
211 if( mm > MTH && nn > NTH && ii > 0 && ii%10000 == 0 )
212 ININFO_message(" start sums using column/voxel ii=%lld",(long long)ii) ;
213
214 AFNI_OMP_START ;
215 #pragma omp parallel if( mm > MTH && nn > NTH )
216 { UAint64 jj,kk,qq , qtop ;
217 qtop = nn*(nn+1)/2 ;
218 /* In the code below:
219 qq = kk + jj*(jj+1)/2 = lower triangular index in symmetric matrix
220 kk=0,1 ->
221 jj=0 [ 0 ]
222 jj=1 [ 1 2 ]
223 [ 3 4 5 ]
224 [ 6 7 8 9 ]
225 and we want to compute (jj,kk) given qq -- the reason for this
226 convoluted code is to allow this loop to be parallelized, and
227 for that to happen, it has to be (1) a single loop - not a double
228 loop over (jj,kk), and (2) it has to have a pre-computable number
229 of iterates (qtop) determinable by the OpenMP software.
230 * The result is jj = [ sqrt(8*qq+1)-1 ] / 2 and then kk follows
231 * For example, qq=7: sqrt(7*8+1) - 1 = 6.54983
232 ditto / 2 = 3.27492
233 floor(ditto) = 3 = correct row index
234 * Twisted counting logic for (hopeful) efficiency when there's
235 a lot of computation going on (mm and nn big). [RWC - 29 Nov 2021]
236 * This method is probably slower for moderate
237 nn and mm, but then it is so fast that who cares?
238 */
239 #pragma omp for
240 for( qq=0 ; qq < qtop ; qq++ ){ /* loop over (jj,kk): lower triangle */
241 jj = (UAint64)floor((sqrt(8.0*(double)qq+1.000001)-0.999999)*0.5) ;
242 kk = qq - jj*(jj+1)/2 ;
243 A(jj,kk) += xi[jj]*xi[kk] ;
244 }
245 }
246 AFNI_OMP_END ;
247 }
248 for( jj=0 ; jj < nn ; jj++ ){ /* reflect result to upper triangle */
249 for( kk=0 ; kk < jj ; kk++ ) A(kk,jj) = A(jj,kk) ;
250 }
251
252 }
253
254 if( is_allzero(nsym*nsym,asym) ){
255 for( jj=0 ; jj < nn ; jj++ ) uvec[jj] = 0.0f ;
256 if( xtyp <= 0 ) free(xar) ;
257 ININFO_message(" PV -- %d x %d matrix is all zero :(",nsym,nsym) ;
258 return 0.0f ;
259 }
260
261 /** SVD is [X] = [U] [S] [V]', where [U] = desired output vectors
262
263 case n <= m: [A] = [X][X]' = [U] [S][S]' [U]'
264 so [A][U] = [U] [S][S]'
265 so eigenvectors of [A] are just [U]
266
267 case n > m: [A] = [X]'[X] = [V] [S]'[S] [V]'
268 so [A][V] = [V] [S'][S]
269 so eigenvectors of [A] are [V], but we want [U]
270 note that [X][V] = [U] [S]
271 so pre-multiplying each column vector in [V] by matrix [X]
272 will give the corresponding column in [U], but scaled;
273 below, just L2-normalize the column to get output vector **/
274
275 if( nn <= mm ){ /* copy eigenvector into output directly */
276 /* (e.g., more voxels than time points) */
277
278 (void)mean_vector( nsym , nsym , 0 , asym , uvec ) ; /* initialize=mean */
279 sval = symeig_sim1( (int)nsym , asym , uvec , wws+nws , xran ) ;
280
281 } else { /* n > m: transform eigenvector to get left singular vector */
282 /* (e.g., more time points than voxels) */
283
284 float *qvec ;
285
286 qvec = wws + nws ; nws += nsym ; /* qvec = mean vector, in workspace */
287 (void)mean_vector( nsym , nsym , 0 , asym , qvec ) ; /* initialize=mean */
288 sval = symeig_sim1( (int)nsym , asym , qvec , wws+nws , xran ) ;
289 for( qsum=0.0f,ii=0 ; ii < nn ; ii++ ){
290 for( sum=0.0f,kk=0 ; kk < mm ; kk++ ) sum += xar[kk][ii] * qvec[kk] ;
291 uvec[ii] = sum ; qsum += sum*sum ;
292 }
293 if( qsum > 0.0f ){ /* L2 normalize */
294 sum = 1.0f / sqrtf(qsum) ;
295 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] *= sum ;
296 }
297 nws -= nsym ;
298 }
299
300 /** make it so that uvec dotted into tvec is positive **/
301
302 if( tvec != NULL ){
303 for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += tvec[ii]*uvec[ii] ;
304 if( sum < 0.0f ){
305 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = -uvec[ii] ;
306 }
307 }
308
309 /** free at last!!! **/
310
311 if( xtyp <= 0 ) free(xar) ;
312 if( wws != ws ) free(wws) ;
313
314 return (sqrtf(sval)) ;
315 }
316
317 /*----------------------------------------------------------------------------*/
318 /* Carry out simultaneous iteration (generalized power iteration) on an
319 nn X nn matrix stored in asym, with the initial vector stored in vec,
320 and the final vector stored there. Goal is to compute the dominant
321 eigenvalue/vector -- the return value is the eigenvalue. Three vectors
322 are iterated to speed up the convergence.
323 *//*--------------------------------------------------------------------------*/
324
325 #undef FEPS
326 #define FEPS 0.000000123456f /* convergence test */
327
328 #undef DEPS
329 #undef DEPSQ
330 #define DEPS 1.e-8
331 #define DEPSQ 1.e-4 /* sqrt(DEPS) */
332
symeig_sim1(int nn,float * asym,float * vec,float * ws,unsigned short xran[])333 static float symeig_sim1( int nn , float *asym , float *vec , float *ws ,
334 unsigned short xran[] )
335 {
336 float *u1,*u2,*u3 , *v1,*v2,*v3 , *aj ;
337 float sum1,sum2,sum3 , q1,q2,q3 , r1,r2,r3 , s1,s2,s3 ;
338 int ii , jj , nite=0 , n=nn ;
339 double bb[9] , ev[3] , evold=0.0 ;
340 double g11,g12,g13,g22,g23,g33 ;
341 double h11,h12,h13,h22,h23,h33 ;
342
343 if( n < 1 || asym == NULL || vec == NULL ) return -666.0f ;
344
345 /* special cases: 1x1 and 2x2 and 3x3 matrices */
346
347 if( n == 1 ){
348 vec[0] = 1 ; return asym[0] ;
349 } else if( n == 2 ){
350 for( ii=0 ; ii < 4 ; ii++ ) bb[ii] = asym[ii] ;
351 symeig_2D( bb , ev , 1 ) ;
352 vec[0] = bb[0] ; vec[1] = bb[1] ; return ev[0] ;
353 } else if( n == 3 ){
354 for( ii=0 ; ii < 9 ; ii++ ) bb[ii] = asym[ii] ;
355 symeig_3D( bb , ev , 1 ) ;
356 vec[0] = bb[0] ; vec[1] = bb[1] ; vec[2] = bb[2] ; return ev[0] ;
357 }
358
359 /* allocate iteration vectors in the workspace ws */
360
361 u1 = ws ; u2 = u1+n ; u3 = u2+n ; v1 = u3+n ; v2 = v1+n ; v3 = v2+n ;
362
363 /* initialize u1 vector from input vec */
364
365 for( sum1=0.0f,ii=0 ; ii < n ; ii++ ){
366 u1[ii] = vec[ii] ; sum1 += u1[ii]*u1[ii] ;
367 }
368 if( sum1 == 0.0f ){ /* backup if input vector is all zero */
369 for( ii=0 ; ii < n ; ii++ ){
370 u1[ii] = erand48(xran)-0.3 ; sum1 += u1[ii]*u1[ii] ;
371 }
372 }
373 sum1 = 1.0f / sqrtf(sum1) ;
374 for( ii=0 ; ii < n ; ii++ ) u1[ii] *= sum1 ;
375
376 /* initialize u2, u3 by flipping signs in u1 and adding in some junk*/
377
378 sum1 = 0.02468f / n ;
379 for( ii=0 ; ii < n ; ii++ ){
380 jj = (int)jrand48(xran) ;
381 sum2 = sum1 * ((jj >> 1)%4 - 1.5f) ;
382 sum3 = sum1 * ((jj >> 5)%4 - 1.5f) ;
383 u2[ii] = (((jj >> 3)%2 == 0) ? u1[ii] : -u1[ii]) + sum2 ;
384 u3[ii] = (((jj >> 7)%2 == 0) ? u1[ii] : -u1[ii]) + sum3 ;
385 }
386
387 /*----- iteration loop -----*/
388
389 while(1){
390 /* form V = A U */
391
392 for( ii=0 ; ii < n ; ii++ ){ v1[ii] = v2[ii] = v3[ii] = 0.0f ; }
393 for( jj=0 ; jj < n ; jj++ ){
394 aj = asym + jj*n ; /* ptr to jj-th column of asym */
395 sum1 = u1[jj] ; sum2 = u2[jj] ; sum3 = u3[jj] ;
396 for( ii=0 ; ii < n ; ii++ ){
397 v1[ii] += aj[ii] * sum1 ;
398 v2[ii] += aj[ii] * sum2 ;
399 v3[ii] += aj[ii] * sum3 ;
400 }
401 }
402
403 /* form G = U' U and H = U' V */
404
405 g11 = g12 = g22 = g13 = g23 = g33 = 0.0 ;
406 h11 = h12 = h22 = h13 = h23 = h33 = 0.0 ;
407 for( ii=0 ; ii < n ; ii++ ){
408 g11 += u1[ii] * u1[ii] ; g12 += u1[ii] * u2[ii] ;
409 g22 += u2[ii] * u2[ii] ; g13 += u1[ii] * u3[ii] ;
410 g23 += u2[ii] * u3[ii] ; g33 += u3[ii] * u3[ii] ;
411 h11 += u1[ii] * v1[ii] ; h12 += u1[ii] * v2[ii] ;
412 h22 += u2[ii] * v2[ii] ; h13 += u1[ii] * v3[ii] ;
413 h23 += u2[ii] * v3[ii] ; h33 += u3[ii] * v3[ii] ;
414 }
415
416 /* Choleski-ize G = L L' (in place) */
417
418 g11 = (g11 <= 0.0) ? DEPSQ : sqrt(g11) ;
419 g12 = g12 / g11 ;
420 g22 = g22 - g12*g12 ; g22 = (g22 <= 0.0) ? DEPSQ : sqrt(g22) ;
421 g13 = g13 / g11 ;
422 g23 = (g23 - g12*g13) / g22 ;
423 g33 = g33 - g13*g13 * g23*g23 ; g33 = (g33 <= 0.0) ? DEPSQ : sqrt(g33) ;
424
425 /* invert lower triangular L (in place) */
426
427 g13 = ( g12*g23 - g22*g13 ) / (g11*g22*g33) ;
428 g11 = 1.0 / g11 ; g22 = 1.0 / g22 ; g33 = 1.0 / g33 ;
429 g23 = -g23 * g33 * g22 ;
430 g12 = -g12 * g11 * g22 ;
431
432 /* form B = inv[L] H inv[L]' (code from Maple 9.5) */
433
434 { double t1, t3, t5, t13, t18, t34, t42, t47 ;
435 t1 = g11 * g11 ; t3 = g11 * h11 ; t5 = g11 * h12 ;
436 t13 = g12 * g12 ; t18 = g22 * g22 ; t34 = g13 * g13 ;
437 t42 = g23 * g23 ; t47 = g33 * g33 ;
438 bb[0] = t1 * h11 ;
439 bb[4] = t13 * h11 + 2.0 * g12 * g22 * h12 + t18 * h22 ;
440 bb[8] = t34 * h11 + 2.0 * g13 * g23 * h12 + 2.0 * g13 * g33 * h13
441 + t42 * h22 + 2.0 * g23 * g33 * h23 + t47 * h33 ;
442 bb[1] = bb[3] = t3 * g12 + t5 * g22 ;
443 bb[2] = bb[6] = t3 * g13 + t5 * g23 + g11 * h13 * g33 ;
444 bb[5] = bb[7] = g13 * g12 * h11 + g13 * g22 * h12 + g23 * g12 * h12
445 + g23 * g22 * h22 + g33 * g12 * h13 + g33 * g22 * h23 ;
446 }
447
448 /* eigensolve B P = P D */
449
450 symeig_3D( bb , ev , 1 ) ; /* P is in bb, D is in ev */
451
452 #if 0
453 fprintf(stderr,"nite=%d ev=%.9g %.9g %.9g\n",nite,ev[0],ev[1],ev[2]) ;
454 fprintf(stderr," bb=%.5g %.5g %.5g\n"
455 " %.5g %.5g %.5g\n"
456 " %.5g %.5g %.5g\n",
457 bb[0],bb[1],bb[2],bb[3],bb[4],bb[5],bb[6],bb[7],bb[8]) ;
458 #endif
459
460 /* form Q = inv[L]' P */
461
462 q1 = g11*bb[0] + g12*bb[1] + g13*bb[2] ;
463 q2 = g22*bb[1] + g23*bb[2] ;
464 q3 = g33*bb[2] ;
465
466 /* form U = V Q and normalize it */
467
468 for( sum1=0.0f,ii=0 ; ii < n ; ii++ ){
469 u1[ii] = q1 * v1[ii] + q2 * v2[ii] + q3 * v3[ii]; sum1 += u1[ii]*u1[ii];
470 }
471 sum1 = 1.0f / sqrtf(sum1) ;
472 for( ii=0 ; ii < n ; ii++ ) u1[ii] *= sum1 ;
473
474 /* check first eigenvalue in D for convergence */
475
476 if( nite > 266 ||
477 ( nite > 0 && fabs(ev[0]-evold) <= FEPS*(fabs(evold)+FEPS) ) ) break ;
478
479 /* not done yet ==> iterate */
480
481 nite++ ; evold = ev[0] ;
482
483 /* finish u2 and u3 vectors in U */
484
485 r1 = g11*bb[3] + g12*bb[4] + g13*bb[5] ;
486 r2 = g22*bb[4] + g23*bb[5] ;
487 r3 = g33*bb[5] ;
488
489 s1 = g11*bb[6] + g12*bb[7] + g13*bb[8] ;
490 s2 = g22*bb[7] + g23*bb[8] ;
491 s3 = g33*bb[8] ;
492
493 for( sum2=sum3=0.0f,ii=0 ; ii < n ; ii++ ){
494 u2[ii] = r1 * v1[ii] + r2 * v2[ii] + r3 * v3[ii]; sum2 += u2[ii]*u2[ii];
495 u3[ii] = s1 * v1[ii] + s2 * v2[ii] + s3 * v3[ii]; sum3 += u3[ii]*u3[ii];
496 }
497 sum2 = 1.0f / sqrtf(sum2) ; sum3 = 1.0f / sqrtf(sum3) ;
498 for( ii=0 ; ii < n ; ii++ ){ u2[ii] *= sum2 ; u3[ii] *= sum3 ; }
499
500 } /*----- end of iteration loop -----*/
501
502 /* result vector is u1 */
503
504 for( ii=0 ; ii < n ; ii++ ) vec[ii] = u1[ii] ;
505
506 #if 0
507 { static int first=1 ;
508 if( first ){ fprintf(stderr,"nite=%d nsym=%d\n",nite,n) ; first=0 ; }
509 }
510 #endif
511
512 return (float)ev[0] ;
513 }
514
515 /*----------------------------------------------------------------------------*/
516 /*! Compute the first 2 principal singular vectors of a set of m columns,
517 each of length n.
518 * If xtyp <=0, the columns are stored in one big array:
519 ((float *)xp)[i+j*n] for i=0..n-1, j=0..m-1.
520 * If xtyp > 0, the columns are stored in an array of arrays:
521 ((float **)xp)[j][i]
522 * The singular value pair is returned, and the vectors are stored into
523 uvec[] and vvec[]
524 * tvec is a vector so that the sign of uvec dot tvec will be non-negative.
525 * If the two return values are not positive, something ugly happened.
526 * This function is very similar to principal_vector(), with a few
527 changes to deal with the desire for TWO outputs rather than ONE.
528 *//*--------------------------------------------------------------------------*/
529
principal_vector_pair(int n,int m,int xtyp,void * xp,float * uvec,float * vvec,float * tvec,float * ws,unsigned short xran[])530 float_pair principal_vector_pair( int n , int m , int xtyp , void *xp ,
531 float *uvec, float *vvec, float *tvec,
532 float *ws , unsigned short xran[] )
533 {
534 UAint64 nn=(UAint64)n , mm=(UAint64)m , nsym , jj,kk,qq , ii ;
535 float *asym ;
536 float sum,qsum ; float *xi,*xj,*xk ;
537 float sval , *xx=NULL , **xar=NULL ;
538 float_pair svout = {-666.0f,-666.0f} ;
539 float *wws=(float *)ws ; int64_t nws=0 ;
540
541 ENTRY("principal_vector_pair") ;
542
543 nsym = MIN(nn,mm) ; /* size of the symmetric matrix to create */
544
545 if( nsym < 1 || xp == NULL || uvec == NULL || vvec == NULL ) RETURN (svout);
546
547 if( xtyp <= 0 ) xx = (float * )xp ;
548 else xar = (float **)xp ;
549
550 if( nsym == 1 ){ /*----- trivial case -----*/
551
552 for( ii=0 ; ii < nn ; ii++ ) vvec[ii] = 0.0f ; /* nothing to do here */
553
554 if( mm == 1 ){ /* really trivial: only 1 vector */
555 xj = XPT(0) ; qsum = sum = 0.0f ;
556 for( ii=0; ii < nn; ii++ ){
557 uvec[ii] = xj[ii] ; qsum += uvec[ii]*uvec[ii] ;
558 if( tvec != NULL ) sum += tvec[ii]*uvec[ii] ;
559 }
560 sval = sqrtf(qsum) ;
561 if( sval > 0.0f ){
562 qsum = 1.0f / sval ; if( sum < 0.0f ) qsum = -qsum ;
563 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] *= qsum ;
564 } else {
565 qsum = sqrtf(1.0f/nn) ;
566 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = qsum ;
567 }
568 } else { /* nn==1 and have mm > 1 such 'vectors' */
569 uvec[0] = (tvec != NULL && tvec[0] < 0.0f) ? -1.0f : 1.0f ;
570 for( sval=0.0f,ii=0 ; ii < mm ; ii++ ){
571 xj = XPT(ii) ; sval += xj[0]*xj[0] ;
572 }
573 sval = sqrtf(sval) ;
574 }
575
576 svout.a = sval ; svout.b = 0.0f ; RETURN (svout) ;
577
578 } /*----- end of trivial case -----*/
579
580 if( xtyp <= 0 ){ /* create xar for this storage type */
581 xar = (float **)malloc(sizeof(float *)*mm) ;
582 for( ii=0 ; ii < mm ; ii++ ) xar[ii] = XPT(ii) ;
583 }
584
585 if( wws == NULL ) wws = (float *)pv_get_workspace(nn,mm) ;
586
587 asym = wws ; nws = nsym*nsym ; /* symmetric matrix */
588
589 /** setup matrix to eigensolve: choose smaller of [X]'[X] and [X][X]' **/
590 /** since [X] is n x m, [X]'[X] is m x m and [X][X]' is n x n **/
591
592 if( nn > mm ){ /* more rows than columns: */
593 /* so [A] = [X]'[X] = m x m */
594 for( jj=0 ; jj < mm ; jj++ ){
595 xj = xar[jj] ;
596 for( kk=0 ; kk <= jj ; kk++ ){
597 xk = xar[kk] ;
598 for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += xj[ii]*xk[ii] ;
599 A(jj,kk) = sum ; if( kk < jj ) A(kk,jj) = sum ;
600 }
601 }
602
603 } else { /* more columns than rows: */
604 /* so [A] = [X][X]' = n x n */
605
606 memset( asym , 0 , sizeof(float)*nsym*nsym ) ; /* zero out A=asym matrix */
607
608 #ifdef USE_OMP
609 #pragma omp parallel if( mm > MTH && nn > NTH )
610 {
611 #pragma omp master
612 { int nthr = omp_get_num_threads() ;
613 if( mm > MTH && nn > NTH )
614 ININFO_message(" PVpair: nn=%lld < mm=%lld -- using %d threads to compute A matrix",
615 (long long)nn , (long long)mm , nthr ) ;
616 } }
617 #endif
618
619 for( ii=0 ; ii < mm ; ii++ ){
620 xi = xar[ii] ;
621
622 if( mm > MTH && nn > NTH && ii > 0 && ii%10000 == 0 )
623 ININFO_message(" start sums using column/voxel ii=%lld",(long long)ii) ;
624
625 AFNI_OMP_START ;
626 #pragma omp parallel if( mm > MTH && nn > NTH )
627 { UAint64 jj,kk,qq , qtop ;
628 qtop = nn*(nn+1)/2 ;
629 /* In the code below:
630 qq = kk + jj*(jj+1)/2 = lower triangular index in symmetric matrix
631 kk=0,1 ->
632 jj=0 [ 0 ]
633 jj=1 [ 1 2 ]
634 [ 3 4 5 ]
635 [ 6 7 8 9 ]
636 and we want to compute (jj,kk) given qq -- the reason for this
637 convoluted code is to allow this loop to be parallelized, and
638 for that to happen, it has to be (1) a single loop - not a double
639 loop over (jj,kk), and (2) it has to have a pre-computable number
640 of iterates (qtop) determinable by the OpenMP software.
641 * The result is jj = [ sqrt(8*qq+1)-1 ] / 2 and then kk follows
642 * For example, qq=7: sqrt(7*8+1) - 1 = 6.54983
643 ditto / 2 = 3.27492
644 floor(ditto) = 3 = correct row index
645 * Twisted counting logic for (hopeful) efficiency when there's
646 a lot of computation going on (mm and nn big). [RWC - 29 Nov 2021]
647 */
648 #pragma omp for
649 for( qq=0 ; qq < qtop ; qq++ ){ /* loop over (jj,kk): lower triangle */
650 jj = (UAint64)floor((sqrt(8.0*(double)qq+1.000001)-0.999999)*0.5) ;
651 kk = qq - jj*(jj+1)/2 ;
652 A(jj,kk) += xi[jj]*xi[kk] ;
653 }
654 }
655 AFNI_OMP_END ;
656 }
657 for( jj=0 ; jj < nn ; jj++ ){ /* reflect result to upper triangle */
658 for( kk=0 ; kk < jj ; kk++ ) A(kk,jj) = A(jj,kk) ;
659 }
660
661 } /* at this point, asym[] contains the symmetric matrix */
662
663 if( is_allzero(nsym*nsym,asym) ){
664 for( jj=0 ; jj < nn ; jj++ ) uvec[jj] = vvec[jj] = 0.0f ;
665 svout.a = svout.b = 0.0f ;
666 ININFO_message(" PVpair -- %d x %d matrix is all zero :(",nsym,nsym) ;
667 if( xtyp <= 0 ) free(xar) ;
668 RETURN( svout );
669 }
670
671 /** SVD is [X] = [U] [S] [V]', where [U] = desired output vectors
672
673 case n <= m: [A] = [X][X]' = [U] [S][S]' [U]'
674 so [A][U] = [U] [S][S]'
675 so eigenvectors of [A] are just [U]
676
677 case n > m: [A] = [X]'[X] = [V] [S]'[S] [V]'
678 so [A][V] = [V] [S'][S]
679 so eigenvectors of [A] are [V], but we want [U]
680 note that [X][V] = [U] [S]
681 so pre-multiplying each column vector in [V] by matrix [X]
682 will give the corresponding column in [U], but scaled;
683 below, just L2-normalize the column to get output vector **/
684
685 if( nn <= mm ){ /* copy eigenvector into output directly */
686 /* (e.g., more vectors than time points) */
687
688 (void)mean_vector( nsym , nsym , 0 , asym , uvec ) ;
689 svout = symeig_sim2( (int)nsym , asym , uvec , vvec , wws+nws , xran ) ;
690
691 } else { /* n > m: transform eigenvector to get left singular vector */
692 /* (e.g., more time points than vectors) */
693
694 float *qvec , *rvec , rsum , ssum ;
695
696 qvec = wws + nws ; nws += nsym ;
697 rvec = wws + nws ; nws += nsym ;
698 (void)mean_vector( nsym , nsym , 0 , asym , qvec ) ;
699 svout = symeig_sim2( (int)nsym , asym , qvec , rvec , wws+nws , xran ) ;
700 for( rsum=qsum=0.0f,ii=0 ; ii < nn ; ii++ ){
701 ssum = sum = 0.0f ;
702 for( kk=0 ; kk < mm ; kk++ ){
703 sum += xar[kk][ii] * qvec[kk] ;
704 ssum += xar[kk][ii] * rvec[kk] ;
705 }
706 uvec[ii] = sum ; vvec[ii] = ssum ; qsum += sum*sum ; rsum += ssum*ssum ;
707 }
708 if( qsum > 0.0f ){ /* L2 normalize uvec */
709 sum = 1.0f / sqrtf(qsum) ;
710 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] *= sum ;
711 }
712 if( rsum > 0.0f ){ /* L2 normalize vvec */
713 sum = 1.0f / sqrtf(rsum) ;
714 for( ii=0 ; ii < nn ; ii++ ) vvec[ii] *= sum ;
715 }
716 nws -= 2*nsym ;
717 }
718
719 /** make it so that uvec dotted into the mean vector is positive **/
720
721 if( tvec != NULL ){
722 for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += tvec[ii]*uvec[ii] ;
723 if( sum < 0.0f ){
724 for( ii=0 ; ii < nn ; ii++ ) uvec[ii] = -uvec[ii] ;
725 }
726 for( sum=0.0f,ii=0 ; ii < nn ; ii++ ) sum += tvec[ii]*vvec[ii] ;
727 if( sum < 0.0f ){
728 for( ii=0 ; ii < nn ; ii++ ) vvec[ii] = -vvec[ii] ;
729 }
730 }
731
732 /** free at last!!! **/
733
734 if( wws != ws ) free(wws) ;
735 if( xtyp <= 0 ) free(xar) ;
736
737 svout.a = sqrtf(svout.a) ; svout.b = sqrtf(svout.b) ; RETURN (svout) ;
738 }
739
740 /*---------------------------------------------------------------------------*/
741 /* Same as symeig_sim1() but converge to top TWO eigenvectors. */
742
symeig_sim2(int nn,float * asym,float * vec,float * wec,float * ws,unsigned short xran[])743 static float_pair symeig_sim2( int nn, float *asym, float *vec,
744 float *wec, float *ws , unsigned short xran[] )
745 {
746 float *u1,*u2,*u3 , *v1,*v2,*v3 , *aj ;
747 float sum1,sum2,sum3 , q1,q2,q3 , r1,r2,r3 , s1,s2,s3 ;
748 int ii , jj , nite=0 , n=nn ;
749 double bb[9] , ev[3] , evold1=0.0 , evold2=0.0 ;
750 double g11,g12,g13,g22,g23,g33 ;
751 double h11,h12,h13,h22,h23,h33 ;
752 float_pair evout = {-666.0f,-666.0f} ;
753
754 if( n < 1 || asym == NULL || vec == NULL || wec == NULL ) return (evout) ;
755
756 /* special cases: 1x1 and 2x2 and 3x3 matrices */
757
758 if( n == 1 ){
759 vec[0] = 1.0f ; wec[0] = 0.0f ;
760 evout.a = asym[0] ; evout.b = 0.0f ; return (evout) ;
761 } else if( n == 2 ){
762 for( ii=0 ; ii < 4 ; ii++ ) bb[ii] = asym[ii] ;
763 symeig_2D( bb , ev , 1 ) ;
764 vec[0] = bb[0] ; vec[1] = bb[1] ;
765 wec[0] = bb[2] ; wec[1] = bb[3] ;
766 evout.a = (float)ev[0] ; evout.b = (float)ev[1] ; return (evout) ;
767 } else if( n == 3 ){
768 for( ii=0 ; ii < 9 ; ii++ ) bb[ii] = asym[ii] ;
769 symeig_3D( bb , ev , 1 ) ;
770 vec[0] = bb[0] ; vec[1] = bb[1] ; vec[2] = bb[2] ;
771 wec[0] = bb[3] ; wec[1] = bb[4] ; wec[2] = bb[5] ;
772 evout.a = (float)ev[0] ; evout.b = (float)ev[1] ; return (evout) ;
773 }
774
775 /* allocate iteration vectors */
776
777 u1 = ws ; u2 = u1+n ; u3 = u2+n ; v1 = u3+n ; v2 = v1+n ; v3 = v2+n ;
778
779 /* initialize u1 vector from input vec */
780
781 for( sum1=0.0f,ii=0 ; ii < n ; ii++ ){
782 u1[ii] = vec[ii] ; sum1 += u1[ii]*u1[ii] ;
783 }
784 if( sum1 == 0.0f ){ /* backup if input vector is all zero */
785 for( ii=0 ; ii < n ; ii++ ){
786 u1[ii] = erand48(xran)-0.3 ; sum1 += u1[ii]*u1[ii] ;
787 }
788 }
789 sum1 = 1.0f / sqrtf(sum1) ;
790 for( ii=0 ; ii < n ; ii++ ) u1[ii] *= sum1 ;
791
792 /* initialize u2, u3 by flipping signs in u1 */
793
794 sum1 = 0.02468f / n ;
795 for( ii=0 ; ii < n ; ii++ ){
796 jj = (int)jrand48(xran) ;
797 sum2 = sum1 * ((jj >> 1)%4 - 1.5f) ;
798 sum3 = sum1 * ((jj >> 5)%4 - 1.5f) ;
799 u2[ii] = (((jj >> 3)%2 == 0) ? u1[ii] : -u1[ii]) + sum2 ;
800 u3[ii] = (((jj >> 7)%2 == 0) ? u1[ii] : -u1[ii]) + sum3 ;
801 }
802
803 /*----- iteration loop -----*/
804
805 while(1){
806 /* form V = A U */
807
808 for( ii=0 ; ii < n ; ii++ ){ v1[ii] = v2[ii] = v3[ii] = 0.0f ; }
809 for( jj=0 ; jj < n ; jj++ ){
810 aj = asym + jj*n ; /* ptr to jj-th column of asym */
811 sum1 = u1[jj] ; sum2 = u2[jj] ; sum3 = u3[jj] ;
812 for( ii=0 ; ii < n ; ii++ ){
813 v1[ii] += aj[ii] * sum1 ;
814 v2[ii] += aj[ii] * sum2 ;
815 v3[ii] += aj[ii] * sum3 ;
816 }
817 }
818
819 /* form G = U' U and H = U' V */
820
821 g11 = g12 = g22 = g13 = g23 = g33 = 0.0 ;
822 h11 = h12 = h22 = h13 = h23 = h33 = 0.0 ;
823 for( ii=0 ; ii < n ; ii++ ){
824 g11 += u1[ii] * u1[ii] ; g12 += u1[ii] * u2[ii] ;
825 g22 += u2[ii] * u2[ii] ; g13 += u1[ii] * u3[ii] ;
826 g23 += u2[ii] * u3[ii] ; g33 += u3[ii] * u3[ii] ;
827 h11 += u1[ii] * v1[ii] ; h12 += u1[ii] * v2[ii] ;
828 h22 += u2[ii] * v2[ii] ; h13 += u1[ii] * v3[ii] ;
829 h23 += u2[ii] * v3[ii] ; h33 += u3[ii] * v3[ii] ;
830 }
831
832 /* Choleski-ize G = L L' (in place) */
833
834 g11 = (g11 <= 0.0) ? DEPSQ : sqrt(g11) ;
835 g12 = g12 / g11 ;
836 g22 = g22 - g12*g12 ; g22 = (g22 <= 0.0) ? DEPSQ : sqrt(g22) ;
837 g13 = g13 / g11 ;
838 g23 = (g23 - g12*g13) / g22 ;
839 g33 = g33 - g13*g13 * g23*g23 ; g33 = (g33 <= 0.0) ? DEPSQ : sqrt(g33) ;
840
841 /* invert lower triangular L (in place) */
842
843 g13 = ( g12*g23 - g22*g13 ) / (g11*g22*g33) ;
844 g11 = 1.0 / g11 ; g22 = 1.0 / g22 ; g33 = 1.0 / g33 ;
845 g23 = -g23 * g33 * g22 ;
846 g12 = -g12 * g11 * g22 ;
847
848 /* form B = inv[L] H inv[L]' (code from Maple 9.5) */
849
850 { double t1, t3, t5, t13, t18, t34, t42, t47 ;
851 t1 = g11 * g11 ; t3 = g11 * h11 ; t5 = g11 * h12 ;
852 t13 = g12 * g12 ; t18 = g22 * g22 ; t34 = g13 * g13 ;
853 t42 = g23 * g23 ; t47 = g33 * g33 ;
854 bb[0] = t1 * h11 ;
855 bb[4] = t13 * h11 + 2.0 * g12 * g22 * h12 + t18 * h22 ;
856 bb[8] = t34 * h11 + 2.0 * g13 * g23 * h12 + 2.0 * g13 * g33 * h13
857 + t42 * h22 + 2.0 * g23 * g33 * h23 + t47 * h33 ;
858 bb[1] = bb[3] = t3 * g12 + t5 * g22 ;
859 bb[2] = bb[6] = t3 * g13 + t5 * g23 + g11 * h13 * g33 ;
860 bb[5] = bb[7] = g13 * g12 * h11 + g13 * g22 * h12 + g23 * g12 * h12
861 + g23 * g22 * h22 + g33 * g12 * h13 + g33 * g22 * h23 ;
862 }
863
864 /* eigensolve B P = P D */
865
866 symeig_3D( bb , ev , 1 ) ; /* P is in bb, D is in ev */
867
868 #if 0
869 fprintf(stderr,"nite=%d ev=%.9g %.9g %.9g\n",nite,ev[0],ev[1],ev[2]) ;
870 fprintf(stderr," bb=%.5g %.5g %.5g\n"
871 " %.5g %.5g %.5g\n"
872 " %.5g %.5g %.5g\n",
873 bb[0],bb[1],bb[2],bb[3],bb[4],bb[5],bb[6],bb[7],bb[8]) ;
874 #endif
875
876 /* form Q = inv[L]' P */
877
878 q1 = g11*bb[0] + g12*bb[1] + g13*bb[2] ;
879 q2 = g22*bb[1] + g23*bb[2] ;
880 q3 = g33*bb[2] ;
881
882 r1 = g11*bb[3] + g12*bb[4] + g13*bb[5] ;
883 r2 = g22*bb[4] + g23*bb[5] ;
884 r3 = g33*bb[5] ;
885
886 s1 = g11*bb[6] + g12*bb[7] + g13*bb[8] ;
887 s2 = g22*bb[7] + g23*bb[8] ;
888 s3 = g33*bb[8] ;
889
890 /* form U = V Q and normalize it */
891
892 for( sum1=sum2=sum3=0.0f,ii=0 ; ii < n ; ii++ ){
893 u1[ii] = q1 * v1[ii] + q2 * v2[ii] + q3 * v3[ii]; sum1 += u1[ii]*u1[ii];
894 u2[ii] = r1 * v1[ii] + r2 * v2[ii] + r3 * v3[ii]; sum2 += u2[ii]*u2[ii];
895 u3[ii] = s1 * v1[ii] + s2 * v2[ii] + s3 * v3[ii]; sum3 += u3[ii]*u3[ii];
896 }
897 sum1 = 1.0f/sqrtf(sum1); sum2 = 1.0f/sqrtf(sum2); sum3 = 1.0f/sqrtf(sum3);
898 for( ii=0 ; ii < n ; ii++ ){ u1[ii] *= sum1; u2[ii] *= sum2; u3[ii] *= sum3; }
899
900 /* check first two eigenvalues in D for convergence */
901
902 if( nite > 266 ||
903 ( nite > 0 &&
904 fabs(ev[0]-evold1) <= FEPS*(fabs(evold1)+FEPS) &&
905 fabs(ev[1]-evold2) <= FEPS*(fabs(evold2)+FEPS) ) ) break ;
906
907 /* not done yet ==> iterate */
908
909 nite++ ; evold1 = ev[0] ; evold2 = ev[1] ;
910
911 } /*----- end of iteration loop -----*/
912
913 /* result vectors are u1, u2 */
914
915 for( ii=0 ; ii < n ; ii++ ){ vec[ii] = u1[ii] ; wec[ii] = u2[ii] ; }
916
917 evout.a = (float)ev[0] ; evout.b = (float)ev[1] ; return (evout) ;
918 }
919
920 /*---------------------------------------------------------------------------*/
921
922 #undef SQR
923 #define SQR(a) ((a)*(a))
924
925 #undef DET3
926 #define DET3(m) ( m[0]*m[4]*m[8]-m[0]*m[7]*m[5]-m[1]*m[3]*m[8] \
927 +m[1]*m[6]*m[5]+m[2]*m[3]*m[7]-m[2]*m[6]*m[4] )
928
929 #undef PI
930 #define PI 3.14159265358979323846
931
932 #undef SWAP
933 #define SWAP(x,y) (th=(x),(x)=(y),(y)=th)
934
935 #undef CSWAP /* swap columns in a[] starting at indexes i and j */
936 #define CSWAP(i,j) (SWAP(a[i],a[j]),SWAP(a[i+1],a[j+1]),SWAP(a[i+2],a[j+2]))
937
938 /*----------------------------------------------------------------------------*/
939 /*! Do a 3x3 symmetric eigen-problem as fast as possible, using cubic formula.
940 - INPUT: double a[9] = input matrix; a[i+3*j] = matrix (i,j) element
941 (actually, only the 0,1,2,4,5,8 elements of a[] are used).
942 - OUTPUT: e[i] = i'th eigenvalue, with e[0] >= e[1] >= e[2].
943 - OUTPUT: if(dovec) then a[] is replaced with eigenvectors,
944 and this orthogonal matrix will have determinant=1.
945 Vector #0 is in a[0..2] , #1 in a[3..5], #3 in a[6..8].
946 *//*--------------------------------------------------------------------------*/
947
symeig_3D(double * a,double * e,int dovec)948 static void symeig_3D( double *a , double *e , int dovec )
949 {
950 double aa,bb,cc,dd,ee,ff ;
951 double a1,a2,a3 , qq,rr, qs,th , lam1,lam2,lam3 ;
952 double aba,abb,abc,abd,abe,abf , ann ;
953 double d12,d13,d23 ;
954 double u1,u2,u3 , v1,v2,v3 , w1,w2,w3 , t1,t2,t3 , tn ;
955 double anni ;
956
957 if( a == NULL || e == NULL ) return ;
958
959 /*----- unload matrix into local variables -----*/
960
961 aa = a[0] ; bb = a[1] ; cc = a[2] ; /* matrix is [ aa bb cc ] */
962 dd = a[4] ; ee = a[5] ; ff = a[8] ; /* [ bb dd ee ] */
963 /* [ cc ee ff ] */
964 aba = fabs(aa) ; abb = fabs(bb) ; abc = fabs(cc) ;
965 abd = fabs(dd) ; abe = fabs(ee) ; abf = fabs(ff) ;
966 ann = aba+abb+abc+abd+abe+abf ; /* matrix 'norm' */
967
968 if( ann == 0.0 ){ /* matrix is all zero! */
969 e[0] = e[1] = e[2] = 0.0 ;
970 if( dovec ){
971 a[0] = a[4] = a[8] = 1.0 ;
972 a[1] = a[2] = a[3] = a[5] = a[6] = a[7] = 0.0 ;
973 }
974 return ;
975 }
976
977 /*----- check for matrix that is essentially diagonal -----*/
978
979 if( abb+abc+abe == 0.0 ||
980 ( DEPS*aba > (abb+abc) && DEPS*abd > (abb+abe) && DEPS*abf > (abc+abe) ) ){
981
982 lam1 = aa ; lam2 = dd ; lam3 = ff ;
983
984 if( dovec ){
985 a[0] = a[4] = a[8] = 1.0 ;
986 a[1] = a[2] = a[3] = a[5] = a[6] = a[7] = 0.0 ;
987
988 if( lam1 < lam2 ){ SWAP(lam1,lam2) ; CSWAP(0,3) ; }
989 if( lam1 < lam3 ){ SWAP(lam1,lam3) ; CSWAP(0,6) ; }
990 if( lam2 < lam3 ){ SWAP(lam2,lam3) ; CSWAP(3,6) ; }
991 if( DET3(a) < 0.0 ){ a[6] = -a[6]; a[7] = -a[7]; a[8] = -a[8]; }
992 } else {
993 if( lam1 < lam2 ) SWAP(lam1,lam2) ;
994 if( lam1 < lam3 ) SWAP(lam1,lam3) ;
995 if( lam2 < lam3 ) SWAP(lam2,lam3) ;
996 }
997 e[0] = lam1 ; e[1] = lam2 ; e[2] = lam3 ;
998 return ;
999 }
1000
1001 /*-- Scale matrix so abs sum is 1; unscale e[i] on output [26 Oct 2005] --*/
1002
1003 anni = 1.0 / ann ; /* ann != 0, from above */
1004 aa *= anni ; bb *= anni ; cc *= anni ;
1005 dd *= anni ; ee *= anni ; ff *= anni ;
1006
1007 /*----- not diagonal ==> must solve cubic polynomial for eigenvalues -----*/
1008 /* the cubic polynomial is x**3 + a1*x**2 + a2*x + a3 = 0 */
1009
1010 a1 = -(aa+dd+ff) ;
1011 a2 = (aa*ff+aa*dd+dd*ff - bb*bb-cc*cc-ee*ee) ;
1012 a3 = ( aa*(ee*ee-dd*ff) + bb*(bb*ff-cc*ee) + cc*(cc*dd-bb*ee) ) ;
1013
1014 /*-- Rewrite classical formula for qq as a sum of squares [26 Oct 2005] --*/
1015 #if 0
1016 qq = (a1*a1 - 3.0*a2) / 9.0 ;
1017 #else
1018 qq = ( 0.5 * ( SQR(dd-aa) + SQR(ff-aa) + SQR(ff-dd) )
1019 + 3.0 * ( bb*bb + cc*cc + ee*ee ) ) / 9.0 ;
1020 #endif
1021 rr = (2.0*a1*a1*a1 - 9.0*a1*a2 + 27.0*a3) / 54.0 ;
1022
1023 if( qq <= 0.0 ){ /*** This should never happen!!! ***/
1024 #ifndef USE_OMP
1025 {
1026 static int nerr=0 ;
1027 if( ++nerr < 4 )
1028 fprintf(stderr,"** ERROR in symeig_3D: discrim=%g numer=%g\n",qq,rr) ;
1029 }
1030 #endif
1031 qs = qq = rr = 0.0 ;
1032 } else {
1033 qs = sqrt(qq) ; rr = rr / (qs*qq) ;
1034 if( rr < -1.0 ) rr = -1.0 ; else if( rr > 1.0 ) rr = 1.0 ;
1035 }
1036 th = acos(rr) ;
1037
1038 lam1 = -2.0 * qs * cos( th /3.0 ) - a1 / 3.0 ;
1039 lam2 = -2.0 * qs * cos( (th+2.0*PI)/3.0 ) - a1 / 3.0 ;
1040 lam3 = -2.0 * qs * cos( (th+4.0*PI)/3.0 ) - a1 / 3.0 ;
1041
1042 /*-- if not doing eigenvectors, just sort the eigenvalues to be done --*/
1043
1044 if( !dovec ){
1045 if( lam1 < lam2 ) SWAP(lam1,lam2) ;
1046 if( lam1 < lam3 ) SWAP(lam1,lam3) ;
1047 if( lam2 < lam3 ) SWAP(lam2,lam3) ;
1048 e[0] = ann*lam1 ; e[1] = ann*lam2 ; e[2] = ann*lam3 ;
1049 return ;
1050 }
1051
1052 /*-- are doing eigenvectors; must do double root as a special case --*/
1053
1054 #undef CROSS /* cross product (x1,x2,x3) X (y1,y2,y3) -> (z1,z2,z3) */
1055 #define CROSS(x1,x2,x3,y1,y2,y3,z1,z2,z3) \
1056 ( (z1)=(x2)*(y3)-(x3)*(y2), (z2)=(x3)*(y1)-(x1)*(y3), (z3)=(x1)*(y2)-(x2)*(y1) )
1057
1058 d12 = fabs(lam1-lam2) ; d13 = fabs(lam1-lam3) ; d23 = fabs(lam2-lam3) ;
1059 rr = MIN(d12,d13) ; rr = MIN(rr,d23) ;
1060
1061 if( rr > DEPS*ann ){ /*---- not a double root ----*/
1062
1063 if( lam1 < lam2 ) SWAP(lam1,lam2) ; /* start by sorting eigenvalues */
1064 if( lam1 < lam3 ) SWAP(lam1,lam3) ;
1065 if( lam2 < lam3 ) SWAP(lam2,lam3) ;
1066 e[0] = ann*lam1 ; e[1] = ann*lam2 ; e[2] = ann*lam3 ;
1067
1068 /* find eigenvector for lam1 by computing Ay-lam1*y for
1069 vectors y1=[1,0,0], y2=[0,1,0], and y3=[0,0,1]; the eigenvector
1070 is orthogonal to all of these, so use the cross product to get it */
1071
1072 u1 = aa-lam1 ; u2 = bb ; u3 = cc ; /* A*y1 - lam1*y1 */
1073 v1 = bb ; v2 = dd-lam1 ; v3 = ee ; /* A*y2 - lam1*y2 */
1074 CROSS(u1,u2,u3 , v1,v2,v3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1075 if( tn < DEPSQ*ann ){ /* u and v were parallel? */
1076 w1 = cc ; w2 = ee ; w3 = ff-lam1 ; /* A*y3 - lam1*y3 */
1077 CROSS(u1,u2,u3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1078 if( tn < DEPSQ*ann ){ /* u and w were parallel? */
1079 CROSS(v1,v2,v3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1080 }
1081 }
1082 a[0] = t1/tn ; a[1] = t2/tn ; a[2] = t3/tn ; /* normalize */
1083
1084 /* do same for lam2 */
1085
1086 u1 = aa-lam2 ; u2 = bb ; u3 = cc ;
1087 v1 = bb ; v2 = dd-lam2 ; v3 = ee ;
1088 CROSS(u1,u2,u3 , v1,v2,v3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1089 if( tn < DEPSQ*ann ){
1090 w1 = cc ; w2 = ee ; w3 = ff-lam2 ;
1091 CROSS(u1,u2,u3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1092 if( tn < DEPSQ*ann ){
1093 CROSS(v1,v2,v3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1094 }
1095 }
1096 a[3] = t1/tn ; a[4] = t2/tn ; a[5] = t3/tn ;
1097
1098 /* orthgonality of eigenvectors ==> can get last one by cross product */
1099
1100 #if 1
1101 CROSS( a[0],a[1],a[2] , a[3],a[4],a[5] , a[6],a[7],a[8] ) ;
1102 #else
1103 u1 = aa-lam3 ; u2 = bb ; u3 = cc ;
1104 v1 = bb ; v2 = dd-lam3 ; v3 = ee ;
1105 CROSS(u1,u2,u3 , v1,v2,v3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1106 if( tn < DEPSQ*ann ){
1107 w1 = cc ; w2 = ee ; w3 = ff-lam3 ;
1108 CROSS(u1,u2,u3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1109 if( tn < DEPSQ*ann ){
1110 CROSS(v1,v2,v3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1111 }
1112 }
1113 a[6] = t1/tn ; a[7] = t2/tn ; a[8] = t3/tn ;
1114 #endif
1115
1116 return ;
1117
1118 } else { /*---- if here, we have a double root ----*/
1119
1120 /* make sure that we have lam1=lam2 and lam3 is the outlier */
1121
1122 if( d13 < d12 && d13 < d23 ) SWAP(lam2,lam3) ;
1123 else if( d23 < d12 && d23 < d13 ) SWAP(lam1,lam3) ;
1124 lam1 = lam2 = 0.5*(lam1+lam2) ;
1125
1126 /* compute eigenvector for lam3 using method as above */
1127
1128 u1 = aa-lam3 ; u2 = bb ; u3 = cc ;
1129 v1 = bb ; v2 = dd-lam3 ; v3 = ee ;
1130 CROSS(u1,u2,u3 , v1,v2,v3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1131 if( tn < DEPSQ*ann ){
1132 w1 = cc ; w2 = ee ; w3 = ff-lam3 ;
1133 CROSS(u1,u2,u3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1134 if( tn < DEPSQ*ann ){
1135 CROSS(v1,v2,v3 , w1,w2,w3 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1136 }
1137 }
1138 w1 = a[6] = t1/tn ; w2 = a[7] = t2/tn ; w3 = a[8] = t3/tn ;
1139
1140 /* find a vector orthogonal to it */
1141
1142 CROSS(w1,w2,w3 , 1,0,0 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1143 if( tn < DEPSQ ){
1144 CROSS(w1,w2,w3 , 0,1,0 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1145 if( tn < DEPSQ ){
1146 CROSS(w1,w2,w3 , 0,0,1 , t1,t2,t3 ) ; tn = sqrt(t1*t1+t2*t2+t3*t3) ;
1147 }
1148 }
1149 a[0] = t1/tn ; a[1] = t2/tn ; a[2] = t3/tn ;
1150
1151 /* and the final vector is the cross product of these two */
1152
1153 CROSS( w1,w2,w3 , a[0],a[1],a[2] , a[3],a[4],a[5] ) ;
1154
1155 /* sort results (we know lam1==lam2) */
1156
1157 if( lam1 < lam3 ){
1158 SWAP(lam1,lam3) ; CSWAP(0,6) ;
1159 if( DET3(a) < 0.0 ){ a[6] = -a[6]; a[7] = -a[7]; a[8] = -a[8]; }
1160 }
1161
1162 e[0] = ann*lam1 ; e[1] = ann*lam2 ; e[2] = ann*lam3 ;
1163 return ;
1164 }
1165 }
1166
1167 /*---------------------------------------------------------------------------*/
1168 /*! 2x2 symmetric eigenvalue/vector problem, like symeig_3D() above. */
1169
symeig_2D(double * a,double * e,int dovec)1170 static void symeig_2D( double *a , double *e , int dovec )
1171 {
1172 double sxx,sxy,syy , lam1,lam2 , ss,tt , x,y ;
1173
1174 if( a == NULL || e == NULL ) return ;
1175
1176 /*----- unload matrix into local variables -----*/
1177
1178 sxx = a[0] ; sxy = a[1] ; syy = a[3] ;
1179
1180 ss = fabs(sxx) ; tt = fabs(syy) ; if( ss > tt ) ss = tt ;
1181
1182 if( fabs(sxy) < DEPS*ss ){ /*--- essentially a diagonal matrix ---*/
1183 if( sxx >= syy ){
1184 lam1 = sxx ; lam2 = syy ;
1185 if( dovec ){ a[0]=a[3]=1.0; a[1]=a[2]=0.0; }
1186 } else {
1187 lam1 = syy ; lam2 = sxx ;
1188 if( dovec ){ a[0]=a[3]=1.0 ; a[1]=a[2]=1.0; }
1189 }
1190 e[0] = lam1 ; e[1] = lam2 ;
1191 return ;
1192 }
1193
1194 /*--- non-diagonal matrix ==> solve quadratic equation for eigenvalues ---*/
1195
1196 ss = sqrt( (sxx-syy)*(sxx-syy) + 4.0*sxy*sxy ) ; /* positive */
1197 lam1 = 0.5 * ( sxx + syy + ss ) ; /* larger */
1198 lam2 = 0.5 * ( sxx + syy - ss ) ; /* smaller */
1199
1200 if( dovec ){
1201 x = 2.0*sxy ; y = syy - sxx + ss ; tt = sqrt(x*x+y*y) ;
1202 a[0] = x/tt ; a[1] = y/tt ;
1203
1204 y = syy - sxx - ss ; tt = sqrt(x*x+y*y) ;
1205 a[2] = x/tt ; a[3] = y/tt ;
1206 }
1207 e[0] = lam1 ; e[1] = lam2 ;
1208 return ;
1209 }
1210
1211 /******************************************************************************/
1212 #endif /* _CS_PV_ALREADY_INCLUDED_ */
1213