1 /*****************************************************************************
2 Major portions of this software are copyrighted by the Medical College
3 of Wisconsin, 1994-2000, and are released under the Gnu General Public
4 License, Version 2. See the file README.Copyright for details.
5 ******************************************************************************/
6
7 /*!
8 This file contains matrix and vector arithmetic routines.
9
10 File: matrix.c
11 Author: B. Douglas Ward
12 Date: 23 April 1997
13
14 Mod: Changed print format for functions matrix_print and vector_print.
15 Date: 05 August 1997
16
17 Mod: Changed initialization in function vector_create.
18 Date: 04 November 1997
19
20 Mod: Added routines matrix_file_write and matrix_file_read.
21 Date: 02 July 1999
22
23 Mod: Added routine for calculating square root of matrix.
24 Date: 30 September 1999
25
26 Mod: Added routines matrix_sprint and vector_sprint.
27 Date: 04 October 1999
28
29 Mod: Modified matrix_file_read to use mri_read_ascii routine.
30 Date: 12 January 2000
31
32 Mod: Changed return type of vector_dot from float to double.
33 Date: 13 April 2000
34
35 Mod: Added functions column_to_vector and matrix_extract_rows.
36 Date: 21 April 2000
37
38 Mod: Added test for missing matrix file name.
39 Date: 08 May 2000
40
41 Mod: Added "register" declarations and a few other things to speed
42 up calculations (including vector_create_noinit) -- RWCox.
43 Date: 28 Dec 2001
44
45 Mod: Allow matrices and vectors with zero rows and columns.
46 Date: 26 February 2002
47
48 Mod: Corrected errors in vector_multiply and vector_multiply_subtract
49 routines that would produce segmentation fault for certain input
50 matrices.
51 Date: 18 March 2003
52
53 Mod: UNROLL_VECMUL defined to allow unrolling by 2 of vector-multiply
54 dot product loops.
55
56 Mod: 'ipr' added to matrix_print() function.
57 Date: 03 Aug 2004 - RWCox
58
59 Mod: Added USE_SCSLBLAS stuff for SGI Altix, and USE_SUNPERF for Solaris.
60 Date: 01 Mar 2005
61
62 Mod: Freed memory for an orphaned matrix in matrix_sqrt function
63 Date: 26 Mar 2008 - drg
64
65 */
66
67 /*---------------------------------------------------------------------*/
68 /** Vectorization macros:
69 - DOTP(n,x,y,z) computes the n-long dot product of vectors
70 x and y and puts the result into the place pointed to by z.
71 - VSUB(n,x,y,z) computes vector x-y into vector z.
72 - These are intended to be the fast method for doing these things. **/
73 /*---------------------------------------------------------------------*/
74
75 #include "mrilib.h"
76 #include "matrix.h"
77
78 #undef SETUP_BLAS1 /* define this to use BLAS-1 functions */
79 #undef SETUP_BLAS2 /* define this to use BLAS-2 functions */
80 #undef DOTP
81 #undef VSUB
82 #undef MATVEC
83 #undef SUBMATVEC
84
85 #if defined(USE_SCSLBLAS) /** SGI Altix **/
86 # include <scsl_blas.h>
87 # define SETUP_BLAS1
88 # undef SETUP_BLAS2 /* don't use this */
89 # define TRANSA "T"
90 #elif defined(USE_SUNPERF) /** Sun Solaris **/
91 # include <sunperf.h>
92 # define SETUP_BLAS1
93 # undef SETUP_BLAS2
94 # define TRANSA 'T'
95 #elif defined(USE_ACML)
96 # ifndef _ACML_COMPLEX
97 # define _ACML_COMPLEX
98 /* typedef struct { float real, imag; } complex; */
99 /* typedef struct { double real, imag; } doublecomplex; */
100 # endif
101 # include <acml.h>
102 # define SETUP_BLAS1
103 #endif
104
105 /* double precision BLAS-1 functions */
106
107 #ifdef SETUP_BLAS1
108 # define DOTP(n,x,y,z) *(z)=ddot(n,x,1,y,1)
109 # define VSUB(n,x,y,z) (memcpy(z,x,sizeof(double)*n),daxpy(n,-1.0,y,1,z,1))
110 #endif
111
112 /*.......................................................................
113 BLAS-2 function operate on matrix-vector structs defined in matrix.h:
114
115 MATVEC(m,v,z): [z] = [m][v] where m=matrix, z,v = matrices
116 SUBMATVEC(m,v,z): [z] = [z] - [m][v]
117 .........................................................................*/
118
119 #ifdef DONT_USE_MATRIX_MAT
120 # undef SETUP_BLAS2
121 #endif
122
123 #ifdef SETUP_BLAS2 /* doesn't seem to help much */
124
125 # define MATVEC(m,v,z) dgemv( TRANSA , (m).cols , (m).rows , \
126 1.0 , (m).mat , (m).cols , \
127 (v).elts , 1 , 0.0 , (z).elts , 1 )
128
129 # define SUBMATVEC(m,v,z) dgemv( TRANSA , (m).cols , (m).rows , \
130 -1.0 , (m).mat , (m).cols , \
131 (v).elts , 1 , 1.0 , (z).elts , 1 )
132 #endif
133
134 /*---------------------------------------------------------------------------*/
135 static double flops=0.0 ;
get_matrix_flops(void)136 double get_matrix_flops(void){ return flops; }
137
138 static double dotnum=0.0 , dotsum=0.0 ;
get_matrix_dotlen(void)139 double get_matrix_dotlen(void){ return (dotnum > 0.0) ? dotsum/dotnum : 0.0 ; }
140
141 #ifndef USE_OMP
142 # define ENABLE_FLOPS
143 #else
144 # undef ENABLE_FLOPS
145 #endif
146
147 /*---------------------------------------------------------------------------*/
148 /*!
149 Routine to print and error message and stop.
150 */
151
matrix_error(char * message)152 void matrix_error (char *message)
153 {
154 printf ("Matrix error: %s \n", message);
155 EXIT (1);
156 }
157
158
159 /*---------------------------------------------------------------------------*/
160 /*!
161 Initialize matrix data structure.
162 */
163
matrix_initialize(matrix * m)164 void matrix_initialize (matrix *m)
165 {
166 m->rows = 0;
167 m->cols = 0;
168 m->elts = NULL;
169 #ifndef DONT_USE_MATRIX_MAT
170 m->mat = NULL ; /* 04 Mar 2005 */
171 #endif
172 }
173
174
175 /*---------------------------------------------------------------------------*/
176 /*!
177 Destroy matrix data structure by deallocating memory.
178 */
179
matrix_destroy(matrix * m)180 void matrix_destroy (matrix *m)
181 {
182 if (m->elts != NULL){
183 #ifdef DONT_USE_MATRIX_MAT
184 int i ;
185 for( i=0 ; i < m->rows ; i++ )
186 if( m->elts[i] != NULL ) free(m->elts[i]) ;
187 #endif
188 free(m->elts) ;
189 }
190 #ifndef DONT_USE_MATRIX_MAT
191 if( m->mat != NULL) free (m->mat ) ;
192 #endif
193 matrix_initialize (m);
194 }
195
196 /*---------------------------------------------------------------------------*/
197 /*!
198 Create matrix data structure by allocating memory and initializing values.
199
200 */
201
matrix_create(int rows,int cols,matrix * m)202 void matrix_create (int rows, int cols, matrix *m)
203 {
204 register int i ;
205
206 matrix_destroy (m);
207
208 if ((rows < 0) || (cols < 0))
209 matrix_error ("Illegal dimensions for new matrix");
210
211 m->rows = rows;
212 m->cols = cols;
213 if ((rows < 1) || (cols < 1)) return;
214
215 m->elts = (double **) malloc (sizeof(double *) * rows);
216 if (m->elts == NULL)
217 matrix_error ("Memory allocation error");
218
219 #ifdef DONT_USE_MATRIX_MAT
220 for (i = 0; i < rows; i++){
221 m->elts[i] = (double *) calloc (sizeof(double) , cols);
222 if (m->elts[i] == NULL) matrix_error ("Memory allocation error");
223 }
224 #else
225 m->mat = (double *) calloc( sizeof(double) , rows*cols ) ;
226 if( m->mat == NULL )
227 matrix_error ("Memory allocation error");
228 for (i = 0; i < rows; i++)
229 m->elts[i] = m->mat + (i*cols) ; /* 04 Mar 2005: offsets into mat */
230 #endif
231 }
232
233
234 /*---------------------------------------------------------------------------*/
235 /*!
236 Print contents of matrix m.
237 */
238
matrix_print(matrix m)239 void matrix_print (matrix m)
240 {
241 int i=0, j=0;
242 int rows, cols;
243 double val ;
244 int ipr ;
245
246 rows = m.rows; if( rows < 1 ) return ;
247 cols = m.cols; if( cols < 1 ) return ;
248
249 for( i=0 ; i < rows ; i++ ){
250 for( j=0 ; j < cols ; j++ ){
251 val = (int)m.elts[i][j] ;
252 if( val != m.elts[i][j] || fabs(val) > 99.0 ) goto zork ;
253 }
254 }
255 zork:
256 ipr = (i==rows && j==cols) ;
257
258 for (i = 0; i < rows; i++)
259 {
260 for (j = 0; j < cols; j++)
261 if( ipr ) printf (" %3d" , (int)m.elts[i][j]);
262 else printf (" %10.4g", m.elts[i][j]);
263 printf (" \n");
264 }
265 printf (" \n"); fflush(stdout) ;
266 }
267
268
269 /*---------------------------------------------------------------------------*/
270 /*!
271 Print label and contents of matrix m.
272 */
273
matrix_sprint(char * s,matrix m)274 void matrix_sprint (char * s, matrix m)
275 {
276 printf ("%s \n", s);
277 matrix_print (m);
278 }
279
280
281 /*---------------------------------------------------------------------------*/
282 /*!
283 Print contents of matrix m to specified file.
284 */
285
matrix_file_write(char * filename,matrix m)286 void matrix_file_write (char * filename, matrix m)
287 {
288 int i, j;
289 int rows, cols;
290 FILE * outfile = NULL;
291
292
293 /*----- First, check for empty file name -----*/
294 if (filename == NULL) matrix_error ("Missing matrix file name");
295
296
297 outfile = fopen (filename, "w");
298
299 rows = m.rows;
300 cols = m.cols;
301
302 for (i = 0; i < rows; i++)
303 {
304 for (j = 0; j < cols; j++)
305 fprintf (outfile, " %g", m.elts[i][j]);
306 fprintf (outfile, " \n");
307 }
308 fprintf (outfile, " \n");
309
310 fclose (outfile);
311 }
312
313 /*---------------------------------------------------------------------------*/
314 /*!
315 Manual entry of matrix data.
316 */
317
matrix_enter(matrix * m)318 void matrix_enter (matrix * m)
319 {
320 int rows, cols;
321 int i, j;
322 float fval;
323
324 printf ("Enter number of rows: "); fflush(stdout) ;
325 scanf ("%d", &rows);
326 printf ("Enter number of cols: "); fflush(stdout) ;
327 scanf ("%d", &cols);
328
329 matrix_create (rows, cols, m);
330
331 for (i = 0; i < rows; i++)
332 for (j = 0; j < cols; j++)
333 {
334 printf ("elts[%d][%d] = ", i, j); fflush(stdout);
335 scanf ("%f", &fval);
336 m->elts[i][j] = fval;
337 }
338 }
339
340
341 /*---------------------------------------------------------------------------*/
342 /*!
343 Read contents of matrix m from specified file.
344 If unable to read matrix from file, or matrix has wrong dimensions:
345 If error_exit flag is set, then print error message and exit.
346 Otherwise, return null matrix.
347 */
348
matrix_file_read(char * filename,int rows,int cols,matrix * m,int error_exit)349 void matrix_file_read (char * filename, int rows, int cols, matrix * m,
350 int error_exit)
351 {
352 int i, j;
353
354 MRI_IMAGE * flim; /* pointer to image structure
355 -- used to read ASCII file */
356 float * far; /* pointer to MRI_IMAGE floating point data */
357 char message [80]; /* error message */
358
359
360 /*----- First, check for empty file name -----*/
361 if (filename == NULL) matrix_error ("Missing matrix file name");
362
363
364 /*----- Read the matrix file -----*/
365 flim = mri_read_1D(filename);
366 if (flim == NULL) {
367 if (error_exit)
368 {
369 sprintf (message, "Unable to read matrix from file: %s", filename);
370 matrix_error (message);
371 }
372 else
373 {
374 matrix_destroy (m);
375 return;
376 }
377 }
378
379
380 /*----- Set pointer to data -----*/
381 far = MRI_FLOAT_PTR(flim);
382
383
384 /*----- Test for correct dimensions -----*/
385 if ( (rows != flim->nx) || (cols != flim->ny) ) {
386 if (error_exit)
387 {
388 sprintf (message,
389 "In matrix file: %s Expected: %d x %d Actual: %d x %d",
390 filename, rows, cols, flim->nx, flim->ny);
391 matrix_error (message);
392 }
393 else
394 {
395 matrix_destroy (m);
396 return;
397 }
398 }
399
400
401 matrix_create (rows, cols, m);
402
403
404 /*----- Copy data from image structure to matrix structure -----*/
405 for (i = 0; i < rows; i++)
406 for (j = 0; j < cols; j++)
407 m->elts[i][j] = far[i + j*rows];
408
409
410 { mri_free (flim); flim = NULL; }
411
412 }
413
414
415 /*---------------------------------------------------------------------------*/
416 /*!
417 Convert simple array to matrix structure.
418 */
419
array_to_matrix(int rows,int cols,float ** f,matrix * m)420 void array_to_matrix (int rows, int cols, float ** f, matrix * m)
421 {
422 register int i, j;
423
424 matrix_create (rows, cols, m);
425
426 for (i = 0; i < rows; i++)
427 for (j = 0; j < cols; j++)
428 m->elts[i][j] = f[i][j];
429 }
430
431
432 /*---------------------------------------------------------------------------*/
433 /*!
434 Make a copy of the first matrix, return copy as the second matrix.
435 */
436
matrix_equate(matrix a,matrix * b)437 void matrix_equate (matrix a, matrix * b)
438 {
439 register int i;
440 register int rows, cols;
441
442 rows = a.rows;
443 cols = a.cols;
444
445 matrix_create (rows, cols, b);
446
447 for (i = 0; i < rows; i++){
448 #if 0
449 for (j = 0; j < cols; j++)
450 b->elts[i][j] = a.elts[i][j];
451 #else
452 if( cols > 0 )
453 memcpy( b->elts[i] , a.elts[i] , sizeof(double)*cols ) ; /* RWCox */
454 #endif
455 }
456 }
457
458
459 /*---------------------------------------------------------------------------*/
460 /*!
461 Extending a matrix with nradd rows and ncadd columns (zero-filled).
462 */
463
matrix_enlarge(int nradd,int ncadd,matrix * a)464 void matrix_enlarge( int nradd , int ncadd , matrix *a )
465 {
466 int i , rows, cols ;
467 matrix *b ;
468
469 if( ncadd < 0 ) ncadd = 0 ; /* no subtraction allowed */
470 if( nradd < 0 ) nradd = 0 ;
471
472 if( nradd == 0 && ncadd == 0 ) return ; /* nothing to do? */
473
474 rows = a->rows ; cols = a->cols ;
475
476 /* create bigger matrix with extra rows/columns */
477
478 b = (matrix *)malloc(sizeof(matrix)) ;
479 matrix_initialize( b ) ;
480 matrix_create( rows+nradd , cols+ncadd, b ) ; /* zero-filled */
481
482 /* copy row data from a into b */
483
484 if( cols > 0 ){
485 for( i=0 ; i < rows ; i++ ){
486 memcpy( b->elts[i] , a->elts[i] , sizeof(double)*cols ) ;
487 }
488 }
489
490 /* destroy contents of a, replace with contents of b */
491
492 matrix_destroy(a) ; *a = *b ; return ;
493 }
494
495
496 /*---------------------------------------------------------------------------*/
497 /*!
498 Extract p columns (specified by list) from matrix a. Result is matrix b.
499 */
500
matrix_extract(matrix a,int p,int * list,matrix * b)501 void matrix_extract (matrix a, int p, int *list, matrix * b)
502 {
503 register int i, j;
504 register int rows, cols;
505
506 rows = a.rows;
507 cols = p;
508
509 matrix_create (rows, cols, b);
510
511 for (i = 0; i < rows; i++)
512 for (j = 0; j < cols; j++)
513 b->elts[i][j] = a.elts[i][list[j]];
514 }
515
516
517 /*---------------------------------------------------------------------------*/
518 /*!
519 Extract p rows (specified by list) from matrix a. Result is matrix b.
520 */
521
matrix_extract_rows(matrix a,int p,int * list,matrix * b)522 void matrix_extract_rows (matrix a, int p, int * list, matrix * b)
523 {
524 register int i, j;
525 register int rows, cols;
526
527 rows = p;
528 cols = a.cols;
529
530 matrix_create (rows, cols, b);
531
532 for (i = 0; i < rows; i++)
533 for (j = 0; j < cols; j++)
534 b->elts[i][j] = a.elts[list[i]][j];
535 }
536
537 /*---------------------------------------------------------------------------*/
538 /*!
539 Add columns that are all 0 except for a single 1, as indicated. [16 Aug 2019]
540 For use in 3dDeconvolve experimentation [RWC].
541 */
542
matrix_augment_01_columns(matrix a,int nadd,int * addlist,matrix * b)543 void matrix_augment_01_columns( matrix a, int nadd, int *addlist, matrix *b )
544 {
545 int arows,brows , acols,bcols , i,j,k,aa ;
546
547 if( b == NULL ) return ; /* bad input */
548
549 if( nadd <= 0 || addlist == NULL ){ /* nothing to do but copy input */
550 matrix_equate(a,b) ; return ;
551 }
552
553 /* check addlist for bad entries */
554
555 for( aa=0 ; aa < nadd ; aa++ ){
556 if( addlist[aa] < 0 || addlist[aa] >= a.rows ){
557 fprintf(stderr,"** ERROR: bad index in matrix_augment_01_columns\n") ;
558 return ;
559 }
560 }
561
562 arows = brows = a.rows ;
563 acols = a.cols ;
564 bcols = acols + nadd ;
565
566 matrix_create( brows , bcols , b ) ;
567
568 /* copy original part */
569
570 for( i=0 ; i < brows ; i++ )
571 for( j=0 ; j < acols ; j++ ) b->elts[i][j] = a.elts[i][j] ;
572
573 /* add new cols */
574
575 for( aa=0 ; aa < nadd ; aa++ ){
576 k = addlist[aa] ; j = acols+aa ;
577 for( i=0 ; i < brows ; i++ )
578 b->elts[i][j] = (i==k) ? 1.0 : 0.0 ;
579 }
580
581 return ;
582 }
583
584 /*---------------------------------------------------------------------------*/
585 /*! Return value is number of rows deleted.
586 If this is zero, the output matrix *b is not created.
587 If the output matrix would have no rows
588 (return value == number of rows input), then *b is also not created.
589 *//*-------------------------------------------------------------------------*/
590
matrix_delete_allzero_rows(matrix a,matrix * b)591 int matrix_delete_allzero_rows( matrix a , matrix *b )
592 {
593 int ii , jj , rows , cols , np=0 , *lp ;
594
595 rows = a.rows ;
596 cols = a.cols ; if( rows < 1 || cols < 1 || b == NULL ) return 0 ;
597
598 /* make list of rows to keep */
599
600 lp = (int *)malloc(sizeof(int)*rows) ;
601 for( ii=0 ; ii < rows ; ii++ ){
602 for( jj=0 ; jj < cols && a.elts[ii][jj] == 0.0 ; jj++ ) ; /*nada*/
603 if( jj < cols ) lp[np++] = ii ;
604 }
605
606 if( np > 0 && np < rows )
607 matrix_extract_rows( a , np , lp , b ) ;
608
609 free(lp) ; return (rows-np) ;
610 }
611
612 /*---------------------------------------------------------------------------*/
613 /*!
614 Create n x n identity matrix.
615 */
616
matrix_identity(int n,matrix * m)617 void matrix_identity (int n, matrix * m)
618 {
619 register int i, j;
620
621 if (n < 0)
622 matrix_error ("Illegal dimensions for identity matrix");
623
624 matrix_create (n, n, m);
625
626 for (i = 0; i < n; i++)
627 for (j = 0; j < n; j++)
628 if (i == j)
629 m->elts[i][j] = 1.0;
630 else
631 m->elts[i][j] = 0.0;
632 }
633
634
635 /*---------------------------------------------------------------------------*/
636 /*!
637 Add matrix a to matrix b. Result is matrix c.
638 */
639
matrix_add(matrix a,matrix b,matrix * c)640 void matrix_add (matrix a, matrix b, matrix * c)
641 {
642 register int rows, cols;
643 register int i, j;
644
645 if ((a.rows != b.rows) || (a.cols != b.cols))
646 matrix_error ("Incompatible dimensions for matrix addition");
647
648 rows = a.rows;
649 cols = a.cols;
650
651 matrix_create (rows, cols, c);
652
653 for (i = 0; i < rows; i++)
654 for (j = 0; j < cols; j++)
655 c->elts[i][j] = a.elts[i][j] + b.elts[i][j];
656
657 #ifdef ENABLE_FLOPS
658 flops += rows*cols ;
659 #endif
660 }
661
662
663 /*---------------------------------------------------------------------------*/
664 /*!
665 Subtract matrix b from matrix a. Result is matrix c.
666 */
667
matrix_subtract(matrix a,matrix b,matrix * c)668 void matrix_subtract (matrix a, matrix b, matrix * c)
669 {
670 register int rows, cols;
671 register int i, j;
672
673 if ((a.rows != b.rows) || (a.cols != b.cols))
674 matrix_error ("Incompatible dimensions for matrix subtraction");
675
676 rows = a.rows;
677 cols = a.cols;
678
679 matrix_create (rows, cols, c);
680
681 for (i = 0; i < rows; i++)
682 for (j = 0; j < cols; j++)
683 c->elts[i][j] = a.elts[i][j] - b.elts[i][j];
684
685 #ifdef ENABLE_FLOPS
686 flops += rows*cols ;
687 #endif
688 }
689
690
691 /*---------------------------------------------------------------------------*/
692 /*!
693 Multiply matrix a by matrix b. Result is matrix c.
694 */
695
matrix_multiply(matrix a,matrix b,matrix * c)696 void matrix_multiply (matrix a, matrix b, matrix * c)
697 {
698 int rows, cols;
699 register int i, j, k;
700 register double sum ;
701
702 if (a.cols != b.rows)
703 matrix_error ("Incompatible dimensions for matrix multiplication");
704
705 rows = a.rows;
706 cols = b.cols;
707
708 matrix_create (rows, cols, c);
709
710 for (i = 0; i < rows; i++)
711 for (j = 0; j < cols; j++)
712 {
713 sum = 0.0 ;
714 for (k = 0; k < a.cols; k++)
715 sum += a.elts[i][k] * b.elts[k][j];
716 c->elts[i][j] = sum;
717 }
718
719 #ifdef ENABLE_FLOPS
720 flops += 2.0*rows*cols*cols ;
721 #endif
722 }
723
724
725 /*---------------------------------------------------------------------------*/
726 /*!
727 Multiply matrix a by scalar constant k. Result is matrix c.
728 */
729
matrix_scale(double k,matrix a,matrix * c)730 void matrix_scale (double k, matrix a, matrix * c)
731 {
732 register int rows, cols;
733 register int i, j;
734
735 rows = a.rows;
736 cols = a.cols;
737
738 matrix_create (rows, cols, c);
739
740 for (i = 0; i < rows; i++)
741 for (j = 0; j < cols; j++)
742 c->elts[i][j] = k * a.elts[i][j];
743
744 #ifdef ENABLE_FLOPS
745 flops += rows*cols ;
746 #endif
747 }
748
749
750 /*---------------------------------------------------------------------------*/
751 /*!
752 Take transpose of matrix a. Result is matrix t.
753 */
754
matrix_transpose(matrix a,matrix * t)755 void matrix_transpose (matrix a, matrix * t)
756 {
757 register int rows, cols;
758 register int i, j;
759
760 rows = a.cols;
761 cols = a.rows;
762
763 matrix_create (rows, cols, t);
764 for (i = 0; i < rows; i++)
765 for (j = 0; j < cols; j++)
766 t->elts[i][j] = a.elts[j][i];
767 }
768
769 /*---------------------------------------------------------------------------*/
770 /*!
771 Use Gaussian elimination to calculate inverse of matrix a. Result is
772 matrix ainv.
773 */
774
matrix_inverse(matrix a,matrix * ainv)775 int matrix_inverse (matrix a, matrix * ainv)
776 {
777 const double epsilon = 1.0e-10;
778 matrix tmp;
779 register int i, j, ii, n;
780 register double fval;
781 register double fmax;
782 register double * p;
783
784 matrix_initialize (&tmp);
785
786
787 if (a.rows != a.cols)
788 matrix_error ("Illegal dimensions for matrix inversion");
789
790 #if 0
791 matrix_sprint("matrix_inverse:",a) ;
792 #endif
793
794 n = a.rows;
795 matrix_identity (n, ainv);
796 matrix_equate (a, &tmp);
797
798 for (i = 0; i < n; i++)
799 {
800 fmax = fabs(tmp.elts[i][i]);
801 for (j = i+1; j < n; j++)
802 if (fabs(tmp.elts[j][i]) > fmax)
803 {
804 fmax = fabs(tmp.elts[j][i]);
805 p = tmp.elts[i];
806 tmp.elts[i] = tmp.elts[j];
807 tmp.elts[j] = p;
808 p = ainv->elts[i];
809 ainv->elts[i] = ainv->elts[j];
810 ainv->elts[j] = p;
811 }
812
813 if (fmax < epsilon)
814 {
815 matrix_destroy (&tmp);
816 return (0);
817 }
818
819
820 fval = 1.0 / tmp.elts[i][i]; /* RWCox: change division by this to */
821 for (j = 0; j < n; j++) /* multiplication by 1.0/this */
822 {
823 tmp.elts[i][j] *= fval;
824 ainv->elts[i][j] *= fval;
825 }
826 for (ii = 0; ii < n; ii++)
827 if (ii != i)
828 {
829 fval = tmp.elts[ii][i];
830 for (j = 0; j < n; j++)
831 {
832 tmp.elts[ii][j] -= fval*tmp.elts[i][j];
833 ainv->elts[ii][j] -= fval*ainv->elts[i][j];
834 }
835 }
836
837 }
838
839 matrix_destroy (&tmp);
840
841 #ifdef ENABLE_FLOPS
842 flops += 3.0*n*n*n ;
843 #endif
844 return (1);
845 }
846
847
848 /*---------------------------------------------------------------------------*/
849 /*!
850 Use Gaussian elimination to calculate inverse of matrix a, with diagonal
851 scaling applied for stability. Result is matrix ainv.
852 */
853
matrix_inverse_dsc(matrix a,matrix * ainv)854 int matrix_inverse_dsc (matrix a, matrix * ainv) /* 15 Jul 2004 - RWCox */
855 {
856 matrix atmp;
857 register int i, j, n;
858 register double *diag ;
859 int mir ;
860
861 if (a.rows != a.cols)
862 matrix_error ("Illegal dimensions for matrix inversion");
863
864 matrix_initialize (&atmp);
865
866 n = a.rows;
867 matrix_equate (a, &atmp);
868 diag = (double *)malloc( sizeof(double)*n ) ;
869 for( i=0 ; i < n ; i++ ){
870 diag[i] = fabs(atmp.elts[i][i]) ;
871 if( diag[i] == 0.0 ) diag[i] = 1.0 ; /* shouldn't happen? */
872 diag[i] = 1.0 / sqrt(diag[i]) ;
873 }
874
875 for( i=0 ; i < n ; i++ ) /* scale a */
876 for( j=0 ; j < n ; j++ )
877 atmp.elts[i][j] *= diag[i]*diag[j] ;
878
879 mir = matrix_inverse( atmp , ainv ) ; /* invert */
880
881 for( i=0 ; i < n ; i++ ) /* scale inverse */
882 for( j=0 ; j < n ; j++ )
883 ainv->elts[i][j] *= diag[i]*diag[j] ;
884
885 matrix_destroy (&atmp);
886 free((void *)diag) ;
887 #ifdef ENABLE_FLOPS
888 flops += 4.0*n*n + 4.0*n ;
889 #endif
890 return (mir);
891 }
892
893
894 /*---------------------------------------------------------------------------*/
895 /*!
896 Calculate square root of symmetric positive definite matrix a.
897 Result is matrix s.
898 */
899
matrix_sqrt(matrix a,matrix * s)900 int matrix_sqrt (matrix a, matrix * s)
901 {
902 const int MAX_ITER = 100;
903 int n;
904 int ok;
905 int iter;
906 register float sse, psse;
907 register int i, j;
908 matrix x, xinv, axinv, xtemp, error;
909
910 matrix_initialize (&x);
911 matrix_initialize (&xinv);
912 matrix_initialize (&axinv);
913 matrix_initialize (&xtemp);
914 matrix_initialize (&error);
915
916
917 if (a.rows != a.cols)
918 matrix_error ("Illegal dimensions for matrix square root");
919
920
921 n = a.rows;
922 matrix_identity (n, &x);
923
924
925 psse = 1.0e+30;
926 for (iter = 0; iter < MAX_ITER; iter++)
927 {
928 ok = matrix_inverse (x, &xinv);
929 if (! ok) return (0);
930 matrix_multiply (a, xinv, &axinv);
931 matrix_add (x, axinv, &xtemp);
932 matrix_scale (0.5, xtemp, &x);
933
934 matrix_multiply (x, x, &xtemp);
935 matrix_subtract (a, xtemp, &error);
936 sse = 0.0;
937 for (i = 0; i < n; i++)
938 for (j = 0; j < n; j++)
939 sse += error.elts[i][j] * error.elts[i][j] ;
940
941 if (sse >= psse) break;
942
943 psse = sse;
944 }
945
946 if (iter == MAX_ITER) return (0);
947
948 matrix_equate (x, s);
949
950 matrix_destroy (&x);
951 matrix_destroy (&xinv);
952 matrix_destroy (&axinv);
953 matrix_destroy (&xtemp);
954 matrix_destroy (&error); /* destroy error matrix too - 26 Mar 2008 drg */
955
956 return (1);
957
958 }
959
960
961 /*---------------------------------------------------------------------------*/
962 /*!
963 Initialize vector data structure.
964 */
965
vector_initialize(vector * v)966 void vector_initialize (vector * v)
967 {
968 v->dim = 0;
969 v->elts = NULL;
970 }
971
972
973 /*---------------------------------------------------------------------------*/
974 /*!
975 Destroy vector data structure by deallocating memory.
976 */
977
vector_destroy(vector * v)978 void vector_destroy (vector * v)
979 {
980 if (v->elts != NULL) free (v->elts);
981 vector_initialize (v);
982 }
983
984
985 /*---------------------------------------------------------------------------*/
986 /*!
987 Create vector v by allocating memory and initializing values.
988 */
989
vector_create(int dim,vector * v)990 void vector_create (int dim, vector * v)
991 {
992 vector_destroy (v);
993
994 if (dim < 0) matrix_error ("Illegal dimensions for new vector");
995
996 v->dim = dim;
997 if (dim < 1) return;
998
999 v->elts = (double *) calloc (sizeof(double) , dim);
1000 if (v->elts == NULL)
1001 matrix_error ("Memory allocation error");
1002
1003 }
1004
1005 /*---------------------------------------------------------------------------*/
vector_create_noinit(int dim,vector * v)1006 void vector_create_noinit(int dim, vector * v) /* 28 Dec 2001: RWCox */
1007 {
1008 vector_destroy (v);
1009
1010 if (dim < 0) matrix_error ("Illegal dimensions for new vector");
1011
1012 v->dim = dim;
1013 if (dim < 1) return;
1014
1015 v->elts = (double *) malloc (sizeof(double) * dim);
1016 if (v->elts == NULL)
1017 matrix_error ("Memory allocation error");
1018 }
1019
1020 /*---------------------------------------------------------------------------*/
1021 /*!
1022 Print contents of vector v.
1023 */
1024
vector_print(vector v)1025 void vector_print (vector v)
1026 {
1027 int i;
1028
1029 for (i = 0; i < v.dim; i++)
1030 printf (" %10.4g \n", v.elts[i]);
1031 printf (" \n"); fflush(stdout);
1032
1033 }
1034
1035
1036 /*---------------------------------------------------------------------------*/
1037 /*!
1038 Print label and contents of vector v.
1039 */
1040
vector_sprint(char * s,vector v)1041 void vector_sprint (char * s, vector v)
1042 {
1043 printf ("%s \n", s);
1044
1045 vector_print (v);
1046 }
1047
1048
1049 /*---------------------------------------------------------------------------*/
1050 /*!
1051 Copy vector a. Result is vector b.
1052 */
1053
vector_equate(vector a,vector * b)1054 void vector_equate (vector a, vector * b)
1055 {
1056 register int dim;
1057
1058 dim = a.dim;
1059
1060 vector_create_noinit (dim, b);
1061
1062 #if 0
1063 for (i = 0; i < dim; i++)
1064 b->elts[i] = a.elts[i];
1065 #else
1066 if( dim > 0 )
1067 memcpy( b->elts , a.elts , sizeof(double)*dim ) ; /* RWCox */
1068 #endif
1069 }
1070
1071
1072 /*---------------------------------------------------------------------------*/
1073 /*!
1074 Convert simple array f into vector v.
1075 */
1076
array_to_vector(int dim,float * f,vector * v)1077 void array_to_vector (int dim, float * f, vector * v)
1078 {
1079 register int i;
1080
1081 vector_create_noinit (dim, v);
1082
1083 for (i = 0; i < dim; i++)
1084 v->elts[i] = f[i];
1085 }
1086
1087
1088
1089 /*---------------------------------------------------------------------------*/
1090 /*!
1091 Convert column c of matrix m into vector v.
1092 */
1093
column_to_vector(matrix m,int c,vector * v)1094 void column_to_vector (matrix m, int c, vector *v)
1095 {
1096 register int i , dim ;
1097 dim = m.rows; vector_create_noinit (dim, v);
1098 for (i = 0; i < dim; i++) v->elts[i] = m.elts[i][c];
1099 }
1100
row_to_vector(matrix m,int r,vector * v)1101 void row_to_vector( matrix m , int r , vector *v )
1102 {
1103 register int j , dim ;
1104 dim = m.cols ; vector_create_noinit(dim,v) ;
1105 for( j=0 ; j < dim ; j++ ) v->elts[j] = m.elts[r][j] ;
1106 }
1107
1108
1109
1110 /*---------------------------------------------------------------------------*/
1111 /*!
1112 Convert vector v into array f.
1113 */
1114
vector_to_array(vector v,float * f)1115 void vector_to_array (vector v, float * f)
1116 {
1117 register int i;
1118
1119 for (i = 0; i < v.dim; i++)
1120 f[i] = v.elts[i];
1121 }
1122
1123
1124 /*---------------------------------------------------------------------------*/
1125 /*!
1126 Add vector a to vector b. Result is vector c.
1127 */
1128
vector_add(vector a,vector b,vector * c)1129 void vector_add (vector a, vector b, vector * c)
1130 {
1131 register int i, dim;
1132
1133 if (a.dim != b.dim)
1134 matrix_error ("Incompatible dimensions for vector addition");
1135
1136 dim = a.dim;
1137
1138 vector_create_noinit (dim, c);
1139
1140 for (i = 0; i < dim; i++)
1141 c->elts[i] = a.elts[i] + b.elts[i];
1142
1143 #ifdef ENABLE_FLOPS
1144 flops += dim ;
1145 #endif
1146 }
1147
1148
1149 /*---------------------------------------------------------------------------*/
1150 /*!
1151 Subtract vector b from vector a. Result is vector c.
1152 */
1153
vector_subtract(vector a,vector b,vector * c)1154 void vector_subtract (vector a, vector b, vector * c)
1155 {
1156 register int i, dim;
1157 register double *aa,*bb,*cc ;
1158
1159 if (a.dim != b.dim)
1160 matrix_error ("Incompatible dimensions for vector subtraction");
1161
1162 dim = a.dim;
1163
1164 vector_create_noinit (dim, c);
1165
1166 aa = a.elts ; bb = b.elts ; cc = c->elts ;
1167 for (i = 0; i < dim; i++)
1168 #if 0
1169 c->elts[i] = a.elts[i] - b.elts[i];
1170 #else
1171 cc[i] = aa[i] - bb[i] ;
1172 #endif
1173
1174 #ifdef ENABLE_FLOPS
1175 flops += dim ;
1176 #endif
1177 }
1178
1179 /*** for vector-matrix multiply routines below ***/
1180
1181 #define UNROLL_VECMUL /* RWCox */
1182
1183 /*---------------------------------------------------------------------------*/
1184 /*!
1185 Right multiply matrix a by vector b. Result is vector c.
1186 */
vector_multiply(matrix a,vector b,vector * c)1187 void vector_multiply (matrix a, vector b, vector * c)
1188 {
1189 register int rows, cols;
1190 register int i, j;
1191 register double *bb ;
1192 register double sum ;
1193 #ifdef DOTP
1194 register double **aa , *cc ;
1195 #else
1196 register double *aa ;
1197 #endif
1198
1199
1200 if (a.cols != b.dim){
1201 char str[444] ;
1202 sprintf(str,
1203 "Incompatible dimensions for vector multiplication: %dx%d X %d",
1204 a.rows,a.cols,b.dim ) ;
1205 matrix_error(str) ;
1206 }
1207
1208 rows = a.rows;
1209 cols = a.cols;
1210
1211 vector_create_noinit (rows, c);
1212
1213 if( cols <= 0 ){
1214 for( i=0 ; i < rows ; i++ ) c->elts[i] = 0.0 ;
1215 return ;
1216 }
1217
1218 bb = b.elts ;
1219
1220 #ifdef MATVEC
1221 MATVEC( a , b , *c ) ; /* 04 Mar 2005 */
1222
1223 #elif defined(DOTP) /* vectorized */
1224 aa = a.elts ; cc = c->elts ;
1225 i = rows%2 ;
1226 if( i == 1 ) DOTP(cols,aa[0],bb,cc) ;
1227 for( ; i < rows ; i+=2 ){
1228 DOTP(cols,aa[i] ,bb,cc+i ) ;
1229 DOTP(cols,aa[i+1],bb,cc+(i+1)) ;
1230 }
1231 #else
1232
1233 #ifdef UNROLL_VECMUL
1234 switch( cols%4 ){
1235 case 0:
1236 for (i = 0; i < rows; i++){
1237 sum = 0.0 ; aa = a.elts[i] ;
1238 for (j = 0; j < cols; j+=4 )
1239 sum += aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1240 c->elts[i] = sum ;
1241 }
1242 break ;
1243 case 1:
1244 for (i = 0; i < rows; i++){
1245 aa = a.elts[i] ; sum = aa[0]*bb[0] ;
1246 for (j = 1; j < cols; j+=4 )
1247 sum += aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1248 c->elts[i] = sum ;
1249 }
1250 break ;
1251 case 2:
1252 for (i = 0; i < rows; i++){
1253 aa = a.elts[i] ; sum = aa[0]*bb[0]+aa[1]*bb[1] ;
1254 for (j = 2; j < cols; j+=4 )
1255 sum += aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1256 c->elts[i] = sum ;
1257 }
1258 break ;
1259 case 3:
1260 for (i = 0; i < rows; i++){
1261 aa = a.elts[i] ; sum = aa[0]*bb[0]+aa[1]*bb[1]+aa[2]*bb[2] ;
1262 for (j = 3; j < cols; j+=4 )
1263 sum += aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1264 c->elts[i] = sum ;
1265 }
1266 break ;
1267 }
1268 #else
1269 for (i = 0; i < rows; i++){
1270 sum = 0.0 ; aa = a.elts[i] ;
1271 for (j = 0; j < cols; j++ ) sum += aa[j]*bb[j] ;
1272 c->elts[i] = sum ;
1273 }
1274 #endif /* UNROLL_VECMUL */
1275
1276 #endif /* MATVEC, DOTP */
1277
1278 #ifdef ENABLE_FLOPS
1279 flops += 2.0*rows*cols ;
1280 dotsum += rows*cols ; dotnum += rows ;
1281 #endif
1282 return ;
1283 }
1284
1285 /*---------------------------------------------------------------------------*/
1286 /*!
1287 Right multiply matrix a-transpose by vector b. Result is vector c.
1288 */
vector_multiply_transpose(matrix a,vector b,vector * c)1289 void vector_multiply_transpose (matrix a, vector b, vector * c)
1290 {
1291 register int rows, cols;
1292 register int i, j;
1293 register double *bb ;
1294 register double bj ;
1295 register double *aa , *cc ;
1296
1297 if (a.rows != b.dim){
1298 char str[444] ;
1299 sprintf(str,
1300 "Incompatible dimensions for vector_multiply_transpose: [%dx%d]' X %d",
1301 a.rows,a.cols,b.dim ) ;
1302 matrix_error(str) ;
1303 }
1304
1305 rows = a.rows; cols = a.cols;
1306
1307 vector_create(cols, c); /* initialized to 0 */
1308
1309 if( rows <= 0 ) return ;
1310
1311 bb = b.elts ; cc = c->elts ;
1312
1313 #ifdef UNROLL_VECMUL
1314 switch( cols%4 ){
1315 case 0:
1316 for( j=0 ; j < rows ; j++ ){
1317 aa = a.elts[j] ; bj = bb[j] ;
1318 for( i=0 ; i < cols ; i+=4 ){
1319 cc[i] += aa[i] *bj ;
1320 cc[i+1] += aa[i+1]*bj ;
1321 cc[i+2] += aa[i+2]*bj ;
1322 cc[i+3] += aa[i+3]*bj ;
1323 }
1324 }
1325 break ;
1326
1327 case 1:
1328 for( j=0 ; j < rows ; j++ ){
1329 aa = a.elts[j] ; bj = bb[j] ;
1330 cc[0] += aa[0]*bj ;
1331 for( i=1 ; i < cols ; i+=4 ){
1332 cc[i] += aa[i] *bj ;
1333 cc[i+1] += aa[i+1]*bj ;
1334 cc[i+2] += aa[i+2]*bj ;
1335 cc[i+3] += aa[i+3]*bj ;
1336 }
1337 }
1338 break ;
1339
1340 case 2:
1341 for( j=0 ; j < rows ; j++ ){
1342 aa = a.elts[j] ; bj = bb[j] ;
1343 cc[0] += aa[0]*bj ;
1344 cc[1] += aa[1]*bj ;
1345 for( i=2 ; i < cols ; i+=4 ){
1346 cc[i] += aa[i] *bj ;
1347 cc[i+1] += aa[i+1]*bj ;
1348 cc[i+2] += aa[i+2]*bj ;
1349 cc[i+3] += aa[i+3]*bj ;
1350 }
1351 }
1352 break ;
1353
1354 case 3:
1355 for( j=0 ; j < rows ; j++ ){
1356 aa = a.elts[j] ; bj = bb[j] ;
1357 cc[0] += aa[0]*bj ;
1358 cc[1] += aa[1]*bj ;
1359 cc[2] += aa[2]*bj ;
1360 for( i=3 ; i < cols ; i+=4 ){
1361 cc[i] += aa[i] *bj ;
1362 cc[i+1] += aa[i+1]*bj ;
1363 cc[i+2] += aa[i+2]*bj ;
1364 cc[i+3] += aa[i+3]*bj ;
1365 }
1366 }
1367 }
1368 #else
1369 for( j=0 ; j < rows ; j++ ){
1370 aa = a.elts[j] ; bj = bb[j] ;
1371 for( i=0 ; i < cols ; i++ ) cc[i] += aa[i]*bj ;
1372 }
1373 #endif
1374
1375 #ifdef ENABLE_FLOPS
1376 flops += 2.0*rows*cols ;
1377 dotsum += rows*cols ; dotnum += cols ;
1378 #endif
1379 return ;
1380 }
1381
1382 /*---------------------------------------------------------------------------*/
1383 /*!
1384 Compute d = c-a*b: a is a matrix; b,c,d are vectors -- RWCox
1385 26 Feb 2002: return value is sum of squares of d vector
1386 */
1387
vector_multiply_subtract(matrix a,vector b,vector c,vector * d)1388 double vector_multiply_subtract (matrix a, vector b, vector c, vector * d)
1389 {
1390 register int rows, cols;
1391 register int i, j;
1392 register double *bb ;
1393 #ifdef DOTP
1394 double qsum,sum , **aa , *dd,*cc,*ee ;
1395 #else
1396 register double qsum,sum, *aa ;
1397 #endif
1398
1399
1400 if (a.cols != b.dim || a.rows != c.dim )
1401 matrix_error ("Incompatible dimensions for vector multiplication-subtraction");
1402
1403 rows = a.rows;
1404 cols = a.cols;
1405
1406 vector_create_noinit (rows, d);
1407
1408 if( cols <= 0 ){
1409 qsum = 0.0 ;
1410 for( i=0 ; i < rows ; i++ ){
1411 d->elts[i] = c.elts[i] ;
1412 qsum += d->elts[i] * d->elts[i] ;
1413 }
1414 return qsum ;
1415 }
1416
1417 qsum = 0.0 ; bb = b.elts ;
1418
1419 #ifdef DOTP /* vectorized */
1420 aa = a.elts ; dd = d->elts ; cc = c.elts ;
1421 ee = (double *)malloc(sizeof(double)*rows) ;
1422 i = rows%2 ;
1423 if( i == 1 ) DOTP(cols,aa[0],bb,ee) ;
1424 for( ; i < rows ; i+=2 ){
1425 DOTP(cols,aa[i] ,bb,ee+i ) ;
1426 DOTP(cols,aa[i+1],bb,ee+(i+1)) ;
1427 }
1428 VSUB(rows,cc,ee,dd) ;
1429 DOTP(rows,dd,dd,&qsum) ;
1430 free((void *)ee) ;
1431 #else
1432
1433 #ifdef UNROLL_VECMUL
1434 switch( cols%4 ){
1435 case 0:
1436 for (i = 0; i < rows; i++){
1437 aa = a.elts[i] ; sum = c.elts[i] ;
1438 for (j = 0; j < cols; j+=4 )
1439 sum -= aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1440 d->elts[i] = sum ; qsum += sum*sum ;
1441 }
1442 break ;
1443 case 1:
1444 for (i = 0; i < rows; i++){
1445 aa = a.elts[i] ; sum = c.elts[i]-aa[0]*bb[0] ;
1446 for (j = 1; j < cols; j+=4 )
1447 sum -= aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1448 d->elts[i] = sum ; qsum += sum*sum ;
1449 }
1450 break ;
1451 case 2:
1452 for (i = 0; i < rows; i++){
1453 aa = a.elts[i] ; sum = c.elts[i]-aa[0]*bb[0]-aa[1]*bb[1] ;
1454 for (j = 2; j < cols; j+=4 )
1455 sum -= aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1456 d->elts[i] = sum ; qsum += sum*sum ;
1457 }
1458 break ;
1459 case 3:
1460 for (i = 0; i < rows; i++){
1461 aa = a.elts[i] ; sum = c.elts[i]-aa[0]*bb[0]-aa[1]*bb[1]-aa[2]*bb[2] ;
1462 for (j = 3; j < cols; j+=4 )
1463 sum -= aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1464 d->elts[i] = sum ; qsum += sum*sum ;
1465 }
1466 break ;
1467 }
1468 #else
1469 for (i = 0; i < rows; i++){
1470 aa = a.elts[i] ; sum = c.elts[i] ;
1471 for (j = 0; j < cols; j++) sum -= aa[j] * bb[j] ;
1472 d->elts[i] = sum ; qsum += sum*sum ;
1473 }
1474 #endif /* UNROLL_VECMUL */
1475
1476 #endif /* DOTP */
1477
1478 #ifdef ENABLE_FLOPS
1479 flops += 2.0*rows*(cols+1) ;
1480 dotsum += rows*cols ; dotnum += rows ;
1481 #endif
1482
1483 return qsum ; /* 26 Feb 2003 */
1484 }
1485
1486 /*---------------------------------------------------------------------------*/
1487 /*!
1488 Calculate dot product of vector a with vector b.
1489 */
1490
vector_dot(vector a,vector b)1491 double vector_dot (vector a, vector b)
1492 {
1493 register int i, dim;
1494 register double sum;
1495 register double *aa , *bb ;
1496
1497 if (a.dim != b.dim)
1498 matrix_error ("Incompatible dimensions for vector dot product");
1499
1500 dim = a.dim;
1501
1502 sum = 0.0;
1503 aa = a.elts ; bb = b.elts ;
1504 for (i = 0; i < dim; i++)
1505 #if 0
1506 sum += a.elts[i] * b.elts[i];
1507 #else
1508 sum += aa[i] * bb[i] ;
1509 #endif
1510
1511 #ifdef ENABLE_FLOPS
1512 flops += 2.0*dim ;
1513 #endif
1514 return (sum);
1515 }
1516
1517 /*--------------------------------------------------------------------------*/
1518 /*!
1519 Calculate dot product of vector a with itself -- 28 Dec 2001, RWCox.
1520 */
1521
vector_dotself(vector a)1522 double vector_dotself( vector a )
1523 {
1524 register int i, dim;
1525 register double sum;
1526 register double *aa ;
1527
1528 dim = a.dim;
1529 sum = 0.0;
1530 aa = a.elts ;
1531 for (i = 0; i < dim; i++)
1532 #if 0
1533 sum += a.elts[i] * a.elts[i];
1534 #else
1535 sum += aa[i] * aa[i] ;
1536 #endif
1537
1538 #ifdef ENABLE_FLOPS
1539 flops += 2.0*dim ;
1540 #endif
1541 return (sum);
1542 }
1543
1544 /*---------------------------------------------------------------------------*/
1545 /*!
1546 Compute the L_infinity norm of a matrix: the max absolute row sum.
1547 */
1548
matrix_norm(matrix a)1549 double matrix_norm( matrix a )
1550 {
1551 int i,j , rows=a.rows, cols=a.cols ;
1552 double sum , smax=0.0 ;
1553
1554 for (i = 0; i < rows; i++){
1555 sum = 0.0 ;
1556 for (j = 0; j < cols; j++) sum += fabs(a.elts[i][j]) ;
1557 if( sum > smax ) smax = sum ;
1558 }
1559 #ifdef ENABLE_FLOPS
1560 flops += 2.0*rows*cols ;
1561 #endif
1562 return smax ;
1563 }
1564
1565 /*---------------------------------------------------------------------------*/
1566
matrix_frobenius(matrix a)1567 double matrix_frobenius( matrix a ) /* sum of squares of all elements */
1568 {
1569 int i,j , rows=a.rows, cols=a.cols ;
1570 double sum=0.0 , **aa=a.elts ;
1571
1572 for (i = 0; i < rows; i++){
1573 for (j = 0; j < cols; j++) sum += aa[i][j] * aa[i][j] ;
1574 }
1575 #ifdef ENABLE_FLOPS
1576 flops += 2.0*rows*cols ;
1577 #endif
1578 return sum ;
1579 }
1580
1581 /*---------------------------------------------------------------------------*/
1582
matrix_colsqsums(matrix a,vector * v)1583 void matrix_colsqsums( matrix a , vector *v )
1584 {
1585 int i,j , rows=a.rows , cols=a.cols ;
1586 double sum , **aa , *vp ;
1587
1588 vector_create_noinit( cols , v ) ; vp = v->elts ; aa = a.elts ;
1589
1590 for( j=0 ; j < cols ; j++ ){
1591 for( sum=0.0,i=0 ; i < rows ; i++ ) sum += aa[i][j] * aa[i][j] ;
1592 vp[j] = sqrt(sum) ;
1593 }
1594 return ;
1595 }
1596
1597 /*---------------------------------------------------------------------------*/
1598 /*! Search a matrix for nearly identical column pairs, where "nearly identical"
1599 means they are correlated closer than 1-eps.
1600
1601 Return is a pointer to an int array of the form
1602 [ i1 j1 i2 j2 ... -1 -1 ]
1603 where columns (i1,j1) are nearly the same, (i2,j2) also, etc.
1604 In addition:
1605 - A pair (i,-1) indicates that column #i is all zeros.
1606 - The array is terminated with the pair (-1,-1).
1607 - If there are no bad column pairs or all-zero columns, NULL is returned.
1608 - Pairs of all-zero columns are NOT reported.
1609 - The array should be free()-ed when you are done with it.
1610 -----------------------------------------------------------------------------*/
1611
matrix_check_columns(matrix a,double eps)1612 int * matrix_check_columns( matrix a , double eps ) /* 14 Jul 2004 */
1613 {
1614 int i,j,k , rows=a.rows , cols=a.cols ;
1615 int *iar=NULL , nar=0 ;
1616 double sumi,sumj,sumd ;
1617
1618 if( eps <= 0.0 ) eps = 1.e-5 ;
1619
1620 for( i=0 ; i < cols ; i++ ){
1621 sumi = 0.0 ;
1622 for( k=0 ; k < rows ; k++ ) sumi += a.elts[k][i] * a.elts[k][i] ;
1623 if( sumi <= 0.0 ){
1624 iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
1625 iar[2*nar] = i ; iar[2*nar+1] = -1 ; nar++ ;
1626 continue ; /* skip to next column i */
1627 }
1628 for( j=i+1 ; j < cols ; j++ ){
1629 sumj = sumd = 0.0 ;
1630 for( k=0 ; k < rows ; k++ ){
1631 sumj += a.elts[k][j] * a.elts[k][j] ;
1632 sumd += a.elts[k][j] * a.elts[k][i] ;
1633 }
1634 if( sumj > 0.0 ){
1635 sumd = fabs(sumd) / sqrt(sumi*sumj) ;
1636 if( sumd >= 1.0-eps ){
1637 iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
1638 iar[2*nar] = i ; iar[2*nar+1] = j ; nar++ ;
1639 }
1640 }
1641 }
1642 }
1643
1644 if( iar != NULL ){
1645 iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
1646 iar[2*nar] = iar[2*nar+1] = -1 ;
1647 }
1648
1649 return iar ;
1650 }
1651
1652 /*---------------------------------------------------------------------------*/
1653 /*! Return the sqrt(eigenvalues) of matrix X-transpose X, scaled to diagonal 1.
1654 The output points to a vector of doubles, of length X.cols. This
1655 should be free()-ed when you are done with it.
1656 -----------------------------------------------------------------------------*/
1657
matrix_singvals(matrix X)1658 double * matrix_singvals( matrix X ) /* 14 Jul 2004 */
1659 {
1660 int i,j,k , M=X.rows , N=X.cols ;
1661 double *a , *e , sum ;
1662
1663 a = (double *) malloc( sizeof(double)*N*N ) ;
1664 e = (double *) malloc( sizeof(double)*N ) ;
1665
1666 for( i=0 ; i < N ; i++ ){
1667 for( j=0 ; j <= i ; j++ ){
1668 sum = 0.0 ;
1669 for( k=0 ; k < M ; k++ ) sum += X.elts[k][i] * X.elts[k][j] ;
1670 a[j+N*i] = sum ;
1671 if( j < i ) a[i+N*j] = sum ;
1672 }
1673 }
1674
1675 for( i=0 ; i < N ; i++ ){
1676 if( a[i+N*i] > 0.0 ) e[i] = 1.0 / sqrt(a[i+N*i]) ;
1677 else e[i] = 1.0 ;
1678 }
1679 for( i=0 ; i < N ; i++ ){
1680 for( j=0 ; j < N ; j++ ) a[j+N*i] *= e[i]*e[j] ;
1681 }
1682
1683 symeigval_double( N , a , e ) ;
1684 free( (void *)a ) ;
1685 for( i=0 ; i < N ; i++ ){
1686 e[i] = (e[i] <= 0.0) ? 0.0 : sqrt(e[i]) ;
1687 }
1688 #ifdef ENABLE_FLOPS
1689 flops += (M+N+2.0)*N*N+10.0*N ;
1690 #endif
1691 return e ;
1692 }
1693
1694 /*---------------------------------------------------------------------------*/
1695
1696 extern void svd_double( int, int, double *, double *, double *, double * ) ;
1697
1698 static double pseps = 1.e-16 ;
1699
matrix_psinv_seteps(double eps)1700 void matrix_psinv_seteps( double eps )
1701 {
1702 pseps = (eps > 0.0) ? eps : 1.e-16 ;
1703 }
1704
1705 /*---------------------------------------------------------------------------*/
1706 /*! Given MxN matrix X, return the NxN matrix
1707
1708 [ T ]-1 [ T ]-1 T
1709 [ X X ] and the NxM matrix [ X X ] X
1710 -----------------------------------------------------------------------------*/
1711
matrix_psinv(matrix X,matrix * XtXinv,matrix * XtXinvXt)1712 void matrix_psinv( matrix X , matrix *XtXinv , matrix *XtXinvXt )
1713 {
1714 int m = X.rows , n = X.cols , ii,jj,kk ;
1715 double *amat , *umat , *vmat , *sval , *xfac , smax,del,sum ;
1716
1717 if( m < 1 || n < 1 || m < n || (XtXinv == NULL && XtXinvXt == NULL) ) return;
1718
1719 amat = (double *)calloc( sizeof(double),m*n ) ; /* input matrix */
1720 umat = (double *)calloc( sizeof(double),m*n ) ; /* left singular vectors */
1721 vmat = (double *)calloc( sizeof(double),n*n ) ; /* right singular vectors */
1722 sval = (double *)calloc( sizeof(double),n ) ; /* singular values */
1723 xfac = (double *)calloc( sizeof(double),n ) ; /* column norms of [a] */
1724
1725 #undef A
1726 #undef U
1727 #undef V
1728 #define A(i,j) amat[(i)+(j)*m]
1729 #define U(i,j) umat[(i)+(j)*m]
1730 #define V(i,j) vmat[(i)+(j)*n]
1731
1732 /* copy input matrix into amat */
1733
1734 for( ii=0 ; ii < m ; ii++ )
1735 for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
1736
1737 /* scale each column to have norm 1 */
1738
1739 for( jj=0 ; jj < n ; jj++ ){
1740 sum = 0.0 ;
1741 for( ii=0 ; ii < m ; ii++ ) sum += A(ii,jj)*A(ii,jj) ;
1742 if( sum > 0.0 ) sum = 1.0/sqrt(sum) ;
1743 xfac[jj] = sum ;
1744 for( ii=0 ; ii < m ; ii++ ) A(ii,jj) *= sum ;
1745 }
1746
1747 /* compute SVD of scaled matrix */
1748
1749 svd_double( m , n , amat , sval , umat , vmat ) ;
1750
1751 free((void *)amat) ; /* done with this */
1752
1753 /* find largest singular value */
1754
1755 smax = sval[0] ;
1756 for( ii=1 ; ii < n ; ii++ )
1757 if( sval[ii] > smax ) smax = sval[ii] ;
1758
1759 if( smax <= 0.0 ){ /* this is bad */
1760 free((void *)xfac); free((void *)sval);
1761 free((void *)vmat); free((void *)umat);
1762 return;
1763 }
1764
1765 for( ii=0 ; ii < n ; ii++ )
1766 if( sval[ii] < 0.0 ) sval[ii] = 0.0 ;
1767
1768 #ifdef FLOATIZE
1769 #define PSINV_EPS 1.e-8
1770 #else
1771 #define PSINV_EPS pseps
1772 #endif
1773
1774 /* "reciprocals" of singular values: 1/s is actually s/(s^2+del) */
1775
1776 del = PSINV_EPS * smax*smax ;
1777 for( ii=0 ; ii < n ; ii++ )
1778 sval[ii] = sval[ii] / ( sval[ii]*sval[ii] + del ) ;
1779
1780 /* create pseudo-inverse */
1781
1782 if( XtXinvXt != NULL ){
1783 matrix_create( n , m , XtXinvXt ) ;
1784 for( ii=0 ; ii < n ; ii++ ){
1785 for( jj=0 ; jj < m ; jj++ ){
1786 sum = 0.0 ;
1787 for( kk=0 ; kk < n ; kk++ )
1788 sum += sval[kk] * V(ii,kk) * U(jj,kk) ;
1789 XtXinvXt->elts[ii][jj] = sum * xfac[ii] ;
1790 }
1791 }
1792 }
1793
1794 if( XtXinv != NULL ){
1795 matrix_create( n , n , XtXinv ) ;
1796 for( ii=0 ; ii < n ; ii++ ) sval[ii] = sval[ii] * sval[ii] ;
1797 matrix_create( n , n , XtXinv ) ;
1798 for( ii=0 ; ii < n ; ii++ ){
1799 for( jj=0 ; jj < n ; jj++ ){
1800 sum = 0.0 ;
1801 for( kk=0 ; kk < n ; kk++ )
1802 sum += sval[kk] * V(ii,kk) * V(jj,kk) ;
1803 XtXinv->elts[ii][jj] = sum * xfac[ii] * xfac[jj] ;
1804 }
1805 }
1806 }
1807
1808 #ifdef ENABLE_FLOPS
1809 flops += n*n*(n+2.0*m+2.0) ;
1810 #endif
1811 free((void *)xfac); free((void *)sval);
1812 free((void *)vmat); free((void *)umat);
1813 return;
1814 }
1815
1816 /*---------------------------------------------------------------------------*/
1817 /*! Check a matrix for column collinearity and adjust it (via the SVD)
1818 if it needs it. Return value is -1 if there is an error, 0 if no
1819 fix was made, or the number of bad singular values if a fix was made
1820 -- in which case the adjusted matrix is in Xa.
1821 *//*-------------------------------------------------------------------------*/
1822 #if 0
1823 int matrix_collinearity_fixup( matrix X , matrix *Xa )
1824 {
1825 int m = X.rows , n = X.cols , ii,jj,kk , nbad ;
1826 double *amat , *umat , *vmat , *sval , *xfac , smax,del,sum ;
1827
1828 if( m < 1 || n < 1 || m < n ) return (-1) ;
1829
1830 amat = (double *)calloc( sizeof(double),m*n ) ; /* input matrix */
1831 umat = (double *)calloc( sizeof(double),m*n ) ; /* left singular vectors */
1832 vmat = (double *)calloc( sizeof(double),n*n ) ; /* right singular vectors */
1833 sval = (double *)calloc( sizeof(double),n ) ; /* singular values */
1834 xfac = (double *)calloc( sizeof(double),n ) ; /* column norms of [a] */
1835
1836 #undef A
1837 #undef U
1838 #undef V
1839 #define A(i,j) amat[(i)+(j)*m]
1840 #define U(i,j) umat[(i)+(j)*m]
1841 #define V(i,j) vmat[(i)+(j)*n]
1842
1843 /* copy input matrix into amat */
1844
1845 for( ii=0 ; ii < m ; ii++ )
1846 for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
1847
1848 /* scale each column to have L2 norm 1 */
1849
1850 for( jj=0 ; jj < n ; jj++ ){
1851 sum = 0.0 ;
1852 for( ii=0 ; ii < m ; ii++ ) sum += A(ii,jj)*A(ii,jj) ;
1853 xfac[jj] = sqrt(sum) ;
1854 if( sum > 0.0 ){
1855 sum = 1.0 / xfac[jj] ;
1856 for( ii=0 ; ii < m ; ii++ ) A(ii,jj) *= sum ;
1857 }
1858 }
1859
1860 /* compute SVD of scaled matrix */
1861
1862 svd_double( m , n , amat , sval , umat , vmat ) ;
1863
1864 free((void *)amat) ; /* done with this */
1865
1866 /* find largest singular value */
1867
1868 if( sval[0] < 0.0 ) sval[0] = 0.0 ;
1869 smax = sval[0] ;
1870 for( ii=1 ; ii < n ; ii++ ){
1871 if( sval[ii] > smax ) smax = sval[ii] ;
1872 else if( sval[ii] < 0.0 ) sval[ii] = 0.0 ;
1873 }
1874
1875 if( smax == 0.0 ){ /* this is bad */
1876 free((void *)xfac); free((void *)sval);
1877 free((void *)vmat); free((void *)umat);
1878 return (-1);
1879 }
1880
1881 /* adjust small singular values upwards */
1882
1883 del = 1.e-6 * smax ;
1884 for( nbad=ii=0 ; ii < n ; ii++ )
1885 if( sval[ii] < del ){ sval[ii] = del ; nbad++ ; }
1886
1887 /* if all were OK, then nothing more needs to be done */
1888
1889 if( nbad == 0 || Xa == NULL ){
1890 free((void *)xfac); free((void *)sval);
1891 free((void *)vmat); free((void *)umat);
1892 return (nbad);
1893 }
1894
1895 /* create and compute output matrix */
1896
1897 matrix_create( m , n , Xa ) ;
1898
1899 for( ii=0 ; ii < m ; ii++ ){
1900 for( jj = 0 ; jj < n ; jj++ ){
1901 sum = 0.0 ;
1902 for( kk=0 ; kk < n ; kk++ )
1903 sum += sval[kk] * U(ii,kk) * V(jj,kk) ;
1904 Xa->elts[ii][jj] = sum * xfac[jj] ;
1905 }
1906 }
1907
1908 free((void *)xfac); free((void *)sval);
1909 free((void *)vmat); free((void *)umat);
1910 return (nbad);
1911 }
1912 #endif
1913
1914 /*---------------------------------------------------------------------------*/
1915
1916 static int allow_desing = 0 ;
matrix_allow_desing(int ii)1917 void matrix_allow_desing( int ii ){ allow_desing = ii ; }
1918
1919 /*---------------------------------------------------------------------------*/
1920 /*! Given MxN matrix X, compute the NxN upper triangle factor R in X = QR.
1921 Must have M >= N (more rows than columns).
1922 Q is not computed. If you want Q, then compute it as [Q] = [X] * inv[R].
1923 The return value is the number of diagonal elements that were adjusted
1924 because they were very tiny (should be 0 except in collinear cases).
1925 *//*-------------------------------------------------------------------------*/
1926
matrix_qrr(matrix X,matrix * R)1927 int matrix_qrr( matrix X , matrix *R )
1928 {
1929 int m=X.rows , n=X.cols , ii,jj,kk , m1=m-1 , nsing=0 ;
1930 double *amat , x1 ;
1931 register double alp, sum , *uvec, *Ak;
1932
1933 if( m < 2 || n < 1 || m < n || R == NULL || X.elts == NULL ) return (-1) ;
1934
1935 #undef A
1936 #define A(i,j) amat[(i)+(j)*m]
1937
1938 amat = (double *)malloc( sizeof(double)*m*n ) ; /* copy input matrix */
1939 uvec = (double *)malloc( sizeof(double)*m ) ; /* Householder vector */
1940
1941 /* copy input matrix into amat == A */
1942
1943 for( ii=0 ; ii < m ; ii++ )
1944 for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
1945
1946 if( allow_desing )
1947 nsing = svd_desingularize( m , n , amat ) ; /* Ides of March, 2010 */
1948
1949 /* Householder transform each column of A in turn */
1950
1951 for( jj=0 ; jj < n ; jj++ ){
1952 if( jj == m1 ) break ; /* at last column AND have m==n */
1953 x1 = uvec[jj] = A(jj,jj) ;
1954 for( sum=0.0,ii=jj+1 ; ii < m ; ii++ ){
1955 uvec[ii] = alp = A(ii,jj) ; sum += alp*alp ;
1956 }
1957 if( sum == 0.0 ) continue ; /* tail of column is pre-reduced to 0 */
1958 alp = sqrt(sum+x1*x1) ; if( x1 > 0.0 ) alp = -alp ;
1959 x1 = uvec[jj] -= alp ; A(jj,jj) = alp ;
1960 alp = 2.0 / (sum+x1*x1) ;
1961 for( kk=jj+1 ; kk < n ; kk++ ){ /* process trailing columns */
1962 Ak = amat + kk*m ;
1963 for( sum=0.0,ii=jj ; ii < m1 ; ii+=2 )
1964 sum += uvec[ii]*Ak[ii] + uvec[ii+1]*Ak[ii+1] ;
1965 if( ii == m1 ) sum += uvec[m1]*Ak[m1] ;
1966 sum *= alp ;
1967 for( ii=jj ; ii < m1 ; ii+=2 ){
1968 Ak[ii] -= sum*uvec[ii] ; Ak[ii+1] -= sum*uvec[ii+1] ;
1969 }
1970 if( ii == m1 ) Ak[m1] -= sum*uvec[m1] ;
1971 }
1972 }
1973
1974 /* copy result in A to output
1975 (changing row signs if needed to make R's diagonal non-negative) */
1976
1977 matrix_create( n , n , R ) ;
1978 for( ii=0 ; ii < n ; ii++ ){
1979 for( jj=0 ; jj < ii ; jj++ ) R->elts[ii][jj] = 0.0 ; /* sub-diagonal */
1980 if( A(ii,ii) >= 0.0 )
1981 for( jj=ii ; jj < n ; jj++ ) R->elts[ii][jj] = A(ii,jj) ;
1982 else
1983 for( jj=ii ; jj < n ; jj++ ) R->elts[ii][jj] = -A(ii,jj) ;
1984 }
1985
1986 #if 0
1987 fprintf(stderr,"matrix_qrr diagonal:") ;
1988 for( ii=0 ; ii < n ; ii++ ) fprintf(stderr," %g",R->elts[ii][ii]) ;
1989 fprintf(stderr,"\n") ;
1990 #endif
1991
1992 #if 0
1993 /* adjust tiny (or zero) diagonal elements, for solution stability */
1994
1995 for( kk=ii=0 ; ii < n ; ii++ ){
1996 sum = 0.0 ;
1997 for( jj=0 ; jj < ii ; jj++ ) sum += fabs( R->elts[jj][ii] ) ;
1998 for( jj=ii+1 ; jj < n ; jj++ ) sum += fabs( R->elts[ii][jj] ) ;
1999 sum = (sum==0.0) ? 1.0 : sum/n ;
2000 sum *= 1.e-7 ; alp = R->elts[ii][ii] ; /* alp >= 0 from above */
2001 if( alp < sum ){
2002 kk++ ; R->elts[ii][ii] = alp + sum*(sum-alp)/(sum+alp) ;
2003 }
2004 }
2005 #endif
2006
2007 #ifdef ENABLE_FLOPS
2008 flops += n*n*(2*m-0.666*n) ;
2009 #endif
2010
2011 free((void *)uvec) ; free((void *)amat) ;
2012 return (nsing) ;
2013 }
2014
2015 /*----------------- below is a test program for matrix_qrr() -----------------*/
2016
2017 #if 0
2018 #include <stdlib.h>
2019 #include <math.h>
2020 #include <stdio.h>
2021 #include "matrix.h"
2022 int main( int argc , char *argv[] )
2023 {
2024 matrix X , R , Xt , XtX ;
2025 int m , n , ii,jj ;
2026
2027 matrix_initialize(&X) ; matrix_initialize(&R) ;
2028 matrix_initialize(&Xt); matrix_initialize(&XtX);
2029
2030 if( argc < 3 ) exit(0);
2031
2032 m = (int)strtod(argv[1],NULL) ; if( m < 2 ) exit(0) ;
2033 n = (int)strtod(argv[2],NULL) ; if( n < 1 || n > m ) exit(0) ;
2034
2035 matrix_create( m , n , &X ) ;
2036 for( jj=0 ; jj < n ; jj++ )
2037 for( ii=0 ; ii < m ; ii++ )
2038 X.elts[ii][jj] = sin(1.0+ii+jj) + cos((1.0+ii*jj)/(1.0+ii+jj)) ;
2039
2040 matrix_transpose( X , &Xt ) ;
2041 matrix_multiply ( Xt , X , &XtX ) ;
2042 printf("=== X matrix:\n") ; matrix_print(X) ;
2043 printf("=== XtX matrix:\n") ; matrix_print(XtX) ;
2044
2045 matrix_qrr( X , &R ) ;
2046 printf("=== R matrix:\n") ; matrix_print(R) ;
2047
2048 matrix_transpose( R , &Xt ) ;
2049 matrix_multiply( Xt , R , &XtX ) ;
2050 printf("=== RtR matrix:\n") ; matrix_print(XtX) ;
2051 exit(0) ;
2052 }
2053 #endif
2054
2055 /*---------------------------------------------------------------------------*/
2056 /*! Solve [R] [x] = [b] for [x] where R is upper triangular. */
2057
vector_rr_solve(matrix R,vector b,vector * x)2058 void vector_rr_solve( matrix R , vector b , vector *x )
2059 {
2060 register int n , ii,jj , n1 ;
2061 register double sum , *xp , *bp , *rr ;
2062
2063 n = R.rows ; n1 = n-1 ;
2064 if( n < 1 || R.cols != n || x == NULL ) return ;
2065
2066 vector_create_noinit( n , x ) ; xp = x->elts ; bp = b.elts ;
2067
2068 /* backwards loop, from last element to first */
2069
2070 for( ii=n-1 ; ii >= 0 ; ii-- ){
2071 rr = R.elts[ii] ; sum = bp[ii] ;
2072 for( jj=ii+1 ; jj < n1 ; jj+=2 ) /* unrolled by 2 */
2073 sum -= rr[jj] * xp[jj] + rr[jj+1] * xp[jj+1] ;
2074 if( jj == n1 ) sum -= rr[jj] * xp[jj] ; /* fix unroll if odd length */
2075 xp[ii] = sum / rr[ii] ;
2076 }
2077
2078 return ;
2079 }
2080
2081 /*---------------------------------------------------------------------------*/
2082 /*! Solve [R]' [x] = [b] for [x] where R is upper triangular. */
2083
vector_rrtran_solve(matrix R,vector b,vector * x)2084 void vector_rrtran_solve( matrix R , vector b , vector *x )
2085 {
2086 register int n , ii,jj , n1 ;
2087 register double sum , *xp , *bp , *rr ;
2088
2089 n = R.rows ; n1 = n-1 ;
2090 if( n < 1 || R.cols != n || x == NULL ) return ;
2091
2092 bp = b.elts ;
2093 #if 0 /* the obvious way, but is slower */
2094 vector_create_noinit( n , x ) ; xp = x->elts ;
2095 for( ii=0 ; ii < n ; ii++ ){
2096 for( sum=bp[ii],jj=0 ; jj < ii ; jj++ )
2097 sum -= R.elts[jj][ii] * xp[jj] ;
2098 xp[ii] = sum / R.elts[ii][ii] ;
2099 }
2100 #else /* the row ordered way, which is faster in cache */
2101 vector_equate( b , x ) ; xp = x->elts ;
2102 for( ii=0 ; ii < n ; ii++ ){
2103 rr = R.elts[ii] ; sum = xp[ii] = xp[ii] / rr[ii] ;
2104 for( jj=ii+1 ; jj < n1 ; jj+=2 ){ /* unrolled by 2 */
2105 xp[jj] -= rr[jj]*sum ; xp[jj+1] -= rr[jj+1]*sum ;
2106 }
2107 if( jj == n1 ) xp[jj] -= rr[jj]*sum ; /* fix unroll if odd length */
2108 }
2109 #endif
2110
2111 return ;
2112 }
2113
2114 /*---------------------------------------------------------------------------*/
2115 /*! Solve [R] [X] = [B] for matrix X, where R is upper triangular. */
2116
matrix_rr_solve(matrix R,matrix B,matrix * X)2117 void matrix_rr_solve( matrix R , matrix B , matrix *X )
2118 {
2119 int n=R.rows , m=B.cols , i,j ;
2120 vector u, v ;
2121
2122 if( R.cols != n || B.rows != n || X == NULL ) return ;
2123
2124 vector_initialize(&u) ; vector_initialize(&v) ;
2125 matrix_create( n , m , X ) ;
2126
2127 /* extract each column from B, solve, put resulting column into X */
2128
2129 for( j=0 ; j < m ; j++ ){
2130 column_to_vector( B , j , &u ) ;
2131 vector_rr_solve( R , u , &v ) ;
2132 for( i=0 ; i < n ; i++ ) X->elts[i][j] = v.elts[i] ;
2133 }
2134 vector_destroy(&v) ; vector_destroy(&u) ; return ;
2135 }
2136
2137 /*---------------------------------------------------------------------------*/
2138 /*! Solve [R'] [X] = [B] for matrix X, where R is upper triangular
2139 (and so R' is lower triangular). */
2140
matrix_rrtran_solve(matrix R,matrix B,matrix * X)2141 void matrix_rrtran_solve( matrix R , matrix B , matrix *X )
2142 {
2143 int n=R.rows , m=B.cols , i,j ;
2144 vector u, v ;
2145
2146 if( R.cols != n || B.rows != n || X == NULL ) return ;
2147
2148 vector_initialize(&u) ; vector_initialize(&v) ;
2149 matrix_create( n , m , X ) ;
2150
2151 /* extract each column from B, solve, put resulting column into X */
2152
2153 for( j=0 ; j < m ; j++ ){
2154 column_to_vector( B , j , &u ) ;
2155 vector_rrtran_solve( R , u , &v ) ;
2156 for( i=0 ; i < n ; i++ ) X->elts[i][j] = v.elts[i] ;
2157 }
2158 vector_destroy(&v) ; vector_destroy(&u) ; return ;
2159 }
2160
2161 /*---------------------------------------------------------------------------*/
2162 /* not current used anywhere */
2163
2164 #if 0
2165 int matrix_desingularize( matrix X )
2166 {
2167 int m = X.rows , n = X.cols , ii,jj , nfix ;
2168 double *amat , *xfac , sum ;
2169
2170 if( m < 1 || n < 1 ) return -1 ;
2171
2172 amat = (double *)calloc( sizeof(double),m*n ) ; /* input matrix */
2173 xfac = (double *)calloc( sizeof(double),n ) ; /* column norms of [a] */
2174
2175 #undef A
2176 #define A(i,j) amat[(i)+(j)*m]
2177
2178 /* copy input matrix into amat */
2179
2180 for( ii=0 ; ii < m ; ii++ )
2181 for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
2182
2183 /* scale each column to have norm 1 */
2184
2185 for( jj=0 ; jj < n ; jj++ ){
2186 #if 0
2187 sum = 0.0 ;
2188 for( ii=0 ; ii < m ; ii++ ) sum += A(ii,jj)*A(ii,jj) ;
2189 if( sum > 0.0 ){
2190 xfac[jj] = sqrt(sum) ;
2191 sum = 1.0 / sum ;
2192 for( ii=0 ; ii < m ; ii++ ) A(ii,jj) *= sum ;
2193 } else {
2194 xfac[jj] = 1.0 ;
2195 }
2196 #else
2197 xfac[jj] = 1.0 ;
2198 #endif
2199 }
2200
2201 nfix = svd_desingularize( m , n , amat ) ;
2202
2203 if( nfix > 0 ){ /* put fixed values back in place */
2204 for( ii=0 ; ii < m ; ii++ ){
2205 for( jj=0 ; jj < n ; jj++ ){
2206 X.elts[ii][jj] = A(ii,jj) * xfac[jj] ;
2207 }
2208 }
2209 }
2210
2211 free(xfac) ; free(amat) ; return nfix ;
2212 }
2213 #endif
2214